
**Install requirements**

In [0]:
#!pip3 install 'torch==1.3.1'
#!pip3 install 'torchvision==0.4.2'
#!pip3 install 'Pillow-SIMD'
#!pip3 install 'tqdm'
#!pip3 install pillow==4.1.1 #if error: module 'PIL._webp' has no attribute 'HAVE_WEBPANIM'

**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn
from torch.autograd import Function

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import random
import collections
from mpl_toolkits import mplot3d
import copy

**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'; the models and every batch are moved to this device

NUM_CLASSES = 7 # 7 classes and 4 domains: Photo, Art painting, Cartoon, Sketch

BATCH_SIZE = 64  # Higher batch sizes allow for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to have comparable results

LR = 5e-4          # The initial learning rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 15      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 10       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 5    # Training statistics are printed whenever the global step counter is a multiple of this value

**Define Data Preprocessing**

In [0]:
# Preprocessing pipeline. Despite its name it is the only transform defined here and is
# applied to every PACS domain dataset built below, including those used for validation and testing.
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes the shorter side of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turns the PIL Image into a torch.Tensor
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) # Normalizes each channel with the given mean and standard deviation
                                                                                                         # (presumably the ImageNet statistics used by torchvision's pretrained models - confirm)
])

**Prepare Dataset**

In [0]:
# Clone github repository with data
if not os.path.isdir('./Homework3-PACS'):
  !git clone https://github.com/MachineLearning2020/Homework3-PACS

DATA_DIR = 'Homework3-PACS/PACS'

# Prepare Pytorch train/test Datasets
dataset_photo = torchvision.datasets.ImageFolder(os.path.join(DATA_DIR, "photo"), transform=train_transform) #Photo

dataset_art = torchvision.datasets.ImageFolder(os.path.join(DATA_DIR, "art_painting"), transform=train_transform) #Art Painting
dataset_cartoon = torchvision.datasets.ImageFolder(os.path.join(DATA_DIR, "cartoon"), transform=train_transform) #Cartoon
dataset_sketch = torchvision.datasets.ImageFolder(os.path.join(DATA_DIR, "sketch"), transform=train_transform) #Sketch

# Check dataset sizes
print('Photo: {}'.format(len(dataset_photo)))
print('Art Paintings: {}'.format(len(dataset_art)))
print('Cartoon: {}'.format(len(dataset_cartoon)))
print('Sketch: {}'.format(len(dataset_sketch)))

**Prepare Dataloaders**

In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide batching, shuffling and parallel loading.
# Every domain uses the same settings: shuffled batches of BATCH_SIZE images, 4 worker processes,
# and the incomplete last batch is dropped.
loader_options = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)

dataloader_photo = DataLoader(dataset_photo, **loader_options)

dataloader_art = DataLoader(dataset_art, **loader_options)
dataloader_cartoon = DataLoader(dataset_cartoon, **loader_options)
dataloader_sketch = DataLoader(dataset_sketch, **loader_options)

**Define Network**


In [0]:
from torch.hub import load_state_dict_from_url
# Download location of the pretrained AlexNet weights; read by alexnet(pretrained=True)
model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}

class ReverseLayerF(Function):
    """Gradient reversal layer.

    The forward pass returns the input values unchanged; the backward pass
    negates the incoming gradient and multiplies it by ``alpha``.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # Remember the scaling factor: backward() needs it
        ctx.alpha = alpha
        # Return a view of the input so the values pass through untouched
        passthrough = x.view_as(x)
        return passthrough

    @staticmethod
    def backward(ctx, grad_output):
        reversed_grad = -grad_output * ctx.alpha
        # Second slot is the gradient w.r.t. alpha, which is not differentiated
        return reversed_grad, None

class AlexNet(nn.Module):
    """AlexNet feature extractor with two fully connected heads.

    ``classifier`` predicts ``num_classes`` labels; ``Gd`` has the same layout but
    ends in 2 outputs and is fed through ``ReverseLayerF``. ``forward`` selects the
    head according to whether ``alpha`` is given.
    """

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()

        # Convolutional trunk shared by both heads
        conv_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Label head first, then the 2-way head, so layers are created in the same order as before
        self.classifier = self._fc_head(num_classes)
        self.Gd = self._fc_head(2)

    @staticmethod
    def _fc_head(out_features):
        """Build the dropout / linear / ReLU stack ending in ``out_features`` outputs."""
        return nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, out_features),
        )

    def forward(self, x, alpha=None):
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)
        if alpha is not None:
            # Any non-None alpha (including 0) routes through the gradient-reversal layer into Gd
            return self.Gd(ReverseLayerF.apply(flat, alpha))
        return self.classifier(flat)


def alexnet(pretrained=False, progress=True, **kwargs):
    r"""AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper,
    built from the local ``AlexNet`` class (label head plus ``Gd`` head).

    Args:
        pretrained (bool): If True, loads the weights found at ``model_urls['alexnet']``
            and initialises the first two linear layers of ``Gd`` with a *copy* of the
            corresponding ``classifier`` layers
        progress (bool): If True, displays a progress bar of the download to stderr
        **kwargs: forwarded to ``AlexNet`` (e.g. ``num_classes``)

    Returns:
        The constructed ``AlexNet`` instance.
    """
    model = AlexNet(**kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['alexnet'],
                                              progress=progress)
        # strict=False tolerates missing keys (the Gd head) but still raises on a shape
        # mismatch, so drop tensors that do not fit (e.g. classifier.6 when num_classes != 1000).
        own_state = model.state_dict()
        compatible = {name: tensor for name, tensor in state_dict.items()
                      if name in own_state and tensor.shape == own_state[name].shape}
        model.load_state_dict(compatible, strict=False)

        # Copy values instead of re-pointing `.data`: assigning `.data` would make Gd and
        # classifier share the same storage, tying the two heads together during training.
        with torch.no_grad():
            for layer_idx in (1, 4):
                model.Gd[layer_idx].weight.copy_(model.classifier[layer_idx].weight)
                model.Gd[layer_idx].bias.copy_(model.classifier[layer_idx].bias)
    return model

**Prepare Network**

In [0]:
net = alexnet() # Build the AlexNet model
                # NOTE(review): pretrained defaults to False, so this network starts from random weights - confirm that is intended

# AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet classes
# We need 7 outputs for PACS
net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                 # The convolutional layer is nn.Conv2d

# The last layer of the classifier head is now a new fully connected layer with NUM_CLASSES outputs

**Prepare Training**

In [0]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy
                                  # (the training functions reuse it for the 2-way Gd head as well)

parameters_to_optimize = net.parameters() # optimize over all the parameters (features, classifier and Gd)

# Define optimizer
# An optimizer updates the weights based on the gradients of the loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes the learning rate
# The most common schedule is the step(-down), which multiplies the learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

In [0]:
def train_step(net, second_dataloader, criterion, optimizer, alpha, current_step, train_losses, train_accuracies,
               source_dataloader=None, device=None, log_frequency=None):
  """Run one training epoch with domain adaptation.

  For every pair of batches (source, target) three losses are back-propagated before a
  single optimizer step: the label loss on the source batch, and the Gd loss on the
  source batch (domain label 1) and on the target batch (domain label 0), both computed
  with ``net(..., alpha=alpha)``.

  Args:
    net: model called as ``net(images)`` and ``net(images, alpha=alpha)``.
    second_dataloader: iterable of target-domain batches; only element 0 (images) is used.
    criterion: loss function applied to (outputs, integer targets).
    optimizer: optimizer stepped once per batch pair.
    alpha: value forwarded to ``net`` for the Gd passes.
    current_step: global step counter; returned incremented by the number of batch pairs.
    train_losses, train_accuracies: lists of three lists (label head, Gd on source,
      Gd on target). One per-epoch mean is appended to each; mutated in place.
    source_dataloader: iterable of labelled source batches; defaults to the global ``dataloader_photo``.
    device: device the batches are moved to; defaults to the global ``DEVICE``.
    log_frequency: print statistics when ``current_step`` is a multiple of it;
      defaults to the global ``LOG_FREQUENCY``.

  Returns:
    ``(current_step, train_losses, train_accuracies)``.

  Note:
    ``zip`` stops at the shorter of the two loaders, so extra batches of the longer one are skipped.
  """
  if source_dataloader is None:
    source_dataloader = dataloader_photo
  if device is None:
    device = DEVICE
  if log_frequency is None:
    log_frequency = LOG_FREQUENCY

  # Open a new per-epoch accumulator for each of the three tracked branches
  for branch in range(3):
    train_losses[branch].append(0)
    train_accuracies[branch].append(0)

  def _loss_and_stats(branch, outputs, targets):
    """Compute the loss for one branch and add loss / batch accuracy to its accumulators."""
    branch_loss = criterion(outputs, targets)
    _, preds = torch.max(outputs.detach(), 1)
    train_losses[branch][-1] += branch_loss.item()
    train_accuracies[branch][-1] += (preds == targets).sum().item() / len(targets)
    return branch_loss

  num_batches = 0
  for source_batch, target_batch in zip(source_dataloader, second_dataloader):
    num_batches += 1

    # Bring data over the device of choice
    images = source_batch[0].to(device)
    labels = source_batch[1].to(device)
    target_images = target_batch[0].to(device)
    should_log = current_step % log_frequency == 0

    # PyTorch accumulates gradients across backward passes: reset them once per batch pair
    optimizer.zero_grad()

    # 1) label loss on the source batch
    outputs = net(images)
    loss = _loss_and_stats(0, outputs, labels)
    if should_log:
      print('\tStep {}\n,\tClassifier: Loss = {}, Accuracy = {}%'.format(current_step, loss.item(), train_accuracies[0][-1]*100/num_batches))
    loss.backward()

    # 2) Gd loss on the source batch (domain label 1)
    outputs = net(images, alpha=alpha)
    source_domain = torch.ones(len(outputs), dtype=torch.long, device=outputs.device)
    loss1 = _loss_and_stats(1, outputs, source_domain)
    if should_log:
      print('\t\tGd photo: Loss {}, Accuracy = {}%'.format(loss1.item(), train_accuracies[1][-1]*100/num_batches))
    loss1.backward()

    # 3) Gd loss on the target batch (domain label 0)
    outputs = net(target_images, alpha=alpha)
    target_domain = torch.zeros(len(outputs), dtype=torch.long, device=outputs.device)
    loss2 = _loss_and_stats(2, outputs, target_domain)
    if should_log:
      print('\t\tGd art: Loss {}, Accuracy = {}%'.format(loss2.item(), train_accuracies[2][-1]*100/num_batches))
    loss2.backward()

    optimizer.step() # update weights based on the three accumulated gradients

    current_step += 1

  # Average over the batches actually processed (the counter used to be one too high)
  if num_batches:
    for branch in range(3):
      train_losses[branch][-1] /= num_batches
      train_accuracies[branch][-1] /= num_batches

  return current_step, train_losses, train_accuracies

In [0]:
def valid_step(net, criterion, valid_losses, valid_accuracies, valid_dataloader, current_step, device=None):
  """Evaluate ``net`` on one pass over ``valid_dataloader``.

  Args:
    net: model called as ``net(images)``; the caller is responsible for switching it to evaluation mode.
    criterion: loss function applied to (outputs, labels).
    valid_losses, valid_accuracies: lists receiving one per-epoch mean each; mutated in place.
    valid_dataloader: iterable of ``(images, labels)`` batches.
    current_step: step counter; returned incremented by the number of batches.
    device: device the batches are moved to; defaults to the global ``DEVICE``.

  Returns:
    ``(valid_losses, valid_accuracies, current_step)``.
  """
  if device is None:
    device = DEVICE

  valid_losses.append(0)
  valid_accuracies.append(0)
  num_batches = 0

  # No gradients are needed for validation; skipping the graph saves memory and time
  with torch.no_grad():
    for images, labels in valid_dataloader:
      images = images.to(device)
      labels = labels.to(device)

      # Forward pass and predictions
      outputs_valid = net(images)
      _, preds = torch.max(outputs_valid, 1)

      # Accumulate the batch loss and batch accuracy
      valid_losses[-1] += criterion(outputs_valid, labels).item()
      valid_accuracies[-1] += (preds == labels).sum().item() / len(labels)

      current_step += 1
      num_batches += 1

  # Average over the batches actually seen (the counter used to be one too high)
  if num_batches:
    valid_losses[-1] /= num_batches
    valid_accuracies[-1] /= num_batches

  print('\tValid_loss = {}, Valid_accuracy = {}%'.format(valid_losses[-1], valid_accuracies[-1]*100))

  return valid_losses, valid_accuracies, current_step

In [0]:
def train_step_without_adaption(net, criterion, optimizer, current_step, train_losses, train_accuracies, train_dataloader,
                                device=None, log_frequency=None):
  """Run one plain supervised training epoch (no domain adaptation).

  Args:
    net: model called as ``net(images)``.
    criterion: loss function applied to (outputs, labels).
    optimizer: optimizer stepped once per batch.
    current_step: global step counter; returned incremented by the number of batches.
    train_losses, train_accuracies: lists receiving one per-epoch mean each; mutated in place.
    train_dataloader: iterable of ``(images, labels)`` batches.
    device: device the batches are moved to; defaults to the global ``DEVICE``.
    log_frequency: print statistics when ``current_step`` is a multiple of it;
      defaults to the global ``LOG_FREQUENCY``.

  Returns:
    ``(current_step, train_losses, train_accuracies)``.
  """
  if device is None:
    device = DEVICE
  if log_frequency is None:
    log_frequency = LOG_FREQUENCY

  train_losses.append(0)
  train_accuracies.append(0)
  num_batches = 0

  for images, labels in train_dataloader:
    num_batches += 1

    # Bring data over the device of choice
    images = images.to(device)
    labels = labels.to(device)

    # PyTorch accumulates gradients across backward passes: reset them before each batch
    optimizer.zero_grad()

    # Forward pass, loss and predictions
    outputs = net(images)
    loss = criterion(outputs, labels)
    _, preds = torch.max(outputs.detach(), 1)

    # Accumulate the batch loss and batch accuracy
    train_losses[-1] += loss.item()
    train_accuracies[-1] += (preds == labels).sum().item() / len(labels)

    # Log the current loss and the running mean accuracy of this epoch
    if current_step % log_frequency == 0:
      print('\tStep {}, Loss {}, Accuracy = {}%'.format(current_step, loss.item(), train_accuracies[-1]*100/num_batches))

    # Compute gradients for each layer and update weights
    loss.backward()
    optimizer.step()

    current_step += 1

  # Average over the batches actually processed (the counter used to be one too high)
  if num_batches:
    train_losses[-1] /= num_batches
    train_accuracies[-1] /= num_batches

  return current_step, train_losses, train_accuracies

In [0]:
def model_train(net, second_dataloader, optimizer, criterion, scheduler, stats, alpha=None, epochs=NUM_EPOCHS, validation=False, adaptation=False):
  """Train ``net`` for ``epochs`` epochs, optionally validating after each one.

  Args:
    net: model to train; moved to ``DEVICE``.
    second_dataloader: target-domain loader. Used as the second loader of ``train_step``
      when ``adaptation`` is True, and as the validation loader when ``validation`` is True.
    optimizer, criterion, scheduler: optimizer, loss function and LR scheduler
      (the scheduler is stepped once per epoch).
    stats: dict holding ``'train_losses'`` and ``'train_accuracies'`` (three lists each when
      ``adaptation`` is True, flat lists otherwise) and, when ``validation`` is True,
      ``'valid_losses'`` and ``'valid_accuracies'``.
    alpha: forwarded to ``train_step``; only used with ``adaptation=True``.
    epochs: number of epochs to run.
    validation: run ``valid_step`` on ``second_dataloader`` after every epoch.
    adaptation: use ``train_step`` instead of ``train_step_without_adaption``
      (which trains on the global ``dataloader_photo``).

  Returns:
    The updated ``stats`` dict.
  """
  # By default, everything is loaded to cpu
  net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

  # The flag has to be assigned; merely referencing the attribute has no effect
  cudnn.benchmark = True

  current_step = 0
  for epoch in range(epochs):

    # Read the rate straight from the optimizer: scheduler.get_lr() is deprecated and can
    # report a wrong value when called outside of scheduler.step()
    current_lr = [group['lr'] for group in optimizer.param_groups]
    print('Starting epoch {}/{}, LR = {}'.format(epoch+1, epochs, current_lr))

    net.train() # Sets module in training mode
    if adaptation:
      current_step, stats['train_losses'], stats['train_accuracies'] = train_step(net, second_dataloader, criterion, optimizer, alpha, current_step, stats['train_losses'], stats['train_accuracies'])
    else:
      current_step, stats['train_losses'], stats['train_accuracies'] = train_step_without_adaption(net, criterion, optimizer, current_step, stats['train_losses'], stats['train_accuracies'], dataloader_photo)

    # Step the scheduler once per epoch
    scheduler.step()

    if validation:
      net.train(False) # Sets module in evaluation mode
      # The step counter returned by valid_step is deliberately discarded
      stats['valid_losses'], stats['valid_accuracies'], _ = valid_step(net, criterion, stats['valid_losses'], stats['valid_accuracies'], second_dataloader, current_step)

  return stats

In [0]:
def model_test(test_dataloader, net1, net2=None):
  """Evaluate one model, or an ensemble, on ``test_dataloader`` and print the accuracy.

  Args:
    test_dataloader: iterable of ``(images, labels)`` batches.
    net1: primary model.
    net2: optional iterable of additional models; when given, the outputs of all
      models are averaged before taking the arg-max.

  Returns:
    The accuracy as a fraction in [0, 1] (correct predictions / evaluated samples).
    Previously nothing was returned; the value is also printed as a percentage.
  """
  net1 = net1.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  net1.train(False) # Set Network to evaluation mode

  # Materialise the optional ensemble members once so they can be iterated for every batch
  extra_nets = [] if net2 is None else list(net2)
  for extra in extra_nets:
    extra.to(DEVICE)
    extra.train(False)

  correct = 0
  total = 0
  # No gradients are needed at test time
  with torch.no_grad():
    for images, labels in tqdm(test_dataloader):
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      # Sum the outputs of every model, then average
      outputs = net1(images)
      for extra in extra_nets:
        outputs = outputs + extra(images)
      outputs = outputs / (len(extra_nets) + 1)

      _, preds = torch.max(outputs, 1)

      correct += (preds == labels).sum().item()
      total += len(labels)

  # Sample-weighted accuracy; the old per-batch mean was additionally divided by batches + 1
  accuracy = correct / total if total else 0.0

  print('Test Accuracy: {}%'.format(accuracy*100))
  return accuracy

Creating methods to save and plot values

In [0]:
#Use to save values of hyper-parameters
def save_values(valid_losses, valid_accuracies, second_param, best_values, max_size=10):
  """Record the best epoch of a run in a bounded "top results" table.

  The epoch with the lowest validation loss is selected; its loss becomes the key and
  ``(accuracy, second_param)`` the value. While the table holds fewer than ``max_size``
  entries the result is always stored; afterwards it replaces the entry with the
  highest loss, but only if it improves on it.

  Args:
    valid_losses: per-epoch validation losses of the run.
    valid_accuracies: per-epoch validation accuracies, aligned with ``valid_losses``.
    second_param: hyper-parameters of the run, stored alongside the accuracy.
    best_values: mapping loss -> (accuracy, second_param); mutated in place.
    max_size: maximum number of entries to keep.

  Returns:
    A ``collections.OrderedDict`` with the entries sorted by ascending loss.
  """
  # Nothing to record for a run without validation results
  if len(valid_losses) == 0:
    return collections.OrderedDict(sorted(best_values.items()))

  best_epoch = int(np.argmin(valid_losses))
  candidate_loss = valid_losses[best_epoch]
  candidate_entry = (valid_accuracies[best_epoch], second_param)

  if len(best_values) < max_size or candidate_loss in best_values:
    # Room left, or same key already present: store without evicting anything
    best_values[candidate_loss] = candidate_entry
  elif best_values:
    # Table full: compare against the WORST stored loss (not the best one) and evict it
    worst_loss = max(best_values)
    if candidate_loss < worst_loss:
      del best_values[worst_loss]
      best_values[candidate_loss] = candidate_entry

  return collections.OrderedDict(sorted(best_values.items()))

In [0]:
def plot_2d(stats):
  """Plot the per-epoch training curves of an adaptation run.

  Draws two stacked panels, losses on top and accuracies below, each with the three
  series stored at indices 0, 1 and 2 of ``stats['train_losses']`` and
  ``stats['train_accuracies']``.
  """
  series_names = ['Classifier', 'Discriminator photo', 'Discriminator art']
  panels = (('train_losses', "loss"), ('train_accuracies', "accuracy"))

  _, axes = plt.subplots(2)
  for axis, (stats_key, y_label) in zip(axes, panels):
    for branch in range(3):
      axis.plot(stats[stats_key][branch])
    axis.set_ylabel(y_label)
    axis.set_xlabel("epochs")
    axis.legend(series_names, loc='upper left')

In [0]:
def plot_2d_old(stats):
  """Plot training versus validation curves of a run without adaptation.

  Draws two stacked panels: losses on top (``'train_losses'`` / ``'valid_losses'``)
  and accuracies below (``'train_accuracies'`` / ``'valid_accuracies'``).
  """
  _, axes = plt.subplots(2)
  for axis, metric, y_label in zip(axes, ('losses', 'accuracies'), ("loss", "accuracy")):
    axis.plot(stats['train_' + metric])
    axis.plot(stats['valid_' + metric])
    axis.set_ylabel(y_label)
    axis.set_xlabel("epochs")
    axis.legend(['train', 'valid'], loc='upper left')

In [0]:
#Use to plot 3D plot a hyper-parameters / loss
def D_plot(xdata, ydata, zdata, xlabel, ylabel, zlabel='loss'):
  """Draw a 3D scatter plot of ``zdata`` against two hyper-parameters.

  Args:
    xdata, ydata, zdata: coordinates of the points, one sequence per axis.
    xlabel, ylabel, zlabel: axis captions (``zlabel`` defaults to ``'loss'``).
  """
  plt.figure()
  axes3d = plt.axes(projection="3d")

  # Three-dimensional scattered points
  axes3d.scatter3D(xdata, ydata, zdata, cmap="Blues")

  for set_caption, caption in ((axes3d.set_xlabel, xlabel),
                               (axes3d.set_ylabel, ylabel),
                               (axes3d.set_zlabel, zlabel)):
    set_caption(caption)

**Train without adaptation**

In [0]:
#stats = {'train_losses': [], 'train_accuracies': []}

In [0]:
#stats = model_train(net, dataloader_art, optimizer, criterion, scheduler, stats)

In [0]:
#plot_2d(stats)

**Test without adaptation**

In [0]:
#model_test(dataloader_art, net)

**Train with adaptation**

In [0]:
#stats = {'train_losses': [[], [], []], 'train_accuracies': [[], [], []]}

In [0]:
#stats = model_train(net, dataloader_art, optimizer, criterion, scheduler, stats, alpha=0.5, adaptation=True)

In [0]:
#plot_2d(stats)

**Test with adaptation**

In [0]:
#model_test(dataloader_art, net)

## Hyper-parameters search without adaptation

In [0]:
# Random search over learning rate and batch size for training without adaptation
best_values = dict()
best_accuracy = 0

max_count = 30
cs_epochs = 10
for count in range(max_count):
  lr = 10**random.uniform(-5, -3) # Log-uniform sampling; initially uniform(-5, -1), then narrowed to uniform(-5, -3)
  batch_size = random.choice([32, 64, 128, 256])

  stats = {'valid_losses': [], 'train_losses': [], 'valid_accuracies': [], 'train_accuracies': []}

  print("Iteration {}/{}, lr = {}, batch size = {}".format(count, max_count, lr, batch_size))

  # Rebuild the loaders with the sampled batch size. dataloader_photo is rebound on purpose:
  # model_train reads that global when training without adaptation.
  dataloader_cartoon = DataLoader(dataset_cartoon, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
  dataloader_sketch = DataLoader(dataset_sketch, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
  dataloader_photo = DataLoader(dataset_photo, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)

  net = alexnet(pretrained=True)
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES)
  parameters_to_optimize = net.parameters()
  optimizer = optim.Adam(parameters_to_optimize, lr=lr, weight_decay=WEIGHT_DECAY)
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

  # model_train's second positional argument is the loader used for validation; it was missing,
  # which shifted every argument and raised a TypeError.
  # NOTE(review): cartoon is used as the validation domain here - confirm this is the intended one.
  stats = model_train(net, dataloader_cartoon, optimizer, criterion, scheduler, stats, epochs=cs_epochs, validation=True)

  # Saving best 10 results considering validation loss
  best_values = save_values(stats['valid_losses'], stats['valid_accuracies'], [lr, batch_size], best_values)

  # Keep a copy of the network with the best final-epoch validation accuracy
  if stats['valid_accuracies'][-1] > best_accuracy:
    best_net = copy.deepcopy(net)
    best_accuracy = stats['valid_accuracies'][-1]

  print("")

In [0]:
# Show every stored result as a (loss, (accuracy, hyper-parameters)) pair, one per line
for item in best_values.items():
  print(item)

In [0]:
# Keep only the results whose loss is below the cut-off so the graph stays readable.
# Each entry maps loss -> (accuracy, [learning rate, batch size]).
selected = [(loss, entry[1]) for loss, entry in best_values.items() if loss < 4.4]

losses = [loss for loss, _ in selected]
rates = [params[0] for _, params in selected]
batch_sizes = [params[1] for _, params in selected]

D_plot(rates, batch_sizes, losses, 'Learning rate', 'batch_size')

In [0]:
# Fresh statistics container for the final run without adaptation
stats = {'valid_losses': [], 'train_losses': [], 'valid_accuracies': [], 'train_accuracies': []}

# Rebuild every domain loader with a batch size of 32
final_loader_options = dict(batch_size=32, shuffle=True, num_workers=4, drop_last=True)
dataloader_cartoon = DataLoader(dataset_cartoon, **final_loader_options)
dataloader_sketch = DataLoader(dataset_sketch, **final_loader_options)
dataloader_photo = DataLoader(dataset_photo, **final_loader_options)
dataloader_art = DataLoader(dataset_art, **final_loader_options)

# Pretrained AlexNet with a NUM_CLASSES-way output layer, trained with Adam and a step-down schedule
net = alexnet(pretrained=True)
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)
parameters_to_optimize = net.parameters()
optimizer = optim.Adam(parameters_to_optimize, lr=3e-4, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

In [0]:
# model_train's second positional argument is the loader used for validation; it was missing,
# which shifted every argument and raised a TypeError.
# NOTE(review): cartoon is used as the validation domain here - confirm this is the intended one.
stats = model_train(net, dataloader_cartoon, optimizer, criterion, scheduler, stats, epochs=10, validation=True)

In [0]:
# Evaluate the model trained without adaptation on the art-painting loader
model_test(dataloader_art, net)

## Hyper-parameters search with adaptation

In [0]:
# Grid search over learning rate and alpha for training with domain adaptation.
# Each configuration is trained twice, once against cartoon and once against sketch,
# and the two validation curves are averaged.
best_values = dict()

def _blank_adaptation_stats():
  # Three training series per metric (as filled by train_step) plus flat validation series
  return {'train_losses': [[], [], []], 'train_accuracies': [[], [], []], 'valid_losses': [], 'valid_accuracies': []}

cs_epochs = 10
for lr in [1e-5, 2e-5, 3e-5]: # Earlier grid: [1e-5, 3e-5, 1e-4, 3e-4, 1e-3], then narrowed to [1e-5, 2e-5, 3e-5]
  for alpha in [0.7, 0.8, 0.9]: # Earlier grid: [0.2, 0.3, 0.4, 0.5, 0.6, 0.7], then [0.7, 0.8, 0.9]

    stats1 = _blank_adaptation_stats()
    stats2 = _blank_adaptation_stats()

    print("lr = {}, alpha = {}".format(lr, alpha))

    # First model: adapted towards (and validated on) cartoon
    net1 = alexnet(pretrained=True)
    net1.classifier[6] = nn.Linear(4096, NUM_CLASSES)
    parameters_to_optimize = net1.parameters()
    optimizer1 = optim.Adam(parameters_to_optimize, lr=lr, weight_decay=WEIGHT_DECAY)
    scheduler1 = optim.lr_scheduler.StepLR(optimizer1, step_size=STEP_SIZE, gamma=GAMMA)

    # Second model: adapted towards (and validated on) sketch
    net2 = alexnet(pretrained=True)
    net2.classifier[6] = nn.Linear(4096, NUM_CLASSES)
    parameters_to_optimize = net2.parameters()
    optimizer2 = optim.Adam(parameters_to_optimize, lr=lr, weight_decay=WEIGHT_DECAY)
    scheduler2 = optim.lr_scheduler.StepLR(optimizer2, step_size=STEP_SIZE, gamma=GAMMA)

    stats1 = model_train(net1, dataloader_cartoon, optimizer1, criterion, scheduler1, stats1, epochs=cs_epochs, validation=True, adaptation=True, alpha=alpha)
    stats2 = model_train(net2, dataloader_sketch, optimizer2, criterion, scheduler2, stats2, epochs=cs_epochs, validation=True, adaptation=True, alpha=alpha)

    # Epoch-wise mean of the two runs
    valid_losses = [(loss1 + stats2["valid_losses"][epoch]) / 2
                    for epoch, loss1 in enumerate(stats1["valid_losses"])]
    valid_accuracies = [(stats1["valid_accuracies"][epoch] + stats2["valid_accuracies"][epoch]) / 2
                        for epoch in range(len(stats1["valid_losses"]))]

    # Saving best 10 results considering validation loss
    best_values = save_values(valid_losses, valid_accuracies, [lr, alpha], best_values)

    print("")

In [0]:
# Show every stored result as a (loss, (accuracy, hyper-parameters)) pair, one per line
for item in best_values.items():
  print(item)

In [0]:
# Keep only the results whose loss is below the cut-off so the graph stays readable.
# Each entry maps loss -> (accuracy, [learning rate, alpha]).
selected = [(loss, entry[1]) for loss, entry in best_values.items() if loss < 50.4]

losses = [loss for loss, _ in selected]
rates = [params[0] for _, params in selected]
alphas = [params[1] for _, params in selected]

D_plot(rates, alphas, losses, 'Learning rate', 'alpha')

In [0]:
# Statistics container for the first adaptation model (three training series per metric, as filled by train_step)
stats = {'valid_losses': [], 'train_losses': [[], [], []], 'valid_accuracies': [], 'train_accuracies': [[], [], []]}

# First model: pretrained AlexNet with a NUM_CLASSES-way output layer, Adam with lr = 1e-5
net = alexnet(pretrained=True)
net.classifier[6] = nn.Linear(4096, NUM_CLASSES)
parameters_to_optimize = net.parameters()
optimizer = optim.Adam(parameters_to_optimize, lr=1e-5, weight_decay=WEIGHT_DECAY)             
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

# Statistics container for the second adaptation model
stats2 = {'valid_losses': [], 'train_losses': [[], [], []], 'valid_accuracies': [], 'train_accuracies': [[], [], []]}

# Second model: same architecture, Adam with lr = 3e-5
net2 = alexnet(pretrained=True)
net2.classifier[6] = nn.Linear(4096, NUM_CLASSES)
parameters_to_optimize2 = net2.parameters()
optimizer2 = optim.Adam(parameters_to_optimize2, lr=3e-5, weight_decay=WEIGHT_DECAY)             
scheduler2 = optim.lr_scheduler.StepLR(optimizer2, step_size=STEP_SIZE, gamma=GAMMA)

In [0]:
# Train the first model with domain adaptation against the art-painting loader for 40 epochs (alpha = 0.9, no validation)
stats = model_train(net, dataloader_art, optimizer, criterion, scheduler, stats, epochs=40, adaptation=True, alpha=0.9)

In [0]:
# Train the second model with the same settings; it differs only in its optimizer's learning rate
stats2 = model_train(net2, dataloader_art, optimizer2, criterion, scheduler2, stats2, epochs=40, adaptation=True, alpha=0.9)

In [0]:
# Per-epoch training losses and accuracies (classifier and both discriminator branches) of the first model
plot_2d(stats)

In [0]:
# Per-epoch training losses and accuracies (classifier and both discriminator branches) of the second model
plot_2d(stats2)

In [0]:
# Evaluate the ensemble of the two adapted models on the art-painting loader.
# The primary model must be `net`: passing `net2` in both positions only averaged net2 with itself.
model_test(dataloader_art, net, net2=[net2])