# **Homework3: Domain Adaptation**

**Import libraries**

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from statistics import mean 

from sklearn.model_selection import ParameterGrid

import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet

from PIL import Image

**Define default parameters**

In [3]:
# Run on the GPU when one is available; otherwise fall back to the CPU so the
# notebook can still be executed (slowly) on a machine without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

NUM_CLASSES = 7      # output size of the label classifier (see prepare_net)
BATCH_SIZE = 128     # batch size used by every DataLoader in this notebook
MOMENTUM = 0.9       # momentum of the SGD optimizer
NUM_EPOCHS = 20      # number of passes over the target dataloader in train()
LOG_FREQUENCY = 50   # losses are printed every LOG_FREQUENCY training steps

In [4]:
# Forwarded as `pretrained=` to dann_net() when the network is built in prepare_net().
PRE_TRAINED = True  

**Define Data Preprocessing**

In [5]:
# Preprocessing applied to every image of every domain, in order:
# Resize(256) -> CenterCrop(224) -> tensor conversion -> per-channel
# normalization with the mean / standard deviation listed below.
train_transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

**Prepare Datasets**


In [None]:
# Clone github repository with data.
# The clone is skipped when the folder already exists, so this cell can be
# re-run without downloading the dataset again.
if not os.path.isdir('./Homework3-PACS'):
  !git clone https://github.com/MachineLearning2020/Homework3-PACS.git

In [7]:
from torchvision.datasets import ImageFolder

# Root folder of the dataset; each domain lives in its own sub-folder.
DATA_DIR = "Homework3-PACS/PACS/"

# One ImageFolder per domain, all sharing the same preprocessing pipeline.
artpainting_data, cartoon_data, photo_data, sketch_data = (
    ImageFolder(f"{DATA_DIR}{domain_folder}/", transform = train_transform)
    for domain_folder in ("art_painting", "cartoon", "photo", "sketch")
)

In [None]:
def showimage(tensor):
  """Display an image tensor with matplotlib.

  The tensor's first axis is moved to the last position before plotting
  (assumes a channels-first (C, H, W) layout - TODO confirm against callers).
  """
  channels_last = tensor.permute(1, 2, 0)
  plt.imshow(channels_last)

**Evaluation**

In [8]:
def evaluate(net, dataloader, loss_fn=None, device=None):
  """Predict the labels for the data in `dataloader` and score the predictions.

  Args:
    net: model to evaluate. It is moved to `device` and left in evaluation mode.
    dataloader: iterable yielding `(images, labels)` batches.
    loss_fn: callable applied to `(outputs, labels)`; defaults to the
      notebook-level `criterion`.
    device: device used for the model and the data; defaults to the
      notebook-level `DEVICE`.

  Returns:
    A tuple `(accuracy, mean_loss)`:
      * accuracy: correct predictions divided by the number of samples the
        dataloader actually yielded (with `drop_last=True` this is smaller
        than `len(dataloader.dataset)`);
      * mean_loss: mean of the per-batch losses (useful during training).

  Raises:
    statistics.StatisticsError: if the dataloader yields no batch.
  """
  if loss_fn is None:
    loss_fn = criterion
  if device is None:
    device = DEVICE

  net = net.to(device)
  net.train(False)
  running_corrects = 0
  seen_samples = 0
  losses = []
  # gradients are not needed here: skipping them saves memory and time
  with torch.no_grad():
    for images, labels in dataloader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = net(images)
      losses.append(loss_fn(outputs, labels).item())
      _, preds = torch.max(outputs, 1)
      running_corrects += torch.sum(preds == labels).item()
      # count what was really evaluated instead of trusting the dataset size
      seen_samples += labels.size(0)
  mean_loss = mean(losses)
  accuracy = running_corrects / float(seen_samples)
  return accuracy, mean_loss

**Hyperparameter tuning through grid search: LR and ALPHA**

In [None]:
# clone repository with the DANN net
if not os.path.isdir('./hw3'):
  !git clone https://github.com/frattinfabio/MLDL-hw3-DA.git
  !mv "MLDL-hw3-DA/" "hw3/"
from hw3.dann_net import dann_net

In [10]:
def prepare_net(params):
  """Instantiate the net and set up everything needed to train it.

  Args:
    params: mapping providing "LR", "WD", "STEP_SIZE" and "GAMMA".

  Returns:
    A tuple `(net, optimizer, scheduler)`: a new `dann_net` whose
    `classifier[6]` layer outputs NUM_CLASSES values, an SGD optimizer over
    all of its parameters, and a StepLR scheduler attached to that optimizer.
  """
  net = dann_net(pretrained = PRE_TRAINED)

  # swap the last classifier layer for one with NUM_CLASSES outputs
  previous_head = net.classifier[6]
  net.classifier[6] = nn.Linear(previous_head.in_features, NUM_CLASSES)

  optimizer = optim.SGD(
      net.parameters(),
      lr=params["LR"],
      momentum=MOMENTUM,
      weight_decay=params["WD"],
  )
  scheduler = optim.lr_scheduler.StepLR(
      optimizer,
      step_size=params["STEP_SIZE"],
      gamma=params["GAMMA"],
  )

  return net, optimizer, scheduler


In [11]:
# train the DANN network
def train(source_dataloader, target_dataloader, net, optimizer, scheduler, criterion, params):
  """Train `net` on a labelled source domain and an unlabelled target domain.

  One optimizer step is taken per target batch. Before each step the
  gradients of the following losses are accumulated:
    * the classification loss on the source batch;
    * only when params["ALPHA"] is not None: the loss of `net(images, ALPHA)`
      on the source batch against domain label 0 and on the target batch
      against domain label 1 (how ALPHA is used is decided by the network
      itself - see hw3.dann_net).

  Args:
    source_dataloader: yields `(images, labels)` batches of the source domain.
    target_dataloader: yields `(images, _)` batches of the target domain; the
      labels are ignored.
    net: network to train; it is moved to DEVICE.
    optimizer: optimizer stepped once per target batch.
    scheduler: learning-rate scheduler stepped once per epoch.
    criterion: loss used for both the classification and the domain tasks.
    params: mapping providing "ALPHA" (None disables the domain losses).

  Returns:
    A tuple `(class_losses, dann_source_losses, dann_target_losses)` of lists
    holding one loss value per training step (the last two stay empty when
    ALPHA is None).
  """
  net = net.to(DEVICE)
  # actually enable the cuDNN autotuner (a bare `cudnn.benchmark` does nothing)
  cudnn.benchmark = True
  current_step = 0
  alpha = params["ALPHA"]

  # it is assumed that the source dataset is smaller than the target
  source_dataloader_iterator = iter(source_dataloader)

  dann_source_losses = []
  dann_target_losses = []
  class_losses = []

  for epoch in range(NUM_EPOCHS):
    print(f"\rEpoch {epoch+1}/{NUM_EPOCHS}...", end = "")

    for target_images, _ in target_dataloader:
      net.train()
      optimizer.zero_grad()

      # re-iterate over the smaller dataset if the bigger isn't finished yet
      try:
        images, labels = next(source_dataloader_iterator)
      except StopIteration:
        source_dataloader_iterator = iter(source_dataloader)
        images, labels = next(source_dataloader_iterator)

      # source classification task
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)
      outputs = net(images)
      class_loss = criterion(outputs, labels)
      class_loss.backward()
      class_losses.append(class_loss.item())

      if alpha is not None:
        # source discrimination task (domain '0')
        # labels are sized from the batch itself, so a smaller final batch
        # (drop_last=False) does not break the loss computation
        domain_targets = torch.zeros(images.size(0), dtype = torch.long, device = DEVICE)
        outputs = net(images, alpha)
        dann_source_loss = criterion(outputs, domain_targets)
        dann_source_loss.backward()
        dann_source_losses.append(dann_source_loss.item())

        # target discrimination task (domain '1')
        target_images = target_images.to(DEVICE)
        domain_targets = torch.ones(target_images.size(0), dtype = torch.long, device = DEVICE)
        outputs = net(target_images, alpha)
        dann_target_loss = criterion(outputs, domain_targets)
        dann_target_loss.backward()
        dann_target_losses.append(dann_target_loss.item())

      if (current_step % LOG_FREQUENCY) == 0:
        print("")
        print(f"Step {current_step}\n\tclass_loss: {class_loss.item()}")
        if alpha is not None:
          print(f"\tdann_source_loss: {dann_source_loss.item()}\n\tdann_target_loss: {dann_target_loss.item()}")
      # a single step applies the gradients accumulated by all the losses above
      optimizer.step()
      current_step += 1

    scheduler.step()
  print("")

  return class_losses, dann_source_losses, dann_target_losses

In [None]:
# defining the values for the hyperparameters to tune
#  first look for the best lr-alpha combination
# NOTE(review): the ALPHA list used to contain 0.05 twice, which made the grid
# search train the very same configuration two times; the duplicate was
# removed. If another value was meant (e.g. a smaller alpha), add it here.
lr_alpha_param_grid = {
    "LR": [0.1, 0.01, 0.005, 0.001],
    "ALPHA": [None, 0.5, 0.1, 0.05],  # None trains without the domain losses
    "STEP_SIZE": [10],
    "GAMMA": [0.1],
    "WD": [5e-3]
}

# then the best lr-alpha fixed and tune the others
step_wd_param_grid = {
    "LR": [0.005], # put here the best LR found in the first search
    "ALPHA": [0.05], # put here the best ALPHA found in the first search
    "STEP_SIZE": [5,10],
    "GAMMA": [0.1, 0.5],
    "WD": [5e-3, 5e-4]
}

# grid used by the search below: switch the two lines to run the second search
param_grid = lr_alpha_param_grid
# param_grid = step_wd_param_grid

In [15]:
# Cross-entropy loss: passed explicitly to train() and read as a global by
# evaluate(), so this cell has to run before either of them is called.
criterion = nn.CrossEntropyLoss()

In [12]:
# Experiment plan:
#   step 1: {source = photo, target = art-painting} --> best model --> test on art-painting (cheating)
#   step 2: {source = photo, target = cartoon} + {source = photo, target = sketch} --> average results --> best model --> test on art-painting
# Loaders: source = photo, target 1 = cartoon, target 2 = sketch, target 3 = art painting.
source_dataloader, target_dataloader_1, target_dataloader_2, target_dataloader_3 = (
    DataLoader(domain_data, batch_size = BATCH_SIZE, shuffle = True, num_workers = 4, drop_last = True)
    for domain_data in (photo_data, cartoon_data, sketch_data, artpainting_data)
)

In [None]:
# Check which of the dataloaders is bigger (number of batches) to properly
# iterate over both at the same time: train() loops over the target loader and
# restarts the source iterator whenever it is exhausted.
len(source_dataloader), len(target_dataloader_1), len(target_dataloader_2)

In [None]:
# Empty table collecting one row per hyperparameter configuration tried by the
# grid search: the configuration itself plus the accuracies it reached.
results_df = pd.DataFrame(columns = ["LR", "ALPHA", "STEP_SIZE", "GAMMA", "WD", "cartoon_accuracy", "sketch_accuracy", "avg_accuracy"])

In [None]:
# perform grid search over the parameters previously defined
for params in ParameterGrid(param_grid):

  print(f"Trying with paramaters\tLR: {params['LR']}\tALPHA: {params['ALPHA']}\tGAMMA: {params['GAMMA']}\tWD: {params['WD']}\tSTEP_SIZE: {params['STEP_SIZE']}")

  # train a fresh network for each validation target (cartoon, then sketch)
  # and score it on that same target
  target_accuracies = []
  for target_dataloader in (target_dataloader_1, target_dataloader_2):
    net, optimizer, scheduler = prepare_net(params)
    train(source_dataloader, target_dataloader, net, optimizer, scheduler, criterion, params)
    target_accuracies.append(evaluate(net, target_dataloader)[0])
  target_accuracy_1, target_accuracy_2 = target_accuracies

  avg_accuracy = (target_accuracy_1 + target_accuracy_2)/2

  # DataFrame.append was removed in pandas 2.0, so the row is added with
  # pd.concat. It is added inside the loop (not once at the end) so that the
  # results gathered so far survive an interrupted search.
  result_row = pd.DataFrame([{"LR": params['LR'], "ALPHA": params['ALPHA'], "STEP_SIZE": params['STEP_SIZE'], "GAMMA": params['GAMMA'], "WD": params['WD'], "cartoon_accuracy": target_accuracy_1, "sketch_accuracy": target_accuracy_2, "avg_accuracy": avg_accuracy}])
  if results_df.empty:
    results_df = result_row
  else:
    results_df = pd.concat([results_df, result_row], ignore_index = True)

In [16]:
# results_df

In [None]:
# Pick the row of the results table with the highest average accuracy:
# it holds the best configuration of parameters found by the grid search.
avg_accuracies = results_df["avg_accuracy"]
idx_max_acc = avg_accuracies.argmax()
best_params = results_df.iloc[idx_max_acc]
best_params

**Training with the best hyperparameters on the actual target**

In [None]:
# Build a new network with the best configuration and train it with
# art painting (target_dataloader_3) as the target domain.
net, optimizer, scheduler = prepare_net(best_params)
final_loss_histories = train(
    source_dataloader, target_dataloader_3, net, optimizer, scheduler, criterion, best_params
)
class_source_losses, dann_source_losses, dann_target_losses = final_loss_histories

In [25]:
# evaluate() returns (accuracy, mean loss); only the accuracy is reported here
artpainting_metrics = evaluate(net, target_dataloader_3)
artpainting_acc = artpainting_metrics[0]
print(f"Accuracy on art-painting: {artpainting_acc}")

Accuracy on art-painting: 0.5


**Plotting losses behaviour**

In [None]:
# One figure per loss history returned by train(). Every value was recorded at
# one training step (not once per epoch), hence the x-axis label.
# The second figure now plots dann_target_losses: it used to plot the source
# losses a second time under the "target domain loss" label.
loss_histories_to_plot = [
    (dann_source_losses, "source domain loss"),
    (dann_target_losses, "target domain loss"),
    (class_source_losses, "source classification loss"),
]
for loss_history, y_label in loss_histories_to_plot:
  fig, ax = plt.subplots(figsize = (16, 8))
  ax.plot(loss_history)
  ax.set_ylabel(y_label)
  ax.set_xlabel("training step")

**Feature space adaptation**

In [32]:
from sklearn.manifold import TSNE

def get_2D_representation(loader, net, class_label):
  """Project the features of one class onto two dimensions with t-SNE.

  For every image yielded by `loader` whose label equals `class_label`, the
  output of `net.features` followed by `net.avgpool` is flattened into a
  vector; all the vectors are then reduced to 2D with `TSNE()`.

  Args:
    loader: iterable yielding `(images, labels)` batches.
    net: network exposing `features` and `avgpool`; it is switched to
      evaluation mode.
    class_label: label of the class whose images are kept.

  Returns:
    The array returned by `TSNE().fit_transform`, one row per kept image.
  """
  # use the device the network already lives on instead of assuming CUDA
  device = next(net.parameters()).device
  class_features = []
  with torch.no_grad():
    net.train(False)
    for images, labels in loader:
      # process all the matching images of a batch in one forward pass
      # rather than one image at a time
      selected = labels == class_label
      if not selected.any():
        continue
      features = net.features(images[selected].to(device))
      features = net.avgpool(features)
      class_features.append(torch.flatten(features, 1).cpu())

  all_features = torch.cat(class_features)
  reduced_2D_images = TSNE().fit_transform(all_features.numpy())
  return reduced_2D_images

# NOTE(review): the original comment said 'class label 0 stands for "dogs"',
# but the calls below pass class label 2. Confirm which class is intended
# (the index-to-name mapping can be checked with `photo_data.classes`).
reduced_2D_images_source = get_2D_representation(source_dataloader, net, 2)
reduced_2D_images_target = get_2D_representation(target_dataloader_3, net, 2)

In [None]:
# Scatter the 2D projections of the two domains on the same axes:
# source points in blue, target points in red.
fig, ax = plt.subplots(figsize = (12,8))
for domain_points, colour, domain_name in (
    (reduced_2D_images_source, 'b', 'source'),
    (reduced_2D_images_target, 'r', 'target'),
):
  ax.scatter(domain_points[:,0], domain_points[:,1], c = colour, label = domain_name)
ax.legend()