
**Install requirements**

In [0]:
!pip3 install 'Pillow==6.1'
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.4.2'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'
#!pip install --upgrade pillow

Collecting Pillow==6.1
[?25l  Downloading https://files.pythonhosted.org/packages/14/41/db6dec65ddbc176a59b89485e8cc136a433ed9c6397b6bfe2cd38412051e/Pillow-6.1.0-cp36-cp36m-manylinux1_x86_64.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 9.7MB/s 
[31mERROR: albumentations 0.1.12 has requirement imgaug<0.2.7,>=0.2.5, but you'll have imgaug 0.2.9 which is incompatible.[0m
[?25hInstalling collected packages: Pillow
  Found existing installation: Pillow 6.2.2
    Uninstalling Pillow-6.2.2:
      Successfully uninstalled Pillow-6.2.2
Successfully installed Pillow-6.1.0


Collecting Pillow-SIMD
[?25l  Downloading https://files.pythonhosted.org/packages/b1/19/b7043190f481abb94dcdd1e69c4432432aaa73455cf1128eae39b8eb2518/Pillow-SIMD-6.0.0.post0.tar.gz (621kB)
[K     |████████████████████████████████| 624kB 8.9MB/s 
[?25hBuilding wheels for collected packages: Pillow-SIMD
  Building wheel for Pillow-SIMD (setup.py) ... [?25l[?25hdone
  Created wheel for Pillow-SIMD: filename=Pillow_SIMD-6.0.0.post0-cp36-cp36m-linux_x86_64.whl size=1062936 sha256=8023e44aeeda53944fd0d80bebd7d5d5a665a0152ccd431b10021856cc7aaaf8
  Stored in directory: /root/.cache/pip/wheels/06/60/65/cc9afa345ccbf10a34cc208266b992941a8608010b592f43d1
Successfully built Pillow-SIMD
Installing collected packages: Pillow-SIMD
Successfully installed Pillow-SIMD-6.0.0.post0




**Import libraries**

In [0]:
import os
import logging
import math

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader
from torch.backends import cudnn

import torchvision
from torchvision import transforms

from PIL import Image
from tqdm import tqdm

from torch.autograd import Function

try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url

**ReverseLayer**


In [0]:
class ReverseLayerF(Function):
    # Forwards identity
    # Sends backward reversed gradients
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        output = grad_output.neg() * ctx.alpha
        return output, None

**AlexNetDANN**




In [0]:
__all__ = ['AlexNet', 'alexnet']

model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}

class AlexNetDANN(nn.Module):

    def __init__(self, num_classes=1000):
        super(AlexNetDANN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        
        self.domain = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 2),
        )
        

    def forward(self, x, alpha=None):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        if alpha == None:
          x = self.classifier(x)
        else:
          x = ReverseLayerF.apply(x, alpha)
          x = self.domain(x)
        return x


def alexnetDANN(pretrained=False, progress=True, **kwargs):
    """AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    model = AlexNetDANN(**kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['alexnet'],
                                              progress=progress)
        model.load_state_dict(state_dict, strict=False)
    return model

**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 30      # Total number of training epochs (iterations over dataset)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

**Define Data Preprocessing**

In [0]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      
                                      transforms.CenterCrop(224), 
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))                                    
])

**Prepare Dataset**

In [0]:
# Clone github repository with data
if not os.path.isdir('./Homework3-PACS'):
  !git clone https://github.com/MachineLearning2020/Homework3-PACS

P_DIR = 'Homework3-PACS/PACS/photo'
A_DIR = 'Homework3-PACS/PACS/art_painting'
C_DIR = 'Homework3-PACS/PACS/cartoon'
S_DIR = 'Homework3-PACS/PACS/sketch'

# Prepare Pytorch train/test Datasets
train_dataset = torchvision.datasets.ImageFolder(P_DIR, transform=train_transform)
test_dataset = torchvision.datasets.ImageFolder(A_DIR, transform=eval_transform)
val1_dataset = torchvision.datasets.ImageFolder(C_DIR, transform=eval_transform)
val2_dataset = torchvision.datasets.ImageFolder(S_DIR, transform=eval_transform)

print('Train Dataset: {}'.format(len(train_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))
print('Val 1 Dataset: {}'.format(len(val1_dataset)))
print('Val 2 Dataset: {}'.format(len(val2_dataset)))

Cloning into 'Homework3-PACS'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects:  33% (1/3)[Kremote: Counting objects:  66% (2/3)[Kremote: Counting objects: 100% (3/3)[Kremote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 10032 (delta 0), reused 3 (delta 0), pack-reused 10029[K
Receiving objects: 100% (10032/10032), 174.13 MiB | 33.37 MiB/s, done.
Checking out files: 100% (9993/9993), done.
Train Dataset: 1670
Test Dataset: 2048
Val 1 Dataset: 2344
Val 2 Dataset: 3929


**Prepare Dataloaders**

In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
val1_dataloader = DataLoader(val1_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
val2_dataloader = DataLoader(val2_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Normal Training**

In [0]:
def training_phase(rates, steps, train_dataloader, val1_dataloader, val2_dataloader):
  best_accuracy = -1
  for lr in rates:
    for ss in steps: 
      print("------------------------------------------------------")
      print("Start training with LR: " + str(lr) + " and SS: " + str(ss))
      print("------------------------------------------------------")
      net = alexnetDANN(pretrained=True)
      net.classifier[6] = nn.Linear(4096, 7)

      criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy
      parameters_to_not_optimize = net.domain.parameters()
      for p in parameters_to_not_optimize:
        p.requires_grad = False
      parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet
      optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
      scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=ss, gamma=GAMMA)

      net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
      cudnn.benchmark # Calling this optimizes runtime

      current_step = 0      
      for epoch in range(NUM_EPOCHS):
        print('Starting epoch {}/{}, LR = {}, SS = {}'.format(epoch+1, NUM_EPOCHS, lr, ss))
        net = net.to(DEVICE)
        net.train(True)
        for images, labels in train_dataloader:
          images = images.to(DEVICE)
          labels = labels.to(DEVICE)
          optimizer.zero_grad() # Zero-ing the gradients
          net.train() # Sets module in training mode
          outputs = net(images) # Forward pass to the network
          loss = criterion(outputs, labels)
          if math.isnan(loss):
            print ("Loss is NaN!")
            break
          loss.backward()  # backward pass: computes gradients
          optimizer.step() # update weights based on accumulated gradients

          if current_step % LOG_FREQUENCY == 0:
            print('Step {}, Loss {}'.format(current_step, loss.item()))
          current_step += 1
        if math.isnan(loss):
          break
        scheduler.step() # Step the scheduler

        net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
        net.train(False) # Set Network to evaluation mode

        running_corrects = 0
        for images, labels in val1_dataloader:
          images = images.to(DEVICE)
          labels = labels.to(DEVICE)
          outputs = net(images)
          _, preds = torch.max(outputs.data, 1)
          running_corrects += torch.sum(preds == labels.data).data.item()
        cartoon_accuracy = running_corrects / float(len(val1_dataloader.dataset))
        print ("Cartoon Accuracy: " + str(cartoon_accuracy))

        running_corrects = 0
        for images, labels in val2_dataloader:
          images = images.to(DEVICE)
          labels = labels.to(DEVICE)
          outputs = net(images)
          _, preds = torch.max(outputs.data, 1)
          running_corrects += torch.sum(preds == labels.data).data.item()
        sketch_accuracy = running_corrects / float(len(val2_dataloader.dataset))
        print ("Sketch Accuracy: " + str(sketch_accuracy))

        accuracy = (cartoon_accuracy + sketch_accuracy) / 2
        print ("Accuracy: " + str(accuracy))
        if (accuracy > best_accuracy):
          print("New best found!")
          best_accuracy = accuracy
          best_net = net
          best_hyper = [epoch, lr, ss]
  print ("best hyperparameters:") 
  print (best_hyper) 
  return best_net

**Training with DANN**

In [0]:
def DANN_training_phase(rates, steps, alphas, train_dataloader, val1_dataloader):
  accuracies = []
  comb = []
  for lr in rates:
    for ss in steps:
      for alpha in alphas:
        print("------------------------------------------------------")
        print("Start training with LR: " + str(lr) + " and SS: " + str(ss) + " and ALPHA: " + str(alpha))
        print("------------------------------------------------------")
        comb.append({"lr":lr,"ss":ss,"alpha":alpha})
        accuracy = []
        net = alexnetDANN(pretrained=True)
        net.classifier[6] = nn.Linear(4096, 7)
        net.domain[1].weight.data = net.classifier[1].weight.data
        net.domain[1].bias.data = net.classifier[1].bias.data
        net.domain[4].weight.data = net.classifier[4].weight.data
        net.domain[4].bias.data = net.classifier[4].bias.data

        criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy
        parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet
        optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=ss, gamma=GAMMA)

        net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
        cudnn.benchmark # Calling this optimizes runtime

        train_batches = len(train_dataloader)
        val_batches = len(val1_dataloader)
        n = max(train_batches, val_batches)

        current_step = 0
        # Start iterating over the epochs
        for epoch in range(NUM_EPOCHS):
          net = net.to(DEVICE)
          net.train(True)
          train_iter = iter(train_dataloader)
          val_iter = iter(val1_dataloader)
          print('Starting epoch {}/{}, LR = {}, SS = {}, alpha = {}'.format(epoch+1, NUM_EPOCHS, lr, ss, alpha))  
          for i in range(n):
            optimizer.zero_grad() # Zero-ing the gradients
            if i < train_batches:
              images,labels = next(train_iter)
              images = images.to(DEVICE)
              labels = labels.to(DEVICE)
              net.train() # Sets module in training mode
              outputs = net(images) # Forward pass to the network
              loss = criterion(outputs, labels)
              if math.isnan(loss):
                print ("Loss is NaN!")
                break
              loss.backward()  # backward pass: computes gradients
            
              l = [0] * len(labels)
              labels = torch.LongTensor(l)
              images = images.to(DEVICE)
              labels = labels.to(DEVICE)
              net.train() # Sets module in training mode
              outputs = net(images, alpha=0.1) # Forward pass to the network
              loss = criterion(outputs, labels)
              if math.isnan(loss):
                print ("Loss is NaN!")
                break
              loss.backward()  # backward pass: computes gradients

            if i < val_batches:
              images,labels = next(val_iter)
              l = [1] * len(labels)
              labels = torch.LongTensor(l)
              images = images.to(DEVICE)
              labels = labels.to(DEVICE)
              net.train() # Sets module in training mode
              outputs = net(images, alpha=0.1) # Forward pass to the network
              loss = criterion(outputs, labels)
              if math.isnan(loss):
                print ("Loss is NaN!")
                break
              loss.backward()  # backward pass: computes gradients

            if current_step % LOG_FREQUENCY == 0:
              print('Step {}, Loss {}'.format(current_step, loss.item()))

            current_step += 1
            optimizer.step() # update weights based on accumulated gradients
          if math.isnan(loss):
            break
          scheduler.step() # Step the scheduler

          net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
          net.train(False) # Set Network to evaluation mode

          running_corrects = 0
          for images, labels in val1_dataloader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            outputs = net(images)
            _, preds = torch.max(outputs.data, 1)
            running_corrects += torch.sum(preds == labels.data).data.item()
          single_accuracy = running_corrects / float(len(val1_dataloader.dataset))
          accuracy.append(single_accuracy)
          print ("Single Accuracy: " + str(single_accuracy))
        print(accuracy)
        accuracies.append(accuracy)      
  return accuracies, comb

**Final Training with DANN**


In [0]:
def DANN_final_training(lr, ss, alpha, epochs, train_dataloader, test_dataloader):
        net = alexnetDANN(pretrained=True)
        net.classifier[6] = nn.Linear(4096, 7)
        net.domain[1].weight.data = net.classifier[1].weight.data
        net.domain[1].bias.data = net.classifier[1].bias.data
        net.domain[4].weight.data = net.classifier[4].weight.data
        net.domain[4].bias.data = net.classifier[4].bias.data

        criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy
        parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet
        optimizer = optim.SGD(parameters_to_optimize, lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=ss, gamma=GAMMA)

        net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
        cudnn.benchmark # Calling this optimizes runtime

        train_batches = len(train_dataloader)
        test_batches = len(test_dataloader)
        n = max(train_batches, test_batches)

        current_step = 0
        # Start iterating over the epochs
        for epoch in range(epochs):
          train_iter = iter(train_dataloader)
          test_iter = iter(test_dataloader)
          print('Starting epoch {}/{}, LR = {}, SS = {}, alpha = {}'.format(epoch+1, epochs, lr, ss, alpha))  
          for i in range(n):
            optimizer.zero_grad() # Zero-ing the gradients
            if i < train_batches:
              images,labels = next(train_iter)
              images = images.to(DEVICE)
              labels = labels.to(DEVICE)
              net.train() # Sets module in training mode
              outputs = net(images) # Forward pass to the network
              loss = criterion(outputs, labels)
              if math.isnan(loss):
                print ("Loss is NaN!")
                break
              loss.backward()  # backward pass: computes gradients
            
              l = [0] * len(labels)
              labels = torch.LongTensor(l)
              images = images.to(DEVICE)
              labels = labels.to(DEVICE)
              net.train() # Sets module in training mode
              outputs = net(images, alpha=0.1) # Forward pass to the network
              loss = criterion(outputs, labels)
              if math.isnan(loss):
                print ("Loss is NaN!")
                break
              loss.backward()  # backward pass: computes gradients

            if i < test_batches:
              images,labels = next(test_iter)
              l = [1] * len(labels)
              labels = torch.LongTensor(l)
              images = images.to(DEVICE)
              labels = labels.to(DEVICE)
              net.train() # Sets module in training mode
              outputs = net(images, alpha=0.1) # Forward pass to the network
              loss = criterion(outputs, labels)
              if math.isnan(loss):
                print ("Loss is NaN!")
                break
              loss.backward()  # backward pass: computes gradients

            if current_step % LOG_FREQUENCY == 0:
              print('Step {}, Loss {}'.format(current_step, loss.item()))

            current_step += 1
            optimizer.step() # update weights based on accumulated gradients
          if math.isnan(loss):
            break
          scheduler.step() # Step the scheduler
        return net

**Test**

In [0]:
def test_phase(net, test_dataloader):
  net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  net.train(False) # Set Network to evaluation mode

  running_corrects = 0
  for images, labels in tqdm(test_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)
    outputs = net(images)
    _, preds = torch.max(outputs.data, 1)
    running_corrects += torch.sum(preds == labels.data).data.item()
  accuracy = running_corrects / float(len(test_dataloader.dataset))
  print('Test Accuracy: {}'.format(accuracy))
  return accuracy

**Main**

In [0]:
best_net = training_phase([0.001, 0.005, 0.01], [20, 10], train_dataloader, val1_dataloader, val2_dataloader)
accuracy = test_phase(best_net, test_dataloader)
print("Accuracy without DANN: " + str(accuracy))

cartoon_accuracies, comb = DANN_training_phase([0.001,0.0005,0.0001], [10,20], [0.5,0.3,0.1], train_dataloader, val1_dataloader)
print ("CARTOON Accuracies:")
print (cartoon_accuracies)

sketch_accuracies, comb = DANN_training_phase([0.001,0.0005,0.0001], [10,20], [0.5,0.3,0.1], train_dataloader, val2_dataloader)
print ("SKETCH Accuracies:")
print (sketch_accuracies)

accuracies = []
for i in range(len(cartoon_accuracies)):
  average = []
  n = min(len(cartoon_accuracies[i]), len(sketch_accuracies[i]))
  for j in range(n):
    average.append((cartoon_accuracies[i][j] + sketch_accuracies[i][j])/2)
  accuracies.append(average)

print ("Average Accuracies:")
print (accuracies)

maximum = 0
for i in range(len(accuracies)):
  for j in range(5, len(accuracies[i])):
    if accuracies[i][j] > maximum:
      maximum = accuracies[i][j]
      max_i = i
      max_j = j

print ("Highest Accuracy found:" + str(accuracies[max_i][max_j]))
print ("Best Hyperparameters: " + str(comb[max_i]))
print ("Best number of Epochs: " + str(max_j))

best_net = DANN_final_training(comb[max_i]["lr"], comb[max_i]["ss"], comb[max_i]["alpha"], max_j+1, train_dataloader, test_dataloader)
accuracy = test_phase(best_net, test_dataloader)
print("Accuracy with DANN: " + str(accuracy))

[[0.2019067435894986, 0.21595006111030518, 0.2097119346210944, 0.2002244728747556, 0.2243733587735201, 0.21732444577253068, 0.23732227194824168, 0.2419860045674198, 0.24537714874170102, 0.26246257156681263, 0.26068094774395695, 0.25588631876212325, 0.251727984002738, 0.25062022399293954, 0.2509195863088594, 0.24990151555294188, 0.24867013421681955, 0.24867013421681955, 0.24833320231029093, 0.24829199520151635, 0.24829199520151635, 0.24829199520151635, 0.24829199520151635, 0.24829199520151635, 0.24829199520151635, 0.24829199520151635, 0.24829199520151635, 0.24829199520151635, 0.24829199520151635, 0.24807868462131155], [0.19742841581414822, 0.1990755057561825, 0.17983515202002787, 0.1896110635277889, 0.19474989945248294, 0.20056140478128417, 0.21858731607187998, 0.2065013633635251, 0.2178419505957712, 0.2087435404192332, 0.20972040406637613, 0.21273704674352, 0.21477682577352097, 0.2145186705663757, 0.21494165420861938, 0.21498286131739397, 0.2160045695914774, 0.21557794843106784, 0.2161

NameError: ignored