## Import  libraries

In [1]:
import os 
import imagesize
import zipfile 
import statistics 
import math
import torch
import torchvision

import numpy as np
import torchvision.transforms as T
import torch.nn.functional as F
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


from tqdm import tqdm
from pathlib import Path
from sklearn.model_selection import train_test_split
from torch import Tensor
from torchvision import models
from torch.autograd import Variable
from torch.nn import CrossEntropyLoss
from torch.optim import RMSprop, Adagrad
from overrides import overrides, final
from abc import abstractmethod
#from google.colab import drive



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
classes = ["backpack", "bookcase", "car jack", "comb", "crown", "file cabinet", "flat iron", "game controller", "glasses",
           "helicopter", "ice skates", "letter tray", "monitor", "mug", "network switch", "over-ear headphones", "pen",
           "purse", "stand mixer", "stroller"]

domains = ["product_images", "real_life"]

In [3]:
def data_transformation(resize_dim = 256, crop_dim = 224, grayscale = True, crop_center = True):
    
    transform_lst = []
    transform_lst.append(T.Resize((resize_dim)))                                                          
    
    if grayscale:
        transform_lst.append(T.Grayscale(num_output_channels=3))                        
    
    if crop_center:
        transform_lst.append(T.CenterCrop((crop_dim)))
    else:
        transform_lst.append(T.RandomCrop((crop_dim)))
    
    transform_lst.append(T.RandomHorizontalFlip(p=0.5))                                  
    transform_lst.append(T.ToTensor())                                             
        
    return T.Compose(transform_lst)  



In [4]:
def normalization(dataset):
    ds_length = len(dataset)
    for i in tqdm(range(ds_length)):
        r_mean, g_mean, b_mean = torch.mean(dataset[i][0], dim = [1,2])
        r_std, g_std, b_std = torch.std(dataset[i][0], dim = [1,2])
        T.functional.normalize(
            tensor = dataset[i][0], 
            mean = [r_mean, g_mean, b_mean],
            std = [r_std, g_std, b_std],
            inplace=True
            )
    return dataset

In [5]:
source = "product_images"
target = "real_life"
resize_dim = 256
crop_dim = 224
grayscale = False
crop_center = True 


source_ds = torchvision.datasets.ImageFolder(
    root = f"data/Adaptiope/{source}",
    transform = data_transformation(resize_dim, crop_dim, grayscale, crop_center)
    )

target_ds = torchvision.datasets.ImageFolder(
    root = f"data/Adaptiope/{target}",
    transform = data_transformation(resize_dim, crop_dim, grayscale, crop_center)
    ) 

if not grayscale:
    normalization(source_ds)
    normalization(target_ds)
    

100%|██████████| 2000/2000 [01:38<00:00, 20.37it/s]
100%|██████████| 2000/2000 [02:51<00:00, 11.64it/s]


In [6]:
def get_data(dataset, test_split=0.2, batch_size=32):
    
    train_indices, val_indices = train_test_split(
        list(range(len(dataset.targets))),
        test_size = test_split,
        stratify = dataset.targets, 
        random_state = 42
        )
    
    train_dataset = torch.utils.data.Subset(dataset, train_indices)
    val_dataset = torch.utils.data.Subset(dataset, val_indices)

    train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    return train_data_loader, val_data_loader
    

In [7]:
batch_size = 32
test_split = 0.2

source_train_loader, source_val_loader = get_data(source_ds, test_split, batch_size)
target_train_loader, target_val_loader = get_data(target_ds, test_split, batch_size)

### Losses

In [8]:
class _Loss(torch.nn.Module):
    
    _THRESHOLD = 1e-20
    
    def __init__(self):
        super(_Loss, self).__init__()
        
    def forward(self, input: Tensor):
        prob = self.to_softmax(input)
        return self.loss(prob)
        
    @final
    def add_threshold(self, prob: Tensor):
        '''
        Check whether the probability distribution after the softmax 
        is equal to 0 in any cell. If this holds, a standard threshold
        is added in order to avoid log(0) case. 

        Parameters
        ----------
        prob: Tensor
            output tensor of the softmax operation

        Returns
        -------
        Tensor
            updated tensor (in case the condition above holds)
        '''
        zeros = (prob == 0)
        if torch.any(zeros):
            thre_tensor = torch.zeros(zeros.shape)
            thre_tensor[zeros] = self._THRESHOLD
            prob += thre_tensor
        return prob
    
    def to_softmax(self, features: Tensor):
        '''
        Apply the softmax operation on the features tensor, 
        being the output of a feature extractor. 
        
        Parameters
        ----------
        features: Tensor
            input tensor of the softmax operation

        Returns
        -------
        Tensor
            probability distribution with (possible) threshold
        '''
        prob = F.softmax(features, dim=1)
        return self.add_threshold(prob)
    
    @abstractmethod
    def loss(self, prob: Tensor):
        pass

In [9]:
class EntropyMinimizationLoss(_Loss):
    
    def __init__(self, n_classes: int):
        super(EntropyMinimizationLoss, self).__init__()
        self.n_classes = n_classes
    
    @overrides
    def loss(self, prob: Tensor):
        prob_source = prob[:, :self.n_classes]
        prob_target = prob[:, self.n_classes:]
        prob_sum = prob_source + prob_target
        return -(prob_sum.log().mul(prob_sum).sum(dim=1).mean())

In [10]:
class SplitLoss(_Loss):
    
    def __init__(self, n_classes: int, source: bool, split_first: bool):
        super(SplitLoss, self).__init__()
        self.n_classes = n_classes
        self._is_source = source
        self._split_first = split_first
    
    @overrides
    def to_softmax(self, features: Tensor):
        if self._split_first:
            prob = self.split_vector(features)
            prob = F.softmax(prob, dim=1)
        else:
            prob = F.softmax(features, dim=1)
            prob = self.split_vector(prob)
        return self.add_threshold(prob)
    
    @final
    def split_vector(self, prob: Tensor):
        return prob[:,:self.n_classes] if self._is_source else prob[:,self.n_classes:]

In [11]:
class SplitCrossEntropyLoss(SplitLoss):
    
    def _get_y_labels(self):
        return self._y_labels
    def _set_y_labels(self, y_labels: Variable):
        if not all(y < self.n_classes for y in y_labels):
            raise ValueError('Expected all y labels < n_classes')
        self._y_labels = y_labels
    y_labels = property(fget=_get_y_labels, fset=_set_y_labels)
    
    def __init__(self, n_classes: int, source: bool, split_first: bool):
        super(SplitCrossEntropyLoss, self).__init__(n_classes, source, split_first)
        self.cross_entropy_loss = torch.nn.CrossEntropyLoss()
    
    @overrides
    def loss(self, prob: Tensor):
        '''Computes cross-entropy loss w.r.t. ground-truth (y label)'''
        return self.cross_entropy_loss(prob, self.y_labels)

In [12]:
class DomainDiscriminationLoss(SplitLoss):
    
    def __init__(self, n_classes: int, source: bool):
        super(DomainDiscriminationLoss, self).__init__(n_classes, source, False)
        
    @overrides
    def loss(self, prob: Tensor):
        return -(prob.sum(dim=1).log().mean())

In [13]:
class TrainingObjectives:
    
    @staticmethod
    def domain_discrimination_loss(src_dom_discrim_loss, tgt_dom_discrim_loss):
        return src_dom_discrim_loss + tgt_dom_discrim_loss
    
    @staticmethod
    def category_confusion_loss(src_cat_conf_loss, tgt_cat_conf_loss):
        return 0.5 * (src_cat_conf_loss + tgt_cat_conf_loss)
    
    @staticmethod
    def domain_confusion_loss(src_dom_conf_loss, tgt_dom_conf_loss):
        return 0.5 * (src_dom_conf_loss + tgt_dom_conf_loss)
    
    @staticmethod
    def overall_classifier_loss(src_task_class_loss, tgt_task_class_loss, domain_discrim_loss):
        return src_task_class_loss + tgt_task_class_loss + domain_discrim_loss
    
    @staticmethod
    def overall_generator_loss(cat_conf_loss, dom_conf_loss, tgt_entropy_loss, curr_epoch, tot_epochs):
        lambda_trade_off = 2 / (1 + math.exp(-1 * 10 * curr_epoch / tot_epochs)) - 1
        return cat_conf_loss + lambda_trade_off * (dom_conf_loss + tgt_entropy_loss)

### feature extractor

In [14]:
class FeatureExtractor:
    
    def __init__(self, n_classes: int, n_layers_trained: int, model='resnet18', optimizer='rmsprop', lr=0.01, weight_decay=0):
        # TODO: scrivi la descrizione!!!!
        # Upload pretrained model 
        if model.lower() == 'resnet18': 
            self.model = models.resnet18(pretrained=True)
        elif model.lower() == 'resnet50': 
            self.model = models.resnet50(pretrained=True)
        else:
            raise ValueError('Unknown model')
        
        # Modify last fully-connected layer
        self.model.fc = torch.nn.Linear(
            in_features = self.model.fc.in_features, 
            out_features = n_classes * 2
        )
        
        # Freeze pretrained layers
        
        count = 0 
        n_params = len(list(self.model.parameters()))
        for param in self.model.parameters():
            n_layers_frozen = n_params - count - 1
            param.requires_grad = (n_layers_frozen < n_layers_trained)
            count += 1 
        
        #params = list(self.model.parameters())
        #for i in range(len(params)):
        #    n_layers_frozen = len(params) - i - 1
        #    params[i].requires_grad = (n_layers_frozen < n_layers_trained)
        
        params_to_train = filter(lambda p: p.requires_grad, self.model.parameters())
        
        # Initialize optimizer
        if optimizer.lower() == 'rmsprop':
            self.optim = torch.optim.RMSprop(
                params = params_to_train,
                lr = lr,
                weight_decay = weight_decay
            )
        elif optimizer.lower() == 'adadelta':
            self.optim = torch.optim.Adadelta(
                params = params_to_train,
                lr = lr,
                weight_decay = weight_decay
            )
        elif optimizer.lower() == 'sgd':
            self.optim = torch.optim.SGD(
                params = params_to_train,
                lr = lr,
                weight_decay = weight_decay,
                nesterov = True
            )
        else:
            raise ValueError('Unknown optimizer')
        

## Model 

In [59]:
class ModelTrainer:
    
    def __init__(self, model: FeatureExtractor, n_classes: int, epochs: int):
        self.model = model 
        self.curr_epoch = 0
        self.tot_epochs = epochs
        self.n_classes = n_classes
        # Task classifier losses
        self.src_task_class_loss = SplitCrossEntropyLoss(n_classes=n_classes, source=True, split_first=True).cuda()
        self.tgt_task_class_loss = SplitCrossEntropyLoss(n_classes=n_classes, source=False, split_first=True).cuda()
        # Domain discrimination losses
        self.src_dom_discrim_loss = DomainDiscriminationLoss(n_classes=n_classes, source=True).cuda()
        self.tgt_dom_discrim_loss = DomainDiscriminationLoss(n_classes=n_classes, source=False).cuda()
        # Category-level confusion losses
        self.src_cat_conf_loss = SplitCrossEntropyLoss(n_classes=n_classes, source=True, split_first=False).cuda()
        self.tgt_cat_conf_loss = SplitCrossEntropyLoss(n_classes=n_classes, source=False, split_first=False).cuda()
        # Domain-level confusion losses
        self.src_dom_conf_loss = DomainDiscriminationLoss(n_classes=n_classes, source=True).cuda()
        self.tgt_dom_conf_loss = DomainDiscriminationLoss(n_classes=n_classes, source=False).cuda()
        # Entropy minimization loss
        self.tgt_entropy_loss = EntropyMinimizationLoss(n_classes=n_classes).cuda()
        
        
        
        
    def train_one_epoch(self, source_dataloader, target_dataloader):
        self.curr_epoch += 1
        end_of_epoch = False
        source_batch_loader = enumerate(source_dataloader)
        target_batch_loader = enumerate(target_dataloader)
        
        torch.autograd.set_detect_anomaly(True)
        
        # Train for current epoch
        while not end_of_epoch:
            try:
                # Get next batch for both source and target
                (X_source, y_source) = source_batch_loader.__next__()[1]
                (X_target, _) = target_batch_loader.__next__()[1]
            except StopIteration:
                end_of_epoch = True
                continue
            
            # Tell model go training mode
            self.model.model.train()
            
            # Convert to torch.autograd variables
            X_source_var = Variable(X_source) 
            y_source_var = Variable(y_source)
            X_target_var = Variable(X_target)
            
            # Compute features for both inputs
            X_source_features = self.model.model(X_source_var)
            X_target_features = self.model.model(X_target_var)
            
            # Compute overall training objective losses
            classifier_loss, generator_loss = self.overall_losses(
                X_source_features, 
                X_target_features, 
                y_source_var
            )
            
            self.model.optim.zero_grad()
            classifier_loss.backward(retain_graph=True) 
            grad_classifier_tmp = []
            for p in self.model.model.parameters():
                grad_classifier_tmp.append(p.grad.data.clone() if p.requires_grad else p.data.clone().zero_())
                
            self.model.optim.zero_grad()
            generator_loss.backward()
            grad_generator_tmp = []
            for p in self.model.model.parameters():
                grad_generator_tmp.append(p.grad.data.clone() if p.requires_grad else p.data.clone().zero_())
            
            count = 0 
            for p in self.model.model.parameters():
                grad_tmp = p.data.clone().zero_() 
                if count < 159: # FIXME: capire perché 159
                    grad_tmp += grad_generator_tmp[count]
                else: 
                    grad_tmp += grad_classifier_tmp[count]
                p.grad = grad_tmp 
                count += 1 
            self.model.optim.step()
            
            

    def overall_losses(self, X_source_features, X_target_features, y_source_var) -> tuple[Tensor, Tensor]:
        # Source task classifier loss
        self.src_task_class_loss.y_labels = y_source_var
        _src_task_class_loss = self.src_task_class_loss(X_source_features)
        
        # (Cross-domain) Target task classifier loss
        self.tgt_task_class_loss.y_labels = y_source_var
        _tgt_task_class_loss = self.tgt_task_class_loss(X_source_features)
        
        # Domain discrimination loss
        _src_dom_discrim_loss = self.src_dom_discrim_loss(X_source_features)
        _tgt_dom_discrim_loss = self.tgt_dom_discrim_loss(X_target_features)
        _domain_discrim_loss = TrainingObjectives.domain_discrimination_loss(
            _src_dom_discrim_loss, 
            _tgt_dom_discrim_loss
        )
        
        # Category-level confusion loss
        self.src_cat_conf_loss.y_labels = y_source_var
        self.tgt_cat_conf_loss.y_labels = y_source_var
        _src_cat_conf_loss = self.src_cat_conf_loss(X_source_features)
        _tgt_cat_conf_loss = self.tgt_cat_conf_loss(X_source_features)
        _category_conf_loss = TrainingObjectives.category_confusion_loss(
            _src_cat_conf_loss, 
            _tgt_cat_conf_loss
        )
        
        # Domain-level confusion loss
        _src_dom_conf_loss = self.src_cat_conf_loss(X_target_features)
        _tgt_dom_conf_loss = self.tgt_cat_conf_loss(X_target_features)
        _domain_conf_loss = TrainingObjectives.domain_confusion_loss(
            _src_dom_conf_loss, 
            _tgt_dom_conf_loss
        )

        # Entropy minimization loss
        _tgt_entropy_loss = self.tgt_entropy_loss(X_target_features)
        
        # Overall classifier loss
        _overall_classifier_loss = TrainingObjectives.overall_classifier_loss(
            _src_task_class_loss, 
            _tgt_task_class_loss, 
            _domain_discrim_loss
        )

        # Overall feature extractor loss
        _overall_generator_loss = TrainingObjectives.overall_generator_loss(
            _category_conf_loss, 
            _domain_conf_loss, 
            _tgt_entropy_loss, 
            self.curr_epoch, 
            self.tot_epochs
        )
        
        return _overall_classifier_loss, _overall_generator_loss

source_train_loader, source_val_loader


target_train_loader, target_val_loader 

In [61]:
generator = FeatureExtractor(20, 3, 'resnet50')

symnet = ModelTrainer(generator, 20, 100)

symnet.train_one_epoch(source_train_loader, target_train_loader)





  File "/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/Users/luciahrovatin/Desktop/deep-learning-proj/venv/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/Users/luciahrovatin/Desktop/deep-learning-proj/venv/lib/python3.9/site-packages/traitlets/config/application.py", line 976, in launch_instance
    app.start()
  File "/Users/luciahrovatin/Desktop/deep-learning-proj/venv/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 712, in start
    self.io_loop.start()
  File "/Users/luciahrovatin/Desktop/deep-learning-proj/venv/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asy

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [32, 40]], which is output 0 of SoftmaxBackward0, is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!