#### Implementation of SimCLR framework to carry out emotion classification on German Shepherds
- Date : February 17th 2024
- Author : Aarya Bhave
- Project : Dog_Emotion_Classification
  
This code implements the SimCLR framework to carry out self-supervised contrastive learning to detect emotions in German Shepherds.  
Currently Limited to German Shepherds only.  
PyTorch version '2.1.1'.

In [3]:
import numpy as np
import pandas as pd
import shutil, time, os, tqdm, requests, random, copy
import PIL
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, models

import matplotlib.pyplot as plt 
%matplotlib inline

from sklearn.manifold import TSNE

In [4]:
def set_seed(seed = 16):
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch's generator with the same value so that repeated runs draw the
    same random numbers.

    Args:
        seed: Integer seed applied to all three generators.
    """
    # Python's own RNG was previously left unseeded, so any code drawing from
    # `random` differed from run to run even after calling this function.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

##### Set up the DataSet class.
- 264 unlabeled images of German Shepherds.
- Processor threading based augmentation techniques for low latency.

In [5]:
from imutils import paths

class GSDDataset(torch.utils.data.Dataset):
    """Unlabeled image dataset read from a directory of image files.

    Every image found under ``root_dir`` becomes one sample. Samples are
    returned without a label.

    Args:
        transforms: Optional callable applied to each PIL image before it is
            returned.
        root_dir: Directory searched for images. Defaults to the German
            Shepherd crop folder used by this project.
    """

    def __init__(self, transforms = None, root_dir = 'data/processed/YOLOCrops/German_Shepherd/dog'):
        self.root_dir = root_dir
        # Sorted so that index -> file mapping does not depend on the order in
        # which the directory listing happens to be produced.
        self.image_paths = sorted(paths.list_images(self.root_dir))
        self.transforms = transforms

    def __getitem__(self, index):
        """Load image ``index`` as RGB and apply the configured transforms."""
        # The context manager closes the file handle; convert() returns a
        # fully loaded copy, so the sample stays usable after the file closes.
        # Forcing RGB keeps every sample at 3 channels even if a file on disk
        # is grayscale or carries an alpha channel.
        with Image.open(self.image_paths[index]) as image:
            sample = image.convert('RGB')
        if self.transforms:
            sample = self.transforms(sample)
        return sample

    def __len__(self):
        """Return the number of image files discovered under ``root_dir``."""
        return len(self.image_paths)
    

##### Set up the Data Transformations.
- MUST BE TINKERED WITH.
- Data augmentations are treated as hyper-parameters.
- Images will be resized to 224, 224.
- 18FEB2024 - Random Resize and Crop, Random Horizontal Flip, Random Color Jitters.

In [6]:
class Augment:
    """Random augmentation applied to a single image.

    The pipeline is: random resized crop to 224x224 (keeping 75-100% of the
    area), horizontal flip with probability 0.5, then colour jitter on
    brightness, contrast, saturation and hue.
    """

    def __init__(self):
        # Built once here rather than on every call: the pipeline itself is
        # fixed, only the random draws inside each transform vary per sample.
        self.pipeline = transforms.Compose([transforms.RandomResizedCrop((224, 224), scale = (0.75, 1)),
                                            transforms.RandomHorizontalFlip(p=0.5),
                                            transforms.ColorJitter(brightness=(0.80,1.20),contrast=(0.75, 1.25),saturation=(0.75,1.25),hue=(-0.1,0.1))])

    def __call__(self, sample):
        """Return a randomly augmented version of ``sample``."""
        return self.pipeline(sample)
    
# Full per-sample pipeline: random augmentation first, tensor conversion last.
augmentations = transforms.Compose(
    [
        Augment(),
        transforms.ToTensor(),
    ]
)

##### Set up hyper-parameters.
- Epochs ~600.
- Batch Size ~256.
- Temperature ~0.1

In [7]:
# Training hyper-parameters.
EPOCHS = 600      # presumably the number of training epochs -- confirm against the training loop
BATCH_SIZE = 256  # number of samples per batch

##### Set up the DataLoaders
- Issue : shuffle=True causes len() argument to break down. !Fixed!

In [8]:
# Wrap the augmented dataset in a shuffling loader that uses 8 worker processes.
dataset = GSDDataset(transforms=augmentations)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=8,
)

In [9]:
# Smoke test: pull a single batch from the loader and display its shape.
dataiter = iter(dataloader)
data = next(dataiter)
# Bare expression so the notebook cell displays the batch shape.
data.shape

torch.Size([256, 3, 224, 224])

##### Model
- Encoder -> Projection Head -> NT Xent Loss.
- Encoder ~ResNet50 18FEB2024
- Projection Head ~MultiLayerPerceptron with one hidden layer 18FEB2024
- NT Xent Loss Temperature ~0.2 18FEB2024

In [15]:
class Identity(nn.Module):
    """Pass-through module whose forward returns its input unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` as-is (the same object, no copy)."""
        return x

class LinearLayer(nn.Module):
    """Linear layer optionally followed by 1-D batch normalisation.

    Args:
        in_features: Size of each input vector.
        out_features: Size of each output vector.
        use_bias: Whether the linear layer should have a bias term. Ignored
            (treated as False) when ``use_bn`` is set.
        use_bn: Whether to apply ``nn.BatchNorm1d`` after the linear layer.
        **kwargs: Forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, in_features, out_features, use_bias = True, use_bn = False, **kwargs):
        super().__init__(**kwargs)
        self.in_features, self.out_features = in_features, out_features
        self.use_bias, self.use_bn = use_bias, use_bn

        # The linear bias is switched off whenever batch norm follows.
        wants_bias = self.use_bias and not self.use_bn
        self.linear = nn.Linear(self.in_features, self.out_features, bias=wants_bias)

        # The `bn` attribute only exists when batch norm was requested.
        if self.use_bn:
            self.bn = nn.BatchNorm1d(self.out_features)

    def forward(self, x):
        """Apply the linear map, then batch norm if it is enabled."""
        projected = self.linear(x)
        return self.bn(projected) if self.use_bn else projected
    
class ProjectionHead(nn.Module):
    """Projection head mapping encoder features to the embedding space.

    Two variants are available:

    * ``'linear'``: a single bias-free ``LinearLayer`` with batch norm from
      ``in_features`` to ``out_features`` (``hidden_features`` is not used).
    * ``'nonlinear'``: ``LinearLayer`` (with batch norm) to
      ``hidden_features``, ReLU, then a bias-free ``LinearLayer`` with batch
      norm to ``out_features``.

    Args:
        in_features: Size of the incoming feature vectors.
        hidden_features: Width of the hidden layer of the non-linear head.
        out_features: Size of the projected vectors.
        head_type: ``'linear'`` or ``'nonlinear'``.
        **kwargs: Forwarded to ``nn.Module.__init__``.

    Raises:
        ValueError: If ``head_type`` is not one of the supported values.
    """

    def __init__(self,
                 in_features,
                 hidden_features,
                 out_features,
                 head_type = 'nonlinear',
                 **kwargs):
        # Fail at construction time: an unrecognised head type used to leave
        # `self.layers` undefined and only surfaced later inside forward().
        if head_type not in ('linear', 'nonlinear'):
            raise ValueError(
                f"head_type must be 'linear' or 'nonlinear', got {head_type!r}"
            )

        super().__init__(**kwargs)
        self.in_features = in_features
        self.out_features = out_features
        self.hidden_features = hidden_features
        self.head_type = head_type

        if self.head_type == 'linear':
            self.layers = LinearLayer(self.in_features, self.out_features, False, True)
        else:
            self.layers = nn.Sequential(LinearLayer(self.in_features, self.hidden_features, True, True),
                                        nn.ReLU(),
                                        LinearLayer(self.hidden_features, self.out_features, False, True))

    def forward(self, x):
        """Project ``x`` through the configured head."""
        return self.layers(x)
    
class PreModel(nn.Module):
    """ResNet-50 encoder followed by a non-linear projection head.

    The encoder is a torchvision ResNet-50 loaded with ImageNet weights and
    then modified: its first convolution is replaced by a newly initialised
    3x3, stride-1 convolution (so the pretrained weights of that layer are
    discarded), and its max-pool and final fully connected layer are replaced
    by ``Identity``. All encoder parameters are left trainable.

    Args:
        base_model: Stored on the instance as ``self.base_model``. It does
            not select the encoder; ResNet-50 is always used.
    """

    def __init__(self, base_model):
        super().__init__()
        self.base_model = base_model

        #PRETRAINED MODEL
        # `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1
        # is the checkpoint that `pretrained=True` resolved to.
        self.pretrained = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

        self.pretrained.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
        self.pretrained.maxpool = Identity()

        # Drop the classifier so the encoder emits its pooled features.
        self.pretrained.fc = Identity()

        for p in self.pretrained.parameters():
            p.requires_grad = True

        self.projector = ProjectionHead(2048, 2048, 128)

    def forward(self, x):
        """Encode ``x`` and return its projection.

        Args:
            x: Batch of images accepted by the ResNet encoder.

        Returns:
            The projection head output for each sample in the batch.
        """
        out = self.pretrained(x)
        # Flatten everything after the batch axis. torch.squeeze() would also
        # remove the batch axis itself for a single-sample batch, handing the
        # projector a 1-D tensor.
        xp = self.projector(torch.flatten(out, 1))
        return xp