## Import packages

In [1]:
import torch
import torch.nn as nn
import torchvision

from torch.utils.data import (
    Dataset,
    DataLoader,
)

from torchvision.datasets import ImageFolder
from torchvision.transforms import (
    RandomResizedCrop,
    RandomHorizontalFlip,
    ColorJitter,
    RandomGrayscale,
    RandomApply,
    Compose,
    GaussianBlur,
    ToTensor,
)
import torchvision.models as models

import os
import glob
import time
from skimage import io
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from torch.utils.data import random_split

In [3]:
import numpy as np

## Get device type

In [4]:
print(f'Torch-Version {torch.__version__}')
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'DEVICE: {DEVICE}')

Torch-Version 1.12.1.post201
DEVICE: cuda


## Define transforms

In [5]:
def get_complete_transform(output_shape, kernel_size, s=1.0):
    """
    The color distortion transform.
    
    Args:
        s: Strength parameter.
    
    Returns:
        A color distortion transform.
    """
    rnd_crop = RandomResizedCrop(output_shape)
    rnd_flip = RandomHorizontalFlip(p=0.5)
    
    color_jitter = ColorJitter(0.8*s, 0.8*s, 0.8*s, 0.2*s)
    # rnd_color_jitter = RandomApply([color_jitter], p=0.8)
    
    rnd_gray = RandomGrayscale(p=0.2)
    gaussian_blur = GaussianBlur(kernel_size=kernel_size)
    rnd_gaussian_blur = RandomApply([gaussian_blur], p=0.5)
    to_tensor = ToTensor()

    image_transform = Compose([
        to_tensor,
        rnd_crop,
        rnd_flip,
        # rnd_color_jitter,
        rnd_gray,
        rnd_gaussian_blur,
    ])
    return image_transform


class ContrastiveLearningViewGenerator(object):
    """Take two random crops of one image as the query and key."""

    def __init__(self, base_transform, n_views=2):
        self.base_transform = base_transform
        self.n_views = n_views

    def __call__(self, x):
        views = [self.base_transform(x) for i in range(self.n_views)]
        return views

## Read dataset

In [6]:
class CustomDataset(Dataset):
    """Flowers Dataset"""

    def __init__(self, list_images, transform=None):
        """
        Args:
            list_images (list): List of all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.list_images = list_images
        self.transform = transform

    def __len__(self):
        return len(self.list_images)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        img_name = self.list_images[idx]
        image = io.imread(img_name)
        if self.transform:
            image = self.transform(image)

        return image

In [7]:
# The size of the images
output_shape = [299, 299]
kernel_size = [21,21] # 10% of the output_shape

# The custom transform
base_transforms = get_complete_transform(output_shape=output_shape, kernel_size=kernel_size, s=1.0)
custom_transform = ContrastiveLearningViewGenerator(base_transform=base_transforms)

train_ds = CustomDataset(
    list_images=glob.glob("scenario_2_with_nst/train/*/*.jpg"),
    transform=custom_transform
)


In [None]:
total_train_examples = len(train_ds)

## 90:10 train val split

In [None]:
# train_ds_1, val_ds = data.random_split(train_ds, [int(total_train_examples*0.9), total_train_examples - int(total_train_examples*0.9)])

In [9]:
plt.figure(figsize=(10,20))
def view_data(transform_ds, index):
    for i in range(1, 4):
        images = transform_ds[index]
        print(np.array(images[0]).shape)
        view1, view2 = images
        plt.subplot(5,2,2*i-1)
        plt.imshow(view1.permute(1,2,0))
        plt.subplot(5,2,2*i)
        plt.imshow(view2.permute(1,2,0))

view_data(train_ds, 2)

IndexError: list index out of range

<Figure size 1000x2000 with 0 Axes>

## Data loader

In [11]:
BATCH_SIZE = 64

# Building the data loader
train_dl = torch.utils.data.DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=os.cpu_count(),
    drop_last=True,
    pin_memory=True,
)

## SimCLR model with pretrained InceptionV3 model for encoding

In [12]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [13]:
from pytorch_lightning.callbacks.early_stopping import EarlyStopping


In [14]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
    def forward(self, x):
        return x

class SimCLR(nn.Module):
    def __init__(self, linear_eval=False):
        super().__init__()
        self.linear_eval = linear_eval
        inceptionv3 = models.inception_v3(pretrained=True)
        # set_parameter_requires_grad(inceptionv3, True)
        inceptionv3.aux_logits = False
        inceptionv3.fc = Identity()
        inceptionv3.AuxLogits.fc = Identity()
        self.encoder = inceptionv3
        self.projection = nn.Sequential(
            nn.Linear(2048, 512),
            nn.ReLU(), 
            nn.Linear(512, 256)
        )

    def forward(self, x):
        if not self.linear_eval:
            x = torch.cat(x, dim=0)

        encoding = self.encoder(x)
        projection = self.projection(encoding) 
        return projection

In [15]:
LABELS = torch.cat([torch.arange(BATCH_SIZE) for i in range(2)], dim=0)
LABELS = (LABELS.unsqueeze(0) == LABELS.unsqueeze(1)).float() # Creates a one-hot with broadcasting
LABELS = LABELS.to(DEVICE) #128,128

def cont_loss(features, temp):
    """
    The NTxent Loss.
    
    Args:
        z1: The projection of the first branch
        z2: The projeciton of the second branch
    
    Returns:
        the NTxent loss
    """
    similarity_matrix = torch.matmul(features, features.T) # 128, 128
    # discard the main diagonal from both: labels and similarities matrix
    mask = torch.eye(LABELS.shape[0], dtype=torch.bool).to(DEVICE)
    # ~mask is the negative of the mask
    # the view is required to bring the matrix back to shape
    labels = LABELS[~mask].view(LABELS.shape[0], -1) # 128, 127
    similarity_matrix = similarity_matrix[~mask].view(similarity_matrix.shape[0], -1) # 128, 127

    # select and combine multiple positives
    positives = similarity_matrix[labels.bool()].view(labels.shape[0], -1) # 128, 1

    # select only the negatives
    negatives = similarity_matrix[~labels.bool()].view(similarity_matrix.shape[0], -1) # 128, 126

    logits = torch.cat([positives, negatives], dim=1) # 128, 127
    labels = torch.zeros(logits.shape[0], dtype=torch.long).to(DEVICE)

    logits = logits / temp
    return logits, labels

In [16]:
simclr_model = SimCLR().to(DEVICE)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = torch.optim.Adam(simclr_model.parameters())



In [None]:
EPOCHS = 15
train_loss_list = []
for epoch in range(EPOCHS):
    t0 = time.time()
    running_loss = 0.0
    for i, views in enumerate(train_dl):
        projections = simclr_model([view.to(DEVICE) for view in views])
        logits, labels = cont_loss(projections, temp=2)
        loss = criterion(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        if i % 10 == 9:    # print every 10 mini-batches
            print(f'EPOCH: {epoch+1} BATCH: {i+1} LOSS: {(running_loss/100):.4f} ')
            train_loss_list.append(running_loss)
            running_loss = 0.0
    print(f'Time taken: {((time.time()-t0)/60):.3f} mins')

EPOCH: 1 BATCH: 10 LOSS: 0.4300 
EPOCH: 1 BATCH: 20 LOSS: 0.3634 
EPOCH: 1 BATCH: 30 LOSS: 0.3056 
EPOCH: 1 BATCH: 40 LOSS: 0.2955 


In [28]:
import torch
torch.cuda.empty_cache()
import gc
# del variables
gc.collect()

2104

In [None]:
torch.save(simclr_model.state_dict(), "simclr_weights_scenario_no_nst.pt")

## Downstream task

In [71]:
from torchvision.transforms import (
    CenterCrop,
    Resize, 
    ToTensor
)

In [72]:
resize = Resize(400)
ccrop = CenterCrop(299)
ttensor = ToTensor()

custom_transform = Compose([
    resize,
    ccrop,
    ttensor,
])

train_ds_downstream = ImageFolder(
    root="scenario_1_no_nst/train",
    transform=custom_transform
)

nu_classes = len(train_ds_downstream.classes)

BATCH_SIZE = 128

# Building the data loader
train_dl_downstream = torch.utils.data.DataLoader(
    train_ds_downstream,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=os.cpu_count(),
    drop_last=True,
    pin_memory=True,
)

In [73]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
    def forward(self, x):
        return x

class LinearEvaluation(nn.Module):
    def __init__(self, model, nu_classes):
        super().__init__()
        simclr = model
        simclr.linear_eval = True
        simclr.projection = Identity()
        self.simclr = simclr
        for param in self.simclr.parameters():
            param.requires_grad = False
        self.linear1 = nn.Linear(2048, 512)
        self.linear2 = nn.Linear(512, nu_classes)

    def forward(self, x):
        encoding = self.simclr(x)
        encoding = self.linear1(encoding)
        encoding = nn.Dropout(0.8)(encoding)
        pred = self.linear2(encoding)
        return pred

In [74]:
eval_model = LinearEvaluation(simclr_model, nu_classes).to(DEVICE)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = torch.optim.Adam(eval_model.parameters())

In [None]:
EPOCHS = 50
eval_model.train()
for epoch in range(EPOCHS):
    t0 = time.time()
    running_loss = 0.0
    for i, element in enumerate(train_dl_downstream):
        image, label = element
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        pred = eval_model(image)
        loss = criterion(pred, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        if i % 10 == 9:    # print every 10 mini-batches
            print(f'EPOCH: {epoch+1} BATCH: {i+1} LOSS: {(running_loss/100):.4f} ')
            running_loss = 0.0
    print(f'Time taken: {((time.time()-t0)/60):.3f} mins')

EPOCH: 1 BATCH: 10 LOSS: 0.0462 
EPOCH: 1 BATCH: 20 LOSS: 0.0353 
EPOCH: 1 BATCH: 30 LOSS: 0.0302 
EPOCH: 1 BATCH: 40 LOSS: 0.0340 
EPOCH: 1 BATCH: 50 LOSS: 0.0328 
EPOCH: 1 BATCH: 60 LOSS: 0.0318 
EPOCH: 1 BATCH: 70 LOSS: 0.0329 
EPOCH: 1 BATCH: 80 LOSS: 0.0335 
EPOCH: 1 BATCH: 90 LOSS: 0.0325 
EPOCH: 1 BATCH: 100 LOSS: 0.0309 
EPOCH: 1 BATCH: 110 LOSS: 0.0333 
EPOCH: 1 BATCH: 120 LOSS: 0.0348 
EPOCH: 1 BATCH: 130 LOSS: 0.0299 
EPOCH: 1 BATCH: 140 LOSS: 0.0312 
EPOCH: 1 BATCH: 150 LOSS: 0.0332 
EPOCH: 1 BATCH: 160 LOSS: 0.0327 
EPOCH: 1 BATCH: 170 LOSS: 0.0344 
EPOCH: 1 BATCH: 180 LOSS: 0.0313 
EPOCH: 1 BATCH: 190 LOSS: 0.0299 
EPOCH: 1 BATCH: 200 LOSS: 0.0322 
EPOCH: 1 BATCH: 210 LOSS: 0.0342 
EPOCH: 1 BATCH: 220 LOSS: 0.0318 
EPOCH: 1 BATCH: 230 LOSS: 0.0311 
EPOCH: 1 BATCH: 240 LOSS: 0.0300 
EPOCH: 1 BATCH: 250 LOSS: 0.0324 
EPOCH: 1 BATCH: 260 LOSS: 0.0323 
EPOCH: 1 BATCH: 270 LOSS: 0.0302 
EPOCH: 1 BATCH: 280 LOSS: 0.0300 
EPOCH: 1 BATCH: 290 LOSS: 0.0309 
Time taken: 1.102 mins


## Test data performance

In [None]:
custom_transform = Compose([
    resize,
    ccrop,
    ttensor,
])

test_ds_downstream = ImageFolder(
    root="new_test_w_nst_images_removed",
    transform=custom_transform
)

nu_classes = len(test_ds_downstream.classes)

BATCH_SIZE = 64

# Building the data loader
test_dl_downstream = torch.utils.data.DataLoader(
    test_ds_downstream,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=os.cpu_count(),
    drop_last=False,
    pin_memory=True
)

In [None]:
# target_true = 0
# predicted_true = 0
# correct_true = 0

y_pred_list = []
y_true_list = []

eval_model.eval()
for i, element in enumerate(test_dl_downstream):
    image, label = element
    image = torch.Tensor(image).to(DEVICE)
    # label = label.to(DEVICE)
    y_pred = np.argmax(np.array(eval_model(image).detach().cpu()), axis=1)
    
    y_pred_list.extend(y_pred)
    y_true_list.extend(label)
    
    
    
    # predicted_classes = torch.argmax(y_pred, dim=1) == 0
    # target_classes = label
    # target_true += torch.sum(target_classes == 0).float()
    # predicted_true += torch.sum(predicted_classes).float()
    # correct_true += torch.sum(
    #     predicted_classes == target_classes & predicted_classes == 0).float()
    
    


In [None]:
np.sum(v == 1 for v in y_true_list)

In [None]:
c = 0
for i, v in enumerate(y_pred_list):
    if v == y_true_list[i]:
        c+=1
c

In [None]:
correct_count = 0
tp = 0
fp = 0
tn = 0
fn = 0

for i, v in enumerate(y_pred_list):
    if v == y_true_list[i]:
        correct_count+=1
    if v == 1 and y_true_list[i] == 1:
        tp+=1
    if v == 0 and y_true_list[i] == 0:
        tn+=1
    if v == 1 and y_true_list[i] == 0:
        fp+=1
    if v == 0 and y_true_list[i] == 1:
        fn+=1
    

In [1]:
tp, tn, fp, fn

NameError: name 'tp' is not defined

In [98]:
r = tp*100/(tp+fn)

In [99]:
correct_count/(tp + tn + fp + fn)

0.7063758389261745

In [100]:
p = tp*100/(tp+fp)

In [101]:
2*p*r/(p+r)

21.524663677130047

In [97]:
r, p

(15.894039735099337, 33.333333333333336)