In [None]:
# Mount Google Drive at /content/gdrive; the dataset archive (and presumably the
# encoder checkpoint) are read from there by the following cells.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Work from the Drive folder that presumably holds resnet_wider.py and the
# resnet50-1x.pth checkpoint (both are referenced by relative name later).
# NOTE(review): 'directory-with-ssl-resnet' looks like a placeholder - confirm the real folder name.
cd /content/gdrive/'My Drive'/'directory-with-ssl-resnet'

In [None]:
# Dataset preparation: extract the archive stored on Drive into /content/.
# Later cells read the splits from /content/data/{train,test,valid}.

!unzip /content/gdrive/'My Drive'/NSFW_Classification/data.zip -d /content/

In [None]:
import argparse
import os
import random
import shutil
import time
import warnings
import numpy as np
from imutils import paths
import matplotlib.pyplot as plt


import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import DataLoader
from resnet_wider import resnet50x1, resnet50x2, resnet50x4

In [None]:
# create model

# Only the 1x-wide SimCLR ResNet-50 is used in this notebook. The wider variants
# imported above (resnet50x2 / resnet50x4) would pair with 'resnet50-2x.pth' /
# 'resnet50-4x.pth' respectively.
model = resnet50x1()
checkpoint_path = 'resnet50-1x.pth'

# Deserialize on the CPU first so loading does not depend on the device the
# checkpoint was saved from; the weights live under the 'state_dict' key.
sd = torch.load(checkpoint_path, map_location='cpu')
model.load_state_dict(sd['state_dict'])

# Replace the classification layer with a pass-through so the forward pass
# returns the pooled 2048-d representation instead of class scores.
model.fc = nn.Identity()
model = (model).to('cuda')

# The encoder is only used as a fixed feature extractor (it is never handed to
# an optimizer), so put it in eval mode: otherwise BatchNorm would normalize
# with per-batch statistics and keep overwriting its running averages.
model.eval()

# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss()

# Input size is constant (224x224), so let cuDNN auto-tune its kernels.
cudnn.benchmark = True

print("Model successfully loaded")

Model successfully loaded


In [None]:
class TransformsSimCLR:
    """Image transforms for a SimCLR-style pipeline.

    Attributes:
        test_transform: deterministic pipeline (resize, then convert to tensor).
        train_transform: pipeline applied twice by ``__call__`` to produce a
            pair of views of the same image.

    Args:
        size: target size forwarded to ``torchvision.transforms.Resize``.
        train_transform: optional callable used to build the two views. When
            omitted, the deterministic ``test_transform`` is used, so both
            views are identical.
    """

    def __init__(self, size, train_transform=None):

        self.test_transform = torchvision.transforms.Compose(
            [
                torchvision.transforms.Resize(size=size),
                torchvision.transforms.ToTensor(),
            ]
        )

        # __call__ depends on this attribute; it used to be left unassigned,
        # which made every call fail with AttributeError.
        self.train_transform = (
            train_transform if train_transform is not None else self.test_transform
        )

    def __call__(self, x):
        """Return two independently transformed views of ``x`` as a tuple."""
        return self.train_transform(x), self.train_transform(x)

In [None]:
# Create the output folders (presumably for saved models and training history).
# exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
# FileExistsError the second time the notebook is executed.
os.makedirs('models', exist_ok=True)
os.makedirs('history', exist_ok=True)

In [None]:
size = 224
image_transforms = {
    'train':transforms.Compose([
        # With an int, Resize only scales the shorter edge to `size` and keeps
        # the aspect ratio, so non-square images would come out with different
        # widths and could not be stacked into a batch.
        transforms.Resize(size=size),
        # Crop to an exact size x size square; this is a no-op for images that
        # are already square after the resize.
        transforms.CenterCrop(size),
        transforms.ToTensor()
    ])
}

In [None]:
# Set the train, test and validation directory
train_directory = '/content/data/train'
test_directory = '/content/data/test'
valid_directory = '/content/data/valid'

# Setting batch size for training
batch_size=128

#Number of classes for the data
num_classes = 5

# Load each split from its folder. All three splits use the same deterministic
# pipeline stored under the 'train' key (the only one defined).
data = {
    'train': datasets.ImageFolder(root=train_directory, transform=image_transforms['train']),
    'valid': datasets.ImageFolder(root=valid_directory, transform=image_transforms['train']),
    'test': datasets.ImageFolder(root=test_directory, transform=image_transforms['train'])
}

#Find out the size of the data
train_data_size = len(data['train'])
test_data_size = len(data['test'])
val_data_size = len(data['valid'])

# Create iterators for the Data loaded using DataLoader module.
# The training split is shuffled here because this is the only place its
# sample order gets randomized: the feature loaders built later iterate with
# shuffle=False.
train_loader = DataLoader(data['train'],batch_size=batch_size,shuffle=True)
# Evaluation splits keep a fixed order. Metrics are averaged per batch
# downstream, so a shuffled order would change which samples fall into the
# smaller final batch and make the reported numbers vary between runs.
test_loader = DataLoader(data['test'],batch_size=batch_size,shuffle=False)
val_loader = DataLoader(data['valid'],batch_size=batch_size,shuffle=False)

#Printing the sizes of the sets
print(train_data_size,test_data_size,val_data_size)

14975 2920 2492


In [None]:
# Smoke test: push a single batch of images through the encoder and compare
# the input and output shapes.

# eval + no_grad keep this check side-effect free: in train mode the forward
# pass would update the BatchNorm running statistics, and without no_grad it
# would build an autograd graph that is never used.
model.eval()
with torch.no_grad():
  for step, (x,y) in enumerate(train_loader):
    print(x.shape)
    x = x.to('cuda')
    x = model(x)
    print(x.shape)
    break

torch.Size([128, 3, 224, 224])
torch.Size([128, 2048])


In [None]:
def inference(loader, simclr_model, device):
    """Encode every batch of ``loader`` and collect features and labels.

    The model is switched to eval mode for the duration of the call (so layers
    such as BatchNorm/Dropout behave deterministically and running statistics
    are not modified) and its previous train/eval mode is restored afterwards.

    Args:
        loader: iterable of ``(x, y)`` tensor batches that supports ``len()``.
        simclr_model: ``torch.nn.Module`` used as the encoder.
        device: device the inputs are moved to before the forward pass.

    Returns:
        ``(feature_vector, labels_vector)`` as NumPy arrays, stacked along the
        first axis in iteration order. Both are empty arrays if the loader
        yields no batches.
    """
    feature_batches = []
    label_batches = []

    was_training = simclr_model.training
    simclr_model.eval()
    try:
        with torch.no_grad():
            for step, (x, y) in enumerate(loader):
                # get encoding
                h = simclr_model(x.to(device))

                # Keep whole batches and join them once at the end instead of
                # extending a Python list row by row.
                feature_batches.append(h.detach().cpu().numpy())
                label_batches.append(y.cpu().numpy())

                if step % 20 == 0:
                    print(f"Step [{step}/{len(loader)}]\t Computing features...")
    finally:
        simclr_model.train(was_training)

    if feature_batches:
        feature_vector = np.concatenate(feature_batches, axis=0)
        labels_vector = np.concatenate(label_batches, axis=0)
    else:
        # np.concatenate rejects an empty list; fall back to empty arrays.
        feature_vector = np.array([])
        labels_vector = np.array([])
    print("Features shape {}".format(feature_vector.shape))
    return feature_vector, labels_vector


def get_features(context_model, train_loader, test_loader, val_loader, device):
    """Encode the train, test and validation loaders with ``context_model``.

    Each loader is passed to ``inference`` in that order.

    Returns:
        A 6-tuple ``(train_X, train_y, test_X, test_y, val_x, val_y)``.
    """
    train_pair = inference(train_loader, context_model, device)
    print("Computed train features")

    test_pair = inference(test_loader, context_model, device)
    print("Computed test features")

    val_pair = inference(val_loader, context_model, device)

    # Flatten the three (features, labels) pairs into one tuple.
    return (*train_pair, *test_pair, *val_pair)

# Encode all three splits once with the encoder on the GPU; the features and
# labels come back as NumPy arrays and are reused for every training epoch.
train_x, train_y, test_x, test_y, val_x, val_y = get_features(model,train_loader,test_loader,val_loader,'cuda')

Step [0/117]	 Computing features...
Step [20/117]	 Computing features...
Step [40/117]	 Computing features...
Step [60/117]	 Computing features...
Step [80/117]	 Computing features...
Step [100/117]	 Computing features...
Features shape (14975, 2048)
Computed train features
Step [0/23]	 Computing features...
Step [20/23]	 Computing features...
Features shape (2920, 2048)
Computed test features
Step [0/20]	 Computing features...
Features shape (2492, 2048)


In [None]:
#create dataloader from those features

def create_data_loaders_from_arrays(X_train, y_train, X_test, y_test, X_val, y_val, batch_size):
    """Wrap NumPy feature/label arrays in non-shuffling DataLoaders.

    Loaders are built in the order train, validation, test, with a progress
    message after each one.

    Returns:
        ``(train_loader, test_loader, val_loader)`` - note that test comes
        before validation in the returned tuple.
    """

    def _as_loader(features, targets):
        # Zero-copy view of the arrays as tensors, batched in stored order.
        dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(features), torch.from_numpy(targets)
        )
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=False
        )

    splits = (
        ("train", X_train, y_train, "Trainloader successfully made"),
        ("val", X_val, y_val, "Valloader successfully made"),
        ("test", X_test, y_test, "Testloader successfully made"),
    )

    loaders = {}
    for split_name, features, targets, done_message in splits:
        loaders[split_name] = _as_loader(features, targets)
        print(done_message)

    return loaders["train"], loaders["test"], loaders["val"]

# NOTE: this rebinds train_loader / test_loader / val_loader from the image
# loaders created earlier to loaders over the precomputed feature arrays.
# Re-running an earlier cell that feeds train_loader to the encoder after this
# point would pass features instead of images.
train_loader, test_loader, val_loader = create_data_loaders_from_arrays(train_x, train_y, test_x, test_y, val_x, val_y, 128)

Trainloader successfully made
Valloader successfully made
Testloader successfully made


In [None]:
class LinearEvaluation(nn.Module):
  """Bias-free linear head over 2048-d features, followed by a softmax.

  The forward pass returns per-class probabilities (each row sums to 1).

  NOTE(review): nn.CrossEntropyLoss expects raw, unnormalized scores, so this
  variant should not be trained with it. A later cell redefines the class
  without the Softmax layer, and that later definition is the one trained.
  """

  def __init__(self, num_classes):
    super().__init__()
    head_layers = [
        nn.Linear(2048, num_classes, bias=False),
        nn.Softmax(dim=1),
    ]
    # Attribute name and layer order determine the printed structure and the
    # state_dict keys, so they are kept as-is.
    self.Linear = nn.Sequential(*head_layers)

  def forward(self, x):
    return self.Linear(x)

In [None]:
# Build the 5-class linear head, move it to the GPU and show its layers.
linmodel = LinearEvaluation(5)
linmodel = linmodel.to('cuda')
print(linmodel)

LinearEvaluation(
  (Linear): Sequential(
    (0): Linear(in_features=2048, out_features=5, bias=False)
    (1): Softmax(dim=1)
  )
)


In [None]:
# Smoke test: run one batch of precomputed features through the linear head
# and print the input shape, the labels and the output shape.

for step, (x,y) in enumerate(train_loader):
  print(x.shape)
  print(y)
  # Move the batch to the GPU and apply the head in one step.
  x = linmodel(x.to('cuda'))
  print(x.shape)
  break

torch.Size([128, 2048])
tensor([0, 1, 1, 4, 1, 1, 0, 4, 4, 4, 2, 0, 3, 4, 2, 3, 2, 3, 3, 0, 1, 1, 3, 4,
        1, 2, 3, 1, 3, 3, 0, 1, 1, 0, 1, 3, 2, 4, 2, 2, 2, 1, 3, 0, 4, 0, 2, 1,
        2, 2, 3, 0, 0, 2, 3, 2, 2, 4, 3, 3, 4, 1, 3, 1, 1, 3, 3, 3, 1, 4, 2, 1,
        2, 3, 1, 3, 1, 1, 0, 1, 3, 1, 1, 3, 1, 2, 3, 1, 0, 3, 1, 1, 3, 1, 3, 2,
        2, 4, 3, 4, 1, 4, 3, 2, 2, 4, 1, 1, 4, 1, 2, 2, 1, 4, 4, 2, 1, 3, 4, 2,
        4, 3, 3, 2, 4, 2, 0, 2])
torch.Size([128, 5])


In [None]:
#define train

def train(device, loader, model, criterion, optimizer,val_loader):
    """Run one training epoch followed by one validation pass.

    Args:
        device: device the batches are moved to.
        loader: iterable of ``(x, y)`` training batches.
        model: module being optimized; left in eval mode on return.
        criterion: loss taking ``(output, y)``.
        optimizer: optimizer stepping ``model``'s parameters.
        val_loader: iterable of ``(x, y)`` validation batches.

    Returns:
        ``(train_loss, train_accuracy, val_loss, val_accuracy)``. Each value
        is the SUM of the per-batch loss / per-batch accuracy, not a mean;
        callers divide by the number of batches.
    """
    train_loss_epoch = 0
    train_accuracy_epoch = 0
    val_loss_epoch = 0
    val_accuracy_epoch = 0

    # The validation pass below leaves the model in eval mode, so training
    # mode has to be re-enabled at the start of every epoch. Without this,
    # every epoch after the first would train in eval mode.
    model.train()
    for x, y in loader:
        optimizer.zero_grad()
        x = x.to(device)
        y = y.to(device)

        output = model(x)
        loss = criterion(output, y)

        predicted = output.argmax(1)
        acc = (predicted == y).sum().item() / y.size(0)
        train_accuracy_epoch += acc
        loss.backward()
        optimizer.step()

        train_loss_epoch += loss.item()

    model.eval()
    with torch.no_grad():
        # No gradients are produced here, so there is nothing to zero.
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)

            output = model(x)
            loss = criterion(output, y)

            predicted = output.argmax(1)
            acc = (predicted == y).sum().item() / y.size(0)
            val_accuracy_epoch += acc

            val_loss_epoch += loss.item()

    return train_loss_epoch, train_accuracy_epoch, val_loss_epoch, val_accuracy_epoch

In [None]:
#define test

def test(device, loader, model, criterion, optimizer):
    loss_epoch = 0
    accuracy_epoch = 0
    model.eval()
    for step, (x, y) in enumerate(loader):
        model.zero_grad()

        x = x.to(device)
        y = y.to(device)

        output = model(x)
        loss = criterion(output, y)

        predicted = output.argmax(1)
        acc = (predicted == y).sum().item() / y.size(0)
        accuracy_epoch += acc

        loss_epoch += loss.item()

    return loss_epoch, accuracy_epoch



In [None]:
class LinearEvaluation(nn.Module):
  """Bias-free linear classifier over 2048-d features.

  The forward pass returns raw class scores (logits) with no activation.
  This definition replaces the earlier Softmax variant of the same name.
  """

  def __init__(self, num_classes):
    super().__init__()
    # Kept inside a Sequential under the name `Linear` so the printed
    # structure and the state_dict key ('Linear.0.weight') stay the same.
    projection = nn.Linear(2048, num_classes, bias=False)
    self.Linear = nn.Sequential(projection)

  def forward(self, x):
    return self.Linear(x)

In [None]:
# Re-create the head from the Softmax-free definition and show its layers.
linmodel = LinearEvaluation(5)
linmodel = linmodel.to('cuda')
print(linmodel)

LinearEvaluation(
  (Linear): Sequential(
    (0): Linear(in_features=2048, out_features=5, bias=False)
  )
)


In [None]:
# Linear-evaluation setup: only the linear head's parameters are optimized.
device = 'cuda'
epochs = 100
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(linmodel.parameters(), lr=0.0001)

for epoch in range(epochs):
    train_loss_epoch, train_accuracy_epoch, val_loss_epoch, val_accuracy_epoch = train(
        device, train_loader, linmodel, criterion, optimizer, val_loader
    )

    # Only every 10th epoch is reported.
    if epoch % 10 != 0:
      continue

    # train() returns per-batch sums, hence the division by the batch counts.
    print(f"Epoch [{epoch}/{epochs}]\t Train Loss: {train_loss_epoch / len(train_loader)}\t Train Accuracy: {train_accuracy_epoch / len(train_loader)}\t Val Loss: {val_loss_epoch / len(val_loader)}\t Accuracy: {val_accuracy_epoch / len(val_loader)}")

print("Training successfully completed")

# final testing
loss_epoch, accuracy_epoch = test(device, test_loader, linmodel, criterion, optimizer)
print(f"[FINAL]\t Loss: {loss_epoch / len(test_loader)}\t Accuracy: {accuracy_epoch / len(test_loader)}")

Epoch [0/100]	 Train Loss: 1.4538166380336142	 Train Accuracy: 0.5329661316373915	 Val Loss: 1.279346239566803	 Accuracy: 0.7071614583333333
Epoch [10/100]	 Train Loss: 0.6026655285276918	 Train Accuracy: 0.8140179142270678	 Val Loss: 0.5921848386526107	 Accuracy: 0.8115364583333333
Epoch [20/100]	 Train Loss: 0.4857145420506469	 Train Accuracy: 0.8385926669022141	 Val Loss: 0.5049377381801605	 Accuracy: 0.8264322916666667
Epoch [30/100]	 Train Loss: 0.43249791886052513	 Train Accuracy: 0.8536861077461472	 Val Loss: 0.4687956377863884	 Accuracy: 0.8365885416666666
Epoch [40/100]	 Train Loss: 0.39934752970679194	 Train Accuracy: 0.8627005308230702	 Val Loss: 0.4483228579163551	 Accuracy: 0.8441145833333333
Epoch [50/100]	 Train Loss: 0.3755764971431504	 Train Accuracy: 0.8705803301029679	 Val Loss: 0.43502008467912673	 Accuracy: 0.848359375
Epoch [60/100]	 Train Loss: 0.3571187935323797	 Train Accuracy: 0.87625660374184	 Val Loss: 0.42568217068910597	 Accuracy: 0.849921875
Epoch [70/100