In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
import torch.optim as optim
import torch.nn as nn
from torchinfo import summary
from tqdm.notebook import tqdm, trange
torch.random.manual_seed(42)
torch.cuda.manual_seed(42)
from torchvision.transforms import Compose,  Resize, CenterCrop, Normalize
from torchvision.models import resnet50, ResNet50_Weights


In [None]:
from torch.utils.tensorboard import SummaryWriter
# TensorBoard writer for the first experiment's log directory.
# Note: `writer` is re-bound to a different log directory further down, before
# training_loop is called, and this first writer is never closed explicitly.
writer = SummaryWriter('../runs/resnet_exp_1')

In [None]:
# Pick the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# Only query GPU properties when a GPU was actually selected; asking for
# 'cuda:0' unconditionally raises on CPU-only machines and would defeat the
# fallback above. On CPU the expression evaluates to None and shows nothing.
torch.cuda.get_device_properties(device) if device.type == "cuda" else None

In [None]:
 # Reading the image and the captions dataframe and the labels dataframe which has the super categories.
labels_df = pd.read_csv('F://coco/captions/labels_information_full.csv')
image_df = pd.read_csv('F://coco/captions/final_captions.csv')

# The multilabel table holds the per-image multilabel information; it is read
# first and its missing cells are then replaced with 0.
multilabel_df = pd.read_csv('F://coco/captions/multilabel_labels.csv')
multilabel_df = multilabel_df.fillna(0)


In [None]:
# Join (image_id, file_name) with the multilabel columns on image_id.
# validate='1:1' makes pandas raise if image_id is duplicated on either side.
image_label_df = pd.merge(
    image_df[['image_id', 'file_name']],
    multilabel_df,
    on='image_id',
    validate='1:1',
)
image_label_df

In [None]:
# File names whose rows are removed from `image_label_df` in the cells below.
# NOTE(review): the variable name suggests these are single-channel (grayscale)
# images that do not fit the 3-channel normalisation used later — confirm.
single_channel_list = ['COCO_train2014_000000061048.jpg',
'COCO_train2014_000000434765.jpg',
'COCO_train2014_000000053756.jpg',
'COCO_train2014_000000470933.jpg',
'COCO_train2014_000000571415.jpg',
'COCO_train2014_000000492325.jpg',
'COCO_train2014_000000155083.jpg',
'COCO_train2014_000000421613.jpg',
'COCO_train2014_000000431115.jpg',
'COCO_train2014_000000173610.jpg',
'COCO_train2014_000000336668.jpg',
'COCO_train2014_000000316867.jpg',
'COCO_train2014_000000269858.jpg',
'COCO_train2014_000000250239.jpg',
'COCO_train2014_000000123539.jpg',
'COCO_train2014_000000140092.jpg',
'COCO_train2014_000000343009.jpg',
'COCO_train2014_000000003293.jpg',
'COCO_train2014_000000578250.jpg',
'COCO_train2014_000000518025.jpg',
'COCO_train2014_000000008794.jpg',
'COCO_train2014_000000400107.jpg',
'COCO_train2014_000000394547.jpg',
'COCO_train2014_000000389984.jpg',
'COCO_train2014_000000225717.jpg',
'COCO_train2014_000000032405.jpg',
'COCO_train2014_000000084582.jpg',
'COCO_train2014_000000549879.jpg',
'COCO_train2014_000000358281.jpg',
'COCO_train2014_000000457741.jpg',
'COCO_train2014_000000204792.jpg',
'COCO_train2014_000000124694.jpg',
'COCO_train2014_000000390663.jpg',
'COCO_train2014_000000179405.jpg',
'COCO_train2014_000000443909.jpg',
'COCO_train2014_000000268036.jpg',
'COCO_train2014_000000217341.jpg',
'COCO_train2014_000000363331.jpg',
'COCO_train2014_000000134071.jpg',
'COCO_train2014_000000505962.jpg',
'COCO_train2014_000000347111.jpg',
'COCO_train2014_000000484742.jpg',
'COCO_train2014_000000064270.jpg',
'COCO_train2014_000000012345.jpg',
'COCO_train2014_000000226585.jpg',
'COCO_train2014_000000575029.jpg',
'COCO_train2014_000000564314.jpg',
'COCO_train2014_000000011801.jpg',
'COCO_train2014_000000033127.jpg',
'COCO_train2014_000000312288.jpg',
'COCO_train2014_000000126531.jpg',
'COCO_train2014_000000140627.jpg',
'COCO_train2014_000000131366.jpg',
'COCO_train2014_000000406011.jpg',
'COCO_train2014_000000369966.jpg',
'COCO_train2014_000000081003.jpg',
'COCO_train2014_000000010125.jpg',
'COCO_train2014_000000006432.jpg',
'COCO_train2014_000000384693.jpg',
'COCO_train2014_000000470442.jpg',
'COCO_train2014_000000280731.jpg',
'COCO_train2014_000000113929.jpg',
'COCO_train2014_000000416869.jpg',
'COCO_train2014_000000066642.jpg',
'COCO_train2014_000000233263.jpg',
'COCO_train2014_000000025404.jpg',
'COCO_train2014_000000156878.jpg',
'COCO_train2014_000000166522.jpg',
'COCO_train2014_000000060060.jpg',
'COCO_train2014_000000445845.jpg',
'COCO_train2014_000000205486.jpg',
'COCO_train2014_000000577207.jpg',
'COCO_train2014_000000005294.jpg',
'COCO_train2014_000000186888.jpg',
'COCO_train2014_000000503640.jpg',
'COCO_train2014_000000000086.jpg',
'COCO_train2014_000000087509.jpg',
'COCO_train2014_000000571503.jpg',
'COCO_train2014_000000000821.jpg',
'COCO_train2014_000000579138.jpg',
'COCO_train2014_000000134918.jpg',
'COCO_train2014_000000259284.jpg',
'COCO_train2014_000000257178.jpg',
'COCO_train2014_000000221691.jpg',
'COCO_train2014_000000077709.jpg',
'COCO_train2014_000000263002.jpg',
'COCO_train2014_000000341892.jpg',
'COCO_train2014_000000349069.jpg',
'COCO_train2014_000000563376.jpg',
'COCO_train2014_000000220770.jpg',
'COCO_train2014_000000208206.jpg',
'COCO_train2014_000000027412.jpg',
'COCO_train2014_000000434837.jpg',
'COCO_train2014_000000080906.jpg',
'COCO_train2014_000000150354.jpg',
'COCO_train2014_000000107450.jpg',
'COCO_train2014_000000577265.jpg',
'COCO_train2014_000000416372.jpg',
'COCO_train2014_000000377837.jpg',
'COCO_train2014_000000579239.jpg',
'COCO_train2014_000000540378.jpg',
'COCO_train2014_000000525513.jpg',
'COCO_train2014_000000353952.jpg',
'COCO_train2014_000000006379.jpg',
'COCO_train2014_000000381270.jpg',
'COCO_train2014_000000520479.jpg',
'COCO_train2014_000000563447.jpg',
'COCO_train2014_000000085407.jpg',
'COCO_train2014_000000210175.jpg',
'COCO_train2014_000000397575.jpg',
'COCO_train2014_000000058517.jpg',
'COCO_train2014_000000384907.jpg',
'COCO_train2014_000000509358.jpg',
'COCO_train2014_000000264165.jpg',
'COCO_train2014_000000072098.jpg',
'COCO_train2014_000000155954.jpg',
'COCO_train2014_000000270925.jpg',
'COCO_train2014_000000104124.jpg',
'COCO_train2014_000000095753.jpg',
'COCO_train2014_000000210847.jpg',
'COCO_train2014_000000507794.jpg',
'COCO_train2014_000000561842.jpg',
'COCO_train2014_000000249835.jpg',
'COCO_train2014_000000361516.jpg',
'COCO_train2014_000000451074.jpg',
'COCO_train2014_000000480482.jpg',
'COCO_train2014_000000220898.jpg',
'COCO_train2014_000000260962.jpg',
'COCO_train2014_000000576700.jpg',
'COCO_train2014_000000296884.jpg',
'COCO_train2014_000000342921.jpg',
'COCO_train2014_000000384910.jpg',
'COCO_train2014_000000040428.jpg',
'COCO_train2014_000000145288.jpg',
'COCO_train2014_000000321897.jpg',
'COCO_train2014_000000449901.jpg',
'COCO_train2014_000000107962.jpg',
'COCO_train2014_000000001350.jpg',
'COCO_train2014_000000249711.jpg',
'COCO_train2014_000000140623.jpg',
'COCO_train2014_000000211867.jpg',
'COCO_train2014_000000496444.jpg',
'COCO_train2014_000000287422.jpg',
'COCO_train2014_000000118895.jpg',
'COCO_train2014_000000075052.jpg',
'COCO_train2014_000000436984.jpg',
'COCO_train2014_000000555583.jpg',
'COCO_train2014_000000029275.jpg',
'COCO_train2014_000000176397.jpg',
'COCO_train2014_000000034861.jpg',
'COCO_train2014_000000517899.jpg',]


In [None]:
# Collect the index labels of every row whose file is on the exclusion list.
# A single vectorised isin() lookup replaces one full-frame scan per file and,
# unlike `.index.values[0]`, does not raise IndexError when a listed file is
# absent from the frame (for example when this cell is re-run after the rows
# have already been dropped).
indexes = image_label_df.index[image_label_df['file_name'].isin(single_channel_list)].tolist()

In [None]:
# Display the frame before the excluded files are removed, so its row count
# can be compared with the output of the next cell.
image_label_df

In [None]:
# Remove the excluded files by *name* instead of by previously collected index
# labels. After reset_index() the old labels refer to different rows, so
# dropping by label a second time (e.g. when the cell is re-run) would silently
# delete valid images; filtering on file_name is idempotent.
image_label_df = (
    image_label_df[~image_label_df['file_name'].isin(single_channel_list)]
    .reset_index(drop=True)
)
image_label_df

In [None]:
# image_label_df.to_parquet('F://coco/captions/image_labels.parquet')

In [None]:
# Preprocessing pipeline bundled with the default ResNet-50 weights.
# In this notebook it is only referenced from commented-out code; the datasets
# that are actually built use `manual_transforms` instead.
auto_transforms = ResNet50_Weights.DEFAULT.transforms()
auto_transforms

In [None]:
# Hand-written preprocessing applied to every image tensor by CocoDataset.
# There is no ToTensor step here: the dataset reads images as tensors and
# divides them by 255.0 itself before calling this pipeline.
manual_transforms = Compose([
    Resize(size = (256,256)),       # force a fixed 256x256 size (aspect ratio is not preserved)
    CenterCrop(size=(224,224)),     # keep the central 224x224 window
    # Per-channel normalisation; these are the mean/std values commonly used
    # with ImageNet-pretrained torchvision models.
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
manual_transforms

In [None]:
class CocoDataset(Dataset):
    """Multilabel COCO image dataset backed by a pandas DataFrame.

    The frame must hold ``image_id`` and ``file_name`` as its first two
    columns; every remaining column is read as a 0/1 presence flag for one
    category (the positional ``[2:]`` slicing below relies on this layout).

    Exactly one subset is selected, in this priority order:

    1. ``test_data_set=True``   -> every ``test_stride``-th row (small debug subset)
    2. ``is_val_set_bool=True`` -> every ``val_stride``-th row (validation split)
    3. ``val_stride > 0``       -> all rows *except* every ``val_stride``-th (training split)
    4. otherwise                -> the full frame

    Args:
        image_label_df: frame described above; it is copied, never modified.
        val_stride: stride that defines the validation rows.
        is_val_set_bool: build the validation split instead of the training split.
        test_data_set: build the strided debug subset instead.
        test_stride: stride used for the debug subset.
        transforms: optional callable applied to the float image tensor
            (values already scaled to [0, 1]). ``None`` means no transform.
        image_dir: directory prefix that is concatenated with ``file_name``
            (so it must end with a path separator). Defaults to
            ``DEFAULT_IMAGE_DIR``.
    """

    DEFAULT_IMAGE_DIR = 'F://coco/train2014/train2014/'

    def __init__(self,image_label_df, val_stride = 10, is_val_set_bool = False, test_data_set =False, test_stride = 5, transforms = None,
                 image_dir = None):

        # NOTE: the misspelt attribute name is kept so existing code that
        # reads `dataset.transfrom` keeps working.
        self.transfrom = transforms
        self.is_val_set_bool = is_val_set_bool
        self.val_stride = val_stride
        self.image_label_df = image_label_df.copy()
        self.test_data_set = test_data_set
        self.test_stride = test_stride
        self.image_dir = self.DEFAULT_IMAGE_DIR if image_dir is None else image_dir

        if self.test_data_set:
            # Small strided subset for quick experiments.
            self.image_label_df = self.image_label_df[::test_stride].reset_index(drop = True)

        elif self.is_val_set_bool:
            # Validation split: rows at positions 0, val_stride, 2*val_stride, ...
            assert self.val_stride > 0, "val_stride must be positive for the validation split"
            self.image_label_df = self.image_label_df[::val_stride]

        elif self.val_stride > 0:
            # Training split: the complement of the validation rows. Selecting
            # by *position* keeps this correct even when the frame's index is
            # not 0..n-1 (dropping by label would fail or drop the wrong rows).
            keep_mask = np.arange(len(self.image_label_df)) % self.val_stride != 0
            self.image_label_df = self.image_label_df.iloc[keep_mask]

        # else: val_stride <= 0 -> train on the full frame, nothing to do.

    def __len__(self):
        """Return the number of rows (images) in the selected subset."""
        return len(self.image_label_df)

    def _build_label_array(self, class_flags):
        """Turn a sequence of 0/1 flags into a (num_classes, 2) one-hot tensor.

        Column 0 is set when the class is absent, column 1 when it is present.
        """
        # Explicit int64 conversion: the flags arrive as floats (or as an
        # object array when taken from a mixed-dtype row), neither of which is
        # a reliable tensor index.
        class_idx = torch.as_tensor(np.asarray(class_flags, dtype=np.int64))
        label_array = torch.zeros(len(class_idx), 2)
        label_array[torch.arange(len(class_idx)), class_idx] = 1
        return label_array

    def _load_image(self, file_name):
        """Read one image, scale it to [0, 1] and apply the optional transform."""
        image_array = torchvision.io.read_image(self.image_dir + file_name)
        image_array = image_array/255.0
        if self.transfrom is not None:
            image_array = self.transfrom(image_array)
        return image_array.to(torch.float32)

    def get_image_by_id(self, img_id = None):
        """Return ``(image, labels)`` for the row whose ``image_id`` equals ``img_id``.

        Raises:
            ValueError: if ``img_id`` is None.
            IndexError: if no row in this subset has that ``image_id``.
        """
        if img_id is None:
            raise ValueError('Must provide IMAGE ID')

        matches = self.image_label_df[self.image_label_df['image_id']==img_id]
        if matches.empty:
            raise IndexError(f'image_id {img_id!r} is not present in this dataset split')

        row = matches.iloc[0]
        label_array = self._build_label_array(row.values[2:])
        image_array = self._load_image(row['file_name'])
        return image_array, label_array

    def __getitem__(self, ndx):
        """Return ``(image, labels, image_id)`` for the row at position ``ndx``.

        ``labels`` has shape (num_classes, 2): for every category a 1 is placed
        in column 0 if it is absent from the picture and in column 1 if it is
        present, which is what makes this a multilabel problem.
        """
        row = self.image_label_df.iloc[ndx]

        image_id = row['image_id']
        label_array = self._build_label_array(row.values[2:])
        image_array = self._load_image(row['file_name'])
        return (image_array, label_array, image_id)


            

In [None]:
# Training split = every row except each 10th; validation split = each 10th row.
train_coco = CocoDataset(image_label_df=image_label_df, val_stride=10, transforms=manual_transforms)
val_coco = CocoDataset(image_label_df=image_label_df,is_val_set_bool=True, val_stride=10, transforms=manual_transforms)

# shuffle=True on the training loader: without it every epoch visits the
# images in the same frame order, so each mini-batch always contains the same
# samples. The validation loader stays in fixed order.
train_dataloader = DataLoader(dataset=train_coco, batch_size=128, shuffle=True, pin_memory=True, drop_last=True, num_workers=1)
val_dataloader = DataLoader(dataset=val_coco, batch_size=128,pin_memory=True, drop_last = True, num_workers=1)

# test_coco = CocoDataset(image_label_df=image_label_df, test_data_set=True, test_stride=50, transforms=auto_transforms)

# test_coco_dataloader = DataLoader(dataset=test_coco, batch_size=16, pin_memory=True, drop_last=True)

In [None]:
# Pull the first 128 training samples straight from the dataset (bypassing
# the DataLoader) and stack them into one image tensor and one label tensor.
# The image id that the dataset also returns is not needed here.
exp_samples = [train_coco[i] for i in range(128)]
exp_train_data = torch.stack([sample[0] for sample in exp_samples])
exp_train_label = torch.stack([sample[1] for sample in exp_samples])
exp_train_data.shape

In [None]:
import matplotlib.pyplot as plt

In [None]:
# The tensors were normalised with the mean/std given in `manual_transforms`,
# so their values fall outside the [0, 1] range that imshow expects for floats
# and would be clipped. Undo the normalisation before plotting.
_norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
_norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
plt.imshow((exp_train_data[15] * _norm_std + _norm_mean).clamp(0, 1).permute(1, 2, 0).numpy());

In [None]:
def validation_loop(model, val_dataloader, device, loss_fn):
    """Evaluate ``model`` on ``val_dataloader`` and return the per-batch losses.

    Args:
        model: module to evaluate; expected to already live on ``device``.
        val_dataloader: iterable yielding ``(images, labels, image_ids)`` batches.
        device: device the batches are moved to before the forward pass.
        loss_fn: callable ``loss_fn(predictions, labels)`` returning a scalar tensor.

    Returns:
        list[float]: one loss value per validation batch (empty if the loader
        yields nothing).

    The model is switched to eval mode for the duration of the loop and is put
    back into whatever mode it was in beforehand, so a caller that is in the
    middle of training does not silently continue in eval mode.
    """
    val_loss_list = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for val_img, val_label, _ in tqdm(val_dataloader, nrows = 1):
                # transfer the images and labels to the target device
                val_img = val_img.to(device)
                val_label = val_label.to(device)

                val_logits = model(val_img)

                # .item() already copies the scalar to the host as a Python float
                val_loss_list.append(loss_fn(val_logits, val_label).item())
    finally:
        model.train(was_training)
    return val_loss_list

In [None]:
# Now we will begin to write the training loop that will ingest the data and output the result.
def training_loop(epochs, conv_model,train_dataloader,device,optimizer= None, val_dataloader=None, loss_fn = None, schedular = None, writer = None):
    """Train ``conv_model`` for ``epochs`` epochs, validating after each one.

    Args:
        epochs: number of passes over ``train_dataloader``.
        conv_model: module to train; expected to already live on ``device``.
        train_dataloader: iterable yielding ``(images, labels, image_ids)`` batches.
        device: device the batches are moved to.
        optimizer: optimiser over the model's parameters (required).
        val_dataloader: optional loader passed to ``validation_loop``; when
            None, validation is skipped.
        loss_fn: callable ``loss_fn(predictions, labels)`` (required).
        schedular: optional LR scheduler, stepped once per epoch.
        writer: optional TensorBoard ``SummaryWriter`` for the epoch losses.

    Raises:
        ValueError: if ``optimizer`` or ``loss_fn`` is missing.

    Returns:
        None. Progress is printed and, if ``writer`` is given, logged.
    """
    if optimizer is None or loss_fn is None:
        raise ValueError("training_loop requires both `optimizer` and `loss_fn`")

    for epoch in trange(epochs):
        print("Training is progressing")
        # validation_loop puts the model in eval mode; make sure every epoch
        # (not just the first) actually trains in train mode.
        conv_model.train()
        train_losses = []
        for img, label,_ in tqdm(train_dataloader, nrows=1):
            img = img.to(device)
            label = label.to(device)

            # Clear gradients left over from the previous step
            optimizer.zero_grad()

            # Forward pass and loss
            pred = conv_model(img)
            loss = loss_fn(pred, label)

            # Backpropagate and update the parameters
            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())

        train_loss = torch.tensor(train_losses).mean().item()

        val_loss = None
        if val_dataloader is not None:
            print("Validation is progressing")
            val_loss_list = validation_loop(model = conv_model, val_dataloader=val_dataloader, device = device, loss_fn= loss_fn)
            val_loss = torch.tensor(val_loss_list).mean().item()
            print(f"Epoch: {epoch} | Train_loss: {train_loss} | Val_loss : {val_loss}")
        else:
            print(f"Epoch: {epoch} | Train_loss: {train_loss}")

        # step the learning rate schedular
        if schedular is not None:
            schedular.step()

        # Writing the training and the validation losses to tensorboard.
        if writer is not None:
            writer.add_scalar(tag = 'Training Loss Per Epoch',scalar_value=train_loss, global_step = epoch)
            if val_loss is not None:
                writer.add_scalar(tag = 'Validation Loss Per Epoch',scalar_value=val_loss, global_step = epoch)

    return

In [None]:
# # confusion_mat = torch.zeros(12,12)
# def make_confusion_mat(logits_tensor, labels_tensor):
#     logits_tensor_argmax = torch.argmax(logits_tensor, dim = 2)
#     labels_tensor_argmax = torch.argmax(labels_tensor, dim = 2)
#     batch_conf_mat = mlcm.cm(labels_tensor_argmax.numpy(), logits_tensor_argmax.numpy(), print_note=False)
    
#     return batch_conf_mat

# # px.imshow(confusion_mat)

In [None]:

# conf_mat = torch.zeros((13,13))
# seq_convnet.eval()
# seq_convnet = seq_convnet.to(device=device)
# with torch.no_grad():
    
#     for data, label, _ in val_dataloader:
#         data = data.to(device = device)
        
#         # label = label.to(device = device)
#         logits = seq_convnet(data)
#         label = label.to(device= torch.device('cpu'))
#         logits = logits.to(device= torch.device('cpu'))
#         batch_conf_mat = make_confusion_mat(logits_tensor= logits, labels_tensor=label)
#         conf_mat += batch_conf_mat[0]


In [None]:
# conf_mat


In [None]:
# img = px.imshow(conf_mat, color_continuous_scale='tempo')
# img.update_layout(
#     xaxis = dict(
#         tickmode = 'array',
#         tickvals = [0,1, 2, 3, 4, 5,6, 7,8, 9,10, 11],
#         ticktext =['food', 'animal', 'furniture', 'electronic', 'kitchen', 'vehicle',
#        'person', 'outdoor', 'accessory', 'sports', 'appliance', 'indoor']
#     ),
#     yaxis = dict
#     (
#         tickmode = 'array',
#         tickvals = [0,1, 2, 3, 4, 5,6, 7,8, 9,10, 11],
#         ticktext = ['food', 'animal', 'furniture', 'electronic', 'kitchen', 'vehicle',
#        'person', 'outdoor', 'accessory', 'sports', 'appliance', 'indoor']
#     )
# )

In [None]:
# First we implement the bottleneck residual block (1x1 -> 3x3 -> 1x1 convolutions) and see how much we can get out of it when training on the COCO image dataset.
class ResBlock64(nn.Module):
    """Bottleneck residual block for 64-channel feature maps.

    Path: 1x1 conv (64 -> 32) -> BN -> ReLU -> 3x3 conv (32 -> 32) -> BN -> ReLU
    -> 1x1 conv (32 -> 64) -> BN. The block input is then added back and a
    final ReLU is applied, so channel count and spatial size are unchanged.
    """

    def __init__(self):
        super().__init__()
        outer_ch, inner_ch = 64, 32

        # Squeeze to the bottleneck width
        self.conv1 = nn.Conv2d(in_channels=outer_ch, out_channels=inner_ch, kernel_size=1, padding='same')
        self.b1 = nn.BatchNorm2d(inner_ch)
        self.relu1 = nn.ReLU()

        # Spatial 3x3 convolution at the bottleneck width
        self.conv2 = nn.Conv2d(in_channels=inner_ch, out_channels=inner_ch, kernel_size=3, padding='same')
        self.b2 = nn.BatchNorm2d(inner_ch)
        self.relu2 = nn.ReLU()

        # Expand back to the outer width
        self.conv3 = nn.Conv2d(in_channels=inner_ch, out_channels=outer_ch, kernel_size=1, padding='same')
        self.b3 = nn.BatchNorm2d(outer_ch)
        self.relu3 = nn.ReLU()

    def forward(self, x):
        """Apply the bottleneck path and add the identity shortcut."""
        squeezed = self.relu1(self.b1(self.conv1(x)))
        mixed = self.relu2(self.b2(self.conv2(squeezed)))
        expanded = self.b3(self.conv3(mixed))
        return self.relu3(expanded + x)

class ResBlock128(nn.Module):
    """Bottleneck residual block for 128-channel feature maps.

    Path: 1x1 conv (128 -> 32) -> BN -> ReLU -> 3x3 conv (32 -> 32) -> BN -> ReLU
    -> 1x1 conv (32 -> 128) -> BN, followed by the identity shortcut and a
    final ReLU. Channel count and spatial size are unchanged.
    """

    def __init__(self):
        super().__init__()
        outer_ch, inner_ch = 128, 32

        # Squeeze to the bottleneck width
        self.conv1 = nn.Conv2d(in_channels=outer_ch, out_channels=inner_ch, kernel_size=1, padding='same')
        self.b1 = nn.BatchNorm2d(inner_ch)
        self.relu1 = nn.ReLU()

        # Spatial 3x3 convolution at the bottleneck width
        self.conv2 = nn.Conv2d(in_channels=inner_ch, out_channels=inner_ch, kernel_size=3, padding='same')
        self.b2 = nn.BatchNorm2d(inner_ch)
        self.relu2 = nn.ReLU()

        # Expand back to the outer width
        self.conv3 = nn.Conv2d(in_channels=inner_ch, out_channels=outer_ch, kernel_size=1, padding='same')
        self.b3 = nn.BatchNorm2d(outer_ch)
        self.relu3 = nn.ReLU()

    def forward(self, x):
        """Apply the bottleneck path and add the identity shortcut."""
        squeezed = self.relu1(self.b1(self.conv1(x)))
        mixed = self.relu2(self.b2(self.conv2(squeezed)))
        expanded = self.b3(self.conv3(mixed))
        return self.relu3(expanded + x)

class downsampleBlock(nn.Module):
    """Residual block that halves the spatial size and maps in_chan -> out_chan channels.

    Main path: 1x1 conv with stride 2 (in_chan -> out_chan) -> BN -> ReLU ->
    3x3 'same' conv (out_chan -> out_chan) -> BN.
    Shortcut: 1x1 conv with stride 2 (in_chan -> out_chan) -> BN.
    The two paths are summed and passed through a ReLU.
    """

    def __init__(self, in_chan, out_chan):
        super().__init__()
        # Main path, step 1: the strided 1x1 conv changes the channel count and
        # does the spatial downsampling.
        self.conv1 = nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=2)
        self.b1 = nn.BatchNorm2d(out_chan)
        self.relu1 = nn.ReLU()

        # Main path, step 2: the 3x3 conv keeps both size and channels
        # (padding='same').
        self.conv2 = nn.Conv2d(out_chan, out_chan, kernel_size=3, padding='same')
        self.b2 = nn.BatchNorm2d(out_chan)
        self.relu2 = nn.ReLU()

        # Shortcut: project the block input to the main path's shape so the
        # two can be added.
        self.conv3 = nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=2)
        self.b3 = nn.BatchNorm2d(out_chan)

    def forward(self, X):
        """Return ReLU(main_path(X) + shortcut(X))."""
        main = self.relu1(self.b1(self.conv1(X)))
        main = self.b2(self.conv2(main))
        shortcut = self.b3(self.conv3(X))
        return self.relu2(main + shortcut)

class ResBlock256(nn.Module):
    """Bottleneck residual block for 256-channel feature maps.

    Path: 1x1 conv (256 -> 64) -> BN -> ReLU -> 3x3 conv (64 -> 64) -> BN -> ReLU
    -> 1x1 conv (64 -> 256) -> BN, followed by the identity shortcut and a
    final ReLU. Channel count and spatial size are unchanged.
    """

    def __init__(self):
        super().__init__()
        outer_ch, inner_ch = 256, 64

        # Squeeze to the bottleneck width
        self.conv1 = nn.Conv2d(in_channels=outer_ch, out_channels=inner_ch, kernel_size=1, padding='same')
        self.b1 = nn.BatchNorm2d(inner_ch)
        self.relu1 = nn.ReLU()

        # Spatial 3x3 convolution at the bottleneck width
        self.conv2 = nn.Conv2d(in_channels=inner_ch, out_channels=inner_ch, kernel_size=3, padding='same')
        self.b2 = nn.BatchNorm2d(inner_ch)
        self.relu2 = nn.ReLU()

        # Expand back to the outer width
        self.conv3 = nn.Conv2d(in_channels=inner_ch, out_channels=outer_ch, kernel_size=1, padding='same')
        self.b3 = nn.BatchNorm2d(outer_ch)
        self.relu3 = nn.ReLU()

    def forward(self, x):
        """Apply the bottleneck path and add the identity shortcut."""
        squeezed = self.relu1(self.b1(self.conv1(x)))
        mixed = self.relu2(self.b2(self.conv2(squeezed)))
        expanded = self.b3(self.conv3(mixed))
        return self.relu3(expanded + x)

class ResBlock512(nn.Module):
    """Bottleneck residual block for 512-channel feature maps.

    Path: 1x1 conv (512 -> 128) -> BN -> ReLU -> 3x3 conv (128 -> 128) -> BN -> ReLU
    -> 1x1 conv (128 -> 512) -> BN, followed by the identity shortcut and a
    final ReLU. Channel count and spatial size are unchanged.
    """

    def __init__(self):
        super().__init__()
        outer_ch, inner_ch = 512, 128

        # Squeeze to the bottleneck width
        self.conv1 = nn.Conv2d(in_channels=outer_ch, out_channels=inner_ch, kernel_size=1, padding='same')
        self.b1 = nn.BatchNorm2d(inner_ch)
        self.relu1 = nn.ReLU()

        # Spatial 3x3 convolution at the bottleneck width
        self.conv2 = nn.Conv2d(in_channels=inner_ch, out_channels=inner_ch, kernel_size=3, padding='same')
        self.b2 = nn.BatchNorm2d(inner_ch)
        self.relu2 = nn.ReLU()

        # Expand back to the outer width
        self.conv3 = nn.Conv2d(in_channels=inner_ch, out_channels=outer_ch, kernel_size=1, padding='same')
        self.b3 = nn.BatchNorm2d(outer_ch)
        self.relu3 = nn.ReLU()

    def forward(self, x):
        """Apply the bottleneck path and add the identity shortcut."""
        squeezed = self.relu1(self.b1(self.conv1(x)))
        mixed = self.relu2(self.b2(self.conv2(squeezed)))
        expanded = self.b3(self.conv3(mixed))
        return self.relu3(expanded + x)

In [None]:
class ResNet(nn.Module):
    """ResNet-style CNN with 12 two-unit sigmoid heads for multilabel classification.

    Shapes for a (B, 3, 224, 224) input:
        input_conv    -> (B, 64, 224, 224)
        max_pool1     -> (B, 64, 112, 112)
        resblocks64   -> (B, 128, 56, 56)   3 residual blocks + downsample
        resblocks128  -> (B, 256, 28, 28)   3 residual blocks + downsample
        resblocks256  -> (B, 512, 14, 14)   5 residual blocks + downsample
        resblocks512  -> (B, 512, 14, 14)   2 residual blocks
        global mean   -> (B, 512)
        fc2/BN/ReLU   -> (B, 128)
        heads         -> (B, 12, 2)

    Args:
        batch_size: kept only for backward compatibility and stored on the
            instance; the forward pass now reads the batch dimension from the
            input, so any batch size works.
    """

    NUM_HEADS = 12

    def __init__(self,batch_size = None):
        super().__init__()
        self.batch_size = batch_size

        self.input_conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(7,7), padding = 'same')
        self.max_pool1 = nn.MaxPool2d(2)

        # Each residual block must be its own instance. `n * [Block()]` would
        # put the *same* module into the Sequential n times, i.e. one set of
        # weights applied repeatedly instead of n independent layers.
        self.resblocks64 = nn.Sequential(
            *[ResBlock64() for _ in range(3)], downsampleBlock(64,128))

        self.resblocks128 = nn.Sequential(
            *[ResBlock128() for _ in range(3)], downsampleBlock(128,256))

        self.resblocks256 = nn.Sequential(
            *[ResBlock256() for _ in range(5)], downsampleBlock(256,512))

        self.resblocks512 = nn.Sequential(
            *[ResBlock512() for _ in range(2)])

        # Global average pooling has no parameters and is done in forward().

        self.fc2 = nn.Linear(512, 128)
        self.bfc2 = nn.BatchNorm1d(128)
        self.relu_fc2 = nn.ReLU()

        # One (Linear(128, 2), Sigmoid) pair per category, registered under the
        # names head1..head12 / sig1..sig12. Head order:
        # food, animal, furniture, electronic, kitchen, vehicle,
        # person, outdoor, accessory, sports, appliance, indoor
        for head_no in range(1, self.NUM_HEADS + 1):
            setattr(self, f'head{head_no}', nn.Linear(128,2))
            setattr(self, f'sig{head_no}', nn.Sigmoid())

    def forward(self, X):
        """Run the network and return a (batch, 12, 2) tensor of sigmoid outputs."""
        out = self.max_pool1(self.input_conv(X))

        out = self.resblocks64(out)
        out = self.resblocks128(out)
        out = self.resblocks256(out)
        out = self.resblocks512(out)

        # Global average pooling over height and width -> (batch, 512). The
        # batch dimension comes from the tensor itself rather than from
        # self.batch_size, so partial or differently sized batches work.
        out = torch.mean(out, dim = (2,3))

        out = self.relu_fc2(self.bfc2(self.fc2(out)))

        head_outputs = []
        for head_no in range(1, self.NUM_HEADS + 1):
            head_out = getattr(self, f'sig{head_no}')(getattr(self, f'head{head_no}')(out))
            # The per-head outputs were previously exposed as attributes;
            # keep doing so for code that reads e.g. `model.out_head7`.
            setattr(self, f'out_head{head_no}', head_out)
            head_outputs.append(head_out)

        out_tensor = torch.stack(head_outputs, dim = 1)
        return out_tensor

In [None]:
# images,labels,_ = next(iter(test_coco_dataloader))

In [None]:
# res_c = ResNet(batch_size = 16)
# writer.add_graph(res_c, images)
# writer.close()
# summary(res_c, input_size = (16,3,224,224))

In [None]:
# # Now lets get a batch of data and try to our model into our data.
# exp_train_data, exp_train_label = [],[]

# for i in range(32):
#     img, labels,_ = train_coco[i]
#     exp_train_data.append(img)
#     exp_train_label.append(labels)
# # exp_train_data = torch.tensor(exp_train_data)
# # exp_train_label = torch.tensor(exp_train_label)
# # print(exp_train_data.shape)
# # print(exp_train_label.shape)
# # print(len(exp_train_data), len(exp_train_label))
# exp_train_data = torch.stack(exp_train_data)
# exp_train_label = torch.stack(exp_train_label)
# exp_train_data.shape

# exp_train_data = exp_train_data.to(device=device)
# exp_train_label = exp_train_label.to(device=device)

# res_c = res_c.to(device=device)
# res_c(exp_train_data)

#### Now we will try to train the ResNet on 1300 images of the test dataset.

In [None]:
# # No we will define the model
# res_c = res_c.to(device = device)
# bce_loss = torch.nn.BCELoss()
# optimizer_adam = optim.Adam(res_c.parameters(), lr = 0.1)
# scehdular = optim.lr_scheduler.StepLR(optimizer= optimizer_adam, gamma = 0.1, step_size = 20, verbose = True )

In [None]:
# training_loop(
#     epochs = 100,
#     conv_model=res_c,
#     train_dataloader=test_coco_dataloader,
#     device = device,
#     optimizer=optimizer_adam,
#     loss_fn=bce_loss,
#     val_dataloader=val_dataloader,
#     schedular = scehdular    
# )

In [None]:
# Load ResNet-50 with the default weights and print a layer-by-layer summary
# for a batch of 16 images of size 3x224x224.
res_50 = resnet50(weights=ResNet50_Weights.DEFAULT)
res_50_summary = summary(res_50, input_size=(16, 3, 224, 224))
print(res_50_summary)

In [None]:
# Show the module tree of the loaded ResNet-50 (layer names and settings).
res_50

Now we create our custom model with 12 heads, using the ResNet-50 model as the backbone.

In [None]:
class CustomResNet(nn.Module):
    """Frozen pretrained backbone with a new trainable neck and 12 sigmoid heads.

    The backbone's parameters are frozen and its ``fc`` layer is replaced by a
    fresh, trainable ``Linear(in_features, 1024)``. On top of that sit
    ``fc1`` (1024 -> 512) and ``fc2`` (512 -> 128), each followed by
    BatchNorm1d and ReLU, and twelve ``Linear(128, 2)`` + ``Sigmoid`` heads.

    Args:
        pretrained_model: backbone module (e.g. a torchvision ResNet).
            It is modified IN PLACE: all of its parameters get
            ``requires_grad = False`` and its ``fc`` attribute is replaced.

    Raises:
        ValueError: if ``pretrained_model`` is not given.
    """

    NUM_HEADS = 12

    def __init__(self, pretrained_model = None):
        super().__init__()

        if pretrained_model is None:
            raise ValueError('CustomResNet requires a `pretrained_model` backbone')

        # Freeze every existing backbone parameter.
        for parameter in pretrained_model.parameters():
            parameter.requires_grad = False

        # Replace the classifier with a new layer (created after the freeze,
        # so it stays trainable). Its input width is taken from the layer it
        # replaces, with 2048 (the ResNet-50 width) as the fallback, so other
        # backbones with a differently sized `fc` work too.
        in_features = getattr(getattr(pretrained_model, 'fc', None), 'in_features', 2048)
        pretrained_model.fc = nn.Linear(in_features,1024)
        self.backbone = nn.Sequential(pretrained_model)

        self.fc1 = nn.Linear(1024,512)
        self.b1 = nn.BatchNorm1d(512)
        self.relu1 = nn.ReLU()

        self.fc2 = nn.Linear(512,128)
        self.b2 = nn.BatchNorm1d(128)
        self.relu2 = nn.ReLU()

        # One (Linear(128, 2), Sigmoid) pair per category, registered under the
        # names head1..head12 / sig1..sig12. Head order:
        # food, animal, furniture, electronic, kitchen, vehicle,
        # person, outdoor, accessory, sports, appliance, indoor
        for head_no in range(1, self.NUM_HEADS + 1):
            setattr(self, f'head{head_no}', nn.Linear(128,2))
            setattr(self, f'sig{head_no}', nn.Sigmoid())

    def forward(self, X):
        """Return a (batch, 12, 2) tensor of per-head sigmoid outputs."""
        out_backbone = self.backbone(X)

        out = self.relu1(self.b1(self.fc1(out_backbone)))
        out = self.relu2(self.b2(self.fc2(out)))

        head_outputs = []
        for head_no in range(1, self.NUM_HEADS + 1):
            head_out = getattr(self, f'sig{head_no}')(getattr(self, f'head{head_no}')(out))
            # The per-head outputs were previously exposed as attributes;
            # keep doing so for code that reads e.g. `model.out_head7`.
            setattr(self, f'out_head{head_no}', head_out)
            head_outputs.append(head_out)

        out_tensor = torch.stack(head_outputs, dim = 1)
        return out_tensor


In [None]:
# Wrap the pretrained ResNet-50 in the multi-head model. CustomResNet freezes
# the parameters of `res_50` and swaps its `fc` layer on the object itself, so
# `res_50` is altered by this call.
custom_model = CustomResNet(res_50)
custom_model_summary = summary(custom_model, input_size=(128, 3, 224, 224))
print(custom_model_summary)

In [None]:
# Re-bind `writer` to a new log directory for the pretrained-backbone run.
# The writer created earlier in the notebook is replaced without being closed.
writer = SummaryWriter('../runs/resnet_exp_2_pretrained')

In [None]:
# Move the model to the selected device before the optimiser is built.
custom_model = custom_model.to(device = device)
# BCELoss takes probabilities; the model's heads already end in Sigmoid.
bce_loss = torch.nn.BCELoss()
# All parameters are handed to Adam, including the frozen backbone ones.
optimizer_adam = optim.Adam(custom_model.parameters(), lr = 0.001)
# StepLR multiplies the learning rate by gamma every `step_size` scheduler
# steps. training_loop steps the scheduler once per epoch, so with the
# 10-epoch run below the step_size of 30 is never reached.
scehdular = optim.lr_scheduler.StepLR(optimizer= optimizer_adam, gamma = 0.01, step_size = 30, verbose = True )

In [None]:
# Train for 10 epochs on the training loader, validating after each epoch and
# logging the per-epoch losses through `writer`.
training_loop(
    epochs=10,
    conv_model=custom_model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    device=device,
    loss_fn=bce_loss,
    optimizer=optimizer_adam,
    schedular=scehdular,
    writer=writer,
)

In [None]:
# Save the trained model.
# Only the state_dict is written, not the module object itself, so loading it
# later means building a CustomResNet first and calling load_state_dict on it.
torch.save(custom_model.state_dict(), 'F://coco/saved_models/custom_ResNet_pretrained.pt')

In [None]:
# Scratch check: how many values are there when stepping from 0 up to (but not
# including) 66000 in steps of 6?
x = [*range(0, 66000, 6)]
print(len(x))

In [None]:
# Display the wall-clock time at which this cell was executed.
import datetime as dt
dt.datetime.now()

adam scheduler (gamma = 0.1)
        lr = 3e-5
        weight_decay = 0.001