# Contrastive Learning using MSCOCO dataset

First of all, import the basic libraries used in the rest of the notebook.

In [None]:
import sys
import random
import numpy as np
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive under /content/drive; the dataset and the project modules
# used later are read from paths below this mount point.
# Colab-only: google.colab is not available in other Jupyter environments.
from google.colab import drive
drive.mount('/content/drive')


Check which device is available.

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('Using device:', device)
print()

## Image captioning section of MSCOCO

In [None]:
# Root folder on the mounted Google Drive that holds the MSCOCO copy.
path = '/content/drive/My Drive'

# Caption dataset over the val2014 images and their caption annotations;
# ToTensor is applied to every image returned by the dataset.
cap = dset.CocoCaptions(
    transform=transforms.ToTensor(),
    annFile='{}{}'.format(path, '/MSCOCO/cocoapi/annotations/captions_val2014.json'),
    root='{}{}'.format(path, '/MSCOCO/cocoapi/images/val2014'),
)

## Foveated Retinal Processor

First of all, install a compatible version of NVIDIA DALI

In [None]:
# Install NVIDIA DALI from NVIDIA's extra package index. The active line installs
# the `cuda100` build; the commented line is the `cuda110` alternative.
# NOTE(review): presumably the suffix has to match the CUDA version of the runtime,
# and no package version is pinned, so re-runs may install a different release — confirm.
!pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda100
#!pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110

In [None]:
# Drive root (same value as assigned in the dataset cell).
path = '/content/drive/My Drive'

from time import time
# Make the Drive folder that holds the project-local NVIDIA_DALI_Pipelines module
# importable, then import it under the short alias used by the rest of the notebook.
sys.path.append(path + '/Colab Notebooks/Multimodal Active AI/SimCLR/NVIDIA DALI')
import NVIDIA_DALI_Pipelines as NDP

Reader operator that reads a COCO dataset (or subset of COCO), which consists of an annotation file and the images directory.

In [None]:
# Small batch for the exploratory cells; one reader pipeline per GPU.
num_gpus = 1
batch_size = 4

start = time()
pipes = [
    NDP.COCOReader(
        num_gpus=num_gpus,
        device_id=device_id,
        num_threads=2,
        batch_size=batch_size,
    )
    for device_id in range(num_gpus)
]
# Every pipeline has to be built before it can be run.
for pipe in pipes:
    pipe.build()
total_time = time() - start
print("Computation graph built and dataset loaded in %f seconds." % total_time)

In [None]:
# Run every reader pipeline once; only the first pipeline's outputs are used below.
pipe_out = [reader.run() for reader in pipes]

# Outputs 0, 1 and 2 of the first pipeline: images, bounding boxes and labels.
# NOTE(review): only the images are copied with as_cpu(); presumably the boxes and
# labels are already emitted on the CPU by the reader — confirm.
images_gpu, bboxes_cpu, labels_cpu = pipe_out[0][:3]
images_cpu = images_gpu.as_cpu()

Let’s see the ground truth bounding boxes drawn on the image.


In [None]:
img_index = 3

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random

# Image, boxes and labels of the selected sample of the batch.
img = images_cpu.at(img_index)
bboxes = bboxes_cpu.at(img_index)
labels = labels_cpu.at(img_index)

H, W = img.shape[0], img.shape[1]

fig, ax = plt.subplots(1)
ax.imshow(img)

# One random RGB colour per distinct category id present in this image.
categories_set = {label[0] for label in labels}
category_id_to_color = {
    cat_id: [random.uniform(0, 1) for _ in range(3)]
    for cat_id in categories_set
}

# Box entries are scaled by the image width/height before drawing.
# NOTE(review): this treats each box as (left, top, right, bottom) fractions of the
# image size — presumably what the reader emits; confirm.
for bbox, label in zip(bboxes, labels):
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    rect = patches.Rectangle(
        (left * W, top * H),
        (right - left) * W,
        (bottom - top) * H,
        linewidth=2,
        edgecolor=category_id_to_color[label[0]],
        facecolor='none',
    )
    ax.add_patch(rect)

plt.show()

## COCO Reader with augmentations

Create reader, decoder and flip operator for images and bounding boxes

In [None]:
img_size = (30,30)

# Initial random fixation per sample, stored as attributes on the NDP module:
# x and y drawn from [0, 1), angle drawn from [-30, 30).
# (A deterministic alternative is to assign constant tensors of the same
# (batch_size, 1) shape instead.)
NDP.fixation_pos_x = torch.rand(batch_size, 1)
NDP.fixation_pos_y = torch.rand(batch_size, 1)
NDP.fixation_angle = 60 * (torch.rand(batch_size, 1) - 0.5)

# Source objects handed to the foveated pipeline; the image collector is given
# the GPU image batch read above.
images = NDP.ImageCollector()
fixation = NDP.FixationCommand(batch_size)

images.data = images_gpu

start = time()
pipes1 = [
    NDP.FoveatedRetinalProcessor(
        images=images,
        fixation=fixation,
        num_gpus=num_gpus,
        device_id=device_id,
        num_threads=2,
        batch_size=batch_size,
    )
    for device_id in range(num_gpus)
]
for pipe1 in pipes1:
    pipe1.build()

total_time = time() - start
print("Computation graph built and dataset loaded in %f seconds." % total_time)

In [None]:
# New random fixation for this run: positions in [0, 1), angle in [-30, 30).
NDP.fixation_pos_x = torch.rand(batch_size, 1)
NDP.fixation_pos_y = torch.rand(batch_size, 1)
NDP.fixation_angle = 60 * (torch.rand(batch_size, 1) - 0.5)

# Run the foveated pipelines through the project's PyTorch wrapper and time it.
start = time()
pipe_out1 = NDP.pytorch_wrapper(pipes1)
total_time = time() - start
print("Computation graph run in %f seconds." % total_time)

print('Does pipe_out1 live in GPU: ', pipe_out1[0][0].is_cuda)

# Outputs 0-4 (crops) and 5-9 (resized images) of the first pipeline, moved to the CPU.
(
    crop_images_cpu0, crop_images_cpu1, crop_images_cpu2, crop_images_cpu3, crop_images_cpu4,
    sized_images_cpu0, sized_images_cpu1, sized_images_cpu2, sized_images_cpu3, sized_images_cpu4,
) = (out.to('cpu') for out in pipe_out1[0][:10])

Let’s look at the five cropped views (left column) and the five resized views (right column) produced for one image of the batch.


In [None]:
index = img_index

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random

# Selected sample from each of the five crop outputs and five resized outputs.
img0, img1, img2, img3, img4 = (
    batch[index]
    for batch in (crop_images_cpu0, crop_images_cpu1, crop_images_cpu2,
                  crop_images_cpu3, crop_images_cpu4)
)
img5, img6, img7, img8, img9 = (
    batch[index]
    for batch in (sized_images_cpu0, sized_images_cpu1, sized_images_cpu2,
                  sized_images_cpu3, sized_images_cpu4)
)

fig, ax = plt.subplots(5, 2, figsize=(25, 25))

# One row per output pair: crop in the left column, resized image in the right.
view_pairs = zip((img0, img1, img2, img3, img4), (img5, img6, img7, img8, img9))
for row, (crop_view, sized_view) in enumerate(view_pairs):
    ax[row, 0].imshow(crop_view)
    ax[row, 1].imshow(sized_view)

# Render explicitly so the cell does not end on a bare imshow() return value.
plt.show()

In [None]:
# Second, independent random fixation: positions in [0, 1), angle in [-30, 30).
NDP.fixation_pos_x = torch.rand(batch_size, 1)
NDP.fixation_pos_y = torch.rand(batch_size, 1)
NDP.fixation_angle = 60 * (torch.rand(batch_size, 1) - 0.5)

# Run the same foveated pipelines again to obtain the second view.
start = time()
pipe_out2 = NDP.pytorch_wrapper(pipes1)
total_time = time() - start
print("Computation graph run in %f seconds." % total_time)

print('Does pipe_out2 live in GPU: ', pipe_out2[0][0].is_cuda)

# Overwrites the CPU copies of the first view with outputs 0-4 (crops) and
# 5-9 (resized images) of the second run.
(
    crop_images_cpu0, crop_images_cpu1, crop_images_cpu2, crop_images_cpu3, crop_images_cpu4,
    sized_images_cpu0, sized_images_cpu1, sized_images_cpu2, sized_images_cpu3, sized_images_cpu4,
) = (out.to('cpu') for out in pipe_out2[0][:10])

In [None]:
index = img_index

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random

# Selected sample from each of the five crop outputs and five resized outputs
# (at this point the *_cpu variables hold the second foveated run).
img0, img1, img2, img3, img4 = (
    batch[index]
    for batch in (crop_images_cpu0, crop_images_cpu1, crop_images_cpu2,
                  crop_images_cpu3, crop_images_cpu4)
)
img5, img6, img7, img8, img9 = (
    batch[index]
    for batch in (sized_images_cpu0, sized_images_cpu1, sized_images_cpu2,
                  sized_images_cpu3, sized_images_cpu4)
)

fig, ax = plt.subplots(5, 2, figsize=(25, 25))

# One row per output pair: crop in the left column, resized image in the right.
view_pairs = zip((img0, img1, img2, img3, img4), (img5, img6, img7, img8, img9))
for row, (crop_view, sized_view) in enumerate(view_pairs):
    ax[row, 0].imshow(crop_view)
    ax[row, 1].imshow(sized_view)

# Render explicitly so the cell does not end on a bare imshow() return value.
plt.show()

## ResNet

First reshape the outputs from the foveated system

In [None]:
# Sanity check before feeding the network: show the Python type of the raw reader
# output and whether the two wrapped foveated outputs are CUDA tensors.
print('pipe_out is a: ', type(pipe_out[0][0]))
print('Is pipe_out1 livin in GPU: ', pipe_out1[0][0].is_cuda)
print('Is pipe_out2 livin in GPU: ', pipe_out2[0][0].is_cuda)

In [None]:
# Stack outputs 5.. of the first view on a new leading axis, bring the batch axis
# to the front and the last axis next to the stacked one, then merge those two
# into a single channel axis of size-img_size images.
# NOTE(review): presumably each output is (batch, H, W, C), giving
# (batch, views*C, H, W) — confirm against the pipeline.
outputs1 = (
    torch.stack(pipe_out1[0][5:])
    .permute(1, 0, 4, 2, 3)
    .reshape(batch_size, -1, *img_size)
)
print(outputs1.shape)

In [None]:
# Show the first three channels of the selected sample, channels moved last for imshow.
plt.imshow(outputs1[index, :3].permute(1, 2, 0).cpu())

In [None]:
# Same rearrangement as for the first view, applied to outputs 5.. of the second view.
# NOTE(review): presumably each output is (batch, H, W, C), giving
# (batch, views*C, H, W) — confirm against the pipeline.
outputs2 = (
    torch.stack(pipe_out2[0][5:])
    .permute(1, 0, 4, 2, 3)
    .reshape(batch_size, -1, *img_size)
)
print(outputs2.shape)

In [None]:
# Show the first three channels of the selected sample, channels moved last for imshow.
plt.imshow(outputs2[index, :3].permute(1, 2, 0).cpu())

In [None]:
# Make the Drive folder holding the project-local ResNet module importable.
sys.path.append(path + '/Colab Notebooks/Multimodal Active AI/SimCLR/ResNet')
import ResNet as rn

In [None]:
# Encoder network f: the project's ResNet18, moved to the selected device.
# NOTE(review): relies on .to() returning the module itself, as torch.nn.Module does.
function_f = rn.ResNet.ResNet18().to(device)
function_f

In [None]:
# Encode the first view with f. The result is stored back into `outputs1`, so
# re-running this cell would feed the features into the encoder again.
outputs1 = function_f(outputs1.to(torch.float32))
outputs1.shape

In [None]:
# Encode the second view with f. The result is stored back into `outputs2`, so
# re-running this cell would feed the features into the encoder again.
outputs2 = function_f(outputs2.to(torch.float32))
outputs2.shape

## Projections

In [None]:
# Make the Drive folder holding the project-local multilayerPerceptron module importable.
sys.path.append(path + '/Colab Notebooks/Multimodal Active AI/SimCLR/MLP')
import multilayerPerceptron as mlp

In [None]:
# Projection head g, built with the sizes 512*4*4, 1024 and 128 and moved to the device.
# NOTE(review): presumably (input, hidden, output) widths — confirm in mlp.MLP.
# Relies on .to() returning the module itself, as torch.nn.Module does.
function_g = mlp.MLP(512*4*4, 1024, 128).to(device)
function_g

In [None]:
# Project the encoded first view with g. The result overwrites `outputs1`, so this
# cell is not safe to re-run without re-running the encoder cell first.
outputs1 = function_g(outputs1)
outputs1.shape

In [None]:
# Project the encoded second view with g. The result overwrites `outputs2`, so this
# cell is not safe to re-run without re-running the encoder cell first.
outputs2 = function_g(outputs2)
outputs2.shape

## SimCLR

In [None]:
# Make the Drive folder holding the project-local SimCLR module importable.
sys.path.append(path + '/Colab Notebooks/Multimodal Active AI/SimCLR')
import SimCLR

Create COCOReader pipeline

In [None]:
# Training-size batch; rebuilds the reader pipelines (one per GPU) with it.
num_gpus = 1
batch_size = 256

start = time()
pipes = [
    NDP.COCOReader(
        num_gpus=num_gpus,
        device_id=device_id,
        num_threads=2,
        batch_size=batch_size,
    )
    for device_id in range(num_gpus)
]
# Every pipeline has to be built before it can be run.
for pipe in pipes:
    pipe.build()
total_time = time() - start
print("Computation graph built and dataset loaded in %f seconds." % total_time)

In [None]:
temperature = 0.05
img_size = (30,30)

# Wrap encoder f and projection head g in the project's SimCLR module and move it
# to the selected device.
# NOTE(review): relies on .to() returning the module itself, as torch.nn.Module does.
model = SimCLR.SimCLR_Module(
    temperature, function_f, function_g, batch_size, img_size, device
).to(device)
model

Bring image batch using DALI pipe

In [None]:
# Run every reader pipeline once; only the first pipeline's outputs are used below.
pipe_out = [reader.run() for reader in pipes]

# Outputs 0, 1 and 2 of the first pipeline: images, bounding boxes and labels.
# NOTE(review): only the images are copied with as_cpu(); presumably the boxes and
# labels are already emitted on the CPU by the reader — confirm.
images_gpu, bboxes_cpu, labels_cpu = pipe_out[0][:3]
images_cpu = images_gpu.as_cpu()

Create the Foveated processor pipeline

In [None]:
img_size = (30,30)

# Initial random fixation per sample, stored as attributes on the NDP module:
# x and y drawn from [0, 1), angle drawn from [-30, 30).
# (A deterministic alternative is to assign constant tensors of the same
# (batch_size, 1) shape instead.)
NDP.fixation_pos_x = torch.rand(batch_size, 1)
NDP.fixation_pos_y = torch.rand(batch_size, 1)
NDP.fixation_angle = 60 * (torch.rand(batch_size, 1) - 0.5)

# Source objects handed to the foveated pipeline; the image collector is given
# the GPU image batch read above.
images = NDP.ImageCollector()
fixation = NDP.FixationCommand(batch_size)

images.data = images_gpu

start = time()
pipes1 = [
    NDP.FoveatedRetinalProcessor(
        images=images,
        fixation=fixation,
        num_gpus=num_gpus,
        device_id=device_id,
        num_threads=2,
        batch_size=batch_size,
    )
    for device_id in range(num_gpus)
]
for pipe1 in pipes1:
    pipe1.build()

total_time = time() - start
print("Computation graph built and dataset loaded in %f seconds." % total_time)

Run the Foveated processor once

In [None]:
# Random fixation for the first view: positions in [0, 1), angle in [-30, 30).
NDP.fixation_pos_x = torch.rand(batch_size, 1)
NDP.fixation_pos_y = torch.rand(batch_size, 1)
NDP.fixation_angle = 60 * (torch.rand(batch_size, 1) - 0.5)

# Run the foveated pipelines through the project's PyTorch wrapper and time it.
start = time()
pipe_out1 = NDP.pytorch_wrapper(pipes1)
total_time = time() - start
print("Computation graph run in %f seconds." % total_time)

print('Does pipe_out1 live in GPU: ', pipe_out1[0][0].is_cuda)

In [None]:
# Pass outputs 5.. of the first foveated run (the resized images) through the
# SimCLR module and show the shape of the result.
outputs1 = model(pipe_out1[0][5:])
outputs1.shape

In [None]:
# Random fixation for the second view: positions in [0, 1), angle in [-30, 30).
NDP.fixation_pos_x = torch.rand((batch_size,1))
NDP.fixation_pos_y = torch.rand((batch_size,1))
NDP.fixation_angle = (torch.rand((batch_size,1))-0.5)*60

# Deterministic alternative (centre fixation, 30 degree angle):
#NDP.fixation_pos_x = torch.repeat_interleave(torch.Tensor([0.5]), batch_size).view(-1,1)
#NDP.fixation_pos_y = torch.repeat_interleave(torch.Tensor([0.5]), batch_size).view(-1,1)
#NDP.fixation_angle = torch.repeat_interleave(torch.Tensor([30]), batch_size).view(-1,1)

# Run the same foveated pipelines again to obtain the second view.
start = time()
pipe_out2 = NDP.pytorch_wrapper(pipes1)
#pipe_out2 = [pipe1.run() for pipe1 in pipes1]
total_time = time() - start
print("Computation graph run in %f seconds." % total_time)

# The message names the variable whose flag is actually printed.
print('Does pipe_out2 live in GPU: ', pipe_out2[0][0].is_cuda)

In [None]:
# Pass outputs 5.. of the second foveated run (the resized images) through the
# SimCLR module and show the shape of the result.
outputs2 = model(pipe_out2[0][5:])
outputs2.shape

In [None]:
# Loss between the two batches of model outputs computed above.
# NOTE(review): presumably the SimCLR contrastive loss at the configured
# temperature — confirm in SimCLR.SimCLR_Module.compute_loss.
loss = model.compute_loss(outputs1, outputs2)

In [None]:
# Inspect the shape of the value returned by compute_loss.
loss.shape

In [None]:
# Display the loss value itself.
loss

### Define the Optimizer

In [None]:
import torch.optim as optim

# SGD with momentum over every parameter of the SimCLR module. The step scheduler
# halves the learning rate every 100 scheduler steps.
# NOTE(review): the training cell calls lr_scheduler.step() once per epoch and runs
# at most 5 epochs, so the first decay at step 100 is never reached — confirm intent.
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)

### Train the Network

First configure the initial checkpoint to start the training process

In [None]:
import os
import torch.nn.functional as F

# Resume from the checkpoint on Drive when it exists; otherwise start at epoch 0.
PATH = path + '/Colab Notebooks/Multimodal Active AI/SimCLR/simclr_net.pth'
if os.path.isfile(PATH):
  print('Checkpoint already stablished')
  # Load once, remapping the stored tensors onto the device selected for this
  # session, so a checkpoint saved on a GPU also loads on a CPU-only runtime.
  checkpoint = torch.load(PATH, map_location=device)

  # Restore network weights, optimizer state and scheduler state.
  model.load_state_dict(checkpoint['model_state_dict'])
  optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  lr_scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
  # `start` is the first epoch index the training loop will run.
  start = checkpoint['epoch']
  loss = checkpoint['loss']
else:
  print('Without checkpoint, then stablish initial checkpoint')
  start = 0

In [None]:
# Re-check which device is in use before running the training cells.
print(device)

In [None]:
# Number of inner optimisation steps run per outer iteration of the training loop below.
number_of_fixations = 100

In [None]:
# Anchor view: one random fixation of the current image batch, passed through the
# model once and reused as the fixed target for every optimisation step below.
# Only its detached value is ever used, so no autograd graph is built for it.
NDP.fixation_pos_x = torch.rand((batch_size,1))
NDP.fixation_pos_y = torch.rand((batch_size,1))
NDP.fixation_angle = (torch.rand((batch_size,1))-0.5)*60
pipe_out1 = NDP.pytorch_wrapper(pipes1)
with torch.no_grad():
    outputs1 = model(pipe_out1[0][5:])

# Checkpoint file, rewritten at the end of every `checkpoint_every` epochs.
PATH = path + '/Colab Notebooks/Multimodal Active AI/SimCLR/simclr_net.pth'

num_epochs = 5
iterations_per_epoch = 1000
checkpoint_every = 1

print('Start Training')

if start == 0:
    print('Since start is {}, we start the training using a new model from scratch.' .format(start))
else:
    print('Since start is {}, we load a model from checkpoint number {}.' .format(start, start))

for epoch in range(start, num_epochs):  # `start` comes from the checkpoint cell

    running_loss = 0.0
    for i in range(iterations_per_epoch):

        for _ in range(number_of_fixations):
            # Clear the gradients left over from the previous step.
            optimizer.zero_grad()

            # Second view: fixed centre fixation (0.5, 0.5) with a 30 degree angle.
            # NOTE(review): the random-fixation variant was commented out in the
            # original cell, so every inner step sees the same view — confirm intent.
            NDP.fixation_pos_x = torch.repeat_interleave(torch.Tensor([0.5]), batch_size).view(-1,1)
            NDP.fixation_pos_y = torch.repeat_interleave(torch.Tensor([0.5]), batch_size).view(-1,1)
            NDP.fixation_angle = torch.repeat_interleave(torch.Tensor([30]), batch_size).view(-1,1)
            pipe_out2 = NDP.pytorch_wrapper(pipes1)
            outputs2 = model(pipe_out2[0][5:])

            # Loss between the fixed anchor and the current view, as defined by the
            # SimCLR module; gradients flow through `outputs2` only.
            loss = model.compute_loss(outputs1.detach(), outputs2)

            loss.backward()
            optimizer.step()

        # Statistics use the loss of the last inner step of this iteration.
        last_loss = loss.item()
        print(last_loss)
        running_loss += last_loss

    # Average over the iterations that actually ran in this epoch.
    print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, iterations_per_epoch, running_loss / iterations_per_epoch))
    lr_scheduler.step()

    if (epoch + 1) % checkpoint_every == 0:
        print('Checkpoint ...')
        print('Saving the complete model state ...')
        # 'epoch' stores the next epoch to run, which the checkpoint cell reads as `start`.
        torch.save({
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': lr_scheduler.state_dict(),
            'loss': loss
            }, PATH)
        print('Done')


print('Training Finished')