# COCO Reader with augmentations

## Let's first manage COCO in its standard way

In [None]:
import sys
import random
import numpy as np
import torch
import torchvision.datasets as dset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Check which device is available.

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('Using device:', device)
print()

## Image captioning section of MSCOCO

In [None]:
# Root of the mounted Google Drive; the MSCOCO folder is addressed relative to it.
path = '/content/drive/My Drive'

# Captioning dataset over the val2014 images; every image is converted to a tensor on load.
cap = dset.CocoCaptions(
    root=path + '/MSCOCO/cocoapi/images/val2014',
    annFile=path + '/MSCOCO/cocoapi/annotations/captions_val2014.json',
    transform=transforms.ToTensor(),
)

### Show image and its caption

In [None]:
# Index of the sample to inspect.
image_number = 165
print('Number of samples: ', len(cap))
# Each item unpacks into an image and its caption data; `caption` is indexed with [0]
# when plotted, so it presumably holds several captions per image.
image, caption = cap[image_number]
# Last expression of the cell: displays the shape of the image tensor.
image.shape

In [None]:
# Move the channel axis last (permute(1, 2, 0)) before handing the tensor to matplotlib,
# and use the first caption as the title.
fig, ax = plt.subplots()
ax.imshow(image.permute(1, 2, 0))
ax.set_title(caption[0])

## Image detection section of MSCOCO

In [None]:
# Detection dataset over the same val2014 image folder, using the instance annotations.
det = dset.CocoDetection(
    root=path + '/MSCOCO/cocoapi/images/val2014',
    annFile=path + '/MSCOCO/cocoapi/annotations/instances_val2014.json',
    transform=transforms.ToTensor(),
)

### Show image and its bbox

In [None]:
# Report the dataset size, then fetch one sample together with its annotations.
print('Number of samples: ', len(det))
image, target = det[image_number] # load the sample at index `image_number`
# Last expression of the cell: displays the shape of the image tensor.
image.shape

In [None]:
# Number of annotation entries attached to the selected image.
len(target)

In [None]:
# Pick one annotation of the image and list the fields it provides.
element = 2
target[element].keys()

In [None]:
# Print the main fields of the selected annotation, one per paragraph.
annotation = target[element]
print(
    'Element: {}\n\ncategory_id: {}\n\nid: {}\n\niscrowd: {}\n\nsegmentation: {}\n\nimage_id: {}\n\nbbox: {}'.format(
        element,
        *(annotation[key] for key in ('category_id', 'id', 'iscrowd', 'segmentation', 'image_id', 'bbox'))
    )
)

In [None]:
# Show the image and outline the selected annotation's box on top of it.
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(image.permute(1, 2, 0))

# The box is used as (x, y) of one corner followed by width and height.
bbox = target[element]['bbox']
x_min, y_min, box_w, box_h = bbox
rect = patches.Rectangle(
    (x_min, y_min), box_w, box_h,
    linewidth=3, edgecolor='g', facecolor='none',
)
ax.add_patch(rect)

## COCO Reader with augmentations

First of all, install a compatible version of NVIDIA DALI

In [None]:
# Install the nvidia-dali-cuda100 wheel from NVIDIA's extra package index.
!pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda100
# Alternative wheel (nvidia-dali-cuda110); enable this line instead if the runtime needs that build:
#!pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110

Make the folder with the helper module `NVIDIA_DALI_Pipelines` importable and load it as `NDP`.

In [None]:
# Root of the mounted Google Drive (same value as assigned earlier in the notebook).
path = '/content/drive/My Drive'

from time import time
# Extend the import path with the Drive folder that presumably holds NVIDIA_DALI_Pipelines.py,
# then load that module under the short alias NDP.
sys.path.append(path + '/Colab Notebooks/Multimodal Active AI/SimCLR/NVIDIA DALI')
import NVIDIA_DALI_Pipelines as NDP

Reader operator that reads a COCO dataset (or subset of COCO), which consists of an annotation file and the images directory.

In [None]:
num_gpus = 1
batch_size = 4
#batch_size = 1024*2

# Time the creation and build of one COCO reader pipeline per GPU.
start = time()
pipes = [
    NDP.COCOReader(
        batch_size=batch_size,
        num_threads=2,
        device_id=device_id,
        num_gpus=num_gpus,
    )
    for device_id in range(num_gpus)
]
for pipe in pipes:
    pipe.build()
total_time = time() - start

print("Computation graph built and dataset loaded in %f seconds." % total_time)

In [None]:
# Run every pipeline once and keep the raw outputs.
pipe_out = [pipe.run() for pipe in pipes]

# Outputs of the first pipeline, in the order they are indexed: images, boxes, labels.
images_gpu = pipe_out[0][0]
bboxes_cpu = pipe_out[0][1]
labels_cpu = pipe_out[0][2]

# Host-side copy of the image batch, used for plotting.
images_cpu = images_gpu.as_cpu()

Bounding boxes returned by the operator are lists of floats composed of **[x, y, width, height]** (`ltrb` is set to `False` by default).

In [None]:
# Position inside the batch of the sample to inspect.
img_index = 1

# Bounding boxes of that sample; the bare name on the last line displays them.
bboxes = bboxes_cpu.at(img_index)
bboxes

In [None]:
# Labels of the same sample; the bare name on the last line displays them.
labels = labels_cpu.at(img_index)
labels

Let’s see the ground truth bounding boxes drawn on the image.



In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random

# Image, boxes and labels of the selected sample.
img = images_cpu.at(img_index)
bboxes = bboxes_cpu.at(img_index)
labels = labels_cpu.at(img_index)

# Image height and width; the box values are multiplied by them when drawing.
H, W = img.shape[:2]

# One random RGB colour per category id present in this image.
categories_set = {label[0] for label in labels}
category_id_to_color = {
    cat_id: [random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)]
    for cat_id in categories_set
}

fig, ax = plt.subplots(1)
ax.imshow(img)

# Outline every box in the colour of its category.
for bbox, label in zip(bboxes, labels):
    left, top, box_w, box_h = bbox
    rect = patches.Rectangle(
        (left * W, top * H), box_w * W, box_h * H,
        linewidth=2, edgecolor=category_id_to_color[label[0]], facecolor='none',
    )
    ax.add_patch(rect)

plt.show()

## COCO Reader with augmentations

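Create the foveated retinal processing pipeline (`NDP.FoveatedRetinalProcessor`), fed with the images read above and a random fixation position and angle for each image in the batch.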

In [None]:
# Random fixation parameters, one row per image in the batch, stored as attributes of the
# NDP module: x and y are drawn uniformly from [0, 1), the angle uniformly from [-30, 30).
# NOTE(review): presumably read by NDP.FixationCommand when the pipeline runs - confirm in the module.
NDP.fixation_pos_x = torch.rand((batch_size,1))
NDP.fixation_pos_y = torch.rand((batch_size,1))
NDP.fixation_angle = (torch.rand((batch_size,1))-0.5)*60

# Fixed (non-random) alternative, kept for debugging:
#NDP.fixation_pos_x = torch.repeat_interleave(torch.Tensor([0]), batch_size).view(-1,1)
#NDP.fixation_pos_y = torch.repeat_interleave(torch.Tensor([0]), batch_size).view(-1,1)
#NDP.fixation_angle = torch.repeat_interleave(torch.Tensor([30]), batch_size).view(-1,1)

# Helper objects that are handed to the pipeline as its `images` and `fixation` inputs.
images = NDP.ImageCollector()
fixation = NDP.FixationCommand(batch_size)

# Feed the image batch produced by the COCO reader pipeline into the collector.
images.data = images_gpu

# Time the creation and build of one foveated processing pipeline per GPU.
start = time()
pipes1 = [NDP.FoveatedRetinalProcessor(batch_size=batch_size, num_threads=2, device_id=device_id, num_gpus=num_gpus, fixation=fixation, images=images)  for device_id in range(num_gpus)]
for pipe1 in pipes1:
  pipe1.build()

total_time = time() - start
print("Computation graph built and dataset loaded in %f seconds." % total_time)

In [None]:
# Draw a fresh random fixation (position in [0, 1), angle in [-30, 30)) for every image.
NDP.fixation_pos_x = torch.rand((batch_size,1))
NDP.fixation_pos_y = torch.rand((batch_size,1))
NDP.fixation_angle = (torch.rand((batch_size,1))-0.5)*60

#NDP.fixation_pos_x = torch.repeat_interleave(torch.Tensor([0.5]), batch_size).view(-1,1)
#NDP.fixation_pos_y = torch.repeat_interleave(torch.Tensor([0.5]), batch_size).view(-1,1)
#NDP.fixation_angle = torch.repeat_interleave(torch.Tensor([30]), batch_size).view(-1,1)


# Run every foveated pipeline once and report how long it took.
start = time()
pipe_out1 = [pipe1.run() for pipe1 in pipes1]
total_time = time() - start
print("Computation graph run in %f seconds." % total_time)

# Outputs 0-4 of the first pipeline, copied to the host.
(crop_images_cpu0,
 crop_images_cpu1,
 crop_images_cpu2,
 crop_images_cpu3,
 crop_images_cpu4) = [pipe_out1[0][k].as_cpu() for k in range(0, 5)]

# Outputs 5-9 of the first pipeline, copied to the host.
(sized_images_cpu0,
 sized_images_cpu1,
 sized_images_cpu2,
 sized_images_cpu3,
 sized_images_cpu4) = [pipe_out1[0][k].as_cpu() for k in range(5, 10)]

In [None]:
#images_gpu.as_cpu().as_array().shape

Let’s see the cropped (left column) and resized (right column) outputs of the foveated pipeline for the selected image.


In [None]:
# Sample of the batch to display.
index = img_index

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random

# Batches produced by the foveated pipeline, grouped by figure column.
crop_batches = (crop_images_cpu0, crop_images_cpu1, crop_images_cpu2,
                crop_images_cpu3, crop_images_cpu4)
sized_batches = (sized_images_cpu0, sized_images_cpu1, sized_images_cpu2,
                 sized_images_cpu3, sized_images_cpu4)

# Selected sample from every batch; img0..img9 stay defined in case other cells use them.
img0, img1, img2, img3, img4 = [batch.at(index) for batch in crop_batches]
img5, img6, img7, img8, img9 = [batch.at(index) for batch in sized_batches]

fig, ax = plt.subplots(5, 2, figsize=(25, 25))

# One row per output level: crop on the left, resized image on the right.
# Each panel is titled with the name of the batch it comes from.
rows = zip((img0, img1, img2, img3, img4), (img5, img6, img7, img8, img9))
for row, (crop_img, sized_img) in enumerate(rows):
    ax[row, 0].imshow(crop_img)
    ax[row, 0].set_title('crop_images_cpu{}'.format(row))
    ax[row, 1].imshow(sized_img)
    ax[row, 1].set_title('sized_images_cpu{}'.format(row))

# Render the figure explicitly so the cell does not end on an AxesImage repr.
plt.show()