# R-CNN

In [52]:
import pandas as pd
import json
import numpy as np
import os
import torch 

ann_dir = '../annotation/annotations/panoptic_val2017.json'
root_dir = '../data/val2017/'


os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '2'

In [53]:
def get_items(ann_dir):
    with open(ann_dir, 'r') as f:
        temp = json.load(f)
    image_list = []
    ctg_df = pd.DataFrame(temp['categories'])
    id2ctg = dict(ctg_df.set_index('id')['name'])
    ctg2id = dict(ctg_df.set_index('name')['id'])
    for a in temp['annotations']:
        image_id = a['file_name'][:-4]
        
        bbox = np.stack([i['bbox'] for i in a['segments_info']])
        labels = np.asarray([i['category_id'] for i in a['segments_info']])
        image_list.append({'image_id': image_id,
                           'bbox': bbox,
                           'labels': labels})
    return np.asanyarray(image_list), id2ctg

In [54]:
image_list, id2ctg = get_items(ann_dir)

In [55]:
from random import sample
def get_tv_indx(tl, k=0.8):
    total_idx = range(tl)
    train_idx = sample(total_idx, int(tl * k))
    valid_idx = set(total_idx) - set(train_idx)
    return train_idx, list(valid_idx)

In [56]:
train_idx, valid_idx = get_tv_indx((len(image_list)))
train_list = image_list[train_idx]
valid_list = image_list[valid_idx]

In [57]:
def get_iou(box1, box2):
    # box = (x1, y1, x2, y2)
    box1_area = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
    box2_area = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)

    # obtain x1, y1, x2, y2 of the intersection
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # compute the width and height of the intersection
    w = max(0, x2 - x1 + 1)
    h = max(0, y2 - y1 + 1)

    inter = w * h
    iou = inter / (box1_area + box2_area - inter)
    return iou

In [58]:
import cv2
from matplotlib import pyplot as plt
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

def SelectiveSearch(t, root_dir):
    train_images = []
    train_labels = []
    
    img_id = t['image_id']
    img = cv2.imread(f'{root_dir}{img_id}.jpg')
    
    ss.setBaseImage(img)
    ss.switchToSelectiveSearchFast()
    ssresults = ss.process()
    
    imout = img.copy()
    counter = 0 
    falsecounter = 0
    flag = 0
    fflag = 0
    bflag = 0
    boxes = t['bbox']
    boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
    boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
    for idx, result in enumerate(ssresults):
        if idx < 2000 and flag == 0:
            for i, gtval in enumerate(boxes):    # ground truth
                x, y, w, h = result
                iou = get_iou(gtval, [x, y, x+w, y+h])
                if counter < 30:
                    if iou > 0.70 :
                        timage = imout[y:y+h, x:x+w]
                        resized = cv2.resize(timage, (224,224), interpolation = cv2.INTER_AREA)
                        train_images.append(resized)
                        train_labels.append(t['labels'][i])
                        counter += 1
                else:
                    fflag = 1
                if falsecounter < 30:
                    if iou < 0.3:
                        timage = imout[y:y+h, x:x+w]
                        resized = cv2.resize(timage, (224,224), interpolation = cv2.INTER_AREA)
                        train_images.append(resized)
                        train_labels.append(0)
                        falsecounter += 1
                else:
                    bflag = 1
            if fflag == 1 and bflag ==1:
                flag = 1
                
    return np.array(train_images, dtype=np.uint8), np.array(train_labels, dtype=np.int_)

In [59]:
from torchvision import models
from torch import nn

model = models.alexnet()

for param in model.parameters():
    param.requires_grad = False
    
model.classifier = nn.Sequential(nn.Linear(9216, 4096),
                                 nn.Linear(4096, len(id2ctg) + 1))


model.cuda()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): Linear(in_features=4096, out_fe

In [60]:
from torch import optim

criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)

In [61]:
from torchvision import transforms
train_transform = transforms.Compose([transforms.ToTensor(),
                                      transforms.RandomVerticalFlip(p=0.5),
                                      transforms.RandomHorizontalFlip(p=0.5)])

In [62]:
from tqdm.notebook import tqdm
total_loss = 0.0
tk0 = tqdm(train_list, total=len(train_list), leave=False)

for idx, t in enumerate(tk0, start=1):
    image_data, label_data = SelectiveSearch(t, root_dir)
    inputs = torch.cat(tuple(train_transform(id).cuda().reshape(-1, 3, 224, 224) for id in image_data))
    labels = torch.Tensor(label_data).cuda()
    labels = labels.type(torch.long)

    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    total_loss += loss.item()
    tk0.set_description(f'Step: {idx}. loss: {loss.item():.3f}. Mean loss: {total_loss / idx:.3f}')

  0%|          | 0/4000 [00:00<?, ?it/s]

RuntimeError: cuda runtime error (710) : device-side assert triggered at /pytorch/aten/src/THC/generic/THCTensorMath.cu:29