In [11]:
import numpy as np
import pandas as pd
import os
import json

In [86]:
import random
from PIL import Image, ImageDraw
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch
import torchvision
from torchvision import transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [77]:
# Directory prefix that image file names are appended to when loading images.
baseTrainPath = 'data/train_images/train_images/'

# Read the annotation file inside a context manager so the file handle is
# closed as soon as parsing finishes (the bare open() never closed it).
with open('data/usdc_train.json') as train_file:
    data_json = json.load(train_file)

# Category records taken from the annotation file.
classes = data_json['categories']

In [41]:
# Lookup table: image id -> image file name.
images = {entry['id']: entry['file_name'] for entry in data_json['images']}
# One row per annotation record.
data = pd.DataFrame(data_json['annotations'])

In [28]:
# Preview the first five annotation rows.
data.head(n=5)

Unnamed: 0,id,image_id,category_id,bbox,area,segmentation,iscrowd,confidence,score
0,20,5,2,"[196, 234, 20.266666666666666, 24.74666666666667]",501.532444,[],0,0.5,0.5
1,21,5,2,"[247, 236, 16.53333333333333, 24.74666666666667]",409.144889,[],0,0.5,0.5
2,22,5,2,"[265, 234, 20.8, 23.893333333333334]",496.981333,[],0,0.5,0.5
3,23,5,2,"[267, 237, 15.466666666666667, 17.066666666666...",263.964444,[],0,0.5,0.5
4,24,5,2,"[305, 233, 78.93333333333334, 75.94666666666667]",5994.723556,[],0,0.5,0.5


In [67]:
# Convert every box from [x, y, width, height] to [x_min, y_min, x_max, y_max].
boxes_xyxy = [[x, y, x + w, y + h] for x, y, w, h in data.bbox]

# Build the result from a fresh frame via assign() instead of writing into
# pd.DataFrame(data): depending on the pandas version that constructor can
# share storage with `data`, so the column assignment would also rewrite
# `data.bbox`, and re-running this cell would convert the boxes a second time.
ds = (
    data.assign(bbox=boxes_xyxy)
        .groupby('image_id')
        .agg({'category_id': list, 'bbox': list})  # one row per image, lists of labels/boxes
        .reset_index()
)

ds

Unnamed: 0,image_id,category_id,bbox
0,5,"[2, 2, 2, 2, 2]","[[196, 234, 216.26666666666665, 258.7466666666..."
1,14,"[2, 8, 1, 2, 2, 2, 8, 7, 7, 2, 7, 2]","[[0, 224, 22.666666666666668, 264.533333333333..."
2,22,"[11, 2, 2, 2]","[[3, 230, 67, 299.97333333333336], [82, 241, 1..."
3,58,"[2, 2]","[[228, 261, 244.26666666666665, 284.04], [198,..."
4,74,"[2, 2]","[[72, 268, 117.33333333333334, 311.52], [157, ..."
...,...,...,...
10592,29792,"[2, 2, 2, 2, 11, 3, 3]","[[89, 230, 155.13333333333333, 317.89333333333..."
10593,29793,"[2, 2, 2, 2]","[[65, 269, 112.46666666666667, 311.66666666666..."
10594,29795,"[5, 5, 3, 3, 2]","[[193, 137, 203.66666666666666, 180.52], [270,..."
10595,29796,"[2, 7, 7]","[[235, 256, 250.73333333333332, 266.24], [239,..."


In [69]:
# Resolve each image id to its file name through the `images` lookup table.
ds['file_name'] = list(map(images.__getitem__, ds['image_id']))

In [72]:
# Preview the first five per-image rows, now including the file name.
ds.head(n=5)

Unnamed: 0,image_id,category_id,bbox,file_name
0,5,"[2, 2, 2, 2, 2]","[[196, 234, 216.26666666666665, 258.7466666666...",1478020898717725646_jpg.rf.68EjFVQdDWrB0SW6qVl...
1,14,"[2, 8, 1, 2, 2, 2, 8, 7, 7, 2, 7, 2]","[[0, 224, 22.666666666666668, 264.533333333333...",1478020650710690845_jpg.rf.68NUjFyrbU9Nsyt3ika...
2,22,"[11, 2, 2, 2]","[[3, 230, 67, 299.97333333333336], [82, 241, 1...",1478898975867837103_jpg.rf.6847BekYxQ4SlhtvjlE...
3,58,"[2, 2]","[[228, 261, 244.26666666666665, 284.04], [198,...",1478896447910420804_jpg.rf.68xeyVz6sbVFpLVku7W...
4,74,"[2, 2]","[[72, 268, 117.33333333333334, 311.52], [157, ...",1478900272123772897_jpg.rf.694dEtyhq6YM051Zxsj...


In [105]:
class CustomDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by a per-image annotation DataFrame.

    Each row of ``dataset`` must provide ``file_name`` (appended to
    ``imageBasePath`` to locate the image), ``bbox`` (a list of 4-element
    boxes) and ``category_id`` (a list of labels).
    """

    def __init__(self, dataset, imageBasePath, transforms=None):
        """Store the annotation frame, the image path prefix and ``transforms``.

        ``transforms`` is only stored; ``__getitem__`` does not apply it.
        """
        self.dataset = dataset
        self.transforms = transforms
        self.imageBasePath = imageBasePath

    def __getitem__(self, idx):
        """Return ``(image_tensor, target)`` for the row at position ``idx``.

        ``target`` is a dict with ``boxes`` (float32, shape (N, 4)) and
        ``labels`` (int64, shape (N,)).
        """
        # Positional lookup: a DataLoader passes positions 0..len-1, which only
        # coincide with index labels when the frame has a fresh RangeIndex.
        # Label-based access raised KeyError for any subset (e.g. a train split).
        row = self.dataset.iloc[idx]
        image = Image.open(self.imageBasePath + row['file_name']).convert('RGB')

        target = {
            # Explicit float32: boxes made only of integers would otherwise be
            # inferred as int64. The reshape keeps an empty list as shape (0, 4).
            'boxes': torch.as_tensor(row['bbox'], dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(row['category_id'], dtype=torch.int64),
        }

        return T.ToTensor()(image), target

    def __len__(self):
        """Number of rows (images) in the backing frame."""
        return len(self.dataset)

In [75]:
# Hold out 10% of the row positions of `ds`. The fixed random_state makes the
# split identical across kernel restarts, so results can be reproduced.
train_inds, test_inds = train_test_split(range(len(ds)), test_size=0.1, random_state=42)

In [76]:
def custom_collate(data):
    """Identity collate function: hand the list of samples back unchanged.

    Passed as ``collate_fn`` so the loader yields the raw list of samples
    rather than trying to stack them into batched tensors.
    """
    batch = data
    return batch

In [106]:
# Train only on the rows selected by the train/test split. Previously the
# loader wrapped the whole of `ds`, so the held-out images were trained on too.
# reset_index gives the subset a fresh 0..n-1 index for the dataset to look up.
train_ds = ds.iloc[train_inds].reset_index(drop=True)

train_dl = torch.utils.data.DataLoader(
    CustomDataset(train_ds, baseTrainPath),
    batch_size=16,
    shuffle=True,
    collate_fn=custom_collate,  # keep each batch as a plain list of samples
    pin_memory=torch.cuda.is_available(),
)

In [87]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# The box classifier needs one output per label value 0..max_id, where index 0
# is the background class in torchvision detection models. len(classes) only
# equals that when category ids are contiguous and start at 0, so take the
# larger of the two to keep every label in range.
# NOTE(review): assumes each category record carries an integer 'id' - confirm.
num_classes = max(len(classes), max((c['id'] for c in classes), default=-1) + 1)

# Replace the pretrained box-predictor head with one sized for this dataset.
in_featured = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_featured, num_classes)



In [90]:
# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [91]:
# Number of full passes over the training loader.
num_epochs = 5

# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=0.0005,
)

In [107]:
model.to(device)
# torchvision detection models return the loss dict only in training mode.
# Set it explicitly so the loop does not depend on what earlier cells did.
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0.0
    # The loop variable is `batch`, not `data`, so the annotations DataFrame
    # named `data` is not overwritten and earlier cells stay re-runnable.
    for batch in train_dl:
        # Each batch is a list of (image_tensor, target_dict) samples.
        imgs = [img.to(device) for img, _ in batch]
        targets = [
            {'boxes': tgt['boxes'].to(device), 'labels': tgt['labels'].to(device)}
            for _, tgt in batch
        ]

        loss_dict = model(imgs, targets)
        # Total loss is the sum of the individual loss terms the model returns.
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float without keeping the autograd graph.
        epoch_loss += loss.item()
    print(epoch_loss)

KeyboardInterrupt: 