In [1]:
# Install the packages this notebook needs into the runtime.
# NOTE(review): versions are unpinned; the captured output shows torch-lr-finder 0.2.1
# was installed for this run. torch-lr-finder is not imported by any cell shown here —
# confirm it is still needed.
!pip install torch
!pip install pandas
!pip install numpy
!pip install torch-lr-finder

Collecting torch-lr-finder
  Downloading https://files.pythonhosted.org/packages/ea/51/1a869067989a0fdaf18e49f0ee3aebfcb63470525245aac7dc390cfc676a/torch_lr_finder-0.2.1-py3-none-any.whl
Installing collected packages: torch-lr-finder
Successfully installed torch-lr-finder-0.2.1


In [2]:
# Print the GPU attached to this runtime (diagnostic only; nothing below reads the output).
!nvidia-smi

Sun Dec 13 12:37:53 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.45.01    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data.sampler import SubsetRandomSampler

from math import floor, ceil
import tqdm
import numpy as np
from PIL import Image, ImageDraw
import pandas as pd
from os import listdir
from os.path import isdir, join, basename, normpath, isfile
from google.colab import drive
import csv 
from timeit import default_timer as timer

In [4]:
# Locations on the mounted Google Drive (the drive is mounted in a later cell).
# data_dir is scanned for one sub-directory per class; annotation_loc is a header-less
# CSV whose first column is the image path, followed by x_min, y_min, x_max, y_max, label.
data_dir = '/content/gdrive/My Drive/simpsons/bounding_data'
annotation_loc = '/content/gdrive/My Drive/simpsons/annotation.txt'

In [5]:
# Mount Google Drive so the paths under /content/gdrive defined above become readable.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [6]:
def convert_bbox(old_size, new_size, x_min, y_min, x_max, y_max):
  """Rescale a bounding box from one image size to another.

  Args:
    old_size: (width, height) of the image the box was annotated on.
    new_size: (width, height) of the resized image.
    x_min, y_min, x_max, y_max: box corners in old-image pixels.

  Returns:
    [x_min, y_min, x_max, y_max] in new-image pixels. The minimum corner is
    rounded down and the maximum corner is rounded up, so the rescaled box
    never shrinks below the original extent.
  """
  old_w, old_h = old_size[0], old_size[1]
  new_w, new_h = new_size[0], new_size[1]

  # Normalise each coordinate to [0, 1] first, then scale to the new size.
  left = floor((x_min / old_w) * new_w)
  top = floor((y_min / old_h) * new_h)
  right = ceil((x_max / old_w) * new_w)
  bottom = ceil((y_max / old_h) * new_h)

  return [left, top, right, bottom]


In [7]:
class SimpsonsObjectDetectionDataset(Dataset):
  """Detection dataset with one bounding box per image, read from per-class folders.

  `data_dir` must contain one sub-directory per class; every file directly inside
  a class directory is treated as an image. `annotations_loc` is a header-less CSV
  whose columns are: image path, x_min, y_min, x_max, y_max, class name. The image
  path column must match `data_dir + "/" + class_dir + "/" + file_name` exactly.
  When an image has several annotation rows only the first one is used.
  """

  def __init__(self, data_dir: str, annotations_loc: str, train: bool, image_size: tuple, transforms = None):
    """Index the images and load their annotations.

    Args:
      data_dir: directory holding one sub-directory per class.
      annotations_loc: path of the header-less annotation CSV.
      train: selects the `iscrowd` value emitted by `__getitem__`.
      image_size: (width, height) every image is resized to.
      transforms: optional callable applied to the float32 image array.

    Raises:
      IndexError: if an image found on disk has no row in the annotation file.
      KeyError: if an annotation names a class that has no directory.
    """
    self.transform = transforms
    self.train = train
    self.image_size = image_size
    char_dirs = [data_dir + "/" + d for d in listdir(data_dir) if isdir(data_dir + "/" + d)]
    class_names = sorted(basename(normpath(cn)) for cn in char_dirs)
    # Class ids start at 1: torchvision detection models reserve 0 for background.
    self.class_to_idx = {name: i + 1 for i, name in enumerate(class_names)}
    self.idx_to_class = {v: k for k, v in self.class_to_idx.items()}
    # Full list of every image file in the dataset, in directory-listing order.
    self.all_images = [
        char_dir + "/" + i
        for char_dir in char_dirs
        for i in listdir(char_dir)
        if isfile(char_dir + "/" + i)
    ]

    # Index the annotation file once (first row per image wins) instead of
    # filtering the whole DataFrame again for every image.
    annotations_df = pd.read_csv(annotations_loc, header=None)
    first_annotation = {}
    for row in annotations_df.itertuples(index=False, name=None):
      first_annotation.setdefault(row[0], row)

    self.x_mins = []
    self.y_mins = []
    self.x_maxes = []
    self.y_maxes = []
    self.labels = []

    for image_path in self.all_images:
      row = first_annotation.get(image_path)
      if row is None:
        # Same exception type the positional lookup used to raise, but naming the file.
        raise IndexError(
            f"No annotation row found for image {image_path!r} in {annotations_loc!r}"
        )
      self.x_mins.append(row[1])
      self.y_mins.append(row[2])
      self.x_maxes.append(row[3])
      self.y_maxes.append(row[4])
      self.labels.append(self.class_to_idx[row[5]])

  def __len__(self) -> int:
    """Number of images found under `data_dir`."""
    return len(self.all_images)

  def __getitem__(self, idx: int):
    """Load one sample.

    Returns:
      (image, target, path): `image` is the resized RGB image as float32 in
      [0, 1], passed through `self.transform` when one was given; `target` is a
      dict with the keys 'boxes', 'labels', 'image_id', 'area' and 'iscrowd';
      `path` is the file the image was read from.
    """
    image_to_load = self.all_images[idx]
    with Image.open(image_to_load) as raw_image:
      org_size = raw_image.size
      # Force three channels so grayscale / RGBA / palette files yield the same
      # layout as ordinary RGB ones.
      image = raw_image.convert('RGB').resize(self.image_size)
    new_size = image.size
    new_bbox = convert_bbox(org_size,
                            new_size,
                            self.x_mins[idx],
                            self.y_mins[idx],
                            self.x_maxes[idx],
                            self.y_maxes[idx])
    image = np.array(image, dtype = np.float32)
    image /= 255.0

    if self.transform:
      image = self.transform(image)

    boxes = np.array([new_bbox])
    # Box area = height * width of the single rescaled box (stored as a 0-d tensor).
    area = (boxes[0][3] - boxes[0][1]) * (boxes[0][2] - boxes[0][0])
    area = torch.as_tensor(area, dtype=torch.float32)
    labels = torch.as_tensor(np.array([self.labels[idx]]), dtype=torch.int64)
    # NOTE(review): evaluation samples are all flagged iscrowd=1 — confirm this is
    # what the downstream evaluation expects.
    if self.train:
      iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
    else:
      iscrowd = torch.ones((boxes.shape[0],), dtype=torch.int64)

    target = {}
    target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
    target['labels'] = labels
    target['image_id'] = torch.tensor([idx])
    target['area'] = area
    target['iscrowd'] = iscrowd

    return image, target, image_to_load

def collate_fn(batch):
    """Transpose a list of samples into per-field tuples.

    A batch of `(image, target, path)` samples becomes
    `(images, targets, paths)`; nothing is stacked, so samples may differ in shape.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [8]:
def get_train_and_test_dataloders(data_dir: str, annotations_loc: str, test_pct: float, batch_size: int, seed: int = None):
  """Build train and test dataloaders over a random split of the dataset.

  Args:
    data_dir: directory holding one sub-directory per class.
    annotations_loc: path of the annotation CSV.
    test_pct: fraction of the samples (0..1) held out for the test loader.
    batch_size: batch size used by both loaders.
    seed: optional seed for the split. When None the global NumPy random state
      is used, so the split changes from run to run unless that state was seeded.

  Returns:
    (train_dataloader, test_dataloader, class_to_idx, idx_to_class)
  """
  import copy  # local import: only this function needs it

  train_dataset = SimpsonsObjectDetectionDataset(data_dir=data_dir,
                                                 image_size=(300, 300),
                                                 annotations_loc=annotations_loc,
                                                 train=True,
                                                 transforms=transforms.ToTensor())
  # The test view differs only in its `train` flag. Sharing the already-built
  # index avoids a second directory scan and annotation pass, and guarantees both
  # views agree on which image each index refers to, so the split below is disjoint.
  test_dataset = copy.copy(train_dataset)
  test_dataset.train = False

  class_to_idx = train_dataset.class_to_idx
  idx_to_class = train_dataset.idx_to_class

  len_train = len(train_dataset)
  indicies = list(range(len_train))
  split = int(np.floor(test_pct * len_train))
  if seed is None:
    np.random.shuffle(indicies)
  else:
    # Private generator: reproducible split without touching the global random state.
    np.random.RandomState(seed).shuffle(indicies)
  # The first `split` shuffled indices go to test, the remainder to train.
  train_idx, test_idx = indicies[split:], indicies[:split]
  train_sampler = SubsetRandomSampler(train_idx)
  test_sampler = SubsetRandomSampler(test_idx)

  train_dataloader = DataLoader(train_dataset,
                                batch_size=batch_size,
                                sampler=train_sampler,
                                collate_fn=collate_fn)

  test_dataloader = DataLoader(test_dataset,
                               batch_size=batch_size,
                               sampler=test_sampler,
                               collate_fn=collate_fn)

  return train_dataloader, test_dataloader, class_to_idx, idx_to_class

In [9]:
# Hold out 20% of the images for testing; both loaders use batches of 15.
train_dataloader, test_dataloader, class_to_idx, idx_to_class = get_train_and_test_dataloders(
    data_dir=data_dir,
    annotations_loc=annotation_loc,
    test_pct=0.2,
    batch_size=15,
)

In [10]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device      = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Size the box head from the data instead of a hard-coded count: the dataset numbers
# its classes from 1 and the detection head needs one extra slot (0) for background.
num_classes = len(class_to_idx) + 1
# Last observed batch loss; overwritten by the training loop.
loss_value  = 0.0
epochs = 4

In [11]:
# Start from the COCO-pretrained Faster R-CNN (ResNet-50 + FPN backbone).
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box predictor so the classification head has `num_classes` outputs.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

lr = 0.0005
# Optimise only the parameters that are not frozen.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimiser = optim.AdamW(params, lr=lr, weight_decay=0.0005)
# One-cycle schedule that peaks at `lr` and is stepped once per batch.
scheduler = optim.lr_scheduler.OneCycleLR(
    optimiser,
    max_lr=lr,
    epochs=epochs,
    steps_per_epoch=len(train_dataloader),
)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


HBox(children=(FloatProgress(value=0.0, max=167502836.0), HTML(value='')))




In [12]:
# Make sure the model is in training mode: the detection model only returns the loss
# dict in that mode, and an earlier run of the evaluation cell would have left it in
# eval mode.
model.train()
for epoch in range(epochs):
    start_time = timer()
    # Create a fresh progress bar per epoch without rebinding `train_dataloader`.
    # Re-wrapping the already wrapped loader stacked one more bar on every epoch.
    for images, targets, which_im in tqdm.tqdm(train_dataloader):
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # The total loss is the sum of the individual loss terms the model returns.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        optimiser.zero_grad()
        losses.backward()
        optimiser.step()
        # The scheduler was built with steps_per_epoch, so it advances once per batch.
        scheduler.step()
    end_time = timer()
    # Reports the loss of the last batch of the epoch and the epoch wall time in seconds.
    print('\nLoss : {:.5f} | Time: {:.5f}'.format(loss_value, (end_time - start_time)))

100%|██████████| 360/360 [35:23<00:00,  5.90s/it]
  0%|          | 0/360 [00:00<?, ?it/s]


Loss : 0.23859 | Time: 2123.04677


100%|██████████| 360/360 [16:43<00:00,  2.79s/it]
  0%|          | 0/360 [00:00<?, ?it/s]


Loss : 0.17254 | Time: 1003.98684


100%|██████████| 360/360 [16:39<00:00,  2.78s/it]
  0%|          | 0/360 [00:00<?, ?it/s]


Loss : 0.17176 | Time: 999.25591


100%|██████████| 360/360 [16:34<00:00,  2.76s/it]


Loss : 0.11239 | Time: 994.95561





In [13]:
# Run the trained detector over the held-out images and collect every detection
# whose confidence reaches `detection_threshold`.
model.eval()
detection_threshold = 0.5
results = []
with torch.no_grad():
    # Wrap the loader for this pass only; `test_dataloader` itself is left untouched
    # so re-running the cell does not nest progress bars.
    for images, targets, which_im in tqdm.tqdm(test_dataloader):
        images = [image.to(device) for image in images]
        outputs = model(images)

        for file_name, output in zip(which_im, outputs):
            scores = output['scores'].cpu().numpy()
            # One mask for boxes AND labels, so each kept box is paired with its own
            # label regardless of the order the detections are returned in.
            keep = scores >= detection_threshold
            boxes = output['boxes'].cpu().numpy()[keep].astype(np.int32)
            labels = output['labels'].cpu().numpy()[keep]

            for box, label in zip(boxes, labels):
                results.append({'file_name' : file_name,
                                'classes'   : idx_to_class[int(label)],
                                'xmin'      : box[0],
                                'ymin'      : box[1],
                                'xmax'      : box[2],
                                'ymax'      : box[3]})


100%|██████████| 90/90 [07:47<00:00,  5.20s/it]


In [14]:
# Persist the detections to Google Drive, one CSV row per predicted box.
model_predictions_path = '/content/gdrive/My Drive/simpsons/' + 'output.csv'
# newline='' lets the csv module control line endings itself, as its documentation
# requires for files handed to a writer (otherwise some platforms emit blank rows).
with open(model_predictions_path, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['file_name', 'classes', 'xmin', 'ymin', 'xmax', 'ymax'])
    writer.writeheader()
    writer.writerows(results)