In [1]:
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader
from torchvision import transforms

%matplotlib inline

In [2]:
from dataset import CocoDataset, display_image_annot, display_bbox_annot

# Grid hyper-parameters forwarded to CocoDataset as S=, B= and C=.
# NOTE(review): presumably the YOLO convention (S x S grid, B boxes per cell,
# C classes) -- the 7 x 7 x 11 target tensor printed further down matches
# S x S x (B * 5 + C); confirm against dataset.py.
S = 7
B = 2
C = 1

In [7]:
# Dataset root directory; each split is addressed as <root>/<split folder>.
root = 'coco'
# Not referenced by any cell visible in this part of the notebook.
img_paths = 'images'

# Both splits are built with identical settings -- only the split folder
# differs. The generator is consumed left to right by the tuple unpacking, so
# the training set is constructed first and the validation set second.
train_dataset, val_dataset = (
    CocoDataset(
        root,
        data_type=os.path.join(root, split),
        transforms=None,
        S=S,
        B=B,
        C=C,
        in_memory=False,
        is_debug=False,
    )
    for split in ('train2017', 'val2017')
)

# Sanity check: fetch a single sample so the notebook echoes it below.
train_dataset[3]

loading annotations into memory...
Done (t=12.52s)
creating index...
index created!
loading annotations into memory...
Done (t=0.25s)
creating index...
index created!
[2, 6]
[4, 4]
[4, 4]
[4, 3]
[4, 3]
[4, 6]
[4, 6]
[4, 6]
[4, 6]
[4, 6]
[4, 6]
[5, 6]
[4, 5]

 Number of bboxes: (13, 4)


 Tensor Of prediction: size= torch.Size([7, 7, 11])
tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000],
      

(tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],
 
         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]]),
 tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
    

In [8]:
import multiprocessing as mp
import torch.backends.cudnn as cudnn

# Report the runtime environment: torch's thread count and the CUDA / cuDNN
# versions this build of torch reports.
print(f'Usable threads: { torch.get_num_threads() }')
print(f'cuda version: { torch.version.cuda }\tcudnn version: { cudnn.version() }')

# Use roughly 40% of the CPUs as loader workers. DataLoader expects an integer
# worker count: the raw product 0.4 * cpu_count() is a float, which is accepted
# at construction time but fails once the loader starts its worker processes
# on first iteration. Truncate to int; 0 is a valid value and means the data
# is loaded in the main process.
num_workers = int(0.4 * mp.cpu_count())

# Training batches are shuffled, validation batches keep dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=1,
    shuffle=True,
    pin_memory=True,
    num_workers=num_workers,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False,
    pin_memory=True,
    num_workers=num_workers,
)

Usable threads: 12
cuda version: 11.1	cudnn version: 8005


In [9]:
from nets import YoloNet

# Design notes kept as a bare triple-quoted string rather than `#` comments.
# Because the string is the last expression in the cell, the notebook echoes
# its repr as the cell output; it has no other effect at runtime.
# NOTE(review): the notes list batch = 24, while the DataLoaders in this
# notebook are created with batch_size=1 -- confirm which one is intended.
'''
    Parameters:
    batch = 24
    epochs = 120
    input_multiple = 32

    Data augmentation:
    * normalized and scaled

    * divide data into SxS (object check if object center falls into this grid cell), useful for later
    * (x, y) -> center location
    * (w, h)

    Training:
    * Pr(obj) = confidence score and during training: 
        Pr(conf) = Pr(obj) * IoU(bbox_pred, bbox_true)

    * Pr(class_i | obj) and during training: 
        Pr(obj) = regressed
        Pr(class_i | obj) = Pr(class_i) / Pr(obj)
'''

'\n    Parameters:\n    batch = 24\n    epochs = 120\n    input_multiple = 32\n\n    Data augmentation:\n    * normalized and scaled\n\n    * divide data into SxS (object check if object center falls into this grid cell), useful for later\n    * (x, y) -> center location\n    * (w, h)\n\n    Training:\n    * Pr(obj) = confidence score and during training: \n        Pr(conf) = Pr(obj) * IoU(bbox_pred, bbox_true)\n\n    * Pr(class_i | obj) and during training: \n        Pr(obj) = regressed\n        Pr(class_i | obj) = Pr(class_i) / Pr(obj)\n'