In [1]:
# (Re)load IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to local .py files are picked up.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import object_detection_utils as utils
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Choose exactly one backbone implementation; the alternatives are kept
# commented out so they can be swapped in.
#from model_resnet18 import backbone_network
#from model_resnet34 import backbone_network
from model_custom import backbone_network
# Module-level backbone instance; Pikachu_config.__init__ reads this global.
backbone_model=backbone_network()

In [4]:
# Show the module repr to inspect the backbone's layers.
backbone_model

base_net(
  (blk_0): Sequential(
    (conv_1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batch_norm_1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_1): ReLU()
    (conv_2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batch_norm_2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_2): ReLU()
    (max_pool_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (blk_1): Sequential(
    (conv_1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batch_norm_1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_1): ReLU()
    (conv_2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batch_norm_2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_2): ReLU()
    (max_pool_1): MaxPool2d(kernel_size=2, s

## Dataset:
Each dataset folder is expected to contain two sub-folders, ``train`` and ``val``, each of which holds a file named ``labels.txt``.
Each line in ``labels.txt`` has the format ``[imagexxx.jpg,y1,x1,y2,x2,class_name]``, where ``y1,x1,y2,x2`` are given in absolute coordinates. The file has no header line.

In [5]:
class Pikachu_config(utils.Config):
    """Detector configuration for the Pikachu data: 256x256 inputs, one class.

    Instantiation picks ``SCALES`` from the image width (only 512, 256 and 128
    have an entry; for any other width ``SCALES`` is left untouched), sets
    ``ratio`` to 0.4 and then runs ``utils.Config.__init__`` with the
    module-level ``backbone_model``, which therefore must exist at that point.
    """

    HEIGHT = 256
    WIDTH = 256
    NUM_CLASSES = 1  # excluding background

    def __init__(self):
        # One single-element tuple per entry, five entries per supported width.
        # presumably these are anchor sizes in pixels -- confirm in utils.Config
        scales_by_width = {
            512: ((32,), (64,), (128,), (256,), (512,)),
            256: ((16,), (32,), (64,), (128,), (256,)),
            128: ((8,), (16,), (32,), (64,), (128,)),
        }
        if self.WIDTH in scales_by_width:
            self.SCALES = scales_by_width[self.WIDTH]

        # NOTE(review): meaning of ``ratio`` is defined by the utils module.
        self.ratio = 0.4
        super().__init__(backbone_model)
        
# Build the shared configuration; Pikachu_config.__init__ reads the
# module-level backbone_model while doing so.
config=Pikachu_config()
# Drop the module-level backbone once the config has been built.
# NOTE(review): after this del, re-running this cell raises NameError unless
# the cell that creates backbone_model is re-run first.
del backbone_model

grid_sizes: ((32, 32), (16, 16), (8, 8), (4, 4), (1, 1))
image_size: (256, 256)
grid_heigh: 32   grid_width: 32
stride_height: 8.0    stride_width: 8.0
grid_heigh: 16   grid_width: 16
stride_height: 16.0    stride_width: 16.0
grid_heigh: 8   grid_width: 8
stride_height: 32.0    stride_width: 32.0
grid_heigh: 4   grid_width: 4
stride_height: 64.0    stride_width: 64.0
grid_heigh: 1   grid_width: 1
stride_height: 256.0    stride_width: 256.0


In [6]:
class Pikachu_dataset(utils.custom_dataset):
    """The ``train`` subset of the dataset rooted at ``./data/pikachu/``.

    Built with the module-level ``config`` object.
    """

    def __init__(self):
        super().__init__(config, "./data/pikachu/", subset="train")

    def preprocess(self, x):
        """Return ``x`` divided by 255 (optional override of the base hook).

        The intent is to limit values to the range [0, 1]; this assumes the
        incoming values lie in [0, 255] -- TODO confirm against the loader.
        """
        return x / 255.0

In [7]:
# Number of samples per mini-batch.
batch_size = 16
train_ds = Pikachu_dataset()
# NOTE(review): shuffle=False feeds the samples in the same order every
# epoch -- confirm this is intended for training.
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=False)

No of classes found: 1  ['pikachu']
Current ratio is 0.4


In [8]:
def flatten_pred(pred):
    """Move axis 1 of a 4-D tensor to the end, then flatten all but axis 0.

    The permutation ``(0, 2, 3, 1)`` requires ``pred`` to have exactly four
    dimensions. The result is 2-D: ``(pred.shape[0], product of the rest)``,
    with the original axis 1 varying fastest.
    """
    moved_last = pred.permute(0, 2, 3, 1)
    return moved_last.flatten(start_dim=1)

def concat_preds(preds):
    """Flatten every prediction with ``flatten_pred`` and join them on axis 1.

    ``preds`` is an iterable of tensors accepted by ``flatten_pred``; the
    flattened pieces are concatenated in iteration order.
    """
    flattened = []
    for pred in preds:
        flattened.append(flatten_pred(pred))
    return torch.cat(flattened, dim=1)

In [9]:
class TinySSD(nn.Module):
    """Backbone plus one class head and one box head per feature map.

    Heads are stored as attributes ``cls_<i>`` and ``bbox_<i>``, one pair for
    every entry of ``config.FEATURE_MAPS_SHAPES``. Class heads are built for
    ``config.NUM_CLASSES + 1`` classes.

    Args:
        config: object providing ``FEATURE_MAPS_SHAPES``,
            ``NUM_ANCHORS_PER_LOCATION`` and ``NUM_CLASSES``.
        freeze: when true, every backbone parameter gets
            ``requires_grad=False`` except those whose name contains
            ``"layer5"`` or ``"layer6"``; the names left trainable are printed.
    """

    def __init__(self, config, freeze=False):
        super().__init__()
        self.config = config
        self.backbone_model = backbone_network()

        if freeze:
            for param_name, param in self.backbone_model.named_parameters():
                if "layer5" in param_name or "layer6" in param_name:
                    print(param_name)
                else:
                    param.requires_grad = False

        shapes = self.config.FEATURE_MAPS_SHAPES
        anchors = self.config.NUM_ANCHORS_PER_LOCATION

        # All class heads are registered before any box head so the module and
        # parameter order stays cls_0..cls_k followed by bbox_0..bbox_k.
        for level, shape in enumerate(shapes):
            cls_head = utils.cls_predictor(
                in_channels=shape[1],
                num_anchors_per_pixel=anchors[level],
                num_classes=self.config.NUM_CLASSES + 1,
            )
            setattr(self, "cls_%d" % level, cls_head)

        for level, shape in enumerate(shapes):
            bbox_head = utils.bbox_predictor(shape[1], anchors[level])
            setattr(self, "bbox_%d" % level, bbox_head)

    def forward(self, x):
        """Run the backbone and all heads on ``x``.

        Returns:
            ``[cls_preds, bbox_preds]`` where ``cls_preds`` is viewed as
            ``(batch, -1, NUM_CLASSES + 1)`` and ``bbox_preds`` as
            ``(batch, -1, 4)``.
        """
        num_levels = len(self.config.FEATURE_MAPS_SHAPES)
        feature_maps = self.backbone_model(x)

        # Slots are pre-sized from the config; the backbone is expected to
        # return exactly that many feature maps.
        class_outputs = [None] * num_levels
        for level, fmap in enumerate(feature_maps):
            class_outputs[level] = getattr(self, "cls_%d" % level)(fmap)

        box_outputs = [None] * num_levels
        for level, fmap in enumerate(feature_maps):
            box_outputs[level] = getattr(self, "bbox_%d" % level)(fmap)

        flat_cls = concat_preds(class_outputs)
        cls_preds = flat_cls.view(flat_cls.shape[0], -1, self.config.NUM_CLASSES + 1)

        flat_bbox = concat_preds(box_outputs)
        bbox_preds = flat_bbox.view(flat_bbox.shape[0], -1, 4)

        return [cls_preds, bbox_preds]
        
            
# Detector with no backbone parameters frozen (freeze=False).
ssd_model = TinySSD(config=config, freeze=False)
# Adam over every model parameter with a learning rate of 1e-4.
optimizer = torch.optim.Adam(ssd_model.parameters(), lr=1e-4)

In [10]:
# Module-level aliases for the two loss functions provided by the utils module.
classification_loss=utils.classification_loss
regression_loss=utils.regression_loss

In [11]:
def loss_batch(model, loss_functions, sample, opt=None):
    """Compute the combined loss of one batch and optionally take one step.

    Args:
        model: callable; ``model(sample["img"])`` must return an indexable
            whose item 0 holds the class predictions and item 1 the box
            predictions.
        loss_functions: indexable whose item 0 is the class loss and item 1
            the box loss. Each is called with the keyword arguments
            ``y_true``, ``y_pred`` and ``rpn_match``.
        sample: mapping with the keys ``"offsets"``, ``"rpn_match"``,
            ``"img"`` and ``"class_ids"``.
        opt: optional optimizer. When given, the summed loss is
            back-propagated and one optimizer step is taken; when ``None``
            the loss is only evaluated.

    Returns:
        Tuple ``(loss_value, batch_len)``: the summed loss as a Python float
        and ``len(sample["img"])``.
    """
    offsets = sample["offsets"]
    rpn_match = sample["rpn_match"]
    xb = sample["img"]
    class_ids = sample["class_ids"]

    preds = model(xb)
    class_loss = loss_functions[0](y_true=class_ids, y_pred=preds[0], rpn_match=rpn_match)
    bbox_loss = loss_functions[1](y_true=offsets, y_pred=preds[1], rpn_match=rpn_match)
    loss = class_loss + bbox_loss

    if opt is not None:
        # Clear gradients BEFORE backward: anything left in .grad (for example
        # from a step interrupted between backward() and zero_grad()) would
        # otherwise be added to this batch's gradients.
        opt.zero_grad()
        loss.backward()
        opt.step()
        # Clear again so the post-condition callers had before still holds.
        opt.zero_grad()

    return loss.item(), len(xb)

In [12]:
def training_loop(num_epochs):
    """Train the module-level ``ssd_model`` for ``num_epochs`` epochs.

    Uses the module-level ``train_dl``, ``optimizer``, ``classification_loss``
    and ``regression_loss``. Every batch loss is printed as it is computed,
    and after each epoch the unweighted mean of that epoch's batch losses is
    printed.

    Args:
        num_epochs: number of passes over ``train_dl``; epochs are numbered
            from 1 in the printed summary.

    Raises:
        ValueError: if ``train_dl`` yields no batches, since the epoch mean
            would otherwise fail with a bare ZeroDivisionError.
    """
    loss_functions = [classification_loss, regression_loss]

    for epoch in range(1, num_epochs + 1):
        ssd_model.train()
        training_losses = []

        for sample in train_dl:
            # loss_batch returns (loss value, batch length); only the value is used.
            batch_loss, _ = loss_batch(ssd_model, loss_functions, sample, opt=optimizer)
            training_losses.append(batch_loss)
            print(batch_loss)

        # TODO: add a validation pass here (model in eval mode, under
        # torch.no_grad()) and print the validation loss for the epoch.

        if not training_losses:
            raise ValueError(
                "train_dl produced no batches; cannot compute the epoch training loss"
            )

        epoch_loss = sum(training_losses) / len(training_losses)
        print("epoch Num:  {}     training_loss: {}".format(epoch, epoch_loss))
        


In [13]:
# Train for two epochs; the loss of every batch is printed as it runs.
training_loop(2)

1.4553873538970947
1.202070713043213
1.0347061157226562
0.8625377416610718


KeyboardInterrupt: 