In [None]:
import torch
import torch.nn as nn

In [None]:
class CNNBlock(nn.Module):
  """Conv2d optionally followed by BatchNorm2d and LeakyReLU(0.1).

  Args:
    in_channels: Number of channels of the input feature map.
    out_channels: Number of channels produced by the convolution.
    bn_act: When True (default) the block computes
      ``leaky_relu(batch_norm(conv(x)))`` and the convolution carries no
      bias.  When False the block is a plain biased convolution and its
      raw output is returned with no normalisation and no activation, so
      that a final prediction layer emits unmodified values.
    **kwargs: Forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride,
      padding, ...).
  """

  def __init__(self, in_channels, out_channels, bn_act=True, **kwargs):
    super().__init__()
    # The bias is redundant when BatchNorm follows the convolution.
    self.conv = nn.Conv2d(in_channels, out_channels, bias=not bn_act, **kwargs)
    # bn / leaky are always registered (even when unused) so that the
    # module's state_dict keys stay the same as before.
    self.bn = nn.BatchNorm2d(out_channels)
    self.leaky = nn.LeakyReLU(0.1)
    self.use_bn_act = bn_act

  def forward(self, x):
    """Apply the block to ``x`` and return the resulting feature map."""
    if self.use_bn_act:
      return self.leaky(self.bn(self.conv(x)))
    # bn_act=False disables BOTH batch-norm and the activation.
    return self.conv(x)




In [None]:
import torch.nn.functional as F
class ResidualBlock(nn.Module):
  """Stack of ``num_repeats`` two-convolution units.

  Each unit is a 1x1 ``CNNBlock`` that halves the channel count followed
  by a 3x3 ``CNNBlock`` (padding 1) that restores it, so the input and
  output shapes are identical.

  Args:
    channels: Channel count of the input (and output) feature map.
    dropout: Accepted for call compatibility; it is currently not used
      anywhere in the block.
    num_repeats: Number of units to stack.
    use_residual: When True the input of every unit is added to its
      output (skip connection); when False the units are simply chained.
  """

  def __init__(self, channels, dropout: float, num_repeats, use_residual=False):
    super().__init__()
    self.layers = nn.ModuleList([
        nn.Sequential(
            CNNBlock(channels, channels // 2, kernel_size=1),
            CNNBlock(channels // 2, channels, kernel_size=3, padding=1),
        )
        for _ in range(num_repeats)
    ])
    self.use_residual = use_residual
    # Exposed because Yolov3.forward inspects it to pick route connections.
    self.num_repeats = num_repeats

  def forward(self, x):
    """Run ``x`` through every unit, adding the skip connection if enabled."""
    for layer in self.layers:
      out = layer(x)
      x = out + x if self.use_residual else out
    return x



In [None]:
class ScalePrediction(nn.Module):
  """Prediction head for a single scale.

  A 3x3 ``CNNBlock`` doubles the channels, then a 1x1 ``CNNBlock`` built
  with ``bn_act=False`` maps them to ``3 * (num_classes + 5)`` channels.

  Args:
    in_channels: Channel count of the incoming feature map.
    num_classes: Number of object classes.
  """

  def __init__(self, in_channels, num_classes):
    super().__init__()
    hidden_channels = 2 * in_channels
    out_channels = (num_classes + 5) * 3
    self.pred = nn.Sequential(
        CNNBlock(in_channels, hidden_channels, kernel_size=3, padding=1),
        CNNBlock(hidden_channels, out_channels, bn_act=False, kernel_size=1),
    )
    self.num_classes = num_classes

  def forward(self, x):
    """Return predictions shaped ``(N, 3, H, W, num_classes + 5)``.

    ``N``, ``H`` and ``W`` are taken from dimensions 0, 2 and 3 of ``x``.
    """
    raw = self.pred(x)
    batch_size, grid_h, grid_w = x.shape[0], x.shape[2], x.shape[3]
    # Split the channel axis into (3, num_classes + 5) ...
    per_group = raw.reshape(batch_size, 3, self.num_classes + 5, grid_h, grid_w)
    # ... and move the per-group values to the last axis.
    return per_group.permute(0, 1, 3, 4, 2)

In [None]:
# A list entry ["B", n] denotes a ResidualBlock with n repeats (block marker, number of repeats).
# Layer-by-layer description of the network, consumed in order by
# Yolov3._create_conv_layers:
#   (out_channels, kernel_size, stride) -> one CNNBlock
#   ["B", num_repeats]                  -> one ResidualBlock
#   "S"                                 -> scale-prediction branch
#   "U"                                 -> upsampling step (intended: 2x upsample,
#                                          then concat with a stored route)
config = [
    (32, 3, 1),
    (64, 3, 2),
    ["B", 1],
    (128, 3, 2),
    ["B", 2],
    (256, 3, 2),
    ["B", 8],   # output kept as a route connection (num_repeats == 8)
    (512, 3, 2),
    ["B", 8],   # output kept as a route connection (num_repeats == 8)
    (1024, 3, 2),
    ["B", 4],
    (512, 1, 1),
    (1024, 3, 1),
    "S",
    (256, 1, 1),
    "U",
    (256, 1, 1),
    (512, 3, 1),
    "S",
    (128, 1, 1),
    # The second upsampling step and the 1x1 conv after it were missing.
    # Without them the last head never reaches the finest grid and the
    # first stored route connection is never consumed.
    "U",
    (128, 1, 1),
    (256, 3, 1),
    "S",
]

In [None]:
class Yolov3(nn.Module):
  """YOLOv3-style detector assembled from the module-level ``config`` list.

  Args:
    in_channels: Number of channels of the input image.
    num_classes: Number of object classes predicted by every head.
  """

  def __init__(self, in_channels=3, num_classes=20):
    super().__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.layers = self._create_conv_layers()

  def forward(self, x):
    """Return one prediction tensor per ``ScalePrediction`` layer, in order.

    The outputs of the 8-repeat residual blocks are remembered and are
    concatenated (most recent first) onto the feature map right after
    each upsampling layer.
    """
    outputs = []
    route_connections = []
    for layer in self.layers:
      if isinstance(layer, ScalePrediction):
        # Heads branch off: their output is collected and the main
        # path continues from the same ``x``.
        outputs.append(layer(x))
        continue
      x = layer(x)
      if isinstance(layer, ResidualBlock) and layer.num_repeats == 8:
        route_connections.append(x)
      elif isinstance(layer, nn.Upsample):
        # Feature maps are (N, C, H, W): the skip connection must be
        # joined on the channel axis (dim=1), not on the width (dim=-1).
        x = torch.cat([x, route_connections.pop()], dim=1)
    return outputs

  def _create_conv_layers(self):
    """Translate ``config`` into an ``nn.ModuleList`` while tracking channels."""
    layers = nn.ModuleList()
    in_channels = self.in_channels
    for module in config:
      if isinstance(module, tuple):
        out_channels, kernel_size, stride = module
        layers.append(
            CNNBlock(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=1 if kernel_size == 3 else 0,
            )
        )
        in_channels = out_channels
      elif isinstance(module, list):
        num_repeats = module[1]
        # Backbone blocks need their skip connections; the default of
        # ResidualBlock is use_residual=False, so ask for them explicitly.
        layers.append(
            ResidualBlock(in_channels, 0.1, num_repeats=num_repeats, use_residual=True)
        )
      elif isinstance(module, str):
        if module == "S":
          layers += [
              ResidualBlock(in_channels, 0.1, use_residual=False, num_repeats=1),
              CNNBlock(in_channels, in_channels // 2, kernel_size=1),
              ScalePrediction(in_channels // 2, num_classes=self.num_classes),
          ]
          in_channels = in_channels // 2
        elif module == "U":
          # This branch used to sit one level too shallow, which made it
          # unreachable: every "U" was swallowed by the isinstance(str)
          # branch above and no Upsample layer was ever created.
          layers.append(nn.Upsample(scale_factor=2))
          # After upsampling, the route concatenated in forward() has
          # twice the current channel count, hence 3x in total.
          in_channels = in_channels * 3
    return layers



In [None]:
from torchsummary import summary

In [None]:
num_classes = 20
IMAGE_SIZE = 416
model = Yolov3(num_classes = num_classes )
# Use the IMAGE_SIZE constant instead of repeating the literal so the
# summary always matches the configured input resolution.
summary(model , (3 , IMAGE_SIZE , IMAGE_SIZE))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 416, 416]             864
       BatchNorm2d-2         [-1, 32, 416, 416]              64
         LeakyReLU-3         [-1, 32, 416, 416]               0
          CNNBlock-4         [-1, 32, 416, 416]               0
            Conv2d-5         [-1, 64, 208, 208]          18,432
       BatchNorm2d-6         [-1, 64, 208, 208]             128
         LeakyReLU-7         [-1, 64, 208, 208]               0
          CNNBlock-8         [-1, 64, 208, 208]               0
            Conv2d-9         [-1, 32, 208, 208]           2,048
      BatchNorm2d-10         [-1, 32, 208, 208]              64
        LeakyReLU-11         [-1, 32, 208, 208]               0
         CNNBlock-12         [-1, 32, 208, 208]               0
           Conv2d-13         [-1, 64, 208, 208]          18,432
      BatchNorm2d-14         [-1, 64, 2

In [None]:
# Download the aladdinpersson/pascalvoc-yolo dataset archive with the Kaggle CLI.
# NOTE(review): presumably needs Kaggle API credentials configured beforehand - confirm.
!kaggle datasets download -d aladdinpersson/pascalvoc-yolo

Dataset URL: https://www.kaggle.com/datasets/aladdinpersson/pascalvoc-yolo
License(s): unknown
pascalvoc-yolo.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
# -n: never overwrite files that already exist, so a re-run does not stop
#     at unzip's interactive "replace ...?" prompt; -q: keep the output short.
!unzip -n -q /content/pascalvoc-yolo.zip

Archive:  /content/pascalvoc-yolo.zip
replace 100examples.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
import numpy as np
import os
import pandas as pd
import torch
from PIL import Image , ImageFile
from torch.utils.data import Dataset , DataLoader

In [None]:
def iou(box1 , box2 , eps = 1e-6):
  """Intersection-over-union of boxes given in midpoint format.

  Only indices 1..4 of the last axis are read, interpreted as
  ``(x_center, y_center, width, height)``; index 0 is ignored.  The two
  inputs are combined element-wise, so any broadcastable shapes work.

  Args:
    box1: Tensor whose last axis has at least 5 entries.
    box2: Tensor whose last axis has at least 5 entries.
    eps: Small constant added to the union so that two zero-area boxes
      give 0 instead of NaN.

  Returns:
    Tensor of IoU values with the broadcast shape of the inputs minus
    the last axis.
  """
  # Convert midpoint format to corner coordinates.  The lower edges are
  # centre MINUS half the size; using the bare centre (as before) cut
  # every box down to its upper-right quadrant.
  box1_x_min = box1[... , 1] - box1[... , 3] * 0.5
  box1_x_max = box1[... , 1] + box1[... , 3] * 0.5
  box1_y_min = box1[... , 2] - box1[... , 4] * 0.5
  box1_y_max = box1[... , 2] + box1[... , 4] * 0.5
  box2_x_min = box2[... , 1] - box2[... , 3] * 0.5
  box2_x_max = box2[... , 1] + box2[... , 3] * 0.5
  box2_y_min = box2[... , 2] - box2[... , 4] * 0.5
  box2_y_max = box2[... , 2] + box2[... , 4] * 0.5

  x_left = torch.max(box1_x_min , box2_x_min)
  x_right = torch.min(box1_x_max , box2_x_max)
  y_below = torch.max(box1_y_min , box2_y_min)
  y_above = torch.min(box1_y_max , box2_y_max)

  # clamp(0) turns the negative extents of disjoint boxes into zero area.
  area_o_int = (x_right - x_left).clamp(0) * (y_above - y_below).clamp(0)
  area_total = box1[... , 3] * box1[... , 4] + box2[... , 3] * box2[... , 4] - area_o_int
  return area_o_int / (area_total + eps)

In [None]:
# Allow PIL to load image files whose data is truncated instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True


In [None]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the one this notebook was last run with.
%pip install -q svn==1.0.1

Collecting svn
  Downloading svn-1.0.1.tar.gz (12 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nose (from svn)
  Downloading nose-1.3.7-py3-none-any.whl.metadata (1.7 kB)
Downloading nose-1.3.7-py3-none-any.whl (154 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.7/154.7 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: svn
  Building wheel for svn (setup.py) ... [?25l[?25hdone
  Created wheel for svn: filename=svn-1.0.1-py2.py3-none-any.whl size=16037 sha256=b030a1d83725a0b01a5dacdef082a94423a653e0b7d842e82d6f647160c478f1
  Stored in directory: /root/.cache/pip/wheels/53/b1/38/b7c80242ad28c7cc6b8f1de515dc16d0cf654b96e4448034d4
Successfully built svn
Installing collected packages: nose, svn
Successfully installed nose-1.3.7 svn-1.0.1


In [None]:
# -y answers the confirmation prompt so the cell cannot block a full re-run; -q trims the log.
!sudo apt-get install -y -q subversion

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libapr1 libaprutil1 libserf-1-1 libsvn1 libutf8proc2
Suggested packages:
  db5.3-util libapache2-mod-svn subversion-tools
The following NEW packages will be installed:
  libapr1 libaprutil1 libserf-1-1 libsvn1 libutf8proc2 subversion
0 upgraded, 6 newly installed, 0 to remove and 49 not upgraded.
Need to get 2,672 kB of archives.
After this operation, 10.5 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libapr1 amd64 1.7.0-8ubuntu0.22.04.1 [108 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libaprutil1 amd64 1.6.1-5ubuntu4.22.04.2 [92.8 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libserf-1-1 amd64 1.3.9-10ubuntu2 [50.0 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libutf8proc2 amd64 2.7.0-3 [73.9 kB]
Get:5 http://archive.

In [None]:
class YOLODataset(Dataset):
  """Dataset yielding ``(image, targets)`` pairs for multi-scale YOLO training.

  Args:
    csv_file: CSV read with ``pd.read_csv``; column 0 holds the image file
      name and column 1 the label file name of each sample.
    img_dir: Directory containing the image files.
    label_dir: Directory containing the label files.  Each row of a label
      file is ``class x y w h`` separated by single spaces.
    anchors: Three lists of ``(width, height)`` anchors, one list per
      scale, in the same order as ``S``.
    image_size: Accepted for call compatibility; not used by this class.
    S: Grid size of every prediction scale.
    C: Number of classes (stored only).
    transform: Optional callable invoked as
      ``transform(image=..., bboxes=...)`` that returns a mapping with
      the keys ``"image"`` and ``"bboxes"``.
  """

  def __init__(self, csv_file, img_dir, label_dir, anchors, image_size=416,
               S=[13, 26, 52], C=20, transform=None):
    self.annotations = pd.read_csv(csv_file)
    self.img_dir = img_dir
    self.label_dir = label_dir
    self.transform = transform
    # Copy so the shared default list can never be mutated through self.S.
    self.S = list(S)
    self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])  # all scales, flattened
    self.num_anchors = self.anchors.shape[0]
    self.num_anchors_per_scale = self.num_anchors // 3
    self.C = C
    self.ignore_iou_thresh = 0.5

  def __len__(self):
    return len(self.annotations)

  @staticmethod
  def _iou_width_height(box_wh, anchors_wh):
    """IoU of (width, height) pairs, treating all boxes as sharing one centre."""
    intersection = (torch.min(box_wh[..., 0], anchors_wh[..., 0])
                    * torch.min(box_wh[..., 1], anchors_wh[..., 1]))
    union = (box_wh[..., 0] * box_wh[..., 1]
             + anchors_wh[..., 0] * anchors_wh[..., 1]
             - intersection)
    return intersection / union

  def _build_targets(self, bboxes):
    """Encode ``[x, y, w, h, class]`` boxes into one target tensor per scale.

    Every tensor has shape ``(anchors_per_scale, S, S, 6)`` with the last
    axis laid out as ``[objectness, x, y, w, h, class]``.  Objectness is 1
    for the anchor responsible for a box, -1 for an unassigned anchor
    whose IoU with the box exceeds ``ignore_iou_thresh``, and 0 otherwise.
    """
    targets = [torch.zeros((self.num_anchors_per_scale, S, S, 6)) for S in self.S]
    for box in bboxes:
      x, y, width, height, class_label = box
      # Anchors only carry (w, h), so compare shapes and ignore position.
      box_wh = torch.tensor([width, height], dtype=self.anchors.dtype)
      iou_anchors = self._iou_width_height(box_wh, self.anchors)
      anchor_indices = iou_anchors.argsort(descending=True, dim=0)
      has_anchor = [False] * len(self.S)

      # Walk the anchors from best to worst match; plain ints keep the
      # index arithmetic below free of tensor floor-division.
      for anchor_idx in anchor_indices.tolist():
        scale_idx = anchor_idx // self.num_anchors_per_scale
        anchor_on_scale = anchor_idx % self.num_anchors_per_scale
        S = self.S[scale_idx]
        # A coordinate of exactly 1.0 would index one past the grid.
        i = min(int(S * y), S - 1)
        j = min(int(S * x), S - 1)

        anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
        if not anchor_taken and not has_anchor[scale_idx]:
          targets[scale_idx][anchor_on_scale, i, j, 0] = 1
          x_cell = S * x - j
          y_cell = S * y - i
          width_cell, height_cell = width * S, height * S
          targets[scale_idx][anchor_on_scale, i, j, 1:5] = torch.tensor(
              [x_cell, y_cell, width_cell, height_cell]
          )
          targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
          # Only the best anchor of each scale is responsible for the box.
          has_anchor[scale_idx] = True
        elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
          # Good-but-not-best match: mark so the loss can skip it.
          targets[scale_idx][anchor_on_scale, i, j, 0] = -1
    return targets

  def __getitem__(self, index):
    """Load sample ``index`` and return ``(image, targets)``.

    ``image`` is an RGB numpy array, or whatever ``transform`` returns
    under ``"image"``; ``targets`` is the list built by ``_build_targets``.
    """
    label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
    img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
    # Rotate each row by 4: [class, x, y, w, h] -> [x, y, w, h, class].
    bboxes = np.roll(
        np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
    ).tolist()
    image = np.array(Image.open(img_path).convert("RGB"))
    if self.transform:
      augmentations = self.transform(image=image, bboxes=bboxes)
      image = augmentations["image"]
      bboxes = augmentations["bboxes"]
    return image, self._build_targets(bboxes)

In [None]:
class YoloLoss(nn.Module):
  def __init__(self ):
    super().__init__()
    self.mse = nn.MSELoss()
    self.bce = nn.BCEWithLogitsLoss()
    self.entropy = nn.CrossEntropy()
    self.sigmoid = nn.Sigmoid()

    #constants
    self.lambda_class = 1
    self.lambda_nooobj = 10
    self.lambda_obj = 1
    self.lambda_box = 1
  def forward(self , predictions , target , anchors):
    obj = target[...  , 0] == 1
    noobj = target[... ,0] == 0

    #No object loss
    no_object_loss = self.bce(
        predictions[... , 0:1][noobj] , target[... , 0:1][noobj]
    )

    # object loss
    anchors = anchors.reshape(1 , 3 , 1 , 1 ,2) # 3 * 2
    box_pred = torch.cat([self.sigmoid(predictions[... , 1:3]) , torch.exp(predictions[... , 3 : 5] * anchors)] , dim = -1)
    ious
