In [None]:
####

In [None]:
#! unzip '/content/drive/MyDrive/archive.zip' -d '/content/drive/MyDrive/Yolo_Dataset/'

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
from torchvision.utils import make_grid
from torchvision import transforms
from PIL import Image

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
def show_tensor_images(image_tensor, num_images=2, size=(3 , 448 , 448)):
    """Display the first `num_images` images of a batch as a single grid.

    Args:
        image_tensor: tensor that can be viewed as (-1, *size).
        num_images: how many images (from the front of the batch) to show.
        size: per-image shape used when reshaping, channels first.
    """
    # Detach from autograd and move to the CPU before handing off to matplotlib.
    images = image_tensor.detach().cpu().view(-1, *size)[:num_images]
    grid = make_grid(images, nrow=5)
    # make_grid returns channels-first; imshow expects channels-last.
    plt.imshow(grid.permute(1, 2, 0).squeeze())
    plt.show()

In [None]:
def iou(boxes_preds , boxes_labels):
    """Intersection-over-union of boxes given in midpoint format.

    Both inputs store a box in their last dimension as
    (x_center, y_center, width, height) at indices 0..3. The leading
    dimensions of the two tensors must be broadcastable.

    Args:
        boxes_preds: predicted boxes, shape (..., 4).
        boxes_labels: reference boxes, shape (..., 4).

    Returns:
        Tensor of shape (..., 1) with the IoU of each box pair.
    """
    # Convert (center, size) to corner coordinates for both sets of boxes.
    box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
    box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
    box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
    box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
    box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
    box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
    box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
    box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    # The overlap rectangle starts at the larger of the two top-left corners
    # and ends at the smaller of the two bottom-right corners.
    x1 = torch.max(box1_x1 , box2_x1)
    y1 = torch.max(box1_y1 , box2_y1)
    x2 = torch.min(box1_x2 , box2_x2)
    y2 = torch.min(box1_y2 , box2_y2)

    # clamp(0) turns a negative extent (disjoint boxes) into zero overlap.
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = torch.abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = torch.abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
    # The small epsilon avoids division by zero for degenerate boxes.
    return intersection / (box1_area + box2_area - intersection + 1e-6)

In [None]:
class Conv(nn.Module):
    """2-D convolution with optional batch norm, LeakyReLU and 2x2 max-pool.

    The optional stages are applied in the order norm -> activation -> pool.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size, stride, padding: forwarded to ``nn.Conv2d``.
        use_norm: add ``nn.BatchNorm2d`` after the convolution.
        use_activation: add ``nn.LeakyReLU(0.1)``.
        use_pool: add ``nn.MaxPool2d(kernel_size=2, stride=2)``.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
                 padding=1, use_norm=False, use_activation=True, use_pool=True):
        super().__init__()

        # The flags are stored because forward() consults them on every call.
        self.use_norm = use_norm
        self.use_activation = use_activation
        self.use_pool = use_pool

        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )

        # Optional stages only exist as attributes when they are enabled.
        if use_norm:
            self.norm = nn.BatchNorm2d(out_channels)
        if use_activation:
            self.activation = nn.LeakyReLU(0.1)
        if use_pool:
            self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Apply the convolution and every enabled optional stage to `x`."""
        out = self.conv1(x)
        out = self.norm(out) if self.use_norm else out
        out = self.activation(out) if self.use_activation else out
        out = self.maxpool(out) if self.use_pool else out
        return out

In [None]:
# Shape check: a stride-2 3x3 convolution without pooling on a 7x7 feature map.
x = torch.randn(2, 1024, 7, 7).to(device)
conv = Conv(
    in_channels=1024,
    out_channels=1024,
    kernel_size=3,
    stride=2,
    padding=1,
    use_pool=False,
).to(device)
z = conv(x)
z.shape

In [None]:
class YOLO_Block(nn.Module):
    """A 1x1 convolution to half the output width, then a 3x3 convolution.

    Args:
        in_channels: number of input channels.
        out_channels: channels produced by the second convolution; the first
            convolution produces ``out_channels // 2``.
        use_pool: whether the second convolution is followed by max-pooling.
            The first convolution never pools.
    """

    def __init__(self, in_channels, out_channels, use_pool=False):
        super().__init__()

        bottleneck_channels = out_channels // 2
        # Positional arguments are kernel_size=1, stride=1, padding=0.
        self.conv1 = Conv(in_channels, bottleneck_channels, 1, 1, 0, use_pool=False)
        # Uses Conv's default 3x3 / stride 1 / padding 1 settings.
        self.conv2 = Conv(bottleneck_channels, out_channels, use_pool=use_pool)

    def forward(self, x):
        """Run `x` through the 1x1 and then the 3x3 convolution."""
        return self.conv2(self.conv1(x))

In [None]:
# Shape check: one pooled YOLO_Block on a batch of two 512x512 RGB inputs.
x = torch.randn(2, 3, 512, 512).to(device)
yolo_block = YOLO_Block(in_channels=3, out_channels=32, use_pool=True).to(device)
z = yolo_block(x)
z.shape

In [None]:
class Repeated_YOLO_Block(nn.Module):
    """`repeats` YOLO_Block modules chained in an ``nn.Sequential``.

    The first block maps ``in_channels`` to ``out_channels``; every later
    block maps ``out_channels`` to ``out_channels``.

    NOTE(review): the pooling flag below compares the loop index with
    ``repeats``, a value the index never reaches, so no block in the stack
    pools. The YOLO class in this file adds its own pooled YOLO_Block after
    each stack, so changing this would change the network's feature-map
    sizes -- confirm the intent before "fixing" it.
    """

    def __init__(self, in_channels, out_channels, repeats):
        super().__init__()

        blocks = []
        current_channels = in_channels
        for index in range(repeats):
            # Always False for index in 0..repeats-1 (see class note).
            pool_here = index == repeats
            blocks.append(YOLO_Block(current_channels, out_channels, use_pool=pool_here))
            current_channels = out_channels

        self.conv = nn.Sequential(*blocks)

    def forward(self, x):
        """Apply the stacked blocks to `x`."""
        return self.conv(x)

In [None]:
# Shape check for a stack of four YOLO blocks.
x = torch.randn(2 , 3 , 512 , 512).to(device)
# The module must live on the same device as its input; without .to(device)
# this cell fails with a device-mismatch error whenever CUDA is available.
repeated_block = Repeated_YOLO_Block(3 , 32 , 4).to(device)
z = repeated_block(x)
z.shape

In [None]:
class YOLO(nn.Module):
    """Convolutional backbone followed by two linear layers, YOLO-v1 style.

    Args:
        in_channels: channels of the input image.
        out_channels: accepted but not used anywhere in this class.
        hidden_dim_list: eleven sizes. Entries 0-7 are convolution channel
            widths, entry 8 is the flattened feature size fed to ``fc1``,
            entry 9 is the width of ``fc1`` and entry 10 the width of ``fc2``.
        repeat_list: two repeat counts, for the stacks in ``conv4`` and
            ``conv5`` respectively.

    forward() reshapes the ``fc2`` output to (batch, 7, 7, 30), so
    ``hidden_dim_list[10]`` has to equal 7 * 7 * 30 = 1470.
    """

    def __init__(self, in_channels, out_channels, hidden_dim_list, repeat_list):
        super().__init__()
        dims = hidden_dim_list

        # Stem: 7x7 stride-2 convolution (pooled by Conv's default).
        self.conv1 = Conv(in_channels, dims[0], kernel_size=7, stride=2, padding=3)
        self.conv2 = Conv(dims[0], dims[1])

        # Two un-pooled bottleneck blocks.
        self.conv3 = nn.Sequential(
            YOLO_Block(dims[1], dims[2]),
            YOLO_Block(dims[2], dims[3]),
        )

        # Each of the next two stages is a repeated stack plus one pooled block.
        self.conv4 = nn.Sequential(
            Repeated_YOLO_Block(dims[3], dims[4], repeat_list[0]),
            YOLO_Block(dims[4], dims[5], use_pool=True),
        )
        self.conv5 = nn.Sequential(
            Repeated_YOLO_Block(dims[5], dims[6], repeat_list[1]),
            YOLO_Block(dims[6], dims[7], use_pool=True),
        )

        self.conv6 = nn.Sequential(
            Conv(dims[7], dims[7], use_pool=False),
            Conv(dims[7], dims[7], use_pool=True),
        )
        self.conv7 = nn.Sequential(
            Conv(dims[7], dims[7], kernel_size=3, stride=2, padding=1, use_pool=True),
        )

        # Head: flatten, then two linear layers with nothing in between.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(dims[8], dims[9])
        self.fc2 = nn.Linear(dims[9], dims[10])

    def forward(self, x):
        """Return predictions of shape (batch, 7, 7, 30) for input `x`."""
        stages = (
            self.conv1,
            self.conv2,
            self.conv3,
            self.conv4,
            self.conv5,
            self.conv6,
            self.conv7,
        )
        for stage in stages:
            x = stage(x)

        x = self.fc2(self.fc1(self.flatten(x)))
        # (S x S x (B * 5 + C)) => S = grid , B = Bounding Boxes , C = Class Probabilities
        # x -> (7 x 7 x 30)
        return x.view(x.shape[0], 7, 7, 30)

In [None]:
# Shape check for the full network on two 448x448 RGB inputs.
x = torch.randn(2 , 3 , 448 , 448).to(device)
hidden_dim_list = [64 , 192 , 128 , 256 , 256 , 512 , 1024 , 1024 , 4096 , 2048 , 1470]
repeat_list = [4 , 2]
# The model must be on the same device as `x`; without .to(device) this cell
# fails with a device-mismatch error whenever CUDA is available.
yolo = YOLO(3 , 32 , hidden_dim_list , repeat_list).to(device)
z = yolo(x)
z.shape

In [None]:
class Loss(nn.Module):
    """Sum-of-squared-errors detection loss for a two-box-per-cell YOLO head.

    The last dimension of `predictions` is read with this layout, where
    C = ``self.C``:

        [0, C)          class scores
        C               confidence of box 1
        [C+1, C+5)      box 1 as (x, y, w, h)
        C+5             confidence of box 2
        [C+6, C+10)     box 2 as (x, y, w, h)

    `target` uses the same layout but only its box-1 slots are read.
    The layout is fixed to two predicted boxes; ``self.B`` is stored but not
    used by forward().

    Args:
        S: grid size (stored, not used by forward()).
        B: boxes per cell (stored, not used by forward()).
        C: number of classes; sets the channel offsets described above.
    """

    def __init__(self ,
                 S = 7 ,
                 B = 2 ,
                 C = 20):
        super(Loss , self).__init__()

        self.criterion = nn.MSELoss(reduction='sum')
        self.S = S
        self.B = B
        self.C = C

        # Weights for the no-object confidence term and the box term.
        self.lambda_noobj = 0.5
        self.lambda_coord = 5

    def forward(self , predictions , target):
        """Return the scalar loss for `predictions` against `target`.

        Both tensors are indexed as (batch, row, col, channel) with the
        channel layout described on the class.
        """
        C = self.C
        # Channel slices derived from the class count rather than hard-coded.
        classes = slice(0 , C)
        conf1 , box1 = slice(C , C + 1) , slice(C + 1 , C + 5)
        conf2 , box2 = slice(C + 5 , C + 6) , slice(C + 6 , C + 10)

        # IoU of each predicted box with the single target box of the cell.
        iou_b1 = iou(predictions[... , box1] , target[... , box1])
        iou_b2 = iou(predictions[... , box2] , target[... , box1])
        ious = torch.cat((iou_b1.unsqueeze(0) , iou_b2.unsqueeze(0)) , dim=0)
        # bestbox is 0 where box 1 has the higher IoU and 1 where box 2 does.
        _ , bestbox = torch.max(ious , dim=0)
        # 1 in cells whose target confidence is 1; keeps a trailing dim of 1.
        exists_box = target[... , C].unsqueeze(3)

        # ---- box coordinate loss (responsible box only, object cells only)
        box_predictions = exists_box * ((
            bestbox * predictions[... , box2]
            + (1 - bestbox) * predictions[... , box1]
        ))

        box_target = exists_box * target[... , box1]

        # Width/height are compared in square-root space. Predictions may be
        # negative, so take sqrt of the magnitude and restore the sign.
        box_predictions[... , 2:4] = torch.sign(box_predictions[... , 2:4]) * torch.sqrt(
            torch.abs(box_predictions[... , 2:4] + 1e-6)
        )

        box_target[... , 2:4] = torch.sqrt(box_target[... , 2:4])

        box_loss = self.criterion(
            torch.flatten(box_predictions , end_dim=-2) ,
            torch.flatten(box_target , end_dim=-2)
        )

        # ---- confidence loss for cells that contain an object
        pred_box = bestbox * predictions[... , conf2] + (1 - bestbox) * predictions[... , conf1]
        object_loss = self.criterion(
            torch.flatten(exists_box * pred_box) ,
            torch.flatten(exists_box * target[... , conf1])
        )

        # ---- confidence loss for empty cells (both predicted boxes)
        no_obj_loss = self.criterion(
            torch.flatten((1 - exists_box) * predictions[... , conf2] , start_dim=1) ,
            torch.flatten((1 - exists_box) * target[... , conf1] , start_dim = 1)
        )

        no_obj_loss += self.criterion(
            torch.flatten((1 - exists_box) * predictions[... , conf1] , start_dim=1) ,
            torch.flatten((1 - exists_box) * target[... , conf1] , start_dim = 1)
        )

        # ---- class score loss (object cells only)
        class_loss = self.criterion(
            torch.flatten(exists_box * predictions[... , classes] , end_dim=-2) ,
            torch.flatten(exists_box * target[... , classes] , end_dim=-2)
        )

        loss = (
            self.lambda_coord * box_loss
            + object_loss
            + self.lambda_noobj * no_obj_loss
            + class_loss
        )
        return loss

In [None]:
class Dataset_(torch.utils.data.Dataset):
    """Image / label-grid dataset for YOLO-style training.

    The CSV file lists one sample per row: column 0 is the image file name
    (relative to `image_dir`) and column 1 the label file name (relative to
    `label_dir`). Each non-empty line of a label file holds five
    whitespace-separated numbers: ``class x y width height``. The box values
    are scaled by `S` to place them on the grid, so they are presumably
    normalised to [0, 1] -- confirm against the dataset.

    Args:
        csv_file: path of the CSV index.
        image_dir: directory containing the images.
        label_dir: directory containing the label text files.
        S: grid size.
        B: boxes per cell (only affects the label depth ``C + 5 * B``).
        C: number of classes.
        transforms: optional callable applied to the image tensor only.
    """

    def __init__(self ,
                 csv_file ,
                 image_dir ,
                 label_dir ,
                 S = 7 ,
                 B = 2 ,
                 C = 20 ,
                 transforms = None):
        super(Dataset_ , self).__init__()

        self.df = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.label_dir = label_dir

        self.S = S
        self.B = B
        self.C = C
        self.transforms = transforms

    def __len__(self):
        """Number of rows in the CSV index."""
        return len(self.df)

    def _read_boxes(self , label_path):
        """Parse a label file into ``[class, x, y, width, height]`` lists."""
        boxes = []
        with open(label_path) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                class_label , x , y , width , height = fields
                # Go through float() so class ids written as "11.0" parse too.
                boxes.append([
                    int(float(class_label)) ,
                    float(x) , float(y) , float(width) , float(height)
                ])
        return boxes

    def _encode_boxes(self , boxes):
        """Build the (S, S, C + 5 * B) label grid for the given boxes."""
        label_matrix = torch.zeros((self.S , self.S , self.C + 5 * self.B))

        for class_label , x , y , width , height in boxes:
            # Row/column of the cell containing the box centre. Clamp so a
            # coordinate of exactly 1.0 lands in the last cell, not outside.
            i = min(int(self.S * y) , self.S - 1)
            j = min(int(self.S * x) , self.S - 1)
            # Centre offset relative to the cell, and size in cell units.
            x_cell = self.S * x - j
            y_cell = self.S * y - i
            width_cell = width * self.S
            height_cell = height * self.S

            # Only the first object assigned to a cell is kept.
            if label_matrix[i , j , self.C] == 0:
                label_matrix[i , j , self.C] = 1
                label_matrix[i , j , self.C + 1:self.C + 5] = torch.tensor(
                    [x_cell , y_cell , width_cell , height_cell]
                )
                label_matrix[i , j , class_label] = 1

        return label_matrix

    def __getitem__(self , idx):
        """Return ``(image, label_matrix)`` for sample `idx`.

        `image` is the channels-first image tensor (after `transforms`, if
        given); `label_matrix` encodes every box of the sample. A sample
        with no boxes yields an all-zero label grid.
        """
        label_path = os.path.join(self.label_dir , self.df.iloc[idx , 1])
        boxes = self._read_boxes(label_path)

        image_path = os.path.join(self.image_dir , self.df.iloc[idx , 0])
        image = np.asarray(plt.imread(image_path))
        # Height-width-channel array -> channel-first tensor.
        image = torch.from_numpy(image).permute(2 , 0 , 1)

        if self.transforms:
            image = self.transforms(image)

        # Return only after every box has been encoded.
        label_matrix = self._encode_boxes(boxes)
        return image , label_matrix

In [None]:
# Image preprocessing: tensor -> PIL image, resize to 448x448, back to a tensor.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((448, 448)),
        transforms.ToTensor(),
    ]
)

In [None]:
# Training set: CSV index plus image / label folders on the mounted drive.
dataset = Dataset_(
    csv_file='/content/drive/MyDrive/Yolo_Dataset/train.csv',
    image_dir='/content/drive/MyDrive/Yolo_Dataset/images',
    label_dir='/content/drive/MyDrive/Yolo_Dataset/labels',
    transforms=transform,
)

In [None]:
# Shuffled mini-batches of two samples each.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
)

In [None]:
# Preview a single batch: show its images and print the label tensor's shape.
for x , y in dataloader:
    show_tensor_images(x)
    print(y.shape)
    # Only the first batch is needed for the preview.
    break

In [None]:
# Model used for training. Entries 0-7 are convolution widths, entry 8 the
# flattened feature size, entries 9 and 10 the two linear layer widths.
hidden_dim_list = [64, 192, 128, 256, 256, 512, 1024, 1024, 4096, 2048, 1470]
repeat_list = [4, 2]
yolo = YOLO(
    in_channels=3,
    out_channels=32,
    hidden_dim_list=hidden_dim_list,
    repeat_list=repeat_list,
).to(device)

In [None]:
# Training hyperparameters.
epochs = 100
lr = 2e-4
betas = (0.5, 0.999)
# Number of optimisation steps between progress reports.
display_step = 500

# Loss module and optimiser for the model's parameters.
criterion = Loss().to(device)
opt = torch.optim.Adam(yolo.parameters(), lr=lr, betas=betas)

In [None]:
def train():
    """Train the module-level `yolo` model for `epochs` passes over `dataloader`.

    Uses the module-level `opt`, `criterion`, `device` and `display_step`.
    After every `display_step` optimisation steps it prints the mean batch
    loss over exactly those steps and shows the current input batch.
    """
    mean_yolo_loss = 0
    cur_step = 0
    for epoch in range(epochs):
        for x , y in dataloader:
            x , y = x.to(device) , y.to(device)

            opt.zero_grad()
            y_ = yolo(x)
            loss = criterion(y_ , y)
            loss.backward()
            opt.step()

            # Accumulate this step's share of the windowed mean.
            mean_yolo_loss += loss.item() / display_step
            cur_step += 1

            # Report once a full window of `display_step` losses has been
            # accumulated, then start a fresh window. Counting the step
            # before the check avoids reporting a one-sample "mean" at step 0.
            if cur_step % display_step == 0:
                print(f'Epoch {epoch} , cur_step {cur_step} , Yolo mean batch loss {mean_yolo_loss}')
                show_tensor_images(x)
                mean_yolo_loss = 0


In [None]:
# Start training with the module-level model, optimiser, loss and dataloader.
train()