In [275]:
####

In [318]:
import torch
from torch import nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from torchvision.utils import make_grid
from torchvision import transforms
from tqdm.notebook import tqdm

In [277]:
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [278]:
def show_tensor_images(image_tensor, num_images=2, size=(3 , 416 , 416)):
    """Plot the first ``num_images`` images of a batch as one grid.

    Args:
        image_tensor: tensor that can be viewed as ``(-1, *size)``.
        num_images: how many leading images to include in the grid.
        size: per-image shape used to un-flatten ``image_tensor``.
    """
    # Detach from autograd and move to host memory before reshaping.
    images = image_tensor.detach().cpu().view(-1, *size)
    grid = make_grid(images[:num_images], nrow=5)
    # Move the first axis last before handing the grid to imshow.
    plt.imshow(grid.permute(1, 2, 0).squeeze())
    plt.show()

In [279]:
def iou_width_height(boxes1, boxes2):
    """IoU computed from widths and heights only.

    Both inputs hold (width, height) in their last dimension and must
    broadcast against each other. The overlap is ``min(w) * min(h)``, i.e.
    the boxes are treated as if they were aligned at a common corner.

    Returns:
        Tensor of IoU values with the broadcast shape of the inputs minus the
        last dimension.
    """
    w1, h1 = boxes1[..., 0], boxes1[..., 1]
    w2, h2 = boxes2[..., 0], boxes2[..., 1]

    overlap = torch.min(w1, w2) * torch.min(h1, h2)
    combined = w1 * h1 + w2 * h2 - overlap
    return overlap / combined

In [280]:
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """Compute the intersection-over-union of predicted and reference boxes.

    Args:
        boxes_preds: tensor whose last dimension starts with the 4 box
            coordinates of the predictions.
        boxes_labels: reference boxes in the same format; must broadcast
            against ``boxes_preds``.
        box_format: ``"midpoint"`` for (x_center, y_center, width, height) or
            ``"corners"`` for (x1, y1, x2, y2).

    Returns:
        Tensor with a trailing dimension of size 1 holding one IoU per box pair.

    Raises:
        ValueError: if ``box_format`` is neither ``"midpoint"`` nor ``"corners"``.
    """
    if box_format == "midpoint":
        # Convert centre/size boxes to corner coordinates.
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]
    else:
        # Fail loudly instead of hitting an UnboundLocalError further down.
        raise ValueError(
            f"box_format must be 'midpoint' or 'corners', got {box_format!r}"
        )

    # Corners of the overlap rectangle.
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # clamp(0) turns the negative extents of non-overlapping boxes into zero area.
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = ((box1_x2 - box1_x1) * (box1_y2 - box1_y1)).abs()
    box2_area = ((box2_x2 - box2_x1) * (box2_y2 - box2_y1)).abs()

    # The epsilon keeps the division finite for two zero-area boxes.
    return intersection / (box1_area + box2_area - intersection + 1e-6)

In [281]:
class Conv(nn.Module):
    """Convolution followed by optional BatchNorm, LeakyReLU(0.2) and 2x2 max-pool.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the convolution.
        kernel_size, stride, padding: forwarded to ``nn.Conv2d``.
        use_norm: add ``nn.BatchNorm2d`` after the convolution.
        use_activation: add ``nn.LeakyReLU(0.2)`` after the (optional) norm.
        use_pool: add a 2x2 / stride-2 ``nn.MaxPool2d`` at the end.
    """

    def __init__(self, in_channels, out_channels, kernel_size=(3, 3), stride=(1, 1),
                 padding=1, use_norm=True, use_activation=True, use_pool=True):
        super().__init__()

        self.use_norm = use_norm
        self.use_activation = use_activation
        self.use_pool = use_pool

        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )
        # Optional stages are only registered when enabled.
        if use_norm:
            self.norm = nn.BatchNorm2d(out_channels)
        if use_activation:
            self.activation = nn.LeakyReLU(0.2)
        if use_pool:
            self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Apply conv -> [norm] -> [activation] -> [pool] and return the result."""
        out = self.conv1(x)
        optional_stages = (
            (self.use_norm, 'norm'),
            (self.use_activation, 'activation'),
            (self.use_pool, 'maxpool'),
        )
        for enabled, attr_name in optional_stages:
            if enabled:
                out = getattr(self, attr_name)(out)
        return out

In [None]:
# Smoke test for the Conv block: push a random batch of two 3-channel 512x512
# images through a default-configured Conv(3 -> 32) on the selected device.
x = torch.randn(2 , 3 , 512 , 512).to(device)
conv = Conv(3 , 32).to(device)
z = conv(x)
# With the defaults (3x3 kernel , stride 1 , padding 1 , then a 2x2 max-pool)
# the result should be (2 , 32 , 256 , 256).
z.shape

In [283]:
class ReOrgLayer(nn.Module):
    """Space-to-depth rearrangement.

    Every ``stride x stride`` spatial patch is moved into the channel
    dimension, turning ``(B, C, H, W)`` into
    ``(B, C * stride * stride, H // stride, W // stride)``.

    Args:
        stride: side length of the patch folded into the channels.
    """
    def __init__(self, stride = 2):
        super(ReOrgLayer, self).__init__()
        self.stride= stride

    def forward(self,x):
        """Rearrange a 4-D tensor; H and W must both be divisible by the stride."""
        assert x.dim() == 4, "ReOrgLayer expects a 4-D (B, C, H, W) tensor"
        batch, channels, H, W = x.shape
        hs = self.stride
        ws = self.stride
        assert(H % hs == 0),  "The stride " + str(self.stride) + " is not a proper divisor of height " + str(H)
        assert(W % ws == 0),  "The stride " + str(self.stride) + " is not a proper divisor of width " + str(W)
        out_h = H // hs
        out_w = W // ws
        # (B, C, out_h, hs, out_w, ws) -> (B, C, out_h, out_w, hs, ws)
        x = x.reshape(batch, channels, out_h, hs, out_w, ws).transpose(-2, -3).contiguous()
        # Flatten the grid of patches and the inside of each patch ...
        x = x.view(batch, channels, out_h * out_w, hs * ws)
        # ... then put the in-patch offset in front of the patch index.
        x = x.transpose(-1, -2).contiguous()
        # (B, C, hs*ws, out_h, out_w) -> (B, hs*ws, C, out_h, out_w)
        x = x.view(batch, channels, hs * ws, out_h, out_w).transpose(1, 2).contiguous()
        return x.view(batch, channels * hs * ws, out_h, out_w)

In [284]:
config = [
          # Layer specification consumed by YOLO.__init__ , one entry per stage:
          #   list  [out_channels , kernel_size , stride , padding , maxpool]
          #         -> one Conv block ; maxpool == 1 enables its 2x2 max-pool
          #   int   -> three Conv blocks without pooling :
          #            Conv3x3 , Conv1x1 (half the channels) , Conv3x3
          #   'M'   -> stand-alone 2x2 max-pool
          #   ('S' , [...])   -> Conv block whose output is also saved for the skip connection
          #   'C'   -> reorganize the saved output and concatenate it on dim = 1
          #   ('add' , [...]) -> Conv block applied right after the concatenation
            [32 , 3 , 1 , 1 , 1] , 
            [64 , 3 , 1 , 1 , 1] , 
            128 , # (out_channels = 128 , kernel_size = 3 , stride = 1 , padding = 1)
                    # (out_channels = 64 , kernel_size = 1 , stride = 1 , padding = 0)
                    # (out_channels = 128 , kernel_size = 3 , stride = 1 , padding = 1)
            'M' , 
            256 , 
            'M' , 
            512 , 
            [256 , 1 , 1 , 0 , 0] ,
            ('S' , [512 , 3 , 1 , 1 , 0]) , ## Tuple and 'S' => save this output
                                            ## so it can be reorganized and used as the skip connection
            'M' , # Maxpool
            1024 , 
            [512 , 1 , 1 , 0 , 0] , 
            [1024 , 3 , 1 , 1 , 0] ,
            [1024 , 3 , 1 , 1 , 0] , 
            [1024 , 3 , 1 , 1 , 0] ,  
            'C' , # Concat with the reorganized skip connection on dim = 1
            ('add' , [1024 , 3 , 1 , 1 , 0]) , 
            [125 , 1 , 1 , 0 , 0] # 125 = B * (C + 5) = 5 * (20 + 5) output channels

]

In [285]:
class Support(nn.Module):
    """Pass-through marker module.

    It returns its input untouched; YOLO.forward checks for instances of this
    class and stores the tensor that flows through them for the later skip
    connection.
    """

    def __init__(self):
        super().__init__()
        self.in_channels = 3

    def forward(self, x):
        """Identity: hand back ``x`` itself."""
        return x

In [286]:
class Support_(nn.Module):
    """Pass-through marker module for the concatenation point.

    It returns its input untouched; YOLO.forward checks for instances of this
    class and, instead of calling them, concatenates the reorganized saved
    tensor onto the current feature map.
    """

    def __init__(self):
        super().__init__()
        self.in_channels = 3

    def forward(self, x):
        """Identity: hand back ``x`` itself."""
        return x

In [287]:
# Detection-head dimensions used as defaults by the model:
#   B - boxes (anchors) predicted per grid cell
#   S - grid cells per image side
#   C - number of object classes
B, S, C = 5, 13, 20

In [288]:
class YOLO(nn.Module):
    """Convolutional detector assembled from a layer specification list.

    Supported ``config`` entries:
        * ``[out_channels, kernel_size, stride, padding, maxpool]`` - one Conv
          block (``maxpool == 1`` enables its 2x2 max-pool).
        * ``int n`` - Conv3x3(n) -> Conv1x1(n // 2) -> Conv3x3(n), no pooling.
        * ``'M'`` - stand-alone 2x2 max-pool.
        * ``('S', [...])`` - Conv block whose output is saved for the skip path.
        * ``'C'`` - reorganize the saved output and concatenate it on dim 1.
        * ``('add', [...])`` - Conv block applied to the concatenated tensor.

    Args:
        in_channels: channels of the input image.
        config: layer specification list (see above).
        B: boxes predicted per grid cell.
        C: number of classes.
        S: grid cells per side of the output.
    """
    def __init__(self , 
                 in_channels = 3 ,
                 config = config , 
                 B = B , 
                 C = C , 
                 S = S):
        super(YOLO , self).__init__()

        self.layers = nn.ModuleList()
        self.config = config
        self.B = B
        self.C = C 
        self.S = S

        reorg_stride = 2
        # Channel count of the most recent 'S' output; needed to size the conv
        # that follows the concatenation.
        skip_channels = None

        for module in self.config:
            if isinstance(module , list):
                out_channels , kernel_size , stride , padding , use_pool = module
                self.layers.append(Conv(
                    in_channels , out_channels , kernel_size , stride , padding , use_pool=(use_pool == 1)
                ))
                in_channels = out_channels

            elif isinstance(module , int):
                out_channels = module
                self.layers.append(
                    Conv(in_channels , out_channels , use_pool=False))
                self.layers.append(
                    Conv(out_channels , out_channels // 2 , kernel_size=(1 , 1) , stride=(1 , 1) , padding=0 , use_pool=False)
                )
                # Expand back to the requested width (not to 2 * the previous
                # stage's width, which only matched by coincidence).
                self.layers.append(
                    Conv(out_channels // 2 , out_channels , use_pool=False)
                )
                in_channels = out_channels

            elif isinstance(module , tuple) and module[0] == 'S':
                out_channels , kernel_size , stride , padding , use_pool = module[1]
                self.layers.append(Conv(
                    in_channels , out_channels , kernel_size , stride , padding , use_pool=(use_pool == 1)
                ))
                # Marker: forward() saves whatever passes through it.
                self.layers.append(Support())
                in_channels = out_channels
                skip_channels = out_channels

            elif isinstance(module , str) and module == 'M':
                self.layers.append(nn.MaxPool2d(kernel_size=2 , stride=2))

            elif isinstance(module , str) and module == 'C':
                if skip_channels is None:
                    raise ValueError("'C' entry found in config before any ('S' , ...) entry")
                # Marker: forward() concatenates the reorganized skip tensor here.
                self.layers.append(Support_())
                # The reorg layer multiplies the saved channels by stride ** 2.
                in_channels = in_channels + skip_channels * reorg_stride ** 2

            elif isinstance(module , tuple) and module[0] == 'add':
                out_channels , kernel_size , stride , padding , use_pool = module[1]
                # in_channels already accounts for the concatenation (1024 +
                # 512 * 4 = 3072 with the default config).
                self.layers.append(Conv(
                    in_channels , out_channels , kernel_size , stride , padding , use_pool=(use_pool == 1)
                ))
                in_channels = out_channels

        self.reorg_layer = ReOrgLayer(stride=reorg_stride)

    def forward(self , x):
        """Run the network.

        Returns:
            Tensor of shape ``(N, B, S, S, C + 5)``.
        """
        for_save = []
        for layer in self.layers:
            if isinstance(layer , Support):
                x = layer(x)
                for_save.append(x)
            elif isinstance(layer , Support_):
                x_ = self.reorg_layer(for_save[-1])
                x = torch.cat([x , x_] , dim=1)
            else:
                x = layer(x)
        # The head emits (N, B * (C + 5), S, S). Split the channel axis first
        # and only then move the per-box vector last; a bare view to
        # (N, B, S, S, C + 5) would mix values from different grid cells.
        x = x.view(x.shape[0] , self.B , self.C + 5 , self.S , self.S)
        return x.permute(0 , 1 , 3 , 4 , 2).contiguous()

In [None]:
# Smoke test for the full model: two random 416x416 RGB images should come out
# as a (2 , B , S , S , C + 5) tensor.
yolo = YOLO().to(device)
# The input has to live on the same device as the model , otherwise the first
# convolution fails when CUDA is in use.
x = torch.randn(2,  3 , 416 , 416).to(device)
z = yolo(x)
z.shape

In [331]:
class Dataset_(torch.utils.data.Dataset):
    """Detection dataset producing an image and a per-anchor target grid.

    The CSV file lists one sample per row: column 0 is the image file name
    (relative to ``img_dir``), column 1 the label file name (relative to
    ``label_dir``). Each non-empty label line reads
    ``class x y width height`` separated by whitespace.

    Args:
        img_dir: directory containing the images.
        label_dir: directory containing the label text files.
        csv_file: path of the CSV index described above.
        anchors: sequence of (width, height) anchor priors, compared directly
            against the label widths/heights.
        transforms: optional callable applied to the CHW image tensor.
        S: grid cells per side.
        B: anchors per grid cell.
        C: number of classes.
    """
    def __init__(self ,
                 img_dir , 
                 label_dir , 
                 csv_file , 
                 anchors , 
                 transforms = None , 
                 S = 13 , 
                 B = 5 , 
                 C = 20 ):
        super(Dataset_ , self).__init__()

        self.img_dir = img_dir
        self.label_dir = label_dir
        self.df = pd.read_csv(csv_file)
        self.anchors = torch.from_numpy(np.array(anchors))
        self.transforms = transforms
        # Kept for compatibility; not read anywhere inside this class.
        self.number_of_anchors_per_cell = 5
        # Non-assigned anchors whose shape IoU exceeds this are marked "ignore".
        self.ignore_iou_thresh = 0.5
        self.C = C
        self.S = S
        self.B = B

    def __len__(self):
        """Number of samples listed in the CSV index."""
        return len(self.df)

    def __getitem__(self , idx):
        """Load sample ``idx``.

        Returns:
            ``(image, targets)`` where ``targets`` has shape ``(B, S, S, 6)``
            laid out as ``[objectness, x_cell, y_cell, width_cell,
            height_cell, class]``. Objectness is 1 for the assigned anchor,
            -1 for anchors to ignore and 0 otherwise.
        """
        label_path = os.path.join(self.label_dir , self.df.iloc[idx , 1])
        boxes = []

        with open(label_path) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank / trailing empty lines.
                    continue
                # Parse everything as float; int("1.0") would raise.
                class_label , x , y , width , height = (float(v) for v in fields)
                boxes.append([x , y , width , height , class_label])

        boxes = torch.tensor(boxes)

        img_path = os.path.join(self.img_dir , self.df.iloc[idx , 0])
        image = np.asarray(plt.imread(img_path))
        # HWC -> CHW
        image = torch.from_numpy(image).permute(2 , 0 , 1)
        if self.transforms:
            image = self.transforms(image)

        S = self.S
        targets = torch.zeros((self.B , S , S , 6))
        for box in boxes:
            iou_anchors = iou_width_height(box[2:4] , self.anchors)
            # Best-matching anchor shapes first.
            anchors_indices = iou_anchors.argsort(descending=True , dim=0)
            x , y , width , height , class_label = box

            # Row / column of the responsible cell; clamp so a coordinate of
            # exactly 1.0 still lands in the last cell.
            i = min(int(S * y) , S - 1)
            j = min(int(S * x) , S - 1)

            # One anchor per box becomes responsible; a per-anchor flag would
            # mark every anchor positive and leave the ignore branch dead.
            has_anchor = False
            for anchor_idx in anchors_indices:
                anchor_on_scale = anchor_idx % self.B
                anchor_taken = targets[anchor_on_scale , i , j , 0]
                if not anchor_taken and not has_anchor:
                    targets[anchor_on_scale , i , j , 0] = 1
                    # Offsets inside the cell: subtract the column from x and
                    # the row from y.
                    x_cell , y_cell = S * x - j , S * y - i
                    width_cell , height_cell = width * S , height * S
                    targets[anchor_on_scale , i , j , 1:5] = torch.stack(
                        [x_cell , y_cell , width_cell , height_cell]
                    )
                    targets[anchor_on_scale , i , j , 5] = int(class_label)
                    has_anchor = True

                elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
                    # Good shape match but not the responsible anchor: ignore.
                    targets[anchor_on_scale , i , j , 0] = -1
        return image , targets

In [332]:
# Anchor-box priors , one [width , height] pair per anchor (5 anchors to match B).
# NOTE(review): the values look like fractions of the image size - confirm they
# use the same units as the widths / heights in the label files.
anchors = [[ 0.28, 0.22], [  0.38, 0.48], [ 0.9, 0.78], [ 0.07, 0.15], [ 0.15, 0.11]]
# Image pipeline : tensor -> PIL image -> resize to 416x416 -> tensor.
transform = transforms.Compose([
                                transforms.ToPILImage() , 
                                transforms.Resize((416 , 416)) , 
                                transforms.ToTensor()
])
# NOTE(review): these absolute paths look like a mounted Google Drive folder ;
# they have to be adjusted when the notebook runs anywhere else.
dataset = Dataset_(
    img_dir = '/content/drive/MyDrive/Yolo_Dataset/images/' , 
    label_dir = '/content/drive/MyDrive/Yolo_Dataset/labels' , 
    csv_file = '/content/drive/MyDrive/Yolo_Dataset/train.csv' , 
    anchors = anchors , 
    transforms = transform
)
# One image per batch , reshuffled every epoch.
dataloader = torch.utils.data.DataLoader(dataset , batch_size = 1 , shuffle=True)

In [None]:
# Sanity check : take a single batch from the dataloader , plot its image(s)
# and print the shape of the matching target tensor , then stop.
for x , y in dataloader:
    show_tensor_images(x)
    # Uncomment to print the targets slice by slice (very verbose) :
    #for i in y:
    #    for j in i:
    #        for k in j:
    #            print(k)
    print(y.shape)
    break

In [334]:
class Loss(nn.Module):
    """Weighted sum of box, objectness, no-object and class losses.

    ``predictions`` is expected as ``(N, A, S, S, 5 + num_classes)`` laid out
    as ``[objectness, x, y, w, h, class scores...]`` and ``targets`` as
    ``(N, A, S, S, 6)`` laid out as ``[objectness, x, y, w, h, class]``, where
    target objectness is 1 (object), 0 (no object) or anything else (ignored).

    NOTE(review): target widths/heights are divided by ``anchors`` directly,
    so both must use the same units. The Dataset_ in this notebook stores
    ``width * S`` while the notebook's anchors look normalised - confirm
    whether the anchors should be scaled by S before being passed in.
    """
    def __init__(self):
        super(Loss , self).__init__()

        self.mse = nn.MSELoss()
        self.en = nn.CrossEntropyLoss()
        self.bce = nn.BCEWithLogitsLoss()
        self.sigmoid = nn.Sigmoid()

        # Relative weights of the four loss terms.
        self.lambda_class = 1
        self.lambda_noobj = 10
        self.lambda_obj = 1
        self.lambda_box = 10

    def forward(self , predictions , targets , anchors):
        """Compute the total loss.

        Args:
            predictions: raw network output (logits), see class docstring.
            targets: target grid, see class docstring. Not modified.
            anchors: ``A`` (width, height) pairs as a nested sequence or tensor.

        Returns:
            Scalar tensor. Terms whose cell mask is empty contribute zero
            instead of NaN.
        """
        # Build the anchors on the device / dtype of the predictions so the
        # broadcasted products below also work on CUDA; -1 accepts any number
        # of anchors instead of exactly 5.
        anchors = torch.as_tensor(
            anchors , dtype=predictions.dtype , device=predictions.device
        ).reshape(1 , -1 , 1 , 1 , 2)
        targets = targets.to(predictions.device)

        obj = targets[... , 0] == 1
        noobj = targets[... , 0] == 0
        zero = predictions.new_zeros(())

        # Objectness is squashed with a sigmoid here as well, so both
        # objectness terms are defined on the same [0 , 1] scale.
        if noobj.any():
            no_obj_loss = self.mse(
                self.sigmoid(predictions[... , 0:1][noobj]) , targets[... , 0:1][noobj]
            )
        else:
            no_obj_loss = zero

        if obj.any():
            xy = self.sigmoid(predictions[... , 1:3])

            # Decoded boxes are only used to score objectness against the IoU.
            box_preds = torch.cat(
                [xy , torch.exp(predictions[... , 3:5]) * anchors] , dim=-1
            )
            ious = intersection_over_union(box_preds[obj] , targets[... , 1:5][obj]).detach()
            object_loss = self.mse(
                self.sigmoid(predictions[... , 0:1][obj]) , ious * targets[... , 0:1][obj]
            )

            # Box regression happens in the network's own parametrisation:
            # sigmoid(xy) vs. cell offsets and raw wh vs. log(target / anchor).
            # New tensors are built so neither argument is mutated in place.
            pred_boxes = torch.cat([xy , predictions[... , 3:5]] , dim=-1)
            target_boxes = torch.cat(
                [targets[... , 1:3] , torch.log(1e-16 + targets[... , 3:5] / anchors)] ,
                dim=-1 ,
            )
            box_loss = self.mse(pred_boxes[obj] , target_boxes[obj])

            class_loss = self.en(
                predictions[... , 5:][obj] , targets[... , 5][obj].long()
            )
        else:
            object_loss = box_loss = class_loss = zero

        return (
            self.lambda_box * box_loss
            + self.lambda_obj * object_loss
            + self.lambda_noobj * no_obj_loss
            + self.lambda_class * class_loss
        )

In [335]:
# Smoke test for the loss: run one batch through the model and evaluate the
# loss once.
loss_ = Loss().to(device)
for x , y in dataloader:
    # The batch must be on the same device as the model.
    x , y = x.to(device) , y.to(device)
    predictions = yolo(x)
    break
z = loss_(predictions , y , anchors)

In [336]:
# Optimisation hyper-parameters.
lr = 0.002
betas = (0.5 , 0.999)
epochs = 200
display_steps = 100  # report the running loss every this many steps

# Fresh model and its Adam optimiser.
yolo = YOLO().to(device)
opt = torch.optim.Adam(params=yolo.parameters() , betas=betas , lr=lr)

In [337]:
def train():
    """Train the global ``yolo`` model with the global optimiser and loss.

    Reads the notebook globals ``yolo``, ``opt``, ``loss_``, ``dataloader``,
    ``anchors``, ``device``, ``epochs`` and ``display_steps``. Every
    ``display_steps`` optimisation steps it prints the mean loss of exactly
    those steps and starts a new averaging window.
    """
    yolo.train()
    mean_yolo_loss = 0
    cur_step = 0

    for epoch in range(epochs):
        for x , y in tqdm(dataloader):
            x , y = x.to(device) , y.to(device)

            opt.zero_grad()
            y_ = yolo(x)
            yolo_loss = loss_(y_ , y , anchors)
            yolo_loss.backward()
            opt.step()

            mean_yolo_loss += yolo_loss.item() / display_steps
            cur_step += 1
            # Report only once a full window has been accumulated, then reset;
            # otherwise the printed value is a growing sum rather than a mean.
            if cur_step % display_steps == 0:
                print(f'Epoch {epoch} , Step {cur_step} , Mean YOLO Loss {mean_yolo_loss}')
                mean_yolo_loss = 0


In [None]:
# Start training; runs for `epochs` passes over `dataloader`.
train()