In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms

## Architecture of YOLO-v1

![YOLO-v1 network architecture](images/Yolov1_arch.png)

In [7]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
backend_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(backend_name)
print(device)

cuda


In [None]:
class YOLOv1(nn.Module):
    """YOLO-v1 style backbone: 24 convolutional layers followed by 2 linear layers.

    The spatial resolution is halved six times (``conv1`` with stride 2, four
    max-pool steps, and ``conv22`` with stride 2). ``fc1`` expects a flattened
    ``7 * 7 * 1024`` feature map, so the input must reduce to 7x7 at that point
    (448x448 images, as in the shape comments of ``forward``).

    Every layer except the last is followed by LeakyReLU with negative slope
    0.1; the last layer is left linear.

    Args:
        image_channels: Number of channels of the input images (``in_channels``
            of the first convolution).
        num_classes: Number of output features of the final linear layer.

    NOTE(review): in the YOLO-v1 paper the final layer emits S*S*(B*5+C) values
    per image (7*7*30 for Pascal VOC). Here the output width is ``num_classes``,
    exactly as the original code declared it -- confirm that this is intended.
    """

    def __init__(self, image_channels, num_classes):
        super(YOLOv1, self).__init__()

        # Parameter-free modules, reused at several points in forward().
        self.act = nn.LeakyReLU(0.1)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Stage 1
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)

        # Stage 2
        self.conv2 = nn.Conv2d(64, 192, kernel_size=3, stride=1, padding=1)

        # Stage 3
        self.conv3 = nn.Conv2d(192, 128, kernel_size=1, stride=1, padding=0)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0)
        self.conv6 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)

        # Stage 4: alternating 1x1 reduction / 3x3 expansion layers
        self.conv7 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)
        self.conv8 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv9 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)
        self.conv10 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv11 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)
        self.conv12 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv13 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0)
        self.conv14 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv15 = nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0)
        self.conv16 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1)

        # Stage 5: ends with a stride-2 convolution instead of a max-pool
        self.conv17 = nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0)
        self.conv18 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1)
        self.conv19 = nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0)
        self.conv20 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1)
        self.conv21 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1)
        self.conv22 = nn.Conv2d(1024, 1024, kernel_size=3, stride=2, padding=1)

        # Stage 6
        self.conv23 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1)
        self.conv24 = nn.Conv2d(1024, 1024, kernel_size=3, stride=1, padding=1)

        # Fully connected head
        self.fc1 = nn.Linear(7 * 7 * 1024, 4096)
        self.fc2 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape ``(batch, image_channels, H, W)``. H and W must
                reduce to 7x7 before ``fc1`` (e.g. 448x448).

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # Shape comments list per-sample sizes as (height, width, channels);
        # the actual tensors are laid out as (batch, channels, height, width).

        x = self.act(self.conv1(x))     # input: (448, 448, 3)    output: (224, 224, 64)
        x = self.maxpool(x)             # input: (224, 224, 64)   output: (112, 112, 64)

        x = self.act(self.conv2(x))     # input: (112, 112, 64)   output: (112, 112, 192)
        x = self.maxpool(x)             # input: (112, 112, 192)  output: (56, 56, 192)

        x = self.act(self.conv3(x))     # input: (56, 56, 192)    output: (56, 56, 128)
        x = self.act(self.conv4(x))     # input: (56, 56, 128)    output: (56, 56, 256)
        x = self.act(self.conv5(x))     # input: (56, 56, 256)    output: (56, 56, 256)
        x = self.act(self.conv6(x))     # input: (56, 56, 256)    output: (56, 56, 512)
        x = self.maxpool(x)             # input: (56, 56, 512)    output: (28, 28, 512)

        x = self.act(self.conv7(x))     # input: (28, 28, 512)    output: (28, 28, 256)
        x = self.act(self.conv8(x))     # input: (28, 28, 256)    output: (28, 28, 512)
        x = self.act(self.conv9(x))     # input: (28, 28, 512)    output: (28, 28, 256)
        x = self.act(self.conv10(x))    # input: (28, 28, 256)    output: (28, 28, 512)
        x = self.act(self.conv11(x))    # input: (28, 28, 512)    output: (28, 28, 256)
        x = self.act(self.conv12(x))    # input: (28, 28, 256)    output: (28, 28, 512)
        x = self.act(self.conv13(x))    # input: (28, 28, 512)    output: (28, 28, 256)
        x = self.act(self.conv14(x))    # input: (28, 28, 256)    output: (28, 28, 512)
        x = self.act(self.conv15(x))    # input: (28, 28, 512)    output: (28, 28, 512)
        x = self.act(self.conv16(x))    # input: (28, 28, 512)    output: (28, 28, 1024)
        x = self.maxpool(x)             # input: (28, 28, 1024)   output: (14, 14, 1024)

        x = self.act(self.conv17(x))    # input: (14, 14, 1024)   output: (14, 14, 512)
        x = self.act(self.conv18(x))    # input: (14, 14, 512)    output: (14, 14, 1024)
        x = self.act(self.conv19(x))    # input: (14, 14, 1024)   output: (14, 14, 512)
        x = self.act(self.conv20(x))    # input: (14, 14, 512)    output: (14, 14, 1024)
        x = self.act(self.conv21(x))    # input: (14, 14, 1024)   output: (14, 14, 1024)
        x = self.act(self.conv22(x))    # input: (14, 14, 1024)   output: (7, 7, 1024)

        x = self.act(self.conv23(x))    # input: (7, 7, 1024)     output: (7, 7, 1024)
        x = self.act(self.conv24(x))    # input: (7, 7, 1024)     output: (7, 7, 1024)

        # nn.Linear works on the last dimension only, so collapse
        # (channels, height, width) into one feature vector per sample.
        x = torch.flatten(x, start_dim=1)   # output: (7 * 7 * 1024,)

        x = self.act(self.fc1(x))       # output: (4096,)
        x = self.fc2(x)                 # output: (num_classes,), no activation on the last layer

        return x
        