In [2]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision as transforms

from os import listdir
import random

In [5]:
# Create model
class YOLOv1(nn.Module):
    """YOLOv1-style network: a convolutional backbone followed by a dense head.

    The backbone (``self.darkNet``) is a stack of Conv2d -> BatchNorm2d ->
    LeakyReLU(0.1) groups interleaved with 2x2 max-pooling. It contains two
    stride-2 convolutions and four stride-2 pools, so the spatial size shrinks
    by a factor of 64 and the output has 1024 channels.

    The head (``self.fc``) flattens the backbone output and maps it through
    two linear layers to ``split_size * split_size * (num_classes + num_boxes * 5)``
    values per sample.

    Args:
        split_size: Side length of the feature map the head expects from the
            backbone. The first linear layer is sized for
            ``1024 * split_size * split_size`` inputs, so the image side must
            be ``64 * split_size`` (e.g. 448 for ``split_size=7``).
        num_boxes: Number of boxes per grid cell; each contributes 5 outputs.
        num_classes: Number of class outputs per grid cell.
    """

    def __init__(self, split_size, num_boxes, num_classes):
        super().__init__()

        # Backbone layout. A tuple is one conv group described as
        # (in_channels, out_channels, kernel_size, stride); the marker string
        # stands for a 2x2 max-pool with stride 2.
        pool = "pool"
        layout = [
            (3, 64, 7, 2), pool,
            (64, 192, 3, 1), pool,
            (192, 128, 1, 1), (128, 256, 3, 1),
            (256, 256, 1, 1), (256, 512, 3, 1), pool,
            *([(512, 256, 1, 1), (256, 512, 3, 1)] * 4),
            (512, 512, 1, 1), (512, 1024, 3, 1), pool,
            *([(1024, 512, 1, 1), (512, 1024, 3, 1)] * 2),
            (1024, 1024, 3, 1), (1024, 1024, 3, 2),
            (1024, 1024, 3, 1), (1024, 1024, 3, 1),
        ]

        stages = []
        for entry in layout:
            if isinstance(entry, str):
                stages.append(nn.MaxPool2d(2, 2))
                continue
            c_in, c_out, kernel, stride = entry
            # kernel // 2 gives padding 3 / 1 / 0 for the 7x7 / 3x3 / 1x1
            # kernels used above.
            stages.extend([
                nn.Conv2d(c_in, c_out, kernel, stride=stride, padding=kernel // 2),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.1),
            ])
        self.darkNet = nn.Sequential(*stages)

        cells = split_size * split_size
        outputs_per_cell = num_classes + num_boxes * 5
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * cells, 4096),
            nn.Dropout(0.5),
            nn.LeakyReLU(0.1),
            nn.Linear(4096, cells * outputs_per_cell),
        )

    def forward(self, x):
        """Run the backbone, then the dense head; returns a (batch, outputs) tensor."""
        return self.fc(self.darkNet(x))
    
# Smoke test: build the network for a 7x7 grid, 2 boxes and 20 classes, feed a
# random batch of two 448x448 RGB-sized tensors and print the output size.
model = YOLOv1(split_size=7, num_boxes=2, num_classes=20)
x = torch.randn((2, 3, 448, 448))
print(model(x).size())

torch.Size([2, 1470])


In [22]:
# Set device: use CUDA when PyTorch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [23]:
# Hyperparameters
# NOTE(review): none of these are consumed by a visible cell yet; presumably
# they feed the data loader / optimizer / training loop cells below.
learning_rate = 1e-2
epochs = 5
batch_size = 64

In [5]:
# Load Data
# The imports cell binds the name `transforms` to the top-level `torchvision`
# package, which does not expose Compose / Resize / CenterCrop / ToTensor.
# Bind the actual `torchvision.transforms` submodule so the pipeline resolves.
from torchvision import transforms

# Preprocessing pipeline: resize, take a 448x448 center crop, convert to tensor.
transform = transforms.Compose([
    transforms.Resize(448),
    transforms.CenterCrop(448),
    transforms.ToTensor(),
])

train_data_path = "/bdd100k/images/100k/train/"
test_data_path = "/bdd100k/images/100k/test/"

# os.listdir returns entries in arbitrary order; sort so the file lists (and
# any seeded random sampling from them) are reproducible across runs.
train_files = sorted(listdir(train_data_path))
test_files = sorted(listdir(test_data_path))

# Draw one random training file name per iteration (sampling with replacement).
# The original sampled from an undefined name `files`; `train_files` is the
# list this loop is sized by.
# NOTE(review): the sampled name `f` is not used yet; presumably image loading
# and `transform` application go here.
for i in range(len(train_files)):
    f = random.choice(train_files)

In [6]:
# Initialize network

In [7]:
# Loss and optimizer

In [8]:
# Train network

In [9]:
# Check accuracy on training & test to see how good our model is