### Importing functions

In [12]:
import os
import glob
import torch
import random
import numpy as np
import cv2
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torch import nn

In [26]:

class YOLODataset(Dataset):
    def __init__(self, img_dir, label_dir, img_size=416):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.img_size = img_size
        self.img_paths = glob.glob(os.path.join(img_dir, "*.jpg"))  # Assuming images are in .jpg format

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, index):
        img_path = self.img_paths[index]
        label_path = os.path.join(self.label_dir, os.path.basename(img_path).replace(".jpg", ".txt"))

        # Load image
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.img_size, self.img_size))
        img = np.transpose(img, (2, 0, 1))  # HWC to CHW
        img = torch.tensor(img, dtype=torch.float32) / 255.0  # Normalize to [0, 1]

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            with open(label_path) as f:
                for line in f:
                    labels.append([float(x) for x in line.strip().split()])

        # Convert labels to tensor
        labels = torch.tensor(labels, dtype=torch.float32)
        
        return img, labels

# Create dataset and dataloader
def create_dataloader(img_dir, label_dir, batch_size, img_size=416, shuffle=True, num_workers=0, max_labels=50):
    dataset = YOLODataset(img_dir, label_dir, img_size, max_labels)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=True)
    return dataloader


In [27]:
def collate_fn(batch):
    imgs, labels = zip(*batch)
    imgs = torch.stack(imgs, 0)
    
    max_num_labels = max(label.shape[0] for label in labels)
    padded_labels = []
    for label in labels:
        num_labels = label.shape[0]
        if num_labels < max_num_labels:
            padding = torch.zeros((max_num_labels - num_labels, 5))
            padded_label = torch.cat((label, padding), dim=0)
        else:
            padded_label = label[:max_num_labels]
        padded_labels.append(padded_label)
        
    padded_labels = torch.stack(padded_labels, 0)
    
    return imgs, padded_labels


In [28]:
def create_dataloader(img_dir, label_dir, batch_size, img_size=416, shuffle=True, num_workers=0):
    dataset = YOLODataset(img_dir, label_dir, img_size)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=True, collate_fn=collate_fn)
    return dataloader


## Model

### Defining Model

In [29]:
class YOLOLayer(nn.Module):
    def __init__(self, anchors, num_classes, img_dim):
        super(YOLOLayer, self).__init__()
        self.anchors = anchors
        self.num_classes = num_classes
        self.img_dim = img_dim

    def forward(self, x, targets=None, img_dim=None):
        # Implement forward pass to calculate bounding boxes, objectness scores, and class probabilities
        pass

class YOLO(nn.Module):
    def __init__(self, num_classes):
        super(YOLO, self).__init__()
        self.num_classes = num_classes

        # Define convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)
        self.conv5 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)

        # Define YOLO layers
        self.yolo1 = YOLOLayer(anchors=[(10,13), (16,30), (33,23)], num_classes=self.num_classes, img_dim=416)
        self.yolo2 = YOLOLayer(anchors=[(30,61), (62,45), (59,119)], num_classes=self.num_classes, img_dim=416)
        self.yolo3 = YOLOLayer(anchors=[(116,90), (156,198), (373,326)], num_classes=self.num_classes, img_dim=416)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        x = torch.relu(self.conv4(x))
        x = torch.relu(self.conv5(x))

        # Pass through YOLO layers
        yolo1_output = self.yolo1(x)
        yolo2_output = self.yolo2(x)
        yolo3_output = self.yolo3(x)

        return [yolo1_output, yolo2_output, yolo3_output]


In [30]:
class YOLOLoss(nn.Module):
    def __init__(self, num_classes):
        super(YOLOLoss, self).__init__()
        self.num_classes = num_classes

    def forward(self, predictions, targets):
        # Calculate localization loss, confidence loss, and class probability loss
        loc_loss = 0
        conf_loss = 0
        class_loss = 0

        for pred in predictions:
            # Implement the loss calculation
            pass

        total_loss = loc_loss + conf_loss + class_loss
        return total_loss


In [31]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)

# Instantiate the model
model = YOLO(num_classes=1).to(device)

# Instantiate the loss function
criterion = YOLOLoss(num_classes=1).to(device)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

mps


In [32]:
# Define paths
train_img_dir = "./data/train/images"
train_label_dir = "./data/train/labels"
valid_img_dir = "./data/valid/images"
valid_label_dir = "./data/valid/labels"
test_img_dir = "./data/test/images"
test_label_dir = "./data/test/labels"

# Create dataloaders
train_loader = create_dataloader(train_img_dir, train_label_dir, batch_size=16, img_size=416, shuffle=True)
valid_loader = create_dataloader(valid_img_dir, valid_label_dir, batch_size=16, img_size=416, shuffle=False)
test_loader = create_dataloader(test_img_dir, test_label_dir, batch_size=16, img_size=416, shuffle=False)


### Training Loop

In [34]:
# Hyperparameters
num_epochs = 100
batch_size = 16

# Prepare Data Loaders
train_loader = create_dataloader(train_img_dir, train_label_dir, batch_size=batch_size, img_size=416, shuffle=True)
valid_loader = create_dataloader(valid_img_dir, valid_label_dir, batch_size=batch_size, img_size=416, shuffle=False)
test_loader = create_dataloader(test_img_dir, test_label_dir, batch_size=batch_size, img_size=416, shuffle=False)

# Training Loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for imgs, targets in train_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)

        # Forward pass
        outputs = model(imgs)

        # Compute loss
        loss = criterion(outputs, targets)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * imgs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for imgs, targets in valid_loader:
            imgs = imgs.to(device)
            targets = targets.to(device)

            # Forward pass
            outputs = model(imgs)

            # Compute loss
            loss = criterion(outputs, targets)

            val_loss += loss.item() * imgs.size(0)

    val_loss /= len(valid_loader.dataset)
    print(f'Validation Loss: {val_loss:.4f}')

# Save the model
torch.save(model.state_dict(), "yolo_model.pth")


ValueError: expected sequence of length 183 at dim 1 (got 111)