In [None]:
# Section 1.2: Object Detection with Pre-trained Feature Extractor

import torchvision.models as models
import torch.nn as nn
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
from datasets import FacesDataset


import pandas as pd
import numpy as np
import os
import random
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import albumentations as A
from albumentations.pytorch import ToTensorV2

from torchvision.models import ResNet50_Weights

# Read the bounding-box annotations (one row per box, keyed by 'image_name').
bbox_data = pd.read_csv('data/faces.csv')

# An image can contribute several rows, so the split is done on the distinct
# image names; every box of a given image then lands in the same subset.
image_names = bbox_data['image_name'].unique()
train_images, test_images = train_test_split(
    image_names,
    test_size=0.2,
    random_state=42,
)

# Select the annotation rows belonging to each subset.
in_train = bbox_data['image_name'].isin(train_images)
in_test = bbox_data['image_name'].isin(test_images)
train_df = bbox_data.loc[in_train]
test_df = bbox_data.loc[in_test]

# These counts are annotation rows, not images.
print("Training samples:", train_df.shape[0], "Testing samples:", test_df.shape[0])

# Pre-trained ResNet-50, selected through the 'weights' argument.
backbone = models.resnet50(weights=ResNet50_Weights.DEFAULT)

# Freeze the backbone: turn off gradient tracking for all of its parameters.
backbone.requires_grad_(False)

class ObjectDetectionModel(nn.Module):
    """Bounding-box regressor on top of a classification backbone.

    The backbone's ``fc`` head is swapped for ``nn.Identity`` so the backbone
    returns its feature vector, and a single linear layer maps that vector to
    four bounding-box values.

    Args:
        backbone: Module exposing an ``fc`` attribute (e.g. a torchvision
            ResNet). It is modified in place: its ``fc`` layer is replaced,
            so the caller's object loses its original classifier.
    """

    def __init__(self, backbone):
        super().__init__()
        # Read the feature width before the head is removed, so backbones
        # other than ResNet-50 work too. Fall back to 2048 (the previous
        # hard-coded value) when ``fc`` carries no ``in_features``, e.g. when
        # it has already been replaced by ``nn.Identity``.
        feature_dim = getattr(backbone.fc, "in_features", 2048)

        self.backbone = backbone
        self.backbone.fc = nn.Identity()  # Remove the original classification head
        self.regressor = nn.Linear(feature_dim, 4)  # New regression head for bounding boxes

    def forward(self, x):
        """Run the backbone on ``x`` and return the regressor output.

        The last dimension of the result has size 4 (one value per box
        coordinate).
        """
        features = self.backbone(x)
        return self.regressor(features)

# Wrap the frozen backbone with the 4-value bounding-box regression head.
model = ObjectDetectionModel(backbone=backbone)

# Training-time pipeline: random, bbox-aware augmentation followed by ImageNet
# normalization. 'pascal_voc' means boxes are [x_min, y_min, x_max, y_max] in
# pixels; the 'labels' field is transformed alongside the boxes.
transform = A.Compose([
    A.RandomSizedBBoxSafeCrop(width=512, height=512, erosion_rate=0.2, p=1.0),  # Ensure all images are cropped and resized to 512x512
    A.HorizontalFlip(p=0.4),
    A.ColorJitter(p=0.42),
    A.RandomBrightnessContrast(p=0.4),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels'])) # to take care of bounding boxes

# Evaluation-time pipeline: deterministic. The test set must not go through
# random crops/flips/colour changes, otherwise every evaluation pass sees
# different inputs and the reported metric is not reproducible. Only the
# resize to the training resolution and the same normalization are kept.
eval_transform = A.Compose([
    A.Resize(height=512, width=512),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))


train_dataset = FacesDataset(train_df, image_dir='data/images', transform=transform)
test_dataset = FacesDataset(test_df, image_dir='data/images', transform=eval_transform)

# Prepare DataLoader
import os
from torch.utils.data import DataLoader

# Number of worker processes used by BOTH loaders (previously this variable was
# defined but ignored, and the loaders hard-coded 4). Set to 0 if FacesDataset
# cannot be loaded by worker processes when running inside Jupyter.
num_workers = 4

# Page-locked host memory only speeds up host-to-CUDA copies; do not request it
# when no CUDA device is present.
use_pinned_memory = torch.cuda.is_available()

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True,
                          num_workers=num_workers, pin_memory=use_pinned_memory)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False,
                         num_workers=num_workers, pin_memory=use_pinned_memory)

# Loss: Smooth L1 between predicted and target box values.
criterion = nn.SmoothL1Loss()

# Optimizer: Adam over the regression head only; the backbone parameters are
# not handed to the optimizer.
head_parameters = model.regressor.parameters()
optimizer = torch.optim.Adam(head_parameters, lr=1e-3)


# Pick the compute device so the same cell runs on a Mac or on cloud nodes:
# CUDA if present, otherwise Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device_name = 'cuda'
elif torch.backends.mps.is_available():
    device_name = 'mps'
else:
    device_name = 'cpu'
device = torch.device(device_name)

model.to(device)

# Mixed precision is used only on CUDA; the gradient scaler is created only in
# that case (the name 'scaler' is not defined otherwise).
use_amp = device.type == 'cuda'
if use_amp:
    scaler = torch.cuda.amp.GradScaler()

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    # The backbone is a frozen feature extractor. model.train() would switch
    # its BatchNorm layers to training mode, where they update their running
    # statistics from the training batches even though their parameters have
    # requires_grad=False. Keep it in eval mode so the pre-trained statistics
    # are used unchanged.
    model.backbone.eval()

    epoch_loss = 0.0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
    for images, targets in progress_bar:
        images = images.to(device)
        # Presumably [batch_size, 4] to match the model output - confirm
        # against what FacesDataset returns.
        targets = targets.to(device)

        optimizer.zero_grad()
        if use_amp:
            # Forward pass and loss under autocast; the scaler scales the loss
            # before backward and drives the optimizer step.
            with torch.cuda.amp.autocast():
                outputs = model(images)
                loss = criterion(outputs, targets)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            outputs = model(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        # Read the scalar once per batch; it is used for both the running
        # total and the progress bar.
        batch_loss = loss.item()
        epoch_loss += batch_loss
        progress_bar.set_postfix(loss=batch_loss)

    # Mean of the per-batch losses over the epoch.
    epoch_loss /= len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {epoch_loss:.4f}")


Training samples: 2686 Testing samples: 664


  check_for_updates()
  check_for_updates()
  check_for_updates()
  check_for_updates()
Epoch 1/10:  90%|█████████ | 100/111 [00:49<00:04,  2.54batch/s, loss=194]