In [1]:
import os
import torch
import numpy as np
from PIL import Image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor

class CustomDataset(Dataset):
    """Object-detection dataset backed by YOLO-format label files.

    ``root/images`` holds the image files and ``root/labels`` holds one text
    file per image with the same base name. Every non-blank label line is
    ``class x_center y_center width height`` where the four box values are
    fractions of the image width/height.

    Args:
        root: Directory containing the ``images`` and ``labels`` sub-folders.
        transforms: Callable applied to the image after it has been converted
            to a tensor, or ``None`` to return the PIL image unchanged.
        label_offset: Value added to every class index read from disk.
            Defaults to 1 because YOLO class ids start at 0, while torchvision
            detection models reserve label 0 for the background class.
            Pass 0 if the label files are already 1-based.

    Raises:
        FileNotFoundError: If an image has no label file with the same base
            name.
    """

    def __init__(self, root, transforms, label_offset=1):
        self.root = root
        self.transforms = transforms
        self.label_offset = label_offset
        self.imgs = sorted(os.listdir(os.path.join(root, "images")))

        # Pair each image with its label file by base name. Relying on two
        # independently sorted listings silently mis-pairs samples as soon as
        # one folder contains an extra or differently-ordered file.
        label_by_stem = {
            os.path.splitext(name)[0]: name
            for name in os.listdir(os.path.join(root, "labels"))
        }
        missing = [
            name for name in self.imgs
            if os.path.splitext(name)[0] not in label_by_stem
        ]
        if missing:
            raise FileNotFoundError(
                f"No label file found for {len(missing)} image(s), e.g. {missing[0]!r}"
            )
        self.labels = [label_by_stem[os.path.splitext(name)[0]] for name in self.imgs]

    @staticmethod
    def _read_yolo_labels(label_path, label_offset=1):
        """Parse one YOLO label file.

        Returns:
            A ``(boxes, labels)`` pair: ``boxes`` is a float32 tensor of shape
            ``(N, 4)`` holding ``[x_min, y_min, x_max, y_max]`` as fractions of
            the image size, and ``labels`` is an int64 tensor of shape ``(N,)``
            with ``label_offset`` already added. ``N`` is 0 for a file with no
            annotations.
        """
        boxes = []
        labels = []
        with open(label_path) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank / trailing empty lines.
                    continue
                class_label, x_center, y_center, width, height = map(float, fields)
                boxes.append([
                    x_center - width / 2,
                    y_center - height / 2,
                    x_center + width / 2,
                    y_center + height / 2,
                ])
                labels.append(int(class_label) + label_offset)

        if boxes:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
        else:
            # Keep the (N, 4) layout for images without objects so that the
            # column indexing used for the area computation still works.
            boxes = torch.zeros((0, 4), dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        return boxes, labels

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``target`` is a dict with the keys ``boxes``, ``labels``,
        ``image_id``, ``area`` and ``iscrowd``. Boxes are expressed in pixels
        of the *returned* image, so a resizing transform keeps them aligned.
        Geometric transforms that move content (flips, crops, rotations) are
        not tracked and will leave the boxes misaligned.
        """
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        label_path = os.path.join(self.root, "labels", self.labels[idx])
        img = Image.open(img_path).convert("RGB")
        out_width, out_height = img.width, img.height

        boxes, labels = self._read_yolo_labels(label_path, self.label_offset)

        if self.transforms is not None:
            img = ToTensor()(img)
            img = self.transforms(img)
            if isinstance(img, torch.Tensor):
                # Assumes the transform keeps the (..., H, W) layout produced
                # by ToTensor.
                out_height, out_width = img.shape[-2], img.shape[-1]
            elif hasattr(img, "width") and hasattr(img, "height"):
                out_width, out_height = img.width, img.height

        # Scale the normalised boxes to the size of the image actually returned.
        scale = torch.tensor(
            [out_width, out_height, out_width, out_height], dtype=torch.float32
        )
        boxes = boxes * scale

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((len(labels),), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        return img, target

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.imgs)

# Image pipelines. Each one receives the tensor produced inside CustomDataset
# and must hand back a tensor with values in [0, 1].
#   * No Resize / RandomHorizontalFlip: these change the image geometry without
#     touching the bounding boxes, so the targets would no longer match the
#     pixels. Faster R-CNN already rescales images (and their boxes) internally.
#   * No Normalize: Faster R-CNN applies ImageNet mean/std normalisation itself,
#     so normalising here would normalise twice.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
])

# Validation / test images must be deterministic, so no augmentation is applied.
# An empty Compose is used instead of None because CustomDataset only converts
# the image to a tensor when a transform object is supplied.
eval_transform = transforms.Compose([])

# Kept for backward compatibility with code that refers to `transform`.
transform = train_transform

# Load the dataset
dataset_train = CustomDataset('Emergency Dataset/train', transforms=train_transform)
dataset_val = CustomDataset('Emergency Dataset/valid', transforms=eval_transform)
dataset_test = CustomDataset('Emergency Dataset/test', transforms=eval_transform)

def my_collate(batch):
    """Collate detection samples without stacking them.

    Args:
        batch: Sequence of samples where element 0 is the image and element 1
            is its target.

    Returns:
        A two-element list ``[images, targets]``; each entry is a list with one
        item per sample, in the same order as ``batch``.
    """
    images, annotations = [], []
    for sample in batch:
        images.append(sample[0])
        annotations.append(sample[1])
    return [images, annotations]

# Define the dataloaders.
# Every loader needs the custom collate function: each image has a different
# number of boxes, so the default collation cannot stack the target dicts, and
# the training/evaluation loops expect a list of per-image target dicts.
train_dataloader = DataLoader(dataset_train, batch_size=2, shuffle=True, num_workers=0, collate_fn=my_collate)
val_dataloader = DataLoader(dataset_val, batch_size=2, shuffle=False, num_workers=0, collate_fn=my_collate)
test_dataloader = DataLoader(dataset_test, batch_size=2, shuffle=False, num_workers=0, collate_fn=my_collate)


In [2]:

from torchvision.models.detection import fasterrcnn_resnet50_fpn
import torch.optim as optim
import math

# Load a pre-trained Faster R-CNN model.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True`; kept as-is so the cell runs on the installed version.
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Number of classes is 3 (background, non-emergency vehicle, emergency vehicle)
num_classes = 3

# Get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Move model to the right device
model.to(device)

# Construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler which decreases the learning rate by 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


def compute_average_loss(model, dataloader, device):
    """Return the mean total detection loss per batch over ``dataloader``.

    torchvision detection models only return their loss dict in training
    mode; in eval mode they return predictions, so calling ``.values()`` on
    the result fails. The model is therefore switched to training mode for
    the forward passes, gradients are disabled so nothing is updated, and the
    previous mode is restored afterwards.

    Returns 0.0 when the dataloader yields no batches.
    """
    was_training = model.training
    model.train()
    total_loss = 0.0
    num_batches = 0
    try:
        with torch.no_grad():
            for images, targets in dataloader:
                images = [image.to(device) for image in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)
                total_loss += sum(loss for loss in loss_dict.values()).item()
                num_batches += 1
    finally:
        model.train(was_training)
    return total_loss / max(num_batches, 1)


# Training
num_epochs = 10
min_val_loss = math.inf
checkpoint_path = 'saved_model.pth'

for epoch in range(num_epochs):
    train_loss = 0
    model.train()
    for images, targets in train_dataloader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        train_loss += losses.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    print(f"Epoch #{epoch+1} Train Loss: {train_loss / len(train_dataloader)}")

    # validation
    val_loss = compute_average_loss(model, val_dataloader, device)
    print(f"Epoch #{epoch+1} Validation Loss: {val_loss}")

    # save model if validation loss has decreased
    if val_loss < min_val_loss:
        print(f"Validation Loss Decreased({min_val_loss:.6f}--->{val_loss:.6f}) \t Saving The Model")
        torch.save(model.state_dict(), checkpoint_path)
        min_val_loss = val_loss

    # Update the learning rate
    lr_scheduler.step()

# Load the best model; map_location lets a GPU-trained checkpoint load on CPU.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))

# Test
test_loss = compute_average_loss(model, test_dataloader, device)
print(f"Test Loss: {test_loss}")
model.eval()




KeyboardInterrupt: 