In [None]:
# Install necessary packages
# pycocotools is needed by the dataset class below (it imports pycocotools.coco.COCO).
# NOTE(review): both installs are unpinned, and albumentations is not referenced by
# any code visible in this notebook -- confirm it is still required.
!pip install -U git+https://github.com/albu/albumentations --no-cache-dir
!pip install pycocotools

# Import Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torchvision.models import resnet50
import os
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image
from typing import Any, Callable, Optional, Tuple, List

# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'



# Define Custom Dataset Class

In [None]:
# Download and unzip custom dataset
# The archive is saved as roboflow.zip, extracted into the current working
# directory, and the zip is removed afterwards.
!curl -L "https://public.roboflow.com/ds/6F1fnWqEs1?key=ixsFJhWXgt" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip

class CarDetection(torch.utils.data.Dataset):
    """COCO-annotated detection dataset yielding ``(image, target)`` pairs.

    Each sample is an image tensor plus a target dict laid out the way
    torchvision detection models consume it:

    * ``boxes``  -- ``float32`` tensor of shape ``(N, 4)`` in ``[x1, y1, x2, y2]``
    * ``labels`` -- ``int64`` tensor of shape ``(N,)`` holding COCO category ids

    Args:
        root: Directory that contains the image files.
        annFile: Path to the COCO-format annotation JSON.
        transform: Optional callable invoked as ``transform(image, target)``
            that returns the transformed ``(image, target)`` pair.
    """

    def __init__(self, root: str, annFile: str, transform: Optional[Callable] = None):
        # Imported here rather than at module level, so pycocotools is only
        # required once a dataset is actually instantiated.
        from pycocotools.coco import COCO
        self.root = root
        self.coco = COCO(annFile)
        # Sorted so that index -> image id is stable between runs.
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.transforms = transform

    def _load_image(self, id: int) -> "Image.Image":
        """Open the image registered under COCO image id ``id`` as RGB."""
        path = self.coco.loadImgs(id)[0]["file_name"]
        return Image.open(os.path.join(self.root, path)).convert("RGB")

    def _load_target(self, id) -> List[Any]:
        """Return the raw COCO annotation dicts attached to image id ``id``."""
        return self.coco.loadAnns(self.coco.getAnnIds(id))

    @staticmethod
    def _to_target(annotations: List[Any]) -> dict:
        """Convert raw COCO annotations into a torchvision-style target dict.

        COCO stores each ``bbox`` as ``[x, y, width, height]``; torchvision
        detection models expect corner format ``[x1, y1, x2, y2]``, so the
        width/height columns are turned into the bottom-right corner.

        An empty annotation list produces ``boxes`` of shape ``(0, 4)`` and
        ``labels`` of shape ``(0,)``.
        """
        boxes = torch.tensor(
            [ann['bbox'] for ann in annotations], dtype=torch.float32
        ).reshape(-1, 4)  # keeps the (N, 4) layout even when N == 0
        boxes[:, 2:] += boxes[:, :2]  # (w, h) -> (x2, y2)
        labels = torch.tensor(
            [ann['category_id'] for ann in annotations], dtype=torch.int64
        )
        return {'boxes': boxes, 'labels': labels}

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """Return ``(image_tensor, target_dict)`` for the sample at ``index``."""
        image_id = self.ids[index]
        image = self._load_image(image_id)
        targets = self._to_target(self._load_target(image_id))

        if self.transforms:
            image, targets = self.transforms(image, targets)

        image = torchvision.transforms.functional.to_tensor(image)
        return image, targets

    def __len__(self) -> int:
        """Number of images listed in the annotation file."""
        return len(self.ids)

# DataLoader Setup
def collate_detection_batch(batch):
    """Collate ``[(image, target), ...]`` into ``(images, targets)`` tuples.

    The default DataLoader collate stacks tensors, which fails when images
    differ in size or carry different numbers of boxes. Keeping the samples as
    per-image tuples avoids that and matches how the training loop iterates
    over ``images`` and ``targets``.
    """
    return tuple(zip(*batch))


def get_loader(root, json, batch_size=1, shuffle=False, num_workers=4, collate_fn=None):
    """Build a DataLoader over a ``CarDetection`` dataset.

    Args:
        root: Directory containing the images.
        json: Path to the COCO annotation file.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the data every epoch.
        num_workers: Number of worker processes used for loading.
        collate_fn: Optional custom batch collation; defaults to
            ``collate_detection_batch``.

    Returns:
        A ``torch.utils.data.DataLoader`` whose batches are
        ``(tuple_of_images, tuple_of_target_dicts)``.
    """
    dataset = CarDetection(root=root, annFile=json)
    if collate_fn is None:
        collate_fn = collate_detection_batch
    data_loader = torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        collate_fn=collate_fn,
    )
    return data_loader

# Training loader: shuffled, two images per batch.
train_loader = get_loader(
    root='train',
    json='/content/train/_annotations.coco.json',
    batch_size=2,
    shuffle=True,
)
# Validation loader: fixed order, one image per batch.
val_loader = get_loader(
    root='valid',
    json='/content/valid/_annotations.coco.json',
    batch_size=1,
    shuffle=False,
)

# Define Custom RetinaNet with FPN

In [None]:
import torchvision
from torchvision.models.detection.retinanet import RetinaNet
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
import torch.nn as nn
import torch

# Define Custom RetinaNet with FPN
class CustomRetinaNet(nn.Module):
    """RetinaNet detector built on a ResNet-50 feature-pyramid backbone.

    The backbone is stored as ``self.backbone`` and the very same instance is
    handed to the wrapped ``RetinaNet`` kept in ``self.retina_net``.

    Args:
        num_classes: Number of classes passed through to ``RetinaNet``.
    """

    def __init__(self, num_classes):
        super().__init__()
        # ResNet-50 + FPN feature extractor, requested with pretrained weights.
        fpn_backbone = resnet_fpn_backbone('resnet50', pretrained=True)
        self.backbone = fpn_backbone
        # Detection model that wraps the backbone created above.
        self.retina_net = RetinaNet(backbone=fpn_backbone, num_classes=num_classes)

    def forward(self, images, targets=None):
        """Forward ``images`` (and optional ``targets``) to the wrapped RetinaNet."""
        detector = self.retina_net
        return detector(images, targets)

# Initialize Model
# One output slot per COCO category known to the training set, plus one extra.
num_classes = 1 + len(train_loader.dataset.coco.getCatIds())  # Including background class
model = CustomRetinaNet(num_classes)
model = model.to(device)



# Training Setup

In [None]:


# Training Setup
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
# Drop the learning rate by 10x every 3 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
num_epochs = 5
model.train()

# Training Loop
for epoch in range(num_epochs):
    # Accumulate the loss over the whole epoch so the printed value reflects
    # every batch rather than only the last one.
    epoch_loss = 0.0
    num_batches = 0

    for images, targets in train_loader:
        # The model takes a list of image tensors and a list of per-image target dicts.
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        # In training mode the model returns a dict of named loss terms.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()
        num_batches += 1

    scheduler.step()
    # An empty loader reports NaN instead of failing on an undefined loss.
    mean_loss = epoch_loss / num_batches if num_batches else float('nan')
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.4f}")



# Prediction and Visualization

In [None]:

# Define Visualization Function
def visualize_bbox(img, bbox, class_name, score, color=(255, 0, 0), thickness=2):
    """Draw one bounding box with a ``"<class>: <score>"`` caption on ``img``.

    Args:
        img: Image passed to the OpenCV drawing calls.
        bbox: ``(x_min, y_min, width, height)`` of the box.
        class_name: Label text shown in the caption.
        score: Confidence value, rendered with two decimals.
        color: Colour tuple used for the box outline and caption background.
        thickness: Line thickness of the box outline.

    Returns:
        The ``img`` object that was passed in.
    """
    x_min, y_min, w, h = bbox
    x_min, x_max, y_min, y_max = int(x_min), int(x_min + w), int(y_min), int(y_min + h)
    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color=color, thickness=thickness)

    # Measure the exact string that gets drawn, so the filled background
    # spans the whole caption and not just the class name.
    caption = f"{class_name}: {score:.2f}"
    (text_width, text_height), _ = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)

    # Filled strip directly above the box's top-left corner.
    cv2.rectangle(img, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), color, -1)
    cv2.putText(img, caption, (x_min, y_min - int(0.3 * text_height)), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1)
    return img

# Prediction and Visualization
def _resolve_category_names(category_names=None):
    """Pick the id -> name lookup used to caption detections.

    Preference order: the explicit argument, a notebook-level
    ``category_id_to_name`` if one exists, then the categories found in
    ``train_loader``'s COCO annotations. Returns an empty dict when none of
    these is available.
    """
    if category_names is not None:
        return category_names
    namespace = globals()
    existing = namespace.get("category_id_to_name")
    if existing is not None:
        return existing
    loader = namespace.get("train_loader")
    if loader is None:
        return {}
    coco = loader.dataset.coco
    return {cat["id"]: cat["name"] for cat in coco.loadCats(coco.getCatIds())}


def predict_and_visualize(image_path, score_threshold=0.6, category_names=None):
    """Run the global ``model`` on one image and plot the confident detections.

    Args:
        image_path: Path of the image file to load.
        score_threshold: Detections are drawn only when their score is
            strictly greater than this value.
        category_names: Optional id -> name lookup for captions. When omitted
            it is resolved by ``_resolve_category_names``; ids without a known
            name are shown as their numeric id.

    Side effects:
        Switches ``model`` to eval mode and displays a matplotlib figure.
    """
    names = _resolve_category_names(category_names)

    model.eval()
    img = Image.open(image_path).convert("RGB")
    img_tensor = torchvision.transforms.functional.to_tensor(img).unsqueeze(0).to(device)

    with torch.no_grad():
        predictions = model(img_tensor)

    # Back to height x width x channel layout for matplotlib.
    img_np = img_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
    plt.imshow(img_np)

    ax = plt.gca()
    detections = predictions[0]
    for box, label, score in zip(detections['boxes'], detections['labels'], detections['scores']):
        if score > score_threshold:
            x_min, y_min, x_max, y_max = box.cpu().numpy()
            label_id = label.item()
            try:
                name = names[label_id]
            except (KeyError, IndexError):
                # Unknown category: fall back to the raw id instead of crashing.
                name = str(label_id)
            ax.add_patch(Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, fill=False, edgecolor=(1, 0, 0), linewidth=2))
            ax.text(x_min, y_min, f"{name}: {score:.2f}", bbox=dict(facecolor='yellow', alpha=0.5), fontsize=8, color='black')

    plt.axis('off')
    plt.show()

# Test the prediction visualization with a sample image
# The path below is a placeholder; point it at a real image file before running.
predict_and_visualize('/content/test/sample_image.jpg')  # Replace with an actual path from your test dataset
