In [None]:
!pip install torch torchvision matplotlib Pillow

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
# Colab-only: mount the user's Google Drive at /content/drive.
# NOTE(review): none of the visible cells read from or write to /content/drive —
# confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install tensorboard



In [None]:
import torch
import json
from torch.utils.data import DataLoader
from torchvision.datasets import VOCDetection
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms.functional as F
import torchvision.transforms as T
from torch.utils.data import Dataset
import os
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

In [None]:
class VOCCarDataset(Dataset):
    """Pascal VOC detection data reduced to a single foreground class, ``car``.

    Wraps ``torchvision.datasets.VOCDetection`` and, for every image, keeps only
    the bounding boxes whose object name is ``'car'``. Images that contain no
    car still yield a sample, with an empty ``[0, 4]`` box tensor.

    Args:
        root: Directory forwarded to ``VOCDetection`` as ``root``.
        year: VOC release year, forwarded to ``VOCDetection``.
        image_set: ``'train'``, ``'val'``, ``'trainval'`` or ``'test'``.
            ``None`` selects ``'test'``. Any other value also falls back to
            ``'test'`` but emits a warning instead of doing so silently.
        download: Forwarded to ``VOCDetection``.
        transforms: Optional callable applied to the image only. The boxes are
            not passed to it, so it must not change the image geometry.
    """

    _KNOWN_SPLITS = ('train', 'val', 'trainval', 'test')

    def __init__(self, root, year='2007', image_set=None, download=True, transforms=None):
        if image_set not in self._KNOWN_SPLITS:
            if image_set is not None:
                # Keep the historical fallback, but make it visible: silently
                # evaluating on the wrong split is easy to miss.
                import warnings
                warnings.warn(
                    f"Unknown image_set {image_set!r}; falling back to 'test'.",
                    stacklevel=2,
                )
            image_set = 'test'
        self.voc = VOCDetection(root=root, year=year, image_set=image_set, download=download)
        self.transforms = transforms
        self.class_to_idx = {'car': 1}

    def __len__(self):
        """Return the number of images in the wrapped VOC split."""
        return len(self.voc)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``target`` is a dict with:
            * ``'boxes'``: float32 tensor ``[N, 4]`` of ``xmin, ymin, xmax, ymax``
              (``N`` may be 0),
            * ``'labels'``: int64 tensor ``[N]`` filled with the ``car`` index,
            * ``'image_id'``: tensor ``[idx]``.

        ``transforms`` (when set) is applied to the image on every path,
        including samples without any annotated object.
        """
        img, annotation = self.voc[idx]
        annotation = annotation['annotation']

        # A missing or None 'object' entry means "no annotated objects".
        objects = annotation.get('object')
        if objects is None:
            objects = []
        elif not isinstance(objects, list):
            # A single object may be stored as a bare dict rather than a list.
            objects = [objects]

        boxes = []
        for obj in objects:
            if obj['name'] != 'car':
                continue
            bndbox = obj['bndbox']
            boxes.append([
                float(bndbox['xmin']),
                float(bndbox['ymin']),
                float(bndbox['xmax']),
                float(bndbox['ymax']),
            ])
        labels = [self.class_to_idx['car']] * len(boxes)

        if boxes:
            boxes_tensor = torch.tensor(boxes, dtype=torch.float32)
        else:
            # Keep the [0, 4] shape for images without cars.
            boxes_tensor = torch.zeros((0, 4), dtype=torch.float32)
        labels_tensor = torch.tensor(labels, dtype=torch.int64)

        target = {
            'boxes': boxes_tensor,
            'labels': labels_tensor,
            'image_id': torch.tensor([idx])
        }

        if self.transforms:
            img = self.transforms(img)

        return img, target

In [None]:
from torchvision.transforms import functional as F
import random

def get_transform(train):
    """Build the per-sample preprocessing callable.

    The returned callable always converts the input image to a tensor.

    * Called as ``fn(image)`` it returns just the image tensor. No random flip
      is applied in this form: flipping the image without also mirroring its
      bounding boxes would leave the boxes pointing at the wrong place.
    * Called as ``fn(image, target)`` it returns ``(image, target)``. When
      ``train`` is true the pair is flipped horizontally with probability 0.5,
      and ``target['boxes']`` (``xmin, ymin, xmax, ymax``) is mirrored to match.
      The input ``target`` dict is not modified.

    Args:
        train: Enable random horizontal flipping for image/target pairs.
    """
    def apply_transforms(image, target=None):
        image = F.to_tensor(image)
        if target is None:
            return image

        if train and random.random() > 0.5:
            width = image.shape[-1]  # to_tensor yields C x H x W
            image = F.hflip(image)
            boxes = target['boxes']
            mirrored = boxes.clone()
            # After a horizontal flip the old right edge becomes the new left edge.
            mirrored[:, 0] = width - boxes[:, 2]
            mirrored[:, 2] = width - boxes[:, 0]
            target = dict(target, boxes=mirrored)
        return image, target
    return apply_transforms


def collate_fn(batch):
    """Transpose a batch of samples into per-field tuples.

    A list such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked, so samples may have
    different sizes. An empty batch yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

In [None]:
def get_model(num_classes):
    """Create a Faster R-CNN (ResNet-50 FPN) detector with a fresh box head.

    The network is initialised from torchvision's default pretrained weights
    and its box predictor is replaced by a new ``FastRCNNPredictor`` sized for
    ``num_classes`` outputs.

    Args:
        num_classes: Number of output classes for the new box predictor
            (the notebook uses 2).

    Returns:
        The model with the replaced ``roi_heads.box_predictor``.
    """
    # `weights="DEFAULT"` is the current spelling of the deprecated
    # `pretrained=True` and selects the same pretrained checkpoint.
    model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [None]:
def train_model(model, device, train_loader, num_epochs=None, num_classes=2,
                checkpoint_path="car_detector.pth",
                log_dir="runs/fasterrcnn_car_detector"):
    """Train a detection model with SGD and log the losses.

    Args:
        model: Detection model that returns a dict of losses when called as
            ``model(images, targets)`` in training mode.
        device: Device the model and every batch are moved to.
        train_loader: Iterable yielding ``(images, targets)`` batches.
        num_epochs: Number of passes over ``train_loader``. Must be provided.
        num_classes: Unused; kept so existing calls keep working.
        checkpoint_path: Where the final ``state_dict`` is written.
        log_dir: TensorBoard log directory.

    Returns:
        dict with keys ``epoch``, ``total_loss``, ``loss_classifier``,
        ``loss_box_reg``, ``loss_objectness`` and ``loss_rpn_box_reg``; each
        maps to a list with one entry per epoch. Loss values are summed over
        the batches of the epoch (not averaged).

    Raises:
        ValueError: If ``num_epochs`` is ``None``.
    """
    # Fail before touching the model or creating log files.
    if num_epochs is None:
        raise ValueError(
            "num_epochs must be given explicitly, e.g. train_model(..., num_epochs=5)"
        )

    # Loss-dict key -> name used under the "Loss/Components" TensorBoard tag.
    component_tags = {
        'loss_classifier': 'classifier',
        'loss_box_reg': 'box_reg',
        'loss_objectness': 'objectness',
        'loss_rpn_box_reg': 'rpn_box_reg',
    }

    model.to(device)
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    model.train()

    loss_history = {'epoch': [], 'total_loss': []}
    for key in component_tags:
        loss_history[key] = []

    writer = SummaryWriter(log_dir=log_dir)
    try:
        for epoch in range(num_epochs):
            print(f"Epoch [{epoch+1}/{num_epochs}]")
            epoch_loss = 0.0
            component_sums = dict.fromkeys(component_tags, 0.0)

            for images, targets in tqdm(train_loader):
                images = [img.to(device) for img in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)
                total_loss = sum(loss_dict.values())

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                epoch_loss += total_loss.item()
                for key in component_sums:
                    component_sums[key] += loss_dict[key].item()

            # Log to TensorBoard
            writer.add_scalar("Loss/Total", epoch_loss, epoch)
            writer.add_scalars(
                "Loss/Components",
                {component_tags[key]: value for key, value in component_sums.items()},
                epoch,
            )

            # Keep the same numbers in the returned history.
            loss_history['epoch'].append(epoch + 1)
            loss_history['total_loss'].append(epoch_loss)
            for key, value in component_sums.items():
                loss_history[key].append(value)

            print(f"Epoch Loss: {epoch_loss:.4f}")
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    torch.save(model.state_dict(), checkpoint_path)
    print(f"Model saved as {checkpoint_path}")
    return loss_history

In [None]:
# Datasets: one VOCCarDataset per split (the class defaults to year='2007').
# Only the training split is built with train=True transforms.
train_dataset = VOCCarDataset(root='data', image_set='train', download=True, transforms=get_transform(train=True))
val_dataset = VOCCarDataset(root='data', image_set='val', download=True, transforms=get_transform(train=False))
test_dataset = VOCCarDataset(root='data', image_set='test', download=True, transforms=get_transform(train=False))

# Loaders: only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)
# Must wrap test_dataset (not train_dataset) so that evaluation is run on
# held-out images rather than on the images the model was trained on.
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

model = get_model(num_classes= 2)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training is disabled here; uncomment to retrain. It writes car_detector.pth
# and dumps the per-epoch loss history to loss_log.json.
#loss_history = train_model(model, device, train_loader, num_epochs=5)
#with open("loss_log.json", "w") as f:
    #json.dump(loss_history, f)

100%|██████████| 460M/460M [00:22<00:00, 20.4MB/s]
100%|██████████| 451M/451M [00:21<00:00, 20.8MB/s]
Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 136MB/s]


In [None]:
def predict(image_path, model_path="car_detector.pth", threshold=0.5):
    """Run the saved detector on one image and plot the confident detections.

    Args:
        image_path: Path of the image file to open; it is converted to RGB.
        model_path: Checkpoint (``state_dict``) loaded into a fresh 2-class
            model from ``get_model``.
        threshold: Minimum score a detection needs in order to be drawn.

    Nothing is returned; the annotated image is shown with matplotlib.
    Uses the notebook-level ``device``.
    """
    image = Image.open(image_path).convert("RGB")
    batch = F.to_tensor(image).unsqueeze(0).to(device)

    detector = get_model(num_classes=2)
    detector.load_state_dict(torch.load(model_path, map_location=device))
    detector.to(device)
    detector.eval()

    with torch.no_grad():
        outputs = detector(batch)
    prediction = outputs[0]

    # matplotlib needs host-side NumPy arrays.
    boxes = prediction['boxes'].cpu().numpy()
    scores = prediction['scores'].cpu().numpy()

    plt.figure(figsize=(10, 8))
    plt.imshow(image)
    ax = plt.gca()

    for (xmin, ymin, xmax, ymax), score in zip(boxes, scores):
        if score < threshold:
            continue
        outline = plt.Rectangle(
            (xmin, ymin),
            xmax - xmin,
            ymax - ymin,
            edgecolor='lime',
            facecolor='none',
            linewidth=2,
        )
        ax.add_patch(outline)
        ax.text(
            xmin,
            ymin,
            f"{score:.2f}",
            color='white',
            bbox=dict(facecolor='green', alpha=0.5),
        )

    plt.axis('off')
    plt.show()

In [None]:
# Run the detector on a sample image. The path is relative to the working
# directory; the recorded run of this cell failed with FileNotFoundError
# because image3.jpeg was not present there.
predict("image3.jpeg")

FileNotFoundError: [Errno 2] No such file or directory: 'image3.jpeg'

In [None]:
!pip install torchmetrics pycocotools

Collecting torchmetrics
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Downloading torchmetrics-1.7.1-py3-none-any.whl (961 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m961.5/961.5 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.14.3-py3-none-any.whl (28 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.14.3 torchmetrics-1.7.1


In [None]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import torch
from tqdm import tqdm
import pandas as pd

In [None]:
def evaluate(model_path="car_detector.pth", test_loader=None,
             output_csv_path="/content/map_results.csv"):
    """Compute mean-average-precision metrics for a saved checkpoint.

    A fresh 2-class model is built with ``get_model`` and loaded from
    ``model_path``, so the result does not depend on whatever a notebook-level
    ``model`` variable currently holds. Uses the notebook-level ``device``.

    Args:
        model_path: Checkpoint (``state_dict``) to evaluate.
        test_loader: Iterable yielding ``(images, targets)`` batches. Required.
        output_csv_path: Where the ``Metric``/``Value`` table is written.

    Returns:
        The dict returned by ``MeanAveragePrecision.compute()`` (tensor values).

    Raises:
        ValueError: If ``test_loader`` is ``None``.
    """
    if test_loader is None:
        raise ValueError("evaluate() needs a DataLoader: pass test_loader=...")

    eval_model = get_model(num_classes=2)
    eval_model.load_state_dict(torch.load(model_path, map_location=device))
    eval_model.to(device)
    eval_model.eval()

    metric = MeanAveragePrecision()

    for images, targets in tqdm(test_loader):
        images = [img.to(device) for img in images]

        with torch.no_grad():
            preds = eval_model(images)

        # The metric is accumulated on the CPU; targets are only needed there,
        # so they are never moved to `device`.
        preds_cpu = [{k: v.cpu() for k, v in p.items()} for p in preds]
        targets_cpu = [{k: v.cpu() for k, v in t.items()} for t in targets]

        metric.update(preds_cpu, targets_cpu)

    map_results = metric.compute()

    # Store plain numbers in the CSV rather than reprs such as "tensor(0.3826)".
    rows = []
    for name, value in map_results.items():
        if torch.is_tensor(value):
            value = value.item() if value.numel() == 1 else value.tolist()
        rows.append((name, value))
    df = pd.DataFrame(rows, columns=["Metric", "Value"])
    df.to_csv(output_csv_path, index=False)

    print(f"📁 mAP results saved to: {output_csv_path}")
    return map_results



In [None]:
# Score the saved checkpoint on the batches yielded by `test_loader`.
# NOTE(review): the recorded run below iterated 626 batches — confirm that this
# matches the size of the intended evaluation split before quoting the metrics.
evaluate(model_path="car_detector.pth", test_loader=test_loader) # Pass test_loader as a keyword argument

100%|██████████| 626/626 [06:09<00:00,  1.69it/s]


📁 mAP results saved to: /content/map_results.csv


{'map': tensor(0.3826),
 'map_50': tensor(0.5899),
 'map_75': tensor(0.4153),
 'map_small': tensor(0.1886),
 'map_medium': tensor(0.2762),
 'map_large': tensor(0.5261),
 'mar_1': tensor(0.2792),
 'mar_10': tensor(0.5245),
 'mar_100': tensor(0.5488),
 'mar_small': tensor(0.3730),
 'mar_medium': tensor(0.4958),
 'mar_large': tensor(0.6775),
 'map_per_class': tensor(-1.),
 'mar_100_per_class': tensor(-1.),
 'classes': tensor(1, dtype=torch.int32)}

In [None]:
import h5py

def save_weights_to_h5(model, file_path="/content/model_weights.h5"):
    """Write every tensor of ``model.state_dict()`` to an HDF5 file.

    One dataset is created per state-dict entry, named after its key. The
    state dict is used instead of ``named_parameters()`` because the latter
    leaves out registered buffers (for example normalisation statistics), and
    a model cannot be restored from parameters alone.

    Args:
        model: Module whose ``state_dict()`` is exported.
        file_path: Destination file; it is overwritten if it exists. The parent
            directory is created when the path contains one.
    """
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        # dirname is '' for a bare filename, and makedirs('') would raise.
        os.makedirs(parent_dir, exist_ok=True)

    with h5py.File(file_path, 'w') as f:
        for name, tensor in model.state_dict().items():
            f.create_dataset(name, data=tensor.detach().cpu().numpy())


# Build a fresh 2-class detector, load the trained checkpoint with its tensors
# mapped to the CPU, and export the weights to the default HDF5 path.
# NOTE: this rebinds the notebook-level name `model`.
model = get_model(num_classes=2)
model.load_state_dict(torch.load("car_detector.pth", map_location="cpu"))
save_weights_to_h5(model)



