# **Access data on Google Drive**

In [2]:
# Access the files on my Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
!pip install torch torchvision
!pip install pycocotools

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [4]:
import torch
import torch.optim as optim
import torchvision
from torchvision.datasets import CocoDetection
from torch.utils.data import DataLoader
import time
import matplotlib.pyplot as plt

# Define transformations
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    # Add other transformations here if needed
])

# Define the calculate_metrics function
def calculate_metrics(model, data_loader, device):
    model.eval()
    total_tp, total_fp, total_fn = 0, 0, 0
    total_iou = 0
    inference_times = []

    with torch.no_grad():
        for images, targets in data_loader:
            images = list(image.to(device) for image in images)

            new_targets = []
            for t in targets:
                if isinstance(t, dict):
                    if 'boxes' in t:  # Check if 'boxes' key exists
                        boxes = torch.tensor(t['boxes']).float().to(device)
                        labels = torch.tensor(t['labels']).long().to(device)
                        new_targets.append({'boxes': boxes, 'labels': labels})
                else:
                    for t_dict in t:
                        if 'boxes' in t_dict:  # Check if 'boxes' key exists
                            boxes = torch.tensor(t_dict['boxes']).float().to(device)
                            labels = torch.tensor(t_dict['labels']).long().to(device)
                            new_targets.append({'boxes': boxes, 'labels': labels})

            targets = new_targets

            # Measure inference time
            start_time = time.time()
            outputs = model(images)
            inference_times.append(time.time() - start_time)

            # Calculate metrics
            for output, target in zip(outputs, targets):
                tp, fp, fn, iou = calculate_detection_metrics(output, target)
                total_tp += tp
                total_fp += fp
                total_fn += fn
                total_iou += iou

    precision = total_tp / (total_tp + total_fp) if total_tp + total_fp > 0 else 0
    recall = total_tp / (total_tp + total_fn) if total_tp + total_fn > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
    avg_iou = total_iou / (total_tp + total_fn) if total_tp + total_fn > 0 else 0
    fps = len(data_loader.dataset) / sum(inference_times)

    # Save metrics as images
    save_metric_as_image('precision.png', precision)
    save_metric_as_image('recall.png', recall)
    save_metric_as_image('f1_score.png', f1_score)
    save_metric_as_image('avg_iou.png', avg_iou)
    save_metric_as_image('fps.png', fps)

    return precision, recall, f1_score, avg_iou, fps

def save_metric_as_image(filename, metric):
    fig, ax = plt.subplots(figsize=(5, 3))
    ax.text(0.5, 0.5, f"{metric:.4f}", fontsize=24, ha='center', va='center')
    ax.set_axis_off()
    plt.savefig(filename, bbox_inches='tight', pad_inches=0.1)
    plt.close(fig)

def calculate_detection_metrics(output, target):
    iou_threshold = 0.5

    tp, fp, fn = 0, 0, 0
    total_iou = 0

    # Iterate over predicted and ground truth bounding boxes
    for pred_box, pred_label in zip(output['boxes'], output['labels']):
        matched = False
        for gt_box, gt_label in zip(target['boxes'], target['labels']):
            if pred_label == gt_label:
                iou = calculate_iou(pred_box, gt_box)
                if iou >= iou_threshold:
                    matched = True
                    total_iou += iou
                    break

        if matched:
            tp += 1
        else:
            fp += 1

    fn = len(target['boxes']) - tp

    return tp, fp, fn, total_iou

def calculate_iou(box1, box2):
    # Implement IoU calculation logic
    x1, y1, x2, y2 = box1
    x3, y3, x4, y4 = box2

    # Calculate the coordinates of the intersection rectangle
    x_left = max(x1, x3)
    y_top = max(y1, y3)
    x_right = min(x2, x4)
    y_bottom = min(y2, y4)

    # Calculate the area of intersection rectangle
    intersection_area = max(0, x_right - x_left + 1) * max(0, y_bottom - y_top + 1)

    # Calculate the area of both bounding boxes
    box1_area = (x2 - x1 + 1) * (y2 - y1 + 1)
    box2_area = (x4 - x3 + 1) * (y4 - y3 + 1)

    # Calculate the IoU
    iou = intersection_area / float(box1_area + box2_area - intersection_area)

    return iou

# Load the datasets
train_dataset = CocoDetection(root='/content/drive/MyDrive/my_cocojson_dataset/unzipped/Vehicle Registration Plates.v2-licenseplatedatasetv1.coco/train', annFile='/content/drive/MyDrive/my_cocojson_dataset/unzipped/License Plate Recognition.v4-resized640_aug3x-accurate.coco/train/annotations.json', transform=transform)
val_dataset = CocoDetection(root='/content/drive/MyDrive/my_cocojson_dataset/unzipped/Vehicle Registration Plates.v2-licenseplatedatasetv1.coco/valid', annFile='/content/drive/MyDrive/my_cocojson_dataset/unzipped/License Plate Recognition.v4-resized640_aug3x-accurate.coco/valid/annotations.json', transform=transform)
test_dataset = CocoDetection(root='/content/drive/MyDrive/my_cocojson_dataset/unzipped/Vehicle Registration Plates.v2-licenseplatedatasetv1.coco/test', annFile='/content/drive/MyDrive/my_cocojson_dataset/unzipped/License Plate Recognition.v4-resized640_aug3x-accurate.coco/test/annotations.json', transform=transform)

# DataLoader
def collate_fn(batch):
    return tuple(zip(*batch))

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=4, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=4, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=4, collate_fn=collate_fn)

# Initialize the SSD model
model = torchvision.models.detection.ssd300_vgg16(weights='SSD300_VGG16_Weights.DEFAULT')
model = model.to('cuda')  # Move the model to GPU

# Define the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (images, targets) in enumerate(train_loader):
        images = list(image.to('cuda') for image in images)

        # Debug print to check targets structure
        print(f"Batch {batch_idx + 1}")
        print(f"Targets: {targets}")

        # Ensure the target is in the correct format
        new_targets = []
        for t in targets:
            if isinstance(t, dict):
                if 'boxes' in t:  # Check if 'boxes' key exists
                    boxes = torch.tensor(t['boxes']).float().to('cuda')
                    labels = torch.tensor(t['labels']).long().to('cuda')
                    new_targets.append({'boxes': boxes, 'labels': labels})
            else:
                for t_dict in t:
                    if 'boxes' in t_dict:  # Check if 'boxes' key exists
                        boxes = torch.tensor(t_dict['boxes']).float().to('cuda')
                        labels = torch.tensor(t_dict['labels']).long().to('cuda')
                        new_targets.append({'boxes': boxes, 'labels': labels})

        if not new_targets:
            continue  # Skip processing if targets list is empty

        targets = new_targets

        # Forward pass
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backward pass and optimization
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Print loss periodically
        if (batch_idx + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{len(train_loader)}], Loss: {losses.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    precision, recall, f1_score, avg_iou, fps = calculate_metrics(model, val_loader, 'cuda')
    print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1_score:.4f}, Average IoU: {avg_iou:.4f}, FPS: {fps:.2f}')

# Save the trained model
torch.save(model.state_dict(), 'ssd_model.pth')

loading annotations into memory...
Done (t=122.35s)
creating index...
index created!
loading annotations into memory...
Done (t=12.16s)
creating index...
index created!
loading annotations into memory...
Done (t=5.85s)
creating index...
index created!


Downloading: "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth" to /root/.cache/torch/hub/checkpoints/ssd300_vgg16_coco-b556d3b4.pth
100%|██████████| 136M/136M [00:02<00:00, 54.5MB/s]


[1;30;43mStrømmer utdata som er avkortet til de siste 5000 linjene.[0m
Batch 8088
Targets: ([{'id': 17692, 'image_id': 17000, 'category_id': 1, 'bbox': [199, 283, 229, 103.332], 'area': 23663.093, 'segmentation': [], 'iscrowd': 0}], [{'id': 7949, 'image_id': 7621, 'category_id': 1, 'bbox': [235, 179, 119.792, 154.687], 'area': 18530.198, 'segmentation': [], 'iscrowd': 0}])
Batch 8089
Targets: ([{'id': 20314, 'image_id': 19521, 'category_id': 1, 'bbox': [577, 517, 25.145, 25.145], 'area': 632.293, 'segmentation': [], 'iscrowd': 0}], [{'id': 21765, 'image_id': 20920, 'category_id': 1, 'bbox': [211, 276, 214.088, 188.239], 'area': 40299.741, 'segmentation': [], 'iscrowd': 0}])
Batch 8090
Targets: ([{'id': 7838, 'image_id': 7517, 'category_id': 1, 'bbox': [227, 87, 163.088, 188.885], 'area': 30804.871, 'segmentation': [], 'iscrowd': 0}], [{'id': 18632, 'image_id': 17911, 'category_id': 1, 'bbox': [394, 461, 112.39, 67.652], 'area': 7603.349, 'segmentation': [], 'iscrowd': 0}])
Batch 8091

  return F.conv2d(input, weight, bias, self.stride,


Precision: 0.0000, Recall: 0.0000, F1-score: 0.0000, Average IoU: 0.0000, FPS: 29.65
