In [None]:
import torch
import torch.nn as nn
import argparse
import os
import numpy as np
import yaml
import random
import cv2
import csv
from PIL import Image
from tqdm import tqdm
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# **ARCHITECTURE + METHODS**

# **DATASET DEFINITION**

In [None]:

import albumentations as A
from albumentations.pytorch import ToTensorV2

# Define Albumentations augmentation pipeline.
# NOTE(review): IRDataset.__getitem__ calls the transform with `image=` only, so the
# rotations/flips below would move the pixels but leave the bounding boxes untouched.
# In the cells visible here the dataset is built without a transform, so this pipeline
# is defined but not yet wired in. Confirm box handling before enabling it.
albumentations_transform = A.Compose([
    A.RandomRotate90(p=0.5),                   # Random 90-degree rotations
    A.HorizontalFlip(p=0.5),                  # Horizontal flipping
    A.VerticalFlip(p=0.5),                    # Vertical flipping
    A.GaussNoise(var_limit=(10, 50), p=0.5),  # Add random Gaussian noise
    A.Normalize(mean=(0.5,), std=(0.5,)),     # Normalize pixel values with a single mean/std of 0.5
    ToTensorV2()                              # Convert to PyTorch tensor
])

  check_for_updates()


In [None]:

import albumentations as A
from albumentations.pytorch import ToTensorV2

class IRDataset(Dataset):
    """Detection dataset pairing images with YOLO-style text annotations.

    Each item is ``(image_tensor, targets, filename)`` where ``targets`` is a
    dict with the keys ``'bboxes'`` (one ``[x_min, y_min, x_max, y_max]`` row
    per object) and ``'labels'`` (one class index per object).

    Args:
        im_dir: Directory containing the image files.
        ann_dir: Directory containing one ``<image id>.txt`` annotation per image.
        transform: Optional callable invoked as ``transform(image=np_array)``
            that returns a dict whose ``'image'`` entry is the tensor to use.
    """

    def __init__(self, im_dir, ann_dir, transform=None):
        self.im_dir = im_dir
        self.ann_dir = ann_dir
        self.transform = transform

        # Index 0 is reserved for 'background'; real classes follow alphabetically.
        classes = ['background'] + sorted(['person', 'vehicle'])

        self.label2idx = {name: idx for idx, name in enumerate(classes)}
        self.idx2label = {idx: name for idx, name in enumerate(classes)}

        # Parse every annotation once up front; images without boxes are dropped there.
        self.images_info = get_image_info(im_dir, ann_dir, self.label2idx, classes)

    def __len__(self):
        return len(self.images_info)

    def __getitem__(self, index):
        im_info = self.images_info[index]

        # The context manager closes the file handle as soon as the pixels are copied.
        with Image.open(im_info['filename']) as im:
            im_np = np.array(im.convert('RGB'))

        if self.transform:
            # NOTE: only the image is handed to the transform, so any geometric
            # augmentation would NOT be applied to the boxes returned below.
            im_tensor = self.transform(image=im_np)['image']
        else:
            im_tensor = torchvision.transforms.ToTensor()(im_np)

        detections = im_info['detections']
        targets = {
            'bboxes': torch.as_tensor([detection['bbox'] for detection in detections]),
            'labels': torch.as_tensor([detection['label'] for detection in detections]),
        }

        return im_tensor, targets, im_info['filename']


In [None]:
def get_image_info(image_directory, annotation_directory, label2idx, classes):
    """Collect image paths, sizes and pixel-space boxes from YOLO-style labels.

    Every ``.jpg``/``.png`` in ``image_directory`` is paired with
    ``<image id>.txt`` in ``annotation_directory``. Each annotation line is
    ``class_id x_center y_center width height`` with coordinates normalised to
    the image size; they are converted to ``[x_min, y_min, x_max, y_max]`` pixels.

    Args:
        image_directory: Directory holding the images.
        annotation_directory: Directory holding the annotation text files.
        label2idx: Mapping from class name to label index (includes 'background').
        classes: Class names with 'background' at position 0.

    Returns:
        list[dict]: One entry per image that has at least one valid box, with
        the keys 'img_id', 'filename', 'width', 'height' and 'detections'.
    """
    im_infos = []
    num_foreground = len(label2idx) - 1

    # sorted(): os.listdir order is arbitrary, and a seeded random split is only
    # reproducible when the underlying sample order is.
    for filename in sorted(os.listdir(image_directory)):
        if not filename.endswith(('.jpg', '.png')):
            continue

        # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b") intact.
        img_id = os.path.splitext(filename)[0]
        img_path = os.path.join(image_directory, filename)

        # Read image to get dimensions; unreadable files are skipped.
        image = cv2.imread(img_path)
        if image is None:
            continue
        height, width = image.shape[:2]

        detections = []
        annotation_file = os.path.join(annotation_directory, f"{img_id}.txt")
        if os.path.exists(annotation_file):
            with open(annotation_file, 'r') as file:
                for line in file:
                    parts = line.strip().split()
                    if len(parts) != 5:
                        continue

                    class_id = int(parts[0])  # Original class ID from annotation
                    if not 0 <= class_id < num_foreground:
                        continue
                    # +1 shifts past 'background', which occupies index 0.
                    mapped_label = label2idx[classes[class_id + 1]]

                    x_center = float(parts[1]) * width
                    y_center = float(parts[2]) * height
                    box_width = float(parts[3]) * width
                    box_height = float(parts[4]) * height

                    # Clamp to the image so boxes never extend past its borders.
                    x_min = max(0, int(x_center - box_width / 2))
                    y_min = max(0, int(y_center - box_height / 2))
                    x_max = min(width, int(x_center + box_width / 2))
                    y_max = min(height, int(y_center + box_height / 2))

                    # Integer truncation can collapse tiny boxes; torchvision's
                    # detection models reject boxes without positive width/height.
                    if x_max <= x_min or y_max <= y_min:
                        continue

                    detections.append({
                        'label': mapped_label,
                        'bbox': [x_min, y_min, x_max, y_max]
                    })

        # Images without any usable box are left out entirely.
        if not detections:
            continue

        im_infos.append({
            'img_id': img_id,
            'filename': img_path,
            'width': width,
            'height': height,
            'detections': detections
        })

    return im_infos

def collate_function(data):
    """Transpose a batch of per-sample tuples into one tuple per field.

    Nothing is stacked, so samples of different sizes can share a batch.
    """
    per_field = zip(*data)
    return tuple(per_field)

In [None]:
from torch.utils.data import random_split

def split_dataset(dataset, split_ratios=[0.7, 0.15, 0.15], seed=42):
    """
    Splits a dataset into training, validation, and test subsets.

    Args:
        dataset (Dataset): The dataset to split.
        split_ratios (list): A list of three floats for train, val, and test splits. Must sum to 1.
        seed (int): Random seed for reproducibility.

    Returns:
        tuple: Datasets for training, validation, and test.
    """
    # Compare with a tolerance: valid ratios such as [0.7, 0.2, 0.1] add up to
    # 0.9999999999999999 in floating point and would fail an exact == 1.0 check.
    assert abs(sum(split_ratios) - 1.0) < 1e-6, "Split ratios must sum to 1."

    # Calculate lengths for each split
    dataset_size = len(dataset)
    train_size = int(split_ratios[0] * dataset_size)
    val_size = int(split_ratios[1] * dataset_size)
    # The test split absorbs whatever the two truncations above left over,
    # so the three sizes always add up to the dataset size.
    test_size = dataset_size - train_size - val_size

    # A dedicated generator keeps the split independent of the global RNG state.
    generator = torch.Generator().manual_seed(seed)

    train_set, val_set, test_set = random_split(
        dataset, [train_size, val_size, test_size], generator=generator
    )

    return train_set, val_set, test_set

In [None]:
# Directories for images and annotations
im_dir = '/content/drive/MyDrive/Colab Notebooks/ir_images/ir_images'
label_dir = '/content/drive/MyDrive/Colab Notebooks/ir_labels/ir_labels'

In [None]:
# Sanity check: parse the annotations directly (same label mapping IRDataset builds)
# and show the first parsed entries to eyeball the pixel-space boxes.
image_info = get_image_info(im_dir, label_dir, {'background': 0, 'person': 1, 'vehicle': 2}, ['background', 'person', 'vehicle'])
for info in image_info[:2]:  # Print the first 2 entries
    print(info)

{'img_id': '20240821_122016_IR_H264_frame_004721', 'filename': '/content/drive/MyDrive/Colab Notebooks/ir_images/20240821_122016_IR_H264_frame_004721.jpg', 'width': 640, 'height': 512, 'detections': [{'label': 1, 'bbox': [233, 424, 260, 491]}, {'label': 1, 'bbox': [410, 417, 443, 493]}, {'label': 1, 'bbox': [486, 415, 514, 492]}, {'label': 1, 'bbox': [604, 412, 626, 489]}, {'label': 1, 'bbox': [625, 416, 640, 489]}, {'label': 1, 'bbox': [616, 414, 632, 489]}]}
{'img_id': '20240821_122016_IR_H264_frame_005250', 'filename': '/content/drive/MyDrive/Colab Notebooks/ir_images/20240821_122016_IR_H264_frame_005250.jpg', 'width': 640, 'height': 512, 'detections': [{'label': 1, 'bbox': [362, 1, 528, 512]}, {'label': 1, 'bbox': [153, 417, 175, 481]}, {'label': 1, 'bbox': [51, 421, 70, 473]}, {'label': 1, 'bbox': [8, 417, 24, 468]}, {'label': 1, 'bbox': [28, 419, 42, 474]}, {'label': 1, 'bbox': [36, 426, 48, 475]}, {'label': 1, 'bbox': [0, 421, 14, 474]}]}


In [None]:
# Build the full dataset (no augmentation transform is passed here).
# It is divided into train/val/test subsets later by split_dataset().
dataset = IRDataset(im_dir=im_dir, ann_dir=label_dir)
len(dataset)

# **MODEL INITIALIZATION**

In [None]:
''' Hyperparameters
    Edit the values as necessary here '''

# --- Region proposal network (RPN) ---
rpn_pre_nms_top_n = 2000            # Number of proposals before NMS (filtering proposals)
rpn_post_nms_top_n = 1000           # Number of proposals after NMS (filtering proposals)
rpn_nms_thresh = 0.7                # IoU threshold for NMS
rpn_fg_iou_thresh = 0.7             # Minimum IoU for a positive anchor
rpn_bg_iou_thresh = 0.3             # Maximum IoU for a negative anchor
rpn_batch_size_per_image = 256      # Number of anchors sampled per image during training

# --- Detection head ---
box_detections_per_img = 9          # Maximum number of detections per image
box_score_thresh = 0.05             # Min. score for a detection to be considered valid
box_nms_thresh = 0.5                # IoU threshold for NMS on detection results
box_fg_iou_thresh = 0.5             # Minimum IoU for a positive RoI during training
box_bg_iou_thresh = 0.5             # Maximum IoU for a negative RoI during training

# --- Optimisation ---
batch_size = 4
lr = 0.003
num_epochs = 100
momentum = 0.9
weight_decay = 0
acc_steps = 4                       # Passed to train() as acc_steps (can leave as 1)

# Multipliers applied in train() in this order:
# loss_classifier, loss_box_reg, loss_objectness, loss_rpn_box_reg
loss_weights = [1, 1, 1, 1]

# --- Data split and loaders (train / val / test fractions) ---
split_ratios = [0.7, 0.15, 0.15]
train_dataset, val_dataset, test_dataset = split_dataset(dataset, split_ratios)

# Only the training loader shuffles; collate_function keeps samples un-stacked.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, collate_fn=collate_function)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, collate_fn=collate_function)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, collate_fn=collate_function)

In [None]:
# Faster R-CNN (ResNet-50 FPN backbone) configured with the hyperparameters defined above.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of `weights=`;
# confirm against the installed version.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True,
                                                             min_size=512,
                                                             max_size=640,
                                                             rpn_pre_nms_top_n=rpn_pre_nms_top_n,
                                                             rpn_post_nms_top_n=rpn_post_nms_top_n,
                                                             rpn_nms_thresh=rpn_nms_thresh,
                                                             rpn_fg_iou_thresh=rpn_fg_iou_thresh,
                                                             rpn_bg_iou_thresh=rpn_bg_iou_thresh,
                                                             rpn_batch_size_per_image=rpn_batch_size_per_image,
                                                             box_detections_per_img=box_detections_per_img,
                                                             box_nms_thresh=box_nms_thresh,
                                                             box_score_thresh=box_score_thresh,
                                                             box_fg_iou_thresh=box_fg_iou_thresh,
                                                             box_bg_iou_thresh=box_bg_iou_thresh,
)

# Swap the box predictor for one with 3 outputs, matching the three entries
# IRDataset puts in label2idx: background, person, vehicle.
model.roi_heads.box_predictor = FastRCNNPredictor(
            model.roi_heads.box_predictor.cls_score.in_features,
            num_classes=3)

# SGD over the parameters that currently require gradients only.
optimizer = torch.optim.SGD(lr=lr, momentum=momentum, weight_decay=weight_decay,
                            params=filter(lambda p: p.requires_grad, model.parameters()))

# **TRAINING ROUTINE**

In [None]:
# NOTE: train() and the helpers it calls (get_model_name, evaluate_map, compute_map,
# get_iou, save_results) are defined in cells further down this notebook. On a fresh
# kernel those cells must be executed before this one, otherwise this raises NameError.
train(model, dataset, train_loader, val_loader, optimizer, loss_weights, num_epochs, acc_steps, device)

In [None]:
print(len(dataset))
print(len(train_dataset))
print(len(train_loader))
print(len(val_dataset))
print(len(val_loader))
print(len(test_dataset))
print(len(test_loader))

1108
775
194
166
42
167
42


In [None]:
def get_model_name(name, batch_size, learning_rate, epoch):
    """Build the checkpoint name for one training epoch.

    Args:
        name: Short model identifier placed right after the ``model_`` prefix.
        batch_size: Accepted for call compatibility; not part of the returned name.
        learning_rate: Learning rate embedded after ``lr``.
        epoch: Epoch index embedded after ``epoch``.
    Returns:
        path: A string of the form ``model_<name>_lr<learning_rate>_epoch<epoch>``.
    """
    return f"model_{name}_lr{learning_rate}_epoch{epoch}"

In [None]:
def _prepare_detection_batch(ims, targets, device):
    """Move one collated batch to `device` in the format the detector expects."""
    for target in targets:
        # The dataset emits 'bboxes'; the detection model reads 'boxes'.
        target['boxes'] = target.pop('bboxes').float().to(device)
        target['labels'] = target['labels'].long().to(device)
    images = [im.float().to(device) for im in ims]
    return images, targets


def train(model, dataset, train_loader, val_loader, optimizer, loss_weights, num_epochs, acc_steps=1, device='cpu'):
    """Train the detector, tracking per-epoch losses and validation mAP.

    Every epoch runs a full pass over `train_loader`, evaluates mAP on
    `val_loader`, computes validation losses, saves the model weights and
    prints a summary line. After the last epoch all histories are written
    to disk via `save_results`.

    Args:
        model: Detection model returning a loss dict with the keys
            'loss_classifier', 'loss_box_reg', 'loss_objectness' and
            'loss_rpn_box_reg' when called in training mode with targets.
        dataset: Dataset exposing `label2idx` / `idx2label` (used for mAP).
        train_loader: Loader yielding `(images, targets, filenames)` batches.
        val_loader: Loader with the same batch layout, used for validation.
        optimizer: Optimizer already bound to the model parameters.
        loss_weights: Four multipliers applied, in order, to the classifier,
            box-regression, objectness and RPN box-regression losses.
        num_epochs: Number of epochs to run.
        acc_steps: Number of batches whose gradients are accumulated before
            each optimizer step (1 = step after every batch).
        device: Device the model and batches are moved to.
    """
    model.train()
    model.to(device)

    acc_steps = max(1, int(acc_steps))

    # Reporting order: RPN class, RPN regression, detector class, detector regression.
    loss_keys = ('loss_objectness', 'loss_rpn_box_reg', 'loss_classifier', 'loss_box_reg')

    train_history = {key: [] for key in loss_keys}
    val_history = {key: [] for key in loss_keys}
    mean_ap_history = []
    all_aps_history = []
    model_path = None

    for epoch in range(num_epochs):
        train_losses = {key: [] for key in loss_keys}
        val_losses = {key: [] for key in loss_keys}

        # Training loop: a full pass over the loader with gradient accumulation.
        optimizer.zero_grad()
        step = 0
        for step, (ims, targets, _) in enumerate(tqdm(train_loader, desc='Training'), start=1):
            images, targets = _prepare_detection_batch(ims, targets, device)
            batch_losses = model(images, targets)

            loss = loss_weights[0]*batch_losses['loss_classifier']
            loss += loss_weights[1]*batch_losses['loss_box_reg']
            loss += loss_weights[2]*batch_losses['loss_objectness']
            loss += loss_weights[3]*batch_losses['loss_rpn_box_reg']

            for key in loss_keys:
                train_losses[key].append(batch_losses[key].item())

            # Scale so the accumulated gradient is the mean over the window.
            (loss / acc_steps).backward()
            if step % acc_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

        # Flush gradients left over from an incomplete accumulation window.
        if step % acc_steps != 0:
            optimizer.step()
            optimizer.zero_grad()

        # Evaluate mAP (switches the model to eval mode, so switch back afterwards).
        mean_ap, all_aps, _, _, _, _, _, _ = evaluate_map(model, dataset, val_loader, device)
        model.train()

        print('Mean Average Precision : {:.4f}'.format(mean_ap))

        # Validation losses: the model only returns losses in train mode, so it
        # stays in train mode here. no_grad() alone prevents graph construction;
        # requires_grad flags are left untouched so layers that were frozen
        # before training stay frozen.
        with torch.no_grad():
            for ims, targets, _ in tqdm(val_loader, desc='Validation'):
                images, targets = _prepare_detection_batch(ims, targets, device)
                batch_losses = model(images, targets)
                for key in loss_keys:
                    val_losses[key].append(batch_losses[key].item())

        print('Finished epoch {}'.format(epoch))

        # Average losses for the epoch.
        avg_train = {key: np.mean(train_losses[key]) for key in loss_keys}
        avg_val = {key: np.mean(val_losses[key]) for key in loss_keys}
        for key in loss_keys:
            train_history[key].append(avg_train[key])
            val_history[key].append(avg_val[key])

        mean_ap_history.append(mean_ap)
        all_aps_history.append(all_aps)

        # Save a checkpoint. Hyperparameters come from the objects passed in
        # rather than from notebook globals.
        current_lr = optimizer.param_groups[0]['lr']
        model_path = get_model_name('frcnn', train_loader.batch_size, current_lr, epoch)
        torch.save(model.state_dict(), model_path)

        loss_output = ''
        loss_output += 'RPN Classification Loss (Train): {:.4f}'.format(avg_train['loss_objectness'])
        loss_output += ' | RPN Localization Loss (Train): {:.4f}'.format(avg_train['loss_rpn_box_reg'])
        loss_output += ' | FRCNN Classification Loss (Train): {:.4f}'.format(avg_train['loss_classifier'])
        loss_output += ' | FRCNN Localization Loss (Train): {:.4f}'.format(avg_train['loss_box_reg'])

        loss_output += ' | RPN Classification Loss (Val): {:.4f}'.format(avg_val['loss_objectness'])
        loss_output += ' | RPN Localization Loss (Val): {:.4f}'.format(avg_val['loss_rpn_box_reg'])
        loss_output += ' | FRCNN Classification Loss (Val): {:.4f}'.format(avg_val['loss_classifier'])
        loss_output += ' | FRCNN Localization Loss (Val): {:.4f}'.format(avg_val['loss_box_reg'])

        loss_output += ' | Mean Average Precision: {:.4f}'.format(mean_ap)

        print(loss_output)

    # With zero epochs there is no checkpoint name to derive a results folder from.
    if model_path is None:
        print('Done Training...')
        return

    save_results(model_path,
                 train_history['loss_objectness'], train_history['loss_rpn_box_reg'],
                 train_history['loss_classifier'], train_history['loss_box_reg'],
                 val_history['loss_objectness'], val_history['loss_rpn_box_reg'],
                 val_history['loss_classifier'], val_history['loss_box_reg'],
                 mean_ap_history, all_aps_history, num_epochs)
    print('Done Training...')

In [None]:
def save_results(model_path, train_rpn_class_loss_history, train_rpn_reg_loss_history, train_detector_class_loss_history,
                 train_detector_reg_loss_history, val_rpn_class_loss_history, val_rpn_reg_loss_history, val_detector_class_loss_history,
                 val_detector_reg_loss_history, mean_ap_history, all_aps_history, num_epochs):
    """Write the training histories into a ``<model name>_results`` folder.

    The folder is created next to ``model_path``. Each scalar history goes
    into its own text file via ``np.savetxt``; the per-class AP history is
    written as a CSV with one column per class and one row per epoch, with
    the 'background' entry left out.

    Args:
        model_path: Checkpoint path whose directory and file name determine
            where the results folder is created.
        train_*_history / val_*_history: Per-epoch loss sequences.
        mean_ap_history: Per-epoch mean average precision values.
        all_aps_history: Per-epoch dicts mapping class name to AP.
        num_epochs: Accepted for call compatibility; not read here.
    """
    parent_dir, checkpoint_name = os.path.split(model_path)
    results_dir = os.path.join(parent_dir, f"{checkpoint_name}_results")
    os.makedirs(results_dir, exist_ok=True)

    # File name -> series table replaces one path variable per history.
    scalar_series = (
        ("train_rpn_class_loss.csv", train_rpn_class_loss_history),
        ("train_rpn_reg_loss.csv", train_rpn_reg_loss_history),
        ("train_detector_class_loss.csv", train_detector_class_loss_history),
        ("train_detector_reg_loss.csv", train_detector_reg_loss_history),
        ("val_rpn_class_loss.csv", val_rpn_class_loss_history),
        ("val_rpn_reg_loss.csv", val_rpn_reg_loss_history),
        ("val_detector_class_loss.csv", val_detector_class_loss_history),
        ("val_detector_reg_loss.csv", val_detector_reg_loss_history),
        ("mean_ap.csv", mean_ap_history),
    )
    for csv_name, series in scalar_series:
        np.savetxt(os.path.join(results_dir, csv_name), series)

    # Per-class AP table: header is the sorted union of class names seen.
    with open(os.path.join(results_dir, "all_aps.csv"), mode='w', newline='') as csvfile:
        columns = set()
        for epoch_aps in all_aps_history:
            columns.update(label for label in epoch_aps.keys() if label != 'background')
        columns = sorted(columns)

        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()

        for epoch_aps in all_aps_history:
            row = {label: epoch_aps[label] for label in epoch_aps if label != 'background'}
            writer.writerow(row)

    print(f'Results saved in folder: {results_dir}')

In [None]:
import matplotlib.pyplot as plt

def _plot_and_save(path, file_stem, title, ylabel, n, curves):
    """Draw the given (label, values) curves on a fresh figure, save it and show it."""
    fig, ax = plt.subplots()
    ax.set_title(title)
    for label, values in curves:
        ax.plot(range(1, n + 1), values, label=label)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(ylabel)
    if any(label is not None for label, _ in curves):
        ax.legend(loc='best')
    fig.savefig(os.path.join(path, file_stem))
    plt.show()
    # Close explicitly so figures never pile up or bleed into the next plot,
    # regardless of how the active backend treats plt.show().
    plt.close(fig)


def plot_curves(path):
    """Plot the training curves for a model run from its saved result files.

    Reads the loss and mAP files that `save_results` writes into `path`,
    then shows and saves one figure each for the RPN class loss, RPN
    regression loss, detector class loss, detector regression loss, total
    loss (sum of the four) and mean average precision.

    Args:
        path: The results folder containing the csv files produced during training
    """

    def load(file_name):
        # np.loadtxt returns a 0-d array for a single-epoch run; atleast_1d
        # keeps len() and plotting working in that case.
        return np.atleast_1d(np.loadtxt("{}/{}".format(path, file_name)))

    train_rpn_class_loss = load("train_rpn_class_loss.csv")
    train_rpn_reg_loss = load("train_rpn_reg_loss.csv")
    train_detector_class_loss = load("train_detector_class_loss.csv")
    train_detector_reg_loss = load("train_detector_reg_loss.csv")

    val_rpn_class_loss = load("val_rpn_class_loss.csv")
    val_rpn_reg_loss = load("val_rpn_reg_loss.csv")
    val_detector_class_loss = load("val_detector_class_loss.csv")
    val_detector_reg_loss = load("val_detector_reg_loss.csv")

    mean_ap = load("mean_ap.csv")

    train_loss = train_rpn_class_loss + train_rpn_reg_loss + train_detector_class_loss + train_detector_reg_loss
    val_loss = val_rpn_class_loss + val_rpn_reg_loss + val_detector_class_loss + val_detector_reg_loss

    n = len(train_loss)

    loss_figures = [
        ("rpn_class", "RPN Class Loss", train_rpn_class_loss, val_rpn_class_loss),
        ("rpn_reg", "RPN Regression Loss", train_rpn_reg_loss, val_rpn_reg_loss),
        ("detector_class", "Detector Class Loss", train_detector_class_loss, val_detector_class_loss),
        ("detector_reg", "Detector Regression Loss", train_detector_reg_loss, val_detector_reg_loss),
        ("total_loss", "Total Loss", train_loss, val_loss),
    ]
    for file_stem, title, train_values, val_values in loss_figures:
        _plot_and_save(path, file_stem, title, "Loss", n,
                       [("Train", train_values), ("Val", val_values)])

    _plot_and_save(path, "mAP", "Mean Average Precision", "mAP", n, [(None, mean_ap)])


In [None]:
def evaluate_map(model, dataset, val_loader, device, iou_threshold=0.5):
    """Run the detector over a loader and score its predictions with `compute_map`.

    Every image of every batch is evaluated. Predictions and ground truths
    are grouped per image into dicts keyed by class name before scoring.

    Args:
        model: Detection model; switched to eval mode and moved to `device`.
        dataset: Object exposing `label2idx` and `idx2label` class mappings.
        val_loader: Loader yielding `(images, targets, filenames)` batches where
            each target holds 'bboxes' and 'labels' tensors.
        device: Device used for inference.
        iou_threshold: IoU needed for a prediction to match a ground-truth box.

    Returns:
        tuple: `(mean_ap, all_aps, mean_recall, mean_precision, mean_f1,
        all_recalls, all_precisions, all_f1_scores)` as produced by `compute_map`.
    """
    model.eval()
    model.to(device)
    gts = []
    preds = []

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for ims, targets, _ in tqdm(val_loader, desc='Evaluating mAP'):
            images = [im.float().to(device) for im in ims]
            outputs = model(images)

            # One prediction dict comes back per input image, in order.
            for output, target in zip(outputs, targets):
                pred_boxes = {label_name: [] for label_name in dataset.label2idx}
                gt_boxes = {label_name: [] for label_name in dataset.label2idx}

                boxes = output['boxes'].detach().cpu().numpy()
                labels = output['labels'].detach().cpu().tolist()
                scores = output['scores'].detach().cpu().tolist()
                for (x1, y1, x2, y2), label, score in zip(boxes, labels, scores):
                    pred_boxes[dataset.idx2label[label]].append([x1, y1, x2, y2, score])

                # Ground truth is only read on the host, so it never goes to `device`.
                target_boxes = target['bboxes'].float().cpu().numpy()
                target_labels = target['labels'].long().cpu().tolist()
                for (x1, y1, x2, y2), label in zip(target_boxes, target_labels):
                    gt_boxes[dataset.idx2label[label]].append([x1, y1, x2, y2])

                gts.append(gt_boxes)
                preds.append(pred_boxes)

    return compute_map(preds, gts, iou_threshold)

In [None]:
def get_iou(det, gt):
    """Intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

    Returns 0.0 when the boxes do not overlap. A small epsilon in the
    denominator guards against division by zero for empty boxes.
    """
    dx1, dy1, dx2, dy2 = det
    gx1, gy1, gx2, gy2 = gt

    # Signed extent of the overlap along each axis; negative means disjoint.
    overlap_w = min(dx2, gx2) - max(dx1, gx1)
    overlap_h = min(dy2, gy2) - max(dy1, gy1)
    if overlap_w < 0 or overlap_h < 0:
        return 0.0

    overlap_area = overlap_w * overlap_h
    det_area = (dx2 - dx1) * (dy2 - dy1)
    gt_area = (gx2 - gx1) * (gy2 - gy1)
    union_area = float(det_area + gt_area - overlap_area + 1E-6)
    return overlap_area / union_area

In [None]:
def compute_map(det_boxes, gt_boxes, iou_threshold=0.5):
    """Compute per-class AP plus summary recall/precision/F1 over a set of images.

    Args:
        det_boxes: One dict per image mapping class name to a list of
            detections ``[x1, y1, x2, y2, score]`` (score is the last element).
        gt_boxes: One dict per image mapping class name to a list of
            ground-truth boxes ``[x1, y1, x2, y2]``. Every image's dict is
            indexed with every class name found across all images.
        iou_threshold: Minimum IoU for a detection to count as a true positive.

    Returns:
        tuple: ``(mean_ap, all_aps, mean_recall, mean_precision, mean_f1,
        all_recalls, all_precisions, all_f1_scores)``. The ``all_*`` values are
        dicts keyed by class name; classes without any ground-truth box get
        NaN there and are excluded from the means. Each mean is 0 when no
        class has ground truth.
    """
    # Union of class names appearing in any image's ground-truth dict.
    gt_labels = {cls_key for im_gt in gt_boxes for cls_key in im_gt.keys()}
    gt_labels = sorted(gt_labels)
    all_aps = {}
    all_recalls = {}
    all_precisions = {}
    all_f1_scores = {}

    aps = []  # AP of every class that has ground truth
    recalls_per_class = []  # Final recall of every class that has ground truth
    precisions_per_class = []  # Summary precision of every class that has ground truth
    f1_scores_per_class = []  # F1-scores per class

    for idx, label in enumerate(gt_labels):
        # Flatten this class's detections across images as [image index, detection].
        cls_dets = [
            [im_idx, im_dets_label] for im_idx, im_dets in enumerate(det_boxes)
            if label in im_dets for im_dets_label in im_dets[label]
        ]

        # Highest score first, so confident detections claim ground truths first.
        cls_dets = sorted(cls_dets, key=lambda k: -k[1][-1])
        # Per image, one flag per ground-truth box: has it been matched already?
        gt_matched = [[False for _ in im_gts[label]] for im_gts in gt_boxes]
        num_gts = sum([len(im_gts[label]) for im_gts in gt_boxes])
        tp = [0] * len(cls_dets)
        fp = [0] * len(cls_dets)

        for det_idx, (im_idx, det_pred) in enumerate(cls_dets):
            im_gts = gt_boxes[im_idx][label]
            max_iou_found = -1
            max_iou_gt_idx = -1

            # Find the ground-truth box in the same image with the highest IoU.
            for gt_box_idx, gt_box in enumerate(im_gts):
                gt_box_iou = get_iou(det_pred[:-1], gt_box)
                if gt_box_iou > max_iou_found:
                    max_iou_found = gt_box_iou
                    max_iou_gt_idx = gt_box_idx

            # False positive if the overlap is too small or the best ground truth
            # was already claimed. With no ground truth in the image,
            # max_iou_found stays -1 and the first test short-circuits.
            if max_iou_found < iou_threshold or gt_matched[im_idx][max_iou_gt_idx]:
                fp[det_idx] = 1
            else:
                tp[det_idx] = 1
                gt_matched[im_idx][max_iou_gt_idx] = True

        # Running totals down the score-ranked list.
        tp = np.cumsum(tp)
        fp = np.cumsum(fp)

        # eps avoids division by zero when there are no ground truths / detections.
        eps = np.finfo(np.float32).eps
        recalls = tp / np.maximum(num_gts, eps)
        precisions = tp / np.maximum((tp + fp), eps)

        # Pad with sentinel points so the curve spans recall 0 to 1.
        recalls = np.concatenate(([0.0], recalls, [1.0]))
        precisions = np.concatenate(([0.0], precisions, [0.0]))

        # Make precision non-increasing from left to right (running max from the right).
        for i in range(precisions.size - 1, 0, -1):
            precisions[i - 1] = np.maximum(precisions[i - 1], precisions[i])
        # AP = area under the step curve, summed where recall changes.
        i = np.where(recalls[1:] != recalls[:-1])[0]
        ap = np.sum((recalls[i + 1] - recalls[i]) * precisions[i + 1])

        if num_gts > 0:
            aps.append(ap)
            all_aps[label] = ap
            avg_recall = recalls[-2]  # Recall after the last detection (entry before the 1.0 sentinel)
            # Smoothed precision at the first point whose recall is > 0
            # (index 0 if recall never becomes positive).
            avg_precision = precisions[np.argmax(recalls > 0)]

            # F1-Score Calculation
            if avg_precision + avg_recall > 0:
                f1_score = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall)
            else:
                f1_score = 0

            # Store metrics
            all_recalls[label] = avg_recall
            all_precisions[label] = avg_precision
            all_f1_scores[label] = f1_score

            recalls_per_class.append(avg_recall)
            precisions_per_class.append(avg_precision)
            f1_scores_per_class.append(f1_score)
        else:
            # No ground truth for this class: metrics are undefined.
            all_aps[label] = np.nan
            all_recalls[label] = np.nan
            all_precisions[label] = np.nan
            all_f1_scores[label] = np.nan

    # Means over classes that have ground truth; 0 when there are none.
    mean_ap = sum(aps) / len(aps) if aps else 0
    mean_recall = sum(recalls_per_class) / len(recalls_per_class) if recalls_per_class else 0
    mean_precision = sum(precisions_per_class) / len(precisions_per_class) if precisions_per_class else 0
    mean_f1 = sum(f1_scores_per_class) / len(f1_scores_per_class) if f1_scores_per_class else 0

    return mean_ap, all_aps, mean_recall, mean_precision, mean_f1, all_recalls, all_precisions, all_f1_scores

In [None]:
def pascal_map(model, dataset, loader, device):
    """Average the mean AP over IoU thresholds 0.50, 0.55, ..., 0.95.

    Args:
        model: Detection model to evaluate.
        dataset: Object exposing the class mappings `evaluate_map` needs.
        loader: Loader yielding the evaluation batches.
        device: Device used for inference.

    Returns:
        float: Mean of the per-threshold mean AP values.
    """
    model.eval()
    model.to(device)

    # linspace includes the 0.95 endpoint and yields exactly ten thresholds.
    # np.arange(0.5, 0.95, 0.05) stops at 0.90, and its length depends on
    # floating-point rounding of the step.
    iou_thresholds = np.linspace(0.5, 0.95, 10)

    maps = []
    for threshold in iou_thresholds:
        mean_ap, all_aps, _, _, _, _, _, _ = evaluate_map(model, dataset, loader, device, iou_threshold=threshold)
        maps.append(mean_ap)

    return np.mean(maps)

# **INFERENCE**

In [None]:
def _box_to_int_corners(box):
    """Convert a 4-element box tensor into a list of int corner coordinates."""
    return [int(value) for value in box.detach().cpu().tolist()]


def _draw_labeled_boxes(image, boxes, captions, color):
    """Draw captioned boxes on ``image`` in place and return it.

    Each box outline and caption is drawn on ``image`` and on a copy of it.
    The copy additionally gets a filled white rectangle behind every caption.
    The copy is finally blended back into ``image`` (0.7 copy / 0.3 image),
    so the caption background ends up semi-transparent.
    """
    overlay = image.copy()
    for (x1, y1, x2, y2), caption in zip(boxes, captions):
        cv2.rectangle(image, (x1, y1), (x2, y2), thickness=2, color=color)
        cv2.rectangle(overlay, (x1, y1), (x2, y2), thickness=2, color=color)

        (text_w, text_h), _ = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 1, 1)
        cv2.rectangle(overlay, (x1, y1), (x1 + 10 + text_w, y1 + 10 + text_h), [255, 255, 255], -1)

        for canvas in (image, overlay):
            cv2.putText(canvas, text=caption,
                        org=(x1 + 5, y1 + 15),
                        thickness=1,
                        fontScale=1,
                        color=[0, 0, 0],
                        fontFace=cv2.FONT_HERSHEY_PLAIN)
    cv2.addWeighted(overlay, 0.7, image, 0.3, 0, image)
    return image


def infer(model, dataset, test_loader, output_dir, device, score_threshold, num_samples=9):
    """Save ground-truth and prediction visualisations for random samples.

    For each of ``num_samples`` randomly drawn dataset items, two images are
    written to ``output_dir``: ``output_gt_<i>.png`` with the ground-truth
    boxes in green and ``output_<i>.jpg`` with the predicted boxes in red.
    Files with the same names are overwritten.

    Args:
        model: Detection model, called as ``model(images, None)``. The first
            element of its result must be a dict with 'boxes', 'labels' and
            'scores'.
        dataset: Indexable dataset yielding ``(image_tensor, target, filename)``
            and exposing ``idx2label``. ``target`` must hold 'bboxes' and
            'labels'.
        test_loader: Unused; kept so existing call sites keep working.
        output_dir: Directory for the output images; created if missing,
            including intermediate directories.
        device: Device the model and input tensor are moved to.
        score_threshold: Predictions with a score below this are not drawn.
        num_samples: Number of random samples to visualise (default 9).

    Raises:
        OSError: If OpenCV cannot read a sampled image file.
    """
    model.eval()
    model.to(device)

    # makedirs also handles nested paths and an already-existing directory.
    os.makedirs(output_dir, exist_ok=True)

    for sample_count in tqdm(range(num_samples)):
        random_idx = random.randint(0, len(dataset) - 1)
        im, target, fname = dataset[random_idx]

        source_im = cv2.imread(fname)
        if source_im is None:
            raise OSError('Could not read image {}'.format(fname))
        pred_im = source_im.copy()

        # Ground truth: green boxes captioned with the class name.
        gt_boxes = [_box_to_int_corners(box) for box in target['bboxes']]
        gt_captions = [dataset.idx2label[target['labels'][idx].detach().cpu().item()]
                       for idx in range(len(gt_boxes))]
        gt_im = _draw_labeled_boxes(source_im, gt_boxes, gt_captions, [0, 255, 0])
        cv2.imwrite('{}/output_gt_{}.png'.format(output_dir, sample_count), gt_im)

        # Predictions: no gradients are needed for visualisation, so skip
        # building the autograd graph.
        with torch.no_grad():
            output = model(im.unsqueeze(0).float().to(device), None)[0]

        keep = output['scores'] >= score_threshold
        boxes = output['boxes'][keep]
        labels = output['labels'][keep]
        scores = output['scores'][keep]

        pred_boxes = [_box_to_int_corners(box) for box in boxes]
        pred_captions = ['{} : {:.2f}'.format(dataset.idx2label[labels[idx].detach().cpu().item()],
                                              scores[idx].detach().cpu().item())
                         for idx in range(len(pred_boxes))]
        _draw_labeled_boxes(pred_im, pred_boxes, pred_captions, [0, 0, 255])
        cv2.imwrite('{}/output_{}.jpg'.format(output_dir, sample_count), pred_im)

# **EVALUATION**

In [None]:
# Plot the curves for the lr=0.003 / epoch-99 results folder.
# plot_curves is defined in an earlier cell; presumably it reads the CSV logs
# stored in this directory — confirm.
plot_curves('/content/model_frcnn_lr0.003_epoch99_results')

In [None]:
# Write randomly sampled ground-truth / prediction image pairs into ./yes,
# drawing only predictions with score >= 0.5. The loader argument is accepted
# by infer() but not read by it.
infer(model, dataset, val_loader, 'yes', device, 0.5)

100%|██████████| 9/9 [00:28<00:00,  3.15s/it]


## **TESTING**

In [None]:
# Evaluate on the test loader with a strict IoU threshold of 0.9; the returned
# tuple (mean AP, per-class APs, recall/precision/F1 summaries) is the cell output.
evaluate_map(model, dataset, test_loader, device, iou_threshold=0.9)

# **SAVE DATA**

In [None]:
# Archive the results folder (loss CSVs and plots) into a single zip file.
!zip -r /content/model_frcnn_lr0.003_epoch99_results.zip /content/model_frcnn_lr0.003_epoch99_results

  adding: content/model_frcnn_lr0.003_epoch99_results/ (stored 0%)
  adding: content/model_frcnn_lr0.003_epoch99_results/train_detector_reg_loss.csv (deflated 56%)
  adding: content/model_frcnn_lr0.003_epoch99_results/detector_class.png (deflated 7%)
  adding: content/model_frcnn_lr0.003_epoch99_results/mAP.png (deflated 8%)
  adding: content/model_frcnn_lr0.003_epoch99_results/total_loss.png (deflated 4%)
  adding: content/model_frcnn_lr0.003_epoch99_results/train_rpn_class_loss.csv (deflated 54%)
  adding: content/model_frcnn_lr0.003_epoch99_results/rpn_class.png (deflated 6%)
  adding: content/model_frcnn_lr0.003_epoch99_results/train_detector_class_loss.csv (deflated 54%)
  adding: content/model_frcnn_lr0.003_epoch99_results/train_rpn_reg_loss.csv (deflated 54%)
  adding: content/model_frcnn_lr0.003_epoch99_results/val_detector_reg_loss.csv (deflated 57%)
  adding: content/model_frcnn_lr0.003_epoch99_results/rpn_reg.png (deflated 6%)
  adding: content/model_frcnn_lr0.003_epoch99_re

In [None]:
# Colab-only: trigger a browser download of the results archive.
from google.colab import files
files.download("/content/model_frcnn_lr0.003_epoch99_results.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **LLVIP DATASET**


In [None]:
import xml.etree.ElementTree as ET

In [None]:
class LLVIPDataset(Dataset):
    """Detection dataset backed by the records of ``LLVIP_get_image_info``.

    Each item is ``(image_tensor, targets, filename)`` where ``targets`` holds
    the tensors 'bboxes' and 'labels' built from the record's detections.
    """

    def __init__(self, im_dir, ann_dir):
        """Index the annotations under ``ann_dir`` for the images in ``im_dir``."""
        self.im_dir = im_dir
        self.ann_dir = ann_dir

        # Index 0 is reserved for background; real classes follow alphabetically.
        class_names = ['background'] + sorted(['person', 'vehicle'])

        self.label2idx = {name: position for position, name in enumerate(class_names)}
        self.idx2label = dict(enumerate(class_names))

        # One record per annotated image that could be read.
        self.images_info = LLVIP_get_image_info(im_dir, ann_dir, self.label2idx, class_names)

        print(f"Loaded {len(self.images_info)} images.")
        if self.images_info:
            print(f"First entry: {self.images_info[0]}")

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images_info)

    def __getitem__(self, index):
        """Load image ``index`` as an RGB tensor together with its targets."""
        record = self.images_info[index]
        detections = record['detections']

        rgb_image = Image.open(record['filename']).convert('RGB')
        to_tensor = torchvision.transforms.ToTensor()

        targets = {
            'bboxes': torch.as_tensor([det['bbox'] for det in detections]),
            'labels': torch.as_tensor([det['label'] for det in detections]),
        }

        return to_tensor(rgb_image), targets, record['filename']

In [None]:
def LLVIP_get_image_info(image_directory, annotation_directory, label2idx, classes):
    """Build one record per annotated image from a directory of XML files.

    Every ``*.xml`` file in ``annotation_directory`` is parsed. Its
    ``<filename>`` element names the image inside ``image_directory``, and each
    ``<object>`` whose ``<name>`` is a key of ``label2idx`` contributes one
    detection taken from its ``<bndbox>`` (xmin, ymin, xmax, ymax).

    Files are processed in sorted name order so the result, and any index-based
    split derived from it, does not depend on the order ``os.listdir`` happens
    to return. The image is only decoded once at least one usable detection
    has been found, which avoids reading images that would be discarded anyway.

    Args:
        image_directory: Directory containing the image files.
        annotation_directory: Directory containing the XML annotation files.
        label2idx: Mapping from class name to integer label. Objects with
            other names are ignored.
        classes: Unused; kept for call-site compatibility.

    Returns:
        A list of dicts with keys 'img_id', 'filename', 'width', 'height' and
        'detections'. Each detection is ``{'label': int, 'bbox': [x_min,
        y_min, x_max, y_max]}``. Images without usable detections, and images
        OpenCV cannot read, are omitted.
    """
    im_infos = []

    for annotation_filename in sorted(os.listdir(annotation_directory)):
        if not annotation_filename.endswith('.xml'):
            continue

        annotation_path = os.path.join(annotation_directory, annotation_filename)
        root = ET.parse(annotation_path).getroot()

        filename = root.find('filename').text
        img_id = filename.split('.')[0]
        img_path = os.path.join(image_directory, filename)

        # Collect the objects whose class is known to the caller.
        detections = []
        for obj in root.findall('object'):
            class_name = obj.find('name').text
            if class_name not in label2idx:
                continue

            bndbox = obj.find('bndbox')
            x_min, y_min, x_max, y_max = (
                int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            detections.append({
                'label': label2idx[class_name],
                'bbox': [x_min, y_min, x_max, y_max]
            })

        # Nothing usable: skip before paying for the image read.
        if not detections:
            continue

        # The image is decoded only to obtain its dimensions.
        image = cv2.imread(img_path)
        if image is None:  # Skip if the image cannot be read
            continue
        height, width = image.shape[:2]

        im_infos.append({
            'img_id': img_id,
            'filename': img_path,
            'width': width,
            'height': height,
            'detections': detections
        })

    return im_infos

def collate_function(data):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples."""
    per_field = zip(*data)
    return tuple(per_field)

In [None]:
# Image folder for the LLVIP run (the test split, judging by the path).
image_dir = '/content/drive/MyDrive/Colab Notebooks/LLVIP DATASET/LLVIP_IMG/test'

# Folder with the XML annotation files parsed by LLVIP_get_image_info.
annotation_dir = '/content/drive/MyDrive/Colab Notebooks/LLVIP DATASET/LLVIP_LABEL/Annotations'

In [None]:
# Sanity check: run the annotation parser directly, outside the Dataset class.
# NOTE(review): this mapping names the second class 'car', whereas LLVIPDataset
# builds its mapping from ['person', 'vehicle'] — confirm which name the XML
# files actually use.
label2idx = {'person': 1, 'car': 2}  # Adjust based on your model's labels
classes = ['background', 'person', 'car']  # Add all classes, including background

image_info = LLVIP_get_image_info(image_dir, annotation_dir, label2idx, classes)

# Number of parsed records, then the first record (if any).
print(len(image_info))
print(image_info[0] if image_info else "No valid images found")

# Show the first two records; the first repeats the line printed above.
for info in image_info[:2]:  # Print the first 2 entries
    print(info)

In [None]:
# Build the LLVIP dataset; the constructor prints the record count and first record.
dataset = LLVIPDataset(im_dir=image_dir, ann_dir=annotation_dir)

Loaded 3463 images.
First entry: {'img_id': '190006', 'filename': '/content/drive/MyDrive/Colab Notebooks/LLVIP DATASET/LLVIP_IMG/test/190006.jpg', 'width': 1280, 'height': 1024, 'detections': [{'label': 1, 'bbox': [114, 382, 212, 679]}, {'label': 1, 'bbox': [31, 407, 122, 665]}, {'label': 1, 'bbox': [20, 237, 87, 459]}, {'label': 1, 'bbox': [75, 212, 174, 459]}, {'label': 1, 'bbox': [1219, 332, 1279, 559]}, {'label': 1, 'bbox': [1, 284, 26, 528]}]}


In [None]:
# Split the dataset with split_dataset (defined in an earlier cell); the three
# ratios presumably map to train / val / test in that order — confirm.
# NOTE(review): batch_size is read here but, within this section, it is only
# assigned in the hyperparameter cell further down. This cell therefore relies
# on a value left over from an earlier cell — confirm on a fresh kernel.
split_ratios = [0.7, 0.15, 0.15]
train_dataset, val_dataset, test_dataset = split_dataset(dataset, split_ratios)

# Only the training loader shuffles; all loaders use the tuple-transposing collate function.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, collate_fn=collate_function)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, collate_fn=collate_function)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, collate_fn=collate_function)



In [None]:
''' Hyperparameters
    Edit the values as necessary here '''

# --- Region proposal network (RPN) ---
rpn_pre_nms_top_n = 2000            # Number of proposals before NMS (filtering proposals)
rpn_post_nms_top_n = 1000           # Number of proposals after NMS (filtering proposals)
rpn_nms_thresh = 0.7                # IoU threshold for NMS
rpn_fg_iou_thresh = 0.7             # Minimum IoU for a positive anchor
rpn_bg_iou_thresh = 0.3             # Maximum IoU for a negative anchor
rpn_batch_size_per_image = 256      # Number of anchors sampled per image during training

# --- Box (detection) head ---
box_detections_per_img = 9          # Maximum number of detections per image
box_score_thresh = 0.05             # Min. score for a detection to be considered valid
box_nms_thresh = 0.5                # IoU threshold for NMS on detection results
box_fg_iou_thresh = 0.5             # Minimum IoU for a positive RoI during training
box_bg_iou_thresh = 0.5             # Maximum IoU for a negative RoI during training

# --- Optimisation ---
batch_size = 4
lr = 0.003
num_epochs = 100
momentum = 0.9
weight_decay = 0.005
acc_steps = 1                       # Can leave as 1

# Passed to train() as loss_weights; presumably one weight per loss term
# (RPN class/reg, detector class/reg) — confirm against train().
loss_weights = [1, 1, 1, 1]

In [None]:
# Faster R-CNN (ResNet-50 FPN) initialised from the COCO checkpoint.
# `weights=` replaces the deprecated `pretrained=True` flag and matches the
# call used later in this notebook; the remaining keyword arguments forward
# the hyperparameters defined in the configuration cell.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights="COCO_V1",
    min_size=512,
    max_size=640,
    rpn_pre_nms_top_n=rpn_pre_nms_top_n,
    rpn_post_nms_top_n=rpn_post_nms_top_n,
    rpn_nms_thresh=rpn_nms_thresh,
    rpn_fg_iou_thresh=rpn_fg_iou_thresh,
    rpn_bg_iou_thresh=rpn_bg_iou_thresh,
    rpn_batch_size_per_image=rpn_batch_size_per_image,
    box_detections_per_img=box_detections_per_img,
    box_nms_thresh=box_nms_thresh,
    box_score_thresh=box_score_thresh,
    box_fg_iou_thresh=box_fg_iou_thresh,
    box_bg_iou_thresh=box_bg_iou_thresh,
)

# Swap the pretrained classification head for one with 3 outputs
# (background + the two dataset classes).
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=3)

# Optimise only the parameters that require gradients.
optimizer = torch.optim.SGD(
    params=[p for p in model.parameters() if p.requires_grad],
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)

In [None]:
# Run training with the configuration above; train() is defined in an earlier cell.
train(model, dataset, train_loader, val_loader, optimizer, loss_weights, num_epochs, acc_steps, device)

In [None]:
# Restore the saved weights. map_location='cpu' lets a checkpoint that was
# saved from a GPU load on a CPU-only runtime as well; load_state_dict then
# copies the tensors into the model's existing parameters.
model.load_state_dict(torch.load('/content/model_frcnn_lr0.003_epoch99', map_location='cpu', weights_only=True))

<All keys matched successfully>

In [None]:
# Evaluate the restored model on the test loader using evaluate_map's default IoU threshold.
evaluate_map(model, dataset, test_loader, device)

In [None]:
# Visualise random samples, drawing only predictions with score >= 0.9.
# The output goes to ./yes again, so images from the earlier infer() run with
# the same file names are overwritten.
infer(model, dataset, test_loader, 'yes', device, 0.9)

100%|██████████| 9/9 [00:01<00:00,  5.20it/s]


In [None]:
# Hyperparameters to improve recall
# NOTE(review): in the cells that follow, these names are reassigned before a
# model is built again, so the values set here appear to be unused — confirm.
rpn_pre_nms_top_n = 3000            # Number of proposals before NMS (filtering proposals)
rpn_post_nms_top_n = 1500           # Number of proposals after NMS (filtering proposals)
rpn_nms_thresh = 0.6               # IoU threshold for NMS
rpn_fg_iou_thresh = 0.6             # Minimum IoU for a positive anchor
rpn_bg_iou_thresh = 0.4             # Maximum IoU for a negative anchor
rpn_batch_size_per_image = 512      # Number of anchors sampled per image during training

box_detections_per_img = 9          # Maximum number of detections per image
box_score_thresh = 0.01             # Min. score for a detection to be considered valid
box_nms_thresh = 0.4                # IoU threshold for NMS on detection results
box_fg_iou_thresh = 0.4             # Minimum IoU for a positive RoI during training
box_bg_iou_thresh = 0.5             # Maximum IoU for a negative RoI during training

# **ADA FLIR DATASET**

In [None]:
import torch
from torch.utils.data import Dataset
from PIL import Image
import torchvision

class FLIRDataset(Dataset):
    """Detection dataset backed by the records of ``FLIR_get_image_info``.

    Each item is ``(image_tensor, targets, filename)`` where ``targets`` holds
    the tensors 'bboxes' and 'labels' built from the record's detections.
    """

    def __init__(self, im_dir, ann_dir):
        """Index the annotation file ``ann_dir`` for the images in ``im_dir``."""
        self.im_dir = im_dir
        self.ann_dir = ann_dir

        # Index 0 is reserved for background; real classes follow alphabetically.
        self.classes = ['background'] + sorted(['person', 'vehicle'])

        self.label2idx = {name: position for position, name in enumerate(self.classes)}
        self.idx2label = dict(enumerate(self.classes))

        # The parser gets its own category-id mapping ({1: 1, 3: 2}) and label
        # list ([0, 1, 2]); the name-based mappings above are left untouched.
        self.images_info = FLIR_get_image_info(im_dir, ann_dir, {1: 1, 3: 2}, [0, 1, 2])

        print(f"Loaded {len(self.images_info)} images.")
        if self.images_info:
            print(f"First entry: {self.images_info[0]}")

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.images_info)

    def __getitem__(self, index):
        """Load image ``index`` as an RGB tensor together with its targets."""
        record = self.images_info[index]
        detections = record['detections']

        rgb_image = Image.open(record['filename']).convert('RGB')
        to_tensor = torchvision.transforms.ToTensor()

        # Boxes and labels are gathered in the order the detections are stored.
        targets = {
            'bboxes': torch.as_tensor([det['bbox'] for det in detections]),
            'labels': torch.as_tensor([det['label'] for det in detections]),
        }

        return to_tensor(rgb_image), targets, record['filename']



def collate_function(data):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples.

    NOTE: an identical function is already defined in the LLVIP section of
    this notebook; this definition simply rebinds the same name.
    """
    per_field = zip(*data)
    return tuple(per_field)

In [None]:
def FLIR_get_image_info(image_dir, annotation_file, label2idx, classes):
    """Build one record per annotated image from a JSON annotation file.

    The JSON must contain an 'images' list (entries with 'id' and 'file_name')
    and an 'annotations' list (entries with 'image_id', 'category_id' and a
    'bbox' given as ``[x_min, y_min, width, height]``).

    Annotations are filtered before the image is touched, so images without
    any usable annotation are never decoded.

    Args:
        image_dir: Directory containing the images. Only the base name of each
            'file_name' is used to locate the file.
        annotation_file: Path of the JSON annotation file.
        label2idx: Mapping from annotation ``category_id`` to training label,
            e.g. ``{1: 1, 3: 2}``. Annotations with other category ids are
            ignored.
        classes: Collection of accepted training labels. A mapped label that
            is not in it is reported and skipped.

    Returns:
        A list of dicts with keys 'img_id', 'filename', 'width', 'height' and
        'detections'. Each detection is ``{'label': int, 'bbox': [x_min,
        y_min, x_max, y_max]}``. Images that are missing, unreadable, or have
        no valid detections are omitted.
    """
    import json  # not part of the notebook's top-level import cell

    with open(annotation_file, 'r') as f:
        data = json.load(f)

    # Image id -> path inside image_dir.
    image_id_to_filename = {
        img['id']: os.path.join(image_dir, os.path.basename(img['file_name']))
        for img in data['images']
    }

    # Group annotations by image id, keeping first-seen order.
    annotations_by_image = {}
    for ann in data['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    im_infos = []
    for img_id, detections in annotations_by_image.items():
        if img_id not in image_id_to_filename:
            continue

        valid_detections = []
        for det in detections:
            category_id = det['category_id']

            # Honour the caller's mapping instead of a hard-coded copy of it;
            # unmapped categories are skipped silently.
            label = label2idx.get(category_id)
            if label is None:
                continue

            if label not in classes:
                print(f"Skipping detection with category_id={category_id} because label {label} is not in classes.")
                continue

            # Convert [x_min, y_min, width, height] to corner format.
            x_min, y_min, w, h = det['bbox']
            valid_detections.append({
                'label': label,
                'bbox': [x_min, y_min, x_min + w, y_min + h]
            })

        if not valid_detections:
            print(f"No valid detections for image ID {img_id}.")
            continue

        filename = image_id_to_filename[img_id]
        if not os.path.exists(filename):
            print(f"Image file {filename} does not exist.")
            continue

        # The image is decoded only to obtain its dimensions.
        image = cv2.imread(filename)
        if image is None:
            print(f"Failed to read image {filename}.")
            continue
        height, width = image.shape[:2]

        im_infos.append({
            'img_id': img_id,
            'filename': filename,
            'width': width,
            'height': height,
            'detections': valid_detections
        })

    print(f"Total processed images: {len(im_infos)}")
    return im_infos

In [None]:
# Folder with the FLIR images (8-bit thermal frames, judging by the folder name).
image_dir = '/content/drive/MyDrive/Colab Notebooks/ADA FLIR DATASET/thermal_8_bit'

# Despite the variable name, this is a single JSON annotation file, not a directory.
annotation_dir = '/content/drive/MyDrive/Colab Notebooks/ADA FLIR DATASET/thermal_annotations.json'

In [None]:
# Build the FLIR dataset; this rebinds `dataset`, replacing the LLVIP dataset created earlier.
dataset = FLIRDataset(im_dir=image_dir, ann_dir=annotation_dir)

Total processed images: 1360
Loaded 1360 images.
First entry: {'img_id': 0, 'filename': '/content/drive/MyDrive/Colab Notebooks/ADA FLIR DATASET/thermal_8_bit/FLIR_08863.jpeg', 'width': 640, 'height': 512, 'detections': [{'label': 1, 'bbox': [580, 206, 614, 281]}, {'label': 1, 'bbox': [609, 207, 639, 323]}, {'label': 2, 'bbox': [469, 226, 499, 252]}, {'label': 2, 'bbox': [462, 230, 477, 244]}, {'label': 2, 'bbox': [293, 227, 332, 253]}]}


In [None]:
# Hyperparameters for the FLIR run (same values as the LLVIP configuration cell).

# --- Region proposal network (RPN) ---
rpn_pre_nms_top_n = 2000            # Number of proposals before NMS (filtering proposals)
rpn_post_nms_top_n = 1000           # Number of proposals after NMS (filtering proposals)
rpn_nms_thresh = 0.7                # IoU threshold for NMS
rpn_fg_iou_thresh = 0.7             # Minimum IoU for a positive anchor
rpn_bg_iou_thresh = 0.3             # Maximum IoU for a negative anchor
rpn_batch_size_per_image = 256      # Number of anchors sampled per image during training

# --- Box (detection) head ---
box_detections_per_img = 9          # Maximum number of detections per image
box_score_thresh = 0.05             # Min. score for a detection to be considered valid
box_nms_thresh = 0.5                # IoU threshold for NMS on detection results
box_fg_iou_thresh = 0.5             # Minimum IoU for a positive RoI during training
box_bg_iou_thresh = 0.5             # Maximum IoU for a negative RoI during training

# --- Optimisation ---
batch_size = 4
lr = 0.003
num_epochs = 100
momentum = 0.9
weight_decay = 0.005
acc_steps = 1                       # Can leave as 1

# Presumably one weight per loss term consumed by train() — confirm.
loss_weights = [1, 1, 1, 1]

In [None]:
# Faster R-CNN (ResNet-50 FPN) initialised from the COCO checkpoint.
# `weights=` replaces the deprecated `pretrained=True` flag and matches the
# call used later in this notebook; the remaining keyword arguments forward
# the hyperparameters defined in the configuration cell.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    weights="COCO_V1",
    min_size=512,
    max_size=640,
    rpn_pre_nms_top_n=rpn_pre_nms_top_n,
    rpn_post_nms_top_n=rpn_post_nms_top_n,
    rpn_nms_thresh=rpn_nms_thresh,
    rpn_fg_iou_thresh=rpn_fg_iou_thresh,
    rpn_bg_iou_thresh=rpn_bg_iou_thresh,
    rpn_batch_size_per_image=rpn_batch_size_per_image,
    box_detections_per_img=box_detections_per_img,
    box_nms_thresh=box_nms_thresh,
    box_score_thresh=box_score_thresh,
    box_fg_iou_thresh=box_fg_iou_thresh,
    box_bg_iou_thresh=box_bg_iou_thresh,
)

# Swap the pretrained classification head for one with 3 outputs
# (background + the two dataset classes).
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=3)

# Optimise only the parameters that require gradients.
optimizer = torch.optim.SGD(
    params=[p for p in model.parameters() if p.requires_grad],
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)



In [None]:
# Split the FLIR dataset with split_dataset (defined in an earlier cell); the
# three ratios presumably map to train / val / test in that order — confirm.
split_ratios = [0.7, 0.15, 0.15]
train_dataset, val_dataset, test_dataset = split_dataset(dataset, split_ratios)

# Only the training loader shuffles; all loaders use the tuple-transposing collate function.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, collate_fn=collate_function)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, collate_fn=collate_function)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, collate_fn=collate_function)

In [None]:
# Restore the saved weights on the CPU. weights_only=True restricts
# unpickling to tensors and plain containers, matching the other checkpoint
# load in this notebook and avoiding torch.load's warning about the unsafe default.
model.load_state_dict(torch.load('/content/model_frcnn_lr0.003_epoch149', map_location='cpu', weights_only=True))

  model.load_state_dict(torch.load('/content/model_frcnn_lr0.003_epoch149', map_location='cpu'))


<All keys matched successfully>

In [None]:
# Evaluate the restored model on the FLIR test loader using evaluate_map's default IoU threshold.
evaluate_map(model, dataset, test_loader, device)

In [None]:
# Mean AP averaged over a sweep of IoU thresholds; this runs one full
# evaluate_map pass per threshold.
pascal_map(model, dataset, test_loader, device)

In [None]:
# Swap in torchvision's stock COCO-pretrained detector (no custom 3-class head).
# NOTE(review): this rebinds `model`, discarding the fine-tuned weights loaded
# above. infer() looks predicted labels up in dataset.idx2label, which only
# has keys 0-2; the COCO head presumably emits larger label ids, which would
# raise KeyError — confirm.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="COCO_V1")

In [None]:
# Visualise random samples with the current `model` into ./sample, drawing
# only predictions with score >= 0.5.
infer(model, dataset, test_loader, 'sample', device, 0.5)