### <center>**Reading and Cleaning Annotation Data for Custom PyTorch Object Detection**</center> 

In [None]:
# Import necessary packages
%matplotlib inline
import json
import os
import shutil
import pandas as pd
import matplotlib.pyplot as plt

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion(); # interactive mode

##### Helper functions for processing JSONs

In [None]:
# Function for reading JSON as dictionary
def read_json(filename: str) -> dict:
  
    try:
        with open(filename, "r") as f:
            data = json.loads(f.read())
    except:
        raise Exception(f"Reading {filename} file encountered an error")
  
    return data

# Function to append records to df
def create_dataframe(data: list) -> pd.DataFrame:

    # Create an empty dataframe to append records
    df = pd.DataFrame()
  
    # Looping through each record
    for d in data:
          
        # Normalize the column levels
        record = pd.json_normalize(d)

        df = pd.concat([df, record], axis=0)
          
    return df

# Main function to iterate over files in directory and add to df
def main():
    # Assign directory and empty df for appending annotations
    directory = "C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/Annotations/" # annotation directory
    annos_df = pd.DataFrame()
    
    # Iterate over files in directory
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        # checking if it is a file
        if os.path.isfile(f):
            print(f)
            
        # Read the JSON file as python dictionary 
        data = read_json(filename = f)
    
        # Create the dataframe for the array items in annotations key 
        df = create_dataframe(data = data['annotations'])
        df.insert(loc = 0, column = 'img_name', value = f'{f[-30:-5]}.JPG')
    
        df.rename(columns = {
            "img_name": "img_name",
            "name": "label",
            "bounding_box.h": "bbox_height",
            "bounding_box.w": "bbox_width",
            "bounding_box.x": "bbox_x_topLeft",
            "bounding_box.y": "bbox_y_topLeft",
            "polygon.paths": "polygon_path"
        }, inplace = True)
        
        # Append the df dataframe to the annos_df dataframe
        annos_df = pd.concat([annos_df, df], ignore_index=True)

    # Convert x, y, h, w to xmin, ymin, xmax, ymax
    annos_df.insert(loc = 2, column = 'xmin', 
                    value = annos_df['bbox_x_topLeft'])
    annos_df.insert(loc = 3, column = 'ymin', 
                    value = annos_df['bbox_y_topLeft'])
    annos_df.insert(loc = 4, column = 'xmax', 
                    value = annos_df['bbox_x_topLeft'] + annos_df['bbox_width'])
    annos_df.insert(loc = 5, column = 'ymax', 
                    value = annos_df['bbox_y_topLeft'] + annos_df['bbox_height']) 
  
    # Drop unneccessary columns 
    annos_df = annos_df.drop(columns = ['bbox_height', 'bbox_width', 'bbox_x_topLeft', 
                                        'bbox_y_topLeft', 'id', 'slot_names', 'polygon_path'])
        
    return annos_df

##### Load annotation data into dataframe

In [None]:
# Execute main loading function
if __name__ == '__main__':
    df = main()

##### Filter annotation dataframe

In [None]:
# If label value count is less than 200, drop the row
df = df.groupby('label').filter(lambda x : len(x) > 200)

# If label value is 'Hen', drop the row
df = df[df['label'] != 'Hen']

##### Filter images since most annotation class were filtered out

In [None]:
# Store unique img_names in filtered df as array
img_names = df['img_name'].unique().tolist()

# Create a new directory called 'filtered_images'
new_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images'
if not os.path.exists(new_dir):
    os.makedirs(new_dir)

# Copy images in img_names to new directory
for img in img_names:
    shutil.copy2(f'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/Images/{img}', new_dir)

### <center>**Transform and Augment Image and Annotation Data for Custom PyTorch Object Detection**</center> 

In [None]:
# import necessary packages
import numpy as np
from PIL import Image
from io import BytesIO
from collections import defaultdict
import torchvision
torchvision.disable_beta_transforms_warning()
import torch
from torch.utils.data import Dataset
import torch.backends.cudnn as cudnn
import torch.nn as nn
cudnn.benchmark = True
from torchvision import transforms as _transforms, tv_tensors
import torchvision.transforms.v2 as T
from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights
import utils

##### Pre-process annotation data

In [None]:
# Convert labels to categorical data and get the numeric codes
df['target'] = pd.Categorical(df['label']).codes + 1

# Create a dictionary using df['label'] as the keys and df['target'] as the values
label_dict = dict(zip(df['target'], df['label']))

# Drop the original 'label' column from df
df = df.drop(['label'], axis=1)

# Rename 'target' column to 'label'
df.rename(columns={'target': 'label'}, inplace=True)

# Save df as csv in directory
df.to_csv('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv', index=False)

##### PyTorch dataset for custom image and annotation data

In [None]:
# Custom dataset loader (PyTorch) for loading images and annotation data
class MAVdroneDataset(Dataset):
    """Dataset Loader for Waterfowl Drone Imagery"""

    def __init__(self, csv_file, root_dir, transforms):
        """
        Arguments:
            csv_file (string): Path to the CSV file with annotations.
            root_dir (string): Directory containing all images.
            transforms (string): train = True for training transforms
        """
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir

        self.transforms = transforms
        self.unique_image_names = self.df['img_name'].unique()


    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image_name = self.unique_image_names[idx]

        # isolate first row prevents multiple instances of the same image
        row = self.df[self.df['img_name'] == image_name].iloc[0]

        image_path = os.path.join(self.root_dir, row['img_name'])
        image = None

        # ignore corrupted image data during loading else error
        while True:
            with open(image_path, 'rb') as f:
                buff = BytesIO()
                buff.write(f.read())
                buff.seek(0)
                temp_image = np.array(Image.open(buff), dtype = np.uint8)
                # convert np.array to Tensor[image_channels, image_height, image_width]
                image = torch.from_numpy(temp_image).permute(2, 0, 1)

            if image is not None:
                break

        boxes = self.df[self.df['img_name'] == image_name][['xmin', 'ymin', 'xmax', 'ymax']].values 
        labels = self.df[self.df['img_name'] == image_name]['label'].values

        labels = torch.as_tensor(labels, dtype = torch.int64) # (n_objects)

        boxes = torch.as_tensor(boxes, dtype = torch.float32)

        # if xmin > xmax, flip them so width is always positive
        if torch.any(boxes[:, 0] > boxes[:, 2]):
            boxes[:, [0, 2]] = boxes[:, [2, 0]]
        
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd
        iscrowd = torch.zeros((len(labels),), dtype=torch.int64)
            
        target = {}
        target['boxes'] = tv_tensors.BoundingBoxes(boxes, format = tv_tensors.BoundingBoxFormat.XYXY, canvas_size = (image.shape[1], image.shape[2]))
        target['labels'] = labels
        target['image_id'] = image_id
        target['area'] = area
        target['iscrowd'] = iscrowd

        image = tv_tensors.Image(image)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target


    def __len__(self):
        return len(self.unique_image_names)

##### Data transformation function

In [None]:
# ImageNet mean and std since using pretrained ResNet backbone
mean = [0.485, 0.456, 0.406] # 3 bands
std = [0.229, 0.224, 0.225]

# Same transforms as original SSD paper
def get_transform(train):
    transforms = []
    if train:
        transforms.append(T.RandomZoomOut(fill = defaultdict(lambda: 0, {tv_tensors.Image: (255, 20, 147)}),
                                          p = 0.3,
                                          side_range = (1.0, 2.0)))
        transforms.append(T.RandomIoUCrop())
        transforms.append(T.Resize((512, 512), antialias = True)) # no maintain aspect ratio
        transforms.append(T.RandomHorizontalFlip(0.5))
    else:
        transforms.append(T.Resize((512, 512), antialias = True)) # no maintain aspect ratio
    transforms.append(T.ToImage())
    transforms.append(T.ToDtype(torch.float32, scale=True))
    transforms.append(T.SanitizeBoundingBoxes())
    transforms.append(T.Normalize(mean, std)) # ImageNet mean and std values for normalization
    return T.Compose(transforms)

##### Helper functions for plotting image and annotations

In [None]:
# classes are values in label_dict
classes = list(label_dict.values())

# reverse label dictionary for mapping predictions to classes
rev_label_dict = {v: k for k, v in label_dict.items()}

# distinct colors 
bbox_colors = ['#f032e6', '#ffffff', '#ffe119', '#3cb44b', '#42d4f4',
                    '#f58231', '#e6194B', '#dcbeff', '#469990', '#4363d8']

# label color map for plotting color-coded boxes by class
label_color_map = {k: bbox_colors[i] for i, k in enumerate(label_dict.keys())}

# function for reshaping boxes 
def get_box(boxes):
    boxes = np.array(boxes)
    boxes = boxes.astype('float').reshape(-1, 4)
    if boxes.shape[0] == 1 : return boxes
    return np.squeeze(boxes)


# function for plotting image
def img_show(image, ax = None, figsize = (6, 9)):
    if ax is None:
        fig, ax = plt.subplots(figsize = figsize)
    ax.xaxis.tick_top()
    ax.imshow(image)
    return ax
 

def plot_bbox(ax, boxes, labels):
    # add box to the image and use label_color_map to color-code by bounding box class if exists else 'black'
    ax.add_patch(plt.Rectangle((boxes[:, 0], boxes[:, 1]), boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1],
                    fill = False,
                    color = label_color_map[labels.item()] if labels.item() in label_color_map else 'black', 
                    linewidth = 1.5))
    # add label text to bounding box using label_dict if label exists else labels
    ax.text(boxes[:, 2], boxes[:, 3], 
            (label_dict[labels.item()] if labels.item() in label_dict else labels.item()),
            fontsize = 8,
            bbox = dict(facecolor = 'white', alpha = 0.8, pad = 0, edgecolor = 'none'),
            color = 'black')


# function for plotting all boxes and labels on the image using get_polygon, img_show, and plot_mask functions
def plot_detections(image, boxes, labels, ax = None):
    ax = img_show(image.permute(1, 2, 0), ax = ax)
    for i in range(len(boxes)):
        box = get_box(boxes[i])
        plot_bbox(ax, box, labels[i])

##### Plot sample batch to confirm data loads and transforms correctly

In [None]:
# Load sample batch of data to custom PyTorch Dataset and Transform
sample_dataset = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv', 
                                root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images', 
                                transforms = get_transform(train = True))

# store image indices in random order list
indices = torch.randperm(len(sample_dataset)).tolist()

sample_data_loader = torch.utils.data.DataLoader(sample_dataset, batch_size = 8, shuffle = True, 
                                             collate_fn = utils.collate_fn, num_workers = 0)

# store images and annotation targets from sample batch
images, targets = next(iter(sample_data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]

# Plot the all samples from batch in a grid of subplots. 
plt.figure(figsize = (8, 32))
for i in range(8):
    ax = plt.subplot(8, 2, 1 + i)
    plot_detections(images[i], targets[i]['boxes'], targets[i]['labels'], ax = ax)
    plt.axis('off')
    plt.title(f"Sample {i + 1}")

plt.show()

##### Function for loading RetinaNet with custom num_classes

In [None]:
def get_retinanet_model(num_classes):
    # Load the pre-trained model
    model = retinanet_resnet50_fpn_v2(weights=RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT,
                                    weights_backbone=torchvision.models.ResNet50_Weights.DEFAULT)

    # Replace the classification head's cls_logits layer with a new one
    in_channels = model.head.classification_head.cls_logits.in_channels
    model.head.classification_head.cls_logits = nn.Conv2d(in_channels, num_classes * model.head.classification_head.num_anchors, kernel_size=3, stride=1, padding=1)

    # Update the number of classes in the model
    model.head.classification_head.num_classes = num_classes
    return model

### <center>**Tune Model Hyperparameters using Ray Tune**</center> 

In [None]:
import tempfile
from datetime import datetime
import gc
from engine import train_one_epoch, evaluate 
import ray
from ray import train, tune
from ray.tune.schedulers.hb_bohb import HyperBandForBOHB
from ray.tune.search.bohb import TuneBOHB

##### Helper functions for hyperparameter tuning

In [None]:
def train_MAVdroneDataset(config, indices):
    # get dataset train and dataloader val from ray object store
    dataset_train = ray.get(config["dataset_train_ref"])
    data_loader_val = ray.get(config["data_loader_val_ref"])
    
    # construct custom retinanet model
    model = get_retinanet_model(num_classes=len(classes) + 1) # add 1 for background class
    device = "cpu" 
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model) # train on multiple GPUs if available
    model.to(device)

    # construct an optimizer 
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr = config["lr"],
                                momentum = config["momentum"], 
                                weight_decay = config["weight_decay"])
    
    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size = config["step_size"], # period of lr decay
                                                   gamma = config["gamma"]) # multiplicative factor of lr decay

    # Load existing checkpoint if exist.
    if train.get_checkpoint():
        loaded_checkpoint = train.get_checkpoint()
        with loaded_checkpoint.as_directory() as loaded_checkpoint_dir:
            model_state, optimizer_state = torch.load(
                os.path.join(loaded_checkpoint_dir, "checkpoint.pt")
            )
            model.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)
    else:
        start_epoch = 0

    training_steps = [
            {"step": 0, "batch_size": config["batch_size"], "epochs": 10, "print_freq": 10},
            {"step": 1, "batch_size": config["batch_size"]*4, "epochs": 10, "print_freq": 5}, 
            {"step": 2, "batch_size": config["batch_size"]*4**2, "epochs": 10, "print_freq": 2},  
            {"step": 3, "batch_size": config["batch_size"]*4**3, "epochs": 10, "print_freq": 1}
        ]

    # loop through training_steps during training to increase batch size and decrease learning rate
    for step in training_steps:
        batch_size = int(step['batch_size'])
        num_epochs = int(step['epochs'])
        print_freq = int(step['print_freq'])

        # define training and validation data loaders
        data_loader = torch.utils.data.DataLoader(dataset_train, batch_size = batch_size, shuffle = True, 
                                                collate_fn = utils.collate_fn, num_workers = 0,
                                                pin_memory = True)
    
        print(f'Beginning training step {step["step"]}... batch size: {batch_size}')

        for epoch in range(start_epoch, num_epochs + start_epoch):
            train_metric_logger, val_metric_logger = train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, data_loader_val)

            # update the learning rate
            lr_scheduler.step()
            
            # evaluate on the val dataset
            train_coco_evaluator, val_coco_evaluator = evaluate(model, data_loader_val, device, data_loader)

            # Here we save a checkpoint. It is automatically registered with Ray Tune
            with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
                path = os.path.join(temp_checkpoint_dir, "checkpoint.pt")
                torch.save(
                    (model.state_dict(), optimizer.state_dict()), path
                )
                checkpoint = train.Checkpoint.from_directory(temp_checkpoint_dir)
                train.report(
                    {"train_loss": train_metric_logger.loss.avg, # metric_logger object
                    "val_loss": val_metric_logger.loss.avg,
                    "train_mAP_50": train_coco_evaluator.coco_eval['bbox'].stats[1],
                    "val_mAP_50": val_coco_evaluator.coco_eval['bbox'].stats[1],
                    "train_mAR_100": train_coco_evaluator.coco_eval['bbox'].stats[8],
                    "val_mAR_100": val_coco_evaluator.coco_eval['bbox'].stats[8],
                    "training_step": step["step"],
                    "epoch": epoch}, 
                    checkpoint = checkpoint
                )
        
        # set start_epoch to current epoch for next training step
        start_epoch = num_epochs if step['step'] == 0 else num_epochs + start_epoch       
    
    print('Tuning Trial Complete!')

# test set accuracy of best model
def test_best_model(best_result, indices):
    best_model =  get_retinanet_model(num_classes=len(classes) + 1) # add 1 for background class
                                      
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    best_model.to(device)

    checkpoint_path = os.path.join(best_result.checkpoint.to_directory(), "checkpoint.pt")

    model_state, _ = torch.load(checkpoint_path)
    best_model.load_state_dict(model_state)

    dataset_test = ray.get(best_result.config["dataset_test_ref"]) # loads dataset without augmentations from ray object store

    dataset_test = torch.utils.data.Subset(dataset_test, indices[-int(len(dataset_test)*0.05):]) # last 5% of dataset

    data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size = 1, shuffle = False,
                                                collate_fn = utils.collate_fn, num_workers = 0,
                                                pin_memory = True)
    
    test_results = evaluate(best_model, data_loader_test, device, train_data_loader=None)

    print(f'Best trial test set mAP_50: {test_results.coco_eval["bbox"].stats[1]} and mAR_100: {test_results.coco_eval["bbox"].stats[8]}')

def trial_dirname_creator(trial):
    return f"train_MAVdroneDataset_{trial.trial_id}"

##### Main Tuning Program

In [18]:
gc.collect()
torch.cuda.empty_cache()

def main(num_samples, max_num_epochs, indices):

    dataset = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv',
                            root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/', 
                            transforms = get_transform(train = True))

    dataset_val = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv',
                            root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/', 
                            transforms = get_transform(train = False))
    
    dataset_test_ref = ray.put(dataset_val) # same (no augmentations) as val ds before subsetting
    
    # subset using a 80/15/5 split for train, validation, and test datasets
    dataset_train = torch.utils.data.Subset(dataset, indices[:-int(len(indices)*0.2)]) # first 80% of dataset

    dataset_val = torch.utils.data.Subset(dataset_val, indices[-int(len(indices)*0.2):-int(len(indices)*0.05)]) # next 15% of dataset

    data_loader_val = torch.utils.data.DataLoader(dataset_val, batch_size = 1, shuffle = False,
                                                collate_fn = utils.collate_fn, num_workers = 0,
                                                pin_memory = True)
    
    dataset_train_ref = ray.put(dataset_train)
    data_loader_val_ref = ray.put(data_loader_val)
    
    config = {
        "lr": tune.uniform(0.0001, 0.09),
        "momentum": tune.uniform(0.3, 0.95),
        "weight_decay": tune.uniform(0.00005, 0.05),
        "step_size": tune.uniform(1, 10),
        "gamma": tune.uniform(0.05, 0.9),
        "batch_size": tune.choice([2, 4, 8, 16]),
        "dataset_train_ref": dataset_train_ref,
        "data_loader_val_ref": data_loader_val_ref,
        "dataset_test_ref": dataset_test_ref
    }

    algo = TuneBOHB(
        metric="val_mAP_50",
        mode="max",
        points_to_evaluate = [
            {"lr": 0.005, 
             "momentum": 0.9, 
             "weight_decay": 0.0005, 
             "step_size": 3, 
             "gamma": 0.1,
             "batch_size": 4} 
        ] # PyTorch default values as starting point for search
    )
    algo = tune.search.ConcurrencyLimiter(algo, max_concurrent=2)
    scheduler = HyperBandForBOHB(
        time_attr="training_iteration",
        max_t=int(max_num_epochs),
        reduction_factor=4,
        stop_last_trials=False,
    )

    tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(train_MAVdroneDataset, indices = indices),
            resources={"cpu": 24.0, "gpu": 0.5}
        ),
        run_config=train.RunConfig(
            name="train_MAVdroneDataset_RetinaNet",
            storage_path='C:/Users/exx/Documents/GitHub/SSD_VGG_PyTorch/ray_results',
            stop={"training_iteration": max_num_epochs},
        ),
        tune_config=tune.TuneConfig(
            metric="val_mAP_50",
            mode="max",
            search_alg = algo,
            scheduler=scheduler,
            num_samples=int(num_samples),
            time_budget_s=600000,
            trial_dirname_creator=trial_dirname_creator
        ),
        param_space=config
    )

    results = tuner.fit()

    best_trial = results.get_best_result("val_mAP_50", "max")

    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final training loss: {}".format(best_trial.metrics["train_loss"]))
    print("Best trial final validation loss: {}".format(best_trial.metrics["val_loss"]))
    print("Best trial final training mAP_50: {}".format(best_trial.metrics["train_mAP_50"]))
    print("Best trial final validation mAP_50: {}".format(best_trial.metrics["val_mAP_50"]))
    print("Best trial final training mAR_100: {}".format(best_trial.metrics["train_mAR_100"]))
    print("Best trial final validation mAR_100: {}".format(best_trial.metrics["val_mAR_100"]))

    test_performance = test_best_model(best_trial, indices)

    return best_trial, test_performance

if __name__ == "__main__":
    best_trial = main(num_samples = 30, max_num_epochs = 45, indices = indices)

[36m(train_MAVdroneDataset pid=60080)[0m Epoch: [0] Training  [ 70/375]  eta: 0:06:16  lr: 0.000953  loss: 6.1857 (128.9551)  classification: 5.9321 (128.6743)  bbox_regression: 0.2537 (0.2808)  time: 1.2529  data: 1.0898  max mem: 3949


2024-11-04 20:24:05,690	INFO tune.py:1041 -- Total run time: 101.50 seconds (91.40 seconds for the tuning loop).
Resume experiment with: Tuner.restore(path="C:/Users/exx/Documents/GitHub/SSD_VGG_PyTorch/ray_results/train_MAVdroneDataset_RetinaNet", trainable=...)


RuntimeError: No best trial found for the given metric: val_mAP_50. This means that no trial has reported this metric, or all values reported for this metric are NaN. To not ignore NaN values, you can set the `filter_nan_and_inf` arg to False.

### <center>**Train Model Using Tuned Hyperparameters**</center> 

In [None]:
from torch.utils.tensorboard import SummaryWriter
torch.cuda.empty_cache()

# Hyperparameters are best trial results from Bayesian Optimization using Ray Tune
learning_rate = best_trial.config["lr"]
momentum = best_trial.config["momentum"]
weight_decay = best_trial.config["weight_decay"]
step_size = best_trial.config["step_size"]
gamma = best_trial.config["gamma"]
batch_size = best_trial.config["batch_size"]

training_steps = [
            {"step": 0, "batch_size": batch_size, "epochs": 10, "print_freq": 10},
            {"step": 1, "batch_size": batch_size*4, "epochs": 10, "print_freq": 5}, 
            {"step": 2, "batch_size": batch_size*4**2, "epochs": 10, "print_freq": 2},  
            {"step": 3, "batch_size": batch_size*4**3, "epochs": 10, "print_freq": 1}
        ]

# Main function that performs training and validation.
def main():
    # Initialize model--SSD300 w/ VGG16 backbone pre-trained
    model = get_retinanet_model(num_classes=len(classes) + 1) # add 1 for background class

    device = "cpu" 
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    model.to(device)

    start_epoch = 0

    # initialize tensorboard writer
    writer = SummaryWriter()

    # Store one checkpoint dictionary for each epoch in a list of dictionaries. 
    checkpoints = []

    # loop through training_steps during training to increase batch size and decrease learning rate
    for step in training_steps:
        batch_size = step['batch_size']
        num_epochs = step['epochs']
        print_freq = step['print_freq']
        batch_size = step['batch_size']
    
        # use MAVdroneDataset and defined transformations
        dataset = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv',
                                root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/', 
                                transforms = get_transform(train = True))
        
        dataset_val = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv',
                                root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/',
                                transforms = get_transform(train = False))

        # subset using a 80/15/5 split for train, validation, and test datasets
        dataset = torch.utils.data.Subset(dataset, indices[:-int(len(dataset)*0.2)])

        dataset_val = torch.utils.data.Subset(dataset_val, indices[-int(len(dataset_val)*0.2):-int(len(dataset_val)*0.05)])

        # define training and validation data loaders
        data_loader = torch.utils.data.DataLoader(dataset, batch_size = batch_size, shuffle = True, 
                                                collate_fn = utils.collate_fn, num_workers = 0,
                                                pin_memory = True)
        
        data_loader_val = torch.utils.data.DataLoader(dataset_val, batch_size = 1, shuffle = False,
                                                    collate_fn = utils.collate_fn, num_workers = 0,
                                                    pin_memory = True)
        
        # construct an optimizer - SGD w/ momentum and weight decay
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr = learning_rate,
                                    momentum = momentum, weight_decay = weight_decay)
        
        # and a learning rate scheduler
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size = step_size,
                                                    gamma = gamma)
        
        # Log the hyperparameters to tensorboard
        writer.add_hparams(
            {"lr": learning_rate, "momentum": momentum, "weight_decay": weight_decay, "step_size": step_size, "gamma": gamma, "batch_size": (batch_size)},
            {}
        )
        
        print(f'Beginning training step {step["step"]}... batch size: {batch_size}')

        #########################################################
        ##               The main training loop                ##
        #########################################################
        for epoch in range(start_epoch, num_epochs + start_epoch):
            print()
            print(f'Epoch {epoch} beginning training...')
            print()

            train_metric_logger, val_metric_logger = train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, data_loader_val)
        
            print()
            print(f'Epoch {epoch} finished training!')
            print()

            # update the learning rate
            lr_scheduler.step()

            print()
            print(f'Epoch {epoch} preparing to calculate train and val set accuracy...')
            print()
            
            # evaluate on the validation dataset
            train_coco_evaluator, val_coco_evaluator = evaluate(model, data_loader_val, device, data_loader)

            # store training and validation metrics in checkpoint dictionary. 
            checkpoint = {
                "epoch": epoch,
                "train_loss": train_metric_logger.loss.avg, # average across entire trianing epoch
                "train_bbox_loss": train_metric_logger.bbox_regression.avg,
                "train_class_loss": train_metric_logger.classification.avg,
                "val_loss": val_metric_logger.loss.avg,
                "val_bbox_loss": val_metric_logger.bbox_regression.avg,
                "val_class_loss": val_metric_logger.classification.avg,
                "train_mAP_50": train_coco_evaluator.coco_eval['bbox'].stats[1],
                "train_mAR_100": train_coco_evaluator.coco_eval['bbox'].stats[8],
                "val_mAP_50": val_coco_evaluator.coco_eval['bbox'].stats[1],
                "val_mAR_100": val_coco_evaluator.coco_eval['bbox'].stats[8],
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict()
            }

            # append checkpoint to checkpoints list
            checkpoints.append(checkpoint)

            # report training and validation scalars to tensorboard
            writer.add_scalar('Train Loss', np.array(float(checkpoint["train_loss"])), epoch)
            writer.add_scalar('Val Loss', np.array(float(checkpoint["val_loss"])), epoch)
            writer.add_scalar('Train mAP@50', np.array(float(checkpoint["train_mAP_50"])), epoch)
            writer.add_scalar('Train mAR@100', np.array(float(checkpoint["train_mAR_100"])), epoch)
            writer.add_scalar('Val mAP@50', np.array(float(checkpoint["val_mAP_50"])), epoch)
            writer.add_scalar('Val mAR@100', np.array(float(checkpoint["val_mAR_100"])), epoch)

            print()
            print(f'Epoch {epoch} complete! Moving onto epoch {epoch + 1}...')
            print()
        
        print()
        print(f'Training step {step["step"]} complete! Moving onto training step {step["step"] + 1}...')
        print()

        # set start_epoch to current epoch for next training step
        start_epoch = num_epochs if step['step'] == 0 else num_epochs + start_epoch

    print('All Training Steps Complete!')

    # close tensorboard writer
    writer.close()

    return checkpoints

if __name__ == "__main__":
    checkpoints = main()

In [None]:
# Best train epoch is dictionary in checkpoints with highest val_mAP_50 value
best_train_epoch = max(checkpoints, key = lambda x: x['val_mAP_50'])

model = get_retinanet_model(num_classes=len(classes) + 1) # add 1 for background class

# load model weights from best_train_epoch
model.load_state_dict(best_train_epoch["model_state_dict"])

# save model weights to .pth file
torch.save(model.state_dict(), 'RetinaNet_ResNet50_FPN_DuckNet_' + str(datetime.now().strftime("%m%d%Y")) + '.pth')

In [None]:
# copy checkpoints and remove model and optimizer state dicts
checkpoints_copy = checkpoints.copy()
for c in checkpoints_copy:
    del c["model_state_dict"]
    del c["optimizer_state_dict"]

# save checkpoints list to text file
with open('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/RetinaNet/checkpoints.txt', 'w') as f:
    for item in checkpoints_copy:
        f.write("%s\n" % item)

### <center>**Model Inference on Test Dataset**</center> 

##### Load the test dataset

In [None]:
# test_indices is last 5% of indices list--not seen by model during training/validation
test_indices = indices[-int(len(indices)*0.05):]

dataset_test = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv',
                                root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/', 
                                transforms = get_transform(train = False))

test_image_names = [dataset_test.unique_image_names[i] for i in test_indices]

# create dictionary of test indices and image names
test_dict = dict(zip(test_indices, test_image_names))

# subset test dataset using test_indices
dataset_test = torch.utils.data.Subset(dataset_test, test_indices)

data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size = 1, shuffle = False,
                                            collate_fn = utils.collate_fn, num_workers = 0,
                                            pin_memory = True)

In [None]:
test_performance = evaluate(model, data_loader_test, device=torch.device('cpu'))

##### Calculate performance metrics on every image in test dataset

In [None]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision

In [None]:
results = []

metric = MeanAveragePrecision(iou_type="bbox",
                              class_metrics=True,
                              max_detection_thresholds=[1, 10, 100]
                              )

model.to('cpu')
model.eval()

for images, targets in data_loader_test:
    # use image_id to get image_name from image_names list
    image_id = [target['image_id'].item() for target in targets]

    # convert boxes in targets to tensors
    targets = [{k: torch.tensor(v) if k == 'boxes' else v for k, v in t.items()} for t in targets]

    # filter targets to only include boxes and labels keys
    ground_truth = [{k: v for k, v in t.items() if k in ('boxes', 'labels')} for t in targets]

    with torch.no_grad():
        prediction = model(images, targets)

    # calculate mAP and mAR from test dataset
    metric.update(prediction, ground_truth)
    mean_AP = metric.compute()

    # append image name to mean_AP
    mean_AP['image_name'] = test_dict[image_id[0]]

    # Append mean_AP and predictions to results list. 
    results.append(mean_AP)

##### Store per-image test dataset metrics as dataframe

In [None]:
# use pandas to create a dataframe of image names and mAP values
img_results_df = pd.DataFrame()
img_results_df['image_name'] = [result['image_name'] for result in results]
img_results_df['mAP'] = [result['map'].item() for result in results]
img_results_df['mAP_50'] = [result['map_50'].item() for result in results]
img_results_df['mAP_75'] = [result['map_75'].item() for result in results]
img_results_df['mAP_small'] = [result['map_small'].item() for result in results]
img_results_df['mAP_medium'] = [result['map_medium'].item() for result in results]
img_results_df['mAP_large'] = [result['map_large'].item() for result in results]
img_results_df['mAR_1'] = [result['mar_1'].item() for result in results]
img_results_df['mAR_10'] = [result['mar_10'].item() for result in results]
img_results_df['mAR_100'] = [result['mar_100'].item() for result in results]
img_results_df['mAR_small'] = [result['mar_small'].item() for result in results]
img_results_df['mAR_medium'] = [result['mar_medium'].item() for result in results]
img_results_df['mAR_large'] = [result['mar_large'].item() for result in results]

# # if value is == -1.0, replace with NaN
img_results_df = img_results_df.replace(-1.0, np.nan)

In [None]:
# Metric values are running averages in torch metrics, so the last value is the final value.
final_metrics = img_results_df.iloc[-1]
final_metrics = final_metrics.drop('image_name')

##### Print per-image metrics for test dataset as table

In [None]:
from prettytable import PrettyTable
# create a pretty table object
x = PrettyTable()

cols = ['Metric', 'Value']  

# add column headers
x.field_names = cols

# values for column one in table are column names from final_metrics, column two are the column values. 
for i in range(len(final_metrics)):
    x.add_row([final_metrics.index[i], f'{final_metrics[i]*100:.2f}%'])

# print table
print(x)

# save table as txt file
with open('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/testDataset_image_summary_results.txt', 'w') as f:
    print(x, file = f)

# save results_df to csv
img_results_df.to_csv('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/per_image_results_test_dataset.csv', index = False)

##### Store per-class test dataset metrics as dataframe

In [None]:
class_res_df = pd.DataFrame()

# store 'map_per_class' and 'mar_100_per_class' from results in df
class_res_df['image_name'] = [result['image_name'] for result in results]
class_res_df['classes'] = [result['classes'] for result in results]
class_res_df['map_per_class'] = [result['map_per_class'] for result in results]
class_res_df['mar_100_per_class'] = [result['mar_100_per_class'] for result in results]

# convert tensors to numpy arrays
class_res_df['classes'] = class_res_df['classes'].apply(lambda x: x.numpy())
class_res_df['map_per_class'] = class_res_df['map_per_class'].apply(lambda x: x.numpy())
class_res_df['mar_100_per_class'] = class_res_df['mar_100_per_class'].apply(lambda x: x.numpy())

# replace integer labels in classes column with labels using label_dict
class_res_df['classes'] = class_res_df['classes'].apply(lambda x: [label_dict.get(i) for i in x])

# replace -1.0 values in map_per_class and mar_100_per_class with NaN
class_res_df['map_per_class'] = class_res_df['map_per_class'].apply(lambda x: np.where(x == -1.0, np.nan, x))
class_res_df['mar_100_per_class'] = class_res_df['mar_100_per_class'].apply(lambda x: np.where(x == -1.0, np.nan, x))

# if map_per_class or mar_100_per_class is NaN, delete value from list. Also delete corresponding class label.
class_res_df['classes'] = class_res_df.apply(lambda x: [i for i, j in zip(x['classes'], x['map_per_class']) if not np.isnan(j)], axis = 1)
class_res_df['map_per_class'] = class_res_df['map_per_class'].apply(lambda x: [i for i in x if not np.isnan(i)])
class_res_df['mar_100_per_class'] = class_res_df['mar_100_per_class'].apply(lambda x: [i for i in x if not np.isnan(i)])

In [None]:
# metric values are running averages in TorchMetrics. Store map and mar from last image in dataset
classes = class_res_df['classes'].iloc[-1]
class_map = class_res_df['map_per_class'].iloc[-1]
class_mar_100 = class_res_df['mar_100_per_class'].iloc[-1]

##### Print per-class metrics for every image in test dataset as table

In [None]:
# cols = 'value' and all unique classes
cols = ['Class', 'mAP', 'mAR_100']

# create a pretty table object
x = PrettyTable()

# add column headers
x.field_names = cols

# classes go in first column, class_map in second column, and class_mar_100 in third column
for i in range(len(classes)):
    x.add_row([classes[i], f'{class_map[i]*100:.2f}%', f'{class_mar_100[i]*100:.2f}%'])

# print table
print(x)

# save table as txt file
with open('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/testDataset_class_summary_results.txt', 'w') as f:
    print(x, file = f)

# save results_df to csv
class_res_df.to_csv('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/per_class_results_test_dataset.csv', index = False)

##### Load test data into sample batch containing some test images

In [None]:
# load entire test dataset into one batch
data_loader_test_all = torch.utils.data.DataLoader(dataset_test, batch_size = len(dataset_test), shuffle = False,
                                                collate_fn = utils.collate_fn, num_workers = 0)

# run predictions on all images in the test dataset
images, targets = next(iter(data_loader_test_all))

images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]

# convert boxes in targets to tensors
targets = [{k: torch.tensor(v) if k == 'boxes' else v for k, v in t.items()} for t in targets]

model.to('cpu')

model.eval()

with torch.no_grad():
    predictions = model(images, targets) 

##### Post-process model predictions for plotting on original images

In [None]:
# for each image in the batch, remove all predicted boxes with scores below 0.5
for i in range(len(predictions)):
    predictions[i]['boxes'] = predictions[i]['boxes'][predictions[i]['scores'] > 0.5]
    predictions[i]['labels'] = predictions[i]['labels'][predictions[i]['scores'] > 0.5]
    predictions[i]['scores'] = predictions[i]['scores'][predictions[i]['scores'] > 0.5]

# resize boxes to original image shape
for i in range(len(images)):
    tran_w, tran_h = images[i].shape[1], images[i].shape[2]
    
    images[i] = Image.open('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/' + test_image_names[i])

    orig_w, orig_h = images[i].size


    predictions[i]['boxes'] = predictions[i]['boxes'] * torch.tensor([orig_w/tran_w, 
                                                                      orig_h/tran_h, 
                                                                      orig_w/tran_w,
                                                                      orig_h/tran_h]).view(1, 4)

### <center>**Plot Model Predictions for Images in Test Dataset**</center> 

In [None]:
def plot_bbox_predicted(ax, boxes, labels, scores): # modify plot_bbox to add confidence scores
    # add box to the image and use label_color_map to color-code by bounding box class if exists else 'black'
    ax.add_patch(plt.Rectangle((boxes[:, 0], boxes[:, 1]), boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1],
                    fill = False,
                    color = label_color_map[labels.item()] if labels.item() in label_color_map else 'black', 
                    linewidth = 1.5))
    
    # add label and score to the bounding box. concatenate label and score to one string. 
    # use label_dict to replace class numbers with class names
    ax.text(boxes[:, 0], boxes[:, 1] - 100,
        s = f"{label_dict[labels.item()]} {scores.item():.2f}",
        color = 'black',
        fontsize = 6,
        verticalalignment = 'top',
        bbox = {'color': label_color_map[labels.item()] if labels.item() in label_color_map else 'black', 'pad': 0})
    return ax


# function for plotting all predictions on images
def plot_predictions(image, boxes, labels, scores, ax = None):
    ax = img_show(image, ax = ax)
    for i in range(len(boxes)):
        box = get_box(boxes[i])
        plot_bbox_predicted(ax, box, labels[i], scores[i])

In [None]:
# Plot 32 samples from batch in a grid of subplots.
plt.figure(figsize = (24, 36))
for i in range(0, 32):
    ax = plt.subplot(8, 4, 1 + i)
    plot_predictions(images[i], predictions[i]['boxes'], predictions[i]['labels'], predictions[i]['scores'], ax = ax)
    plt.axis('off')
    plt.title(test_image_names[i])

plt.show()

##### Run inference on full dataset to get model estimates of abundance

In [None]:
dataset_all = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv',
                                root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/', 
                                transforms = get_transform(train = False))

In [None]:
image_names = [dataset_all.unique_image_names[i] for i in indices]

In [None]:
# create dictionary of test indices and image names
name_dict = dict(zip(indices, image_names))

data_loader_all = torch.utils.data.DataLoader(dataset_all, batch_size = 1, shuffle = False,
                                            collate_fn = utils.collate_fn, num_workers = 0,
                                            pin_memory = True)

In [None]:
# get model predictions for every image in data_loader_all
model_predictions_all = []

for images, targets in data_loader_all:
    # use image_id to get image_name from image_names list
    image_id = [target['image_id'].item() for target in targets]

    # convert boxes in targets to tensors
    targets = [{k: torch.tensor(v) if k == 'boxes' else v for k, v in t.items()} for t in targets]

    with torch.no_grad():
        prediction = model(images, targets)

    # append image name to mean_AP
    prediction['image_name'] = name_dict[image_id[0]]

    # Append mean_AP and predictions to results list. 
    model_predictions_all.append(prediction)

In [None]:
# from torchmetrics.detection.mean_ap import MeanAveragePrecision

# results = []
# metric = MeanAveragePrecision(iou_type="bbox",
#                               class_metrics=True,
#                               max_detection_thresholds=[1, 10, 100]
#                               )
# model.to('cpu')
# model.eval()

# for images, targets in data_loader_all:
#     # use image_id to get image_name from image_names list
#     image_id = [target['image_id'].item() for target in targets]

#     # convert boxes in targets to tensors
#     targets = [{k: torch.tensor(v) if k == 'boxes' else v for k, v in t.items()} for t in targets]

#     # filter targets to only include boxes and labels keys
#     ground_truth = [{k: v for k, v in t.items() if k in ('boxes', 'labels')} for t in targets]

#     with torch.no_grad():
#         prediction = model(images, targets)

#     # calculate mAP and mAR from test dataset
#     metric.update(prediction, ground_truth)
#     mean_AP = metric.compute()

#     # append image name to mean_AP
#     mean_AP['image_name'] = name_dict[image_id[0]]

#     # Append mean_AP and predictions to results list. 
#     results.append(mean_AP)

# # use pandas to create a dataframe of image names and mAP values
# results_df = pd.DataFrame()
# results_df['image_name'] = [result['image_name'] for result in results]
# results_df['mAP'] = [result['map'].item() for result in results]
# results_df['mAP_50'] = [result['map_50'].item() for result in results]
# results_df['mAP_75'] = [result['map_75'].item() for result in results]
# results_df['mAP_small'] = [result['map_small'].item() for result in results]
# results_df['mAP_medium'] = [result['map_medium'].item() for result in results]
# results_df['mAP_large'] = [result['map_large'].item() for result in results]
# results_df['mAR_1'] = [result['mar_1'].item() for result in results]
# results_df['mAR_10'] = [result['mar_10'].item() for result in results]
# results_df['mAR_100'] = [result['mar_100'].item() for result in results]
# results_df['mAR_small'] = [result['mar_small'].item() for result in results]
# results_df['mAR_medium'] = [result['mar_medium'].item() for result in results]
# results_df['mAR_large'] = [result['mar_large'].item() for result in results]

# # if value is == -1.0, replace with NaN
# results_df = results_df.replace(-1.0, np.nan)

# # save results_df to csv
# results_df.to_csv('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/all_image_results.csv', index = False)

In [None]:
# class_res_df = pd.DataFrame()

# # store 'map_per_class' and 'mar_100_per_class' from results in df
# class_res_df['image_name'] = [result['image_name'] for result in results]
# class_res_df['classes'] = [result['classes'] for result in results]
# class_res_df['map_per_class'] = [result['map_per_class'] for result in results]
# class_res_df['mar_100_per_class'] = [result['mar_100_per_class'] for result in results]