### <center>**Reading and Cleaning Annotation Data for Custom PyTorch Object Detection**</center> 

In [1]:
# Import necessary packages
%matplotlib inline
import json
import os
import shutil
import pandas as pd
import matplotlib.pyplot as plt

# Ignore warnings
# No category or module filter is given, so every warning raised after this
# point is suppressed for the rest of the session.
import warnings
warnings.filterwarnings("ignore")

# Switch matplotlib to interactive mode; the trailing ';' suppresses the cell's output repr.
plt.ion(); # interactive mode

##### Helper functions for processing JSONs

In [2]:
# Function for reading JSON as dictionary
def read_json(filename: str) -> dict:
    """Load a JSON file and return its decoded content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        Exception: If the file cannot be opened or decoded. The underlying
            error is chained as ``__cause__`` so the root cause stays visible
            in the traceback.
    """
    try:
        with open(filename, "r") as f:
            data = json.load(f)
    # `except Exception` instead of a bare `except` so that KeyboardInterrupt
    # and SystemExit are not swallowed and re-labelled as read errors.
    except Exception as err:
        raise Exception(f"Reading {filename} file encountered an error") from err

    return data

# Function to append records to df
def create_dataframe(data: list) -> pd.DataFrame:
    """Flatten a list of (possibly nested) records into one DataFrame.

    Each record is flattened with ``pd.json_normalize`` so nested keys become
    dotted column names (e.g. ``bounding_box.x``), and the resulting one-row
    frames are stacked vertically.

    Args:
        data: Iterable of JSON-like records (dicts).

    Returns:
        A DataFrame with one row per record, or an empty DataFrame when
        ``data`` contains no records.
    """
    # Normalize every record first and concatenate once: growing a frame with
    # pd.concat inside the loop re-copies all previous rows on every iteration.
    records = [pd.json_normalize(d) for d in data]

    # pd.concat raises on an empty list, so keep the "no records" result explicit
    if not records:
        return pd.DataFrame()

    return pd.concat(records, axis=0)

# Main function to iterate over files in directory and add to df
def main(directory: str = "C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/Annotations/") -> pd.DataFrame:
    """Load every annotation JSON in ``directory`` into one DataFrame.

    For each regular file in ``directory`` the ``annotations`` array is
    flattened, tagged with the image name derived from the file path, and the
    bounding boxes are converted from (x, y, w, h) to (xmin, ymin, xmax, ymax).

    Args:
        directory: Folder holding the annotation JSON files. Defaults to the
            original hard-coded annotation directory.

    Returns:
        DataFrame with ``img_name``, ``label``, ``xmin``, ``ymin``, ``xmax``,
        ``ymax`` plus any remaining annotation columns.
    """
    frames = []

    # Iterate over files in directory
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)

        # Skip sub-directories and other non-files; previously they were only
        # excluded from the printout and still handed to read_json.
        if not os.path.isfile(f):
            continue
        print(f)

        # Read the JSON file as python dictionary
        data = read_json(filename = f)

        # Create the dataframe for the array items in annotations key
        df = create_dataframe(data = data['annotations'])

        # NOTE(review): the slice takes the 25 characters in front of a
        # 5-character extension (".json"), so it assumes fixed-length file
        # names -- confirm before using with differently named files.
        df.insert(loc = 0, column = 'img_name', value = f'{f[-30:-5]}.JPG')

        df = df.rename(columns = {
            "name": "label",
            "bounding_box.h": "bbox_height",
            "bounding_box.w": "bbox_width",
            "bounding_box.x": "bbox_x_topLeft",
            "bounding_box.y": "bbox_y_topLeft",
            "polygon.paths": "polygon_path"
        })

        frames.append(df)

    # Concatenate once instead of re-copying the accumulated frame per file
    annos_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Convert x, y, h, w to xmin, ymin, xmax, ymax
    annos_df.insert(loc = 2, column = 'xmin',
                    value = annos_df['bbox_x_topLeft'])
    annos_df.insert(loc = 3, column = 'ymin',
                    value = annos_df['bbox_y_topLeft'])
    annos_df.insert(loc = 4, column = 'xmax',
                    value = annos_df['bbox_x_topLeft'] + annos_df['bbox_width'])
    annos_df.insert(loc = 5, column = 'ymax',
                    value = annos_df['bbox_y_topLeft'] + annos_df['bbox_height'])

    # Drop unnecessary columns
    annos_df = annos_df.drop(columns = ['bbox_height', 'bbox_width', 'bbox_x_topLeft',
                                        'bbox_y_topLeft', 'id', 'slot_names', 'polygon_path'])

    return annos_df

##### Load annotation data into dataframe

In [None]:
# Execute main loading function
# `main` is the annotation loader defined above. The training cell further down
# defines another function with the same name, so on a re-run this cell only
# works while `main` still refers to the loader.
if __name__ == '__main__':
    df = main()

##### Filter annotation dataframe

In [4]:
# Keep a row only when its label occurs more than 200 times (labels with
# 200 or fewer rows are dropped) ...
rows_per_label = df['label'].map(df['label'].value_counts())
is_frequent = rows_per_label > 200

# ... and when the label is not 'Hen'
is_not_hen = df['label'] != 'Hen'

df = df[is_frequent & is_not_hen]

##### Filter images since most annotation classes were filtered out

In [5]:
# Store unique img_names in filtered df as a list
img_names = df['img_name'].unique().tolist()

# Create a new directory called 'filtered_images'
# exist_ok=True makes this a single, re-runnable call instead of a
# check-then-create sequence.
new_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images'
os.makedirs(new_dir, exist_ok = True)

# Copy images in img_names to new directory (copy2 also preserves file metadata)
images_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/Images'
for img in img_names:
    shutil.copy2(f'{images_dir}/{img}', new_dir)

### <center>**Transform and Augment Image and Annotation Data for Custom PyTorch Object Detection**</center> 

In [6]:
# import necessary packages
import numpy as np
from PIL import Image
from io import BytesIO
from collections import defaultdict
import torch
from torch.utils.data import Dataset
import torch.backends.cudnn as cudnn
cudnn.benchmark = True
import torchvision
torchvision.disable_beta_transforms_warning()
from torchvision.io import read_image
from torchvision import datapoints as dp
import torchvision.transforms.v2 as T
from torchvision.models.detection import ssd300_vgg16, SSD300_VGG16_Weights
import utils

##### Pre-process annotation data

In [7]:
# use ordinal encoder to convert df['label'] to numeric values. 0 is reserved for background class.
from sklearn.preprocessing import OrdinalEncoder
oe = OrdinalEncoder()

# fit_transform returns an (n_rows, 1) float array; flatten it and cast to
# integers so class ids are stored as 1, 2, ... instead of 1.0, 2.0, ...
label_codes = oe.fit_transform(df[['label']]).ravel().astype('int64') + 1
df.insert(loc = 6, column = 'target', value = label_codes)

# Create a dictionary using the numeric df['target'] codes as the keys and the
# original df['label'] class names as the values
label_dict = dict(zip(df['target'], df['label']))

# Drop label column from df
df = df.drop(['label'], axis = 1)

# Change target column name to label
df = df.rename(columns = {'target': 'label'})

# Save df as csv in directory
df.to_csv('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv', index = False)

##### PyTorch dataset for custom image and annotation data

In [8]:
# Custom dataset loader (PyTorch) for loading images and annotation data
class MAVdroneDataset(Dataset):
    """Dataset Loader for Waterfowl Drone Imagery

    One item corresponds to one unique image name in the annotation CSV and
    bundles that image with every annotation row that references it.
    """

    def __init__(self, csv_file, root_dir, transforms):
        """
        Arguments:
            csv_file (string): Path to the CSV file with annotations.
            root_dir (string): Directory containing all images.
            transforms (callable or None): Called as ``transforms(image, target)``
                and expected to return the transformed ``(image, target)`` pair.
        """
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir

        self.transforms = transforms
        self.unique_image_names = self.df['img_name'].unique()


    @staticmethod
    def _order_box_corners(boxes):
        """Return a copy of ``boxes`` (n, 4 as xmin, ymin, xmax, ymax) in which
        every row satisfies xmin <= xmax and ymin <= ymax.

        Only the rows that are actually inverted are swapped; rows that are
        already ordered are returned unchanged.
        """
        ordered = boxes.clone()

        swap_x = ordered[:, 0] > ordered[:, 2]
        ordered[swap_x] = ordered[swap_x][:, [2, 1, 0, 3]]

        swap_y = ordered[:, 1] > ordered[:, 3]
        ordered[swap_y] = ordered[swap_y][:, [0, 3, 2, 1]]

        return ordered


    def __getitem__(self, idx):
        """Return ``(image, target)`` for the ``idx``-th unique image.

        ``target`` holds ``boxes`` (XYXY bounding boxes), ``labels``,
        ``image_id``, ``area`` and ``iscrowd``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image_name = self.unique_image_names[idx]
        image_path = os.path.join(self.root_dir, image_name)

        # Decode once; an unreadable file raises here (nothing is retried or skipped).
        # convert('RGB') guarantees 3 channels so the permute below is always valid.
        with Image.open(image_path) as pil_image:
            pixels = np.array(pil_image.convert('RGB'), dtype = np.uint8)

        # convert np.array to Tensor[image_channels, image_height, image_width]
        image = torch.from_numpy(pixels).permute(2, 0, 1)

        # all annotation rows of this image (filtered once and reused)
        annotations = self.df[self.df['img_name'] == image_name]

        labels = torch.as_tensor(annotations['label'].values, dtype = torch.int64) # (n_objects)
        boxes = torch.as_tensor(annotations[['xmin', 'ymin', 'xmax', 'ymax']].values,
                                dtype = torch.float32)

        # flip inverted corners per box so width and height are always positive
        boxes = self._order_box_corners(boxes)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd
        iscrowd = torch.zeros((len(labels),), dtype=torch.int64)

        target = {}
        target['boxes'] = dp.BoundingBox(boxes, format = "XYXY", spatial_size = (image.shape[1], image.shape[2]))
        target['labels'] = labels
        target['image_id'] = image_id
        target['area'] = area
        target['iscrowd'] = iscrowd

        image = dp.Image(image)

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target


    def __len__(self):
        """Number of unique images referenced by the annotation CSV."""
        return len(self.unique_image_names)

##### Data transformation function

In [9]:
# ImageNet mean and std since using pretrained VGG16 backbone
mean = [0.485, 0.456, 0.406] # 3 bands
std = [0.229, 0.224, 0.225]

# Same transforms as original SSD paper
def get_transform(train):
    """Build the transform pipeline applied to each (image, target) pair.

    With ``train`` truthy the pipeline starts with random zoom-out, random IoU
    crop, a resize to 300x300 and a random horizontal flip; otherwise it starts
    with the resize only. Both variants end with tensor conversion, float
    conversion, bounding-box sanitizing and normalization with ``mean``/``std``.
    """
    # no maintain aspect ratio
    resize = T.Resize((300, 300), antialias = True)

    if train:
        leading = [
            T.RandomZoomOut(fill = defaultdict(lambda: 0, {dp.Image: (255, 20, 147)}),
                            p = 0.3,
                            side_range = (1.0, 2.0)),
            T.RandomIoUCrop(),
            resize,
            T.RandomHorizontalFlip(0.5),
        ]
    else:
        leading = [resize]

    # steps shared by the training and evaluation pipelines
    trailing = [
        T.ToImageTensor(),
        T.ConvertImageDtype(torch.float),
        T.SanitizeBoundingBox(),
        T.Normalize(mean, std), # ImageNet mean and std values for normalization
    ]

    return T.Compose(leading + trailing)

##### Helper functions for plotting image and annotations

In [10]:
# classes are values in label_dict
# NOTE(review): the order follows label_dict's insertion order, which is not
# necessarily the numeric order of the label codes -- confirm before indexing
# `classes` by label id.
classes = list(label_dict.values())

# reverse label dictionary for mapping class names back to label codes
rev_label_dict = {v: k for k, v in label_dict.items()}

# distinct colors
bbox_colors = ['#f032e6', '#ffffff', '#ffe119', '#3cb44b', '#42d4f4',
                    '#f58231', '#e6194B', '#dcbeff', '#469990', '#4363d8']

# label color map for plotting color-coded boxes by class; the palette is
# cycled so more classes than colors no longer raises IndexError
label_color_map = {k: bbox_colors[i % len(bbox_colors)] for i, k in enumerate(label_dict.keys())}

# function for reshaping boxes 
def get_box(boxes):
    """Return ``boxes`` as a float NumPy array with four columns.

    The input is converted to an array, cast to float and reshaped to
    ``(-1, 4)``. A single box keeps its leading axis, i.e. shape ``(1, 4)``.
    """
    as_rows = np.array(boxes).astype('float').reshape(-1, 4)
    single_box = as_rows.shape[0] == 1
    return as_rows if single_box else np.squeeze(as_rows)


# function for plotting image
def img_show(image, ax = None, figsize = (6, 9)):
    """Show ``image`` on an axes with the x ticks moved to the top.

    A new figure of size ``figsize`` is created only when no ``ax`` is passed.
    Returns the axes the image was drawn on.
    """
    target_ax = ax
    if target_ax is None:
        _, target_ax = plt.subplots(figsize = figsize)

    target_ax.xaxis.tick_top()
    target_ax.imshow(image)
    return target_ax
 

def plot_bbox(ax, boxes, labels):
    """Draw one bounding box and its class label on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        boxes: A single box as (xmin, ymin, xmax, ymax); a ``(1, 4)`` array as
            returned by ``get_box`` or a flat sequence of four numbers.
        labels: Single-element tensor/array holding the numeric label
            (read with ``.item()``).
    """
    # Unpack plain floats so matplotlib receives scalars rather than
    # one-element arrays for the corner, width and height.
    x_min, y_min, x_max, y_max = (float(v) for v in np.asarray(boxes, dtype = float).reshape(-1, 4)[0])
    label_id = labels.item()

    # add box to the image and use label_color_map to color-code by bounding box class if exists else 'black'
    ax.add_patch(plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                    fill = False,
                    color = label_color_map.get(label_id, 'black'),
                    linewidth = 1.5))
    # add label text at the (xmax, ymax) corner using label_dict if label exists else the raw label id
    ax.text(x_max, y_max,
            label_dict.get(label_id, label_id),
            fontsize = 8,
            bbox = dict(facecolor = 'white', alpha = 0.8, pad = 0, edgecolor = 'none'),
            color = 'black')


# function for plotting all boxes and labels on the image using the img_show, get_box, and plot_bbox functions
def plot_detections(image, boxes, labels, ax = None):
    """Show ``image`` and overlay every box in ``boxes`` with its label.

    ``image`` is permuted with ``(1, 2, 0)`` before display; ``boxes[i]`` is
    paired with ``labels[i]``.
    """
    canvas = img_show(image.permute(1, 2, 0), ax = ax)
    n_boxes = len(boxes)
    for box_idx in range(n_boxes):
        plot_bbox(canvas, get_box(boxes[box_idx]), labels[box_idx])

##### Plot sample batch to confirm data loads and transforms correctly

In [None]:
# Load sample batch of data to custom PyTorch Dataset and Transform
sample_dataset = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv', 
                                root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images', 
                                transforms = get_transform(train = True))

# store image indices in random order list
# NOTE: `indices` is reused by the training and test cells further down to
# define the train/val/test split; no seed is set, so the split changes every
# time this cell is re-run.
indices = torch.randperm(len(sample_dataset)).tolist()

sample_data_loader = torch.utils.data.DataLoader(sample_dataset, batch_size = 8, shuffle = True, 
                                             collate_fn = utils.collate_fn, num_workers = 0)

# store images and annotation targets from sample batch
images, targets = next(iter(sample_data_loader))
images = list(images)
targets = [{k: v for k, v in t.items()} for t in targets]

# Plot all samples from the batch in a grid of subplots.
# Iterate over the batch actually returned: it holds fewer than 8 images when
# the dataset itself is smaller than the batch size.
plt.figure(figsize = (8, 32))
for i in range(len(images)):
    ax = plt.subplot(8, 2, 1 + i)
    plot_detections(images[i], targets[i]['boxes'], targets[i]['labels'], ax = ax)
    plt.axis('off')
    plt.title(f"Sample {i + 1}")

plt.show()

### <center>**Tune Model Hyperparameters using Ray Tune**</center> 

In [12]:
import tempfile
from datetime import datetime
from engine import train_one_epoch, evaluate # comment out if using gradient accumulation
# from engine_gradientAccumulation import train_one_epoch, evaluate # uncomment if using gradient accumulation
from ray import train, tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.bayesopt import BayesOptSearch
import torch.nn as nn

##### Helper functions for hyperparameter tuning

In [13]:
# # Wrap data loaders in function and pass global data directories. 
# def load_data(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv', 
#               root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/'):
    
#     # use MAVdroneDataset and defined transformations
#     dataset = MAVdroneDataset(csv_file = csv_file,
#                                 root_dir = root_dir, 
#                                 transforms = get_transform(train = True))
    
#     dataset_val = MAVdroneDataset(csv_file = csv_file,
#                                 root_dir = root_dir, 
#                                 transforms = get_transform(train = False))
    
#     return dataset, dataset_val

# def train_MAVdroneDataset(config, indices):
#     model = ssd300_vgg16(weights = SSD300_VGG16_Weights.DEFAULT, 
#                         weights_backbone = 'VGG16_Weights.IMAGENET1K_FEATURES')
    
#     device = "cpu" 
#     if torch.cuda.is_available():
#         device = "cuda:0"
#         if torch.cuda.device_count() > 1:
#             model = nn.DataParallel(model) # train on multiple GPUs if available
#     model.to(device)

#     # construct an optimizer 
#     params = [p for p in model.parameters() if p.requires_grad]
#     optimizer = torch.optim.SGD(params, lr = config["lr"],
#                                 momentum = config["momentum"], 
#                                 weight_decay = config["weight_decay"])
    
#     # and a learning rate scheduler
#     lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
#                                                    step_size = config["step_size"], # period of lr decay
#                                                    gamma = config["gamma"]) # multiplicative factor of lr decay

#     # Load existing checkpoint if exist.
#     if train.get_checkpoint():
#         loaded_checkpoint = train.get_checkpoint()
#         with loaded_checkpoint.as_directory() as loaded_checkpoint_dir:
#             model_state, optimizer_state = torch.load(
#                 os.path.join(loaded_checkpoint_dir, "checkpoint.pt")
#             )
#             model.load_state_dict(model_state)
#             optimizer.load_state_dict(optimizer_state)
#     else:
#         start_epoch = 0

#     dataset, dataset_val = load_data()

#     # subset using a 80/15/5 split for train, validation, and test datasets
#     dataset_train = torch.utils.data.Subset(dataset, indices[:-int(len(dataset)*0.2)]) # first 80% of dataset

#     dataset_val = torch.utils.data.Subset(dataset_val, indices[-int(len(dataset)*0.2):-int(len(dataset)*0.05)]) # next 15% of dataset

#     # # Uncomment if using gradient accumulation
#     # training_steps = [
#     #     {"step": 0, "batch_size": 16, "epochs": 15, "print_freq": 10, 'accumulation_steps': 1}, 
#     #     {"step": 1, "batch_size": 64, "epochs": 15, "print_freq": 5, 'accumulation_steps': 1},
#     #     {"step": 2, "batch_size": 64, "epochs": 10, "print_freq": 5, 'accumulation_steps': 4}, # gpu memory maxes out at batch size 64
#     #     {"step": 3, "batch_size": 64, "epochs": 5, "print_freq": 5, 'accumulation_steps': 16}, # batch size 1024 via gradient accumulation
#     # ]

#     # comment out if using gradient accumulation
#     training_steps = [
#             {"step": 0, "batch_size": 16, "epochs": 10, "print_freq": 10},
#             {"step": 1, "batch_size": 64, "epochs": 5, "print_freq": 5}, 
#             {"step": 2, "batch_size": 256, "epochs": 5, "print_freq": 2},  
#             {"step": 3, "batch_size": 1024, "epochs": 5, "print_freq": 1}
#         ]

#     # loop through training_steps during training to increase batch size and decrease learning rate
#     for step in training_steps:
#         batch_size = int(step['batch_size'])
#         num_epochs = int(step['epochs'])
#         print_freq = int(step['print_freq'])
#         # accumulation_steps = int(step['accumulation_steps']) # uncomment if using gradient accumulation

#         # define training and validation data loaders
#         data_loader = torch.utils.data.DataLoader(dataset_train, batch_size = batch_size, shuffle = True, 
#                                                 collate_fn = utils.collate_fn, num_workers = 0,
#                                                 pin_memory = True)
        
#         data_loader_val = torch.utils.data.DataLoader(dataset_val, batch_size = 1, shuffle = False,
#                                                     collate_fn = utils.collate_fn, num_workers = 0,
#                                                     pin_memory = True)

#         print(f'Beginning training step {step["step"]}... batch size: {batch_size}')

#         for epoch in range(start_epoch, num_epochs + start_epoch):
#             print()
#             print(f'Epoch {epoch} beginning training...')
#             print()

#             # # uncomment if using gradient accumulation
#             # train_metric_logger, val_metric_logger = train_one_epoch(model, optimizer, data_loader, data_loader_val, accumulation_steps, device, epoch, print_freq)

#             # comment out if using gradient accumulation
#             train_metric_logger, val_metric_logger = train_one_epoch(model, optimizer, data_loader, data_loader_val, device, epoch, print_freq)

#             # update the learning rate
#             lr_scheduler.step()

#             print()
#             print(f'Epoch {epoch} preparing to evaluate the validation dataset...')
#             print()
            
#             # evaluate on the val dataset
#             train_coco_evaluator, val_coco_evaluator = evaluate(model, data_loader, data_loader_val, device)

#             # Here we save a checkpoint. It is automatically registered with Ray Tune
#             with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
#                 path = os.path.join(temp_checkpoint_dir, "checkpoint.pt")
#                 torch.save(
#                     (model.state_dict(), optimizer.state_dict()), path
#                 )
#                 checkpoint = train.Checkpoint.from_directory(temp_checkpoint_dir)
#                 train.report(
#                     {"train_loss": train_metric_logger.loss.global_avg, # metric_logger object
#                     "val_loss": val_metric_logger.loss.global_avg,
#                     "train_mAP_50": train_coco_evaluator.coco_eval['bbox'].stats[1],
#                     "train_mAR_100": train_coco_evaluator.coco_eval['bbox'].stats[8],
#                     "val_mAP_50": val_coco_evaluator.coco_eval['bbox'].stats[1],
#                     "val_mAR_100": val_coco_evaluator.coco_eval['bbox'].stats[8],
#                     "training_step": step["step"],
#                     "epoch": epoch}, 
#                     checkpoint = checkpoint
#                 )

#         print(f'Training step {step["step"]} complete! Moving onto training step {step["step"] + 1}...')
#         print()
        
#         # set start_epoch to current epoch for next training step
#         start_epoch = num_epochs if step['step'] == 0 else num_epochs + start_epoch       
    
#     print('Tuning Trial Complete!')

# # test set accuracy of best model
# def test_best_model(best_result, indices):
#     best_trained_model = ssd300_vgg16(weights = SSD300_VGG16_Weights.DEFAULT, 
#                         weights_backbone = 'VGG16_Weights.IMAGENET1K_FEATURES')
                                      
#     device = "cuda:0" if torch.cuda.is_available() else "cpu"
#     best_trained_model.to(device)

#     checkpoint_path = os.path.join(best_result.checkpoint.to_directory(), "checkpoint.pt")

#     model_state, _ = torch.load(checkpoint_path)
#     best_trained_model.load_state_dict(model_state)

#     _, dataset_test = load_data()

#     dataset_test = torch.utils.data.Subset(dataset_test, indices[-int(len(dataset_test)*0.05):]) # last 5% of dataset

#     data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size = 1, shuffle = False,
#                                                 collate_fn = utils.collate_fn, num_workers = 0,
#                                                 pin_memory = True)
    
#     test_results = evaluate(best_trained_model, data_loader_test, device)

#     print(f'Best trial test set mAP_50: {test_results.coco_eval["bbox"].stats[1]} and mAR_100: {test_results.coco_eval["bbox"].stats[8]}')


# def trial_dirname_creator(trial):
#     return f"train_MAVdroneDataset_{trial.trial_id}"

##### Main Tuning Program

In [14]:
# torch.cuda.empty_cache()

# def main(num_samples, max_num_epochs, indices):
#     config = {
#         "lr": tune.uniform(0.0001, 0.09),
#         "momentum": tune.uniform(0.3, 0.95),
#         "weight_decay": tune.uniform(0.0004, 0.04),
#         "step_size": tune.uniform(5, 30),
#         "gamma": tune.uniform(0.2, 0.75)
#     }
     
#     scheduler = ASHAScheduler(
#         time_attr="epoch",
#         max_t=max_num_epochs,
#         grace_period=5,
#         reduction_factor=2
#     )

#     algo = BayesOptSearch(
#         points_to_evaluate = [
#             {"lr": 0.01002, 
#              "momentum": 0.75711, 
#              "weight_decay": 0.00465, 
#              "step_size": 21.8483, 
#              "gamma": 0.39259} 
#         ], # values as starting point for search
#         random_search_steps = 50
#     )
    
#     algo = tune.search.ConcurrencyLimiter(algo, max_concurrent=1)

#     tuner = tune.Tuner(
#         tune.with_resources(
#             tune.with_parameters(train_MAVdroneDataset, indices = indices),
#             resources={"cpu": 2, "gpu": 1}
#         ),
#         run_config=train.RunConfig(
#             storage_path='C:/Users/exx/Documents/GitHub/SSD_VGG_PyTorch/ray_results' 
#         ),
#         tune_config=tune.TuneConfig(
#             metric="val_mAP_50",
#             mode="max",
#             search_alg = algo,
#             scheduler=scheduler,
#             num_samples=num_samples,
#             time_budget_s=600000,
#             trial_dirname_creator=trial_dirname_creator
#         ),
#         param_space=config
#     )

#     results = tuner.fit()

#     best_result = results.get_best_result("val_mAP_50", "max")

#     print("Best trial config: {}".format(best_result.config))
#     print("Best trial final validation loss: {}".format(best_result.metrics["val_loss"]))
#     print("Best trial final validation mAP_50: {}".format(best_result.metrics["val_mAP_50"]))
#     print("Best trial final validation mAR_100: {}".format(best_result.metrics["val_mAR_100"]))

#     test_best_model(best_result, indices)

#     return best_result

# if __name__ == "__main__":
#     best_trial = main(num_samples = 15, max_num_epochs = 25, indices = indices)

### <center>**Train Model Using Tuned Hyperparameters**</center> 

In [None]:
from torch.utils.tensorboard import SummaryWriter
torch.cuda.empty_cache()

# # Hyperparameters are best trial results from Bayesian Optimization using Ray Tune
# learning_rate = best_trial.config["lr"]
# momentum = best_trial.config["momentum"]
# weight_decay = best_trial.config["weight_decay"]
# step_size = best_trial.config["step_size"]
# gamma = best_trial.config["gamma"]

# Hard-coded hyperparameters; the values match the `points_to_evaluate` seed
# listed in the commented-out tuning cell above.
learning_rate = 0.01002
momentum = 0.75711 
weight_decay = 0.00465
# NOTE(review): fractional value; StepLR documents step_size as an integer
# number of epochs -- confirm the intended decay period.
step_size= 21.8483
gamma = 0.39259

# # If using gradient accumulation to overcome GPU memory limitations, set accumulation_steps to N
# # accumulation steps is N batches to accumulate gradients for before zeroing gradients
# training_steps = [
#     {"step": 0, "batch_size": 16, "lr": learning_rate, "epochs": 15, "print_freq": 20, 'accumulation_steps': 1}, 
#     {"step": 1, "batch_size": 64, "lr": learning_rate, "epochs": 15, "print_freq": 5, 'accumulation_steps': 1},
#     {"step": 2, "batch_size": 64, "lr": learning_rate, "epochs": 10, "print_freq": 5, 'accumulation_steps': 4}, # gpu memory maxes out at batch size 64
#     {"step": 3, "batch_size": 64, "lr": learning_rate, "epochs": 5, "print_freq": 5, 'accumulation_steps': 16}, # batch size 1024 via gradient accumulation
# ]

# Training schedule: one dict per stage, consumed in order by main() below.
# Every stage uses the same learning rate; only batch size, epoch count and
# print frequency change from stage to stage.
training_steps = [
            {"step": 0, "batch_size": 16, "lr": learning_rate, "epochs": 15, "print_freq": 10},
            {"step": 1, "batch_size": 64, "lr": learning_rate, "epochs": 15, "print_freq": 5}, 
            {"step": 2, "batch_size": 256, "lr": learning_rate, "epochs": 10, "print_freq": 2}, 
            {"step": 3, "batch_size": 1024, "lr": learning_rate, "epochs": 5, "print_freq": 1}, 
]

# Main function that performs training and validation.
def main():
    """Train SSD300-VGG16 through every stage listed in ``training_steps``.

    Reads the module-level ``training_steps``, ``momentum``, ``weight_decay``,
    ``step_size``, ``gamma`` and the shuffled ``indices`` list. For every epoch
    it trains, evaluates on the train and validation subsets, logs scalars to
    TensorBoard and records a checkpoint.

    Returns:
        list[dict]: One dictionary per epoch holding the epoch number, losses,
        mAP/mAR values and CPU copies of the model and optimizer state.
    """

    def _cpu_snapshot(state):
        """Recursively copy a (nested) state dict, placing tensor copies on the CPU.

        ``state_dict()`` hands out references to the live tensors, so without a
        copy every stored checkpoint would end up holding the final weights.
        """
        if torch.is_tensor(state):
            return state.detach().to("cpu", copy = True)
        if isinstance(state, dict):
            copied = type(state)((key, _cpu_snapshot(value)) for key, value in state.items())
            # keep the version metadata that Module.state_dict() attaches
            if hasattr(state, "_metadata"):
                copied._metadata = state._metadata
            return copied
        if isinstance(state, (list, tuple)):
            return type(state)(_cpu_snapshot(value) for value in state)
        return state

    # Initialize model--SSD300 w/ VGG16 backbone pre-trained
    model = ssd300_vgg16(weights = SSD300_VGG16_Weights.DEFAULT, 
                        weights_backbone = 'VGG16_Weights.IMAGENET1K_FEATURES')

    device = "cpu" 
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    model.to(device)

    start_epoch = 0

    # StepLR expects a whole number of epochs; a fractional period never lines
    # up with the integer epoch counter, so round the tuned value.
    # NOTE(review): the scheduler is rebuilt for every training step, so its
    # epoch counter restarts at 0 each time -- with the current settings no
    # step runs for as many epochs as step_size. Confirm this is intended.
    lr_step_size = max(1, int(round(step_size)))

    # The datasets do not depend on the training step, so build them once.
    csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv'
    root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/'

    # use MAVdroneDataset and defined transformations
    full_dataset = MAVdroneDataset(csv_file = csv_file,
                            root_dir = root_dir, 
                            transforms = get_transform(train = True))

    full_dataset_val = MAVdroneDataset(csv_file = csv_file,
                            root_dir = root_dir,
                            transforms = get_transform(train = False))

    # subset using a 80/15/5 split for train, validation, and test datasets.
    # Explicit split points avoid the `-0` slice that a negative-index split
    # produces when a share rounds down to zero images.
    val_start = len(indices) - int(len(full_dataset)*0.2)
    val_stop = len(indices) - int(len(full_dataset_val)*0.05)

    dataset = torch.utils.data.Subset(full_dataset, indices[:val_start])
    dataset_val = torch.utils.data.Subset(full_dataset_val, indices[val_start:val_stop])

    # initialize tensorboard writer
    writer = SummaryWriter()

    # Store one checkpoint dictionary for each epoch in a list of dictionaries. 
    checkpoints = []

    try:
        # loop through training_steps during training to increase batch size
        for step in training_steps:
            batch_size = step['batch_size']
            lr = step['lr']
            num_epochs = step['epochs']
            print_freq = step['print_freq']
            # accumulation_steps = step['accumulation_steps']

            # define training and validation data loaders
            data_loader = torch.utils.data.DataLoader(dataset, batch_size = batch_size, shuffle = True, 
                                                    collate_fn = utils.collate_fn, num_workers = 0,
                                                    pin_memory = True)

            data_loader_val = torch.utils.data.DataLoader(dataset_val, batch_size = 1, shuffle = False,
                                                        collate_fn = utils.collate_fn, num_workers = 0,
                                                        pin_memory = True)

            # construct an optimizer - SGD w/ momentum and weight decay
            params = [p for p in model.parameters() if p.requires_grad]
            optimizer = torch.optim.SGD(params, lr = lr,
                                        momentum = momentum, weight_decay = weight_decay)

            # and a learning rate scheduler
            lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                        step_size = lr_step_size,
                                                        gamma = gamma)

            # Log the hyperparameters to tensorboard
            writer.add_hparams(
                # {"lr": lr, "momentum": momentum, "weight_decay": weight_decay, "step_size": step_size, "gamma": gamma, "batch_size": (batch_size*accumulation_steps)}, # if using gradient accumulation
                {"lr": lr, "momentum": momentum, "weight_decay": weight_decay, "step_size": lr_step_size, "gamma": gamma, "batch_size": (batch_size)},
                {}
            )

            # print(f'Beginning training step {step["step"]}... batch size: {batch_size*accumulation_steps}')
            print(f'Beginning training step {step["step"]}... batch size: {batch_size}')

            #########################################################
            ##               The main training loop                ##
            #########################################################
            for epoch in range(start_epoch, num_epochs + start_epoch):
                print()
                print(f'Epoch {epoch} beginning training...')
                print()

                # # uncomment if using gradient accumulation
                # train_metric_logger, val_metric_logger = train_one_epoch(model, optimizer, data_loader, data_loader_val, accumulation_steps, device, epoch, print_freq)

                # comment out if using gradient accumulation
                train_metric_logger, val_metric_logger = train_one_epoch(model, optimizer, data_loader, data_loader_val, device, epoch, print_freq)

                print()
                print(f'Epoch {epoch} finished training!')
                print()

                # update the learning rate
                lr_scheduler.step()

                print()
                print(f'Epoch {epoch} preparing to calculate train and val set accuracy...')
                print()

                # evaluate on the train and validation datasets
                train_coco_evaluator, val_coco_evaluator = evaluate(model, data_loader, data_loader_val, device)

                # Save the weights of the underlying module so the keys carry no
                # DataParallel "module." prefix and load into a plain model.
                core_model = model.module if isinstance(model, nn.DataParallel) else model

                # store training and validation metrics in checkpoint dictionary. 
                checkpoint = {
                    "epoch": epoch,
                    "train_loss": train_metric_logger.loss.global_avg, # metric_logger object
                    "train_bbox_loss": train_metric_logger.bbox_regression.global_avg,
                    "train_class_loss": train_metric_logger.classification.global_avg,
                    "val_loss": val_metric_logger.loss.global_avg,
                    "val_bbox_loss": val_metric_logger.bbox_regression.global_avg,
                    "val_class_loss": val_metric_logger.classification.global_avg,
                    "train_mAP_50": train_coco_evaluator.coco_eval['bbox'].stats[1],
                    "train_mAR_100": train_coco_evaluator.coco_eval['bbox'].stats[8],
                    "val_mAP_50": val_coco_evaluator.coco_eval['bbox'].stats[1],
                    "val_mAR_100": val_coco_evaluator.coco_eval['bbox'].stats[8],
                    "model_state_dict": _cpu_snapshot(core_model.state_dict()),
                    "optimizer_state_dict": _cpu_snapshot(optimizer.state_dict())
                }

                # append checkpoint to checkpoints list
                checkpoints.append(checkpoint)

                # report training and validation scalars to tensorboard
                writer.add_scalar('Train Loss', float(checkpoint["train_loss"]), epoch)
                writer.add_scalar('Val Loss', float(checkpoint["val_loss"]), epoch)
                writer.add_scalar('Train mAP@50', float(checkpoint["train_mAP_50"]), epoch)
                writer.add_scalar('Train mAR@100', float(checkpoint["train_mAR_100"]), epoch)
                writer.add_scalar('Val mAP@50', float(checkpoint["val_mAP_50"]), epoch)
                writer.add_scalar('Val mAR@100', float(checkpoint["val_mAR_100"]), epoch)

                print()
                print(f'Epoch {epoch} complete! Moving onto epoch {epoch + 1}...')
                print()

            print()
            print(f'Training step {step["step"]} complete! Moving onto training step {step["step"] + 1}...')
            print()

            # the next training step continues the global epoch count
            start_epoch += num_epochs

        print('All Training Steps Complete!')
    finally:
        # close tensorboard writer even when training is interrupted
        writer.close()

    return checkpoints

if __name__ == "__main__":
    checkpoints = main()

In [16]:
# Best train epoch is dictionary in checkpoints with highest val_mAP_50 value
best_train_epoch = max(checkpoints, key = lambda x: x['val_mAP_50'])

# initialize SSD VGG16 model; its weights are overwritten by the checkpoint below
model = ssd300_vgg16()

# A model wrapped in nn.DataParallel prefixes every key with "module.";
# strip that prefix so the weights load into the plain (unwrapped) model.
best_state_dict = {
    (key[len('module.'):] if key.startswith('module.') else key): value
    for key, value in best_train_epoch["model_state_dict"].items()
}

# load model weights from best_train_epoch
model.load_state_dict(best_state_dict)

# save model weights to .pth file (file name carries today's date as MMDDYYYY)
torch.save(model.state_dict(), f'SSD_VGG16_DuckNet_{datetime.now().strftime("%m%d%Y")}.pth')

In [None]:
# Build metric-only copies of the checkpoints, leaving out the model and
# optimizer state dicts. New dictionaries are created so the entries in
# `checkpoints` keep their state dicts, and no loop variable named `dict`
# shadows the builtin that later cells call.
state_keys = ("model_state_dict", "optimizer_state_dict")
checkpoints_copy = [
    {key: value for key, value in checkpoint.items() if key not in state_keys}
    for checkpoint in checkpoints
]

# save checkpoints list to text file
with open('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/checkpoints.txt', 'w') as f:
    for item in checkpoints_copy:
        f.write("%s\n" % item)

### <center>**Model Inference on Test Dataset**</center> 

##### Load the test dataset

In [17]:
# test_indices is last 5% of indices list--not seen by model during training/validation
# An explicit start position is used because `indices[-0:]` would select the
# whole list when the 5% share rounds down to zero.
n_test = int(len(indices)*0.05)
test_indices = indices[len(indices) - n_test:]

dataset_test = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv',
                                root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/', 
                                transforms = get_transform(train = False))

test_image_names = [dataset_test.unique_image_names[i] for i in test_indices]

# create dictionary of test indices and image names
test_dict = {index: name for index, name in zip(test_indices, test_image_names)}

# subset test dataset using test_indices
dataset_test = torch.utils.data.Subset(dataset_test, test_indices)

data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size = 1, shuffle = False,
                                            collate_fn = utils.collate_fn, num_workers = 0,
                                            pin_memory = True)

In [None]:
# Evaluate the reloaded best model on the held-out test loader, on the CPU.
# NOTE(review): the training loop calls evaluate(model, data_loader, data_loader_val, device)
# and unpacks two evaluators, whereas this call passes a single loader and keeps
# a single result -- confirm it matches the signature of engine.evaluate.
test_performance = evaluate(model, data_loader_test, device=torch.device('cpu'))

##### Calculate performance metrics on every image in test dataset

In [19]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision

In [20]:
# one metrics dict is collected per test batch (batch_size = 1 -> per image)
results = []

metric = MeanAveragePrecision(
    iou_type="bbox",
    class_metrics=True,
    max_detection_thresholds=[1, 10, 100],
)

# inference runs on the CPU with the model in evaluation mode
model.to('cpu')
model.eval()

for images, targets in data_loader_test:
    # image ids of this batch; used below to look the image name up in test_dict
    image_id = [target['image_id'].item() for target in targets]

    # wrap the 'boxes' entry of every target in torch.tensor, leave the other entries untouched
    targets = [
        {key: (torch.tensor(value) if key == 'boxes' else value) for key, value in target.items()}
        for target in targets
    ]

    # the metric only receives the 'boxes' and 'labels' entries of each target
    ground_truth = [
        {key: value for key, value in target.items() if key in ('boxes', 'labels')}
        for target in targets
    ]

    with torch.no_grad():
        prediction = model(images, targets)

    # feed this batch to the metric, then compute over everything fed so far
    # (the same metric object is reused across iterations)
    metric.update(prediction, ground_truth)
    mean_AP = metric.compute()

    # tag the metrics with the name of the first image of the batch
    mean_AP['image_name'] = test_dict[image_id[0]]

    # keep the tagged metrics dict
    results.append(mean_AP)

##### Store per-image test dataset metrics as dataframe

In [21]:
# (dataframe column, key in each results dict) pairs, in output column order
metric_columns = [
    ('mAP', 'map'),
    ('mAP_50', 'map_50'),
    ('mAP_75', 'map_75'),
    ('mAP_small', 'map_small'),
    ('mAP_medium', 'map_medium'),
    ('mAP_large', 'map_large'),
    ('mAR_1', 'mar_1'),
    ('mAR_10', 'mar_10'),
    ('mAR_100', 'mar_100'),
    ('mAR_small', 'mar_small'),
    ('mAR_medium', 'mar_medium'),
    ('mAR_large', 'mar_large'),
]

# one row per entry of results: image name first, then every scalar metric
img_results_df = pd.DataFrame()
img_results_df['image_name'] = [result['image_name'] for result in results]
for column_name, result_key in metric_columns:
    # .item() turns the stored scalar tensor into a plain Python number
    img_results_df[column_name] = [result[result_key].item() for result in results]

# values equal to -1.0 are replaced with NaN
img_results_df = img_results_df.replace(-1.0, np.nan)

In [22]:
# TorchMetrics values are running averages, so the last row carries the final values;
# the image name is dropped so that only metric entries remain.
final_metrics = img_results_df.iloc[-1].drop('image_name')

##### Print per-image metrics for test dataset as table

In [None]:
from prettytable import PrettyTable

# Two-column summary table of the values held in final_metrics.
x = PrettyTable()

cols = ['Metric', 'Value']

# add column headers
x.field_names = cols

# One row per metric: its name in column one, its value as a percentage in column two.
# Rows come from (label, value) pairs rather than `final_metrics[i]`: the Series is
# labelled by metric name, and integer keys on a non-integer index rely on a
# positional fallback that pandas has deprecated.
for metric_name, metric_value in final_metrics.items():
    x.add_row([metric_name, f'{metric_value*100:.2f}%'])

# print table
print(x)

# save table as txt file
with open('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/testDataset_image_summary_results.txt', 'w') as f:
    print(x, file = f)

# save the per-image results dataframe to csv
img_results_df.to_csv('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/per_image_results_test_dataset.csv', index = False)

##### Store per-class test dataset metrics as dataframe

In [24]:
class_res_df = pd.DataFrame()

# columns copied from every results entry: the image name plus the per-class tensors
per_class_columns = ['classes', 'map_per_class', 'mar_100_per_class']
class_res_df['image_name'] = [result['image_name'] for result in results]
for column in per_class_columns:
    class_res_df[column] = [result[column] for result in results]

# tensors -> numpy arrays
for column in per_class_columns:
    class_res_df[column] = class_res_df[column].apply(lambda tensor: tensor.numpy())

# integer class ids -> entries of label_dict (None when an id is not in the dict)
class_res_df['classes'] = class_res_df['classes'].apply(
    lambda class_ids: [label_dict.get(class_id) for class_id in class_ids])

# -1.0 -> NaN in both metric columns
for column in ('map_per_class', 'mar_100_per_class'):
    class_res_df[column] = class_res_df[column].apply(
        lambda values: np.where(values == -1.0, np.nan, values))

# Drop the class labels whose map_per_class entry is NaN. This has to happen before
# map_per_class itself is filtered, because the two are paired up by position here.
class_res_df['classes'] = class_res_df.apply(
    lambda row: [name for name, value in zip(row['classes'], row['map_per_class']) if not np.isnan(value)],
    axis = 1)

# drop the NaN entries from each metric column (each column is filtered by its own NaNs)
for column in ('map_per_class', 'mar_100_per_class'):
    class_res_df[column] = class_res_df[column].apply(
        lambda values: [value for value in values if not np.isnan(value)])

In [25]:
# TorchMetrics values are running averages, so the row of the last image holds the
# final class list, per-class mAP and per-class mAR_100.
classes, class_map, class_mar_100 = (
    class_res_df[column].iloc[-1]
    for column in ('classes', 'map_per_class', 'mar_100_per_class')
)

##### Print per-class metrics for every image in test dataset as table

In [None]:
# column headers: the class name followed by its two metrics
cols = ['Class', 'mAP', 'mAR_100']

# fresh table object with those headers
x = PrettyTable()
x.field_names = cols

# one row per class; both metrics are shown as percentages with two decimals
for row_idx, class_name in enumerate(classes):
    map_pct = f'{class_map[row_idx]*100:.2f}%'
    mar_pct = f'{class_mar_100[row_idx]*100:.2f}%'
    x.add_row([class_name, map_pct, mar_pct])

# show the table in the notebook
print(x)

# write the same table to a text file
with open('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/testDataset_class_summary_results.txt', 'w') as f:
    print(x, file = f)

# write the per-class results dataframe to csv
class_res_df.to_csv('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/per_class_results_test_dataset.csv', index = False)

##### Load the entire test dataset into a single batch

In [27]:
# a loader whose single batch is the whole test dataset
data_loader_test_all = torch.utils.data.DataLoader(
    dataset_test,
    batch_size = len(dataset_test),
    shuffle = False,
    collate_fn = utils.collate_fn,
    num_workers = 0,
)

# pull that one batch out of the loader
images, targets = next(iter(data_loader_test_all))

# images as a list; targets as fresh dicts whose 'boxes' entry is wrapped in torch.tensor
images = list(images)
targets = [
    {key: (torch.tensor(value) if key == 'boxes' else value) for key, value in target.items()}
    for target in targets
]

# predictions are made on the CPU, in evaluation mode, without tracking gradients
model.to('cpu')
model.eval()

with torch.no_grad():
    predictions = model(images, targets)

##### Post-process model predictions for plotting on original images

In [28]:
# for each image in the batch, keep only the predictions whose score is above 0.5
for i in range(len(predictions)):
    # compute the mask once and apply it to all three entries so they stay aligned
    keep = predictions[i]['scores'] > 0.5
    predictions[i]['boxes'] = predictions[i]['boxes'][keep]
    predictions[i]['labels'] = predictions[i]['labels'][keep]
    predictions[i]['scores'] = predictions[i]['scores'][keep]

# resize boxes to original image shape
for i in range(len(images)):
    # Image tensors are laid out (C, H, W): index 1 is the HEIGHT and index 2 the WIDTH.
    # Reading them the other way round mis-scales boxes whenever the transformed
    # image is not square.
    tran_h, tran_w = images[i].shape[1], images[i].shape[2]

    # replace the tensor with the original image file so plots show the full-resolution picture
    images[i] = Image.open('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/' + test_image_names[i])

    # PIL reports size as (width, height)
    orig_w, orig_h = images[i].size

    # x coordinates (columns 0 and 2) scale with width, y coordinates (columns 1 and 3) with height
    predictions[i]['boxes'] = predictions[i]['boxes'] * torch.tensor([orig_w/tran_w,
                                                                      orig_h/tran_h,
                                                                      orig_w/tran_w,
                                                                      orig_h/tran_h]).view(1, 4)

### <center>**Plot Model Predictions for Images in Test Dataset**</center> 

In [29]:
def plot_bbox_predicted(ax, boxes, labels, scores): # modify plot_bbox to add confidence scores
    """Draw one predicted box on `ax` together with a "<class name> <score>" caption.

    `boxes` is indexed as `boxes[:, 0..3]` (x1, y1, x2, y2 columns); `labels` and
    `scores` must provide `.item()`. The colour comes from `label_color_map`, with
    'black' used for labels that are not in the map. Returns `ax`.
    """
    # same colour for the rectangle outline and the caption background
    class_id = labels.item()
    box_color = label_color_map[class_id] if class_id in label_color_map else 'black'

    # rectangle anchored at (x1, y1) with width x2 - x1 and height y2 - y1
    x1, y1 = boxes[:, 0], boxes[:, 1]
    outline = plt.Rectangle((x1, y1), boxes[:, 2] - x1, boxes[:, 3] - y1,
                            fill = False,
                            color = box_color,
                            linewidth = 1.5)
    ax.add_patch(outline)

    # caption: class name from label_dict plus the score with two decimals,
    # placed 100 units above the y1 coordinate
    caption = f"{label_dict[labels.item()]} {scores.item():.2f}"
    ax.text(boxes[:, 0], boxes[:, 1] - 100,
        s = caption,
        color = 'black',
        fontsize = 6,
        verticalalignment = 'top',
        bbox = {'color': box_color, 'pad': 0})
    return ax


# draws an image and overlays every predicted box with its label and score
def plot_predictions(image, boxes, labels, scores, ax = None):
    """Show `image` via `img_show` and draw each prediction on the resulting axes.

    `boxes`, `labels` and `scores` are read position by position; every box is
    passed through `get_box` before being handed to `plot_bbox_predicted`.
    """
    ax = img_show(image, ax = ax)
    for idx, raw_box in enumerate(boxes):
        plot_bbox_predicted(ax, get_box(raw_box), labels[idx], scores[idx])

In [None]:
# Plot up to 32 samples from batch in an 8 x 4 grid of subplots.
# The count is capped at the batch size so a test set with fewer than 32 images
# does not raise IndexError.
n_plots = min(32, len(images))

plt.figure(figsize = (24, 36))
for i in range(n_plots):
    ax = plt.subplot(8, 4, 1 + i)
    plot_predictions(images[i], predictions[i]['boxes'], predictions[i]['labels'], predictions[i]['scores'], ax = ax)
    plt.axis('off')
    plt.title(test_image_names[i])

plt.show()

##### Run inference on full dataset to get model estimates of abundance

In [9]:
# Dataset over the filtered annotation CSV and image directory, built with the
# non-training transform (train = False). No index subset is applied here.
dataset_all = MAVdroneDataset(csv_file = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_annotations.csv',
                                root_dir = 'C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/filtered_images/', 
                                transforms = get_transform(train = False))

In [11]:
# image name for every entry of `indices`, in the order `indices` lists them
image_names = [dataset_all.unique_image_names[i] for i in indices]

In [12]:
# create dictionary mapping every index in `indices` (not only the test split) to its image name
name_dict = dict(zip(indices, image_names))

# one image per batch, in dataset order (shuffle = False), over the whole of dataset_all
data_loader_all = torch.utils.data.DataLoader(dataset_all, batch_size = 1, shuffle = False,
                                            collate_fn = utils.collate_fn, num_workers = 0,
                                            pin_memory = True)

In [None]:
# get model predictions for every image in data_loader_all
model_predictions_all = []

# Make sure the model is in evaluation mode: only then does the detection model
# return predictions (in training mode it returns losses instead).
model.eval()

for images, targets in data_loader_all:
    # image ids of this batch; used below to look the image name up in name_dict
    image_id = [target['image_id'].item() for target in targets]

    # convert boxes in targets to tensors
    targets = [{k: torch.tensor(v) if k == 'boxes' else v for k, v in t.items()} for t in targets]

    with torch.no_grad():
        prediction = model(images, targets)

    # `prediction` is a LIST with one dict per input image, so the image name has
    # to be stored on each per-image dict; assigning `prediction['image_name']`
    # on the list itself raises TypeError.
    for img_id, image_prediction in zip(image_id, prediction):
        image_prediction['image_name'] = name_dict[img_id]

        # collect the tagged per-image prediction dict
        model_predictions_all.append(image_prediction)

In [19]:
# from torchmetrics.detection.mean_ap import MeanAveragePrecision

# results = []
# metric = MeanAveragePrecision(iou_type="bbox",
#                               class_metrics=True,
#                               max_detection_thresholds=[1, 10, 100]
#                               )
# model.to('cpu')
# model.eval()

# for images, targets in data_loader_all:
#     # use image_id to get image_name from image_names list
#     image_id = [target['image_id'].item() for target in targets]

#     # convert boxes in targets to tensors
#     targets = [{k: torch.tensor(v) if k == 'boxes' else v for k, v in t.items()} for t in targets]

#     # filter targets to only include boxes and labels keys
#     ground_truth = [{k: v for k, v in t.items() if k in ('boxes', 'labels')} for t in targets]

#     with torch.no_grad():
#         prediction = model(images, targets)

#     # calculate mAP and mAR from test dataset
#     metric.update(prediction, ground_truth)
#     mean_AP = metric.compute()

#     # append image name to mean_AP
#     mean_AP['image_name'] = name_dict[image_id[0]]

#     # Append mean_AP and predictions to results list. 
#     results.append(mean_AP)

# # use pandas to create a dataframe of image names and mAP values
# results_df = pd.DataFrame()
# results_df['image_name'] = [result['image_name'] for result in results]
# results_df['mAP'] = [result['map'].item() for result in results]
# results_df['mAP_50'] = [result['map_50'].item() for result in results]
# results_df['mAP_75'] = [result['map_75'].item() for result in results]
# results_df['mAP_small'] = [result['map_small'].item() for result in results]
# results_df['mAP_medium'] = [result['map_medium'].item() for result in results]
# results_df['mAP_large'] = [result['map_large'].item() for result in results]
# results_df['mAR_1'] = [result['mar_1'].item() for result in results]
# results_df['mAR_10'] = [result['mar_10'].item() for result in results]
# results_df['mAR_100'] = [result['mar_100'].item() for result in results]
# results_df['mAR_small'] = [result['mar_small'].item() for result in results]
# results_df['mAR_medium'] = [result['mar_medium'].item() for result in results]
# results_df['mAR_large'] = [result['mar_large'].item() for result in results]

# # if value is == -1.0, replace with NaN
# results_df = results_df.replace(-1.0, np.nan)

# # save results_df to csv
# results_df.to_csv('C:/Users/exx/Deep Learning/UAV_Waterfowl_Detection/all_image_results.csv', index = False)

In [22]:
# class_res_df = pd.DataFrame()

# # store 'map_per_class' and 'mar_100_per_class' from results in df
# class_res_df['image_name'] = [result['image_name'] for result in results]
# class_res_df['classes'] = [result['classes'] for result in results]
# class_res_df['map_per_class'] = [result['map_per_class'] for result in results]
# class_res_df['mar_100_per_class'] = [result['mar_100_per_class'] for result in results]