In [1]:
# Import Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd
import json
import pprint

# Machine Learning
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import ParameterGrid

# Deep Learning
import torch
import torch.nn            as nn
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import LRScheduler
from torch.utils.data import DataLoader, Dataset
import torchinfo
from torchmetrics.classification import MulticlassAccuracy
import torchvision
from torchvision.transforms import v2 as TorchVisionTrns
from torchvision.io.image import read_image
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image


# Miscellaneous
import copy
from enum import auto, Enum, unique
import math
import os
from platform import python_version
import random
import shutil
import time
from pathlib import Path
from skimage.io import imread

# Typing
from typing import Callable, Dict, Generator, List, Optional, Self, Set, Tuple, Union

# Visualization
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Jupyter
from IPython import get_ipython
from IPython.display import HTML, Image
from IPython.display import display
from ipywidgets import Dropdown, FloatSlider, interact, IntSlider, Layout, SelectionSlider
from ipywidgets import interact

import wandb
import utils
from engine import train_one_epoch, evaluate

In [2]:
# Configuration
# %matplotlib inline

seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

# Matplotlib default color palette
lMatPltLibclr = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
# sns.set_theme() #>! Apply SeaBorn theme

# Improve performance by benchmarking
torch.backends.cudnn.benchmark = True

# Reproducibility (Per PyTorch Version on the same device)
# torch.manual_seed(seedNum)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark     = False #<! Makes things slower

In [3]:
# Constants

FIG_SIZE_DEF    = (8, 8)
ELM_SIZE_DEF    = 50
CLASS_COLOR     = ('b', 'r')
EDGE_COLOR      = 'k'
MARKER_SIZE_DEF = 10
LINE_WIDTH_DEF  = 2

DATA_FOLDER = os.path.join('data', 'Forehead')
TRAIN_IMAGES_FOLDER = os.path.join(DATA_FOLDER, 'train', 'images')
TRAIN_LABELS_FOLDER = os.path.join(DATA_FOLDER, 'train', 'labels')
TRAIN_MASKS_FOLDER = os.path.join(DATA_FOLDER, 'train', 'masks')
VAL_IMAGES_FOLDER = os.path.join(DATA_FOLDER, 'val', 'images')
VAL_LABELS_FOLDER = os.path.join(DATA_FOLDER, 'val', 'labels')
VAL_MASKS_FOLDER = os.path.join(DATA_FOLDER, 'val', 'masks')
TEST_IMAGES_FOLDER = os.path.join(DATA_FOLDER, 'test', 'images')
TEST_LABELS_FOLDER = os.path.join(DATA_FOLDER, 'test', 'labels')
TEST_MASKS_FOLDER = os.path.join(DATA_FOLDER, 'test', 'masks')
key_to_image_folder = {
    'train': {'images': TRAIN_IMAGES_FOLDER, 'labels': TRAIN_LABELS_FOLDER,'masks': TRAIN_MASKS_FOLDER},
    'val': {'images': VAL_IMAGES_FOLDER, 'labels': VAL_LABELS_FOLDER,'masks': VAL_MASKS_FOLDER},
    'test': {'images': TEST_IMAGES_FOLDER, 'labels': TEST_LABELS_FOLDER,'masks': TEST_MASKS_FOLDER}, 
}


T_IMG_SIZE = (480, 640, 3)

TENSOR_BOARD_BASE   = 'TB'

In [4]:
# Parameters

# Data
# numSamplesTrain = 30_000
# numSamplesVal   = 10_000
# boxFormat       = BBoxFormat.YOLO
numCls          = 2 #<! Number of classes
# maxObj          = 3

# Model
# gridSize = 5 #<! The gris is (gridSize x gridSize) 

# Training
batchSize   = 4
numWorkers  = 2 #<! Number of workers
numEpochs   = 2
λ = 20.0 #<! Localization Loss
ϵ = 0.1 #<! Label Smoothing

# Visualization
# numImg = 3


In [5]:
#auxilary functions

def only_jpg_files(files: List) -> List[str]:
    return [item for item in files if item.endswith('jpg')]

def get_bbox(filename: str, dirname: str) -> List[float]:
    json_file = os.path.join(dirname, f'{filename}.json')
    with open(json_file, 'r') as f:
        bbox = json.load(f)['bbox']
    return bbox

def get_label(filename: str, dirname: str) -> List[int]:
    json_file = os.path.join(dirname, f'{filename}.json')
    with open(json_file, 'r') as f:
        label = json.load(f)['class']
    return label

def build_dataset(batchsz,dsTrain,dsVal,dstest):

    # ADD NUM_WORKERS ETC' when moving to gpu
    train = DataLoader(dsTrain, shuffle = True, batch_size = 1 * batchsz, collate_fn=utils.collate_fn, num_workers = numWorkers, drop_last = True, persistent_workers = True)
    val   = DataLoader(dsVal, shuffle = False, batch_size = 2 * batchsz, collate_fn=utils.collate_fn, num_workers = numWorkers, persistent_workers = True)
    test   = DataLoader(dstest, shuffle = False, batch_size = 2 * batchsz, collate_fn=utils.collate_fn, num_workers = numWorkers, persistent_workers = True)
    
    return train, val, test

def build_network(num_classes, typ):
    if typ=='Faster RCNN':
        weights = FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1
        oModel = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)
        # get number of input features for the classifier
        in_features = oModel.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        oModel.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    elif typ=='Mask RCNN':
        weights = MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1
        oModel = maskrcnn_resnet50_fpn(weights=weights)
        # get number of input features for the classifier
        in_features = oModel.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        oModel.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

        # now get the number of input features for the mask classifier
        in_features_mask = oModel.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        oModel.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer,num_classes)

    return oModel

def build_optimizer(network, optimizer, learning_rate):
    if optimizer == "sgd":
        optimizer = torch.optim.SGD(network.parameters(),
                              lr=learning_rate, momentum=0.9,weight_decay=0.0005)
    elif optimizer == "adam":
        optimizer = torch.optim.Adam(network.parameters(),
                               lr=learning_rate,betas = (0.9, 0.99), weight_decay = 2e-4)
    return optimizer

## Defining the Model

using Faster R-CNN with a Resnet50 Back bone with weights from a model trained on COCO dataset

In [6]:
# Step 1: Initialize model with the best available weights
# This can be changes to different weights/Model
# suitable models can be found here https://pytorch.org/vision/stable/models.html#object-detection or
# https://pytorch.org/vision/stable/models.html#instance-segmentation
weights = FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1
oModel_org = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)

In [7]:
# Model as is 

torchinfo.summary(oModel_org, (10, 3, 640, 480), col_names = ['kernel_size', 'output_size', 'num_params'], device = 'cpu')

Layer (type:depth-idx)                             Kernel Shape              Output Shape              Param #
FasterRCNN                                         --                        [0, 4]                    --
├─GeneralizedRCNNTransform: 1-1                    --                        [10, 3, 1088, 800]        --
├─BackboneWithFPN: 1-2                             --                        [10, 256, 17, 13]         --
│    └─IntermediateLayerGetter: 2-1                --                        [10, 2048, 34, 25]        --
│    │    └─Conv2d: 3-1                            [7, 7]                    [10, 64, 544, 400]        (9,408)
│    │    └─BatchNorm2d: 3-2                       --                        [10, 64, 544, 400]        (128)
│    │    └─ReLU: 3-3                              --                        [10, 64, 544, 400]        --
│    │    └─MaxPool2d: 3-4                         3                         [10, 64, 272, 200]        --
│    │    └─Sequential: 3-5      

In [8]:
# replace the classifier with a new one, that has
# num_classes which is user-defined
# This only changes the head

# num_classes = 2  # 1 class (person) + background
# build a network
NNType='Faster RCNN'
oModel=build_network(numCls,NNType)

In [9]:
# Model info of model with new head
torchinfo.summary(oModel, (10, 3, 640, 480), col_names = ['kernel_size', 'output_size', 'num_params'], device = 'cpu')

Layer (type:depth-idx)                             Kernel Shape              Output Shape              Param #
FasterRCNN                                         --                        [0, 4]                    --
├─GeneralizedRCNNTransform: 1-1                    --                        [10, 3, 1088, 800]        --
├─BackboneWithFPN: 1-2                             --                        [10, 256, 17, 13]         --
│    └─IntermediateLayerGetter: 2-1                --                        [10, 2048, 34, 25]        --
│    │    └─Conv2d: 3-1                            [7, 7]                    [10, 64, 544, 400]        (9,408)
│    │    └─BatchNorm2d: 3-2                       --                        [10, 64, 544, 400]        (128)
│    │    └─ReLU: 3-3                              --                        [10, 64, 544, 400]        --
│    │    └─MaxPool2d: 3-4                         3                         [10, 64, 272, 200]        --
│    │    └─Sequential: 3-5      

## Dataset and Dataloader

In [10]:
#List of images
train_images_files = only_jpg_files(os.listdir(TRAIN_IMAGES_FOLDER))
test_images_files = only_jpg_files(os.listdir(TEST_IMAGES_FOLDER))
val_images_files = only_jpg_files(os.listdir(VAL_IMAGES_FOLDER))

In [11]:
#defining dataset
class ForeheadbboxDataset(Dataset):

    def __init__(self, images_list, data_typ, key_folders, transform=None):
        self.images_list = images_list
        self.data_typ = data_typ
        self.transform = transform
        self.key_folders=key_folders

    def __len__(self):
        return len(self.images_list)

    def __getitem__(self, idx):

        img=self.images_list[idx]
        file_no_ext = Path(img).stem

        #bounding box
        bbox_folder = self.key_folders[self.data_typ]['labels']
        bbox = get_bbox(file_no_ext, bbox_folder)
        bbox=torch.tensor(bbox, dtype=torch.float32, device='cuda')

        #labels
        label=get_label(file_no_ext, bbox_folder)
        labels=torch.tensor(np.array([label]), dtype=torch.int64, device='cuda')

        #image
        image_folder = self.key_folders[self.data_typ]['images']
        full_image_filename = os.path.join(image_folder, img)
        image=torchvision.io.read_image(full_image_filename).float() / 255.0
        
        #masks
        masks_folder = self.key_folders[self.data_typ]['masks']
        full_masks_filename = os.path.join(masks_folder, img)
        mask=torchvision.io.read_image(full_masks_filename)
        
        # instances are encoded as different colors
        obj_ids = torch.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]
        num_objs = len(obj_ids)

        # split the color-encoded mask into a set of binary masks
        masks = (mask == obj_ids[:, None, None]).to(dtype=torch.uint8)
        # Ensure bbox is of size (1, 4)
        if bbox.ndim == 1:
            bbox = bbox.unsqueeze(0)

        image_id = torch.tensor(np.array([idx]), dtype=torch.int64, device='cuda')
        area = (bbox[:, 3] - bbox[:, 1]) * (bbox[:, 2] - bbox[:,0])
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64, device='cuda')

        # Wrap sample and targets into tensors:
        target = {}
        target["boxes"]=bbox
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        target["masks"] = masks
        if self.transform:
            image, target = self.transform(image, target)


        return image, target

In [12]:
# Optional transforms
# currently not used and everything here already occurs in the dataset
class ToTensor_imri(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, vy = sample['image'], sample['target'] 
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        # image = np.transpose(image, (2, 0, 1))/255
        return {'image': torch.Tensor(image),
                'target': torch.Tensor(vy)}

In [13]:
# creating datasets
dsTrain = ForeheadbboxDataset(train_images_files, 'train', key_to_image_folder, transform=None)#TorchVisionTrns.Compose([ToTensor_imri()]))
dsVal   = ForeheadbboxDataset(val_images_files, 'val', key_to_image_folder, transform=None) #TorchVisionTrns.Compose([ToTensor_imri()]))
dstest   = ForeheadbboxDataset(test_images_files, 'test', key_to_image_folder, transform=None) #TorchVisionTrns.Compose([ToTensor_imri()]))

In [14]:
# creating dataloaders

dlTrain, dlVal, dltest=build_dataset(batchSize,dsTrain,dsVal,dstest)

## Test Training


In [15]:
# Run Device
# gpu not working good on my laptop, should be changed
runDevice = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') #<! The 1st CUDA device
# runDevice = torch.device('cpu')

In [16]:
#testing if working
images, targets = next(iter(dlTrain))
images = list(image for image in images)
targets = [{k: v.to(runDevice) if isinstance(v, torch.Tensor) else v for k, v in t.items()} for t in targets]
output = oModel(images, targets)  # Returns losses and detections
print(output)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

## Freezing all weights except the new head

In [16]:
#getting all parameters for length
params = [p for p in oModel.parameters() if p.requires_grad]
len(params)

176

In [17]:
#freezing all parameters
for param in oModel.parameters():
    param.requires_grad = False

In [18]:
#unfreezing parameters for the new head
for param in oModel.roi_heads.box_predictor.parameters():
    param.requires_grad = True
if NNType=='Mask RCNN':
    for param in oModel.roi_heads.mask_predictor.parameters():
        param.requires_grad = True

In [19]:
# parameters length
params2 = [p for p in oModel.parameters() if p.requires_grad]
len(params2)

4

## wandb

In [None]:
wandb.login()

In [None]:
sweep_config = {'method': 'random'}

metric = {'name': 'loss','goal': 'minimize'}
sweep_config['metric'] = metric

parameters_dict = {'optimizer': {'values': ['adam', 'sgd']},
    'fc_layer_size': {'values': [128, 256, 512]},##
    'dropout': {'values': [0.3, 0.4, 0.5]},}##
sweep_config['parameters'] = parameters_dict

parameters_dict.update({'epochs': {'value': 1}})

parameters_dict.update({
    'learning_rate': {'distribution': 'uniform','min': 0.0001,'max': 0.1},# a flat distribution between 0.0001 and 0.1
    'batch_size': {'values': [2,4,8,16]}})# integers between 2 and 16
pprint.pprint(sweep_config)

In [None]:
sweep_id = wandb.sweep(sweep_config, project="pytorch-sweeps-demo")


In [None]:
def train(config=None):
    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config

        dlTrain, dlVal, dltest = build_dataset(config.batch_size,dsTrain,dsVal,dstest)
        oModel=build_network(numCls,NNType)

        #freezing layers
        for param in oModel.roi_heads.box_predictor.parameters():
            param.requires_grad = True
        if NNType=='Mask RCNN':
            for param in oModel.roi_heads.mask_predictor.parameters():
                param.requires_grad = True
        oModel.to(runDevice)
        optimizer = build_optimizer(oModel, config.optimizer, config.learning_rate)

        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1)
        for epoch in range(config.epochs):
            # train for one epoch, printing every 10 iterations
            logger=train_one_epoch(oModel, optimizer, dlTrain, runDevice, epoch, print_freq=10)
            # update the learning rate
            lr_scheduler.step()
            # evaluate on the test dataset
            logger_eval=evaluate(oModel, dlVal, device=runDevice)#need to add meters from logger_eval to wandb logger, didn't get this far on CPU
            wandb.log({"loss": logger.meters['loss'].avg, "loss_classifier":logger.meters['loss_classifier'].avg,
                       "loss_box_reg":logger.meters['loss_box_reg'].avg, "loss_objectness":logger.meters['loss_objectness'].avg,
                       "loss_rpn_box_reg":logger.meters['loss_rpn_box_reg'].avg,"epoch": epoch})

In [None]:
wandb.agent(sweep_id, train, count=15)

## Training

In [20]:
# # training block, nothing is truely defined here with actual thought,everything is copied from tutorial,
# # from here wanDB and maybe gridsearch/optuna should be defined and hyper parameters searched, 
# # maybe different optimizer and learning rate schedular, all weight are unfrozen

# # move model to the right device
# oModel.to(runDevice)

# # construct an optimizer
# params = [p for p in oModel.parameters() if p.requires_grad]
# optimizer = torch.optim.SGD(
#     params,
#     lr=0.005,
#     momentum=0.9,
#     weight_decay=0.0005
# )

# # and a learning rate scheduler
# lr_scheduler = torch.optim.lr_scheduler.StepLR(
#     optimizer,
#     step_size=3,
#     gamma=0.1
# )

# # let's train it just for 2 epochs
# num_epochs = 2

# for epoch in range(num_epochs):
#     # train for one epoch, printing every 10 iterations
#     a=train_one_epoch(oModel, optimizer, dlTrain, runDevice, epoch, print_freq=10)
#     # update the learning rate
#     lr_scheduler.step()
#     # evaluate on the test dataset
#     evaluate(oModel, dlVal, device=runDevice)

# print("That's it!")

lr: 0.000010  loss: 1.9234 (1.9234)  loss_classifier: 0.6832 (0.6832)  loss_box_reg: 0.0012 (0.0012)  loss_objectness: 0.3097 (0.3097)  loss_rpn_box_reg: 0.9293 (0.9293)
Epoch: [0]  [   0/4550]  eta: 13:31:13  lr: 0.000010  loss: 1.9234 (1.9234)  loss_classifier: 0.6832 (0.6832)  loss_box_reg: 0.0012 (0.0012)  loss_objectness: 0.3097 (0.3097)  loss_rpn_box_reg: 0.9293 (0.9293)  time: 10.6974  data: 0.2992  max mem: 0
lr: 0.000015  loss: 1.9234 (2.0006)  loss_classifier: 0.6797 (0.6814)  loss_box_reg: 0.0012 (0.0015)  loss_objectness: 0.3097 (0.3480)  loss_rpn_box_reg: 0.9293 (0.9697)


KeyboardInterrupt: 