In [None]:
"""
Process: To carry out all phases of Deep Learning
training process to produce optimal model

# --- logging setup
- initializes native python logging (41)
- class for Neptune.ai logging (53)

# -- parameter config
- uses dataclass to create dataclass class
to store all parameters

# -- Data config
- Defines directory where data is at (49)


Main method:

----- Step 1: parameter initialization
1) initializes parameters objection and neptune-settings object
5) Sets random seed (so know any changes in performance
are from hyperparameter tuning)

----- Step 2: Import and Preprocess Data
3) Defines class mappings
4) Creates ComposeDouble objects that define data augmentation
for train,validation,test

2) reads in all the files available (fine tuning)
- sorts them
6) train/val/test split

7) Creates dataset objects by defining:
- filenams
- class mapping
- transforms
- whether to use cache
8) Creates dataloaders by defining
- dataset obj
- batch-size
- shuffling
- collate function (func that will convert list of dicts into arrays)

----- Step 3: Create Logger
9) Creates neptune loger uisng neptune setting objs
10) Nept: logs hyper parameters

----- Step 4: Defining Model
11) Model creation:
- creates regular FasterRCNN model using:
    num classes
    what backbone using
    anchor size/aspect ratio parameters
    whether to add fpn
    min/max size of inputs
- turns model into Lightning model
    FasterRCNN model
    learning late (because optimizer is defined here)
    iou_threshold (which will be used to print out mAP on validation)

----- Step 5: Defining and Running Training Process
12) Defines callbacks (to be used by pytorch lightning trainer)

13) Trainer object creation from
- whether to use GPU
- neptune logger as logger
- callbacks
- where to sae directory
- how often to log
- DON'T SEND IT MODEL JUST YET

13) Fits the model with trainer.fit
- model
- train dataloader
- validation dataloader

----- Step 6: Tests model
14) tests model with trainer.test
- path of the best model
- dataloader test

----- Step 7: Documents Best Model
15) Logs best model with neptune

16: stops neptune logging

-------------- inference method
"""

In [1]:
%load_ext autoreload
%autoreload 2

# System Setup

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
# %%shell

# #missing packages
# pip3 install pytorch-lightning
# pip3 install importlib-metadata
# pip3 install pydantic[dotenv]
# pip3 install -U 'neptune-client'


# cd /content
# git clone https://github.com/bacelii/python_tools.git
# pip3 install -e python_tools
# pip3 install -e /content/drive/MyDrive/Grad_Classes/Hike/object_detection_utils/

# Necessary Modules

In [4]:
import logging
import pathlib
import sys
from dataclasses import asdict, dataclass, field
from typing import Dict, List, Optional, Tuple

import albumentations
import numpy as np
from pydantic import BaseSettings, Field
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import (
    EarlyStopping,
    LearningRateMonitor,
    ModelCheckpoint,
)
from pytorch_lightning.loggers.neptune import NeptuneLogger
from torch.utils.data import DataLoader
from torchvision.models.detection.faster_rcnn import FasterRCNN

from object_detection_utils.backbone_resnet import ResNetBackbones
from object_detection_utils.datasets import ObjectDetectionDataSet
from object_detection_utils.faster_RCNN import (
    FasterRCNNLightning,
    get_faster_rcnn_resnet,
)
from object_detection_utils.transformations import (
    AlbumentationWrapper,
    Clip,
    ComposeDouble,
    FunctionWrapperDouble,
    normalize_01,
)
from object_detection_utils.utils import (
    collate_double,
    get_filenames_of_path,
    log_model_neptune,
)

  from neptune.version import version as neptune_client_version
  from neptune import new as neptune


# Configurations

In [5]:
# Data configuration
from pathlib import Path
ROOT_PATH = Path("/content/drive/MyDrive/Grad_Classes/Hike/object_detection_utils/Applications/foot_faster_rcnn")
print(f"ROOT_PATH exists? {ROOT_PATH.exists()}")

#ROOT_PATH: Path = Path(__file__).parent.absolute()
checkpoint_dir: Path = (
    ROOT_PATH  / Path("model_checkpoints")
)
checkpoint_dir
checkpoint_dir.mkdir(exist_ok=True)

data_path: Path = (
    ROOT_PATH / Path("data/train")
)

print(f"data_path exists? {data_path.exists()}")

ROOT_PATH exists? True
data_path exists? True


In [7]:
# parameter config

mapping: Dict[str, int] = {
    "T1": 1,
    "T2": 2,
    "T3":3,
    "T4": 4,
    "T5": 5,
    "MT1":6,
    "MT5":7,
    "CBL":8,
    "CBR":9,
    "CTL":10,
    "CTR":11
}

@dataclass
class Parameters:
    """
    Dataclass for the parameters.
    """

    BATCH_SIZE: int = 2
    CACHE: bool = True
    DATA_DIR: Optional[str] = str(data_path.absolute())
    SAVE_DIR: Optional[
        str
    ] = str(checkpoint_dir.absolute())  # checkpoints will be saved to cwd (current working directory) if None
    LOG_MODEL: bool = True  # whether to log the model to neptune after training
    ACCELERATOR: Optional[str] = "auto"  # set to "gpu" if you want to use GPU
    LR: float = 0.001
    PRECISION: int = 32
    CLASSES: int = len(mapping) + 1 # adds on for the base class
    SEED: int = 42
    MAXEPOCHS: int = 200
    PATIENCE: int = 50
    BACKBONE: ResNetBackbones = ResNetBackbones.RESNET34
    FPN: bool = False
    ANCHOR_SIZE: Tuple[Tuple[int, ...], ...] = ((32, 64, 128, 256, 512),)
    ASPECT_RATIOS: Tuple[Tuple[float, ...]] = ((0.5, 1.0, 2.0),)
    MIN_SIZE: int = 1024
    MAX_SIZE: int = 1025
    IMG_MEAN: List = field(default_factory=lambda: [0.485, 0.456, 0.406])
    IMG_STD: List = field(default_factory=lambda: [0.229, 0.224, 0.225])
    IOU_THRESHOLD: float = 0.5
    FAST_DEV_RUN: bool = False

    def __post_init__(self):
        if self.SAVE_DIR is None:
            self.SAVE_DIR: str = str(pathlib.Path.cwd())

In [8]:
# base level python logging
logger: logging.Logger = logging.getLogger(__name__)

# logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d:%(funcName)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)

In [9]:
class NeptuneSettings(BaseSettings):
    """
    Reads the variables from the environment.
    Errors will be raised if the required variables are not set.
    """
    api_key: str = Field(env="NEPTUNE")
    OWNER: str = "brendanacelii"  # set your name here, e.g. johndoe22
    PROJECT: str = "Object-Detection-FasterRCNN"  # set your project name here, e.g. Heads
    EXPERIMENT: str = "complete-retrain"  # set your experiment name here, e.g. heads

    class Config:
        # this tells pydantic to read the variables from the .env file
        env_file = str((ROOT_PATH / Path(".env")).absolute())



# --- Main Run Method

# ---Step 1: parameter and logging init

In [10]:
parameters: Parameters = Parameters()
seed_everything(parameters.SEED)
neptune_settings: NeptuneSettings = NeptuneSettings()

INFO:lightning_fabric.utilities.seed:Global seed set to 42


# ---Step 2: Importing and Preprocessing Data

In [11]:
# training transformations and augmentations
transforms_training: ComposeDouble = ComposeDouble(
    [
        Clip(),
        AlbumentationWrapper(albumentation=albumentations.HorizontalFlip(p=0.5)),
        AlbumentationWrapper(
            albumentation=albumentations.RandomScale(p=0.5, scale_limit=0.5)
        ),
        # AlbuWrapper(albu=A.VerticalFlip(p=0.5)),
        FunctionWrapperDouble(function=np.moveaxis, source=-1, destination=0),
        FunctionWrapperDouble(function=normalize_01),
    ]
)

# validation transformations
transforms_validation: ComposeDouble = ComposeDouble(
    [
        Clip(),
        FunctionWrapperDouble(function=np.moveaxis, source=-1, destination=0),
        FunctionWrapperDouble(function=normalize_01),
    ]
)

# test transformations
transforms_test: ComposeDouble = ComposeDouble(
    [
        Clip(),
        FunctionWrapperDouble(function=np.moveaxis, source=-1, destination=0),
        FunctionWrapperDouble(function=normalize_01),
    ]
)

In [12]:
from sklearn.model_selection import train_test_split
import numpy as np
def train_val_test_split_idx(
    n_observations,
    val_size = 0.2,
    test_size = 0.2,
    seed = None,
    verbose = False,
    ):
    obs = np.arange(n_observations)

    train_indices, val_indices = train_test_split(
        obs,
        test_size = val_size + test_size,
        random_state = seed,
    )

    val_indices, test_indices = train_test_split(
        val_indices,
        test_size = test_size/(test_size + val_size),
        random_state = seed,
    )

    if verbose:
        print(f"# train = {len(train_indices)}")
        print(f"# val = {len(val_indices)}")
        print(f"# test = {len(test_indices)}")

    return train_indices,val_indices,test_indices



In [13]:
# input and target files
inputs: List[pathlib.Path] = get_filenames_of_path(
    Path(parameters.DATA_DIR) / "images")
targets: List[pathlib.Path] = get_filenames_of_path(Path(
    parameters.DATA_DIR)  / "labels")

# sort inputs and targets
inputs.sort()
targets.sort()

inputs = np.array(inputs)
targets = np.array(targets)

# do a test/train/split
train_idx,val_idx,test_idx = train_val_test_split_idx(
    len(inputs),
    verbose = True
)

inputs_train, inputs_valid, inputs_test = (
    inputs[train_idx],
    inputs[val_idx],
    inputs[test_idx]
)
targets_train, targets_valid, targets_test = (
    targets[train_idx],
    targets[val_idx],
    targets[test_idx]
)

# train = 164
# val = 55
# test = 55


In [14]:
# creating the datasets and dataloaders

# dataset training
dataset_train: ObjectDetectionDataSet = ObjectDetectionDataSet(
    inputs=inputs_train,
    targets=targets_train,
    transform=transforms_training,
    use_cache=parameters.CACHE,
    convert_to_format=None,
    mapping=mapping,
)

# dataset validation
dataset_valid: ObjectDetectionDataSet = ObjectDetectionDataSet(
    inputs=inputs_valid,
    targets=targets_valid,
    transform=transforms_validation,
    use_cache=parameters.CACHE,
    convert_to_format=None,
    mapping=mapping,
)

# dataset test
dataset_test: ObjectDetectionDataSet = ObjectDetectionDataSet(
    inputs=inputs_test,
    targets=targets_test,
    transform=transforms_test,
    use_cache=parameters.CACHE,
    convert_to_format=None,
    mapping=mapping,
)

# dataloader training
dataloader_train: DataLoader = DataLoader(
    dataset=dataset_train,
    batch_size=parameters.BATCH_SIZE,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_double,
)

# dataloader validation
dataloader_valid: DataLoader = DataLoader(
    dataset=dataset_valid,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_double,
)

# dataloader test
dataloader_test: DataLoader = DataLoader(
    dataset=dataset_test,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_double,
)

In [15]:
# ---- Step 3: Creating the external logger (and logging hyperparameters)
neptune_logger: NeptuneLogger = NeptuneLogger(
    api_key=neptune_settings.api_key,
    project=f"{neptune_settings.OWNER}/{neptune_settings.PROJECT}",  # use your neptune name here
    name=neptune_settings.PROJECT,
    log_model_checkpoints=False,
)

# log hyperparameters
neptune_logger.log_hyperparams(asdict(parameters))

  self._run_instance = neptune.init_run(**self._neptune_init_args)


https://app.neptune.ai/brendanacelii/Object-Detection-FasterRCNN/e/OB-7


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/neptune.py:402: NeptuneUnsupportedType: You're attempting to log a type that is not directly supported by Neptune (<enum 'ResNetBackbones'>).
        Convert the value to a supported type, such as a string or float, or use stringify_unsupported(obj)
        for dictionaries or collections that contain unsupported values.
        For more, see https://docs.neptune.ai/help/value_of_unsupported_type
  self.run[parameters_key] = params
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/neptune.py:402: NeptuneUnsupportedType: You're attempting to log a type that is not directly supported by Neptune (<class 'tuple'>).
        Convert the value to a supported type, such as a string or float, or use stringify_unsupported(obj)
        for dictionaries or collections that contain unsupported values.
        For more, see https://docs.neptune.ai/help/value_of_unsupported_type
  self.run[parameters_key] = params
/usr/

# ---- Step 4: Defining Model Architecture (and turning into Pytorch Lightning Model)

In [16]:
# model init
model: FasterRCNN = get_faster_rcnn_resnet(
    num_classes=parameters.CLASSES,
    backbone_name=parameters.BACKBONE,
    anchor_size=parameters.ANCHOR_SIZE,
    aspect_ratios=parameters.ASPECT_RATIOS,
    fpn=parameters.FPN,
    min_size=parameters.MIN_SIZE,
    max_size=parameters.MAX_SIZE,
)

# lightning model
model: FasterRCNNLightning = FasterRCNNLightning(
    model=model,
    lr=parameters.LR,
    iou_threshold=parameters.IOU_THRESHOLD
)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
  rank_zero_warn(


# ----- Step 5: Definig training process and pytorch lightning trainer

In [17]:
# callbacks
checkpoint_callback: ModelCheckpoint = ModelCheckpoint(
    monitor="Validation_mAP", mode="max"
)
learning_rate_callback: LearningRateMonitor = LearningRateMonitor(
    logging_interval="step", log_momentum=False
)
early_stopping_callback: EarlyStopping = EarlyStopping(
    monitor="Validation_mAP", patience=parameters.PATIENCE, mode="max"
)

In [18]:
# trainer init
trainer: Trainer = Trainer(
    accelerator=parameters.ACCELERATOR,
    #accelerator = "cpu",
    #accelerator='gpu', devices=1,
    logger=neptune_logger,
    callbacks=[
        checkpoint_callback,
        learning_rate_callback,
        early_stopping_callback,
    ],
    default_root_dir=parameters.SAVE_DIR,  # where checkpoints are saved to
    log_every_n_steps=1,  # increase to reduce the amount of log flushes (lowers the overhead)
    num_sanity_val_steps=0,  # set to 0 to skip sanity check
    max_epochs=parameters.MAXEPOCHS,
    fast_dev_run=parameters.FAST_DEV_RUN,  # set to True to test the pipeline with one batch and without validation, testing and logging
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


# Starting the Training

In [19]:
# start training
trainer.fit(
    model=model,
    train_dataloaders=dataloader_train,
    val_dataloaders=dataloader_valid,
)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type       | Params
-------------------------------------
0 | model | FasterRCNN | 50.5 M
-------------------------------------
50.5 M    Trainable params
0         Non-trainable params
50.5 M    Total params
201.941   Total estimated model params size (MB)
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/neptune.py:402: NeptuneUnsupportedType: You're attempting to log a type that is not directly supported by Neptune (<class 'NoneType'>).
        Convert the value to a supported type, such as a string or float, or use stringify_unsupported(obj)
        for dictionaries or collections that contain unsupported values.
        For more, see https://docs.neptune.ai/help/value_of_unsupported_type
  self.run[parameters_key] = params


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]



Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
trainer.test(ckpt_path="best", dataloaders=dataloader_test)

In [32]:
import neptune.legacy as neptune

# log model
if parameters.LOG_MODEL:
    checkpoint_path = pathlib.Path(checkpoint_callback.best_model_path)
    name="best_model.pt"
    checkpoint = torch.load(checkpoint_path)
    model = checkpoint["hyper_parameters"]["model"]
    torch.save(model.state_dict(), Path(parameters.SAVE_DIR) / Path(name))

In [33]:
import torch
torch.save(model.state_dict(), Path(ROOT_PATH) / Path("last_model.ckpt"))

In [34]:
# stop logger
neptune_logger.experiment.stop()
logger.info("Training finished")

Shutting down background jobs, please wait a moment...
Done!
All 0 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/brendanacelii/Object-Detection-FasterRCNN/e/OB-7/metadata
