# Basic Training Template


This notebook trains a relatively basic model (no augmentation) using the given parameters, then optionally applies that model to a validation dataset and exports a variety of statistics about its predictions on that data.

## Parameters

In [None]:
# Notebook parameters. Every value here is a default that is meant to be
# overridden from outside (the notebook records results through papermill);
# the required ones are checked for None in the next cell.

# Required
# Directory that each image id is joined onto to form an image path
data_dir = None
# Directory handed to the model builder as `model_dir`
model_dir = None
# Directory that receives the exported prediction statistics (stats.pkl)
export_dir = None
# Comma-separated string of training image ids (split on ',' after validation)
train_image_ids = None
# Comma-separated string of validation image ids (split on ',' after validation)
val_image_ids = None
# Path of the script executed with `%run`; the notebook later uses CLASS_NAMES,
# CelldomTrainingConfig and CelldomInferenceConfig, none of which it imports,
# so they are presumably defined by this script -- confirm against the script
config_script = None

# Optional
# Value passed as `epochs` to model.train (coerced with int())
n_epochs = 10
# Passed as `init_with` when the training model is created
train_init_mode = 'coco'
# When not None, overrides train_config.STEPS_PER_EPOCH
n_steps_per_epoch_train = None
# When not None, overrides train_config.VALIDATION_STEPS
n_steps_per_epoch_val = None
# When truthy, the quantification (inference + stats export) section is skipped
skip_inference = False

In [None]:
# Fail fast, with a readable message, when a required parameter was not supplied.
assert data_dir is not None, 'Must provide "data_dir"'
assert model_dir is not None, 'Must provide "model_dir"'
assert export_dir is not None, 'Must provide "export_dir"'
assert train_image_ids is not None, 'Must provide training image ids'
assert val_image_ids is not None, 'Must provide validation image ids'
assert config_script is not None, 'Must provide config script location'


def _parse_image_ids(image_ids):
    """Normalize an image id parameter into a clean list of id strings.

    Args:
        image_ids: Either a comma-separated string (the usual form for an
            injected parameter) or an already-split iterable of ids. Accepting
            the latter keeps this cell safe to re-run, since the parameter
            names are rebound to lists below.

    Returns:
        List of ids with surrounding whitespace removed and empty entries
        (e.g. from a trailing comma) dropped.
    """
    if isinstance(image_ids, str):
        image_ids = image_ids.split(',')
    stripped = (str(image_id).strip() for image_id in image_ids)
    return [image_id for image_id in stripped if image_id]


train_image_ids = _parse_image_ids(train_image_ids)
val_image_ids = _parse_image_ids(val_image_ids)

# An empty id list cannot produce a usable dataset; report it here rather
# than letting it surface as an obscure failure during training.
assert train_image_ids, 'Must provide training image ids'
assert val_image_ids, 'Must provide validation image ids'

# Parameters may arrive as strings, so coerce the epoch count explicitly.
n_epochs = int(n_epochs)

## Initialization

In [None]:
# Execute the user-supplied configuration script inside this notebook's
# namespace. The later cells use CLASS_NAMES, CelldomTrainingConfig and
# CelldomInferenceConfig without importing them, so they are presumably
# defined by this script -- confirm against the script itself.
%run $config_script
%matplotlib inline

import os
import os.path as osp
import warnings
import numpy as np
import pandas as pd
import papermill as pm
import matplotlib.pyplot as plt
from mrcnn import visualize as mrcnn_viz
from mrcnn import model as mrcnn_model_lib
from cvutils.mrcnn import model as mrcnn_model
from cvutils.mrcnn.session import init_keras_session
from celldom.dataset import CelldomDataset
# NOTE(review): `seed` is not referenced in the visible cells; it may be
# imported for a side effect or be unused -- confirm before removing.
from celldom import seed
# Set up the Keras session before any model is built (exact behavior is
# defined in cvutils.mrcnn.session, not visible here).
init_keras_session()

In [None]:
# Make sure both output locations exist before anything is written to them;
# exist_ok=True keeps this cell safe to re-run.
for _output_dir in (export_dir, model_dir):
    os.makedirs(_output_dir, exist_ok=True)

In [None]:
# Show class names being used for training.
# CLASS_NAMES is not defined or imported in this notebook; it is expected to
# be provided by the script executed via `%run $config_script`.
CLASS_NAMES

In [None]:
def _load_dataset(image_ids):
    """Build a prepared CelldomDataset for the given image ids.

    Each id is joined onto `data_dir` to form the image path, and the dataset
    is initialized with those paths and the configured CLASS_NAMES.

    Returns:
        Tuple of (image_paths, dataset).
    """
    image_paths = [osp.join(data_dir, image_id) for image_id in image_ids]
    dataset = CelldomDataset()
    dataset.initialize(image_paths, CLASS_NAMES)
    dataset.prepare()
    return image_paths, dataset


# Training dataset
train_image_paths, dataset_train = _load_dataset(train_image_ids)

# Validation dataset
val_image_paths, dataset_val = _load_dataset(val_image_ids)

# Record dataset sizes in the executed notebook's papermill output
pm.record('n_train', len(train_image_paths))
pm.record('n_val', len(val_image_paths))

In [None]:
# Start from the training configuration class (not defined in this notebook;
# expected to come from the script run via `%run $config_script`).
train_config = CelldomTrainingConfig()

# Override certain properties if configured to do so.
# The values are coerced with int(), matching how n_epochs is handled, because
# injected parameters can arrive as strings and a step count must be an integer.
if n_steps_per_epoch_train is not None:
    train_config.STEPS_PER_EPOCH = int(n_steps_per_epoch_train)
if n_steps_per_epoch_val is not None:
    train_config.VALIDATION_STEPS = int(n_steps_per_epoch_val)

# Show the effective configuration so it is captured in the executed notebook
train_config.display()

## Training

In [None]:
# Build the network in training mode; `train_init_mode` selects how the
# weights are initialized and `model_dir` is handed to the builder as-is.
model = mrcnn_model.get_model(
    mode='training',
    config=train_config,
    model_dir=model_dir,
    init_with=train_init_mode,
)

In [None]:
# Ignore these warnings for now as they seem to be irrelevant so far.
# Each entry is handed to `warnings.filterwarnings` as its `message` pattern,
# which is matched against the beginning of the warning text.
_IGNORED_USER_WARNINGS = (
    'Converting sparse IndexedSlices to a dense Tensor of unknown shape',
    'Using a generator with `use_multiprocessing=True` and multiple workers may duplicate your data',
)
for _warning_message in _IGNORED_USER_WARNINGS:
    warnings.filterwarnings('ignore', category=UserWarning, message=_warning_message)

# Train with the configured learning rate for `n_epochs` epochs, passing
# layers='heads' as the layer selection.
model.train(
    dataset_train,
    dataset_val,
    learning_rate=train_config.LEARNING_RATE,
    epochs=n_epochs,
    layers='heads',
)

## Quantification

In [None]:
if not skip_inference:
    # Inference helpers are imported here, so they are only loaded when this
    # section actually runs.
    from cvutils.mrcnn import inference as mrcnn_inference
    from celldom import inference as celldom_inference

    # Rebuild the model in inference mode, initialized with 'last'
    # (presumably the most recent weights under model_dir -- confirm in
    # cvutils.mrcnn.model). This rebinds `model`, replacing the training model.
    inference_config = CelldomInferenceConfig()
    model = mrcnn_model.get_model(
        mode='inference',
        config=inference_config,
        model_dir=model_dir,
        init_with='last',
    )

    # Run the default analysis functions over every validation prediction and
    # collect one result per prediction as a DataFrame row.
    pred_gen = mrcnn_inference.prediction_generator(model, dataset_val)
    analysis_fns = celldom_inference.get_default_analysis_fns()
    stats_records = [
        celldom_inference.analyze_prediction(prediction, analysis_fns)
        for prediction in pred_gen
    ]
    df = pd.DataFrame(stats_records)

    # Record where the statistics are written, then pickle them to that path.
    export_path = osp.join(export_dir, 'stats.pkl')
    pm.record('stats_path', export_path)
    df.to_pickle(export_path)