# Training Notebook

The following files from the source directory are needed:
*   __src/preproc.py__ – functions used to preprocess the data
*   __src/visualize.py__ – tools to visualize processed images
*   __src/generate_tfrecord.py__ – a script to generate TF .record files from image data and XML annotations

Assumed project structure:
```
main_project_dir/
├─ README.md                               
├─ data/                  
│   ├─ train-valid-split
│   │    ├─ synth_train/                   <- synthesized training set plus annotations
│   │    ├─ synth_valid/                   <- synthesized validation set plus annotations
│   │    ├─ train/                         <- preprocessed (in training notebook) training images with xml annotations
│   │    ├─ valid/                         <- preprocessed (in training notebook) validation images with xml annotations
│   │    ├─ train.record                   <- TF .record file containing the training data
│   │    ├─ valid.record                   <- TF .record file containing the validation data
│   │    └─ label_map.pbtxt                <- label map file that maps class IDs to class names
│   └─ test-images-all                     <- test images used for evaluation
├─ tf-models/                              
│   ├─ pre-trained/                        <- pre-trained models downloaded from TF Object Detection Model Zoo
│   └─ fine-tuned/                         <- fine-tuned models trained on the data in train-valid-split
├─ notebooks/                              <- notebooks used for training, validation and evaluation
└─ src/                                    <- modules used in the notebooks
```

## Set up Paths common to entire workflow

In [None]:
from pathlib import Path
import os
import sys

# The notebook is expected to run from <project root>/notebooks, so the project
# root is the parent of the current working directory.
# NOTE(review): spaces are stripped from both paths - presumably because the
# paths are later interpolated unquoted into shell commands; a location whose
# real path contains spaces will therefore not resolve. Confirm this is intended.
NOTEBOOK_DIR = Path(os.getcwd().replace(' ',''))
MAIN_DIR1 = Path(os.path.abspath("..").replace(' ',''))
# Put the project root on the import path so the `src` package can be imported.
sys.path.insert(0, str(MAIN_DIR1))
MAIN_DIR = MAIN_DIR1#/'CabinetsCV'

DATA_DIR = MAIN_DIR/'data'
#RAW_TEST_DATA_DIR = DATA_DIR/'test-annotated-images'
TRAIN_VALID_DIR = DATA_DIR/'train-valid-split' # Processed (e.g. resized, augmented) training/validation data
synth_train_data_dir = TRAIN_VALID_DIR/'synth_train' # RAW synthesized images, training
synth_valid_data_dir = TRAIN_VALID_DIR/'synth_valid' # RAW synthesized images, validation

# Models working directory
# NOTE(review): the project-structure overview at the top of this notebook lists
# pre-trained/ and fine-tuned/ under tf-models/, while these paths use models/ -
# confirm which of the two is correct.
MODELS_DIR = MAIN_DIR/'models'
PRE_MODELS_DIR = MODELS_DIR/'pre-trained' # Pretrained models from TF model zoo
FT_MODELS_DIR = MODELS_DIR/'fine-tuned' # Our fine-tuned models
TF_DIR = MAIN_DIR/"tf-models"  # tensorflow models directory
TF_DIR_OD = MAIN_DIR/"tf-models/research"

# Directories for preprocessed annotated image data used for training/validation.
# parents=True creates data/train-valid-split when it is missing; exist_ok=True
# makes re-running this cell a no-op instead of an error.
train_data_dir = TRAIN_VALID_DIR/'train'
valid_data_dir = TRAIN_VALID_DIR/'valid'
train_data_dir.mkdir(parents=True, exist_ok=True)
valid_data_dir.mkdir(parents=True, exist_ok=True)

# Paths to TF .record files containing training and validation data (including annotations)
train_tfrec_path = TRAIN_VALID_DIR/'train.record'
valid_tfrec_path = TRAIN_VALID_DIR/'valid.record'

# Path to the label map file that maps class IDs to class names
label_map_path = TRAIN_VALID_DIR/'label_map.pbtxt'

## Initialize Object Detection API

In [None]:
## Install extra packages and update openCV headless
!pip install seaborn imgaug -q
!pip install opencv-python-headless --upgrade -q

# Install the Object Detection API from the tf-models/research checkout (TF_DIR_OD):
# compile the protobuf definitions to Python modules, copy the TF2 setup.py next to
# them, then pip-install the package. The steps are chained with && so a failure
# stops the chain.
%cd $TF_DIR_OD
!protoc object_detection/protos/*.proto --python_out=. && cp object_detection/packages/tf2/setup.py . && python -m pip -q install . 
# Return to the project root
%cd $MAIN_DIR

In [None]:
## OPTIONAL
VERIFICATION_SCRIPT = os.path.join(TF_DIR, 'research', 'object_detection', 'builders', 'model_builder_tf2_test.py')
# Verify Object Detection API correct installation
!python {VERIFICATION_SCRIPT}

## Import libraries and modules

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os, shutil, glob
import urllib.request
import tarfile

# TF object detection API utils
from object_detection.utils import label_map_util 
from object_detection.utils import config_util 

# our src/ functions
import src.preproc as src_pre
import src.visualize as src_viz

%matplotlib inline

## Set up the Image size variable

In [None]:
# Target size of training/validation images after preprocessing (should be consistent with the model in use).
# Passed as target_max_size to src_pre.copy_augment_data in the preprocessing cells of this notebook.
IMAGE_SIZE = 1024

## Create/update label map file

In [None]:
%%writefile $label_map_path
  item {
    id: 1
    name: 'AllenBradley'
  }
  item {
    id: 2
    name: 'AllenBradleyXM121'
  }
  item {
    id: 3
    name: 'Fanuc9030'
  }
  item {
    id: 4
    name: 'Ovation'
  }
  item {
    id: 5
    name: 'Siemens'
  } 

In [None]:
# Load the label map written above in two forms: a name -> id dictionary and
# the category index built by the Object Detection API helper.
label_map_dict = label_map_util.get_label_map_dict(str(label_map_path))
category_index = label_map_util.create_category_index_from_labelmap(label_map_path)
# Number of classes = number of entries in the label map
num_classes = len(label_map_dict)

## Raw data processing and augmentation
If you are reusing existing TFRecord files, you can skip ahead to the "Set up Model" section.

### Copy synthetic data into synth_train and synth_valid folders

In [None]:
# Empty the preprocessed training and validation folders before regenerating their contents
for stale_dir in (train_data_dir, valid_data_dir):
    src_pre.clear_dir(stale_dir)

In [None]:
## OPTIONAL
# Double check that the files were copied correctly into the synth data directory:
# every .jpg image must have an .xml annotation with the same base name, and vice versa.
def _find_unpaired(directory, suffix, partner_suffix):
    """Return the files in *directory* ending in *suffix* and the missing partners.

    The result is a ``(matching, missing)`` tuple: ``matching`` is the sorted list
    of paths whose name ends with *suffix*; ``missing`` lists, for each of them,
    the expected partner path (same name with *suffix* swapped for
    *partner_suffix*) that does not exist as a file.
    """
    matching = sorted(p for p in Path(directory).iterdir() if p.name.endswith(suffix))
    # Swap only the trailing suffix; str.replace on the full path would also
    # rewrite any earlier occurrence of the suffix text in a folder or file name.
    partners = (p.with_name(p.name[:-len(suffix)] + partner_suffix) for p in matching)
    missing = [partner for partner in partners if not partner.is_file()]
    return matching, missing


# For each direction print the number of files found, then every missing counterpart.
for suffix, partner_suffix in (('.jpg', '.xml'), ('.xml', '.jpg')):
    matching, missing = _find_unpaired(synth_train_data_dir, suffix, partner_suffix)
    print(len(matching))
    for missing_path in missing:
        print(missing_path)

In [None]:
# Resize and augment the images and annotations found in synth_train_data_dir
# and write the processed data to train_data_dir.
train_augmentation = dict(
    augment_mult=10,             # how many extra images to produce
    target_max_size=IMAGE_SIZE,
    pad2square=True,
    rand_augment=True,
    rand_aug_mag=1.8,            # magnitude of augmentation
    rand_aug_num=2.,             # number of augmentations per image
)
src_pre.copy_augment_data(synth_train_data_dir, train_data_dir, **train_augmentation)

In [None]:
# Copy the synthesized validation images into valid_data_dir, with the target size
# set to IMAGE_SIZE and pad2square enabled.
# No augmentation arguments are passed, so the helper's defaults apply
# (presumably no augmentation - confirm in src/preproc.py).
src_pre.copy_augment_data(
    synth_valid_data_dir,
    valid_data_dir,
    target_max_size=IMAGE_SIZE,
    pad2square=True,
)

In [None]:
## OPTIONAL
# Resize and augment the images and annotations found in synth_valid_data_dir
# and write the processed data to valid_data_dir.
valid_augmentation = dict(
    augment_mult=5,
    target_max_size=IMAGE_SIZE,
    pad2square=True,
    rand_augment=True,
    rand_aug_mag=1,
    rand_aug_num=1,
)
src_pre.copy_augment_data(synth_valid_data_dir, valid_data_dir, **valid_augmentation)

In [None]:
# Create TFRecords files from the preprocessed training and validation folders.
# The script is addressed by a path relative to the project root, hence the %cd first.
# Flags, judging by the values passed (confirm in src/generate_tfrecord.py):
#   -x  folder with the images and XML annotations
#   -l  label map file
#   -o  output .record file
%cd $MAIN_DIR
!python src/generate_tfrecord.py -x $train_data_dir -l $label_map_path -o $train_tfrec_path
!python src/generate_tfrecord.py -x $valid_data_dir -l $label_map_path -o $valid_tfrec_path

# Set up Model

In [None]:
# Select a pre-trained model: the name of its folder under PRE_MODELS_DIR,
# from which the initial checkpoint files are copied
PRE_MODEL_NAME = 'efficient_det_1024'

# Set the name of our fine-tuned model: used as the folder name under FT_MODELS_DIR
MY_MODEL_NAME = 'efficient_det_all_demo' #change this

In [None]:
## OPTIONAL: set up new model
#import tarfile
#ZIP_LOC = PRE_MODELS_DIR/'faster_rcnn.tar.gz'
#with tarfile.open(ZIP_LOC, "r:gz") as tar:
#    tar.extractall(PRE_MODELS_DIR)
#os.remove(ZIP_LOC)
## change permissions
#faster_rcnn = PRE_MODELS_DIR/'faster_rcnn_inception_resnet_v2_1024x1024_coco17_tpu-8'
#os.chmod(faster_rcnn, 0o775)  # mode must be an octal literal; decimal 775 sets different permission bits

In [None]:
## Set directories and paths for the model
# Our model directory, a folder for the exported model, and a folder holding the
# initial fine-tune checkpoint (taken from the pre-trained model)
my_model_dir = FT_MODELS_DIR / MY_MODEL_NAME
my_export_dir = my_model_dir / 'exported'
ft_ckpt_dir = my_model_dir / 'fine_tune_checkpoint'
# parents/exist_ok make this cell safe to re-run and independent of which
# parent folders already exist
for model_subdir in (my_model_dir, my_export_dir, ft_ckpt_dir):
    model_subdir.mkdir(parents=True, exist_ok=True)

# Checkpoint prefix the pipeline config will point to (fine_tune_checkpoint)
ft_ckpt_path = ft_ckpt_dir / 'ckpt-0'

# Path to the model configuration file
config_path = my_model_dir / 'pipeline.config'

# Copy the pre-trained checkpoint files (ckpt-0.*) into our model
pretrained_ckpt_dir = PRE_MODELS_DIR / PRE_MODEL_NAME / 'checkpoint'
pretrained_ckpt_files = sorted(pretrained_ckpt_dir.glob('ckpt-0.*'))
if not pretrained_ckpt_files:
    # Previously this case was silent; say so here rather than leave an empty checkpoint folder unexplained.
    print(f"WARNING: no 'ckpt-0.*' files found in {pretrained_ckpt_dir}; "
          f"nothing was copied to {ft_ckpt_dir}")
for ckpt_file in pretrained_ckpt_files:
    shutil.copyfile(ckpt_file, ft_ckpt_dir / ckpt_file.name)

In [None]:
# Batch size (reduce if out of GPU memory).
# Written into both train_config and eval_config when the pipeline config is updated;
# the learning rates there are scaled by sqrt(BATCH_SIZE/32).
BATCH_SIZE = 32

## Copy and modify the pipeline.config file

In [None]:
# Option 1 (disabled): start from the config file shipped with the pre-trained model
#shutil.copy(PRE_MODELS_DIR/PRE_MODEL_NAME/"pipeline.config", config_path)
# Option 2 (active): start from the pre-configured pipeline.config kept in FT_MODELS_DIR
shutil.copy(FT_MODELS_DIR/"pipeline.config", config_path)

In [None]:
# load the config file (or do edits manually)
config = config_util.get_configs_from_pipeline_file(config_path)
train_config = config['train_config']
ssd_config = config['model'].ssd

# point the pipeline at our fine-tune checkpoint, label map and TF .record files
train_config.fine_tune_checkpoint = str(ft_ckpt_path)
for input_key, tfrec_path in (('train_input_config', train_tfrec_path),
                              ('eval_input_config', valid_tfrec_path)):
    config[input_key].label_map_path = str(label_map_path)
    config[input_key].tf_record_input_reader.input_path[0] = str(tfrec_path)

# update batch size
train_config.batch_size = BATCH_SIZE
config['eval_config'].batch_size = BATCH_SIZE
if BATCH_SIZE <= 4:
    # improves training for small batch sizes
    ssd_config.freeze_batchnorm = True

# update num classes
ssd_config.num_classes = num_classes

# scale the base and warm-up learning rates by sqrt(BATCH_SIZE/32), i.e. reduce
# them for batch sizes below 32 (the factor is 1 at the batch size of 32)
cosine_lr = train_config.optimizer.momentum_optimizer.learning_rate.cosine_decay_learning_rate
lr_scale = np.sqrt(BATCH_SIZE/32)
cosine_lr.learning_rate_base = lr_scale * cosine_lr.learning_rate_base
cosine_lr.warmup_learning_rate = lr_scale * cosine_lr.warmup_learning_rate

# tune the aspect_ratios: drop the first three entries present in the loaded
# config, then append our own
aspect_ratios = ssd_config.anchor_generator.multiscale_anchor_generator.aspect_ratios
for _ in range(3):
    aspect_ratios.pop(0)

#ab only: 1.5, 1.8, 2.0, 2.25, 2.5
#all controllers:
aspect_ratios.extend([0.3, 0.6, 1.5, 1.75, 2.0, 2.25, 2.5])

# save the updated configuration into our model directory
config_proto = config_util.create_pipeline_proto_from_configs(config)
config_util.save_pipeline_config(config_proto, my_model_dir)

# set permissions to rwxrwxr-x to allow editing in winSCP; the mode must be an
# octal literal (decimal 775 would set an unrelated bit pattern)
os.chmod(config_path, 0o775)

# Begin training

In [None]:
## OPTIONAL
# to train on fewer GPUs (for example, to leave a GPU free for the evaluation script):
#import tensorflow as tf
#tf.config.list_physical_devices('GPU')
#os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16"

In [None]:
# Save a checkpoint every check_num training steps (passed as --checkpoint_every_n)
check_num = 500
# Total number of training steps (passed as --num_train_steps)
tot_steps = 6000

In [None]:
# write a shell script for training, and execute it from a terminal (INSIDE the notebooks directory)
%cd $NOTEBOOK_DIR
training_script = f"""python {TF_DIR}/research/object_detection/model_main_tf2.py --model_dir={my_model_dir} --pipeline_config_path={config_path} --num_train_steps={tot_steps} --checkpoint_every_n={check_num}"""
with open('training_launch.sh', 'w') as fp:
    fp.write(training_script)
    
os.chmod("training_launch.sh", 755)

In [None]:
## OPTIONAL
## or run training from the notebook:
#%cd $NOTEBOOK_DIR
#!python {TF_DIR}/research/object_detection/model_main_tf2.py --model_dir={my_model_dir} --pipeline_config_path={config_path} --checkpoint_every_n={check_num}

### Run validation in parallel using the Validation notebook. Wait for the cuDNN to get loaded (takes about 10-15 minutes); launch tensorboard

## Save model

In [None]:
# Export the trained model with the Object Detection API exporter script.
# It is given our pipeline config, my_model_dir as the checkpoint directory and
# my_export_dir as the output directory; afterwards return to the notebooks directory.
%cd {TF_DIR/'research/object_detection'}
!python exporter_main_v2.py \
--input_type image_tensor \
--pipeline_config_path $config_path \
--trained_checkpoint_dir $my_model_dir \
--output_directory $my_export_dir
%cd {NOTEBOOK_DIR}

### Can modify which checkpoint to use
This is done by opening {my_model_dir}/checkpoint file and modifying model_checkpoint_path.

In [None]:
# Change permissions of the checkpoint file to rwxrwxr-x so it can be edited.
# The mode must be an octal literal: decimal 775 would set an unrelated bit pattern.
checkpoint_file = os.path.join(my_model_dir, 'checkpoint')
os.chmod(checkpoint_file, 0o775)

In [None]:
# Make a folder for the (extra) exported model with a different checkpoint, then rerun the Save Model cell.
# exist_ok=True makes the cell safe to re-run; unlike a separate exists() check it
# also raises if a regular file (not a folder) already occupies this path.
my_export_dir = my_model_dir / 'exported-2'
my_export_dir.mkdir(exist_ok=True)