# Train and fine-tune a classifier on Covid x-rays dataset

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jabascal/covid_detection/blob/main/src/covid_xrays_classification_tb_colab.ipynb)

### Set up Google Colab

On colab, to run on GPU, select *GPU* from the navigation menu *Runtime/Change runtime type*.

In [None]:
# Show the NVIDIA GPU (if any) attached to this runtime
!nvidia-smi

Set *mode_colab=True* to run in Colab. This mounts Google Drive and installs the extra packages needed by the notebook.

In [None]:
mode_colab = True
if (mode_colab is True):
    # Connect to googledrive
    #if 'google.colab' in str(get_ipython()):
    # Mount google drive to access files via colab
    from google.colab import drive
    drive.mount("/content/gdrive")
    %cd /content/gdrive/MyDrive/

    # For the profiler
    !pip install -U tensorboard-plugin-profile
    !pip install mlflow

    # Load the TensorBoard notebook extension
    %load_ext tensorboard

### Clone repository

In [None]:
if mode_colab:
    # Clone the project repository into the current working directory
    !git clone https://github.com/jabascal/covid_detection.git
    # Work from src/; presumably the `utils` imports and the relative
    # `config/` path used in later cells are resolved from there
    %cd covid_detection/src
    # Optional: install the project requirements
    #!pip install -r requirements.txt


### Dependencies

In [None]:
import random
import os
import datetime
import tensorflow as tf

from utils.helper_in_out import load_config
from utils.helper_tf import train_finetune_clf

# Seed Python's built-in `random` module only.
# NOTE(review): the NumPy/TensorFlow RNGs are not seeded in this cell; confirm
# whether train_finetune_clf seeds them if reproducible training is required.
random.seed(123)


### Load config file

In [None]:
# Path of the YAML config file, relative to the current working directory
config_file = 'config/config_clf.yaml'

# Load config file; `param` is indexed like a nested mapping by the cells below
param = load_config(config_file)

### Verify dataset location

Download the data to the desired path in Google Drive and check that the paths below point to it.

In [None]:
# Check data path
# data_path = param['data']['path']
train_path = '../../Colab_Notebooks/Data/COVID-19_CXR_Dataset_final_train'
test_path = '../../Colab_Notebooks/Data/COVID-19_CXR_Dataset_final_test

print(f"Train data Location {train_path}")
!ls $train_path
print(f"Test data Location {test_path}")
!ls $test_path

# Reset path 
param['data']['train_path'] = train_path
param['data']['test_path'] = test_path

### Tensorboard

You can launch tensorboard before or after training. If done before, you can update the visualization to see progress during training. 

In [None]:
# Launch TensorBoard
# Log directory taken from the config; the training cell passes the same
# param['tb']['log_dir'] value as `log_dir`
tb_path = param['tb']['log_dir']
# Create the directory first if it does not exist yet
if not os.path.exists(tb_path):
    os.makedirs(tb_path)
    print(f"Created path {tb_path}")
# Open the TensorBoard UI inline, reading from the log directory
%tensorboard --logdir $tb_path

Optionally, upload the TensorBoard logs to TensorBoard.dev to obtain a permanent, shareable link.

### mlflow

Track experiments with MLflow.

In [None]:
# Configure MLflow tracking through the project's set_mlflow helper,
# but only when tracking is switched on in the config
mlflow_cfg = param['mlflow']
if mlflow_cfg['tracking']:
    # Imported here so the helper is only needed when tracking is enabled
    from utils.helper_mlflow import set_mlflow

    set_mlflow(
        config_file=config_file,
        experiment_id=mlflow_cfg['experiment_id'],
        experiment_name=mlflow_cfg['experiment_name'],
        run_name=mlflow_cfg['run_name'],
    )

### Train

In [None]:
%%timeit
# Train and fine-tune a classifier model
model, history, test_loss, test_acc = train_finetune_clf(
                        # Data
                        train_dir=param['data']['train_path'],
                        test_dir=param['data']['test_path'],
                        val_dir=param['data']['val_path'],
                        img_height=param['data']['img_height'],
                        img_width=param['data']['img_width'],
                        batch_size=param['train']['batch_size'],
                        validation_split=param['data']['val_split'],
                        test_split=param['data']['test_split'],
                        color_mode=param['data']['color'],
                        augmentation_param=param['data']['augmentation'],
                        cache=param['data']['cache'],
                        shuffle=param['data']['shuffle'],
                        # Model
                        base_model_name=param['model']['base_model_name'],
                        model_num_channels=param['model']['num_channels'],
                        dropout=param['model']['dropout'],
                        path_save_model=param['model']['path_save'],
                        # Train
                        initial_epochs=param['train']['epochs'],
                        fine_tune_at_perc=param['train']['fine_tune_at_perc'],
                        base_learning_rate=param['train']['lr'],
                        fine_tune_epochs=param['train']['epochs_finetune'],
                        ft_learning_rate=param['train']['lr_finetune'],
                        metrics=param['train']['metrics'],
                        mode_display=param['train']['mode_display'],
                        # Tensorboard
                        log_dir=param['tb']['log_dir'],
                        histogram_freq=param['tb']['histogram_freq'],
                        profile_batch=param['tb']['profile_batch'],
                        # Early stopping
                        early_stopping_patience=param['tb']['early_stopping']['patience'],
                        early_stopping_monitor=param['tb']['early_stopping']['monitor'],
                        # Model checkpoint
                        ckpt_freq=param['tb']['model_ckpt']['ckpt_freq'],
                        ckpt_path=param['tb']['model_ckpt']['ckpt_path'],
                        ckpt_monitor=param['tb']['model_ckpt']['ckpt_monitor'],
                        # Reduce learning rate
                        reduce_lr_monitor=param['tb']['reduce_lr']['monitor'],
                        reduce_lr_factor=param['tb']['reduce_lr']['factor'],
                        reduce_lr_patience=param['tb']['reduce_lr']['patience'],
                        reduce_lr_min=param['tb']['reduce_lr']['min_lr']
                        # Config file
                        config_file = config_file,
    )

In [None]:
import shutil

# Keep a copy of the config used for this run next to the TensorBoard logs
config_copy = os.path.join(param['tb']['log_dir'], 'config.yaml')
shutil.copyfile(config_file, config_copy)


In [None]:
# Stop MLflow through the project's stop_mlflow helper if tracking was enabled
tracking_enabled = param['mlflow']['tracking']
if tracking_enabled:
    from utils.helper_mlflow import stop_mlflow

    stop_mlflow()

### Close colab session!

Don't forget to close the Colab session when you are done: terminate the instance from the top menu *Runtime/Manage sessions*.