## Set up the Kaggle environment

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so that edits to source files take effect.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

import warnings
# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

In [2]:
!git clone https://github.com/benihime91/leaf-disease-classification-kaggle.git
!pip install hydra-core timm --upgrade --quiet
!apt install tree

import sys
sys.path.append("/kaggle/working/leaf-disease-classification-kaggle/")

Cloning into 'leaf-disease-classification-kaggle'...
remote: Enumerating objects: 273, done.[K
remote: Counting objects: 100% (273/273), done.[K
remote: Compressing objects: 100% (191/191), done.[K
remote: Total 273 (delta 132), reused 188 (delta 64), pack-reused 0[K
Receiving objects: 100% (273/273), 8.38 MiB | 10.44 MiB/s, done.
Resolving deltas: 100% (132/132), done.
/bin/sh: 1: sudo: not found


## Import dependencies

In [None]:
import os
import time

from omegaconf import OmegaConf

# The compose API was promoted out of `hydra.experimental` in Hydra 1.1 and the
# experimental aliases are deprecated. Prefer the top-level names and fall back
# to the experimental namespace only for Hydra 1.0.x, where they do not exist
# at the top level.
try:
    from hydra import compose, initialize, initialize_config_dir, initialize_config_module
except ImportError:
    from hydra.experimental import compose, initialize, initialize_config_dir, initialize_config_module

# Project entry point from the cloned repository (must already be on sys.path).
from experiment import run

## Setting up the config pipeline

In [None]:
# Location and name of the root Hydra config inside the cloned repository.
config_dir = "/kaggle/working/leaf-disease-classification-kaggle/conf/"
config_name = "config"

# Experiment selection. These values are passed to Hydra as `key=value`
# overrides below.
# see: https://hydra.cc/docs/next/advanced/override_grammar/basic/
model_arch = "resnext50_32x4d"
optimizer = "adamw"
scheduler = "reducelronplateau"
# which fold to train on ?
fold_num = 0

# Input data and output locations.
image_dir = "/kaggle/input/cassava-leaf-disease-classification/train_images/"
csv_dir   = "/kaggle/input/cassava-leaf-disease-classification/train.csv"
json_dir  = "/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json"
fold_csv_dir = "/kaggle/working/leaf-disease-classification-kaggle/data/fold_df.csv"
checkpoint_path = "/kaggle/working/"
model_save_dir = f"/kaggle/working/weights_stage2_fold_{fold_num}.pt"
# The Unix timestamp suffix gives every execution of this cell a distinct run name.
run_name = f"{model_arch}_{fold_num}_{int(time.time())}"

# Hyper-parameters for the training job; written onto the composed config later.
learning_rate = 0.02
weight_decay = 0.01
num_epochs = 20
num_classes = 5
# train_length = 17118

# Custom parameters that override the default configs.
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated by Python, which previously fused the `model=` and `optimizer=`
# overrides into a single invalid entry.
customs = [
    f"model={model_arch}",
    f"optimizer={optimizer}",
    f"scheduler={scheduler}",
    f"image_dir={image_dir}",
    f"csv_dir={csv_dir}",
    f"json_dir={json_dir}",
    f"fold_csv_dir={fold_csv_dir}",
    f"fold_num={fold_num}",
    f"run_name={run_name}",
    f"checkpoint_path={checkpoint_path}",
    f"model_save_dir={model_save_dir}",
]

## Config Structure

In [7]:
# Print the directory layout of the cloned repository's config folder.
!tree "/kaggle/working/leaf-disease-classification-kaggle/conf/"

[01;34m/kaggle/working/leaf-disease-classification-kaggle/conf/[00m
├── [01;34maugmentation[00m
│   └── augs.yaml
├── config.yaml
├── [01;34mlightning[00m
│   └── default.yaml
├── [01;34mlogger[00m
│   └── wandb.yaml
├── [01;34mmodel[00m
│   ├── efficientnetb0.yaml
│   ├── resnet50.yaml
│   └── resnext50_32x4d.yaml
├── [01;34moptimizer[00m
│   ├── adam.yaml
│   ├── adamw.yaml
│   └── sgd.yaml
├── [01;34mscheduler[00m
│   ├── cosineannealingwarmrestarts.yaml
│   ├── onecyclelr.yaml
│   └── reducelronplateau.yaml
└── [01;34mtraining[00m
    └── default.yaml

7 directories, 14 files


## Initialize Hydra & Run the Training Pipeline

In [None]:
if __name__ == "__main__":
    # Pattern adapted from Hydra's notebook example:
    # https://github.com/facebookresearch/hydra/blob/master/examples/jupyter_notebooks/compose_configs_in_notebook.ipynb
    with initialize_config_dir(config_dir=config_dir):
        # Compose the root config, applying the `customs` override list.
        cfg = compose(overrides=customs, config_name=config_name)

        # Write this notebook's hyper-parameters onto the composed config.
        cfg.optimizer.params.weight_decay = weight_decay
        cfg.training.lr = learning_rate
        cfg.training.num_classes = num_classes
        cfg.training.num_epochs = num_epochs

        # Start training while still inside the Hydra initialization context.
        run(cfg)