## Set up the Kaggle environment

In [1]:
# Load the autoreload extension and enable mode 2, so edits to imported
# modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import warnings
# Install a blanket "ignore" filter: every Python warning raised after this
# point is suppressed.
# NOTE(review): this also hides deprecation notices from the packages that the
# next cell upgrades — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [2]:
!git clone https://github.com/benihime91/leaf-disease-classification-kaggle.git
!pip install hydra-core timm wandb --upgrade --quiet

import sys
sys.path.append("/kaggle/working/leaf-disease-classification-kaggle/")

Cloning into 'leaf-disease-classification-kaggle'...
remote: Enumerating objects: 338, done.[K
remote: Counting objects: 100% (338/338), done.[K
remote: Compressing objects: 100% (232/232), done.[K
remote: Total 338 (delta 174), reused 231 (delta 84), pack-reused 0[K
Receiving objects: 100% (338/338), 8.39 MiB | 11.14 MiB/s, done.
Resolving deltas: 100% (174/174), done.


## Import dependencies

In [3]:
import os
from experiment import run
from hydra.experimental import initialize_config_dir, compose
from omegaconf import OmegaConf
import time

## Setting up the config pipeline

In [4]:
# ---------------------------------------------------------------------------
# Where the Hydra configuration lives
# ---------------------------------------------------------------------------
config_dir  = "/kaggle/working/leaf-disease-classification-kaggle/conf/"  # directory with the config files
config_name = "config"            # parent config file
model_arch  = "efficientnet_b3a"  # config file describing the model architecture

# Fold to train on; must be one of [0, 1, 2, 3, 4].
fold_num = 2

# ---------------------------------------------------------------------------
# Input data
# (override syntax: https://hydra.cc/docs/next/advanced/override_grammar/basic/)
# ---------------------------------------------------------------------------
image_dir    = "/kaggle/input/cassava-leaf-disease-classification/train_images/"
csv_dir      = "/kaggle/input/cassava-leaf-disease-classification/train.csv"
json_dir     = "/kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json"
fold_csv_dir = "/kaggle/working/leaf-disease-classification-kaggle/data/fold_df.csv"

# ---------------------------------------------------------------------------
# Optimisation settings
# ---------------------------------------------------------------------------
optimizer     = "adamw"       # name of the optimizer config file
scheduler     = "onecyclelr"  # name of the scheduler config file
learning_rate = 0.002         # optimizer learning rate
weight_decay  = 0.001         # optimizer weight decay
gradient_clip = 0.1           # gradient clipping norm

# ---------------------------------------------------------------------------
# Training job
# ---------------------------------------------------------------------------
num_classes = 5    # number of output classes
image_dim   = 224  # image side length after resizing
batch_size  = 128  # batch size for train/valid/test
num_epochs  = 15   # upper bound on the number of training epochs

# Optional values consumed by the scheduler.
# NOTE(review): 17118 is presumably the number of training images — confirm.
steps_per_epoch = 17118 // batch_size
total_steps     = num_epochs * steps_per_epoch

# ---------------------------------------------------------------------------
# Outputs: run name (timestamped so each launch is unique) and save locations
# ---------------------------------------------------------------------------
checkpoint_path = "/kaggle/working/"
model_save_dir  = f"/kaggle/working/{model_arch}_fold_{fold_num}.pt"
run_name        = f"{model_arch}_fold_{fold_num}_{int(time.time())}"

# Hydra overrides applied on top of the default config, written as
# "key=value" strings in the order listed here.
customs = [
    f"{key}={value}"
    for key, value in (
        ("model", model_arch),
        ("optimizer", optimizer),
        ("scheduler", scheduler),
        ("image_dir", image_dir),
        ("csv_dir", csv_dir),
        ("json_dir", json_dir),
        ("fold_csv_dir", fold_csv_dir),
        ("fold_num", fold_num),
        ("run_name", run_name),
        ("checkpoint_path", checkpoint_path),
        ("model_save_dir", model_save_dir),
    )
]

## Initializing Hydra Config

In [5]:
# Compose the Hydra config from inside the notebook, following
# https://github.com/facebookresearch/hydra/blob/master/examples/jupyter_notebooks/compose_configs_in_notebook.ipynb
with initialize_config_dir(config_dir=config_dir):
    # Start from the parent config and apply the "key=value" overrides in `customs`.
    cfg = compose(config_name=config_name, overrides=customs)

    # Override with the custom training configuration.
    cfg.training.num_epochs                   = num_epochs
    cfg.training.num_classes                  = num_classes
    cfg.training.lr                           = learning_rate
    cfg.training.total_steps                  = total_steps
    # total_steps is derived from batch_size, so the dataloaders must use the
    # same value; otherwise changing batch_size in the notebook would leave
    # the scheduler's step count out of sync with the real number of batches.
    cfg.training.dataloaders.batch_size       = batch_size
    cfg.optimizer.params.weight_decay         = weight_decay
    cfg.lightning.init_args.gradient_clip_val = gradient_clip
    cfg.model.use_custom_base                 = True
    cfg.training.image_dim                    = image_dim

    # Show the resolved configuration as YAML for a visual check.
    print(OmegaConf.to_yaml(cfg))

fold_num: 2
run_name: efficientnet_b3a_fold_2_1606200844
checkpoint_path: /kaggle/working/
model_save_dir: /kaggle/working/efficientnet_b3a_fold_2.pt
seed: 42
use_weights: true
image_dir: /kaggle/input/cassava-leaf-disease-classification/train_images/
csv_dir: /kaggle/input/cassava-leaf-disease-classification/train.csv
json_dir: /kaggle/input/cassava-leaf-disease-classification/label_num_to_disease_map.json
fold_csv_dir: /kaggle/working/leaf-disease-classification-kaggle/data/fold_df.csv
model:
  class_name: timm.create_model
  params:
    pretrained: true
    model_name: efficientnet_b3a
  output_dims: 1000
  fc1: 512
  fc2: 256
  num_classes: ${training.num_classes}
  use_custom_base: true
training:
  seed: ${seed}
  job_name: ${run_name}
  checkpoint_path: ${checkpoint_path}
  model_save_dir: ${model_save_dir}
  num_epochs: 15
  total_steps: 1995
  lr: 0.002
  num_classes: 5
  image_dim: 224
  metric: val_loss
  dataloaders:
    batch_size: 128
    pin_memory: true
    num_workers: 

## Run Training Pipeline

In [6]:
# train model
# `run` comes from the project's `experiment` module and receives the composed
# Hydra config.
# NOTE(review): going by the log output captured with this cell, the call
# covers training, per-epoch validation, a final test pass and W&B logging —
# confirm against experiment.run.
run(cfg)

[34m[1mwandb[0m: W&B API key is configured (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.
[34m[1mwandb[0m: Currently logged in as: [33mayushman[0m (use `wandb login --relogin` to force relogin)


Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b3_ra2-cf984f9c.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_ra2-cf984f9c.pth

  | Name          | Type                       | Params | In sizes           | Out sizes
----------------------------------------------------------------------------------------------
0 | net           | BasicTransferLearningModel | 12 M   | [128, 3, 224, 224] | [128, 5] 
1 | loss_fn       | CrossEntropyLoss           | 0      | ?                  | ?        
2 | valid_loss_fn | CrossEntropyLoss           | 0      | ?                  | ?        


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': tensor(0.8588, device='cuda:0'),
 'test_loss': tensor(0.4877, device='cuda:0'),
 'train_loss': tensor(0.3987, device='cuda:0'),
 'val_acc': tensor(0.8776, device='cuda:0'),
 'val_loss': tensor(0.3707, device='cuda:0')}
--------------------------------------------------------------------------------





VBox(children=(Label(value=' 7.51MB of 7.51MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
lr-AdamW,0.0
train_loss,0.39866
epoch,14.0
_step,4019.0
_runtime,3275.0
_timestamp,1606204140.0
val_loss,0.37075
val_acc,0.87757
test_loss,0.48773
test_acc,0.85881


0,1
lr-AdamW,▁▁▂▂▃▄▅▆▇▇███████▇▇▇▇▆▆▅▅▅▄▄▃▃▃▂▂▂▂▁▁▁▁▁
train_loss,█▆▄▄▃▃▃▃▃▃▃▄▃▃▄▃▃▂▂▁▂▃▄▃▂▂▂▂▂▂▂▂▂▁▂▂▂▁▂▁
epoch,▁▁▁▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇███
_step,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄█
_runtime,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss,▇▅█▆▆▄▂▂▂▂▁▁▁▁▁
val_acc,▃▅▁▃▃▆▇▇▇▇█████
test_loss,▁
test_acc,▁
