Training Notebook from Kaggle

# 🚀 Installing and importing

In [1]:
!git clone https://github.com/benihime91/leaf-disease-classification-kaggle.git

!wandb login a74f67fd5fae293e301ea8b6710ee0241f595a63

Cloning into 'leaf-disease-classification-kaggle'...
remote: Enumerating objects: 77, done.[K
remote: Counting objects: 100% (77/77), done.[K
remote: Compressing objects: 100% (58/58), done.[K
remote: Total 1329 (delta 37), reused 51 (delta 19), pack-reused 1252[K
Receiving objects: 100% (1329/1329), 43.77 MiB | 22.50 MiB/s, done.
Resolving deltas: 100% (751/751), done.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [2]:
import sys
import warnings

# Make the vendored timm sources and the cloned project importable,
# in that order, by appending both directories to the module search path.
sys.path.extend([
    '../input/timmmodels/pytorch-image-models/',
    'leaf-disease-classification-kaggle/',
])

# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings(action='ignore')

In [3]:
import logging
import os

import pytorch_lightning as pl
import torch
from torch import nn, optim
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2

from fastai.torch_core import apply_init
from functools import partial
import wandb

from src.core import *
from src.lightning.core import *
from src.layers import *
from src.mixmethods import *
from src.networks import *

# Keep wandb's own logger quiet: only records at ERROR level or above pass.
logger = logging.getLogger(name="wandb")
logger.setLevel(level=logging.ERROR)

**Set the random seeds so that results are reproducible.**

In [4]:
# Seed the run with 42; the returned value is stored in the job config as `random_seed`.
# (seed_everything / generate_random_id are presumably provided by the `src` star-imports.)
seed = seed_everything(42)
# Identifier for this run: recorded in the config as `unique_idx` and embedded in the
# checkpoint / weights file name.
# NOTE(review): it is generated after seeding -- confirm it still differs between runs.
idx  = generate_random_id()

# ⚡ 💘 🏋️‍♀️ Configure the Training Parameters

In [5]:
# Job-level configuration: run identity, data locations, backbone choice
# and the sizes that drive the training loop.
config = {
    # --- run identity / experiment tracking ---
    "random_seed": seed,
    "unique_idx": idx,
    "project_name": "kaggle-leaf-disease-v2",

    # --- data ---
    "curr_fold": 0,
    "image_dir": "../input/cassava-leaf-disease-classification/train_images/",
    "csv_path": "leaf-disease-classification-kaggle/data/stratified-data-5folds.csv",

    # --- network ---
    "encoder": "seresnext50_32x4d",
    "activation": {"type": "torch.nn.ReLU", "inplace": True},

    # --- loop sizes ---
    "image_dims": 512,
    "num_epochs": 40,
    "batch_size": 32,
    "accumulate_batches": 1,
    "clip_grad_norm": 0.0,
}

# Hyper-parameters handed to the LightningModule as its `conf`.
# Entries with a "type" key are dotted import paths plus constructor kwargs.
hparams = {
    "mixmethod": {"type": "src.mixmethods.SnapMix", "alpha": 5.0, "conf_prob": 1.0},
    "loss_function": {"type": "src.core.LabelSmoothingCrossEntropy", "eps": 0.1},

    "learning_rate": 1e-03,
    "lr_mult": 100,

    "optimizer": {"type": "torch.optim.SGD", "momentum": 0.9, "weight_decay": 1e-06},

    "scheduler": {
        "type": "torch.optim.lr_scheduler.CosineAnnealingWarmRestarts",
        "T_0": 10,
        "T_mult": 2,
    },

    "metric_to_track": None,
    "step_after": "step",
    "frequency": 1,
}


# Albumentations pipelines for the train / valid splits.
# Both end with Resize -> Normalize -> ToTensorV2; only the training
# pipeline adds random augmentation in between.
_side = config["image_dims"]  # height and width passed to the resize/crop transforms

TRAIN_AUGS = A.Compose(
    [
        # random crop-and-resize on half of the samples ...
        A.RandomResizedCrop(_side, _side, p=0.5),
        # ... followed by an unconditional resize
        A.Resize(_side, _side, p=1.0),
        A.OneOf([A.ShiftScaleRotate(), A.HorizontalFlip()], p=0.8),
        A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
        A.CoarseDropout(p=0.5),
        A.Normalize(p=1.0),
        ToTensorV2(p=1.0),
    ]
)

VALID_AUGS = A.Compose(
    [
        A.Resize(_side, _side, p=1.0),
        A.Normalize(p=1.0),
        ToTensorV2(p=1.0),
    ]
)

# Base name (no extension) shared by the checkpoint and the exported weights:
# "<encoder>-fold=<fold>-<run id>".
MODEL_SAVE_PATH = "{}-fold={}-{}".format(config["encoder"], config["curr_fold"], idx)

# 🏗️ Building a Model with Lightning

In [6]:
# Build the network architecture.
# A pretrained timm backbone is wrapped in the project's
# SnapMixTransferLearningModel, the variant used together with SnapMix.
# (Per the original author's note, TransferLearningModel is an alternative
# that resembles the model built by fast.ai's cnn_learner.)
encoder = timm.create_model(config["encoder"], pretrained=True)

model_kwargs = dict(
    encoder=encoder,
    c=len(idx2lbl),  # presumably the number of target classes -- confirm
    cut=-2,
    act=object_from_dict(config["activation"]),
)
model = SnapMixTransferLearningModel(**model_kwargs)

# Kaiming-normal initialisation for the final, not-yet-trained `fc` layer.
apply_init(model.fc, torch.nn.init.kaiming_normal_)

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/seresnext50_32x4d_racm-a304a460.pth" to /root/.cache/torch/hub/checkpoints/seresnext50_32x4d_racm-a304a460.pth


In [7]:
# Wrap the network in the project's LightningCassava module; the `hparams` dict
# (mix method, loss, optimizer, scheduler settings) is passed as its `conf`.
litModel = LightningCassava(model=model, conf=hparams)

Mixmethod : SnapMix
Loss Function : LabelSmoothingCrossEntropy()


In [8]:
# Print the module hierarchy as a sanity check of the assembled architecture.
print(litModel)

LightningCassava(
  (model): SnapMixTransferLearningModel(
    (encoder): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): ReLU(inplace=True)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act2): ReLU(inplace=True)
          (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm

# 🛒 Loading data

In [9]:
# Build the LightningDataModule for the selected fold.
# The CSV path and image directory are passed positionally; everything else
# is collected as keyword arguments first.
datamodule_kwargs = dict(
    curr_fold=config["curr_fold"],
    train_augs=TRAIN_AUGS,
    valid_augs=VALID_AUGS,
    bs=config["batch_size"],
    num_workers=4,
)
dm = CassavaLightningDataModule(
    config["csv_path"],
    config["image_dir"],
    **datamodule_kwargs,
)

# 📲 Callbacks ➕ Optional methods for even better logging

In [10]:
# Callbacks, checkpointing and logger that are handed to the Trainer.
callbacks = []
# log the learning rate at every optimisation step
callbacks.append(pl.callbacks.LearningRateMonitor("step"))
# project callback for W&B image-classification logging; receives the data module and job config
callbacks.append(WandbImageClassificationCallback(dm, default_config=config))
# stop once validation accuracy has not improved for 5 checks
callbacks.append(
    pl.callbacks.EarlyStopping(monitor="valid/acc", patience=5, mode="max")
)

# Keep only the single checkpoint with the highest validation accuracy.
chkpt_callback = pl.callbacks.ModelCheckpoint(
    filename=MODEL_SAVE_PATH,
    monitor="valid/acc",
    mode='max',
    save_top_k=1,
)

# Weights & Biases logger; log_model=True is passed so W&B also handles the saved model.
wb_logger = pl.loggers.WandbLogger(
    project=config["project_name"],
    log_model=True,
)

# 👟 Making a Trainer

In [11]:
# Assemble the Trainer arguments first, then construct the Trainer.
# NOTE(review): newer pytorch_lightning releases expect ModelCheckpoint instances
# inside `callbacks=` rather than in `checkpoint_callback=` -- confirm against the
# installed version before upgrading.
trainer_kwargs = dict(
    # hardware / numerics
    gpus=-1,
    precision=16,
    deterministic=True,
    # checkpointing, logging and callbacks
    checkpoint_callback=chkpt_callback,
    logger=wb_logger,
    callbacks=callbacks,
    log_every_n_steps=1,
    # loop control taken from the job config
    max_epochs=config["num_epochs"],
    gradient_clip_val=config["clip_grad_norm"],
    accumulate_grad_batches=config["accumulate_batches"],
)
trainer = pl.Trainer(**trainer_kwargs)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


In [12]:
# # start learning_rate finder to find optimum starting Lr
# lr_finder = trainer.tuner.lr_find(litModel, datamodule=dm)

# fig = lr_finder.plot(suggest=True)
# fig.show()

# 🏃‍♀️ Running our Model

In [13]:
# Set the initial learning rate on the module's hparams right before fitting.
# NOTE(review): 1e-03 repeats the `learning_rate` already given in `hparams`;
# presumably this line is where a learning-rate-finder suggestion would be plugged in.
litModel.hparams['learning_rate'] = 1e-03

# Start the training job using the data module built earlier.
trainer.fit(litModel, datamodule=dm)

Generating data for fold: 0
[34m[1mwandb[0m: Currently logged in as: [33mayushman[0m (use `wandb login --relogin` to force relogin)



  | Name      | Type                         | Params
-----------------------------------------------------------
0 | model     | SnapMixTransferLearningModel | 25.5 M
1 | loss_func | LabelSmoothingCrossEntropy   | 0     


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

wandb config updated -->


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…






1

# 💾 Testing and saving the model

In [14]:
# Run the test loop on the data module and keep the returned metrics.
# NOTE(review): the previous comment here said the best checkpoint (per the
# checkpoint callback's metric) is loaded automatically, but ckpt_path=None is
# passed explicitly. In pytorch_lightning that presumably means "test the weights
# currently in memory" -- confirm, and pass ckpt_path="best" if the best
# checkpoint is what should be evaluated.
results = trainer.test(datamodule=dm, ckpt_path=None) # current (last-epoch) weights -- TODO confirm

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/acc': tensor(0.7166, device='cuda:0'),
 'test/loss': tensor(0.9713, device='cuda:0'),
 'train/acc': tensor(0.6207, device='cuda:0'),
 'train/acc_epoch': tensor(0.6443, device='cuda:0'),
 'train/acc_step': tensor(0.6207, device='cuda:0'),
 'train/loss': tensor(1.6712, device='cuda:0'),
 'train/loss_epoch': tensor(1.6036, device='cuda:0'),
 'train/loss_step': tensor(1.6712, device='cuda:0'),
 'valid/acc': tensor(0.7166, device='cuda:0'),
 'valid/loss': tensor(0.9713, device='cuda:0')}
--------------------------------------------------------------------------------



In [15]:
# Export the model weights under the run-specific file name,
# then attach that file to the active W&B run.
path = MODEL_SAVE_PATH + ".pt"
litModel.save_model_weights(path)
wandb.save(path)

weights saved to seresnext50_32x4d-fold=0-ad45e4a2.pt


['/kaggle/working/wandb/run-20201227_132325-2as2eqxt/files/seresnext50_32x4d-fold=0-ad45e4a2.pt']

In [16]:
# Close the W&B run now that the weights file has been handed to wandb.save.
wandb.finish()

VBox(children=(Label(value=' 58.11MB of 59.01MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.984738514…

0,1
lr-SGD/pg1,0.0
lr-SGD/pg2,0.00024
train/loss_step,1.6712
train/acc_step,0.62069
epoch,7.0
_step,8559.0
_runtime,6071.0
_timestamp,1609081477.0
valid/loss,0.97125
valid/acc,0.71659


0,1
lr-SGD/pg1,▁▂▃▇▅▁█▇▅▃▂▁██▇▆▆▅▄▃▂▂▁▁████▇▇▇▇▆▆▅▅▄▄▃▃
lr-SGD/pg2,▁▂▃▇▅▁█▇▅▃▂▁██▇▆▆▅▄▃▂▂▁▁████▇▇▇▇▆▆▅▅▄▄▃▃
train/loss_step,▅▇▄▇▃▅▄▆▃▂▆▂▆▄▆▃▂▇▄▆▇▃▃▂▇█▅▂▇▃▃▂▇▃▃▃▁▅▄▇
train/acc_step,▄▅▅▃▄▃▆▅▆█▅▃▄▅▃▆▆▁▅▅▄▆▆▆▃▂▄▅▄▆▅▅▂▄▆▅▇▆▄▂
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
valid/loss,█▆▄▃▃▂▁▁▁
valid/acc,▁▂▃▅▆▇▇██
