# Setup

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
matplotlib.__version__

'3.0.2'

In [3]:
import os
from pathlib import Path
import pandas as pd
from sklearn.model_selection import GroupKFold
import cv2

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms

In [5]:
torch.__version__

'1.0.1'

In [9]:
from albumentations import Compose, JpegCompression, CLAHE, RandomRotate90, Transpose, ShiftScaleRotate, \
        Blur, OpticalDistortion, GridDistortion, HueSaturationValue, Flip, VerticalFlip

import pretrainedmodels as pm

from kekas import Keker, DataOwner, DataKek
from kekas.transformations import Transformer, to_torch, normalize
from kekas.metrics import accuracy
from kekas.modules import Flatten, AdaptiveConcatPool2d
from kekas.callbacks import Callback, Callbacks, DebuggerCallback

In [10]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

# Prepare Data

In [14]:
data_path = Path('GS')

In [15]:
row_list = []
for ds_path in data_path.iterdir():
    for cl in ['on', 'off']:
        for p in (ds_path / cl).iterdir():
            row_list.append([str(p), ds_path.name, cl])

ds_df = pd.DataFrame(row_list, columns=['fpath', 'group', 'label'])
ds_df.shape

(23238, 3)

In [16]:
ds_df.head()

Unnamed: 0,fpath,group,label
0,GS/DESI quan_Swales/on/C21H43O6P+H.png,DESI quan_Swales,on
1,GS/DESI quan_Swales/on/C4H7O8P+Na.png,DESI quan_Swales,on
2,GS/DESI quan_Swales/on/C21H39O7P+H.png,DESI quan_Swales,on
3,GS/DESI quan_Swales/on/C10H11NO3+Na.png,DESI quan_Swales,on
4,GS/DESI quan_Swales/on/C24H40O4+Na.png,DESI quan_Swales,on


In [17]:
train_inds, other_inds = next(GroupKFold(n_splits=4).split(ds_df.fpath, groups=ds_df.group))
valid_inds, test_inds = next(GroupKFold(n_splits=2).split(ds_df.fpath, groups=ds_df.group))
train_inds.shape, valid_inds.shape, test_inds.shape

((17442,), (11621,), (11617,))

In [18]:
from kekas.transformations import Transformer, to_torch, normalize
from torchvision import transforms

# create train and val datasets using DataKek class - a pytorch Dataset that uses
# pandas DataFrame as data source

# at first we need to create a reader function that will define how image will be opened
def reader_fn(i, row):
    # it always gets i and row as parameters
    # where i is an index of dataframe and row is a dataframes row
    image = cv2.imread(row["fpath"])
    if row["label"] == "on":
        label = 0
    else:
        label = 1
    return {"image": image, "label": label}


# Then we should create transformations/augmentations
# We will use awesome https://github.com/albu/albumentations library
def augs(p=0.5):
    return Compose([
        CLAHE(),
        RandomRotate90(),
        Transpose(),
        ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.50, rotate_limit=15, p=.75),
        Blur(blur_limit=3),
        OpticalDistortion(),
        GridDistortion(),
        HueSaturationValue()
    ], p=p)

def get_transforms(dataset_key, size, p):
    # we need to use a Transformer class to apply transformations to DataKeks elements
    # dataset_key is an image key in dict returned by reader_fn
    
    PRE_TFMS = Transformer(dataset_key, lambda x: cv2.resize(x, (size, size)))

    AUGS = Transformer(dataset_key, lambda x: augs()(image=x)["image"])

    NRM_TFMS = transforms.Compose([
        Transformer(dataset_key, to_torch()),
        Transformer(dataset_key, normalize())
    ])
    
    train_tfms = transforms.Compose([PRE_TFMS, AUGS, NRM_TFMS])
    val_tfms = transforms.Compose([PRE_TFMS, NRM_TFMS])  # because we don't want to augment val set yet
    
    return train_tfms, val_tfms

In [19]:
train_tfms, val_tfms = get_transforms("image", 224, 0.5)

train_dk = DataKek(df=ds_df.iloc[train_inds], reader_fn=reader_fn, transforms=train_tfms)
val_dk = DataKek(df=ds_df.iloc[valid_inds], reader_fn=reader_fn, transforms=val_tfms)

In [20]:
# and DataLoaders
batch_size = 32
workers = 8

train_dl = DataLoader(train_dk, batch_size=batch_size, num_workers=workers, shuffle=True, drop_last=True)
val_dl = DataLoader(val_dk, batch_size=batch_size, num_workers=workers, shuffle=False)

# Train Model

In [23]:
# create a simple neural network using pretrainedmodels library
# https://github.com/Cadene/pretrained-models.pytorch

class Net(nn.Module):
    def __init__(
            self,
            num_classes: int,
            p: float = 0.5,
            pooling_size: int = 2,
            last_conv_size: int = 2048,
            arch: str = "se_resnext50_32x4d",
            pretrained: str = "imagenet") -> None:
        """A simple model to finetune
        
        Args:
            num_classes: the number of target classes, the size of the last layer's output
            p: dropout probability
            pooling_size: the size of the result feature map after adaptive pooling layer
            last_conv_size: size of the flatten last backbone conv layer
            arch: the name of the architecture form pretrainedmodels
            pretrained: the mode for pretrained model from pretrainedmodels
        """
        super().__init__()
        net = pm.__dict__[arch](pretrained=pretrained)
        modules = list(net.children())[:-2]  # delete last layers: pooling and linear
        
        # add custom head
        modules += [nn.Sequential(
            # AdaptiveConcatPool2d is a concat of AdaptiveMaxPooling and AdaptiveAveragePooling 
            AdaptiveConcatPool2d(size=pooling_size),
            Flatten(),
            nn.BatchNorm1d(2 * pooling_size * pooling_size * last_conv_size),
            nn.Dropout(p),
            nn.Linear(2 * pooling_size * pooling_size * last_conv_size, num_classes)
        )]
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        logits = self.net(x)
        return logits

In [34]:
# [s for s in list(pm.__dict__) if not s.startswith('__')]

In [69]:
model = pm.__dict__['resnet34']()
modules = list(model.children())
len(modules)

10

In [66]:
model = pm.__dict__['se_resnext50_32x4d']()
modules = list(model.children())[:-2]
len(modules)

5

In [75]:
# the three whales of your pipelane are: the data, the model and the loss (hi, Jeremy)

# the data is represented in Kekas by DataOwner. It is a namedtuple with three fields:
# 'train_dl', 'val_dl', 'test_dl'
# For training process we will need at least two of them, and we can skip 'test_dl' for now
# so we will initialize it with `None` value.
dataowner = DataOwner(train_dl, val_dl, None)

# model is just a pytorch nn.Module, that we created vefore
model = Net(num_classes=2, arch='resnet34', last_conv_size=512)

# loss or criterion is also a pytorch nn.Module. For multiloss scenarios it can be a list of nn.Modules
# for our simple example let's use the standart cross entopy criterion
criterion = nn.CrossEntropyLoss()

In [76]:
# Also we need to specify, what model will do with each batch of data on each iteration
# We should define a `step_fn` function
# The code below repeats a `keker.default_step_fn` code to provide you with a concept of step function

def step_fn(model: torch.nn.Module,
            batch: torch.Tensor) -> torch.Tensor:
    """Determine what your model will do with your data.

    Args:
        model: the pytorch module to pass input in
        batch: the batch of data from the DataLoader

    Returns:
        The models forward pass results
    """
    
    # you could define here whatever logic you want
    inp = batch["image"]  # here we get an "image" from our dataset
    return model(inp)

In [77]:
# previous preparations was mostly out of scope of Kekas library (except DataKeks creation)
# Now let's dive into kekas a little bit

# firstly, we create a Keker - the core Kekas class, that provides all the keks for your pipeline
keker = Keker(model=model,
              dataowner=dataowner,
              criterion=nn.CrossEntropyLoss(),
              step_fn=step_fn,                    # previosly defined step function
              target_key="label",                 # remember, we defined it in the reader_fn for DataKek?
              metrics={"acc": accuracy},          # optional, you can not specify any metrics at all
              opt=torch.optim.Adam,               # optimizer class. if not specifying, 
                                                  # the SGD is used by default
              opt_params={"weight_decay": 1e-5},
              device=device)  # optimizer kwargs in dict format (optional too)

# Actually, there are a lot of params for kekers, but this out of scope of this example
# you can read about them in Keker's docstring (but who really reads the docs, huh?)

In [78]:
# before the start of the finetuning procedure let's freeeze all the layers except the last one - the head
# the `freeze` method is mostly inspired (or stolen) from fastai
# but you should define a model's attribute to deal with
# for example, our model is actually model.net, so we need to specify the 'net' attr
# also this method does not freezes batchnorm layers by default. To change this set `freeze_bn=True`
keker.freeze(model_attr="net")

In [79]:
# let's find an 'optimal' learning rate with learning rate find procedure
# for details please see the fastai course and this articles:
# https://arxiv.org/abs/1803.09820
# https://sgugger.github.io/how-do-you-find-a-good-learning-rate.html

# NOTE: this is an optional step and you can skip it and use your favorite learning rate

# you MUST specify the logdir to see graphics
# keker will write a tensorboard logs into this folder
# to see them start a tensorboard with `--logdir /path/to/logdir`

# keker.kek_lr(final_lr=0.1, logdir="/tmp/tensorboard")

In [80]:
lr = 5e-4
epochs = 5
keker.kek_one_cycle(max_lr=lr,                  # the maximum learning rate
                    cycle_len=epochs,           # number of epochs, actually, but not exactly
                    momentum_range=(0.95, 0.85),  # range of momentum changes
                    div_factor=25,                # max_lr / min_lr
                    increase_fraction=0.3)        # the part of cycle when learning rate increases

# If you don't understand these parameters, read this - https://sgugger.github.io/the-1cycle-policy.html
# NOTE: you cannot use schedulers and early stopping with one cycle!
# another options are the same as for `kek` method

Epoch 1/5: 100% 545/545 [00:39<00:00, 13.81it/s, loss=0.2082, val_loss=0.0833, acc=0.9711]
Epoch 2/5: 100% 545/545 [00:38<00:00, 19.75it/s, loss=0.1519, val_loss=0.0581, acc=0.9803]
Epoch 3/5: 100% 545/545 [00:38<00:00, 19.90it/s, loss=0.0969, val_loss=0.0510, acc=0.9837]
Epoch 4/5: 100% 545/545 [00:38<00:00, 14.03it/s, loss=0.1345, val_loss=0.0444, acc=0.9858]
Epoch 5/5: 100% 545/545 [00:39<00:00, 13.95it/s, loss=0.0950, val_loss=0.0425, acc=0.9873]


In [82]:
test_dk = DataKek(df=ds_df.iloc[test_inds], reader_fn=reader_fn, transforms=val_tfms)
test_dl = DataLoader(test_dk, batch_size=batch_size, num_workers=workers, shuffle=False)

In [124]:
test_outputs = keker.predict_loader(test_dl)

Predict: 100% 364/364 [00:09<00:00, 37.01it/s]


In [125]:
test_preds = (torch.sigmoid(torch.from_numpy(test_outputs[:,1])) > 0.5).numpy()

In [126]:
import numpy as np
test_classes = np.array([row[1].label == 'off' for row in test_dk.data], dtype=np.uint8)

In [127]:
(test_classes == test_preds).mean()

0.9866574847206679

In [None]:
# keker.kek(lr=1e-5,
#           epochs=5,
#           opt=torch.optim.Adam,
#           opt_params={"weight_decay": 1e-5},
#           sched=torch.optim.lr_scheduler.StepLR,
#           sched_params={"step_size":1, "gamma": 0.9},
#           logdir="/path/to/logdir",
#           cp_saver_params={
#               "savedir": "/path/to/save/dir",  
#               "metric": "acc",  
#               "n_best": 3,      
#               "prefix": "kek",  
#               "mode": "max"},     
#           early_stop_params={
#               "patience": 3,   
#               "metric": "acc", 
#               "mode": "min",   
#               "min_delta": 0
#           })