In [1]:
from kekas import Keker, DataOwner, DataKek
from kekas.transformations import Transformer, to_torch, normalize
from kekas.metrics import accuracy, bce_accuracy
from kekas.modules import Flatten, AdaptiveConcatPool2d
from kekas.callbacks import Callback, Callbacks, DebuggerCallback
import pretrainedmodels as pm
from albumentations import Compose, JpegCompression, CLAHE, RandomRotate90, Transpose, ShiftScaleRotate, \
        Blur, OpticalDistortion, GridDistortion, HueSaturationValue, Flip, VerticalFlip

In [2]:
import argparse
from itertools import islice
import json
from pathlib import Path
import shutil
import warnings
from typing import Dict

import numpy as np
import pandas as pd
from sklearn.metrics import fbeta_score
from sklearn.exceptions import UndefinedMetricWarning
import torch
from torch import nn, cuda
from torch.optim import Adam
from torchvision import transforms
import tqdm

from imet.models import get_model
from imet.dataset import TrainDataset, TTADataset, get_ids, DATA_ROOT
from imet.transforms import train_transform, test_transform
from imet.utils import (
    write_event, load_model, mean_df,
    ON_KAGGLE, set_models_path_env, seed_everything, 
    _reduce_loss, _make_mask, binarize_prediction, N_CLASSES)
from imet.losses import loss_function
from imet.optimizers import optimizer
import cv2
from PIL import Image
from torch.utils.data import DataLoader

In [3]:
# Experiment configuration read by the cells below.
batch_size = 32
# Image size handed to train_transform/test_transform and to get_model.
input_size = 288
# Fold held out for validation; rows from every other fold are used for training.
fold = 0
# Architecture name passed to get_model.
# NOTE(review): the get_model cell rebinds `model` to its return value, so this
# cell must be re-run before that one can be re-run.
model = 'resnet34'

In [4]:
# Read the fold assignment table and split it: rows whose 'fold' equals `fold`
# are the validation set, every other row goes to training.
folds = pd.read_csv('folds.csv')
valid_fold = folds.loc[folds['fold'].eq(fold)]
train_fold = folds.loc[folds['fold'].ne(fold)]
# Directory that reader_fn reads the training images from.
train_root = DATA_ROOT / 'train'

In [5]:
# Build the network from the architecture name currently held in `model`.
# NOTE(review): this rebinds `model` from the name string to whatever get_model
# returns, so re-running this cell on its own passes that object back into
# get_model; re-run the configuration cell first.
model = get_model(model, num_classes=N_CLASSES, pretrained=True, input_size=input_size)

In [5]:
def reader_fn(i, row):
    """Load one sample (image + multi-hot labels) for DataKek.

    Args:
        i: row index supplied by the caller (unused here).
        row: mapping with an "id" entry (file stem of a PNG under
            ``train_root``) and an "attribute_ids" entry (whitespace-separated
            class indices as a string).

    Returns:
        dict with "image" (an RGB ``PIL.Image``) and "labels" (a tensor of
        length ``N_CLASSES`` holding 1 at every listed class index, 0 elsewhere).

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    image_path = train_root / f'{row["id"]}.png'
    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None rather
        # than raising; fail here with the path instead of letting cvtColor
        # raise an opaque assertion error.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # OpenCV decodes to BGR; convert to RGB before wrapping in a PIL image.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(image)
    labels = torch.zeros(N_CLASSES)
    for cls in row["attribute_ids"].split():
        labels[int(cls)] = 1
    return {"image": image, "labels": labels}


def get_transforms(dataset_key, size, p):
    """Build the training and validation transform pipelines for DataKek items.

    Args:
        dataset_key: key of the image entry in the dict returned by reader_fn.
        size: image size forwarded to ``train_transform`` and ``test_transform``.
        p: unused; kept so existing three-argument calls keep working.

    Returns:
        ``(train_tfms, val_tfms)``: two composed pipelines that differ only in
        the first (augmentation) stage.
    """
    # A Transformer is needed to apply a transformation to one entry
    # (`dataset_key`) of a DataKek element.
    # Honour the `size` argument instead of reading the notebook-level
    # `input_size`, which silently ignored what the caller passed.
    train_augs = Transformer(dataset_key, train_transform(size))
    val_augs = Transformer(dataset_key, test_transform(size))

    # Convert the augmented image to a numpy array before to_torch().
    to_array = Transformer(dataset_key, lambda x: np.array(x))

    normalize_tfms = transforms.Compose([
        Transformer(dataset_key, to_torch()),
        Transformer(dataset_key, normalize())
    ])

    train_tfms = transforms.Compose([train_augs, to_array, normalize_tfms])
    # Validation uses test_transform because we don't want to augment the val set yet.
    val_tfms = transforms.Compose([val_augs, to_array, normalize_tfms])

    return train_tfms, val_tfms

In [6]:
# Build the two pipelines for the "image" entry of each sample.
# The third argument (0.5) is get_transforms' `p` parameter, which that function does not use.
train_tfms, val_tfms = get_transforms("image", input_size, 0.5)

# Wrap each fold split in a DataKek: rows are read with reader_fn and then
# passed through the matching transform pipeline.
train_dk = DataKek(df=train_fold, reader_fn=reader_fn, transforms=train_tfms)
val_dk = DataKek(df=valid_fold, reader_fn=reader_fn, transforms=val_tfms)

In [7]:
# Training loader: reshuffled every epoch; drop_last=True discards the final incomplete batch.
train_dl = DataLoader(train_dk, batch_size=batch_size, num_workers=6, shuffle=True, drop_last=True)
# Validation loader: fixed order, no batches dropped.
val_dl = DataLoader(val_dk, batch_size=batch_size, num_workers=6, shuffle=False)

In [8]:
# Bundle the train and validation loaders for Keker. The third argument is left
# as None (presumably the test loader slot — confirm against the kekas docs).
dataowner = DataOwner(train_dl, val_dl, None)
# Loss applied to raw model outputs; fbeta below likewise applies a sigmoid to the predictions.
criterion = nn.BCEWithLogitsLoss()

In [9]:
def step_fn(model: torch.nn.Module,
            batch: Dict[str, torch.Tensor]) -> torch.Tensor:
    """Run the forward pass for one batch.

    Args:
        model: the pytorch module to pass the input to.
        batch: one batch from the DataLoader; a mapping whose "image" entry
            holds the model input (it is indexed by key, so it is not a bare
            tensor).

    Returns:
        The result of calling ``model`` on ``batch["image"]``.
    """
    # Any custom logic could go here; this notebook only needs the image entry.
    return model(batch["image"])

In [10]:
def fbeta(target: torch.Tensor,
          preds: torch.Tensor,
          thresh: float = 0.1,
          beta: float = 2) -> float:
    """Sample-averaged F-beta score for multi-label predictions.

    Args:
        target: ground-truth label tensor.
        preds: raw model outputs; a sigmoid is applied before thresholding.
        thresh: a label counts as predicted when its sigmoid output is
            strictly greater than this value.
        beta: the ``beta`` passed to ``sklearn.metrics.fbeta_score``.

    Returns:
        ``fbeta_score(target, binarized_preds, beta=beta, average='samples')``.
    """
    target_np = target.cpu().detach().numpy()
    probs_np = torch.sigmoid(preds).cpu().detach().numpy()
    # Binarize to 0/1 ints. Samples with no predicted label make scikit-learn
    # emit the "F-score is ill-defined" warning seen in the training output.
    preds_np = (probs_np > thresh).astype(int)
    return fbeta_score(target_np, preds_np, beta=beta, average='samples')

In [12]:
keker = Keker(model=model,
              dataowner=dataowner,
              criterion=criterion,
              step_fn=step_fn,                    # previously defined step function
              target_key="labels",                 # key of the targets in the dict returned by reader_fn
              metrics={"acc": bce_accuracy, "fbeta": fbeta},          # optional; metrics can be omitted entirely
              opt=torch.optim.Adam,               # optimizer class; if not specified,
                                                  # SGD is used by default
              opt_params={"weight_decay": 1e-5})

In [13]:
# Rebind `keker` to the result of to_fp16() (presumably a half-precision
# version of the trainer — confirm against the kekas docs).
keker = keker.to_fp16()

In [46]:
# freeze() is called before the LR search and the 1-epoch warm-up; the matching
# unfreeze() comes after that warm-up. (Presumably freezes the pretrained
# layers — confirm against the kekas docs.)
keker.freeze()

In [20]:
# LR finder run (the recorded output ends with "End of LRFinder"); its log goes
# to logdir/resnet34, which the plot_kek_lr cell below reads.
keker.kek_lr(final_lr=0.1, logdir="logdir/resnet34")

Epoch 1/1: 100% 1365/1365 [06:14<00:00,  3.86it/s, loss=0.0149]
End of LRFinder



In [21]:
# Plot the LR-finder log written to logdir/resnet34 by the kek_lr cell above.
keker.plot_kek_lr("logdir/resnet34")

In [47]:
# Single-epoch one-cycle run (cycle_len=1) between freeze() and unfreeze().
keker.kek_one_cycle(max_lr=1e-4, cycle_len=1, momentum_range=(0.95, 0.85), div_factor=30)

Epoch 1/1: 100% 1365/1365 [07:05<00:00,  3.80it/s, loss=0.0105, val_loss=0.0105, acc=0.0376]


In [48]:
# Undo the earlier freeze() before the longer training runs below.
keker.unfreeze()

In [49]:
# Repeat the LR finder after unfreeze(); logged separately under logdir/resnet34/1.
keker.kek_lr(final_lr=0.1, logdir="logdir/resnet34/1")

Epoch 1/1: 100% 1365/1365 [08:48<00:00,  2.66it/s, loss=0.0163]
End of LRFinder



In [50]:
# Plot the post-unfreeze LR-finder log from logdir/resnet34/1.
keker.plot_kek_lr("logdir/resnet34/1")

In [None]:
# Main fine-tuning run: 15 epochs of one-cycle at max_lr=3e-5.
# NOTE(review): the cell has no execution count and its recorded output stops
# during epoch 11, so this run apparently never completed. The output also has
# no fbeta column, unlike later runs — confirm which Keker produced it.
keker.kek_one_cycle(max_lr=3e-5, cycle_len=15, momentum_range=(0.95, 0.85), div_factor=30)

Epoch 1/15: 100% 1365/1365 [09:28<00:00,  2.54it/s, loss=0.0106, val_loss=0.0103, acc=0.0407]
Epoch 2/15: 100% 1365/1365 [09:19<00:00,  2.59it/s, loss=0.0101, val_loss=0.0100, acc=0.0490]
Epoch 3/15: 100% 1365/1365 [09:22<00:00,  2.68it/s, loss=0.0099, val_loss=0.0098, acc=0.0565]
Epoch 4/15: 100% 1365/1365 [09:34<00:00,  2.64it/s, loss=0.0096, val_loss=0.0096, acc=0.0614]
Epoch 5/15: 100% 1365/1365 [09:36<00:00,  2.63it/s, loss=0.0093, val_loss=0.0094, acc=0.0676]
Epoch 6/15: 100% 1365/1365 [09:38<00:00,  2.60it/s, loss=0.0090, val_loss=0.0093, acc=0.0742]
Epoch 7/15: 100% 1365/1365 [09:54<00:00,  2.54it/s, loss=0.0091, val_loss=0.0092, acc=0.0826]
Epoch 8/15: 100% 1365/1365 [09:37<00:00,  2.65it/s, loss=0.0090, val_loss=0.0090, acc=0.0860]
Epoch 9/15: 100% 1365/1365 [09:37<00:00,  2.57it/s, loss=0.0087, val_loss=0.0089, acc=0.0888]
Epoch 10/15: 100% 1365/1365 [09:55<00:00,  2.58it/s, loss=0.0089, val_loss=0.0088, acc=0.0918]
Epoch 11/15: 100% 1365/1365 [09:54<00:00,  2.55it/s, loss=0

In [14]:
# NOTE(review): nothing visible in this notebook writes 'keker/resnet34', so
# this load depends on a checkpoint created outside the cells shown; a fresh
# Restart & Run All only works if that file already exists.
keker.load('keker/resnet34')

In [15]:
# Continue from the loaded checkpoint with a 4-epoch one-cycle run; in the
# recorded output fbeta rises from 0.5164 to 0.5466 over the four epochs.
keker.kek_one_cycle(max_lr=3e-5, cycle_len=4, momentum_range=(0.95, 0.85), div_factor=30)

Epoch 1/4: 100% 1365/1365 [08:26<00:00,  2.71it/s, loss=0.0084]


F-score is ill-defined and being set to 0.0 in samples with no predicted labels.



Epoch 1/4: 100% 1365/1365 [09:19<00:00,  2.71it/s, loss=0.0084, val_loss=0.0090, acc=0.0872, fbeta=0.5164]
Epoch 2/4: 100% 1365/1365 [09:24<00:00,  2.69it/s, loss=0.0088, val_loss=0.0088, acc=0.0925, fbeta=0.5245]
Epoch 3/4: 100% 1365/1365 [09:24<00:00,  2.73it/s, loss=0.0081, val_loss=0.0086, acc=0.0997, fbeta=0.5364]
Epoch 4/4: 100% 1365/1365 [09:22<00:00,  2.74it/s, loss=0.0077, val_loss=0.0085, acc=0.1071, fbeta=0.5466]


In [17]:
# Another LR finder run, this time with final_lr=0.01, logged under logdir/resnet34/2.
keker.kek_lr(final_lr=0.01, logdir="logdir/resnet34/2")

Epoch 1/1: 100% 1365/1365 [09:00<00:00,  2.58it/s, loss=0.0154]
End of LRFinder



In [18]:
# Plot the LR-finder log from logdir/resnet34/2.
keker.plot_kek_lr("logdir/resnet34/2")

In [20]:
# 10-epoch one-cycle run at max_lr=2e-5. In the recorded output validation got
# worse after epoch 1 (val_loss 0.0085 -> 0.0087, fbeta 0.5432 -> 0.5304) and
# the run was interrupted by hand during epoch 5.
keker.kek_one_cycle(max_lr=2e-5, cycle_len=10, momentum_range=(0.95, 0.85), div_factor=30)

Epoch 1/10: 100% 1365/1365 [08:48<00:00,  2.64it/s, loss=0.0077]


F-score is ill-defined and being set to 0.0 in samples with no predicted labels.



Epoch 1/10: 100% 1365/1365 [09:42<00:00,  2.64it/s, loss=0.0077, val_loss=0.0085, acc=0.1054, fbeta=0.5432]
Epoch 2/10: 100% 1365/1365 [09:41<00:00,  2.73it/s, loss=0.0084, val_loss=0.0086, acc=0.1003, fbeta=0.5368]
Epoch 3/10: 100% 1365/1365 [08:58<00:00,  2.90it/s, loss=0.0082, val_loss=0.0087, acc=0.0980, fbeta=0.5302]
Epoch 4/10: 100% 1365/1365 [08:47<00:00,  2.92it/s, loss=0.0081, val_loss=0.0087, acc=0.1037, fbeta=0.5304]
Epoch 5/10:  15% 203/1365 [01:15<07:26,  2.60it/s, loss=0.0080]


KeyboardInterrupt: 

In [11]:
# Switch to ResNet-50: bind `model` directly to what get_model builds for that
# architecture name, with the same class count and input size as before.
model = get_model(
    'resnet50',
    num_classes=N_CLASSES,
    pretrained=True,
    input_size=input_size,
)

In [12]:
# Second trainer, for the ResNet-50 model: same dataowner, criterion, step
# function, target key and metrics as the first Keker, but with torch.optim.SGD
# and no opt_params, and with to_fp16() chained onto the constructor.
keker = Keker(model=model,
              dataowner=dataowner,
              criterion=criterion,
              step_fn=step_fn,
              target_key="labels",
              metrics={"acc": bce_accuracy, "fbeta": fbeta},
              opt=torch.optim.SGD).to_fp16()

In [13]:
# Same sequence as for ResNet-34: freeze() first, then LR search and a 1-epoch
# warm-up, then unfreeze().
keker.freeze()

In [26]:
# LR finder for the ResNet-50 trainer, logged under logdir/resnet50.
# NOTE(review): this cell's execution count (26) is higher than that of the
# cells after it, so the recorded output comes from a different pass through
# the notebook.
keker.kek_lr(final_lr=0.01, logdir="logdir/resnet50")

Epoch 1/1: 100% 1365/1365 [08:54<00:00,  2.62it/s, loss=0.5872]
End of LRFinder



In [14]:
# Plot the LR-finder log from logdir/resnet50.
keker.plot_kek_lr("logdir/resnet50")

In [15]:
# Single-epoch one-cycle warm-up after freeze().
# NOTE(review): the recorded output shows 2731 iterations per epoch, versus 1365
# in every ResNet-34 run and in the LR-finder cell above. The loaders were
# apparently rebuilt with a different batch size between executions, but no
# visible cell does that — confirm the batch size actually used.
keker.kek_one_cycle(max_lr=2e-3, cycle_len=1, momentum_range=(0.95, 0.85), div_factor=25)

Epoch 1/1: 100% 2731/2731 [09:20<00:00,  5.02it/s, loss=0.1031]


F-score is ill-defined and being set to 0.0 in samples with no predicted labels.



Epoch 1/1: 100% 2731/2731 [10:27<00:00,  5.02it/s, loss=0.1031, val_loss=0.1014, acc=0.0000, fbeta=0.0391]


In [16]:
# Undo freeze() before the longer ResNet-50 runs below.
keker.unfreeze()

In [17]:
# LR finder after unfreeze(), logged under logdir/resnet50/0.
keker.kek_lr(final_lr=0.01, logdir="logdir/resnet50/0")

Epoch 1/1: 100% 2731/2731 [12:19<00:00,  3.75it/s, loss=0.0924]
End of LRFinder



In [18]:
# Plot the LR-finder log from logdir/resnet50/0.
keker.plot_kek_lr("logdir/resnet50/0")

In [19]:
# 10-epoch one-cycle run at max_lr=3e-4. The recorded run was interrupted by
# hand during epoch 6; acc stayed at 0.0000 and fbeta reached 0.1525 by epoch 5.
keker.kek_one_cycle(max_lr=3e-4, cycle_len=10, momentum_range=(0.95, 0.85), div_factor=25)

Epoch 1/10: 100% 2731/2731 [13:37<00:00,  3.63it/s, loss=0.0946, val_loss=0.0928, acc=0.0000, fbeta=0.0542]
Epoch 2/10: 100% 2731/2731 [13:25<00:00,  3.72it/s, loss=0.0821, val_loss=0.0805, acc=0.0000, fbeta=0.0930]
Epoch 3/10: 100% 2731/2731 [12:44<00:00,  4.06it/s, loss=0.0708, val_loss=0.0700, acc=0.0000, fbeta=0.1336]
Epoch 4/10: 100% 2731/2731 [12:31<00:00,  3.91it/s, loss=0.0622, val_loss=0.0616, acc=0.0000, fbeta=0.1512]
Epoch 5/10: 100% 2731/2731 [12:33<00:00,  4.05it/s, loss=0.0563, val_loss=0.0554, acc=0.0000, fbeta=0.1525]
Epoch 6/10:  27% 732/2731 [03:12<08:43,  3.82it/s, loss=0.0549]


Traceback (most recent call last):
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
Traceback (most recent call last):
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/home/allerria/anaco

KeyboardInterrupt: 

In [20]:
# Restart with a higher peak rate (max_lr=2e-3) for another 10-epoch cycle. The
# recorded run was interrupted by hand during epoch 8; fbeta stayed between
# 0.1383 and 0.1674 and acc at 0.0000.
keker.kek_one_cycle(max_lr=2e-3, cycle_len=10, momentum_range=(0.95, 0.85), div_factor=25)

Epoch 1/10: 100% 2731/2731 [12:36<00:00,  3.98it/s, loss=0.0430, val_loss=0.0421, acc=0.0000, fbeta=0.1419]
Epoch 2/10: 100% 2731/2731 [12:35<00:00,  3.97it/s, loss=0.0323, val_loss=0.0315, acc=0.0000, fbeta=0.1383]
Epoch 3/10: 100% 2731/2731 [13:09<00:00,  3.75it/s, loss=0.0260, val_loss=0.0254, acc=0.0000, fbeta=0.1431]
Epoch 4/10: 100% 2731/2731 [13:33<00:00,  3.78it/s, loss=0.0222, val_loss=0.0221, acc=0.0000, fbeta=0.1491]
Epoch 5/10: 100% 2731/2731 [12:59<00:00,  3.91it/s, loss=0.0203, val_loss=0.0202, acc=0.0000, fbeta=0.1550]
Epoch 6/10: 100% 2731/2731 [12:58<00:00,  3.88it/s, loss=0.0192, val_loss=0.0191, acc=0.0000, fbeta=0.1585]
Epoch 7/10: 100% 2731/2731 [12:59<00:00,  3.72it/s, loss=0.0185, val_loss=0.0182, acc=0.0000, fbeta=0.1674]
Epoch 8/10:   3% 82/2731 [00:22<11:34,  3.81it/s, loss=0.0181]


Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/allerria/anaconda3/envs/images/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    

KeyboardInterrupt: 