In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
#export
from collections import OrderedDict
import glob
import os
from operator import itemgetter
import json
from multiprocessing import cpu_count
import re
import sys
from pdb import set_trace
from pprint import pprint as pp
import warnings
warnings.filterwarnings('ignore')

import ancli
from imageio import imread
from jupytools import auto_set_trace, is_notebook
import numpy as np
import pandas as pd
import PIL.Image

from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as T
from torchvision.transforms.functional import to_tensor
from tqdm import tqdm
import pretrainedmodels
from visdom import Visdom

from catalyst.contrib.schedulers import OneCycleLR
from catalyst.data.dataset import ListDataset
from catalyst.dl.callbacks import AccuracyCallback, AUCCallback, F1ScoreCallback
from catalyst.dl.runner import SupervisedRunner
from catalyst.utils import get_one_hot

try:
    extended
except NameError:
    sys.path.insert(0, 'rxrx1-utils')
    import rxrx.io as rio
    
from basedir import ROOT, TRAIN, TEST, SAMPLE, NUM_CLASSES
from data_bunch import DataBunch, load_data

In [3]:
#export
# Module-level run configuration used by the notebook cells below.
# NOTE(review): `seed` is not handed to any RNG in the code visible here --
# confirm whether torch/numpy seeding is expected to happen somewhere else.
seed = 1
dev_id = 0
# Device used by the Test section cells; train() builds its own device from
# its `dev_id` argument and does not read this global.
device = torch.device(dev_id)
# Rebinds the `set_trace` name imported from pdb in the imports cell to
# whatever debugger hook jupytools.auto_set_trace() returns.
set_trace = auto_set_trace()
# os.environ['CUDA_VISIBLE_DEVICES'] = f'{dev_id}'

Version of set_trace(): ipdb


In [5]:
load_data??

## Model Definition

### Meta

In [None]:
#export
# Architecture name read by the Test section when it rebuilds the network with
# get_model(model_name, NUM_CLASSES). train() has its own `model_name`
# parameter (default 'resnet34') and does not read this global.
model_name = 'resnet50'

### Functions 

In [None]:
#export
def get_model(model_name, num_classes, pretrained='imagenet'):
    """Build a `pretrainedmodels` network that accepts 6-channel input.

    The factory registered under `model_name` is called with
    `num_classes=1000` and the given `pretrained` value. Its `last_linear`
    head is then replaced by a fresh `nn.Linear` with `num_classes` outputs,
    and its `conv1` stem is replaced by a 6-input-channel convolution whose
    two 3-channel halves both start from the original stem weights.

    Args:
        model_name: key into `pretrainedmodels.__dict__`.
        num_classes: number of outputs of the new classification head.
        pretrained: forwarded unchanged to the model factory.

    Returns:
        The modified model. It must expose `conv1` (a 3-input-channel
        convolution) and `last_linear` attributes.

    Raises:
        KeyError: if `model_name` is not found in `pretrainedmodels.__dict__`.
    """
    model_fn = pretrainedmodels.__dict__[model_name]
    model = model_fn(num_classes=1000, pretrained=pretrained)

    # Swap the classification head for one sized to this task.
    dim_feats = model.last_linear.in_features
    model.last_linear = nn.Linear(dim_feats, num_classes)

    # Mirror the geometry of the existing stem instead of hard-coding the
    # ResNet values (64 filters, 7x7, stride 2, padding 3), so the weight copy
    # below also fits backbones whose first convolution is shaped differently.
    old_conv = model.conv1
    has_bias = old_conv.bias is not None
    new_conv = nn.Conv2d(
        in_channels=6,
        out_channels=old_conv.out_channels,
        kernel_size=old_conv.kernel_size,
        stride=old_conv.stride,
        padding=old_conv.padding,
        dilation=old_conv.dilation,
        bias=has_bias)

    # Initialise both 3-channel halves from the original filters. The copy is
    # done under no_grad so autograd does not record the in-place writes.
    with torch.no_grad():
        new_conv.weight[:, 0:3] = old_conv.weight
        new_conv.weight[:, 3:6] = old_conv.weight
        if has_bias:
            new_conv.bias.copy_(old_conv.bias)

    model.conv1 = new_conv
    return model

## Train

### Helper utils

In [None]:
#export
class RollingLoss:
    """Exponentially smoothed loss tracker with start-up bias correction.

    `smooth` is the weight kept from the previous running value; the running
    value itself is stored in `prev` and starts at 0.
    """

    def __init__(self, smooth=0.98):
        self.smooth, self.prev = smooth, 0

    def __call__(self, curr, batch_no):
        """Fold `curr` into the running average and return the corrected value.

        The running average is divided by `1 - smooth**batch_no` to compensate
        for the zero initial value; only the uncorrected average is stored.
        """
        decay = self.smooth
        blended = decay*self.prev + (1 - decay)*curr
        corrected = blended/(1 - decay**batch_no)
        # Store only after the division succeeded, so a failing call leaves
        # the tracker state untouched.
        self.prev = blended
        return corrected

In [None]:
#export
def freeze_model(model):
    """Turn off gradient tracking for every parameter of `model`, in place."""
    for weight in model.parameters():
        weight.requires_grad_(False)

In [None]:
#export
def get_layer(model, key):
    """Resolve a dotted attribute path on `model`.

    `key` may name a direct attribute ("layer4") or a nested one
    ("first.second.third"); every dot-separated segment is looked up with
    `getattr` on the object found so far, and the final object is returned.
    """
    current, remaining = model, key
    while True:
        # Peel off the next path segment; `dot` is empty once none are left.
        segment, dot, remaining = remaining.partition('.')
        current = getattr(current, segment)
        if not dot:
            return current

In [None]:
#export
def unfreeze_layers(model, names):
    """Re-enable gradients for the parameters of each named layer.

    Every entry of `names` is resolved with `get_layer`, announced on stdout,
    and then all of that layer's parameters are marked as trainable.
    """
    for name in names:
        target = get_layer(model, name)
        print(f'Unfreezing layer {name}')
        for weight in target.parameters():
            weight.requires_grad_(True)

### Loop

In [None]:
#export
def train(epochs: int=1,
          batch_size: int=800,
          model_name: str='resnet34',
          logdir: str='/tmp/loops/',
          lrs: tuple=(1e-4, 1e-3, 5e-3),
          eta_min: float=1e-6,
          dev_id: int=1,
          visdom_host: str='0.0.0.0',
          visdom_port: int=9001):
    """Fine-tune a partially frozen network and checkpoint it after each epoch.

    The model from `get_model` is frozen except for `conv1`, `bn1`, `layer4`
    and `last_linear`. Those layers are optimised with AdamW under a cosine
    schedule with warm restarts that is stepped once per batch. Learning
    rates, the smoothed training loss and the validation accuracy are sent
    to a Visdom server.

    Args:
        epochs: number of passes over the training loader.
        batch_size: forwarded to `DataBunch().create`.
        model_name: architecture name forwarded to `get_model`.
        logdir: parent directory; checkpoints are written to
            `<logdir>/<model_name>_e<epochs>_b<batch_size>/`.
        lrs: three learning rates, in order, for the stem (`conv1` + `bn1`),
            `layer4` and `last_linear`.
        eta_min: minimum learning rate of the cosine schedule.
        dev_id: CUDA device index.
        visdom_host: Visdom server address.
        visdom_port: Visdom server port.

    Raises:
        KeyError: if VISDOM_USERNAME or VISDOM_PASSWORD is missing from the
            environment.

    Side effects:
        After every epoch the model `state_dict` is saved as
        `train.<epoch>.<acc>.pth`, where `<acc>` is the validation accuracy
        multiplied by 10,000 and rounded to an integer.
    """
    vis = Visdom(server=visdom_host, port=visdom_port,
                 username=os.environ['VISDOM_USERNAME'],
                 password=os.environ['VISDOM_PASSWORD'])

    experiment_id = f'{model_name}_e{epochs}_b{batch_size}'
    device = torch.device(f'cuda:{dev_id}')
    dataset = DataBunch().create(*load_data(), batch_size=batch_size)
    model = get_model(model_name, NUM_CLASSES).to(device)
    freeze_model(model)
    unfreeze_layers(model, ['conv1', 'bn1', 'layer4', 'last_linear'])

    loss_fn = nn.CrossEntropyLoss()
    conv, layer, head = lrs
    opt = torch.optim.AdamW([
        # bn1 is unfrozen above, so its parameters have to be given to the
        # optimizer as well; otherwise they collect gradients but are never
        # updated. They share the stem learning rate, which keeps the number
        # of param groups (and therefore of Visdom lr windows) at three.
        {'params': [*model.conv1.parameters(), *model.bn1.parameters()],
         'lr': conv},
        {'params': model.layer4.parameters(), 'lr': layer},
        {'params': model.last_linear.parameters(), 'lr': head}
    ], weight_decay=0.01)
    logdir = os.path.join(logdir, experiment_id)
    # The scheduler is stepped per batch, so the first restart period T_0 is
    # the number of batches in the training loader; each later period doubles.
    sched = CosineAnnealingWarmRestarts(
        opt, T_0=len(dataset['train']), T_mult=2, eta_min=eta_min)
    rolling_loss = RollingLoss()
    os.makedirs(logdir, exist_ok=True)
    iteration = 0  # global batch counter across epochs, 1-based once running

    for epoch in range(1, epochs+1):
        trn_dl = dataset['train']
        n = len(trn_dl)

        model.train()
        with tqdm(total=n) as bar:
            for i, batch in enumerate(trn_dl, 1):
                iteration += 1
                if i % 25 == 0:
                    # Plot the current learning rate of every param group.
                    for j, g in enumerate(opt.param_groups):
                        vis.line(X=[iteration], Y=[g['lr']],
                                 win=f'metrics{j}', name=f'lr{j}', update='append')
                bar.set_description(f'[epoch:{epoch}/{epochs}][{i}/{n}]')
                opt.zero_grad()
                x = batch['features'].to(device)
                y = batch['targets'].to(device)
                out = model(x)
                loss = loss_fn(out, y)
                loss.backward()
                # `iteration` already counts this batch, so it is the correct
                # exponent for the bias correction. Passing iteration+1
                # over-corrected and roughly halved the first reported loss.
                avg_loss = rolling_loss(loss.item(), iteration)
                opt.step()
                sched.step()
                bar.set_postfix(avg_loss=f'{avg_loss:.3f}')
                bar.update(1)
                vis.line(X=[iteration], Y=[avg_loss],
                         win='loss', name='avg_loss', update='append')

        val_dl = dataset['valid']
        n = len(val_dl)

        model.eval()
        with torch.no_grad():
            matches = []
            with tqdm(total=n) as bar:
                for batch in val_dl:
                    x = batch['features'].to(device)
                    y = batch['targets'].to(device)
                    out = model(x)
                    # softmax is monotonic, so argmax over the raw outputs
                    # selects the same class without the extra pass.
                    y_pred = out.argmax(dim=1)
                    matches.extend((y == y_pred).cpu().tolist())
                    bar.update(1)
            acc = np.mean(matches)
            vis.line(X=[epoch], Y=[acc], win='acc', name='val_acc', update='append')
            print(f'validation accuracy: {acc:2.2%}')
            # Accuracy goes into the file name as an integer (x 10,000) so the
            # name keeps exactly four dot-separated parts.
            acc_str = str(int(round(acc * 10_000, 0)))
            path = os.path.join(logdir, f'train.{epoch}.{acc_str}.pth')
            torch.save(model.state_dict(), path)

In [None]:
#export
# When executed as a script outside of a notebook, hand train() to
# ancli.make_cli; inside a notebook (or on import) nothing happens.
if __name__ == '__main__' and not is_notebook():
    ancli.make_cli(train)

In [None]:
# Notebook-only run (this cell carries no #export marker). model_name is passed
# explicitly but equals the default; batch_size is lowered from the default 800.
# With the remaining defaults, checkpoints go to /tmp/loops/resnet34_e1_b256.
# NOTE(review): the Test section reads from a different experiment directory.
train(model_name='resnet34', batch_size=256)

In [None]:
# TODO: report issue with one-hot smoothing AUC and accuracy metrics

## Test

In [1]:
# Rebuild the network and load the checkpoint with the highest validation
# accuracy from one experiment directory.
model = get_model(model_name, NUM_CLASSES)
checkpoints = []
checkpoints_path = '/tmp/loops/resnet50_e200_b100'
# train() names its files 'train.<epoch>.<acc>.pth'. Entries that do not have
# four dot-separated parts with an integer third part are skipped instead of
# breaking the tuple unpacking. Sorting the listing makes the choice among
# equal accuracies deterministic.
for filename in sorted(os.listdir(checkpoints_path)):
    parts = filename.split('.')
    if len(parts) != 4 or not parts[2].isdecimal():
        continue
    checkpoints.append((os.path.join(checkpoints_path, filename), int(parts[2])))
if not checkpoints:
    raise FileNotFoundError(f'No checkpoints found in {checkpoints_path}')
checkpoints.sort(key=itemgetter(1))
best, _ = checkpoints[-1]
print('Best checkpoint:', best)
# Deserialize onto the CPU first; the model is moved to `device` below.
model.load_state_dict(torch.load(best, map_location='cpu'))
model.eval()
model.to(device)
# Inference only: reuse the shared helper instead of repeating its loop.
freeze_model(model)

NameError: name 'get_model' is not defined

In [None]:
# Test loader built from the second element returned by load_data().
# NOTE(review): presumably that element holds the test records -- confirm
# against data_bunch.load_data.
test_dl = DataBunch().create_test(load_data()[1], batch_size=512)

In [None]:
# Imported under its own name so the module-level `tqdm` used by train() is
# not rebound to the notebook widget as a side effect of running this cell.
from tqdm import tqdm_notebook

# Collect one softmax-normalised score vector per test sample, in loader order.
preds = []
# Inference only: no_grad guarantees no autograd graph is built even if the
# model parameters were left trainable.
with torch.no_grad():
    for batch in tqdm_notebook(test_dl):
        out = model(batch['features'].to(device))
        y = out.softmax(dim=1)
        preds.extend(y.tolist())

In [None]:
filenames = sorted(list_files('~/data/protein/tmp/test'))


def _has_zero_sirna(filename):
    """True when the last '_'-separated token of the base name (extension stripped) is integer 0."""
    basename, _ = os.path.splitext(os.path.basename(filename))
    return int(basename.split('_')[-1]) == 0


# Pair every file with its prediction by position in the sorted listing.
# NOTE(review): zip() silently truncates if the two lists differ in length --
# confirm that the loader yields samples in this same sorted order.
paired = list(zip(filenames, preds))

# 1st, 3rd, 5th, ... entries of the listing (presumably site 1 -- confirm).
site1 = [pred for filename, pred in paired[::2] if _has_zero_sirna(filename)]

# 2nd, 4th, 6th, ... entries of the listing (presumably site 2 -- confirm).
site2 = [pred for filename, pred in paired[1::2] if _has_zero_sirna(filename)]

In [None]:
# Average the two per-site score matrices element-wise and take the index of
# the highest averaged score in each row.
t1, t2 = torch.tensor(site1), torch.tensor(site2)
site_mean = (t1 + t2)/2
avg_pred = site_mean.argmax(dim=1)
print(avg_pred.shape)

In [None]:
from IPython.display import FileLink

# Fill the sample submission's `sirna` column with the averaged predictions,
# write it next to the notebook and show a download link.
# NOTE(review): the input path is an absolute path under one user's home
# directory -- confirm whether it should come from basedir instead.
submission_path = 'submit.csv'
sample = pd.read_csv('/home/ck/data/protein/sample_submission.csv')
sample['sirna'] = avg_pred.tolist()
sample.to_csv(submission_path, index=False)
FileLink(submission_path)

In [None]:
# #export
# print('Saving the trained model')
# basedir = os.path.expanduser('~/data/protein/tmp/models')
# os.makedirs(basedir)
# torch.save(resnet, os.path.join(basedir, 'resnet50_simple.pth'))