In [None]:
# default_exp learner

In [None]:
# hide
# Re-import edited project modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2

from nbdev import *
from nbdev.export import notebook2script

In [None]:
# export

import logging
import math
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR, OneCycleLR
from torch.utils.data.dataloader import DataLoader

from tqdm import tqdm

## Utils

In [None]:
# export
class AverageMeter:
    """Running mean over a stream of scalar values.

    Args:
        store_vals: if True, every value passed to `update` is kept in `self.values`.
        store_avgs: if True, the running mean is appended to `self.avgs` each time
            `reset` is called after at least one `update`.

    Attributes:
        sum: sum of the values seen since the last reset.
        n: number of values seen since the last reset.
        avg: `sum / n`, or None when nothing has been recorded since the last reset.
    """

    def __init__(self, store_vals=False, store_avgs=False):
        self.store_vals = store_vals
        self.store_avgs = store_avgs
        # The history lists only exist when the corresponding flag is set.
        if store_vals: self.values = []
        if store_avgs: self.avgs = []
        self.sum, self.n, self.avg = 0, 0, None

    def update(self, v):
        """Record one value and refresh the running mean."""
        if self.store_vals: self.values.append(v)
        self.n += 1
        self.sum += v
        self.avg = self.sum/self.n

    def reset(self):
        """Archive the current mean (when `store_avgs` is set) and clear the accumulator."""
        # Compare against None rather than truthiness: a mean of exactly 0.0 is a
        # legitimate result and must be archived too.
        if self.store_avgs and self.avg is not None: self.avgs.append(self.avg)
        self.sum, self.n, self.avg = 0, 0, None

In [None]:
# export
def accuracy_binary(pred, targ):
    """Fraction of predictions that match binary targets.

    A prediction counts as class 1 when its score is strictly positive
    (e.g. a raw logit), otherwise as class 0.

    Args:
        pred: tensor of scores.
        targ: tensor of 0/1 targets holding the same number of elements as `pred`.

    Returns:
        Scalar tensor with the mean accuracy.
    """
    hard = (pred > 0).float()
    # A (N, 1) prediction compared with a (N,) target would broadcast to (N, N)
    # and yield a meaningless mean; align the shapes when the element counts agree.
    if hard.shape != targ.shape and hard.numel() == targ.numel():
        hard = hard.reshape(targ.shape)
    return (hard == targ).float().mean()

In [None]:
# hide
# Module-level logger used by Trainer for checkpoint and test-loss messages.
logger = logging.getLogger(__name__)


class Trainer:
    """Simple epoch-based training loop with AdamW and optional warmup/cosine LR decay.

    The model is called as ``model(x, y)`` and must return a ``(logits, loss)`` pair.

    Args:
        model: the module to train.
        train_dataset: dataset yielding ``(x, y)`` pairs for training.
        test_dataset: dataset yielding ``(x, y)`` pairs for evaluation, or None to skip it.
        config: object exposing ``ckpt_path``, ``weight_decay``, ``learning_rate``,
            ``betas``, ``batch_size``, ``num_workers``, ``grad_norm_clip``,
            ``lr_decay``, ``warmup_tokens``, ``final_tokens`` and ``max_epochs``.
    """

    def __init__(self, model, train_dataset, test_dataset, config):
        self.model = model
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.config = config
        self.tokens = 0 # counter used for learning rate decay

        # take over whatever gpus are on the system
        self.device = 'cpu'
        if torch.cuda.is_available():
            self.device = torch.cuda.current_device()
            self.model = torch.nn.DataParallel(self.model).to(self.device)

    def save_checkpoint(self):
        """Write the model's state dict to ``config.ckpt_path`` (no-op when it is None)."""
        if self.config.ckpt_path is None:
            return
        # Unwrap DataParallel so the saved keys carry no "module." prefix.
        ckpt_model = self.model.module if hasattr(self.model, "module") else self.model
        logger.info("saving %s", self.config.ckpt_path)
        torch.save(ckpt_model.state_dict(), self.config.ckpt_path)

    def train(self):
        """Train for ``config.max_epochs`` epochs, evaluating and checkpointing after each."""
        model, config = self.model, self.config

        # create the optimizer: parameters whose name contains one of these
        # fragments are excluded from weight decay
        no_decay = ["bias", "LayerNorm.weight"]
        params_decay = [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)]
        params_nodecay = [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)]
        optim_groups = [
            {"params": params_decay, "weight_decay": config.weight_decay},
            {"params": params_nodecay, "weight_decay": 0.0},
        ]
        optimizer = optim.AdamW(optim_groups, lr=config.learning_rate, betas=config.betas)

        def run_epoch(split, epoch):
            """Run one pass over the 'train' or 'test' split; `epoch` is only used for display."""
            is_train = split == 'train'
            model.train(is_train)
            data = self.train_dataset if is_train else self.test_dataset
            loader = DataLoader(data, batch_size=config.batch_size, num_workers=config.num_workers)

            losses = []
            pbar = tqdm(enumerate(loader), total=len(loader)) if is_train else enumerate(loader)
            for it, (x, y) in pbar:

                # place data on the correct device
                x = x.to(self.device)
                y = y.to(self.device)

                # forward the model
                with torch.set_grad_enabled(is_train):
                    logits, loss = model(x, y)
                    loss = loss.mean() # collapse all losses if they are scattered on multiple gpus
                    losses.append(loss.item())

                if is_train:

                    # backprop and update the parameters
                    model.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), config.grad_norm_clip)
                    optimizer.step()

                    # decay the learning rate based on our progress
                    if config.lr_decay:
                        # number of labels >= 0 processed this step, kept as a plain
                        # int so the counter does not live on the device as a tensor
                        self.tokens += int((y >= 0).sum())
                        if self.tokens < config.warmup_tokens:
                            # linear warmup
                            lr_mult = float(self.tokens) / float(max(1, config.warmup_tokens))
                        else:
                            # cosine learning rate decay, floored at 10% of the base rate
                            progress = float(self.tokens - config.warmup_tokens) / float(max(1, config.final_tokens - config.warmup_tokens))
                            lr_mult = max(0.1, 0.5 * (1.0 + math.cos(math.pi * progress)))
                        lr = config.learning_rate * lr_mult
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                    else:
                        lr = config.learning_rate

                    # report progress
                    pbar.set_description(f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}. lr {lr:e}")

            if not is_train:
                # An empty loader has no losses to average; report NaN instead of
                # triggering numpy's empty-mean warning.
                test_loss = float(np.mean(losses)) if losses else float("nan")
                logger.info("test loss: %f", test_loss)

        self.tokens = 0 # counter used for learning rate decay
        for epoch in range(config.max_epochs):

            run_epoch('train', epoch)
            if self.test_dataset is not None:
                run_epoch('test', epoch)

            self.save_checkpoint()

## Callbacks

In [None]:
# export
class Callback:
    """Base class for objects that hook into a learner's training loop.

    Args:
        learn: the learner this callback is attached to, stored as `self.learn`.
    """

    def __init__(self, learn):
        self.learn = learn

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails: any name the callback
        # does not define evaluates to None instead of raising AttributeError.
        return None

In [None]:
# hide
class TrainEvalCallback(Callback):
    """Keeps the learner's model mode and `training` flag in step with the current phase."""

    def before_train(self):
        """Switch the model to training mode and mark the learner as training."""
        learner = self.learn
        learner.model.train()
        learner.training = True

    def before_validate(self):
        """Switch the model to evaluation mode and mark the learner as not training."""
        learner = self.learn
        learner.model.eval()
        learner.training = False

In [None]:
# hide
class SaveModelCallback(Callback):
    """Placeholder for a model-saving callback; it adds nothing to `Callback` yet."""

## Learner

In [None]:
import numpy as np

# Seeded generator so the synthetic data is identical on every Restart & Run All.
rng = np.random.default_rng(0)
# 1600 rows = 100 batches of 16; 11 columns (DS uses the last one as the target).
data_train = rng.normal(size=(16*100, 11)).astype(np.float32)
data_valid = rng.normal(size=(16*100, 11)).astype(np.float32)

In [None]:
from torch.utils.data import DataLoader, Dataset

In [None]:
class DS(Dataset):
    """Dataset over a 2-D array: each row's last column is the target, the rest are features."""

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        features = self.data[idx, :-1]
        target = self.data[idx, -1]
        return features, target

In [None]:
# Wrap each split in a DataLoader producing batches of 16 rows.
train_dl, valid_dl = (
    DataLoader(DS(split), batch_size=16) for split in (data_train, data_valid)
)
# Ordered [train, valid].
dls = [train_dl, valid_dl]

In [None]:
# Grab the first training batch (features, targets) to probe the model with.
x, y = next(iter(train_dl))

In [None]:
# Small test network: 10 input features -> 10 hidden units (BatchNorm + ReLU) -> 1 output.
model = nn.Sequential(nn.Linear(10, 10),
                      nn.BatchNorm1d(10),
                      nn.ReLU(),
                      nn.Linear(10, 1))

In [None]:
# Sanity-check the forward pass on one batch; the cell displays the output shape.
out = model(x)
out.shape

torch.Size([16, 1])

In [None]:
# Show which learnable parameters a BatchNorm layer exposes.
for param_name, _param in nn.BatchNorm1d(10).named_parameters():
    print(param_name)

weight
bias


In [None]:
def group_params(model):
    """Split a model's trainable parameters into weight-decay and no-weight-decay groups.

    Biases and every parameter owned by a normalization layer (BatchNorm,
    LayerNorm, GroupNorm) go to the no-decay group; all other parameters go
    to the decay group.

    Args:
        model: any `nn.Module`; all of its submodules are visited.

    Returns:
        Tuple `(decay, no_decay)` of parameter lists.
    """
    norm_types = (nn.modules.batchnorm._BatchNorm, nn.LayerNorm, nn.GroupNorm)
    decay, no_decay = [], []
    seen = set()  # ids of parameters already assigned, so shared weights appear once
    for m in model.modules():
        # recurse=False: each module contributes only the parameters it owns
        # directly, so the owning layer's type decides the group.
        for name, p in m.named_parameters(recurse=False):
            if not p.requires_grad or id(p) in seen:
                continue
            seen.add(id(p))
            if isinstance(m, norm_types) or name.endswith("bias"):
                no_decay.append(p)
            else:
                decay.append(p)
    return decay, no_decay

In [None]:
# Iterating over the Sequential container yields its layers in order.
for layer in model:
    print(layer)

Linear(in_features=10, out_features=10, bias=True)
BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
ReLU()
Linear(in_features=10, out_features=1, bias=True)


In [None]:
# export
class LearnerV0:
    """Bare-bones train/validate loop around a model, an optimizer factory and a loss.

    Every batch from the dataloaders must unpack as ``(x_cat, x_cont, y)``; the
    model is called as ``model(x_cat, x_cont)``.

    Args:
        model: module to train; it is moved to the selected device.
        dataloaders: sequence ``[train, valid]`` or ``[train, valid, test]``.
        opt_func: callable ``opt_func(params, lr)`` returning an optimizer.
        loss_func: callable ``loss_func(pred, y)`` returning a scalar loss tensor.
        metrics: stored on the learner; validation currently always reports
            ``accuracy_binary`` regardless of this value.
        use_gpu: use CUDA when it is available.
        savepath: directory checkpoints are written to; created when missing.
    """

    def __init__(self, model, dataloaders, opt_func, loss_func, metrics=None, use_gpu=True, savepath='./models'):

        self.device = 'cuda' if (torch.cuda.is_available() and use_gpu) else 'cpu'
        self.model = model.to(self.device)

        self.train_dl = dataloaders[0]
        self.valid_dl = dataloaders[1]
        self.test_dl = dataloaders[2] if len(dataloaders)>2 else None

        self.opt_func = opt_func
        self.loss_func = loss_func
        self.metrics = metrics

        # Per-batch training losses are kept; for validation only the per-epoch mean is.
        self.train_losses = AverageMeter(store_vals=True)
        self.valid_losses = AverageMeter(store_avgs=True)
        self.accs = AverageMeter()

        self.savepath = Path(savepath)
        # parents/exist_ok: nested save paths work and an already existing (or
        # concurrently created) directory is not an error.
        self.savepath.mkdir(parents=True, exist_ok=True)
        self.training = True
        self.epoch = -1  # incremented at the start of every epoch, so the first epoch is 0

    def fit(self, epochs, lr=1e-2):
        """Train for `epochs` epochs with a fresh optimizer, validating and checkpointing after each."""
        self.optimizer = self.opt_func([p for p in self.model.parameters() if p.requires_grad], lr)
        for e in range(epochs):
            self.epoch += 1
            train_loss = self.train()
            self.train_losses.reset()

            valid_loss, acc = self.validate()
            self.valid_losses.reset()
            self.accs.reset()

            self.save_model()

    def train(self):
        """Run one optimization pass over the training loader; returns the mean training loss."""
        self.model.train()
        pbar = tqdm(self.train_dl)
        for x_cat, x_cont, y in pbar:
            x_cat = x_cat.to(self.device, dtype=torch.long)
            x_cont = x_cont.to(self.device)
            y = y.to(self.device)

            self.optimizer.zero_grad()
            pred = self.model(x_cat, x_cont)
            loss = self.loss_func(pred, y)

            loss.backward()
            self.optimizer.step()

            self.train_losses.update(loss.item())
            pbar.set_description(f'epoch {self.epoch+1}: train loss {self.train_losses.avg:.4f}')
        return self.train_losses.avg

    def validate(self):
        """Evaluate on the validation loader; returns ``(mean loss, mean binary accuracy)``."""
        self.model.eval()
        pbar = tqdm(self.valid_dl)
        for x_cat, x_cont, y in pbar:
            x_cat = x_cat.to(self.device, dtype=torch.long)
            x_cont = x_cont.to(self.device)
            y = y.to(self.device)

            with torch.no_grad():
                pred = self.model(x_cat, x_cont)
                loss = self.loss_func(pred, y)

            self.valid_losses.update(loss.item())
            self.accs.update(accuracy_binary(pred, y).item())
            pbar.set_description(f'epoch {self.epoch+1}: valid loss {self.valid_losses.avg:.4f}, accuracy {self.accs.avg :.4f}')

        return self.valid_losses.avg, self.accs.avg

    def save_model(self, fn='ckpt_', path=None):
        """Save the model's state dict as ``<path>/<fn><epoch>.pt``.

        Args:
            fn: file-name prefix; the current epoch index and ``.pt`` are appended.
            path: target directory (str or Path); defaults to ``self.savepath``.
        """
        # Accept plain strings as well as Path objects.
        path = Path(path) if path else self.savepath
        path.mkdir(parents=True, exist_ok=True)
        fn += str(self.epoch) + '.pt'
        torch.save(self.model.state_dict(), path/fn)

    def load_model(self, fn, path=None):
        """Load a state dict from ``<path>/<fn>`` into the model.

        Args:
            fn: checkpoint file name.
            path: directory holding the checkpoint (str or Path); defaults to ``self.savepath``.
        """
        path = Path(path) if path else self.savepath
        # map_location lets a checkpoint written on a GPU load on a CPU-only machine.
        self.model.load_state_dict(torch.load(path/fn, map_location=self.device))

In [None]:
# hide
# try:
#     self._split(b);                                  self('begin_batch')
#     self.pred = self.model(*self.xb);                self('after_pred')
#     self.loss = self.loss_func(self.pred, *self.yb); self('after_loss')
#     if not self.training: return
#     self.loss.backward();                            self('after_backward')
#     self.opt.step();                                 self('after_step')
#     self.opt.zero_grad()
# except CancelBatchException:                         self('after_cancel_batch')
# finally:                                             self('after_batch')

In [None]:
# hide
def fit(model, data):
    """Scratch stub: requests a batch from `data` and discards it; `model` is unused.

    NOTE(review): `get_batch` is not defined in the visible cells of this
    notebook, so calling this will fail unless it is provided elsewhere -- confirm.
    """
    get_batch(data)

In [None]:
# Fresh iterator over the training loader, consumed by the batch-counting cell below.
train_iter = iter(train_dl)

In [None]:
# Pull batches until the iterator runs dry and print how many it produced
# (gives up silently after 1000 batches).
for i in range(1000):
    if next(train_iter, None) is None:
        print(i)
        break

100


In [None]:
# nbdev: convert the project's notebooks into their Python modules.
notebook2script()

Converted 00_core.ipynb.
Converted 01_data.ipynb.
Converted 02_model.ipynb.
Converted 03_learner.ipynb.
Converted index.ipynb.
