In [None]:
# default_exp learner
# default_cls_lvl 3

In [None]:
#export
from seqdata.core import *
from seqdata.models.core import *
from fastai.basics import *
from fastai.callback.progress import *
from fastai.callback.tracker import *

# Learner
> PyTorch modules and fastai learners for training models on sequential data

In [None]:
# Demo dataloaders used by every example cell below:
#   * input block:  columns 'current' and 'voltage'
#   * target block: column 'voltage'
# Items come from DfHDFCreateWindows (win_sz=1001, stp_sz=1000) over the HDF files in test_data/.
seq = DataBlock(blocks=(SequenceBlock.from_hdf(['current','voltage'],TensorSequencesInput,clm_shift=[-1,-1]),
                        SequenceBlock.from_hdf(['voltage'],TensorSequencesOutput,clm_shift=[1])),
                 get_items=CreateDict([DfHDFCreateWindows(win_sz=1000+1,stp_sz=1000,clm='current')]),
                 splitter=ApplyToDict(ParentSplitter()))
db = seq.dataloaders(get_hdf_files('test_data/'))

In [None]:
# Baseline: a SimpleRNN with 2 input features and 1 output, trained for one epoch.
model = SimpleRNN(2,1)
# Build the Learner first and fit afterwards, so that `lrn` refers to the Learner
# itself and not to the return value of `fit`.
lrn = Learner(db,model,loss_func=nn.MSELoss())
lrn.fit(1)

epoch,train_loss,valid_loss,time
0,13.474635,12.615745,00:02


## Callbacks

In [None]:
#export
class GradientClipping(Callback):
    "`Callback` that clips the gradient norm of every mini-batch at `clip_val`"
    def __init__(self, clip_val=10):
        self.clip_val = clip_val

    def after_backward(self):
        "Rescale the model gradients in place once the backward pass has finished"
        params = self.model.parameters()
        nn.utils.clip_grad_norm_(params, self.clip_val)

In [None]:
# Smoke test: one training epoch with gradient-norm clipping at 10
Learner(db,model,loss_func=nn.MSELoss(),cbs=GradientClipping(10)).fit(1)

epoch,train_loss,valid_loss,time
0,10.943971,9.328612,00:02


In [None]:
#export
class WeightClipping(Callback):
    "`Callback` that clamps every parameter of `module` to [-`clip_limit`, `clip_limit`] after every iteration"
    def __init__(self, module, clip_limit = 1):
        self.module, self.clip_limit = module, clip_limit

    def after_batch(self):
        "Clamp the raw parameter data in place"
        lower, upper = -self.clip_limit, self.clip_limit
        for param in self.module.parameters():
            param.data.clamp_(lower, upper)


In [None]:
# Smoke test: one training epoch with the parameters of `model` clamped to [-1, 1]
Learner(db,model,loss_func=nn.MSELoss(),cbs=WeightClipping(model,clip_limit=1)).fit(1)

epoch,train_loss,valid_loss,time
0,5.454536,0.279442,00:02


In [None]:
#export
class SkipFirstNCallback(Callback):
    "`Callback` that drops the first `n_skip` samples from prediction and target while training"
    def __init__(self, n_skip = 0):
        self.n_skip = n_skip

    def after_pred(self):
        "Trim `pred` and `yb` along axis 1 before the loss is computed"
        if not self.training: return
        # If TBPTT is used, only the first mini-batch (flagged by `rnn_reset`) is trimmed;
        # a training dataloader without that attribute is trimmed on every batch.
        if not getattr(self.learn.dls.train, 'rnn_reset', True): return

        keep = slice(self.n_skip, None)
        self.learn.pred = self.pred[:, keep]
        targets = self.yb
        if isinstance(targets, tuple):
            self.learn.yb = tuple(y[:, keep] for y in targets)
        else:
            self.learn.yb = targets[:, keep]


In [None]:
#export
class SkipNaNCallback(Callback):
    "`Callback` that cancels every mini-batch whose loss is NaN"
    def after_loss(self):
        "Clear the gradients and abort the current batch when the loss is NaN"
        loss_is_nan = torch.isnan(self.learn.loss)
        if not loss_is_nan: return
        self.opt.zero_grad()
        raise CancelBatchException()

In [None]:
#export
class VarySeqLen(Callback):
    "`Callback` that truncates every mini-batch to a random sequence length of at least `min_len`"
    def __init__(self, min_len = 50):
        self.min_len = min_len

    def before_batch(self):
        "Cut inputs and targets along axis 1 to a randomly drawn length"
        lx = self.xb[0].shape[1]
        ly = self.yb[0].shape[1]
        # `random.randint` includes both bounds and raises on an empty range,
        # so the lower bound is capped at the available target length.
        lim = random.randint(min(self.min_len, ly), ly)
        n_cut = ly - lim  # number of trailing samples removed from the targets

        if ly < lx:
            # Inputs are longer than the targets: remove the same number of trailing samples.
            # An explicit end index is needed because `x[:, :-0]` would be empty whenever
            # the full length is drawn (n_cut == 0).
            self.learn.xb = tuple(x[:, :max(x.shape[1] - n_cut, 0)] for x in self.xb)
        else:
            self.learn.xb = tuple(x[:, :lim] for x in self.xb)

        self.learn.yb = tuple(y[:, :lim] for y in self.yb)

In [None]:
# Smoke test: one training epoch with randomly shortened mini-batches (min_len=10)
Learner(db,model,loss_func=nn.MSELoss(),cbs=VarySeqLen(10)).fit(1)

epoch,train_loss,valid_loss,time
0,0.683654,0.301948,00:02


In [None]:
#export
from fastai.callback.hook import *
@delegates()
class TimeSeriesRegularizer(HookCallback):
    "`Callback` that adds AR and TAR penalties, computed from the output of the hooked layer, to the loss"
    run_before=TrainEvalCallback
    def __init__(self,alpha=0.0, beta=0.0,dim = None,detach=False, **kwargs):
        super().__init__(detach=detach,**kwargs)
        store_attr('alpha,beta,dim')

    def hook(self, m, i, o):
        "Remember the hooked output; for non-tensor outputs the first element is used"
        self.out = o if isinstance(o,torch.Tensor) else o[0]

        # Without an explicit `dim`, the larger of axes 1 and 2 is taken as the time axis
        if self.dim is None:
            shape = self.out.shape
            self.dim = np.argmax([0,shape[1],shape[2]])

    def after_loss(self):
        "Add the penalties to `loss_grad`; does nothing outside of training"
        if not self.training: return

        acts = self.out.float()

        if self.alpha != 0.:
            # alpha term: mean squared activation
            self.learn.loss_grad += float(self.alpha) * acts.pow(2).mean()

        if self.beta != 0. and acts.shape[self.dim]>1:
            # beta term: mean squared difference between neighbouring steps of the time axis
            if self.dim == 1:
                step_diff = acts[:,1:] - acts[:,:-1]
            else:
                step_diff = acts[:,:,1:] - acts[:,:,:-1]
            self.learn.loss_grad += float(self.beta) * step_diff.pow(2).mean()

In [None]:
#export
class ARInitCB(Callback):
    '''Appends the target tensors to the input tuple for autoregression'''
    def before_batch(self):
        "Extend `xb` with every element of `yb`"
        self.learn.xb = (*self.xb, *self.yb)

In [None]:
# One plain training epoch for reference; no callback is attached in this cell
Learner(db,model,loss_func=nn.MSELoss()).fit(1)

epoch,train_loss,valid_loss,time
0,0.138382,0.112536,00:02


In [None]:
#export
from matplotlib.lines import Line2D
def plot_grad_flow(named_parameters):
    '''Plots the gradients flowing through different layers in the net during training.
    Can be used for checking for possible gradient vanishing / exploding problems.
    *modified version of https://discuss.pytorch.org/t/check-gradient-flow-in-network/15063/8*

    Call multiple times for transparent overlays, representing the mean gradients.

    `named_parameters` is an iterable of `(name, parameter)` pairs, e.g. `model.named_parameters()`.
    Parameters that do not require gradients and parameters with "bias" in their name are skipped;
    parameters without a gradient are drawn with a value of 0.
    '''
    layers, ave_grads, max_grads = [], [], []
    for name, param in named_parameters:
        if not param.requires_grad or "bias" in name: continue
        layers.append(name)
        if param.grad is None:
            ave_grads.append(0.0)
            max_grads.append(0.0)
        else:
            # Convert to plain Python floats: matplotlib cannot consume tensors
            # that live on a non-CPU device.
            grad_abs = param.grad.detach().abs()
            ave_grads.append(grad_abs.mean().item())
            max_grads.append(grad_abs.max().item())

    positions = np.arange(len(max_grads))
    plt.bar(positions, max_grads, alpha=0.1, lw=1, color="c")
    plt.bar(positions, ave_grads, alpha=0.1, lw=1, color="b")
    plt.hlines(0, 0, len(ave_grads)+1, lw=2, color="k" )
    plt.xticks(range(0,len(ave_grads), 1), layers, rotation="vertical")
    plt.xlim(left=0, right=len(ave_grads))
    plt.xlabel("Layers")
    plt.ylabel("Gradient")
    plt.title("Gradient flow")
    plt.grid(True)
    plt.yscale('log')
    plt.tight_layout()
    plt.legend([Line2D([0], [0], color="c", lw=4),
                Line2D([0], [0], color="b", lw=4),
                Line2D([0], [0], color="k", lw=4)], ['max-gradient', 'mean-gradient', 'zero-gradient'])

In [None]:
#export

class CB_PlotGradient(Callback):
    '''Plot the Gradient Distribution for every trainable parameter'''

    def __init__(self, n_draws=20): self.n_draws = n_draws

    def before_fit(self):
        '''Create a new figure to plot in'''
        plt.figure()
        plt.tight_layout()

    # The event was formerly named `begin_fit`; the alias keeps direct calls and
    # older training loops working.
    begin_fit = before_fit

    def after_backward(self):
        '''plot the gradient for every layer of the current minibatch'''
        # Draw roughly `n_draws` overlays over the whole training: the interval is the
        # total number of iterations divided by `n_draws`, but never less than 1.
        interval = max(self.n_epoch*self.n_iter//self.n_draws,1)
        if self.iter % interval == 0:
            plot_grad_flow(self.learn.model.named_parameters())


In [None]:
# Smoke test: one training epoch with the gradient-flow plotting callback attached
Learner(db,model,loss_func=nn.MSELoss(),cbs=CB_PlotGradient()).fit(1)

epoch,train_loss,valid_loss,time
0,0.12974,0.103031,00:02


## Loss Functions

In [None]:
#export
import functools

def ignore_nan(func):
    '''Decorator that removes NaN targets before `func` is executed, apply to functions such as mse.

    The last positional argument is treated as the target. Every position along the leading
    axes where any feature (last axis) of the target is NaN is removed from all positional
    arguments, so `func` receives tensors of shape `(n_valid, n_features)`.
    Keyword arguments are passed through unchanged.
    '''
    @functools.wraps(func)
    def ignore_nan_decorator(*args, **kwargs):
        # A position is invalid as soon as one of its target features is NaN;
        # checking a single feature would let NaNs of the other features reach `func`.
        mask = ~torch.isnan(args[-1]).any(dim=-1)
        args = tuple(x[mask] for x in args)
        return func(*args, **kwargs)
    return ignore_nan_decorator

In [None]:
# Test tensors: the target is all ones with time step 20 set to NaN in every sequence,
# the prediction is constantly 1.1
n = 1000
y_t = torch.ones(32,n,6)
y_t[:,20]=np.nan
y_p = torch.ones(32,n,6)*1.1

In [None]:
# The element-wise NaN mask has the same shape as the target
(~torch.isnan(y_t)).shape

torch.Size([32, 1000, 6])

In [None]:
# Shape of the target tensor, for comparison with the mask shape
y_t.shape

torch.Size([32, 1000, 6])

In [None]:
# The plain mse propagates the NaN targets into the result
assert torch.isnan(mse(y_p,y_t))

In [None]:
#export
# NaN-tolerant variant of `mse`, built with the `ignore_nan` decorator
mse_nan = ignore_nan(mse)

In [None]:
# With the NaN time step masked out the error is (1.1 - 1)^2 = 0.01
test_close(mse_nan(y_p,y_t),0.01)

In [None]:
#export
import functools

def float64_func(func):
    '''Wrap `func` so that it computes in float64 and returns the result in the dtype of its first argument'''
    @functools.wraps(func)
    def float64_func_decorator(*args, **kwargs):
        out_dtype = args[0].dtype
        # Promote tensor arguments to double precision; everything else is passed through untouched
        promoted = tuple(a.double() if isinstance(a, Tensor) else a for a in args)
        result = func(*promoted, **kwargs)
        return result.type(out_dtype)
    return float64_func_decorator

In [None]:
# Smoke test: one training epoch with the MSE loss evaluated internally in float64
Learner(db,model,loss_func=float64_func(nn.MSELoss())).fit(1)

epoch,train_loss,valid_loss,time
0,0.105287,0.091099,00:02


In [None]:
#export
def SkipNLoss(fn,n_skip=0):
    '''Wrap the loss or metric `fn` so that the first `n_skip` samples along axis 1 of input and target are ignored'''
    keep = slice(n_skip, None)

    @functools.wraps(fn)
    def _inner( input, target):
        trimmed_input = input[:, keep]
        trimmed_target = target[:, keep]
        return fn(trimmed_input, trimmed_target)

    return _inner

In [None]:
# Smoke test: one training epoch with an MSE loss that ignores the first 30 samples of every sequence
Learner(db,model,loss_func=SkipNLoss(nn.MSELoss(),n_skip=30)).fit(1)

epoch,train_loss,valid_loss,time
0,0.142927,0.050507,00:02


In [None]:
#export
def fun_rmse(inp, targ):
    '''Root mean squared error, written as a plain function instead of an AccumMetric'''
    mean_sq_err = F.mse_loss(inp, targ)
    return torch.sqrt(mean_sq_err)

In [None]:
# Smoke test: report the RMSE, ignoring the first 30 samples, as a metric
Learner(db,model,loss_func=nn.MSELoss(),metrics=SkipNLoss(fun_rmse,n_skip=30)).fit(1)

epoch,train_loss,valid_loss,fun_rmse,time
0,0.086327,0.075763,0.157558,00:02


In [None]:
#export
def norm_rmse(inp, targ):
    '''RMSE scaled by a factor of 100, written as a plain function instead of an AccumMetric'''
    rmse = fun_rmse(inp, targ)
    return rmse*100

In [None]:
# Smoke test: report the RMSE scaled by 100, ignoring the first 30 samples, as a metric
Learner(db,model,loss_func=nn.MSELoss(),metrics=SkipNLoss(norm_rmse,n_skip=30)).fit(1)

epoch,train_loss,valid_loss,norm_rmse,time
0,0.078408,0.070275,16.06356,00:02


In [None]:
#export
def mean_vaf(inp,targ):
    '''Variance accounted for (VAF) in percent, computed over all elements of the tensors'''
    residual_var = (targ-inp).var()
    explained = 1-(residual_var/targ.var())
    return explained*100

In [None]:
# Smoke test: report the VAF, ignoring the first 30 samples, as a metric
Learner(db,model,loss_func=nn.MSELoss(),metrics=SkipNLoss(mean_vaf,n_skip=30)).fit(1)

epoch,train_loss,valid_loss,mean_vaf,time
0,0.070409,0.054628,91.607941,00:02


# Create Learner Models
Create learners for different kinds of models, each with suitable parameters and regularization.

In [None]:
#export
def get_inp_out_size(db):
    '''Return the size of the last axis of the first (input) and second (output) element of one batch of `db`'''
    batch = db.one_batch()
    return batch[0].shape[-1], batch[1].shape[-1]

In [None]:
# The demo dataloaders provide 2 input features and 1 output feature
test_eq(get_inp_out_size(db),(2,1)) 

## RNN Learner
The learners include model-specific optimizations. Excluding the first `n_skip` samples of every sequence (the transient phase) from the loss function greatly improves training stability.

In [None]:
#export
@delegates(SimpleRNN, keep=True)
def RNNLearner(db,loss_func=nn.MSELoss(),metrics=[fun_rmse],n_skip=0,cbs=None,**kwargs):
    '''Create a `Learner` with a `SimpleRNN` sized to the data in `db`.

    The loss function and every metric ignore the first `n_skip` samples of each sequence.
    Additional keyword arguments are forwarded to `SimpleRNN`.
    '''
    n_in,n_out = get_inp_out_size(db)
    model = SimpleRNN(n_in,n_out,**kwargs)

    skipped_metrics = [SkipNLoss(m,n_skip=n_skip) for m in metrics]
    skipped_loss = SkipNLoss(loss_func,n_skip=n_skip)

    return Learner(db,model,loss_func=skipped_loss,opt_func=ranger,metrics=skipped_metrics,cbs=cbs)

In [None]:
# Smoke test: one training epoch of a GRU-based RNNLearner with a learning rate of 1e-4
RNNLearner(db,rnn_type='gru').fit(1,1e-4)

epoch,train_loss,valid_loss,fun_rmse,time
0,14.738735,14.865482,3.840372,00:02


## TCN Learner
Performs better on multi-input data. Higher beta values yield a much smoother prediction. Much faster than RNNs at prediction time.

In [None]:
#export
@delegates(TCN, keep=True)
def TCNLearner(db,hl_depth=3,loss_func=nn.MSELoss(),metrics=[fun_rmse],n_skip=0,cbs=None,**kwargs):
    '''Create a `Learner` with a `TCN` of depth `hl_depth` sized to the data in `db`.

    The loss function and every metric ignore the first `n_skip` samples of each sequence.
    Passing `n_skip=None` explicitly selects `2**hl_depth`. Additional keyword arguments
    are forwarded to `TCN`.
    '''
    if n_skip is None: n_skip = 2**hl_depth

    n_in,n_out = get_inp_out_size(db)
    model = TCN(n_in,n_out,hl_depth,**kwargs)

    skipped_metrics = [SkipNLoss(m,n_skip=n_skip) for m in metrics]
    skipped_loss = SkipNLoss(loss_func,n_skip=n_skip)

    return Learner(db,model,loss_func=skipped_loss,opt_func=ranger,metrics=skipped_metrics,cbs=cbs)

In [None]:
# Smoke test: one training epoch of a TCNLearner with default settings
TCNLearner(db).fit(1)

epoch,train_loss,valid_loss,fun_rmse,time
0,12.738482,12.759715,3.570702,00:02


## CRNN Learner

In [None]:
#export
@delegates(CRNN, keep=True)
def CRNNLearner(db,loss_func=nn.MSELoss(),metrics=[fun_rmse],n_skip=0,cbs=None,**kwargs):
    '''Create a `Learner` with a `CRNN` sized to the data in `db`.

    The loss function and every metric ignore the first `n_skip` samples of each sequence.
    Additional keyword arguments are forwarded to `CRNN`.
    '''
    n_in,n_out = get_inp_out_size(db)
    model = CRNN(n_in,n_out,**kwargs)

    skipped_metrics = [SkipNLoss(m,n_skip=n_skip) for m in metrics]
    skipped_loss = SkipNLoss(loss_func,n_skip=n_skip)

    return Learner(db,model,loss_func=skipped_loss,opt_func=ranger,metrics=skipped_metrics,cbs=cbs)

In [None]:
# Smoke test: one training epoch of a GRU-based CRNNLearner with a learning rate of 3e-2
CRNNLearner(db,rnn_type='gru').fit(1,3e-2)

epoch,train_loss,valid_loss,fun_rmse,time
0,3.278524,2.000227,1.384697,00:02


## Autoregressive Learner

In [None]:
#export
@delegates(TCN, keep=True)
def AR_TCNLearner(db,hl_depth=3,alpha=1,beta=1,early_stop=0,metrics=None,n_skip=None,**kwargs):
    '''Create a `Learner` with an autoregressive `TCN` wrapped in an `AR_Model`.

    The TCN receives the inputs together with the targets (appended by `ARInitCB`), so its
    input size is `inp+out`. `alpha` and `beta` weight the `TimeSeriesRegularizer` that is
    hooked on the last convolutional layer. `n_skip` defaults to `2**hl_depth`; it is passed
    to `AR_Model` as `rf` and used for the default RMSE metric. An `EarlyStoppingCallback`
    with patience `early_stop` is added when `early_stop > 0`. Additional keyword arguments
    are forwarded to `TCN`.
    '''
    n_skip = 2**hl_depth if n_skip is None else n_skip

    inp,out = get_inp_out_size(db)
    model = AR_Model(TCN(inp+out,out,hl_depth,**kwargs),ar=False,rf=n_skip)
    model.init_normalize(db.one_batch())

    cbs=[ARInitCB(),TimeSeriesRegularizer(alpha=alpha,beta=beta,modules=[model.model.conv_layers[-1]]),SaveModelCallback()]
    if early_stop > 0:
        cbs += [EarlyStoppingCallback(patience=early_stop)]

    if metrics is None: metrics=SkipNLoss(fun_rmse,n_skip)

    lrn = Learner(db,model,loss_func=nn.MSELoss(),opt_func=ranger,metrics=metrics,cbs=cbs)
    return lrn

In [None]:
#export
@delegates(SimpleRNN, keep=True)
def AR_RNNLearner(db,alpha=0,beta=0,early_stop=0,metrics=None,n_skip=0,fname='model',**kwargs):
    '''Create a `Learner` with an autoregressive `SimpleRNN` wrapped in an `AR_Model`.

    The RNN receives the inputs together with the targets (appended by `ARInitCB`), so its
    input size is `inp+out`. `alpha` and `beta` weight the `TimeSeriesRegularizer` that is
    hooked on the RNN layer. `n_skip` is used for the default RMSE metric. `fname` is the
    file name under which `SaveModelCallback` stores the model. An `EarlyStoppingCallback`
    with patience `early_stop` is added when `early_stop > 0`. Additional keyword arguments
    are forwarded to `SimpleRNN`.
    '''
    inp,out = get_inp_out_size(db)
    model = AR_Model(SimpleRNN(inp+out,out,**kwargs),ar=False,hs=True)
    model.init_normalize(db.one_batch())

    # Hand `fname` on to the saving callback; it was accepted before but had no effect
    cbs=[ARInitCB(),TimeSeriesRegularizer(alpha=alpha,beta=beta,modules=[model.model.rnn]),SaveModelCallback(fname=fname)]
    if early_stop > 0:
        cbs += [EarlyStoppingCallback(patience=early_stop)]

    if metrics is None: metrics=SkipNLoss(fun_rmse,n_skip)

    lrn = Learner(db,model,loss_func=nn.MSELoss(),opt_func=ranger,metrics=metrics,cbs=cbs)
    return lrn

In [None]:
#hide
# Run the nbdev export; the output below lists the converted notebooks
from nbdev.export import *
notebook2script()

Converted 00_core.ipynb.
Converted 01_models.ipynb.
Converted 01a_IndRNN.ipynb.
Converted 02_learner.ipynb.
Converted 03_dataloaders.ipynb.
Converted 11_dualrnn.ipynb.
Converted 12_TensorQuaternions.ipynb.
Converted 13_HPOpt.ipynb.
Converted index.ipynb.
