In [None]:
# default_exp models.core
# default_cls_lvl 3

# Models
> PyTorch models for sequential data

In [None]:
#export
from seqdata.core import *
from fastai2.basics import *
from fastai2.callback.progress import *
from fastai2.text.models.qrnn import *
from torch.nn.utils import weight_norm
from torch.nn import Parameter

In [None]:
# Example dataloaders shared by the smoke-test cells further down in this notebook.
# Inputs are read from the HDF columns 'current' and 'voltage', the target from 'voltage';
# `clm_shift` gives one shift per column.
# NOTE(review): presumably the shifted voltage is the previous output sample — confirm in seqdata.core.
# Windows are created with win_sz=1000+1 and stp_sz=1000.
seq = DataBlock(blocks=(SequenceBlock.from_hdf(['current','voltage'],TensorSequencesInput,clm_shift=[0,-1]),
                        SequenceBlock.from_hdf(['voltage'],TensorSequencesOutput,clm_shift=[-1])),
                 get_items=CreateDict([DfHDFCreateWindows(win_sz=1000+1,stp_sz=1000,clm='current')]),
                 splitter=ApplyToDict(ParentSplitter()))
# `db` is reused as the data source of every Learner created below.
db = seq.dataloaders(get_hdf_files('test_data/'))

## Batchnorm

In [None]:
#export
class BatchNorm_1D_Stateful(nn.Module):
    '''Batchnorm for stateful models.

    Stores the running statistics for every timestep separately (buffers of shape
    `(seq_len, hidden_size)`) to mitigate transient effects at the start of a sequence.

    Args:
        hidden_size: number of channels (size of the last input dimension).
        seq_len: number of timesteps for which separate running statistics are kept.
        stateful: if True, consecutive `forward` calls continue at the timestep where the
            previous call stopped until `reset` is called.
        batch_first: if True the input is `(batch, time, channel)`, else `(time, batch, channel)`.
        eps: value added to the variance for numerical stability.
        momentum: factor of the exponential moving average; `None` selects a cumulative average.
        affine: if True a learnable per-channel scale and shift are applied.
        track_running_stats: if True running statistics are tracked and used in eval mode;
            if False the statistics of the current batch are always used.
    '''
    __constants__ = ['track_running_stats', 'momentum', 'eps', 'weight', 'bias',
                     'running_mean', 'running_var', 'num_batches_tracked']

    def __init__(self, hidden_size, seq_len,stateful=False,batch_first=True, eps=1e-7, momentum=0.1, affine=True,
                 track_running_stats=True):#num_features
        super().__init__()
        channel_d = hidden_size
        self.seq_len = seq_len
        self.stateful = stateful
        self.batch_first = batch_first
        self.eps = eps
        self.momentum = momentum
        self.affine = affine
        self.track_running_stats = track_running_stats
        self.axes = (1,)
        if self.affine:
            # assigning a Parameter already registers it, no explicit register_parameter needed
            self.weight = Parameter(torch.Tensor(channel_d))
            self.bias = Parameter(torch.Tensor(channel_d))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        if self.track_running_stats:
            self.register_buffer('running_mean', torch.zeros(seq_len, channel_d))
            self.register_buffer('running_var', torch.ones(seq_len, channel_d))
            self.register_buffer('num_batches_tracked', torch.tensor(0, dtype=torch.long))
        else:
            # the statistics are buffers, not parameters, so the placeholders are buffers too
            self.register_buffer('running_mean', None)
            self.register_buffer('running_var', None)
            self.register_buffer('num_batches_tracked', None)
        self.reset_parameters()
        self.reset()

    def reset(self):
        "Restart the timestep counter used in stateful mode."
        self.seq_idx = 0

    def reset_parameters(self):
        "Reset the running statistics to mean 0 / variance 1 and the affine transform to identity."
        if self.track_running_stats:
            self.running_mean.zero_()
            self.running_var.fill_(1)
            self.num_batches_tracked.zero_()
        if self.affine:
            self.weight.data.fill_(1.0)
            self.bias.data.fill_(0.0)

    def forward(self, input, BN_start=None):
        '''Normalize a 3D `input` over the batch dimension, separately for every timestep.

        `BN_start` is the index of the first timestep of `input` inside the statistics window.
        If None it is the internal counter in stateful mode and 0 otherwise.
        Raises ValueError if `input` is not 3D.
        '''
        if input.dim() != 3:
            raise ValueError('expected 3D input (got {}D input)'.format(input.dim()))

        # work in (time, batch, channel) layout internally
        if self.batch_first: input = input.transpose(0,1)

        if BN_start is None:
            BN_start = self.seq_idx if self.stateful else 0

        update_stats = self.training and self.track_running_stats
        exponential_average_factor = 0.0
        if update_stats and self.num_batches_tracked is not None:
            self.num_batches_tracked += 1
            if self.momentum is None:  # use cumulative moving average
                exponential_average_factor = 1.0 / float(self.num_batches_tracked)
            else:  # use exponential moving average
                exponential_average_factor = self.momentum

        input_t, n_batch, hidden_size = input.size()
        BN_stop = BN_start + input_t
        self.seq_idx = BN_stop  # new starting point for next forward call

        if self.training or not self.track_running_stats:
            # batch statistics; also the only option when no running statistics exist
            mean = input.mean(1)
            var = input.var(1, unbiased=False)  # use biased var for the normalization itself

            n_in_window = self.seq_len - BN_start  # timesteps of this frame inside the statistics window
            if update_stats and n_in_window > 0:
                # running_var stores the unbiased estimate; a single sample has no Bessel correction
                bessel = n_batch / (n_batch - 1) if n_batch > 1 else 1.0
                keep = 1 - exponential_average_factor
                with torch.no_grad():
                    self.running_mean[BN_start:BN_stop] = (exponential_average_factor * mean[:n_in_window]
                                                           + keep * self.running_mean[BN_start:BN_stop])
                    self.running_var[BN_start:BN_stop] = (exponential_average_factor * var[:n_in_window] * bessel
                                                          + keep * self.running_var[BN_start:BN_stop])
        else:
            mean = self.running_mean[BN_start:BN_stop]
            var = self.running_var[BN_start:BN_stop]

            # if elements outside of the statistics window are requested, repeat the last stored timestep
            cat_len = input_t - mean.shape[0]
            if cat_len > 0:
                mean = torch.cat((mean, self.running_mean[-1:].repeat(cat_len, 1)))
                var = torch.cat((var, self.running_var[-1:].repeat(cat_len, 1)))

        output = (input - mean[:, None, :]) / (torch.sqrt(var[:, None, :] + self.eps))
        if self.affine:
            output = output * self.weight[None, None, :] + self.bias[None, None, :]

        if self.batch_first: output = output.transpose(0,1)

        return output

## Linear

In [None]:
#export
class SeqLinear(nn.Module):
    '''Pointwise multilayer perceptron for sequences.

    Built from `nn.Conv1d` layers with kernel size 1, so the same weights are applied to
    every timestep. With `hidden_layer < 1` it is a single linear projection; otherwise
    `hidden_layer` hidden layers of width `hidden_size`, each followed by `act()`,
    precede the output projection.
    '''

    def __init__(self,input_size,output_size,hidden_size=100,hidden_layer=1,act=Mish):
        super().__init__()
        if hidden_layer < 1:
            self.lin = nn.Conv1d(input_size,output_size,1)
            return

        stages = []
        n_in = input_size
        for _ in range(hidden_layer):
            # every hidden stage is its own Sequential(conv, activation)
            stages.append(nn.Sequential(nn.Conv1d(n_in,hidden_size,1),act()))
            n_in = hidden_size
        stages.append(nn.Conv1d(hidden_size,output_size,1))
        self.lin = nn.Sequential(*stages)

    def forward(self, x):
        # Conv1d expects the channels in dim 1, so swap dims 1 and 2 around the call
        channels_first = x.transpose(1,2)
        return self.lin(channels_first).transpose(1,2)

## RNNs

In [None]:
#export
from fastai2.text.models.awdlstm import *
from seqdata.models.indrnn import IndRNN
class RNN(nn.Module):
    '''Stack of single-layer recurrent networks, inspired by https://arxiv.org/abs/1708.02182

    Args:
        input_size: number of features of the input sequence.
        hidden_size: width of every recurrent layer.
        num_layers: number of stacked recurrent layers.
        hidden_p: dropout probability applied to the output of every layer but the last.
        input_p: dropout probability applied to the input (only if > 0).
        weight_p: accepted and forwarded to `_one_rnn`, but weight dropout is currently not applied.
        rnn_type: one of 'gru', 'lstm', 'rnn', 'qrnn', 'indrnn'.
        ret_full_hidden: if True the outputs of all layers are returned stacked along dim 0.
        stateful: if True the hidden state of the previous call is reused as initial state.
        normalization: '', 'layernorm' or 'batchnorm'; applied to the output of every layer.
        residual: if True the input of every layer is added to its output.
        **kwargs: forwarded to the constructor of the recurrent layers.

    Raises:
        ValueError: for an unknown `normalization` or `rnn_type`.
    '''

    def __init__(self, input_size,hidden_size, num_layers, 
                 hidden_p=0.0, input_p=0.0, weight_p=0.0,
                 rnn_type='gru',ret_full_hidden=False,stateful=False,
                 normalization='',residual=False,**kwargs):
        super().__init__()
        store_attr(self, 'ret_full_hidden,num_layers,rnn_type,hidden_size,stateful,input_p,normalization,residual')
        self.bs = 1
        
        self.rnns = nn.ModuleList([self._one_rnn(input_size if l == 0 else hidden_size,
                                                 hidden_size,weight_p,rnn_type,**kwargs) for l in range(num_layers)])
        
        # pointwise projection so that the residual of the first layer matches hidden_size
        self.res_gate0 = nn.Conv1d(input_size,hidden_size,1) if input_size != hidden_size else None
        
        self.input_dp = RNNDropout(input_p)
        self.hidden_dps = nn.ModuleList([RNNDropout(hidden_p) for l in range(num_layers)])
        
        if normalization == '':
            self.norm_layers = [None]*num_layers
        elif normalization == 'layernorm':
            self.norm_layers = nn.ModuleList([nn.LayerNorm(hidden_size,elementwise_affine=False) for l in range(num_layers)])
        elif normalization == 'batchnorm':
            # the per-timestep statistics window is fixed to 2000 steps
            self.norm_layers = nn.ModuleList([(BatchNorm_1D_Stateful(hidden_size,2000,stateful=stateful,batch_first=True,affine=False)) for i in range(num_layers)])  
        else:
            raise ValueError('Wrong Value for normalization')
        self.reset()

    def forward(self, inp, h_init=None):
        "Run the stack on a batch-first 3D `inp`; returns `(output, new_hidden)` with one hidden state per layer."
        bs,seq_len,_ = inp.shape
        if h_init is None and self.stateful: h_init = self._get_hidden(bs)
                
        r_input = self.input_dp(inp) if self.input_p > 0 else inp
        full_hid,new_hidden = [],[]
        for l, (rnn,hid_dp,nrm) in enumerate(zip(self.rnns,self.hidden_dps,self.norm_layers)):
            r_output, h = rnn(r_input,h_init[l] if h_init is not None else None)

            # residual connection
            if self.residual:
                if l == 0 and self.res_gate0 is not None: r_input = self.res_gate0(r_input.transpose(1,2)).transpose(1,2)
                r_output = r_input + r_output
            
            if self.normalization != '':
                r_output = nrm(r_output)
                
            # no dropout after the last layer
            if l != self.num_layers - 1: 
                r_output = hid_dp(r_output)
            
            full_hid.append(r_output)
            new_hidden.append(h)
            r_input = r_output
        
        # keep a detached copy for the next call in stateful mode
        self.hidden =  to_detach(new_hidden, cpu=False, gather=False)
        self.bs = bs
        output = r_output if not self.ret_full_hidden else torch.stack(full_hid, 0)
        
        return output, new_hidden

    def _get_hidden(self,bs):
        '''retrieve internal hidden state, check if batch size or model device has changed'''
        if self.hidden is None: return None
        if bs!=self.bs: return None
        if self.hidden[0][0].device != one_param(self).device: return None
        return self.hidden
    
    def _one_rnn(self, n_in, n_out, weight_p, rnn_type,**kwargs):
        "Return one single-layer, batch-first inner rnn of type `rnn_type`; raises ValueError for unknown types."
        # `weight_p` is accepted for interface compatibility; weight dropout is not applied.
        if rnn_type == 'gru':
            rnn = nn.GRU(n_in, n_out,1,batch_first=True,**kwargs)
        elif rnn_type == 'lstm':
            rnn = nn.LSTM(n_in, n_out,1,batch_first=True,**kwargs)
        elif rnn_type == 'rnn':
            rnn = nn.RNN(n_in, n_out,1,batch_first=True,**kwargs)
        elif rnn_type == 'qrnn':
            rnn = QRNN(n_in, n_out,1,batch_first=True,**kwargs)
        elif rnn_type == 'indrnn':
            rnn = IndRNN(n_in, n_out,1,batch_first=True,**kwargs)
        else:
            raise ValueError(f"Unknown rnn_type '{rnn_type}', expected one of 'gru', 'lstm', 'rnn', 'qrnn', 'indrnn'")
        return rnn
    
    def reset(self):
        "Reset the hidden states"
        for r in self.rnns:
            if hasattr(r, 'reset'): r.reset()
        for r in self.norm_layers:
            if hasattr(r, 'reset'): r.reset()
        self.hidden = None

In [None]:
#export
class SimpleRNN(nn.Module):
    '''`RNN` stack followed by a `SeqLinear` output head.

    With `lrn_init=True` one learnable initial hidden state per layer is created.
    It is passed to the rnn only on the first forward call after `reset`.
    '''
    
    @delegates(RNN, keep=True)
    def __init__(self,input_size,output_size,num_layers=1,hidden_size=100,lrn_init=False,**kwargs):
        super().__init__()
        self.rnn = RNN(input_size=input_size,hidden_size=hidden_size,num_layers=num_layers,**kwargs)
        self.final = SeqLinear(hidden_size,output_size,hidden_size=hidden_size)
        
        if lrn_init:
            learned_states = [nn.Parameter(torch.randn(size=(1,1,hidden_size)),requires_grad=True)
                              for _ in range(num_layers)]
            self.init_p = nn.ParameterList(learned_states)
        else:
            self.init_p = None
        self.reset_flag = False

    def forward(self, x):
        h_init = None
        if self.init_p is not None and self.reset_flag:
            # broadcast every learned state over the batch dimension of x
            batch_size = x.shape[0]
            h_init = [p.repeat((1,batch_size,1)) for p in self.init_p]
            self.reset_flag = False
        
        rnn_out,_ = self.rnn(x,h_init)
        return self.final(rnn_out)

    def reset(self):
        "Reset the wrapped rnn and request the learned initial state for the next forward call."
        self.rnn.reset()
        self.reset_flag = True

In [None]:
# Smoke test: stateless 2-layer SimpleRNN without normalization.
model = SimpleRNN(2,1,2,stateful=False,normalization='')
# `fit` has no return value, so keep the Learner itself in `lrn` and train in a separate step.
lrn = Learner(db,model,loss_func=nn.MSELoss())
lrn.fit(10)

epoch,train_loss,valid_loss,time
0,11.883348,9.580118,00:02
1,8.228333,0.092571,00:02
2,5.35243,0.441511,00:02
3,3.771955,0.368044,00:03
4,2.830905,0.140356,00:02
5,2.180635,0.163117,00:02
6,1.728667,0.079794,00:02
7,1.389382,0.073322,00:02
8,1.130678,0.05228,00:02
9,0.929021,0.036981,00:02


In [None]:
# Smoke test: stateful 2-layer LSTM.
model = SimpleRNN(2,1,2,stateful=True,rnn_type='lstm')
# `fit` has no return value, so keep the Learner itself in `lrn` and train in a separate step.
lrn = Learner(db,model,loss_func=nn.MSELoss())
lrn.fit(10)

epoch,train_loss,valid_loss,time
0,13.638624,12.220023,00:03
1,10.471388,1.058795,00:02
2,8.646121,00:01,


KeyboardInterrupt: 

In [None]:
# Smoke test: stateful 3-layer QRNN.
model = SimpleRNN(2,1,3,stateful=True,rnn_type='qrnn')
# `fit` has no return value, so keep the Learner itself in `lrn` and train in a separate step.
lrn = Learner(db,model,loss_func=nn.MSELoss())
lrn.fit(1)

In [None]:
#export
class SeperateRNN(nn.Module):
    '''Processes groups of input features with separate single-layer RNNs before a shared RNN.

    `input_list` holds the number of features of every group. The input is split along its
    last dimension accordingly, every group gets its own `RNN` of width
    `hidden_size//len(input_list)`, and the concatenated outputs feed a shared `RNN`
    followed by a `SeqLinear` head.
    '''
    
    @delegates(RNN, keep=True)
    def __init__(self,input_list,output_size,num_layers=1,hidden_size=100,residual=False,**kwargs):
        super().__init__()
        # cumulative feature offsets: group i covers columns [offsets[i], offsets[i+1])
        self.input_list = np.cumsum([0] + input_list)
        n_groups = len(input_list)
        rnn_width = hidden_size//n_groups
        
        group_rnns = []
        for n in input_list:
            group_rnns.append(RNN(input_size=n,hidden_size=rnn_width,num_layers=1,residual=False,**kwargs))
        self.rnns = nn.ModuleList(group_rnns)
        
        self.rnn = RNN(input_size=rnn_width*n_groups,hidden_size=hidden_size,
                       num_layers=num_layers,residual=residual,**kwargs)
        self.final = SeqLinear(hidden_size,output_size,hidden_size=hidden_size,hidden_layer=3)

    def forward(self, x):
        group_out = []
        for i,rnn in enumerate(self.rnns):
            start,stop = self.input_list[i],self.input_list[i+1]
            group_out.append(rnn(x[...,start:stop])[0])
        merged = torch.cat(group_out,dim=-1)
        merged,_ = self.rnn(merged)
        return self.final(merged)

    def reset(self):
        "Reset the hidden states of all group RNNs and of the shared RNN."
        for r in self.rnns:
            r.reset()
        self.rnn.reset()
In [None]:
# Smoke test: one RNN per input feature, stateful LSTM with residual connections in the shared RNN.
model = SeperateRNN([1]*2,1,1,stateful=True,rnn_type='lstm',residual=True)
# `fit` has no return value, so keep the Learner itself in `lrn` and train in a separate step.
lrn = Learner(db,model,loss_func=nn.MSELoss())
lrn.fit(10)

## CNNs

In [None]:
#export
class CausalConv1d(torch.nn.Conv1d):
    '''1D convolution whose output at step t only depends on inputs at steps <= t.

    The input is padded on both sides by `(kernel_size - 1) * dilation` and the surplus
    outputs at the end of the sequence are cut off again.
    `kernel_size` has to be an int.
    NOTE(review): the trimming matches the padding only for `stride=1` — confirm before using other strides.
    '''
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 dilation=1,
                 groups=1,
                 bias=True):
        causal_padding = (kernel_size - 1) * dilation
        super().__init__(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=causal_padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.__padding = causal_padding
        
    def forward(self, input):
        out = super().forward(input)
        # kernel_size == 1 needs no padding; slicing with [:-0] would return an empty tensor
        if self.__padding == 0:
            return out
        return out[:,:,:-self.__padding]

In [None]:
#export
@delegates(CausalConv1d, keep=True)
def CConv1D(input_size,output_size,kernel_size=2,activation = Mish,wn=True, bn = False, **kwargs):
    '''Build `nn.Sequential([BatchNorm1d,] CausalConv1d [, activation])`.

    `wn` wraps the convolution in weight normalization, `bn` prepends a batchnorm over the
    input channels, and `activation=None` omits the activation. `**kwargs` go to `CausalConv1d`.
    '''
    causal_conv = CausalConv1d(input_size,output_size,kernel_size,**kwargs)
    if wn:
        causal_conv = weight_norm(causal_conv)
    act_layer = None if activation is None else activation()
    norm_layer = nn.BatchNorm1d(input_size) if bn else None
    stages = (norm_layer,causal_conv,act_layer)
    return nn.Sequential(*(stage for stage in stages if stage is not None))

In [None]:
#export
@delegates(CausalConv1d, keep=True)
class TCN_Block(nn.Module):
    '''Residual block of `num_layers` causal convolutions with kernel size 2.

    Args:
        input_size: number of input channels.
        output_size: number of output channels of every convolution.
        num_layers: number of stacked `[BatchNorm1d,] CausalConv1d [, activation]` stages.
        activation: class of the activation appended to every stage, None for no activation.
        wn: if True every convolution is wrapped in weight normalization.
        bn: if True every stage starts with a batchnorm over its input channels.
        **kwargs: forwarded to `CausalConv1d` (e.g. `dilation`).

    The block input is added to the output; if the channel counts differ it is first
    projected with a pointwise convolution.
    '''
    def __init__(self,input_size,output_size,num_layers=1,
                 activation = Mish,wn=True, bn = False, **kwargs):
        super().__init__()
        
        layers = []
        n_in = input_size
        for _ in range(num_layers):
            # only the first stage sees input_size channels, later stages get output_size
            conv = CausalConv1d(n_in,output_size,2,**kwargs)
            if wn: conv = weight_norm(conv)
            if bn: layers.append(nn.BatchNorm1d(n_in))
            layers.append(conv)
            if activation is not None: layers.append(activation())
            n_in = output_size
            
        self.layers = nn.Sequential(*layers)
        
        self.residual = nn.Conv1d(input_size,output_size,kernel_size=1) if output_size!=input_size else None
        
    def forward(self, x):      
        out = self.layers(x)
        skip = x if self.residual is None else self.residual(x)
        return out + skip

In [None]:
#export
class TCN(nn.Module):
    '''Temporal convolutional network: `hl_depth` `TCN_Block`s with dilation `2**i`
    followed by a pointwise output convolution.

    In stateful mode the last `rec_field` input steps of every call are stored and
    prepended to the next input, and the outputs of the prepended steps are dropped again.
    '''
    def __init__(self,input_size,output_size,hl_depth=1,hl_width=10,act = Mish,bn=False,stateful=False):
        super().__init__()
        
        blocks = []
        for depth in range(hl_depth):
            n_in = input_size if depth == 0 else hl_width
            blocks.append(TCN_Block(n_in,hl_width,dilation=2**depth,bn=bn,activation=act))
        self.conv_layers = nn.Sequential(*blocks)
        
        self.rec_field = (2**hl_depth)-1
        self.final = nn.Conv1d(hl_width,output_size,kernel_size=1)
        self.x_init = None
        self.stateful = stateful
        
    def forward(self, x):      
        # the stored context is only usable if it has the same batch size as x
        has_context = self.x_init is not None and self.x_init.shape[0] == x.shape[0]
        if not has_context:
            self.x_init = None
        elif self.stateful:
            x = torch.cat([self.x_init,x],dim=1)
        
        features = self.conv_layers(x.transpose(1,2))
        out = self.final(features).transpose(1,2)
        
        if self.stateful:
            if has_context:
                out = out[:,self.rec_field:]
            self.x_init = x[:,-self.rec_field:]
            
        return out
    
    def reset(self):
        "Forget the stored input context."
        self.x_init = None

In [None]:
# Smoke test: TCN with three blocks.
model = TCN(2,1,hl_depth=3)
# `fit` has no return value, so keep the Learner itself in `lrn` and train in a separate step.
lrn = Learner(db,model,loss_func=nn.MSELoss())
lrn.fit(1)

In [None]:
#export
class SeperateTCN(nn.Module):
    '''One stack of `TCN_Block`s per group of input features, merged by a `SeqLinear` head.

    `input_list` holds the number of features of every group; every stack has the width
    `hl_width//len(input_list)`. The stateful handling of the input context is the same
    as in `TCN`.
    '''
    def __init__(self,input_list,output_size,hl_depth=1,hl_width=10,act = Mish,bn=False,stateful=False,final_layer=3):
        super().__init__()
        # cumulative feature offsets: group i covers columns [offsets[i], offsets[i+1])
        self.input_list = np.cumsum([0] + input_list)
        
        tcn_width = hl_width//len(input_list)

        def _stack(n_features):
            blocks = []
            for depth in range(hl_depth):
                n_in = n_features if depth == 0 else tcn_width
                blocks.append(TCN_Block(n_in,tcn_width,dilation=2**depth,bn=bn,activation=act))
            return blocks

        stacks = [_stack(n) for n in input_list]
        self.layers = nn.ModuleList([nn.Sequential(*blocks) for blocks in stacks])
        
        self.rec_field = (2**hl_depth)-1
        self.final = SeqLinear(tcn_width*len(input_list),output_size,hidden_size=hl_width,hidden_layer=final_layer)
        self.x_init = None
        self.stateful = stateful
        
    def forward(self, x):      
        # the stored context is only usable if it has the same batch size as x
        has_context = self.x_init is not None and self.x_init.shape[0] == x.shape[0]
        if not has_context:
            self.x_init = None
        elif self.stateful:
            x = torch.cat([self.x_init,x],dim=1)
        
        group_out = []
        for i,stack in enumerate(self.layers):
            group = x[...,self.input_list[i]:self.input_list[i+1]]
            group_out.append(stack(group.transpose(1,2)))
        out = self.final(torch.cat(group_out,dim=1).transpose(1,2))
        
        if self.stateful:
            if has_context:
                out = out[:,self.rec_field:]
            self.x_init = x[:,-self.rec_field:]
            
        return out
    
    def reset(self):
        "Forget the stored input context."
        self.x_init = None

## CRNNs

In [None]:
#export
class CRNN(nn.Module):
    '''`TCN` feature extractor (ReLU activations, `num_ft` output features) followed by a `SimpleRNN`.'''
    def __init__(self,input_size,output_size,num_ft=10,num_cnn_layers=4,num_rnn_layers=2,hs_cnn=10,hs_rnn=10,
         hidden_p=0, input_p=0, weight_p=0, rnn_type='gru',stateful=True,residual=True):
        super().__init__()
        self.cnn = TCN(input_size,num_ft,hl_depth=num_cnn_layers,hl_width=hs_cnn,
                       act=nn.ReLU,stateful=stateful)
        rnn_options = dict(hidden_p=hidden_p,input_p=input_p,weight_p=weight_p,
                           rnn_type=rnn_type,stateful=stateful,residual=residual)
        self.rnn = SimpleRNN(num_ft,output_size,num_layers=num_rnn_layers,hidden_size=hs_rnn,**rnn_options)
        
    def forward(self, x):
        features = self.cnn(x)
        return self.rnn(features)
    
    def reset(self):
        "Reset the state of both sub-models."
        for part in (self.cnn,self.rnn):
            part.reset()

In [None]:
# Smoke test: CRNN with default settings.
model = CRNN(2,1,10)
# `fit` has no return value, so keep the Learner itself in `lrn` and train in a separate step.
lrn = Learner(db,model,loss_func=nn.MSELoss())
lrn.fit(1)

In [None]:
# Smoke test: CRNN with an explicitly selected GRU ('gru' is also the default rnn_type of CRNN).
model = CRNN(2,1,10,rnn_type='gru')
# `fit` has no return value, so keep the Learner itself in `lrn` and train in a separate step.
lrn = Learner(db,model,loss_func=nn.MSELoss())
lrn.fit(1)

In [None]:
#export
class SeperateCRNN(nn.Module):
    '''`SeperateTCN` feature extractor (ReLU activations, linear merge to `num_ft` features)
    followed by a `SimpleRNN`.'''
    def __init__(self,input_list,output_size,num_ft=10,num_cnn_layers=4,num_rnn_layers=2,hs_cnn=10,hs_rnn=10,
         hidden_p=0, input_p=0, weight_p=0, rnn_type='gru',stateful=True,residual=True):
        super().__init__()
        self.cnn = SeperateTCN(input_list,num_ft,hl_depth=num_cnn_layers,hl_width=hs_cnn,
                               act=nn.ReLU,stateful=stateful,final_layer=0)
        rnn_options = dict(hidden_p=hidden_p,input_p=input_p,weight_p=weight_p,
                           rnn_type=rnn_type,stateful=stateful,residual=residual)
        self.rnn = SimpleRNN(num_ft,output_size,num_layers=num_rnn_layers,hidden_size=hs_rnn,**rnn_options)
        
    def forward(self, x):
        features = self.cnn(x)
        return self.rnn(features)
    
    def reset(self):
        "Reset the state of both sub-models."
        for part in (self.cnn,self.rnn):
            part.reset()

## Autoregressive Models

In [None]:
#export
class Normalizer1D(nn.Module):
    '''Standardizes tensors with a fixed mean and standard deviation.

    Both statistics are stored as buffers; `_epsilon` is added to the standard deviation
    once at construction to avoid a division by zero.
    '''
    _epsilon = 1e-16

    def __init__(self, mean, std):
        super().__init__()
        guarded_std = std.clone().detach() + self._epsilon
        self.register_buffer('std', guarded_std)
        self.register_buffer('mean', mean.clone().detach())

    def normalize(self, x):
        "Map `x` to zero mean and unit standard deviation."
        centered = x-self.mean
        return centered/self.std

    def unnormalize(self, x):
        "Inverse of `normalize`."
        scaled = x*self.std
        return scaled + self.mean

In [None]:
#export
class AR_Model(nn.Module):
    '''Wraps a sequence model so that it receives the system output as additional input.

    Args:
        model: wrapped model, called as `model(x)` or, with `hs=True`, as `model(x, hidden)`.
        ar: if True the model's own predictions are fed back step by step, otherwise the
            measured output `y`, delayed by one step, is used.
        rf: number of past steps passed to the model per prediction in the `ar=True` mode.
        hs: if True the model returns `(output, hidden)`.
    '''
    def __init__(self,model,ar=True,rf=1,hs=False):
        super().__init__()
        self.model = model
        self.ar,self.rf,self.hs = ar,rf,hs
        self.norm = None
        
    def init_normalize(self, batch,axes = [0,1]):
        "Create the normalizer of the fed-back output from the statistics of `batch[1]` over `axes`."
        target = batch[1]
        self.norm = Normalizer1D(target.mean(axes, keepdim=True),target.std(axes, keepdim=True))
        
    def forward(self, u,y):
        if self.ar:
            return self._forward_autoregressive(u,y)
        return self._forward_delayed_output(u,y)

    def _forward_autoregressive(self, u,y):
        "Predict step by step, feeding the previous predictions back as input."
        y_e = torch.zeros_like(y)
        state = None
        window = self.rf
        for step in range(y_e.shape[1]):
            if step < window:
                # not enough history yet: left-pad the time axis with zeros up to the window length
                y_in = F.pad(y_e[:, :step], [0,0,window-step, 0])
                u_in = F.pad(u[:, :step+1], [0,0,window-step-1, 0])
            else:
                y_in = y_e[:, step-window:step]
                u_in = u[:, step-window+1:step+1]
                
            if self.norm is not None:
                y_in = self.norm.normalize(y_in)

            x = torch.cat((u_in, y_in), 2)
            
            if self.hs:
                y_next,state = self.model(x,state)
            else:
                y_next = self.model(x)
            # only the newest prediction of the window is kept
            y_e[:, step] = y_next[:, -1]
        return y_e

    def _forward_delayed_output(self, u,y):
        "Predict the whole sequence at once from `u` and `y` delayed by one step."
        y_in = F.pad(y[:,:-1,:],[0,0,1,0])
        
        if self.norm is not None:
            y_in = self.norm.normalize(y_in)
        
        x = torch.cat([u,y_in],dim=2)
        if not self.hs:
            return self.model(x)
        y_e,_ = self.model(x)
        return y_e

In [None]:
#export
@delegates(RNN, keep=True)
class AR_RNN(nn.Module):
    '''`RNN` stack with a pointwise linear output layer that also returns the hidden state,
    so it can be driven step by step with an explicit initial state.'''
    def __init__(self,input_size,output_size,num_layers=1,hidden_size=100,**kwargs):
        super().__init__()
        self.rnn = RNN(input_size=input_size,hidden_size=hidden_size,num_layers=num_layers,**kwargs)
        self.final = nn.Conv1d(hidden_size,output_size,kernel_size=1)

    def forward(self, x,init_state=None):
        rnn_out,hs = self.rnn(x,init_state)
        # Conv1d expects the channels in dim 1, so swap dims 1 and 2 around the call
        projected = self.final(rnn_out.transpose(1,2))
        return projected.transpose(1,2),hs

In [None]:
# Smoke test: autoregressive wrapper around an AR_RNN with input_size=3 and output_size=1.
# AR_Model concatenates the fed-back output to the inputs along the feature axis.
# NOTE(review): presumably 3 = 2 input columns + 1 fed-back output — confirm against the dataloader.
model = AR_Model(AR_RNN(3,1),ar=True,hs=True)
# Initialize the output normalizer from the statistics of the targets of one batch.
model.init_normalize(db.one_batch())

In [None]:
#hide
# Convert the project notebooks with nbdev; the converted notebooks are listed in the cell output.
from nbdev.export import *
notebook2script()

Converted 00_core.ipynb.
Converted 01_models.ipynb.
Converted 01a_IndRNN.ipynb.
Converted 02_learner.ipynb.
Converted 03_dataloaders.ipynb.
Converted 11_dualrnn.ipynb.
Converted 12_TensorQuaternions.ipynb.
Converted 13_HPOpt.ipynb.
Converted index.ipynb.
