In [None]:
#|default_exp models.RNNPlus

# RNNPlus

These are RNN, LSTM and GRU PyTorch implementations created by Ignacio Oguiza - oguiza@timeseriesAI.co

The idea of adding a feature extractor to the RNN network comes from the solution developed by the UPSTAGE team (https://www.kaggle.com/songwonho, 
https://www.kaggle.com/limerobot and https://www.kaggle.com/jungikhyo). 
They finished in 3rd position in Kaggle's Google Brain - Ventilator Pressure Prediction competition. They used a Conv1d + Stacked LSTM architecture.

In [None]:
#|export
from tsai.imports import *
from tsai.utils import *
from tsai.models.layers import *
from tsai.models.utils import *

In [None]:
#|export
class _RNN_Backbone(Module):
    """Recurrent backbone used by `RNNPlus`, `LSTMPlus` and `GRUPlus`.

    `forward` applies, in order: categorical embeddings (optional) -> feature extractor (optional) ->
    transpose -> recurrent layers -> transpose back.

    Args:
        cell: recurrent layer class, called as `cell(input_size, hidden_size, num_layers=..., bias=...,
            batch_first=True, ...)` (`nn.RNN`, `nn.LSTM` or `nn.GRU` in this file).
        c_in: number of input variables (channels).
        c_out: not used by the backbone; kept so the signature matches the full model.
        seq_len: sequence length; only forwarded to the feature extractor and `output_size_calculator`.
        hidden_size: int or list of ints. If every entry is equal, a single `cell` with `n_layers` stacked
            layers is created. Otherwise one single-layer `cell` per entry is stacked and `n_layers` is ignored.
        n_layers: number of stacked layers when all hidden sizes are equal.
        bias: passed to `cell`.
        rnn_dropout: dropout applied between recurrent layers (never after the last one).
        bidirectional: passed to `cell`; doubles the number of output features.
        n_cat_embeds: number of categories of each categorical variable (int or list). None disables embeddings.
        cat_embed_dims: embedding size per categorical variable (int or list). A single value is reused for
            every categorical variable. Defaults to `emb_sz_rule` applied to each entry of `n_cat_embeds`.
        cat_padding_idxs, cat_pos: forwarded to `MultiEmbedding`.
        feature_extractor: an `nn.Module` used as is, or a callable invoked as `feature_extractor(c_in, seq_len)`.
        init_weights: if True, recurrent parameters are initialised by `_weights_init`.

    NOTE(review): there is no explicit `super().__init__()` call; presumably the `Module` base class takes care
    of it - confirm against its definition.
    """
    def __init__(self, cell, c_in, c_out, seq_len=None, hidden_size=100, n_layers=1, bias=True, rnn_dropout=0, bidirectional=False,
                 n_cat_embeds=None, cat_embed_dims=None, cat_padding_idxs=None, cat_pos=None, feature_extractor=None, init_weights=True):

        # Categorical embeddings
        if n_cat_embeds is not None:
            n_cat_embeds = listify(n_cat_embeds)
            if cat_embed_dims is None:
                cat_embed_dims = [emb_sz_rule(s) for s in n_cat_embeds]
            else:
                # accept a scalar / single value and reuse it for every categorical variable so that
                # the c_in computation below always sees one size per embedded variable
                cat_embed_dims = listify(cat_embed_dims)
                if len(cat_embed_dims) == 1: cat_embed_dims = cat_embed_dims * len(n_cat_embeds)
            self.to_cat_embed = MultiEmbedding(c_in, n_cat_embeds, cat_embed_dims=cat_embed_dims, cat_padding_idxs=cat_padding_idxs,
                                               cat_pos=cat_pos)
            # each categorical channel is replaced by its embedding channels
            c_in = c_in + sum(cat_embed_dims) - len(n_cat_embeds)
        else:
            self.to_cat_embed = nn.Identity()

        # Feature extractor
        if feature_extractor:
            if isinstance(feature_extractor, nn.Module):  self.feature_extractor = feature_extractor
            else: self.feature_extractor = feature_extractor(c_in, seq_len)
            # the extractor may change the number of channels and/or the sequence length
            c_in, seq_len = output_size_calculator(self.feature_extractor, c_in, seq_len)
        else:
            self.feature_extractor = nn.Identity()

        # RNN layers
        hidden_size = listify(hidden_size)  # the default (an int) and any scalar must be accepted
        rnn_layers = []
        if len(set(hidden_size)) == 1:
            # all layers share the same size: use a single (possibly multi-layer) cell
            hidden_size = hidden_size[0]
            if n_layers == 1: rnn_dropout = 0  # dropout between layers is meaningless with a single layer
            rnn_layers.append(cell(c_in, hidden_size, num_layers=n_layers, bias=bias, batch_first=True, dropout=rnn_dropout,
                                   bidirectional=bidirectional))
            rnn_layers.append(LSTMOutput()) # this selects just the output, and discards h_n, and c_n
        else:
            # layers of different sizes: stack one single-layer cell per requested size
            input_size = c_in
            last_idx = len(hidden_size) - 1
            for i, hs in enumerate(hidden_size):
                rnn_layers.append(cell(input_size, hs, num_layers=1, bias=bias, batch_first=True, bidirectional=bidirectional))
                rnn_layers.append(LSTMOutput()) # this selects just the output, and discards h_n, and c_n
                if rnn_dropout and i < last_idx:
                    rnn_layers.append(nn.Dropout(rnn_dropout)) # add dropout to all layers except last
                input_size = hs * (1 + bidirectional)  # features fed to the next layer
        self.rnn = nn.Sequential(*rnn_layers)
        self.transpose = Transpose(-1, -2, contiguous=True)
        if init_weights: self._weights_init(self)

    def forward(self, x):
        x = self.to_cat_embed(x)
        x = self.feature_extractor(x)
        x = self.transpose(x)                    # [batch_size x n_vars x seq_len] --> [batch_size x seq_len x n_vars]
        x = self.rnn(x)                          # [batch_size x seq_len x hidden_size * (1 + bidirectional)]
        x = self.transpose(x)                    # [batch_size x hidden_size * (1 + bidirectional) x seq_len]
        return x

    def _weights_init(self, m):
        """Initialise every recurrent parameter found in `m` and its sub-modules.

        Parameters are matched by name: `weight_ih*` -> Xavier normal, `weight_hh*` -> orthogonal,
        `bias_ih*` / `bias_hh*` -> zeros. The forget-gate slice of `bias_ih*` is then set to 1, except for
        modules known to have no forget gate (`nn.RNN`, `nn.GRU` and their cell variants).
        Each parameter is visited exactly once, through the module that owns it.
        """
        # same initialization as keras. Adapted from the initialization developed
        # by JUN KODA (https://www.kaggle.com/junkoda) in this notebook
        # https://www.kaggle.com/junkoda/pytorch-lstm-with-tensorflow-like-initialization
        no_forget_gate = (nn.RNN, nn.GRU, nn.RNNCell, nn.GRUCell)
        for module in m.modules():
            has_forget_gate = not isinstance(module, no_forget_gate)
            for name, params in module.named_parameters(recurse=False):
                if "weight_ih" in name:
                    nn.init.xavier_normal_(params)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(params)
                elif 'bias_ih' in name:
                    params.data.fill_(0)
                    if has_forget_gate:
                        # Set forget-gate bias to 1 (second quarter of the LSTM gate-stacked bias)
                        n = params.size(0)
                        params.data[(n // 4):(n // 2)].fill_(1)
                elif 'bias_hh' in name:
                    params.data.fill_(0)

In [None]:
#|export
class _RNNPlus_Base(nn.Sequential):
    """Base model: an `_RNN_Backbone` followed by a head, registered as `backbone` and `head`.

    Subclasses select the recurrent layer through the `_cell` class attribute.

    Args:
        c_in: number of input variables (channels).
        c_out: number of outputs of the default head.
        seq_len: sequence length; required when `last_step=False`.
        hidden_size: int or list of ints, see `_RNN_Backbone`.
        n_layers, bias, rnn_dropout, bidirectional: forwarded to `_RNN_Backbone`.
        n_cat_embeds, cat_embed_dims, cat_padding_idxs, cat_pos: categorical embedding options, forwarded to
            `_RNN_Backbone`.
        feature_extractor: forwarded to `_RNN_Backbone`.
        fc_dropout, bn, y_range, last_step: options of the default head (see `create_head`).
        custom_head: an `nn.Module` used as is, or a callable invoked as
            `custom_head(head_nf, c_out, seq_len, **kwargs)`. Replaces the default head.
        init_weights: forwarded to `_RNN_Backbone`.
        **kwargs: only used when `custom_head` is a callable.

    NOTE(review): the head receives the `seq_len` given by the caller. `_RNN_Backbone` recomputes the sequence
    length after the feature extractor but does not expose it, so a length-changing extractor combined with
    `last_step=False` would size the head incorrectly - confirm.
    """
    def __init__(self, c_in, c_out, seq_len=None, hidden_size=[100], n_layers=1, bias=True, rnn_dropout=0, bidirectional=False,
                 n_cat_embeds=None, cat_embed_dims=None, cat_padding_idxs=None, cat_pos=None, feature_extractor=None, fc_dropout=0.,
                 last_step=True, bn=False, custom_head=None, y_range=None, init_weights=True, **kwargs):

        if not last_step: assert seq_len, 'you need to pass a seq_len value'

        # Backbone
        hidden_size = listify(hidden_size)
        backbone = _RNN_Backbone(self._cell, c_in, c_out, seq_len=seq_len, hidden_size=hidden_size, n_layers=n_layers,
                                 n_cat_embeds=n_cat_embeds, cat_embed_dims=cat_embed_dims, cat_padding_idxs=cat_padding_idxs,
                                 cat_pos=cat_pos, feature_extractor=feature_extractor,
                                 bias=bias, rnn_dropout=rnn_dropout,  bidirectional=bidirectional, init_weights=init_weights)

        # Head
        # hidden_size is always a list at this point, so the last entry gives the backbone output features
        self.head_nf = hidden_size[-1] * (1 + bidirectional)
        if custom_head:
            if isinstance(custom_head, nn.Module): head = custom_head
            else: head = custom_head(self.head_nf, c_out, seq_len, **kwargs)
        else:
            head = self.create_head(self.head_nf, c_out, seq_len, last_step=last_step, fc_dropout=fc_dropout, bn=bn, y_range=y_range)
        super().__init__(OrderedDict([('backbone', backbone), ('head', head)]))

    def create_head(self, nf, c_out, seq_len, last_step=True, fc_dropout=0., bn=False, y_range=None):
        """Build the default head.

        With `last_step=True` the head starts with `LastStep()` and the linear layer takes `nf` features.
        Otherwise it starts with `Flatten()` and the linear layer takes `nf * seq_len` features.
        Optional `BatchNorm1d` and `Dropout` precede the linear layer; `SigmoidRange(*y_range)` follows it
        when `y_range` is given.
        """
        if last_step:
            layers = [LastStep()]
        else:
            layers = [Flatten()]
            nf *= seq_len
        if bn: layers.append(nn.BatchNorm1d(nf))
        if fc_dropout: layers.append(nn.Dropout(fc_dropout))
        layers.append(nn.Linear(nf, c_out))
        if y_range: layers.append(SigmoidRange(*y_range))
        return nn.Sequential(*layers)


class RNNPlus(_RNNPlus_Base):
    "`_RNNPlus_Base` model that uses `nn.RNN` as its recurrent layer."
    _cell = nn.RNN

class LSTMPlus(_RNNPlus_Base):
    "`_RNNPlus_Base` model that uses `nn.LSTM` as its recurrent layer."
    _cell = nn.LSTM

class GRUPlus(_RNNPlus_Base):
    "`_RNNPlus_Base` model that uses `nn.GRU` as its recurrent layer."
    _cell = nn.GRU

In [None]:
bs = 16
c_in = 3
seq_len = 12
c_out = 2
xb = torch.rand(bs, c_in, seq_len)
# Configuration shared by the stacked, bidirectional test cases below
stacked_kwargs = dict(hidden_size=100, n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True, fc_dropout=0.5)

# Default head (last time step only): seq_len is not required
test_eq(RNNPlus(c_in, c_out)(xb).shape, [bs, c_out])
for arch in (RNNPlus, LSTMPlus, GRUPlus):
    test_eq(arch(c_in, c_out, **stacked_kwargs)(xb).shape, [bs, c_out])

# hidden_size given as a list: layers of different sizes, and a single-element list with n_layers
test_eq(RNNPlus(c_in, c_out, hidden_size=[100, 50, 10], bias=True, rnn_dropout=0.2, bidirectional=True, fc_dropout=0.5)(xb).shape,
        [bs, c_out])
test_eq(RNNPlus(c_in, c_out, hidden_size=[100], n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True, fc_dropout=0.5)(xb).shape,
        [bs, c_out])

# Flattened head (last_step=False): seq_len must be passed
for arch in (RNNPlus, LSTMPlus, GRUPlus):
    test_eq(arch(c_in, c_out, seq_len, last_step=False)(xb).shape, [bs, c_out])
test_eq(RNNPlus(c_in, c_out, seq_len, last_step=False, **stacked_kwargs)(xb).shape, [bs, c_out])

In [None]:
# Same check with the extractor's default options and with keep_original=True.
# A fresh extractor and a fresh head are built for each model so no weights are shared.
for extractor_kwargs in ({}, {'keep_original': True}):
    feature_extractor = MultiConv1d(c_in, kss=[1,3,5,7], **extractor_kwargs)
    # per-time-step head: bidirectional last layer of size 4 -> 8 features per step -> 1 value per step
    custom_head = nn.Sequential(Transpose(1,2), nn.Linear(8,8), nn.SELU(), nn.Linear(8, 1), Squeeze())
    test_eq(LSTMPlus(c_in, c_out, seq_len, hidden_size=[32,16,8,4], bidirectional=True,
                     feature_extractor=feature_extractor, custom_head=custom_head)(xb).shape, [bs, seq_len])

[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.


In [None]:
bs = 16
c_in = 3
seq_len = 12
c_out = 2
# One continuous channel plus two integer-coded channels (values in [0, 3) and [0, 5))
x1 = torch.rand(bs,1,seq_len)
x2 = torch.randint(0,3,(bs,1,seq_len))
x3 = torch.randint(0,5,(bs,1,seq_len))
xb = torch.cat([x1,x2,x3],1)

# Every head factory must produce [bs, c_out] when the full sequence is used (last_step=False)
for custom_head in (partial(create_mlp_head, fc_dropout=0.5),
                    partial(create_pool_head, concat_pool=True, fc_dropout=0.5),
                    partial(create_pool_plus_head, fc_dropout=0.5),
                    partial(create_conv_head)):
    test_eq(LSTMPlus(c_in, c_out, seq_len, last_step=False, custom_head=custom_head)(xb).shape, [bs, c_out])
test_eq(LSTMPlus(c_in, c_out, seq_len, hidden_size=[100, 50], n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True)(xb).shape,
        [bs, c_out])

# Channels 1 and 2 are embedded as categorical variables with 3 and 5 categories
n_cat_embeds = [3, 5]
cat_pos = [1, 2]
m = LSTMPlus(c_in, c_out, seq_len, hidden_size=[100, 50], n_layers=2, bias=True, rnn_dropout=0.2, bidirectional=True,
             n_cat_embeds=n_cat_embeds, cat_pos=cat_pos)
test_eq(m(xb).shape, [bs, c_out])

In [None]:
from tsai.data.all import *
from tsai.models.utils import *

In [None]:
# Build dataloaders for the NATOPS dataset; they are reused by the training cells below.
dsid = 'NATOPS' 
bs = 16
X, y, splits = get_UCR_data(dsid, return_split=False)
# no transform on X; the target goes through Categorize()
tfms  = [None, [Categorize()]]
dls = get_ts_dls(X, y, tfms=tfms, splits=splits, bs=bs)

In [None]:
# Build LSTMPlus from the dataloaders, show its head (last element of the Sequential) and run a short training.
model = build_ts_model(LSTMPlus, dls=dls)
print(model[-1])
learn = Learner(dls, model,  metrics=accuracy)
learn.fit_one_cycle(1, 3e-3)

Sequential(
  (0): LastStep()
  (1): Linear(in_features=100, out_features=6, bias=True)
)


In [None]:
# Same training check with the default head applied to the whole sequence (last_step=False).
model = LSTMPlus(dls.vars, dls.c, dls.len, last_step=False)
learn = Learner(dls, model,  metrics=accuracy)
learn.fit_one_cycle(1, 3e-3)

epoch,train_loss,valid_loss,accuracy,time


In [None]:
# Training check with a custom head built by create_pool_head (concat_pool=True).
custom_head = partial(create_pool_head, concat_pool=True)
model = LSTMPlus(dls.vars, dls.c, dls.len, last_step=False, custom_head=custom_head)
learn = Learner(dls, model,  metrics=accuracy)
learn.fit_one_cycle(1, 3e-3)

In [None]:
# Training check with a custom head built by create_pool_plus_head (concat_pool=True).
custom_head = partial(create_pool_plus_head, concat_pool=True)
model = LSTMPlus(dls.vars, dls.c, dls.len, last_step=False, custom_head=custom_head)
learn = Learner(dls, model,  metrics=accuracy)
learn.fit_one_cycle(1, 3e-3)

In [None]:
# Show the architecture, parameter count and output shape of a 2-layer bidirectional RNNPlus.
m = RNNPlus(c_in, c_out, seq_len, hidden_size=100,n_layers=2,bidirectional=True,rnn_dropout=.5,fc_dropout=.5)
print(m)
print(count_parameters(m))
m(xb).shape

RNNPlus(
  (backbone): _RNN_Backbone(
    (to_cat_embed): Identity()
    (feature_extractor): Identity()
    (rnn): Sequential(
      (0): RNN(3, 100, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
      (1): LSTMOutput()
    )
    (transpose): Transpose(dims=-1, -2).contiguous()
  )
  (head): Sequential(
    (0): LastStep()
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=200, out_features=2, bias=True)
  )
)
81802


torch.Size([16, 2])

In [None]:
# Show the architecture, parameter count and output shape of a 2-layer bidirectional LSTMPlus.
m = LSTMPlus(c_in, c_out, seq_len, hidden_size=100,n_layers=2,bidirectional=True,rnn_dropout=.5,fc_dropout=.5)
print(m)
print(count_parameters(m))
m(xb).shape

LSTMPlus(
  (backbone): _RNN_Backbone(
    (to_cat_embed): Identity()
    (feature_extractor): Identity()
    (rnn): Sequential(
      (0): LSTM(3, 100, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
      (1): LSTMOutput()
    )
    (transpose): Transpose(dims=-1, -2).contiguous()
  )
  (head): Sequential(
    (0): LastStep()
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=200, out_features=2, bias=True)
  )
)
326002


torch.Size([16, 2])

In [None]:
# Show the architecture, parameter count and output shape of a 2-layer bidirectional GRUPlus.
m = GRUPlus(c_in, c_out, seq_len, hidden_size=100,n_layers=2,bidirectional=True,rnn_dropout=.5,fc_dropout=.5)
print(m)
print(count_parameters(m))
m(xb).shape

GRUPlus(
  (backbone): _RNN_Backbone(
    (to_cat_embed): Identity()
    (feature_extractor): Identity()
    (rnn): Sequential(
      (0): GRU(3, 100, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
      (1): LSTMOutput()
    )
    (transpose): Transpose(dims=-1, -2).contiguous()
  )
  (head): Sequential(
    (0): LastStep()
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=200, out_features=2, bias=True)
  )
)
244602


torch.Size([16, 2])

## Converting a model to TorchScript

In [None]:
# Reference model and output for the export checks below.
model = GRUPlus(c_in, c_out, hidden_size=100, n_layers=2, bidirectional=True, rnn_dropout=.5, fc_dropout=.5)
# eval mode disables dropout so the exported model can be compared against `output`
model.eval()
inp = torch.rand(1, c_in, 50)
output = model(inp)
print(output)

tensor([[-0.0677, -0.0857]], grad_fn=<AddmmBackward0>)


### Tracing

In [None]:
# save to gpu, cpu or both
traced_cpu = torch.jit.trace(model.cpu(), inp)
print(traced_cpu)
torch.jit.save(traced_cpu, "cpu.pt")

# load cpu or gpu model
traced_cpu = torch.jit.load("cpu.pt")
test_eq(traced_cpu(inp), output)

!rm "cpu.pt"

GRUPlus(
  original_name=GRUPlus
  (backbone): _RNN_Backbone(
    original_name=_RNN_Backbone
    (to_cat_embed): Identity(original_name=Identity)
    (feature_extractor): Identity(original_name=Identity)
    (rnn): Sequential(
      original_name=Sequential
      (0): GRU(original_name=GRU)
      (1): LSTMOutput(original_name=LSTMOutput)
    )
    (transpose): Transpose(original_name=Transpose)
  )
  (head): Sequential(
    original_name=Sequential
    (0): LastStep(original_name=LastStep)
    (1): Dropout(original_name=Dropout)
    (2): Linear(original_name=Linear)
  )
)


## Converting a model to ONNX

#| onnx
```python
import onnx

torch.onnx.export(model.cpu(),               # model being run
                  inp,                       # model input (or a tuple for multiple inputs)
                  "cpu.onnx",                # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  verbose=False,
                  opset_version=13,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={
                      'input'  : {0 : 'batch_size'}, 
                      'output' : {0 : 'batch_size'}} # variable length axes
                 )


onnx_model = onnx.load("cpu.onnx")           # Load the model and check it's ok
onnx.checker.check_model(onnx_model)
```

#| onnx
```python
import onnxruntime as ort

ort_sess = ort.InferenceSession('cpu.onnx')
out = ort_sess.run(None, {'input': inp.numpy()})

input_name = ort_sess.get_inputs()[0].name
output_name = ort_sess.get_outputs()[0].name
input_dims = ort_sess.get_inputs()[0].shape

test_close(out, output.detach().numpy())
!rm "cpu.onnx"
```

## Export -

In [None]:
#|eval: false
#|hide
# Resolve this notebook's name and run the notebook-to-script conversion for it.
from tsai.export import get_nb_name; nb_name = get_nb_name(locals())
from tsai.imports import create_scripts; create_scripts(nb_name)

<IPython.core.display.Javascript object>

/Users/nacho/notebooks/tsai/nbs/043_models.RNNPlus.ipynb saved at 2023-03-19 14:13:36
Correct notebook to script conversion! 😃
Sunday 19/03/23 14:13:39 CET
