# Modelos: Seq

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../lib')

import math
import random
from bunch import Bunch

import numpy as np
import pandas as pd

import torch
from   torch               import nn, Tensor
import torch.nn.functional as F
from   torch.utils.data    import DataLoader, Subset
from   torch.optim         import SparseAdam, Adam

import pytorch_common.util as pu
from   pytorch_common.modules.fn import Fn
from   pytorch_common.callbacks  import (
    EarlyStop,
    ReduceLROnPlateau,
    Validation,
    SaveBestModel
)
from pytorch_common.callbacks.output import Logger, MetricsPlotter
from pytorch_common.util import (
    set_device_name,
    get_device,
    LoggerBuilder
)

import model as ml

import util as ut

import data.dataset as ds
import data.plot as pl
import data as dt
from data.plot import describe_num_var

import metric as mt
import metric.discretizer as dr

## Setup

In [3]:
pu.LoggerBuilder().on_console().build()

In [4]:
pu.set_device_name('gpu')

In [5]:
pu.get_device()

In [6]:
cpu = torch.device("cpu")

In [7]:
torch.cuda.is_available()

In [8]:
torch.__version__

In [9]:
def set_seed(value):
    random.seed(value)
    np.random.seed(value)
    torch.manual_seed(value)

In [10]:
set_seed(42)

In [18]:
params = Bunch(
    epochs               = 20,
    feat_seq_len         = 5,
    max_input_seq_len    = 10,
    id_seq_init          = 1,
    batch_size           = 16,
    n_heads              = 2,
    hidden_state_size    = 100,
    n_transformer_layers = 2,
    dropout              = 0.1,
    padding_emb_position = 0,
)

### Examples

In [None]:
vocab_size = embedding_weights.shape[0]
emb_size   = embedding_weights.shape[1]

emb = ml.EmbeddingLayerFactory.create_from_weights(embedding_weights).to(pu.get_device())
# emb = torch.nn.Embedding(emb.shape[0], params.emb_size)

pos_emb = ml.PositionalEncoding(emb_size, params.dropout).to(pu.get_device())

encoder = ml.TransformerEncoderFactory.create(
    emb_size          = emb_size,
    n_heads           = params.n_heads,
    hidden_state_size = params.hidden_state_size,
    dropout           = params.dropout,
    n_layers          = params.n_transformer_layers
).to(pu.get_device())

mlp_decoder = ml.LinearUtils.init_weights(
    nn.Linear(params.feat_seq_len*emb_size, vocab_size)
).to(pu.get_device())

softmax = nn.Softmax(dim=1)

In [61]:
input_batch = torch.randint(0, 1000, (params.feat_seq_len, params.batch_size)).to(pu.get_device())

# src: (S, N, E)
input_batch_emb = emb(input_batch) * math.sqrt(emb_size)

# src: (S, N, E)
input_batch_emb = pos_emb(input_batch_emb)

# src_mask: (S, S)
src_mask = ml.generate_square_subsequent_mask(params.feat_seq_len).to(pu.get_device())

# src_key_padding_mask: (N, S)
src_key_padding_mask = (input_batch == params.padding_emb_position).to(pu.get_device()).byte()

src_key_padding_mask

In [62]:
input_batch.shape, input_batch_emb.shape, src_mask.shape, src_key_padding_mask.shape

In [63]:
output =  encoder(
    input_batch_emb, 
    mask                 = src_mask,
    src_key_padding_mask = src_key_padding_mask.t()
)
output.shape

AssertionError: only bool and floating types of key_padding_mask are supported

In [142]:
output2 = output.view(params.batch_size, -1)
output2.shape

In [143]:
mlp_output = mlp_decoder(output2)

In [144]:
mlp_output

In [145]:
softmax_output = softmax(mlp_output)

In [146]:
softmax_output

In [147]:
pos = torch.argmax(softmax_output[0])
softmax_output[0][pos]