### **LSTM Normal**

In [16]:
import torch
from torch import nn

lstm = nn.LSTM(10, 20, batch_first=True)  # input_size, hidden_size
input = torch.randn(4, 32, 10)  # batch_size, seq_len, input_size

(output, (hn, cn)) = lstm(input)  # output: (batch, seq_len, hidden_size), hn: (num_layers, batch, hidden_size)

print(output.shape)  # (batch, seq_len, hidden_size)
print(hn.shape)  # (num_layers, batch, hidden_size)
print(cn.shape)  # (num_layers, batch, hidden_size)


torch.Size([4, 32, 20])
torch.Size([1, 4, 20])
torch.Size([1, 4, 20])


In [25]:
print(lstm.state_dict()['weight_ih_l0'].shape)  # (4 * hidden_size, input_size)

print(lstm.state_dict()['weight_hh_l0'].shape)


torch.Size([80, 10])
torch.Size([80, 20])


In [24]:
lstm.state_dict()

OrderedDict([('weight_ih_l0',
              tensor([[-5.7216e-02, -1.9367e-01,  9.2481e-02, -1.7623e-01, -1.6725e-01,
                       -1.6577e-01,  1.8495e-01, -6.5760e-02, -1.1191e-01, -1.8601e-01],
                      [ 1.2134e-02, -1.5433e-01,  6.9470e-02, -1.0564e-01,  6.6758e-02,
                       -1.3266e-02, -5.9358e-02, -3.6723e-02,  2.5225e-02,  1.2634e-01],
                      [ 1.4621e-01,  1.0567e-02, -7.3916e-02, -1.5028e-01,  1.4370e-01,
                       -2.9287e-03, -1.8964e-01,  4.1497e-02, -1.2342e-01, -9.0682e-02],
                      [ 1.0304e-01,  2.2318e-01, -7.8909e-02,  4.4542e-02, -1.2780e-01,
                        1.0774e-01, -1.0090e-01, -9.7190e-04,  1.2857e-01, -2.0976e-01],
                      [ 1.7704e-01, -1.3127e-01, -3.9944e-02, -1.0901e-01,  1.2033e-01,
                        1.7726e-01,  9.4991e-02, -6.6559e-02, -1.3103e-01, -2.1678e-01],
                      [-1.1451e-01,  1.3932e-01,  2.1815e-01,  7.4647e-02, -1.4192e-0

### **LSTM multicapa**

In [6]:
import torch
from torch import nn

lstm = nn.LSTM(10, 20, num_layers=3, batch_first=True)  # input_size, hidden_size, num_layers
input = torch.randn(4, 32, 10)  # batch_size, seq_len, input_size

(output, (hn, cn)) = lstm(input)  # output: (batch, seq_len, hidden_size), hn: (num_layers, batch, hidden_size)

print(output.shape)  # (batch, seq_len, hidden_size)
print(hn.shape)  # (num_layers, batch, hidden_size)
print(cn.shape)  # (num_layers, batch, hidden_size)

torch.Size([4, 32, 20])
torch.Size([3, 4, 20])
torch.Size([3, 4, 20])


### **LSTM Bidireccional**

In [7]:
import torch
from torch import nn

lstm = nn.LSTM(10, 20, batch_first=True, bidirectional=True)  # input_size, hidden_size, num_layers
input = torch.randn(4, 32, 10)  # batch_size, seq_len, input_size

(output, (hn, cn)) = lstm(input)  # output: (batch, seq_len, hidden_size), hn: (num_layers, batch, hidden_size)

print(output.shape)  # (batch, seq_len, hidden_size)
print(hn.shape)  # (num_layers, batch, hidden_size)
print(cn.shape)  # (num_layers, batch, hidden_size)

torch.Size([4, 32, 40])
torch.Size([2, 4, 20])
torch.Size([2, 4, 20])


### **LSTM Bidireccional multicapa**

In [9]:
import torch
from torch import nn

lstm = nn.LSTM(10, 20, num_layers=3, batch_first=True, bidirectional=True)  # input_size, hidden_size, num_layers
input = torch.randn(4, 32, 10)  # batch_size, seq_len, input_size

(output, (hn, cn)) = lstm(input)  # output: (batch, seq_len, hidden_size), hn: (num_layers, batch, hidden_size)

print(output.shape)  # (batch, seq_len, hidden_size)
print(hn.shape)  # (num_layers, batch, hidden_size)
print(cn.shape)  # (num_layers, batch, hidden_size)

torch.Size([4, 32, 40])
torch.Size([6, 4, 20])
torch.Size([6, 4, 20])


**Configuración de la LSTM**: 

nn.LSTM(10, 20, num_layers=3, batch_first=True, bidirectional=True)

    - `input_size=10`: Cada token de entrada tiene una dimensión de 10.
    - `hidden_size=20`: Cada dirección de la LSTM (hacia adelante y hacia atrás) produce un vector oculto de 20 dimensiones por capa y por paso de tiempo.
    - `num_layers=3`: La LSTM tiene 3 capas apiladas.
    - `batch_first=True`: Indica que la dimensión del lote es la primera dimensión de la entrada y salida.
    - `bidirectional=True`: Indica que la LSTM es bidireccional, procesando los datos tanto en la dirección hacia adelante como hacia atrás.

### **Tensores de Salida**

- **`output`**: `torch.Size([4, 32, 40])`
    - La salida tiene dimensiones `(batch_size, seq_len, num_directions * hidden_size)`.
    - `batch_size=4`: Tienes 4 ejemplos en un lote.
    - `seq_len=32`: Cada ejemplo es una secuencia de 32 tokens.
    - `num_directions * hidden_size=40`: Como es una LSTM bidireccional (`num_directions=2`), y cada dirección produce vectores de 20 dimensiones, la dimensión final es de 40. Esto es porque las salidas de ambas direcciones se concatenan para cada paso de tiempo.

- **`hn` (estado oculto)**: `torch.Size([6, 4, 20])`
    - La dimensión de `hn` es `(num_layers * num_directions, batch, hidden_size)`.
    - `num_layers * num_directions=6`: Dado que tienes 3 capas y la LSTM es bidireccional, tienes 6 conjuntos de vectores ocultos al final del paso de tiempo final para cada dirección y cada capa.
    - `batch=4`: Correspondiente al tamaño del lote.
    - `hidden_size=20`: La dimensión de cada vector oculto.

- **`cn` (estado de la celda)**: `torch.Size([6, 4, 20])`
    - La dimensión de `cn` es la misma que la de `hn` porque representa el estado de la celda LSTM para cada capa y dirección al final de la secuencia, siguiendo la misma estructura.

### **Capa lineal**

In [15]:
import torch
from torch import nn

lineal = nn.Linear(10, 5)

batch = torch.randn(5,6,7,8,3, 4, 10)

output = lineal(batch)

print(output.shape)  # (3, 7, 5)

torch.Size([5, 6, 7, 8, 3, 4, 5])


#### **GloVe embeddings**

In [46]:
from torchtext.vocab import GloVe

glove_vectors = GloVe(name='6B', dim=300)

glove_embedding = nn.Embedding.from_pretrained(glove_vectors.vectors)

# glove_embedding = torch.cat((glove_embedding.weight, torch.zeros(1, 300)), 0)

In [56]:
# add one more word to the embedding at the end

print(glove_embedding.weight.shape)  # (400001, 300)
glove_embedding.weight[-1] = torch.zeros(300, dtype=torch.float32)


torch.Size([400000, 300])


In [57]:
glove_embedding.weight[-1].shape

torch.Size([300])

In [64]:
glove_embedding(torch.LongTensor([399999]))  # torch.Size([1, 300])

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [22]:
paq = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

def fun(batch):
    a = zip(*batch)  # Desempaqueta el batch. Prueba con zip(batch)
    print(*batch)  # Prueba con print(batch)
    print(a)
    for i in a:
        print(i)

fun(paq)

[1, 2, 3] [4, 5, 6] [7, 8, 9]
<zip object at 0x1045b2880>
(1, 4, 7)
(2, 5, 8)
(3, 6, 9)
