<br>
<br>

# **Modelos del lenguaje basados en redes neuronales artificiales**

## **Redes neuronales recurrentes (RNN)**

### **Seq2seq**



In [1]:
import random
import string
import torch

allowed_chars = string.digits + '+'


class Generator():
    """Synthetic data source for a fixed-width integer addition task.

    Every example is a pair of strings: an input made of two zero-padded
    operands joined by ``+`` (e.g. ``"012+345"``) and the zero-padded sum
    (e.g. ``"0357"``). Strings are encoded as one-hot tensors whose columns
    follow the order of ``allowed_chars``.

    Args:
        n_digits: Width of each operand. Operands are drawn uniformly from
            ``[0, 10**n_digits - 1]`` and the sum is padded to
            ``n_digits + 1`` characters. Defaults to 3.

    Raises:
        ValueError: If ``n_digits`` is smaller than 1.
    """

    def __init__(self, n_digits: int = 3) -> None:
        if n_digits < 1:
            raise ValueError("n_digits must be at least 1")
        self.n_digits = n_digits

    def sample(self):
        """Create one training example.

        Returns:
            A ``(source, target)`` tuple of strings, e.g.
            ``("549+666", "1215")`` for the default width.
        """
        upper = 10 ** self.n_digits - 1
        s1 = random.randint(0, upper)
        s2 = random.randint(0, upper)
        source = str(s1).zfill(self.n_digits) + "+" + str(s2).zfill(self.n_digits)
        # The sum of two n-digit numbers never needs more than n + 1 digits.
        target = str(s1 + s2).zfill(self.n_digits + 1)
        return source, target

    def batch(self, n):
        """Create ``n`` training examples.

        Returns:
            Two lists of length ``n``: the input strings and the matching
            output strings. Both are empty when ``n <= 0``.
        """
        pairs = [self.sample() for _ in range(n)]
        inputs = [source for source, _ in pairs]
        outputs = [target for _, target in pairs]
        return inputs, outputs

    def string_to_tensor(self, s):
        """Encode a string as a one-hot tensor.

        Returns:
            A float tensor of shape ``(len(s), len(allowed_chars))``.

        Raises:
            ValueError: If ``s`` contains a character outside
                ``allowed_chars``.
        """
        tensor = torch.zeros(len(s), len(allowed_chars))
        for i, char in enumerate(s):
            tensor[i, allowed_chars.index(char)] = 1
        return tensor

    def tensor_to_string(self, tensor):
        """Decode a 2-D tensor of per-position scores into a string.

        The character chosen for each row is the one with the highest value
        in that row, so this works for one-hot targets and for model outputs.
        """
        _, max_idx = tensor.max(1)
        return ''.join(allowed_chars[i] for i in max_idx.tolist())

    def batch_to_tensor(self, n):
        """Create ``n`` training examples encoded as one-hot tensors.

        Returns:
            A ``(inputs, outputs)`` tuple of tensors with shapes
            ``(n, 2 * n_digits + 1, vocab)`` and ``(n, n_digits + 1, vocab)``
            where ``vocab = len(allowed_chars)``.
        """
        inputs, outputs = self.batch(n)
        if not inputs:
            # torch.stack rejects an empty list; return correctly shaped
            # empty tensors instead of crashing.
            vocab = len(allowed_chars)
            return (
                torch.zeros(0, 2 * self.n_digits + 1, vocab),
                torch.zeros(0, self.n_digits + 1, vocab),
            )
        seq_in = [self.string_to_tensor(source) for source in inputs]
        seq_out = [self.string_to_tensor(target) for target in outputs]
        return torch.stack(seq_in), torch.stack(seq_out)

In [2]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


# Seq2seq model architecture
class Seq2Seq(nn.Module):
    """LSTM encoder-decoder producing a fixed-length output sequence.

    The encoder reads the whole input sequence. Its final hidden state is
    repeated ``output_len`` times and fed as the input sequence of the
    decoder, which is also initialised with the encoder's final
    ``(hidden, cell)`` state. A linear layer followed by a softmax turns each
    decoder step into a distribution over ``output_size`` classes.

    Args:
        input_size: Feature size of every input step.
        hidden_size: Hidden size shared by encoder and decoder.
        output_size: Number of classes predicted at every output step.
        output_len: Number of output steps to generate. Defaults to 4.
    """

    def __init__(self, input_size, hidden_size, output_size, output_len=4):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_len = output_len
        self.encoder = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.decoder = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.output = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden=None):
        """Run the encoder-decoder on a batch.

        Args:
            input: Batched tensor of shape ``(batch, seq_len, input_size)``
                (both LSTMs are built with ``batch_first=True``).
            hidden: Optional initial ``(h_0, c_0)`` state for the encoder.

        Returns:
            Tensor of shape ``(batch, output_len, output_size)`` holding
            softmax probabilities along the last dimension.
        """
        _, (hn_enc, cn_enc) = self.encoder(input, hidden)

        # Final hidden state of the (single) encoder layer, repeated once
        # per output step so the decoder sees it at every position.
        latent_tensor = hn_enc[0].unsqueeze(1).repeat(1, self.output_len, 1)

        out_dec, _ = self.decoder(latent_tensor, (hn_enc, cn_enc))
        return F.softmax(self.output(out_dec), dim=2)
    
# Build the model: the character vocabulary is both the input and the output space
model = Seq2Seq(
    input_size=len(allowed_chars),
    hidden_size=128,
    output_size=len(allowed_chars),
)

In [3]:
# Training loop
def train(model, optimizer, loss_fn, n_epochs, batch_size, data_generator=None):
    """Train ``model`` on freshly generated batches.

    Each epoch draws one new batch, runs a forward/backward pass and applies
    one optimizer step. The loss is printed every 100 epochs.

    Args:
        model: Callable module mapping the input batch to predictions.
        optimizer: Optimizer bound to the model's parameters.
        loss_fn: Callable ``loss_fn(y_pred, y)`` returning a scalar tensor.
        n_epochs: Number of optimisation steps to run.
        batch_size: Number of examples requested per step.
        data_generator: Object exposing ``batch_to_tensor(batch_size)`` and
            returning ``(x, y)``. When omitted, the module-level ``dg``
            is used.
    """
    if data_generator is None:
        # Keep the original behaviour: fall back to the global generator.
        data_generator = dg

    for epoch in range(n_epochs):
        optimizer.zero_grad()
        x, y = data_generator.batch_to_tensor(batch_size)
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        loss.backward()
        optimizer.step()

        # Print the loss every 100 epochs
        if epoch % 100 == 0:
            print(f"Epoch: {epoch}, Loss: {loss.item()}")

# Data generator read by train() and evaluate() through the global name `dg`
dg = Generator()

# Loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Train the model
train(model, optimizer, loss_fn, n_epochs=5000, batch_size=128)


Epoch: 0, Loss: 0.08257734030485153
Epoch: 100, Loss: 0.06488753110170364
Epoch: 200, Loss: 0.04948587715625763
Epoch: 300, Loss: 0.04897153005003929
Epoch: 400, Loss: 0.04792485386133194
Epoch: 500, Loss: 0.04817119613289833
Epoch: 600, Loss: 0.0436558872461319
Epoch: 700, Loss: 0.04369232431054115
Epoch: 800, Loss: 0.04253627359867096
Epoch: 900, Loss: 0.04441256821155548
Epoch: 1000, Loss: 0.045851930975914
Epoch: 1100, Loss: 0.04189189523458481
Epoch: 1200, Loss: 0.04303479939699173
Epoch: 1300, Loss: 0.04052378237247467
Epoch: 1400, Loss: 0.04200179502367973
Epoch: 1500, Loss: 0.04290764033794403
Epoch: 1600, Loss: 0.036842744797468185
Epoch: 1700, Loss: 0.03484726324677467
Epoch: 1800, Loss: 0.034859251230955124
Epoch: 1900, Loss: 0.03147902712225914
Epoch: 2000, Loss: 0.032323453575372696
Epoch: 2100, Loss: 0.029674706980586052
Epoch: 2200, Loss: 0.02693380042910576
Epoch: 2300, Loss: 0.02868812158703804
Epoch: 2400, Loss: 0.027323616668581963
Epoch: 2500, Loss: 0.02644002437591

In [6]:
# Evaluate the model
def evaluate(model, n, data_generator=None):
    """Print ``n`` freshly generated examples next to the model's predictions.

    Each printed line has the form ``<input> <prediction> <target>``.

    Args:
        model: Callable module mapping the input batch to per-position scores.
        n: Number of examples to generate and print.
        data_generator: Object exposing ``batch_to_tensor(n)`` and
            ``tensor_to_string(tensor)``. When omitted, the module-level
            ``dg`` is used.
    """
    if data_generator is None:
        # Keep the original behaviour: fall back to the global generator.
        data_generator = dg

    x, y = data_generator.batch_to_tensor(n)
    # Inference only: no need to build the autograd graph.
    with torch.no_grad():
        y_pred = model(x)

    for source, predicted, target in zip(x, y_pred, y):
        print(
            data_generator.tensor_to_string(source),
            data_generator.tensor_to_string(predicted),
            data_generator.tensor_to_string(target),
        )

# Show ten examples, one per line: input, prediction, target
evaluate(model, n=10)

549+666 1215 1215
486+380 0866 0866
190+960 1150 1150
563+508 1071 1071
954+089 1043 1043
582+751 1333 1333
075+027 0502 0102
376+499 0875 0875
884+665 1549 1549
677+614 1291 1291
