In [31]:
# Libraries
import torch
import torch.nn as nn
import torch.nn.functional as F # hot encoding
# Seed PyTorch's global RNG so weight initialisation, and with it the loss
# curves and generated samples, is reproducible on Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Training corpus: one short paragraph, modelled character by character.
text = (
    "Sequence to sequence models are a type of neural network architecture "
    "used for tasks such as machine translation, text summarization, and "
    "image captioning. They consist of an encoder that processes the input "
    "sequence and a decoder that generates the output sequence."
)


In [32]:
# Build the character vocabulary plus the two lookup tables that translate
# characters to integer ids and back again.
chars = sorted(set(text))
char_to_idx = dict(zip(chars, range(len(chars))))
print(char_to_idx)
idx_to_char = dict(enumerate(chars))
print(idx_to_char)
vocal_size = len(chars)
print(vocal_size)

{' ': 0, ',': 1, '.': 2, 'S': 3, 'T': 4, 'a': 5, 'c': 6, 'd': 7, 'e': 8, 'f': 9, 'g': 10, 'h': 11, 'i': 12, 'k': 13, 'l': 14, 'm': 15, 'n': 16, 'o': 17, 'p': 18, 'q': 19, 'r': 20, 's': 21, 't': 22, 'u': 23, 'w': 24, 'x': 25, 'y': 26, 'z': 27}
{0: ' ', 1: ',', 2: '.', 3: 'S', 4: 'T', 5: 'a', 6: 'c', 7: 'd', 8: 'e', 9: 'f', 10: 'g', 11: 'h', 12: 'i', 13: 'k', 14: 'l', 15: 'm', 16: 'n', 17: 'o', 18: 'p', 19: 'q', 20: 'r', 21: 's', 22: 't', 23: 'u', 24: 'w', 25: 'x', 26: 'y', 27: 'z'}
28


In [33]:
# Text to tensor 

def text_to_tensor(text):
    """One-hot encode a string, one character per row.

    Args:
        text: String whose characters are all keys of the notebook-level
            ``char_to_idx`` table. May be empty.

    Returns:
        Float tensor of shape ``(len(text), 1, vocal_size)``; the singleton
        middle axis is inserted by ``unsqueeze(1)``.

    Raises:
        KeyError: If ``text`` contains a character missing from
            ``char_to_idx``.
    """
    # Force int64 explicitly: ``torch.tensor([])`` defaults to float32 and
    # ``F.one_hot`` only accepts integer index tensors, so an empty string
    # would otherwise crash instead of producing an empty encoding.
    indices = torch.tensor([char_to_idx[ch] for ch in text], dtype=torch.long)
    one_hot = F.one_hot(indices, num_classes=vocal_size)
    return one_hot.float().unsqueeze(1)



In [34]:
# create the models 
class SimpleRNN(nn.Module):
    """Single ``nn.RNN`` layer followed by a linear head applied to every step."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the head's scores for ``x`` with an extra axis inserted at dim 1."""
        # nn.RNN returns (per-step outputs, final hidden state); only the
        # per-step outputs are used.
        step_outputs = self.rnn(x)[0]
        scores = self.fc(step_outputs)
        return scores[:, None]

class SimpleLSTM(nn.Module):
    """Single ``nn.LSTM`` layer followed by a linear head applied to every step."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the head's scores for ``x`` with an extra axis inserted at dim 1."""
        # nn.LSTM returns (per-step outputs, (h_n, c_n)); the final states
        # are not needed here.
        step_outputs = self.lstm(x)[0]
        scores = self.fc(step_outputs)
        return scores[:, None]
    

In [35]:
# train with model parameters
def train_model(model, data, target, epochs=500, lr=0.01, max_grad_norm=None):
    """Fit ``model`` on a single (data, target) pair with Adam and cross-entropy.

    The whole of ``data`` is passed through the model once per epoch, and the
    loss is printed every 100 epochs.

    Args:
        model: Module whose output holds per-class scores along its last
            dimension.
        data: Input handed to ``model`` unchanged.
        target: Integer class indices. It is flattened before the loss is
            computed, so it must contain one index per row of the flattened
            model output.
        epochs: Number of optimisation steps.
        lr: Learning rate passed to Adam.
        max_grad_norm: When not ``None``, gradients are clipped to this total
            norm before every optimiser step. ``None`` (the default) disables
            clipping.

    Returns:
        The same ``model`` object, updated in place.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.train()  # make sure train-time behaviour is active
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(data)
        # Read the class count from the output itself instead of a notebook
        # global, and use reshape so non-contiguous outputs are accepted too.
        logits = output.reshape(-1, output.size(-1))
        loss = criterion(logits, target.reshape(-1))
        loss.backward()
        if max_grad_norm is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        if epoch % 100 == 0:
            print(f'Epoch {epoch}, Loss: {loss.item()}')

    return model


In [36]:
def generate_text(model, seed, length=60):
    """Greedily extend ``seed`` one character at a time up to ``length`` characters.

    At every step the last ``len(seed)`` characters produced so far are
    re-encoded with ``text_to_tensor`` and run through ``model``; the index of
    the largest value in the final element of the output selects the next
    character through ``idx_to_char``.

    Args:
        model: Callable taking the encoded window and returning scores.
        seed: Starting string. Must be non-empty whenever at least one
            character has to be generated.
        length: Total length of the returned string, seed included. If it
            does not exceed ``len(seed)``, ``seed`` is returned unchanged.

    Returns:
        ``seed`` followed by the generated characters.

    Raises:
        ValueError: If characters must be generated but ``seed`` is empty
            (there would be no context to feed the model).
    """
    window = len(seed)
    steps = length - window
    if steps > 0 and not seed:
        raise ValueError("seed must contain at least one character")

    generated = seed
    # Inference only: skip autograd bookkeeping for every forward pass.
    with torch.no_grad():
        for _ in range(steps):
            context = text_to_tensor(generated[-window:])
            output = model(context)
            next_idx = torch.argmax(output[-1]).item()
            generated += idx_to_char[next_idx]
    return generated
                            

In [37]:
# Next-character prediction setup: the input is every character except the
# last, and the target at each position is the id of the character after it.
input_tensor = text_to_tensor(text[:-1])
target_tensor = torch.tensor(list(map(char_to_idx.__getitem__, text[1:])))


In [38]:
# Train the plain RNN with a 64-unit hidden state on the character data.
rnn_model = train_model(
    SimpleRNN(input_size=vocal_size, hidden_size=64, output_size=vocal_size),
    data=input_tensor,
    target=target_tensor,
)

Epoch 0, Loss: 3.358973264694214
Epoch 100, Loss: 0.01478792168200016
Epoch 200, Loss: 2.1885597705841064
Epoch 300, Loss: 0.9962773323059082
Epoch 400, Loss: 0.5164268612861633


In [39]:
# Train the LSTM with a 64-unit hidden state on the same character data.
lstm_model = train_model(
    SimpleLSTM(input_size=vocal_size, hidden_size=64, output_size=vocal_size),
    data=input_tensor,
    target=target_tensor,
)

Epoch 0, Loss: 3.3185248374938965
Epoch 100, Loss: 0.10996086895465851
Epoch 200, Loss: 0.013026371598243713
Epoch 300, Loss: 0.004575468599796295
Epoch 400, Loss: 0.002689213026314974


In [40]:
# Prompt both trained models with the opening words of the corpus and print
# their continuations for a side-by-side comparison.
seed = "Sequence to sequence models are a "
print("RNN Output:\n", generate_text(model=rnn_model, seed=seed, length=60))
print("\nLSTM Output:\n", generate_text(model=lstm_model, seed=seed, length=60))


RNN Output:
 Sequence to sequence models are a terk arasesequence tranmie

LSTM Output:
 Sequence to sequence models are a type of neural network arc


- One-hot encoding purpose:

    - Converts each character index into a binary vector
    - The vector has length equal to the vocabulary size (`vocal_size` in the code; 28 for this text)
    - Contains all zeros except for a 1 at the position corresponding to the character's index


- Decoder 
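    - Converts the model's output scores (raw logits, not yet probabilities) back into human-readable text: take the argmax over the vocabulary and look the resulting index up in `idx_to_char`
    - Reverses the encoding step we saw earlier (character → index → one-hot vector)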
