<a href="https://colab.research.google.com/github/amrahmani/Pythorch/blob/main/Ch5_LSTM_CharacterPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Problem: Next character prediction**

Using PyTorch, create an LSTM and write code to train the model based on this data (https://raw.githubusercontent.com/amrahmani/Pythorch/main/WordsDataset.txt), and test it for the problem.

**Import Libraries**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import requests

**Load and Prepare Dataset**

In [2]:
# URL of the dataset
url = 'https://raw.githubusercontent.com/amrahmani/Pythorch/main/WordsDataset.txt'

# Download data from the URL
response = requests.get(url)
text = response.text.replace('\n', ' ')

# Character mapping: create a mapping from characters to indices and vice versa
# Also, converting the characters into a numerical format
chars = sorted(list(set(text))) # Only characters in dataset
char_to_idx = {ch: i for i, ch in enumerate(chars)} # Will be used for training
idx_to_char = {i: ch for i, ch in enumerate(chars)} # Will be used for testing

# Hyperparameters 1
seq_length = 30  # Length of the input sequences

# Function to prepare the dataset
def create_dataset(text, seq_length):
    X = []  # Input sequences
    Y = []  # Target characters (the next character after each input sequence)
    for i in range(0, len(text) - seq_length):                      # for example: text = 'Hello World!', seq_length = 7
        X.append([char_to_idx[ch] for ch in text[i:i+seq_length]])  # for example: X[0] = 'Hello W' Y[0] = 'o', X[1] = 'ello Wo' Y[1] = 'r' ...
        Y.append(char_to_idx[text[i+seq_length]])
    return torch.tensor(X, dtype=torch.long), torch.tensor(Y, dtype=torch.long)

# Create the dataset
X, Y = create_dataset(text, seq_length)
print('X=',X)
print('Y=',Y)

X= tensor([[21, 30, 27,  ..., 39,  1, 27],
        [30, 27,  1,  ...,  1, 27, 35],
        [27,  1, 27,  ..., 27, 35, 23],
        ...,
        [31, 35, 40,  ..., 18, 36, 21],
        [35, 40, 27,  ..., 36, 21,  6],
        [40, 27, 38,  ..., 21,  6,  0]])
Y= tensor([35, 23, 24,  ...,  6,  0,  1])


**Define and Train the LSTM**

In [3]:
# Hyperparameters 2
input_size = len(chars)  # Number of charaters in dataset. Size of the one-hot encoded vectors
# output_size = input_size  # one-hot encoded vectors: 'small' would be encoded as [1, 0, 0], 'medium' as [0, 1, 0], and 'large' as [0, 0, 1], 3 inputs => 3 outputs
hidden_size = 64  # Number of features in the hidden state
num_layers = 1  # Number of recurrent layers in the LSTM
num_epochs = 50  # Number of training epochs
learning_rate = 0.001  # Learning rate for the optimizer

# Define the LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # LSTM layer
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # Fully connected layer to map LSTM output to character space
        self.fc = nn.Linear(hidden_size, input_size) # output_size = input_size  # one-hot encoded vectors

    def forward(self, x):
        # Initialize hidden state and cell state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        # LSTM forward pass
        out, _ = self.lstm(x, (h0, c0)) # It returns two tensors, and assigns the first tensor to out and discards the second one (might contain the final hidden state) using the _
        # Fully connected layer output
        out = self.fc(out[:, -1, :]) #  It selects all elements of the 1st dimension (:), the last element (-1) of the 2nd dimension, and all elements of the 3rd dimension (:)
        return out

# Instantiate the model
model = LSTMModel(input_size, hidden_size, num_layers)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model
for epoch in range(num_epochs):
    for i in range(len(X)):
        # Convert input sequence to one-hot encoding
        inputs = torch.eye(input_size)[X[i]] # one-hot encoding: It creates an identity matrix and then select a specific row from it based on the values in the tensor X
        targets = Y[i]

        # Forward pass
        outputs = model(inputs.view(1, seq_length, -1))  # view is used to match input shape
        loss = criterion(outputs, targets.view(1))  # view(1) add an extra dimension of size 1 at the beginning of the tensor's shape.

        # Backward and optimize
        # Zero the gradients to prevent accumulation
        optimizer.zero_grad()
        # Compute gradients of the loss w.r.t. model parameters
        loss.backward()
       # Update model parameters using optimizer
        optimizer.step()

    # Print loss every 10 epochs
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/50], Loss: 1.1802
Epoch [20/50], Loss: 0.0482
Epoch [30/50], Loss: 0.0025
Epoch [40/50], Loss: 0.0041
Epoch [50/50], Loss: 0.0000


**Test the LSTM**

In [6]:
# Function to generate text using the trained model
def generate_text(model, start_text, length):
    # Convert start_text to a tensor of indices
    input_seq = torch.tensor([char_to_idx[ch] for ch in start_text], dtype=torch.long).view(1, -1)
    result = start_text
    with torch.no_grad():  # Disable gradient calculation
        for _ in range(length):
            # Convert input_seq to one-hot encoding
            input_seq_onehot = torch.eye(input_size)[input_seq]
            # Forward pass
            output = model(input_seq_onehot)
            # Get the predicted character
            _, predicted = torch.max(output, 1)
            next_char = idx_to_char[predicted.item()]
            # Append the predicted character to the result
            result += next_char
            # Update input_seq to include the predicted character
            input_seq = torch.cat((input_seq[:, 1:], predicted.view(1, 1)), dim=1)
    return result

# Generate text
start_text = "of machine l"
generated_text = generate_text(model, start_text, 10)
print("Generated text:", generated_text)

Generated text: of machine learning (D
