<a href="https://colab.research.google.com/github/YousefAbua/Intro-To-DL/blob/main/HW3/HW3_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split

In [13]:
#Sample Text
text = """Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters
that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like
text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These
predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves
the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they
can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies,
making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability
of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes,
thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it.
This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and
virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and
human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."""

In [14]:
# Sorts chars and removes duplicates
chars = sorted(list(set(text)))
# Creates a dictionary that maps each char to unique index
ix_to_char = {i: ch for i, ch in enumerate(chars)}
# Creates a dictionary that maps each unique index back to its char
char_to_ix = {ch: i for i, ch in enumerate(chars)}

In [15]:
# Prepare dataset
def Define_Dataset(max_length):
  x = []
  y = []
  for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    x.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

  x = np.array(x)
  y = np.array(y)
  return x, y

x10, y10 = Define_Dataset(10) # Max Length = 10
x20, y20 = Define_Dataset(20) # Max Length = 20
x30, y30 = Define_Dataset(30) # Max Length = 30

# Split dataset into train/validation sets
x10_train, x10_val, y10_train, y10_val = train_test_split(x10, y10, train_size=0.2, random_state=42)
x20_train, x20_val, y20_train, y20_val = train_test_split(x20, y20, train_size=0.2, random_state=42)
x30_train, x30_val, y30_train, y30_val = train_test_split(x30, y30, train_size=0.2, random_state=42)

# Convert to pytorch tensor
x10_train = torch.tensor(x10_train, dtype=torch.long)
y10_train = torch.tensor(y10_train, dtype=torch.long)
x10_val = torch.tensor(x10_val, dtype=torch.long)
y10_val = torch.tensor(y10_val, dtype=torch.long)

x20_train = torch.tensor(x20_train, dtype=torch.long)
y20_train = torch.tensor(y20_train, dtype=torch.long)
x20_val = torch.tensor(x20_val, dtype=torch.long)
y20_val = torch.tensor(y20_val, dtype=torch.long)

x30_train = torch.tensor(x30_train, dtype=torch.long)
y30_train = torch.tensor(y30_train, dtype=torch.long)
x30_val = torch.tensor(x30_val, dtype=torch.long)
y30_val = torch.tensor(y30_val, dtype=torch.long)

In [16]:
def training_loop(x_train, y_train, x_val, y_val, model, criterion, optimizer, epochs):
  for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(x_train)
    train_loss = criterion(output, y_train)
    train_loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_output = model(x_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {train_loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, max_length, char_to_ix, ix_to_char, initial_str):
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

In [17]:
# Define RNN model
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])  # Taking the last time step output
        return output

# Define LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.lstm(embedded)
        output = self.fc(output[:, -1, :])  # Taking the last time step output
        return output

# Define GRU model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.gru(embedded)
        output = self.fc(output[:, -1, :])  # Taking the last time step output
        return output

In [18]:
hidden_size = 128
learning_rate = 0.001
epochs = 100

RNN10_Model = RNNModel(len(chars), hidden_size, len(chars))
LSTM10_Model = LSTMModel(len(chars), hidden_size, len(chars))
GRU10_Model = GRUModel(len(chars), hidden_size, len(chars))

criterion = nn.CrossEntropyLoss()

RNN10_optimizer = optim.Adam(RNN10_Model.parameters(), lr=learning_rate)
LSTM10_optimizer = optim.Adam(LSTM10_Model.parameters(), lr=learning_rate)
GRU10_optimizer = optim.Adam(GRU10_Model.parameters(), lr=learning_rate)

print("Start Training for sequence size: 10....\n")
print("RNN MODEL")
training_loop(
    x_train = x10_train,
    y_train = y10_train,
    x_val = x10_val,
    y_val = y10_val,
    model = RNN10_Model,
    criterion = criterion,
    optimizer = RNN10_optimizer,
    epochs = 100
)
print("\nLSTM Model")
training_loop(
    x_train = x10_train,
    y_train = y10_train,
    x_val = x10_val,
    y_val = y10_val,
    model = LSTM10_Model,
    criterion = criterion,
    optimizer = LSTM10_optimizer,
    epochs = 100
)
print("\nGRU Model")
training_loop(
    x_train = x10_train,
    y_train = y10_train,
    x_val = x10_val,
    y_val = y10_val,
    model = GRU10_Model,
    criterion = criterion,
    optimizer = GRU10_optimizer,
    epochs = 100
)

Start Training for sequence size: 10....

RNN MODEL
Epoch 10, Loss: 2.9727702140808105, Validation Loss: 3.1283562183380127, Validation Accuracy: 0.23198316991329193
Epoch 20, Loss: 2.411003351211548, Validation Loss: 2.7808191776275635, Validation Accuracy: 0.2498684972524643
Epoch 30, Loss: 2.103773355484009, Validation Loss: 2.6740047931671143, Validation Accuracy: 0.2698579728603363
Epoch 40, Loss: 1.8622057437896729, Validation Loss: 2.600128412246704, Validation Accuracy: 0.3045765459537506
Epoch 50, Loss: 1.6448084115982056, Validation Loss: 2.576533794403076, Validation Accuracy: 0.3135192096233368
Epoch 60, Loss: 1.4354736804962158, Validation Loss: 2.5723626613616943, Validation Accuracy: 0.3435034155845642
Epoch 70, Loss: 1.2305023670196533, Validation Loss: 2.581721305847168, Validation Accuracy: 0.34981587529182434
Epoch 80, Loss: 1.02951979637146, Validation Loss: 2.60629940032959, Validation Accuracy: 0.34560757875442505
Epoch 90, Loss: 0.8377300500869751, Validation Los

In [19]:
# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next charac"
predicted_char = predict_next_char(RNN10_Model, 10, char_to_ix, ix_to_char, test_str)
print(f"RNN: Predicted next character: '{predicted_char}'")

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next charac"
predicted_char = predict_next_char(LSTM10_Model, 10, char_to_ix, ix_to_char, test_str)
print(f"LSTM: Predicted next character: '{predicted_char}'")

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next charac"
predicted_char = predict_next_char(GRU10_Model, 10, char_to_ix, ix_to_char, test_str)
print(f"GRU: Predicted next character: '{predicted_char}'")

RNN: Predicted next character: 't'
LSTM: Predicted next character: 't'
GRU: Predicted next character: 't'


In [20]:
RNN20_Model = RNNModel(len(chars), hidden_size, len(chars))
LSTM20_Model = LSTMModel(len(chars), hidden_size, len(chars))
GRU20_Model = GRUModel(len(chars), hidden_size, len(chars))

criterion = nn.CrossEntropyLoss()

RNN20_optimizer = optim.Adam(RNN20_Model.parameters(), lr=learning_rate)
LSTM20_optimizer = optim.Adam(LSTM20_Model.parameters(), lr=learning_rate)
GRU20_optimizer = optim.Adam(GRU20_Model.parameters(), lr=learning_rate)


print("Start Training for sequence size: 20....\n")
print("RNN MODEL")
training_loop(
    x_train = x20_train,
    y_train = y20_train,
    x_val = x20_val,
    y_val = y20_val,
    model = RNN20_Model,
    criterion = criterion,
    optimizer = RNN20_optimizer,
    epochs = 100
)
print("\nLSTM Model")
training_loop(
    x_train = x20_train,
    y_train = y20_train,
    x_val = x20_val,
    y_val = y20_val,
    model = LSTM20_Model,
    criterion = criterion,
    optimizer = LSTM20_optimizer,
    epochs = 100
)
print("\nGRU Model")
training_loop(
    x_train = x20_train,
    y_train = y20_train,
    x_val = x20_val,
    y_val = y20_val,
    model = GRU20_Model,
    criterion = criterion,
    optimizer = GRU20_optimizer,
    epochs = 100
)

Start Training for sequence size: 20....

RNN MODEL
Epoch 10, Loss: 2.9883623123168945, Validation Loss: 3.108588218688965, Validation Accuracy: 0.2303222417831421
Epoch 20, Loss: 2.4274632930755615, Validation Loss: 2.722944736480713, Validation Accuracy: 0.2826201915740967
Epoch 30, Loss: 2.0999159812927246, Validation Loss: 2.5920767784118652, Validation Accuracy: 0.3264659345149994
Epoch 40, Loss: 1.833342432975769, Validation Loss: 2.5061838626861572, Validation Accuracy: 0.3412572741508484
Epoch 50, Loss: 1.589306354522705, Validation Loss: 2.457423210144043, Validation Accuracy: 0.35868990421295166
Epoch 60, Loss: 1.3569259643554688, Validation Loss: 2.4377002716064453, Validation Accuracy: 0.3724247217178345
Epoch 70, Loss: 1.1317120790481567, Validation Loss: 2.4476470947265625, Validation Accuracy: 0.3761225640773773
Epoch 80, Loss: 0.9209274053573608, Validation Loss: 2.48279070854187, Validation Accuracy: 0.3777073323726654
Epoch 90, Loss: 0.7303172945976257, Validation Los

In [21]:
# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next charac"
predicted_char = predict_next_char(RNN20_Model, 20, char_to_ix, ix_to_char, test_str)
print(f"RNN: Predicted next character: '{predicted_char}'")

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next charac"
predicted_char = predict_next_char(LSTM20_Model, 20, char_to_ix, ix_to_char, test_str)
print(f"LSTM: Predicted next character: '{predicted_char}'")

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next charac"
predicted_char = predict_next_char(GRU20_Model, 20, char_to_ix, ix_to_char, test_str)
print(f"GRU: Predicted next character: '{predicted_char}'")

RNN: Predicted next character: 't'
LSTM: Predicted next character: 't'
GRU: Predicted next character: 't'


In [22]:
RNN30_Model = RNNModel(len(chars), hidden_size, len(chars))
LSTM30_Model = LSTMModel(len(chars), hidden_size, len(chars))
GRU30_Model = GRUModel(len(chars), hidden_size, len(chars))

criterion = nn.CrossEntropyLoss()

RNN30_optimizer = optim.Adam(RNN30_Model.parameters(), lr=learning_rate)
LSTM30_optimizer = optim.Adam(LSTM30_Model.parameters(), lr=learning_rate)
GRU30_optimizer = optim.Adam(GRU30_Model.parameters(), lr=learning_rate)


print("Start Training for sequence size: 30....\n")
print("RNN MODEL")
training_loop(
    x_train = x30_train,
    y_train = y30_train,
    x_val = x30_val,
    y_val = y30_val,
    model = RNN30_Model,
    criterion = criterion,
    optimizer = RNN30_optimizer,
    epochs = 100
)
print("\nLSTM Model")
training_loop(
    x_train = x30_train,
    y_train = y30_train,
    x_val = x30_val,
    y_val = y30_val,
    model = LSTM30_Model,
    criterion = criterion,
    optimizer = LSTM30_optimizer,
    epochs = 100
)
print("\nGRU Model")
training_loop(
    x_train = x30_train,
    y_train = y30_train,
    x_val = x30_val,
    y_val = y30_val,
    model = GRU30_Model,
    criterion = criterion,
    optimizer = GRU30_optimizer,
    epochs = 100
)

Start Training for sequence size: 30....

RNN MODEL
Epoch 10, Loss: 3.0213866233825684, Validation Loss: 3.1182940006256104, Validation Accuracy: 0.25517240166664124
Epoch 20, Loss: 2.5059235095977783, Validation Loss: 2.7260475158691406, Validation Accuracy: 0.2636604905128479
Epoch 30, Loss: 2.2047336101531982, Validation Loss: 2.6309821605682373, Validation Accuracy: 0.3103448152542114
Epoch 40, Loss: 1.9520454406738281, Validation Loss: 2.553439140319824, Validation Accuracy: 0.3416445553302765
Epoch 50, Loss: 1.720510721206665, Validation Loss: 2.5199694633483887, Validation Accuracy: 0.34588858485221863
Epoch 60, Loss: 1.4962245225906372, Validation Loss: 2.501267671585083, Validation Accuracy: 0.3575596809387207
Epoch 70, Loss: 1.2748626470565796, Validation Loss: 2.5007545948028564, Validation Accuracy: 0.36180371046066284
Epoch 80, Loss: 1.0594645738601685, Validation Loss: 2.5166873931884766, Validation Accuracy: 0.37771883606910706
Epoch 90, Loss: 0.8562628626823425, Validat

In [23]:
# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next charac"
predicted_char = predict_next_char(RNN30_Model, 30, char_to_ix, ix_to_char, test_str)
print(f"RNN: Predicted next character: '{predicted_char}'")

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next charac"
predicted_char = predict_next_char(LSTM30_Model, 30, char_to_ix, ix_to_char, test_str)
print(f"LSTM: Predicted next character: '{predicted_char}'")

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next charac"
predicted_char = predict_next_char(GRU30_Model, 30, char_to_ix, ix_to_char, test_str)
print(f"GRU: Predicted next character: '{predicted_char}'")

RNN: Predicted next character: 't'
LSTM: Predicted next character: 't'
GRU: Predicted next character: 't'
