<a href="https://colab.research.google.com/github/indhu68/Intro_To_DL/blob/main/Homework3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

                                    Introduction to Deep Learning

                                            Homework 3

Name: Indhuja Gudluru

Student ID: 801366046

In [53]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import time


In [54]:
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology"


In [55]:
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])  # Get the output of the last RNN cell
        return output

In [56]:
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.lstm(embedded)
        output = self.fc(output[:, -1, :])
        return output

In [57]:
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.gru(embedded)
        output = self.fc(output[:, -1, :])
        return output

In [58]:
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}
chars = sorted(list(set(text)))



In [59]:
def data_prep(text, max_length):
  X = []
  y = []
  for i in range(len(text) - max_length):
      sequence = text[i:i + max_length]
      label = text[i + max_length]
      X.append([char_to_ix[char] for char in sequence])
      y.append(char_to_ix[label])
  X = np.array(X)
  y = np.array(y)
  return X, y


In [60]:
#Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100


In [61]:
def train_predict(max_length, model, X_train, y_train, X_val, y_val, criterion, optimizer):
  #Training the model
  start_time = time.time()
  for epoch in range(epochs):
      model.train()
      optimizer.zero_grad()
      output = model(X_train)
      loss = criterion(output, y_train)
      loss.backward()
      optimizer.step()

      # Validation
      model.eval()
      with torch.no_grad():
          val_output = model(X_val)
          val_loss = criterion(val_output, y_val)
          _, predicted = torch.max(val_output, 1)
          val_accuracy = (predicted == y_val).float().mean()

      if (epoch+1) % 10 == 0:
          print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')
  end_time = time.time()
  execution_time = end_time - start_time
  print(f"Execution Time: {execution_time} seconds")
  # Prediction function
  def predict_next_char(model, char_to_ix, ix_to_char, initial_str):
      model.eval()
      with torch.no_grad():
          initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0)
          prediction = model(initial_input)
          predicted_index = torch.argmax(prediction, dim=1).item()
          return ix_to_char[predicted_index]
  total_params = sum(p.numel() for p in model.parameters())
  print(f'Total number of parameters in the model: {total_params}')

  # Predicting the next character
  test_str = "Predicting the next chara"
  predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
  print(f"Predicted next character: '{predicted_char}'")

RNN

In [62]:
X,y = data_prep(text,10)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
model = CharRNN(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_predict(10, model, X_train, y_train, X_val, y_val, criterion, optimizer)

Epoch 10, Loss: 2.5650124549865723, Validation Loss: 2.4793667793273926, Validation Accuracy: 0.33052632212638855
Epoch 20, Loss: 2.069395065307617, Validation Loss: 2.166416645050049, Validation Accuracy: 0.4399999976158142
Epoch 30, Loss: 1.668236255645752, Validation Loss: 1.9849541187286377, Validation Accuracy: 0.4863157868385315
Epoch 40, Loss: 1.3073127269744873, Validation Loss: 1.888149619102478, Validation Accuracy: 0.5094736814498901
Epoch 50, Loss: 0.9758478999137878, Validation Loss: 1.8866946697235107, Validation Accuracy: 0.5178947448730469
Epoch 60, Loss: 0.6958948969841003, Validation Loss: 1.942961573600769, Validation Accuracy: 0.503157913684845
Epoch 70, Loss: 0.46101847290992737, Validation Loss: 2.0349106788635254, Validation Accuracy: 0.4989473819732666
Epoch 80, Loss: 0.2872181534767151, Validation Loss: 2.1374318599700928, Validation Accuracy: 0.4842105209827423
Epoch 90, Loss: 0.17510168254375458, Validation Loss: 2.2609705924987793, Validation Accuracy: 0.486

In [63]:
X,y = data_prep(text,20)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
model = CharRNN(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_predict(20, model, X_train, y_train, X_val, y_val, criterion, optimizer)

Epoch 10, Loss: 2.520806312561035, Validation Loss: 2.5369691848754883, Validation Accuracy: 0.27695560455322266
Epoch 20, Loss: 2.062290668487549, Validation Loss: 2.224001884460449, Validation Accuracy: 0.41860464215278625
Epoch 30, Loss: 1.684199333190918, Validation Loss: 2.055727481842041, Validation Accuracy: 0.4545454680919647
Epoch 40, Loss: 1.333721399307251, Validation Loss: 1.972806692123413, Validation Accuracy: 0.5052854418754578
Epoch 50, Loss: 1.019422173500061, Validation Loss: 1.9374723434448242, Validation Accuracy: 0.49894291162490845
Epoch 60, Loss: 0.7715293765068054, Validation Loss: 1.9552240371704102, Validation Accuracy: 0.5116279125213623
Epoch 70, Loss: 0.5279343724250793, Validation Loss: 2.0020477771759033, Validation Accuracy: 0.5264270901679993
Epoch 80, Loss: 0.3585427701473236, Validation Loss: 2.0830323696136475, Validation Accuracy: 0.5179703831672668
Epoch 90, Loss: 0.22949470579624176, Validation Loss: 2.171319007873535, Validation Accuracy: 0.50739

In [64]:
X,y = data_prep(text,30)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
model = CharRNN(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_predict(30, model, X_train, y_train, X_val, y_val, criterion, optimizer)

Epoch 10, Loss: 2.5249485969543457, Validation Loss: 2.5249338150024414, Validation Accuracy: 0.29723992943763733
Epoch 20, Loss: 2.030599594116211, Validation Loss: 2.1713712215423584, Validation Accuracy: 0.40552017092704773
Epoch 30, Loss: 1.6446820497512817, Validation Loss: 1.9889518022537231, Validation Accuracy: 0.46496814489364624
Epoch 40, Loss: 1.2863117456436157, Validation Loss: 1.8763012886047363, Validation Accuracy: 0.47770699858665466
Epoch 50, Loss: 0.9749799370765686, Validation Loss: 1.8597301244735718, Validation Accuracy: 0.4904458522796631
Epoch 60, Loss: 0.6996713280677795, Validation Loss: 1.883407711982727, Validation Accuracy: 0.5159235596656799
Epoch 70, Loss: 0.48456674814224243, Validation Loss: 1.9367228746414185, Validation Accuracy: 0.5159235596656799
Epoch 80, Loss: 0.30376413464546204, Validation Loss: 2.0326833724975586, Validation Accuracy: 0.5180467367172241
Epoch 90, Loss: 0.2172960340976715, Validation Loss: 2.0953543186187744, Validation Accuracy

LSTM

In [66]:
X,y = data_prep(text,10)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
model = CharLSTM(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_predict(10, model, X_train, y_train, X_val, y_val, criterion, optimizer)

Epoch 10, Loss: 2.5294806957244873, Validation Loss: 2.428402900695801, Validation Accuracy: 0.378947377204895
Epoch 20, Loss: 2.0187320709228516, Validation Loss: 2.105755090713501, Validation Accuracy: 0.4547368288040161
Epoch 30, Loss: 1.6185027360916138, Validation Loss: 1.9610005617141724, Validation Accuracy: 0.4673684239387512
Epoch 40, Loss: 1.256838321685791, Validation Loss: 1.8979696035385132, Validation Accuracy: 0.4989473819732666
Epoch 50, Loss: 0.9374358057975769, Validation Loss: 1.9270497560501099, Validation Accuracy: 0.5094736814498901
Epoch 60, Loss: 0.6471523642539978, Validation Loss: 1.9443050622940063, Validation Accuracy: 0.5073684453964233
Epoch 70, Loss: 0.41391271352767944, Validation Loss: 2.0404160022735596, Validation Accuracy: 0.5010526180267334
Epoch 80, Loss: 0.24856171011924744, Validation Loss: 2.1687257289886475, Validation Accuracy: 0.5010526180267334
Epoch 90, Loss: 0.15127602219581604, Validation Loss: 2.270429849624634, Validation Accuracy: 0.50

In [67]:
X,y = data_prep(text,20)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
model = CharLSTM(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_predict(20, model, X_train, y_train, X_val, y_val, criterion, optimizer)

Epoch 10, Loss: 2.580595016479492, Validation Loss: 2.5901732444763184, Validation Accuracy: 0.27484142780303955
Epoch 20, Loss: 2.0984981060028076, Validation Loss: 2.2380692958831787, Validation Accuracy: 0.37420719861984253
Epoch 30, Loss: 1.7170329093933105, Validation Loss: 2.0660481452941895, Validation Accuracy: 0.46088793873786926
Epoch 40, Loss: 1.3710252046585083, Validation Loss: 1.9859275817871094, Validation Accuracy: 0.47991544008255005
Epoch 50, Loss: 1.0598021745681763, Validation Loss: 1.9585126638412476, Validation Accuracy: 0.49894291162490845
Epoch 60, Loss: 0.7794843316078186, Validation Loss: 1.9582222700119019, Validation Accuracy: 0.5052854418754578
Epoch 70, Loss: 0.5625208616256714, Validation Loss: 2.0048282146453857, Validation Accuracy: 0.5010570883750916
Epoch 80, Loss: 0.3785140812397003, Validation Loss: 2.0694587230682373, Validation Accuracy: 0.5073995590209961
Epoch 90, Loss: 0.2504660487174988, Validation Loss: 2.1282618045806885, Validation Accuracy

In [68]:
X,y = data_prep(text,30)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
model = CharLSTM(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_predict(30, model, X_train, y_train, X_val, y_val, criterion, optimizer)

Epoch 10, Loss: 2.523730993270874, Validation Loss: 2.534489154815674, Validation Accuracy: 0.3290870487689972
Epoch 20, Loss: 1.999192714691162, Validation Loss: 2.1675074100494385, Validation Accuracy: 0.4309978783130646
Epoch 30, Loss: 1.5930274724960327, Validation Loss: 1.9806220531463623, Validation Accuracy: 0.47346073389053345
Epoch 40, Loss: 1.243422031402588, Validation Loss: 1.8853580951690674, Validation Accuracy: 0.4883227050304413
Epoch 50, Loss: 0.9304813146591187, Validation Loss: 1.8531309366226196, Validation Accuracy: 0.522292971611023
Epoch 60, Loss: 0.6642557382583618, Validation Loss: 1.8930145502090454, Validation Accuracy: 0.5265392661094666
Epoch 70, Loss: 0.4569658935070038, Validation Loss: 1.9564087390899658, Validation Accuracy: 0.5201698541641235
Epoch 80, Loss: 0.3080301284790039, Validation Loss: 2.0687625408172607, Validation Accuracy: 0.5138004422187805
Epoch 90, Loss: 0.1881590187549591, Validation Loss: 2.160766839981079, Validation Accuracy: 0.50955

GRU

In [69]:
X,y = data_prep(text,10)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
model = CharGRU(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_predict(10, model, X_train, y_train, X_val, y_val, criterion, optimizer)

Epoch 10, Loss: 2.3642117977142334, Validation Loss: 2.332767963409424, Validation Accuracy: 0.35789474844932556
Epoch 20, Loss: 1.8345539569854736, Validation Loss: 2.0403738021850586, Validation Accuracy: 0.4568420946598053
Epoch 30, Loss: 1.4101696014404297, Validation Loss: 1.9075591564178467, Validation Accuracy: 0.4842105209827423
Epoch 40, Loss: 1.0319633483886719, Validation Loss: 1.8884649276733398, Validation Accuracy: 0.5115789771080017
Epoch 50, Loss: 0.7001526355743408, Validation Loss: 1.932285189628601, Validation Accuracy: 0.5199999809265137
Epoch 60, Loss: 0.43291226029396057, Validation Loss: 2.0490849018096924, Validation Accuracy: 0.5284210443496704
Epoch 70, Loss: 0.24625669419765472, Validation Loss: 2.201974630355835, Validation Accuracy: 0.5284210443496704
Epoch 80, Loss: 0.13881012797355652, Validation Loss: 2.362474203109741, Validation Accuracy: 0.503157913684845
Epoch 90, Loss: 0.08795884996652603, Validation Loss: 2.461801767349243, Validation Accuracy: 0.5

In [70]:
X,y = data_prep(text,20)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
model = CharGRU(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_predict(20, model, X_train, y_train, X_val, y_val, criterion, optimizer)

Epoch 10, Loss: 2.36944842338562, Validation Loss: 2.419921875, Validation Accuracy: 0.3657505214214325
Epoch 20, Loss: 1.8605729341506958, Validation Loss: 2.0993142127990723, Validation Accuracy: 0.4503171145915985
Epoch 30, Loss: 1.4364513158798218, Validation Loss: 1.9359318017959595, Validation Accuracy: 0.4904862642288208
Epoch 40, Loss: 1.070433497428894, Validation Loss: 1.9066226482391357, Validation Accuracy: 0.49894291162490845
Epoch 50, Loss: 0.757244884967804, Validation Loss: 1.9389744997024536, Validation Accuracy: 0.5073995590209961
Epoch 60, Loss: 0.4926791787147522, Validation Loss: 2.0044991970062256, Validation Accuracy: 0.49894291162490845
Epoch 70, Loss: 0.29585525393486023, Validation Loss: 2.127021551132202, Validation Accuracy: 0.5264270901679993
Epoch 80, Loss: 0.16563060879707336, Validation Loss: 2.2678167819976807, Validation Accuracy: 0.5243129134178162
Epoch 90, Loss: 0.09210468828678131, Validation Loss: 2.404038429260254, Validation Accuracy: 0.51585626

In [71]:
X,y = data_prep(text,30)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
model = CharGRU(len(chars), hidden_size, len(chars))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_predict(30, model, X_train, y_train, X_val, y_val, criterion, optimizer)

Epoch 10, Loss: 2.4109528064727783, Validation Loss: 2.4689695835113525, Validation Accuracy: 0.31847134232521057
Epoch 20, Loss: 1.912304401397705, Validation Loss: 2.116098642349243, Validation Accuracy: 0.4416135847568512
Epoch 30, Loss: 1.4939008951187134, Validation Loss: 1.946638584136963, Validation Accuracy: 0.47983014583587646
Epoch 40, Loss: 1.1152342557907104, Validation Loss: 1.8469924926757812, Validation Accuracy: 0.4989384412765503
Epoch 50, Loss: 0.7743898034095764, Validation Loss: 1.8338068723678589, Validation Accuracy: 0.5414012670516968
Epoch 60, Loss: 0.4930489957332611, Validation Loss: 1.900964617729187, Validation Accuracy: 0.5371549725532532
Epoch 70, Loss: 0.28927695751190186, Validation Loss: 2.014709711074829, Validation Accuracy: 0.5456475615501404
Epoch 80, Loss: 0.15813224017620087, Validation Loss: 2.1540980339050293, Validation Accuracy: 0.5456475615501404
Epoch 90, Loss: 0.0870116800069809, Validation Loss: 2.2440338134765625, Validation Accuracy: 0.5

Problem 2

In [72]:
from torch.utils.data import DataLoader, Dataset, random_split
import os
import sys
import requests

In [73]:
# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

In [74]:
# Step 2: Prepare the dataset
sequence_length = 20
# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}


In [75]:
# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]


In [76]:
# Create sequences and targets
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)


In [77]:
# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

In [78]:
# Split the dataset into training and testing sets
dataset = CharDataset(sequences, targets)
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])


In [79]:
# Create data loaders
batch_size = 128
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


In [80]:
# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.lstm(embedded)
        output = self.fc(output[:, -1, :])
        return output

In [81]:
# Define the GRU model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.gru(embedded)
        output = self.fc(output[:, -1, :])
        return output

In [95]:
# Train the model with validation loss, model size, number of parameters, and inference time
def training_the_model(model, train_loader, test_loader, device, num_epochs=10, lr=0.005):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    start_time = time.time()

    # Calculate the number of trainable parameters
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'Number of trainable parameters: {num_params}')

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        correct = 0
        total = 0
        test_loss = 0
        inference_start_time = time.time()
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                test_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()
        inference_end_time = time.time()
        inference_time = inference_end_time - inference_start_time

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss/len(train_loader)}, Test Loss: {test_loss/len(test_loader)}, Test Accuracy: {100 * correct / total}%, Inference Time: {inference_time} seconds')

    end_time = time.time()
    print(f'Total Training Time: {end_time - start_time} seconds')

    # Calculate model size
    model_size = sys.getsizeof(model.state_dict())
    print(f'Model Size: {model_size} bytes')





In [96]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Initialize and train the LSTM model
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
num_epochs = 10


print("LSTM model\n")
lstm_model = LSTMModel(input_size, hidden_size, output_size)
training_the_model(lstm_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 20)
print(f"Predicted next character: '{predicted_char}'")


LSTM model

Number of trainable parameters: 559681
Epoch 1/10, Train Loss: 1.7492508147717616, Test Loss: 1.6320871364365894, Test Accuracy: 50.87974896335313%, Inference Time: 3.149263620376587 seconds
Epoch 2/10, Train Loss: 1.5960567029672865, Test Loss: 1.58417079398089, Test Accuracy: 52.21741566737644%, Inference Time: 3.155841588973999 seconds
Epoch 3/10, Train Loss: 1.5626432999788997, Test Loss: 1.5601552428695844, Test Accuracy: 52.52134932197691%, Inference Time: 3.2809362411499023 seconds
Epoch 4/10, Train Loss: 1.5479863630036, Test Loss: 1.5603582493819796, Test Accuracy: 53.05525047629721%, Inference Time: 3.0869362354278564 seconds
Epoch 5/10, Train Loss: 1.5432339702709885, Test Loss: 1.5623937219432626, Test Accuracy: 52.666591953378905%, Inference Time: 3.1956615447998047 seconds
Epoch 6/10, Train Loss: 1.54182028958528, Test Loss: 1.5611906047532151, Test Accuracy: 52.79121371735963%, Inference Time: 3.062225103378296 seconds
Epoch 7/10, Train Loss: 1.54373353182824

In [97]:
# Initialize and train the GRU model
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
num_epochs = 10

print("GRU model")
gru_model = GRUModel(input_size, hidden_size, output_size)
training_the_model(gru_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 20)
print(f"Predicted next character: '{predicted_char}'")

GRU model
Number of trainable parameters: 428097
Epoch 1/10, Train Loss: 1.9378979510434797, Test Loss: 1.875877156451866, Test Accuracy: 45.32152863386753%, Inference Time: 2.9833340644836426 seconds
Epoch 2/10, Train Loss: 1.870591915994284, Test Loss: 1.8600143310466992, Test Accuracy: 45.48156449624566%, Inference Time: 2.9664306640625 seconds
Epoch 3/10, Train Loss: 1.8703911815712797, Test Loss: 1.8835771656966525, Test Accuracy: 44.421382942956406%, Inference Time: 2.884946346282959 seconds
Epoch 4/10, Train Loss: 1.8734315595302646, Test Loss: 1.8881918068118158, Test Accuracy: 45.81284321416564%, Inference Time: 2.960973024368286 seconds
Epoch 5/10, Train Loss: 1.873045964909229, Test Loss: 1.892664955323001, Test Accuracy: 44.72352347865068%, Inference Time: 3.1064460277557373 seconds
Epoch 6/10, Train Loss: 1.8806457141488022, Test Loss: 1.8953932055918157, Test Accuracy: 45.90518883783481%, Inference Time: 2.9347944259643555 seconds
Epoch 7/10, Train Loss: 1.884694065571652

In [98]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Initialize and train the LSTM model
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
num_epochs = 10


print("LSTM model\n")
lstm_model = LSTMModel(input_size, hidden_size, output_size)
training_the_model(lstm_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 20)
print(f"Predicted next character: '{predicted_char}'")


LSTM model

Number of trainable parameters: 148801
Epoch 1/10, Train Loss: 1.7611486668806957, Test Loss: 1.632340795201812, Test Accuracy: 51.13526840748627%, Inference Time: 2.8939156532287598 seconds
Epoch 2/10, Train Loss: 1.600569605280186, Test Loss: 1.5871434353988745, Test Accuracy: 52.16676005827637%, Inference Time: 2.6885955333709717 seconds
Epoch 3/10, Train Loss: 1.5619404294917587, Test Loss: 1.5655481687442914, Test Accuracy: 52.797937913257876%, Inference Time: 2.6538703441619873 seconds
Epoch 4/10, Train Loss: 1.544314362164277, Test Loss: 1.5523552480129694, Test Accuracy: 53.071836826179535%, Inference Time: 2.730309009552002 seconds
Epoch 5/10, Train Loss: 1.5379988995695415, Test Loss: 1.550775783265793, Test Accuracy: 52.79345511599238%, Inference Time: 2.633871078491211 seconds
Epoch 6/10, Train Loss: 1.537706696398971, Test Loss: 1.5481824621139282, Test Accuracy: 53.24845903843999%, Inference Time: 2.619330644607544 seconds
Epoch 7/10, Train Loss: 1.53581053088

In [106]:
# Initialize and train the GRU model
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
num_epochs = 10

print("GRU model")
gru_model = GRUModel(input_size, hidden_size, output_size)
training_the_model(gru_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 20)
print(f"Predicted next character: '{predicted_char}'")

GRU model
Number of trainable parameters: 115777
Epoch 1/10, Train Loss: 1.8405554874731218, Test Loss: 1.7699469565934856, Test Accuracy: 47.74221891488437%, Inference Time: 2.754101514816284 seconds
Epoch 2/10, Train Loss: 1.7557297082242784, Test Loss: 1.7654796948189442, Test Accuracy: 47.61983745231382%, Inference Time: 2.535686731338501 seconds
Epoch 3/10, Train Loss: 1.7640669728344498, Test Loss: 1.7719145777303462, Test Accuracy: 47.627009992244695%, Inference Time: 2.7631144523620605 seconds
Epoch 4/10, Train Loss: 1.7709557248543408, Test Loss: 1.7699107383765234, Test Accuracy: 47.98025758384027%, Inference Time: 2.803239583969116 seconds
Epoch 5/10, Train Loss: 1.775127562960878, Test Loss: 1.7911810752515196, Test Accuracy: 47.517628758298855%, Inference Time: 2.7452948093414307 seconds
Epoch 6/10, Train Loss: 1.7947750459678955, Test Loss: 1.815902423393569, Test Accuracy: 46.573991473643154%, Inference Time: 2.642761707305908 seconds
Epoch 7/10, Train Loss: 1.8068031687

In [107]:
# Prepare the dataset with sequence length 30
sequence_length = 30
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}
encoded_text = [char_to_int[ch] for ch in text]

sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Define the dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Split the dataset into training and testing sets
dataset = CharDataset(sequences, targets)
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders
batch_size = 128
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [124]:
def training_the_model(model, train_loader, test_loader, device, num_epochs=10, lr=0.001):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    start_time = time.time()

    train_losses = []
    val_losses = []
    val_accuracies = []

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_losses.append(train_loss / len(train_loader))

        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()
        val_losses.append(val_loss / len(test_loader))
        val_accuracies.append(100 * correct / total)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]}, Val Loss: {val_losses[-1]}, Val Accuracy: {val_accuracies[-1]}%')

    end_time = time.time()
    training_time = end_time - start_time
    print(f'Training time: {training_time} seconds')
    return train_losses, val_losses, val_accuracies, training_time, sum(p.numel() for p in model.parameters() if p.requires_grad)


In [125]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Initialize and train the LSTM model
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
num_epochs = 10


print("LSTM model\n")
lstm_model = LSTMModel(input_size, hidden_size, output_size)
lstm_train_losses, lstm_val_losses, lstm_val_accuracy, lstm_training_time, lstm_model_size = training_the_model(lstm_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 30)
print(f"Predicted next character: '{predicted_char}'")

LSTM model

Epoch 1/10, Train Loss: 1.838152635665167, Val Loss: 1.635293082129456, Val Accuracy: 51.34910993262295%
Epoch 2/10, Train Loss: 1.5752492761960744, Val Loss: 1.546041282784029, Val Accuracy: 53.546148570198994%
Epoch 3/10, Train Loss: 1.5059746511695171, Val Loss: 1.4997471655092223, Val Accuracy: 54.75248012982297%
Epoch 4/10, Train Loss: 1.46812657144175, Val Loss: 1.4741209023175263, Val Accuracy: 55.31866250061639%
Epoch 5/10, Train Loss: 1.4427513185967609, Val Loss: 1.4580057390243653, Val Accuracy: 55.738256086572555%
Epoch 6/10, Train Loss: 1.4244118255578575, Val Loss: 1.4460872810802032, Val Accuracy: 56.124676675348425%
Epoch 7/10, Train Loss: 1.4095939435599114, Val Loss: 1.4403169112717087, Val Accuracy: 56.04981328981992%
Epoch 8/10, Train Loss: 1.3983366969872397, Val Loss: 1.4331536470168087, Val Accuracy: 56.37795699165744%
Epoch 9/10, Train Loss: 1.3882147763747288, Val Loss: 1.4207312686649876, Val Accuracy: 56.82086133238895%
Epoch 10/10, Train Loss: 1.

In [126]:
# Initialize and train the GRU model
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
num_epochs = 10

print("GRU model")
gru_model = GRUModel(input_size, hidden_size, output_size)
gru_train_losses, gru_val_losses, gru_val_accuracy, gru_training_time, gru_model_size = training_the_model(gru_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 30)
print(f"Predicted next character: '{predicted_char}'")

GRU model
Epoch 1/10, Train Loss: 1.8020998015187626, Val Loss: 1.6284909841320807, Val Accuracy: 51.37466210612669%
Epoch 2/10, Train Loss: 1.5747163515800864, Val Loss: 1.5487357412886495, Val Accuracy: 53.432284498796356%
Epoch 3/10, Train Loss: 1.521530085564412, Val Loss: 1.5210483432568427, Val Accuracy: 54.084089065014595%
Epoch 4/10, Train Loss: 1.4935064789846035, Val Loss: 1.5044406006135136, Val Accuracy: 54.68837555419078%
Epoch 5/10, Train Loss: 1.4752108037334986, Val Loss: 1.4938513763458374, Val Accuracy: 54.80448104432181%
Epoch 6/10, Train Loss: 1.4625921036857883, Val Loss: 1.4853431668968222, Val Accuracy: 54.935379898060276%
Epoch 7/10, Train Loss: 1.4527435394723185, Val Loss: 1.4757229894040578, Val Accuracy: 55.244247398833565%
Epoch 8/10, Train Loss: 1.4444643594773419, Val Loss: 1.4726558460015644, Val Accuracy: 55.30611055573736%
Epoch 9/10, Train Loss: 1.4336923431867987, Val Loss: 1.4607933464230851, Val Accuracy: 55.623495447678565%
Epoch 10/10, Train Loss

In [127]:
# Compare results
print("\nComparison of LSTM and GRU models:")
print(f"LSTM - Training Loss: {lstm_train_losses[-1]}, Validation Loss: {lstm_val_losses[-1]}, Validation Accuracy: {lstm_val_accuracy[-1]}%, Training Time: {lstm_training_time} seconds, Model Size: {lstm_model_size} parameters")
print(f"GRU - Training Loss: {gru_train_losses[-1]}, Validation Loss: {gru_val_losses[-1]}, Validation Accuracy: {gru_val_accuracy[-1]}%, Training Time: {gru_training_time} seconds, Model Size: {gru_model_size} parameters")


Comparison of LSTM and GRU models:
LSTM - Training Loss: 1.3798802079685242, Validation Loss: 1.4181153937379176, Validation Accuracy: 56.84193066843589%, Training Time: 241.99739480018616 seconds, Model Size: 148801 parameters
GRU - Training Loss: 1.424981420538033, Validation Loss: 1.4552940099798806, Validation Accuracy: 56.04039933116065%, Training Time: 239.64847946166992 seconds, Model Size: 115777 parameters


In [128]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Initialize and train the LSTM model
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
num_epochs = 10


print("LSTM model\n")
lstm_model = LSTMModel(input_size, hidden_size, output_size)
lstm_train_losses, lstm_val_losses, lstm_val_accuracy, lstm_training_time, lstm_model_size = training_the_model(lstm_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 30)
print(f"Predicted next character: '{predicted_char}'")


LSTM model

Epoch 1/10, Train Loss: 1.6981141623654588, Val Loss: 1.5282684947592187, Val Accuracy: 53.80794627767592%
Epoch 2/10, Train Loss: 1.4721924677123575, Val Loss: 1.4613620984916775, Val Accuracy: 55.26755815360891%
Epoch 3/10, Train Loss: 1.4131135745374759, Val Loss: 1.421676237146812, Val Accuracy: 56.54068399133916%
Epoch 4/10, Train Loss: 1.3771824514599142, Val Loss: 1.3976457633646617, Val Accuracy: 57.04769290770286%
Epoch 5/10, Train Loss: 1.3519261161776979, Val Loss: 1.3874774398779088, Val Accuracy: 57.267800226831575%
Epoch 6/10, Train Loss: 1.3324017011514013, Val Loss: 1.3761166163169305, Val Accuracy: 57.67842813787415%
Epoch 7/10, Train Loss: 1.3172536966996293, Val Loss: 1.3736462280863808, Val Accuracy: 57.76405033329897%
Epoch 8/10, Train Loss: 1.3045036109592882, Val Loss: 1.3653856799374076, Val Accuracy: 57.89450090329175%
Epoch 9/10, Train Loss: 1.2936522325461306, Val Loss: 1.3641957537716172, Val Accuracy: 57.88329380964976%
Epoch 10/10, Train Loss: 

In [129]:
# Initialize and train the GRU model
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
num_epochs = 10

print("GRU model")
gru_model = GRUModel(input_size, hidden_size, output_size)
gru_train_losses, gru_val_losses, gru_val_accuracy, gru_training_time, gru_model_size = training_the_model(gru_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 30)
print(f"Predicted next character: '{predicted_char}'")

GRU model
Epoch 1/10, Train Loss: 1.686592279137788, Val Loss: 1.5385178375435642, Val Accuracy: 53.5371828952854%
Epoch 2/10, Train Loss: 1.4943277687603758, Val Loss: 1.483591611810751, Val Accuracy: 55.17834968821865%
Epoch 3/10, Train Loss: 1.447885329464832, Val Loss: 1.4551890905875964, Val Accuracy: 55.87094807529374%
Epoch 4/10, Train Loss: 1.422795365305379, Val Loss: 1.446815055673723, Val Accuracy: 55.98705356542477%
Epoch 5/10, Train Loss: 1.4059528370168246, Val Loss: 1.433349368083908, Val Accuracy: 56.42413021746245%
Epoch 6/10, Train Loss: 1.3934025141831041, Val Loss: 1.429645134844591, Val Accuracy: 56.35509452062777%
Epoch 7/10, Train Loss: 1.3845681111513712, Val Loss: 1.425729160567364, Val Accuracy: 56.68503135744801%
Epoch 8/10, Train Loss: 1.3782804097319838, Val Loss: 1.431422640999637, Val Accuracy: 56.09553823187925%
Epoch 9/10, Train Loss: 1.3740288436635089, Val Loss: 1.4223421413325332, Val Accuracy: 56.57296042102809%
Epoch 10/10, Train Loss: 1.3721061477

In [130]:
# Compare results
print("\nComparison of LSTM and GRU models:")
print(f"LSTM - Training Loss: {lstm_train_losses[-1]}, Validation Loss: {lstm_val_losses[-1]}, Validation Accuracy: {lstm_val_accuracy[-1]}%, Training Time: {lstm_training_time} seconds, Model Size: {lstm_model_size} parameters")
print(f"GRU - Training Loss: {gru_train_losses[-1]}, Validation Loss: {gru_val_losses[-1]}, Validation Accuracy: {gru_val_accuracy[-1]}%, Training Time: {gru_training_time} seconds, Model Size: {gru_model_size} parameters")


Comparison of LSTM and GRU models:
LSTM - Training Loss: 1.2841105844464304, Validation Loss: 1.3563531437483727, Validation Accuracy: 58.34637091893685%, Training Time: 319.38796639442444 seconds, Model Size: 559681 parameters
GRU - Training Loss: 1.3721061477579746, Validation Loss: 1.414999493804391, Validation Accuracy: 56.87151739565075%, Training Time: 304.1290080547333 seconds, Model Size: 428097 parameters


In [131]:
# Prepare the dataset with sequence length 50
sequence_length = 50
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}
encoded_text = [char_to_int[ch] for ch in text]

sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Define the dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Split the dataset into training and testing sets
dataset = CharDataset(sequences, targets)
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders
batch_size = 64
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


In [132]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Initialize and train the LSTM model
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
num_epochs = 10


print("LSTM model\n")
lstm_model = LSTMModel(input_size, hidden_size, output_size)
lstm_train_losses, lstm_val_losses, lstm_val_accuracy, lstm_training_time, lstm_model_size = training_the_model(lstm_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 50)
print(f"Predicted next character: '{predicted_char}'")


LSTM model

Epoch 1/10, Train Loss: 1.682910358259384, Val Loss: 1.527647488396262, Val Accuracy: 53.822359897610156%
Epoch 2/10, Train Loss: 1.4730430143179776, Val Loss: 1.4510415537700665, Val Accuracy: 55.70339222393071%
Epoch 3/10, Train Loss: 1.4168990724031354, Val Loss: 1.4261195478053594, Val Accuracy: 56.31037929967857%
Epoch 4/10, Train Loss: 1.3840494855261485, Val Loss: 1.400843796805958, Val Accuracy: 56.99985206371123%
Epoch 5/10, Train Loss: 1.359940777830817, Val Loss: 1.3884102034110553, Val Accuracy: 57.263447632795234%
Epoch 6/10, Train Loss: 1.3427014678242113, Val Loss: 1.379472758430759, Val Accuracy: 57.56694117066917%
Epoch 7/10, Train Loss: 1.329247392937402, Val Loss: 1.3782072133431131, Val Accuracy: 57.586217717387896%
Epoch 8/10, Train Loss: 1.3182375680817164, Val Loss: 1.3708770670257486, Val Accuracy: 57.80991531768197%
Epoch 9/10, Train Loss: 1.3103912637618842, Val Loss: 1.3664272476630996, Val Accuracy: 57.91974680480031%
Epoch 10/10, Train Loss: 1.3

In [None]:
# Initialize and train the GRU model
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
num_epochs = 10

print("GRU model")
gru_model = GRUModel(input_size, hidden_size, output_size)
gru_train_losses, gru_val_losses, gru_val_accuracy, gru_training_time, gru_model_size = training_the_model(gru_model, train_loader, test_loader, device, num_epochs)
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
  model.eval()
  with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]
test_str = "Predicting the next charac"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str, 50)
print(f"Predicted next character: '{predicted_char}'")

GRU model


In [None]:
# Compare results
print("\nComparison of LSTM and GRU models:")
print(f"LSTM - Training Loss: {lstm_train_losses[-1]}, Validation Loss: {lstm_val_losses[-1]}, Validation Accuracy: {lstm_val_accuracy[-1]}%, Training Time: {lstm_training_time} seconds, Model Size: {lstm_model_size} parameters")
print(f"GRU - Training Loss: {gru_train_losses[-1]}, Validation Loss: {gru_val_losses[-1]}, Validation Accuracy: {gru_val_accuracy[-1]}%, Training Time: {gru_training_time} seconds, Model Size: {gru_model_size} parameters")