<a href="https://colab.research.google.com/github/UmaNagirireddi/RTML/blob/main/Homework3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Introduction to Deep Learning


Uma Nagirireddy


In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report, f1_score


In [2]:
# Set device: run on the GPU when CUDA is usable, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

In [3]:
class CharRNN(nn.Module):
    """Next-character classifier: embedding -> single-layer ``nn.RNN`` -> linear head.

    Only the RNN output at the final time step is passed to the linear head,
    so the model emits one score vector per input sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the three layers.

        Args:
            input_size: Number of rows in the embedding table (vocabulary size).
            hidden_size: Width of both the embedding vectors and the RNN hidden state.
            output_size: Number of scores produced by the linear head.
        """
        super().__init__()
        self.hidden_size = hidden_size
        # Learned lookup table turning each character index into a dense vector.
        self.embedding = nn.Embedding(input_size, hidden_size)
        # batch_first=True: tensors are laid out as (batch, time, features).
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Score the next character for every sequence in the batch.

        Args:
            x: Integer tensor of character indices, laid out as (batch, time).

        Returns:
            Tensor of shape (batch, output_size) holding unnormalised scores.
        """
        # The RNN returns (outputs for every step, final hidden state); the
        # final hidden state is not needed here.
        per_step_outputs, _final_hidden = self.rnn(self.embedding(x))
        # Index -1 on the time axis keeps only the last step of each sequence.
        last_step = per_step_outputs[:, -1, :]
        return self.fc(last_step)

In [4]:
class CharLstm(nn.Module):
    """Next-character classifier: embedding -> single-layer ``nn.LSTM`` -> linear head.

    Only the LSTM output at the final time step is passed to the linear head.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the three layers.

        Args:
            input_size: Number of rows in the embedding table (vocabulary size).
            hidden_size: Width of both the embedding vectors and the LSTM hidden state.
            output_size: Number of scores produced by the linear head.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        # batch_first=True: tensors are laid out as (batch, time, features).
        self.LSTM = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Score the next character for every sequence in the batch.

        Args:
            x: Integer tensor of character indices, laid out as (batch, time).

        Returns:
            Tensor of shape (batch, output_size) holding unnormalised scores.
        """
        # The second return value (the LSTM's final state) is not used.
        per_step_outputs, _final_state = self.LSTM(self.embedding(x))
        last_step = per_step_outputs[:, -1, :]
        return self.fc(last_step)


In [5]:
class CharGRU(nn.Module):
    """Next-character classifier: embedding -> single-layer ``nn.GRU`` -> linear head.

    Only the GRU output at the final time step is passed to the linear head.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the three layers.

        Args:
            input_size: Number of rows in the embedding table (vocabulary size).
            hidden_size: Width of both the embedding vectors and the GRU hidden state.
            output_size: Number of scores produced by the linear head.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        # batch_first=True: tensors are laid out as (batch, time, features).
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Score the next character for every sequence in the batch.

        Args:
            x: Integer tensor of character indices, laid out as (batch, time).

        Returns:
            Tensor of shape (batch, output_size) holding unnormalised scores.
        """
        # The second return value (the GRU's final hidden state) is not used.
        per_step_outputs, _final_hidden = self.gru(self.embedding(x))
        last_step = per_step_outputs[:, -1, :]
        return self.fc(last_step)

In [6]:
# Training corpus: a short essay kept inline as one multi-line string.
# The character vocabulary and every sliding-window dataset in this notebook are derived from it.
text = '''Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.

At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.

One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.

Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.

Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.

In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology'''


In [7]:
# Character vocabulary: every distinct character of `text`, sorted so that the
# index assigned to each character is the same on every run.
chars = sorted(set(text))
# Lookup tables between a character and its integer id (its position in `chars`).
ix_to_char = dict(enumerate(chars))
char_to_ix = {ch: i for i, ch in enumerate(chars)}

In [13]:
# Preparing the dataset: slide a window of `max_length` characters over `text`.
# Each window (as vocabulary indices) is one input; the character right after it is the label.
max_length = 10  # Maximum length of input sequences
window_starts = range(len(text) - max_length)
X = np.array([[char_to_ix[ch] for ch in text[start:start + max_length]] for start in window_starts])
y = np.array([char_to_ix[text[start + max_length]] for start in window_starts])

# Splitting the dataset into training and validation sets (80/20, fixed seed for a stable split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors of int64 indices, created directly on the target device
X_train = torch.tensor(X_train, dtype=torch.long, device=device)
y_train = torch.tensor(y_train, dtype=torch.long, device=device)
X_val = torch.tensor(X_val, dtype=torch.long, device=device)
y_val = torch.tensor(y_val, dtype=torch.long, device=device)

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer.
# Seed PyTorch first: the layer weights are randomly initialised, so without a seed every
# run of this cell trains a different model and the printed metrics cannot be reproduced.
torch.manual_seed(42)
model = CharRNN(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of training loss, validation loss and validation accuracy
train_loss_list, val_loss_list, val_accuracy_list = [], [], []

# Training the model: one full-batch gradient step per epoch, then a validation pass
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation (no gradients needed)
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        predicted = val_output.argmax(dim=1)
        val_accuracy = (predicted == y_val).float().mean()
    train_loss_list.append(loss.item())
    val_loss_list.append(val_loss.item())
    val_accuracy_list.append(val_accuracy.item())

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, seq_length=None):
    """Predict the single most likely character to follow ``initial_str``.

    Puts ``model`` into eval mode (and leaves it there) and runs one forward
    pass without gradient tracking.

    Args:
        model: Network mapping a ``(1, seq)`` tensor of character indices to a
            ``(1, vocab)`` tensor of scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Prompt text; only its last ``seq_length`` characters are used.
        seq_length: Number of trailing characters fed to the model. Defaults to
            the notebook-level ``max_length``.

    Returns:
        The character whose score is highest.

    Raises:
        ValueError: If ``initial_str`` is empty.
        KeyError: If the prompt contains a character missing from ``char_to_ix``.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if seq_length is None:
        seq_length = max_length
    context = initial_str[-seq_length:]

    # Build the input on the model's own device rather than on a global one,
    # so the call works wherever the model currently lives.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        indices = [char_to_ix[c] for c in context]
        # unsqueeze(0) adds the batch dimension the model expects.
        initial_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Model size: total element count over every parameter tensor
total_params = sum(map(torch.numel, model.parameters()))
print(f'Total number of parameters in the model: {total_params}')

# Demo: ask the trained model which character should follow a sample prompt
test_str = "Demonstrating the prediction of next cha"
predicted_char = predict_next_char(
    model=model,
    char_to_ix=char_to_ix,
    ix_to_char=ix_to_char,
    initial_str=test_str,
)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.2457668781280518, Validation Loss: 2.349344253540039, Validation Accuracy: 0.3739495873451233
Epoch 20, Loss: 1.765572190284729, Validation Loss: 2.152967691421509, Validation Accuracy: 0.40336135029792786
Epoch 30, Loss: 1.4148823022842407, Validation Loss: 2.029064893722534, Validation Accuracy: 0.45168066024780273
Epoch 40, Loss: 1.1081169843673706, Validation Loss: 2.0113773345947266, Validation Accuracy: 0.45798319578170776
Epoch 50, Loss: 0.825012743473053, Validation Loss: 2.0503737926483154, Validation Accuracy: 0.4831932783126831
Epoch 60, Loss: 0.5684662461280823, Validation Loss: 2.143197774887085, Validation Accuracy: 0.4957983195781708
Epoch 70, Loss: 0.3606196343898773, Validation Loss: 2.3098790645599365, Validation Accuracy: 0.4852941036224365
Epoch 80, Loss: 0.21913738548755646, Validation Loss: 2.458484411239624, Validation Accuracy: 0.4852941036224365
Epoch 90, Loss: 0.13514217734336853, Validation Loss: 2.616741180419922, Validation Accuracy: 0.485

In [14]:
# Preparing the dataset: slide a window of `max_length` characters over `text`.
# Each window (as vocabulary indices) is one input; the character right after it is the label.
max_length = 20  # Maximum length of input sequences
window_starts = range(len(text) - max_length)
X = np.array([[char_to_ix[ch] for ch in text[start:start + max_length]] for start in window_starts])
y = np.array([char_to_ix[text[start + max_length]] for start in window_starts])

# Splitting the dataset into training and validation sets (80/20, fixed seed for a stable split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors of int64 indices, created directly on the target device
X_train = torch.tensor(X_train, dtype=torch.long, device=device)
y_train = torch.tensor(y_train, dtype=torch.long, device=device)
X_val = torch.tensor(X_val, dtype=torch.long, device=device)
y_val = torch.tensor(y_val, dtype=torch.long, device=device)

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer.
# Seed PyTorch first: the layer weights are randomly initialised, so without a seed every
# run of this cell trains a different model and the printed metrics cannot be reproduced.
torch.manual_seed(42)
model = CharRNN(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of training loss, validation loss and validation accuracy
train_loss_list, val_loss_list, val_accuracy_list = [], [], []

# Training the model: one full-batch gradient step per epoch, then a validation pass
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation (no gradients needed)
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        predicted = val_output.argmax(dim=1)
        val_accuracy = (predicted == y_val).float().mean()
    train_loss_list.append(loss.item())
    val_loss_list.append(val_loss.item())
    val_accuracy_list.append(val_accuracy.item())

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Model size: total element count over every parameter tensor
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, seq_length=None):
    """Predict the single most likely character to follow ``initial_str``.

    Puts ``model`` into eval mode (and leaves it there) and runs one forward
    pass without gradient tracking.

    Args:
        model: Network mapping a ``(1, seq)`` tensor of character indices to a
            ``(1, vocab)`` tensor of scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Prompt text; only its last ``seq_length`` characters are used.
        seq_length: Number of trailing characters fed to the model. Defaults to
            the notebook-level ``max_length``.

    Returns:
        The character whose score is highest.

    Raises:
        ValueError: If ``initial_str`` is empty.
        KeyError: If the prompt contains a character missing from ``char_to_ix``.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if seq_length is None:
        seq_length = max_length
    context = initial_str[-seq_length:]

    # Build the input on the model's own device rather than on a global one,
    # so the call works wherever the model currently lives.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        indices = [char_to_ix[c] for c in context]
        # unsqueeze(0) adds the batch dimension the model expects.
        initial_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Demo: ask the trained model which character should follow a sample prompt
test_str = "Demonstrating the prediction of next cha"
predicted_char = predict_next_char(
    model=model,
    char_to_ix=char_to_ix,
    ix_to_char=ix_to_char,
    initial_str=test_str,
)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.24957537651062, Validation Loss: 2.3702290058135986, Validation Accuracy: 0.38396623730659485
Epoch 20, Loss: 1.7833740711212158, Validation Loss: 2.136301279067993, Validation Accuracy: 0.4261603355407715
Epoch 30, Loss: 1.4249398708343506, Validation Loss: 2.0334324836730957, Validation Accuracy: 0.4683544337749481
Epoch 40, Loss: 1.1117262840270996, Validation Loss: 2.007404088973999, Validation Accuracy: 0.49789029359817505
Epoch 50, Loss: 0.8204943537712097, Validation Loss: 2.049116373062134, Validation Accuracy: 0.502109706401825
Epoch 60, Loss: 0.5614900588989258, Validation Loss: 2.1390299797058105, Validation Accuracy: 0.5084388256072998
Epoch 70, Loss: 0.3773600459098816, Validation Loss: 2.2774412631988525, Validation Accuracy: 0.49789029359817505
Epoch 80, Loss: 0.22547397017478943, Validation Loss: 2.4570999145507812, Validation Accuracy: 0.49367088079452515
Epoch 90, Loss: 0.14135359227657318, Validation Loss: 2.595407247543335, Validation Accuracy: 0.4

In [15]:
# Preparing the dataset: slide a window of `max_length` characters over `text`.
# Each window (as vocabulary indices) is one input; the character right after it is the label.
max_length = 30  # Maximum length of input sequences
window_starts = range(len(text) - max_length)
X = np.array([[char_to_ix[ch] for ch in text[start:start + max_length]] for start in window_starts])
y = np.array([char_to_ix[text[start + max_length]] for start in window_starts])

# Splitting the dataset into training and validation sets (80/20, fixed seed for a stable split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors of int64 indices, created directly on the target device
X_train = torch.tensor(X_train, dtype=torch.long, device=device)
y_train = torch.tensor(y_train, dtype=torch.long, device=device)
X_val = torch.tensor(X_val, dtype=torch.long, device=device)
y_val = torch.tensor(y_val, dtype=torch.long, device=device)

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer.
# Seed PyTorch first: the layer weights are randomly initialised, so without a seed every
# run of this cell trains a different model and the printed metrics cannot be reproduced.
torch.manual_seed(42)
model = CharRNN(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of training loss, validation loss and validation accuracy
train_loss_list, val_loss_list, val_accuracy_list = [], [], []

# Training the model: one full-batch gradient step per epoch, then a validation pass
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation (no gradients needed)
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        predicted = val_output.argmax(dim=1)
        val_accuracy = (predicted == y_val).float().mean()
    train_loss_list.append(loss.item())
    val_loss_list.append(val_loss.item())
    val_accuracy_list.append(val_accuracy.item())

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

# Model size: total element count over every parameter tensor
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, seq_length=None):
    """Predict the single most likely character to follow ``initial_str``.

    Puts ``model`` into eval mode (and leaves it there) and runs one forward
    pass without gradient tracking.

    Args:
        model: Network mapping a ``(1, seq)`` tensor of character indices to a
            ``(1, vocab)`` tensor of scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Prompt text; only its last ``seq_length`` characters are used.
        seq_length: Number of trailing characters fed to the model. Defaults to
            the notebook-level ``max_length``.

    Returns:
        The character whose score is highest.

    Raises:
        ValueError: If ``initial_str`` is empty.
        KeyError: If the prompt contains a character missing from ``char_to_ix``.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if seq_length is None:
        seq_length = max_length
    context = initial_str[-seq_length:]

    # Build the input on the model's own device rather than on a global one,
    # so the call works wherever the model currently lives.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        indices = [char_to_ix[c] for c in context]
        # unsqueeze(0) adds the batch dimension the model expects.
        initial_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Demo: ask the trained model which character should follow a sample prompt
test_str = "Demonstrating the prediction of next cha"
predicted_char = predict_next_char(
    model=model,
    char_to_ix=char_to_ix,
    ix_to_char=ix_to_char,
    initial_str=test_str,
)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.227437973022461, Validation Loss: 2.470881223678589, Validation Accuracy: 0.3262711763381958
Epoch 20, Loss: 1.7588928937911987, Validation Loss: 2.2304470539093018, Validation Accuracy: 0.4131355881690979
Epoch 30, Loss: 1.403831124305725, Validation Loss: 2.121061086654663, Validation Accuracy: 0.44279661774635315
Epoch 40, Loss: 1.0931355953216553, Validation Loss: 2.087891101837158, Validation Accuracy: 0.43855932354927063
Epoch 50, Loss: 0.812015950679779, Validation Loss: 2.122880458831787, Validation Accuracy: 0.45338982343673706
Epoch 60, Loss: 0.567520797252655, Validation Loss: 2.216219663619995, Validation Accuracy: 0.47033897042274475
Epoch 70, Loss: 0.3800422251224518, Validation Loss: 2.353334426879883, Validation Accuracy: 0.48516950011253357
Epoch 80, Loss: 0.23668229579925537, Validation Loss: 2.5101213455200195, Validation Accuracy: 0.472457617521286
Epoch 90, Loss: 0.14700618386268616, Validation Loss: 2.6256988048553467, Validation Accuracy: 0.4915

In [16]:
# Preparing the dataset: slide a window of `max_length` characters over `text`.
# Each window (as vocabulary indices) is one input; the character right after it is the label.
max_length = 10  # Maximum length of input sequences
window_starts = range(len(text) - max_length)
X = np.array([[char_to_ix[ch] for ch in text[start:start + max_length]] for start in window_starts])
y = np.array([char_to_ix[text[start + max_length]] for start in window_starts])

# Splitting the dataset into training and validation sets (80/20, fixed seed for a stable split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors of int64 indices, created directly on the target device
X_train = torch.tensor(X_train, dtype=torch.long, device=device)
y_train = torch.tensor(y_train, dtype=torch.long, device=device)
X_val = torch.tensor(X_val, dtype=torch.long, device=device)
y_val = torch.tensor(y_val, dtype=torch.long, device=device)

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer.
# Seed PyTorch first: the layer weights are randomly initialised, so without a seed every
# run of this cell trains a different model and the printed metrics cannot be reproduced.
torch.manual_seed(42)
model = CharLstm(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of training loss, validation loss and validation accuracy
train_loss_list, val_loss_list, val_accuracy_list = [], [], []

# Training the model: one full-batch gradient step per epoch, then a validation pass
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation (no gradients needed)
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        predicted = val_output.argmax(dim=1)
        val_accuracy = (predicted == y_val).float().mean()
    train_loss_list.append(loss.item())
    val_loss_list.append(val_loss.item())
    val_accuracy_list.append(val_accuracy.item())

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')


# Model size: total element count over every parameter tensor
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, seq_length=None):
    """Predict the single most likely character to follow ``initial_str``.

    Puts ``model`` into eval mode (and leaves it there) and runs one forward
    pass without gradient tracking.

    Args:
        model: Network mapping a ``(1, seq)`` tensor of character indices to a
            ``(1, vocab)`` tensor of scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Prompt text; only its last ``seq_length`` characters are used.
        seq_length: Number of trailing characters fed to the model. Defaults to
            the notebook-level ``max_length``.

    Returns:
        The character whose score is highest.

    Raises:
        ValueError: If ``initial_str`` is empty.
        KeyError: If the prompt contains a character missing from ``char_to_ix``.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if seq_length is None:
        seq_length = max_length
    context = initial_str[-seq_length:]

    # Build the input on the model's own device rather than on a global one,
    # so the call works wherever the model currently lives.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        indices = [char_to_ix[c] for c in context]
        # unsqueeze(0) adds the batch dimension the model expects.
        initial_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Demo: ask the trained model which character should follow a sample prompt
test_str = "Demonstrating the prediction of next cha"
predicted_char = predict_next_char(
    model=model,
    char_to_ix=char_to_ix,
    ix_to_char=ix_to_char,
    initial_str=test_str,
)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.558636426925659, Validation Loss: 2.5546875, Validation Accuracy: 0.3382352888584137
Epoch 20, Loss: 2.040335178375244, Validation Loss: 2.207496166229248, Validation Accuracy: 0.4138655364513397
Epoch 30, Loss: 1.635844349861145, Validation Loss: 2.035276174545288, Validation Accuracy: 0.43697479367256165
Epoch 40, Loss: 1.281584620475769, Validation Loss: 1.9776147603988647, Validation Accuracy: 0.4789915978908539
Epoch 50, Loss: 0.959574282169342, Validation Loss: 1.9767529964447021, Validation Accuracy: 0.4810924232006073
Epoch 60, Loss: 0.6829070448875427, Validation Loss: 2.0069470405578613, Validation Accuracy: 0.4831932783126831
Epoch 70, Loss: 0.44102925062179565, Validation Loss: 2.1095376014709473, Validation Accuracy: 0.45588234066963196
Epoch 80, Loss: 0.26552319526672363, Validation Loss: 2.2543904781341553, Validation Accuracy: 0.4663865566253662
Epoch 90, Loss: 0.16254450380802155, Validation Loss: 2.3705503940582275, Validation Accuracy: 0.46638655662

In [17]:
# Preparing the dataset: slide a window of `max_length` characters over `text`.
# Each window (as vocabulary indices) is one input; the character right after it is the label.
max_length = 20  # Maximum length of input sequences
window_starts = range(len(text) - max_length)
X = np.array([[char_to_ix[ch] for ch in text[start:start + max_length]] for start in window_starts])
y = np.array([char_to_ix[text[start + max_length]] for start in window_starts])

# Splitting the dataset into training and validation sets (80/20, fixed seed for a stable split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors of int64 indices, created directly on the target device
X_train = torch.tensor(X_train, dtype=torch.long, device=device)
y_train = torch.tensor(y_train, dtype=torch.long, device=device)
X_val = torch.tensor(X_val, dtype=torch.long, device=device)
y_val = torch.tensor(y_val, dtype=torch.long, device=device)

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer.
# Seed PyTorch first: the layer weights are randomly initialised, so without a seed every
# run of this cell trains a different model and the printed metrics cannot be reproduced.
torch.manual_seed(42)
model = CharLstm(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of training loss, validation loss and validation accuracy
train_loss_list, val_loss_list, val_accuracy_list = [], [], []

# Training the model: one full-batch gradient step per epoch, then a validation pass
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation (no gradients needed)
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        predicted = val_output.argmax(dim=1)
        val_accuracy = (predicted == y_val).float().mean()
    train_loss_list.append(loss.item())
    val_loss_list.append(val_loss.item())
    val_accuracy_list.append(val_accuracy.item())

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')


# Model size: total element count over every parameter tensor
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, seq_length=None):
    """Predict the single most likely character to follow ``initial_str``.

    Puts ``model`` into eval mode (and leaves it there) and runs one forward
    pass without gradient tracking.

    Args:
        model: Network mapping a ``(1, seq)`` tensor of character indices to a
            ``(1, vocab)`` tensor of scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Prompt text; only its last ``seq_length`` characters are used.
        seq_length: Number of trailing characters fed to the model. Defaults to
            the notebook-level ``max_length``.

    Returns:
        The character whose score is highest.

    Raises:
        ValueError: If ``initial_str`` is empty.
        KeyError: If the prompt contains a character missing from ``char_to_ix``.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if seq_length is None:
        seq_length = max_length
    context = initial_str[-seq_length:]

    # Build the input on the model's own device rather than on a global one,
    # so the call works wherever the model currently lives.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        indices = [char_to_ix[c] for c in context]
        # unsqueeze(0) adds the batch dimension the model expects.
        initial_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Demo: ask the trained model which character should follow a sample prompt
test_str = "Demonstrating the prediction of next cha"
predicted_char = predict_next_char(
    model=model,
    char_to_ix=char_to_ix,
    ix_to_char=ix_to_char,
    initial_str=test_str,
)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.534968614578247, Validation Loss: 2.5415823459625244, Validation Accuracy: 0.31012657284736633
Epoch 20, Loss: 2.0526533126831055, Validation Loss: 2.2337794303894043, Validation Accuracy: 0.4135020971298218
Epoch 30, Loss: 1.670228123664856, Validation Loss: 2.0469532012939453, Validation Accuracy: 0.42827004194259644
Epoch 40, Loss: 1.3293704986572266, Validation Loss: 1.949750304222107, Validation Accuracy: 0.46202531456947327
Epoch 50, Loss: 1.0197216272354126, Validation Loss: 1.9073197841644287, Validation Accuracy: 0.4767932593822479
Epoch 60, Loss: 0.7411264181137085, Validation Loss: 1.9249053001403809, Validation Accuracy: 0.5
Epoch 70, Loss: 0.5158718228340149, Validation Loss: 1.9770665168762207, Validation Accuracy: 0.5042194128036499
Epoch 80, Loss: 0.3329835534095764, Validation Loss: 2.0529518127441406, Validation Accuracy: 0.5042194128036499
Epoch 90, Loss: 0.2431391328573227, Validation Loss: 2.1480913162231445, Validation Accuracy: 0.516877651214599

In [18]:
# Preparing the dataset: slide a window of `max_length` characters over `text`.
# Each window (as vocabulary indices) is one input; the character right after it is the label.
max_length = 30  # Maximum length of input sequences
window_starts = range(len(text) - max_length)
X = np.array([[char_to_ix[ch] for ch in text[start:start + max_length]] for start in window_starts])
y = np.array([char_to_ix[text[start + max_length]] for start in window_starts])

# Splitting the dataset into training and validation sets (80/20, fixed seed for a stable split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors of int64 indices, created directly on the target device
X_train = torch.tensor(X_train, dtype=torch.long, device=device)
y_train = torch.tensor(y_train, dtype=torch.long, device=device)
X_val = torch.tensor(X_val, dtype=torch.long, device=device)
y_val = torch.tensor(y_val, dtype=torch.long, device=device)

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer.
# Seed PyTorch first: the layer weights are randomly initialised, so without a seed every
# run of this cell trains a different model and the printed metrics cannot be reproduced.
torch.manual_seed(42)
model = CharLstm(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of training loss, validation loss and validation accuracy
train_loss_list, val_loss_list, val_accuracy_list = [], [], []

# Training the model: one full-batch gradient step per epoch, then a validation pass
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation (no gradients needed)
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        predicted = val_output.argmax(dim=1)
        val_accuracy = (predicted == y_val).float().mean()
    train_loss_list.append(loss.item())
    val_loss_list.append(val_loss.item())
    val_accuracy_list.append(val_accuracy.item())

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')


# Model size: total element count over every parameter tensor
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, seq_length=None):
    """Predict the single most likely character to follow ``initial_str``.

    Puts ``model`` into eval mode (and leaves it there) and runs one forward
    pass without gradient tracking.

    Args:
        model: Network mapping a ``(1, seq)`` tensor of character indices to a
            ``(1, vocab)`` tensor of scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Prompt text; only its last ``seq_length`` characters are used.
        seq_length: Number of trailing characters fed to the model. Defaults to
            the notebook-level ``max_length``.

    Returns:
        The character whose score is highest.

    Raises:
        ValueError: If ``initial_str`` is empty.
        KeyError: If the prompt contains a character missing from ``char_to_ix``.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if seq_length is None:
        seq_length = max_length
    context = initial_str[-seq_length:]

    # Build the input on the model's own device rather than on a global one,
    # so the call works wherever the model currently lives.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        indices = [char_to_ix[c] for c in context]
        # unsqueeze(0) adds the batch dimension the model expects.
        initial_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Demo: ask the trained model which character should follow a sample prompt
test_str = "Demonstrating the prediction of next cha"
predicted_char = predict_next_char(
    model=model,
    char_to_ix=char_to_ix,
    ix_to_char=ix_to_char,
    initial_str=test_str,
)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.5508790016174316, Validation Loss: 2.657008171081543, Validation Accuracy: 0.2711864411830902
Epoch 20, Loss: 2.022002696990967, Validation Loss: 2.3287742137908936, Validation Accuracy: 0.39830508828163147
Epoch 30, Loss: 1.6112356185913086, Validation Loss: 2.193427801132202, Validation Accuracy: 0.4237288236618042
Epoch 40, Loss: 1.246855616569519, Validation Loss: 2.133040428161621, Validation Accuracy: 0.4237288236618042
Epoch 50, Loss: 0.9265393614768982, Validation Loss: 2.1464650630950928, Validation Accuracy: 0.44703391194343567
Epoch 60, Loss: 0.6726479530334473, Validation Loss: 2.197317600250244, Validation Accuracy: 0.44915252923965454
Epoch 70, Loss: 0.44009771943092346, Validation Loss: 2.2759523391723633, Validation Accuracy: 0.44703391194343567
Epoch 80, Loss: 0.2840941250324249, Validation Loss: 2.4119439125061035, Validation Accuracy: 0.4406779706478119
Epoch 90, Loss: 0.18859991431236267, Validation Loss: 2.5009632110595703, Validation Accuracy: 0.

In [19]:
# Preparing the dataset: slide a window of `max_length` characters over `text`.
# Each window (as vocabulary indices) is one input; the character right after it is the label.
max_length = 10  # Maximum length of input sequences
window_starts = range(len(text) - max_length)
X = np.array([[char_to_ix[ch] for ch in text[start:start + max_length]] for start in window_starts])
y = np.array([char_to_ix[text[start + max_length]] for start in window_starts])

# Splitting the dataset into training and validation sets (80/20, fixed seed for a stable split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors of int64 indices, created directly on the target device
X_train = torch.tensor(X_train, dtype=torch.long, device=device)
y_train = torch.tensor(y_train, dtype=torch.long, device=device)
X_val = torch.tensor(X_val, dtype=torch.long, device=device)
y_val = torch.tensor(y_val, dtype=torch.long, device=device)

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer.
# Seed PyTorch first: the layer weights are randomly initialised, so without a seed every
# run of this cell trains a different model and the printed metrics cannot be reproduced.
torch.manual_seed(42)
model = CharGRU(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of training loss, validation loss and validation accuracy
train_loss_list, val_loss_list, val_accuracy_list = [], [], []

# Training the model: one full-batch gradient step per epoch, then a validation pass
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation (no gradients needed)
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        predicted = val_output.argmax(dim=1)
        val_accuracy = (predicted == y_val).float().mean()
    train_loss_list.append(loss.item())
    val_loss_list.append(val_loss.item())
    val_accuracy_list.append(val_accuracy.item())

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')


# Model size: total element count over every parameter tensor
total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, context_len=None):
    """Predict the character most likely to follow ``initial_str``.

    Args:
        model: Character model. It is called with a ``(1, seq_len)`` long tensor of
            character indices and must return one row of per-character scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Prompt text; only its last ``context_len`` characters are used.
        context_len: Number of trailing prompt characters fed to the model. When
            omitted, the notebook-level ``max_length`` is used, so existing
            four-argument calls behave as before.

    Returns:
        The ``ix_to_char`` entry for the highest-scoring index.

    Raises:
        ValueError: If ``initial_str`` is empty.
        KeyError: If a used prompt character is missing from ``char_to_ix``.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if context_len is None:
        context_len = max_length  # window size set by the enclosing notebook cell

    # Place the input on the device holding the model's weights instead of relying
    # on a notebook-global `device`; a parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    indices = [char_to_ix[c] for c in initial_str[-context_len:]]

    model.eval()
    with torch.no_grad():
        # unsqueeze(0) adds the batch dimension: (seq_len,) -> (1, seq_len)
        model_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        scores = model(model_input)
        predicted_index = torch.argmax(scores, dim=1).item()
    return ix_to_char[predicted_index]

# Predicting the next character
# predict_next_char keeps only the last `max_length` characters of the prompt, so with
# max_length = 10 the model actually sees "f next cha". Every character that is kept
# must be a key of char_to_ix, otherwise the lookup raises KeyError.
test_str = "Demonstrating the prediction of next cha"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.3766560554504395, Validation Loss: 2.389404773712158, Validation Accuracy: 0.3739495873451233
Epoch 20, Loss: 1.8708181381225586, Validation Loss: 2.1323258876800537, Validation Accuracy: 0.40336135029792786
Epoch 30, Loss: 1.4551918506622314, Validation Loss: 1.9912937879562378, Validation Accuracy: 0.43907561898231506
Epoch 40, Loss: 1.0853792428970337, Validation Loss: 1.9377284049987793, Validation Accuracy: 0.4768907427787781
Epoch 50, Loss: 0.7541441321372986, Validation Loss: 1.9610542058944702, Validation Accuracy: 0.4810924232006073
Epoch 60, Loss: 0.4815974533557892, Validation Loss: 2.0393686294555664, Validation Accuracy: 0.4978991448879242
Epoch 70, Loss: 0.2837848365306854, Validation Loss: 2.159581184387207, Validation Accuracy: 0.49369746446609497
Epoch 80, Loss: 0.16056734323501587, Validation Loss: 2.308215856552124, Validation Accuracy: 0.48739495873451233
Epoch 90, Loss: 0.09684307128190994, Validation Loss: 2.4185919761657715, Validation Accuracy:

In [22]:
# Preparing the dataset
# Each sample is a window of `max_length` consecutive characters from `text`;
# its label is the character that immediately follows that window.
max_length = 20  # Maximum length of input sequences
X = np.array([
    [char_to_ix[char] for char in text[i:i + max_length]]
    for i in range(len(text) - max_length)
])
y = np.array([char_to_ix[text[i + max_length]] for i in range(len(text) - max_length)])

# Splitting the dataset into training and validation sets (80/20, fixed split seed)
# and converting every split straight into a long tensor on `device`.
X_train, X_val, y_train, y_val = (
    torch.tensor(split, dtype=torch.long, device=device)
    for split in train_test_split(X, y, test_size=0.2, random_state=42)
)

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Seed PyTorch before the model is built so the initial weights are the same on
# every run; the split above is already seeded, this makes the model side match.
torch.manual_seed(42)

# Model, loss, and optimizer (CharGRU is defined elsewhere in the notebook)
model = CharGRU(len(chars), hidden_size, len(chars)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of training loss, validation loss and validation accuracy
train_loss_list, val_loss_list, val_accuracy_list = [], [], []

# Training the model: full-batch, i.e. one optimizer step per epoch on all of X_train
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation on the held-out windows, without tracking gradients
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        predicted = val_output.argmax(dim=1)  # highest-scoring character per sample
        val_accuracy = (predicted == y_val).float().mean()

    train_loss_list.append(loss.item())
    val_loss_list.append(val_loss.item())
    val_accuracy_list.append(val_accuracy.item())

    # Report every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')


total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, context_len=None):
    """Predict the character most likely to follow ``initial_str``.

    Args:
        model: Character model. It is called with a ``(1, seq_len)`` long tensor of
            character indices and must return one row of per-character scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Prompt text; only its last ``context_len`` characters are used.
        context_len: Number of trailing prompt characters fed to the model. When
            omitted, the notebook-level ``max_length`` is used, so existing
            four-argument calls behave as before.

    Returns:
        The ``ix_to_char`` entry for the highest-scoring index.

    Raises:
        ValueError: If ``initial_str`` is empty.
        KeyError: If a used prompt character is missing from ``char_to_ix``.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if context_len is None:
        context_len = max_length  # window size set by the enclosing notebook cell

    # Place the input on the device holding the model's weights instead of relying
    # on a notebook-global `device`; a parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    indices = [char_to_ix[c] for c in initial_str[-context_len:]]

    model.eval()
    with torch.no_grad():
        # unsqueeze(0) adds the batch dimension: (seq_len,) -> (1, seq_len)
        model_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        scores = model(model_input)
        predicted_index = torch.argmax(scores, dim=1).item()
    return ix_to_char[predicted_index]

# Predicting the next character
# predict_next_char keeps only the last `max_length` characters of the prompt, so with
# max_length = 20 the model actually sees "ediction of next cha". Every character that
# is kept must be a key of char_to_ix, otherwise the lookup raises KeyError.
test_str = "Demonstrating the prediction of next cha"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.3871400356292725, Validation Loss: 2.4728405475616455, Validation Accuracy: 0.324894517660141
Epoch 20, Loss: 1.8911995887756348, Validation Loss: 2.143627882003784, Validation Accuracy: 0.44092828035354614
Epoch 30, Loss: 1.4946480989456177, Validation Loss: 1.9857977628707886, Validation Accuracy: 0.45358648896217346
Epoch 40, Loss: 1.134361743927002, Validation Loss: 1.8990533351898193, Validation Accuracy: 0.4831223487854004
Epoch 50, Loss: 0.8054695129394531, Validation Loss: 1.888532280921936, Validation Accuracy: 0.5295358896255493
Epoch 60, Loss: 0.5310377478599548, Validation Loss: 1.9568721055984497, Validation Accuracy: 0.5295358896255493
Epoch 70, Loss: 0.32961350679397583, Validation Loss: 2.065840244293213, Validation Accuracy: 0.5168776512145996
Epoch 80, Loss: 0.19743704795837402, Validation Loss: 2.2070839405059814, Validation Accuracy: 0.5126582384109497
Epoch 90, Loss: 0.11726894974708557, Validation Loss: 2.372980833053589, Validation Accuracy: 0.5

In [23]:
# Preparing the dataset
# Each sample is a window of `max_length` consecutive characters from `text`;
# its label is the character that immediately follows that window.
max_length = 30  # Maximum length of input sequences
X = np.array([
    [char_to_ix[char] for char in text[i:i + max_length]]
    for i in range(len(text) - max_length)
])
y = np.array([char_to_ix[text[i + max_length]] for i in range(len(text) - max_length)])

# Splitting the dataset into training and validation sets (80/20, fixed split seed)
# and converting every split straight into a long tensor on `device`.
X_train, X_val, y_train, y_val = (
    torch.tensor(split, dtype=torch.long, device=device)
    for split in train_test_split(X, y, test_size=0.2, random_state=42)
)

# Hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Seed PyTorch before the model is built so the initial weights are the same on
# every run; the split above is already seeded, this makes the model side match.
torch.manual_seed(42)

# Model, loss, and optimizer (CharGRU is defined elsewhere in the notebook)
model = CharGRU(len(chars), hidden_size, len(chars)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history of training loss, validation loss and validation accuracy
train_loss_list, val_loss_list, val_accuracy_list = [], [], []

# Training the model: full-batch, i.e. one optimizer step per epoch on all of X_train
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation on the held-out windows, without tracking gradients
    model.eval()
    with torch.no_grad():
        val_output = model(X_val)
        val_loss = criterion(val_output, y_val)
        predicted = val_output.argmax(dim=1)  # highest-scoring character per sample
        val_accuracy = (predicted == y_val).float().mean()

    train_loss_list.append(loss.item())
    val_loss_list.append(val_loss.item())
    val_accuracy_list.append(val_accuracy.item())

    # Report every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')


total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters in the model: {total_params}')

# Prediction function
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, context_len=None):
    """Predict the character most likely to follow ``initial_str``.

    Args:
        model: Character model. It is called with a ``(1, seq_len)`` long tensor of
            character indices and must return one row of per-character scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Prompt text; only its last ``context_len`` characters are used.
        context_len: Number of trailing prompt characters fed to the model. When
            omitted, the notebook-level ``max_length`` is used, so existing
            four-argument calls behave as before.

    Returns:
        The ``ix_to_char`` entry for the highest-scoring index.

    Raises:
        ValueError: If ``initial_str`` is empty.
        KeyError: If a used prompt character is missing from ``char_to_ix``.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if context_len is None:
        context_len = max_length  # window size set by the enclosing notebook cell

    # Place the input on the device holding the model's weights instead of relying
    # on a notebook-global `device`; a parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    indices = [char_to_ix[c] for c in initial_str[-context_len:]]

    model.eval()
    with torch.no_grad():
        # unsqueeze(0) adds the batch dimension: (seq_len,) -> (1, seq_len)
        model_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        scores = model(model_input)
        predicted_index = torch.argmax(scores, dim=1).item()
    return ix_to_char[predicted_index]

# Predicting the next character
# predict_next_char keeps only the last `max_length` characters of the prompt, so with
# max_length = 30 the model actually sees "ing the prediction of next cha". Every
# character that is kept must be a key of char_to_ix, otherwise the lookup raises KeyError.
test_str = "Demonstrating the prediction of next cha"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Epoch 10, Loss: 2.4122915267944336, Validation Loss: 2.544207811355591, Validation Accuracy: 0.31144067645072937
Epoch 20, Loss: 1.8939142227172852, Validation Loss: 2.2476115226745605, Validation Accuracy: 0.3855932056903839
Epoch 30, Loss: 1.4735640287399292, Validation Loss: 2.100532054901123, Validation Accuracy: 0.43855932354927063
Epoch 40, Loss: 1.1065645217895508, Validation Loss: 2.0221755504608154, Validation Accuracy: 0.4661017060279846
Epoch 50, Loss: 0.7766416072845459, Validation Loss: 2.032391309738159, Validation Accuracy: 0.49788135290145874
Epoch 60, Loss: 0.49831539392471313, Validation Loss: 2.1249871253967285, Validation Accuracy: 0.5021186470985413
Epoch 70, Loss: 0.2966916859149933, Validation Loss: 2.271649122238159, Validation Accuracy: 0.49152541160583496
Epoch 80, Loss: 0.16702033579349518, Validation Loss: 2.457617998123169, Validation Accuracy: 0.48516950011253357
Epoch 90, Loss: 0.09448506683111191, Validation Loss: 2.6303365230560303, Validation Accuracy: