<a href="https://colab.research.google.com/github/UmaNagirireddi/RTML/blob/main/Homework3_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Homework 3
Name: Uma Nagirireddi
Student ID: 801377429

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report, f1_score
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import requests

In [None]:
# Set device
# Prefer the GPU when PyTorch can see one; otherwise everything runs on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's random number generator so weight initialisation and
# DataLoader shuffling are repeatable after "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)
device

device(type='cuda')

In [None]:
class CharLstm(nn.Module):
    """Sequence classifier: embedding -> single-layer LSTM -> linear head.

    Args:
        input_size: Number of distinct ids the embedding table accepts.
        hidden_size: Width of both the embedding vectors and the LSTM state.
        output_size: Number of scores produced for each input sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Attribute names are part of the state_dict keys, so they are kept as-is.
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.LSTM = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return scores computed from the LSTM output at the final time step.

        ``x`` holds integer ids laid out batch-first, i.e. (batch, seq_len).
        """
        step_outputs, _final_state = self.LSTM(self.embedding(x))
        # Only the last position of every sequence feeds the linear head.
        last_step = step_outputs[:, -1]
        return self.fc(last_step)

class CharGRU(nn.Module):
    """Sequence classifier: embedding -> single-layer GRU -> linear head.

    Args:
        input_size: Number of distinct ids the embedding table accepts.
        hidden_size: Width of both the embedding vectors and the GRU state.
        output_size: Number of scores produced for each input sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Attribute names are part of the state_dict keys, so they are kept as-is.
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return scores computed from the GRU output at the final time step.

        ``x`` holds integer ids laid out batch-first, i.e. (batch, seq_len).
        """
        step_outputs, _final_state = self.gru(self.embedding(x))
        # Only the last position of every sequence feeds the linear head.
        last_step = step_outputs[:, -1]
        return self.fc(last_step)


In [None]:
# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops the run on an HTTP error instead of silently
# treating an error page as the training text.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 20
# Create a character mapping to integers
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]
assert len(encoded_text) > sequence_length, "text is too short for the chosen sequence_length"

# Create sequences and targets as PyTorch tensors.
# unfold() yields every window of `sequence_length` consecutive ids without a
# Python loop; the final window has no following character to predict, so it
# is dropped. targets[i] is the id that comes right after sequences[i].
encoded_tensor = torch.tensor(encoded_text, dtype=torch.long)
sequences = encoded_tensor.unfold(0, sequence_length, 1)[:-1]
targets = encoded_tensor[sequence_length:]

# Step 3: Create a dataset class
class CharDataset(Dataset):
    """Pairs each input sequence with its target, looked up by position."""

    def __init__(self, sequences, targets):
        self.sequences, self.targets = sequences, targets

    def __len__(self):
        # The dataset size is taken from the inputs alone.
        return len(self.sequences)

    def __getitem__(self, index):
        sample = self.sequences[index]
        label = self.targets[index]
        return sample, label

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
# A dedicated, fixed-seed generator makes the train/test membership identical
# on every run, independent of how much global randomness was consumed before.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


In [None]:
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
    """Predict the single most likely character to follow ``initial_str``.

    Args:
        model: Module that maps a (1, seq_len) tensor of character ids to one
            row of scores, one score per character id. It is put in eval mode.
        char_to_ix: Mapping from character to integer id.
        ix_to_char: Mapping from integer id back to character.
        initial_str: Prompt text.
        max_length: For a positive value, only the last ``max_length``
            characters of the prompt are fed to the model.

    Returns:
        The character whose score is highest.

    Raises:
        ValueError: If the prompt is empty.
        KeyError: If the prompt contains characters missing from ``char_to_ix``.
    """
    context = initial_str[-max_length:]
    if not context:
        raise ValueError("initial_str must contain at least one character")

    # Report every out-of-vocabulary character at once rather than failing on the first.
    unknown = sorted({c for c in context if c not in char_to_ix})
    if unknown:
        raise KeyError(f"characters not in vocabulary: {unknown!r}")

    # Build the input on the device the model lives on instead of depending on
    # a module-level `device` variable; parameter-free models fall back to CPU.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        ids = [char_to_ix[c] for c in context]
        # unsqueeze(0) adds the batch dimension the model expects.
        initial_input = torch.tensor(ids, dtype=torch.long, device=model_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

In [None]:
def train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=20):
    """Train ``model`` and evaluate it on ``test_loader`` after every epoch.

    Args:
        model: Module to train; it is moved to ``device`` in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Iterable of ``(inputs, labels)`` validation batches.
        device: Target device. A CUDA device is replaced by the CPU (with a
            warning) when CUDA is not available.
        n_epochs: Number of passes over ``train_loader``.

    Returns:
        dict with per-epoch lists ``'train_loss'``, ``'val_loss'`` (mean of the
        per-batch losses), ``'val_accuracy'`` (percent), and the trained
        ``'model'``. A metric is NaN for an epoch whose loader yields no batches.
    """
    import warnings

    # The default is 'cuda'; degrade gracefully instead of crashing on CPU-only machines.
    if str(device).startswith('cuda') and not torch.cuda.is_available():
        warnings.warn("CUDA is not available; falling back to CPU.")
        device = 'cpu'
    model.to(device)

    train_loss_list, val_loss_list, val_accuracy_list = [], [], []

    for epoch in range(n_epochs):
        # ---- Training pass ----
        model.train()
        running_loss, n_batches = 0.0, 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            n_batches += 1
        # Counting batches (rather than len(loader)) avoids dividing by zero on an empty loader.
        train_loss_list.append(running_loss / n_batches if n_batches else float('nan'))

        # ---- Validation pass ----
        model.eval()
        running_loss, n_batches = 0.0, 0
        correct, total = 0, 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                running_loss += criterion(outputs, labels).item()
                n_batches += 1
                # The predicted class is the index of the largest score in each row.
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss_list.append(running_loss / n_batches if n_batches else float('nan'))

        val_accuracy = 100 * (correct / total) if total else float('nan')
        val_accuracy_list.append(val_accuracy)
        print(f'Epoch {epoch + 1}, Training loss: {train_loss_list[-1]:.4f}, Validation loss: {val_loss_list[-1]:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

    return {
        'train_loss': train_loss_list,
        'val_loss': val_loss_list,
        'val_accuracy': val_accuracy_list,
        'model': model
    }


In [None]:
# Experiment: LSTM with hidden size 128, trained on whatever loaders are currently in scope.
hidden_size = 128
learning_rate = 0.005

model = CharLstm(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Pass the detected `device` rather than a literal 'cuda' so the cell also runs on CPU-only machines.
lstm_results = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device=device, n_epochs=10)
total_params = sum(p.numel() for p in lstm_results['model'].parameters())
print(f'Total number of parameters in the model: {total_params}')
test_str = "This Course is an introduction to deep learnin"
# Limit the prompt to the window length the current dataset was built with.
predicted_char = predict_next_char(lstm_results['model'], char_to_int, int_to_char, test_str, sequence_length)
print(f"Predicted next character: '{predicted_char}'")

Epoch 1, Training loss: 1.7613, Validation loss: 1.6431, Validation Accuracy: 50.87%
Epoch 2, Training loss: 1.6063, Validation loss: 1.5877, Validation Accuracy: 52.25%
Epoch 3, Training loss: 1.5711, Validation loss: 1.5749, Validation Accuracy: 52.28%
Epoch 4, Training loss: 1.5556, Validation loss: 1.5696, Validation Accuracy: 52.74%
Epoch 5, Training loss: 1.5498, Validation loss: 1.5645, Validation Accuracy: 52.74%
Epoch 6, Training loss: 1.5469, Validation loss: 1.5677, Validation Accuracy: 52.73%
Epoch 7, Training loss: 1.5473, Validation loss: 1.5720, Validation Accuracy: 52.64%
Epoch 8, Training loss: 1.5450, Validation loss: 1.5683, Validation Accuracy: 52.88%
Epoch 9, Training loss: 1.5503, Validation loss: 1.5782, Validation Accuracy: 52.51%
Epoch 10, Training loss: 1.5503, Validation loss: 1.5737, Validation Accuracy: 52.51%
Total number of parameters in the model: 148801
Predicted next character: 'g'


In [None]:
# Experiment: GRU with hidden size 128, trained on whatever loaders are currently in scope.
hidden_size = 128
learning_rate = 0.005


model = CharGRU(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Pass the detected `device` rather than a literal 'cuda' so the cell also runs on CPU-only machines.
gru_results_seqLen_20 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device=device, n_epochs=10)
total_params = sum(p.numel() for p in gru_results_seqLen_20['model'].parameters())
print(f'Total number of parameters in the model: {total_params}')
test_str = "Next character predictin"
# Limit the prompt to the window length the current dataset was built with.
predicted_char = predict_next_char(gru_results_seqLen_20['model'], char_to_int, int_to_char, test_str, sequence_length)
print(f"Predicted next character: '{predicted_char}'")

Epoch 1, Training loss: 1.8453, Validation loss: 1.7734, Validation Accuracy: 47.06%
Epoch 2, Training loss: 1.7572, Validation loss: 1.7717, Validation Accuracy: 48.49%
Epoch 3, Training loss: 1.7575, Validation loss: 1.7757, Validation Accuracy: 46.21%
Epoch 4, Training loss: 1.7687, Validation loss: 1.7899, Validation Accuracy: 47.14%
Epoch 5, Training loss: 1.7823, Validation loss: 1.8094, Validation Accuracy: 46.89%
Epoch 6, Training loss: 1.7920, Validation loss: 1.7952, Validation Accuracy: 47.17%
Epoch 7, Training loss: 1.8049, Validation loss: 1.8233, Validation Accuracy: 46.33%
Epoch 8, Training loss: 1.8058, Validation loss: 1.8257, Validation Accuracy: 46.45%
Epoch 9, Training loss: 1.8081, Validation loss: 1.8262, Validation Accuracy: 46.82%
Epoch 10, Training loss: 1.8151, Validation loss: 1.8248, Validation Accuracy: 46.56%
Total number of parameters in the model: 115777
Predicted next character: 'g'


In [None]:
sequence_length = 30
# Create a character mapping to integers
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]
assert len(encoded_text) > sequence_length, "text is too short for the chosen sequence_length"

# Create sequences and targets as PyTorch tensors.
# unfold() yields every window of `sequence_length` consecutive ids without a
# Python loop; the final window has no following character to predict, so it
# is dropped. targets[i] is the id that comes right after sequences[i].
encoded_tensor = torch.tensor(encoded_text, dtype=torch.long)
sequences = encoded_tensor.unfold(0, sequence_length, 1)[:-1]
targets = encoded_tensor[sequence_length:]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
# A dedicated, fixed-seed generator makes the train/test membership identical
# on every run, independent of how much global randomness was consumed before.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


In [None]:
# Experiment: LSTM with hidden size 128, trained on whatever loaders are currently in scope.
hidden_size = 128
learning_rate = 0.005


model = CharLstm(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Pass the detected `device` rather than a literal 'cuda' so the cell also runs on CPU-only machines.
lstm_res_seqLen_30 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device=device, n_epochs=10)
total_params = sum(p.numel() for p in lstm_res_seqLen_30['model'].parameters())
print(f'Total number of parameters in the model: {total_params}')
test_str = "Next character predictin"
# Limit the prompt to the window length the current dataset was built with.
predicted_char = predict_next_char(lstm_res_seqLen_30['model'], char_to_int, int_to_char, test_str, sequence_length)
print(f"Predicted next character: '{predicted_char}'")

Epoch 1, Training loss: 1.7503, Validation loss: 1.6290, Validation Accuracy: 51.21%
Epoch 2, Training loss: 1.5921, Validation loss: 1.5803, Validation Accuracy: 52.50%
Epoch 3, Training loss: 1.5590, Validation loss: 1.5661, Validation Accuracy: 53.17%
Epoch 4, Training loss: 1.5439, Validation loss: 1.5592, Validation Accuracy: 52.94%
Epoch 5, Training loss: 1.5378, Validation loss: 1.5469, Validation Accuracy: 53.50%
Epoch 6, Training loss: 1.5352, Validation loss: 1.5610, Validation Accuracy: 53.15%
Epoch 7, Training loss: 1.5369, Validation loss: 1.5611, Validation Accuracy: 52.67%
Epoch 8, Training loss: 1.5355, Validation loss: 1.5508, Validation Accuracy: 53.43%
Epoch 9, Training loss: 1.5385, Validation loss: 1.5663, Validation Accuracy: 52.81%
Epoch 10, Training loss: 1.5403, Validation loss: 1.5591, Validation Accuracy: 53.05%
Total number of parameters in the model: 148801
Predicted next character: 'g'


In [None]:
# Experiment: GRU with hidden size 128, trained on whatever loaders are currently in scope.
hidden_size = 128
learning_rate = 0.005


model = CharGRU(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Pass the detected `device` rather than a literal 'cuda' so the cell also runs on CPU-only machines.
gru_results_seqLen_30 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device=device, n_epochs=10)
total_params = sum(p.numel() for p in gru_results_seqLen_30['model'].parameters())
print(f'Total number of parameters in the model: {total_params}')
test_str = "Next character predictin"
# Limit the prompt to the window length the current dataset was built with.
predicted_char = predict_next_char(gru_results_seqLen_30['model'], char_to_int, int_to_char, test_str, sequence_length)
print(f"Predicted next character: '{predicted_char}'")

Epoch 1, Training loss: 1.8354, Validation loss: 1.7691, Validation Accuracy: 47.76%
Epoch 2, Training loss: 1.7556, Validation loss: 1.7675, Validation Accuracy: 48.14%
Epoch 3, Training loss: 1.7588, Validation loss: 1.7840, Validation Accuracy: 48.00%
Epoch 4, Training loss: 1.7719, Validation loss: 1.7892, Validation Accuracy: 47.97%
Epoch 5, Training loss: 1.7832, Validation loss: 1.8072, Validation Accuracy: 46.75%
Epoch 6, Training loss: 1.7981, Validation loss: 1.8170, Validation Accuracy: 47.08%
Epoch 7, Training loss: 1.8266, Validation loss: 1.8404, Validation Accuracy: 46.19%
Epoch 8, Training loss: 1.8262, Validation loss: 1.8520, Validation Accuracy: 45.67%
Epoch 9, Training loss: 1.8175, Validation loss: 1.8081, Validation Accuracy: 47.47%
Epoch 10, Training loss: 1.8198, Validation loss: 1.8496, Validation Accuracy: 46.87%
Total number of parameters in the model: 115777
Predicted next character: 'g'


In [None]:
# Experiment: LSTM with hidden size 256 and a lower learning rate, trained on
# whatever loaders are currently in scope.
# NOTE(review): the result variable is called `lstm_results_seqLen_20`, but when
# the notebook runs top to bottom the most recent `sequence_length` assignment
# before this cell is 30. The name is kept so any later reference still
# resolves - confirm the intended window length and rename consistently.
hidden_size = 256
learning_rate = 0.001


model = CharLstm(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Pass the detected `device` rather than a literal 'cuda' so the cell also runs on CPU-only machines.
lstm_results_seqLen_20 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device=device, n_epochs=10)
total_params = sum(p.numel() for p in lstm_results_seqLen_20['model'].parameters())
print(f'Total number of parameters in the model: {total_params}')
test_str = "Next character predictin"
# Limit the prompt to the window length the current dataset was built with.
predicted_char = predict_next_char(lstm_results_seqLen_20['model'], char_to_int, int_to_char, test_str, sequence_length)
print(f"Predicted next character: '{predicted_char}'")

Epoch 1, Training loss: 1.7003, Validation loss: 1.5382, Validation Accuracy: 53.41%
Epoch 2, Training loss: 1.4778, Validation loss: 1.4601, Validation Accuracy: 55.46%
Epoch 3, Training loss: 1.4169, Validation loss: 1.4270, Validation Accuracy: 56.39%
Epoch 4, Training loss: 1.3804, Validation loss: 1.4030, Validation Accuracy: 56.91%
Epoch 5, Training loss: 1.3550, Validation loss: 1.3856, Validation Accuracy: 57.50%
Epoch 6, Training loss: 1.3359, Validation loss: 1.3770, Validation Accuracy: 57.78%
Epoch 7, Training loss: 1.3214, Validation loss: 1.3711, Validation Accuracy: 57.92%
Epoch 8, Training loss: 1.3090, Validation loss: 1.3665, Validation Accuracy: 57.95%
Epoch 9, Training loss: 1.2969, Validation loss: 1.3630, Validation Accuracy: 57.92%
Epoch 10, Training loss: 1.2875, Validation loss: 1.3619, Validation Accuracy: 58.23%
Total number of parameters in the model: 559681
Predicted next character: 'g'


In [None]:
# Experiment: GRU with hidden size 256 and a lower learning rate, trained on
# whatever loaders are currently in scope.
# NOTE(review): this assigns `gru_results_seqLen_20` again, replacing the
# results stored under that name by the earlier hidden-size-128 GRU cell. Also,
# when the notebook runs top to bottom the most recent `sequence_length`
# assignment before this cell is 30, not 20. The name is kept so any later
# reference still resolves - confirm and rename consistently.
hidden_size = 256
learning_rate = 0.001
epochs = 10

model = CharGRU(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Pass the detected `device` rather than a literal 'cuda', and use `epochs`
# so the setting above actually controls the run.
gru_results_seqLen_20 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device=device, n_epochs=epochs)
total_params = sum(p.numel() for p in gru_results_seqLen_20['model'].parameters())
print(f'Total number of parameters in the model: {total_params}')
test_str = "Next character predictin"
# Limit the prompt to the window length the current dataset was built with.
predicted_char = predict_next_char(gru_results_seqLen_20['model'], char_to_int, int_to_char, test_str, sequence_length)
print(f"Predicted next character: '{predicted_char}'")

Epoch 1, Training loss: 1.6899, Validation loss: 1.5385, Validation Accuracy: 53.75%
Epoch 2, Training loss: 1.4949, Validation loss: 1.4799, Validation Accuracy: 55.02%
Epoch 3, Training loss: 1.4473, Validation loss: 1.4605, Validation Accuracy: 55.66%
Epoch 4, Training loss: 1.4223, Validation loss: 1.4437, Validation Accuracy: 55.87%
Epoch 5, Training loss: 1.4057, Validation loss: 1.4379, Validation Accuracy: 55.97%
Epoch 6, Training loss: 1.3929, Validation loss: 1.4324, Validation Accuracy: 56.35%
Epoch 7, Training loss: 1.3845, Validation loss: 1.4219, Validation Accuracy: 56.67%
Epoch 8, Training loss: 1.3786, Validation loss: 1.4168, Validation Accuracy: 56.95%
Epoch 9, Training loss: 1.3730, Validation loss: 1.4179, Validation Accuracy: 56.61%
Epoch 10, Training loss: 1.3679, Validation loss: 1.4134, Validation Accuracy: 56.89%
Total number of parameters in the model: 428097
Predicted next character: 'g'


In [None]:
sequence_length = 50
# Create a character mapping to integers
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]
assert len(encoded_text) > sequence_length, "text is too short for the chosen sequence_length"

# Create sequences and targets as PyTorch tensors.
# unfold() yields every window of `sequence_length` consecutive ids without a
# Python loop; the final window has no following character to predict, so it
# is dropped. targets[i] is the id that comes right after sequences[i].
encoded_tensor = torch.tensor(encoded_text, dtype=torch.long)
sequences = encoded_tensor.unfold(0, sequence_length, 1)[:-1]
targets = encoded_tensor[sequence_length:]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
# A dedicated, fixed-seed generator makes the train/test membership identical
# on every run, independent of how much global randomness was consumed before.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


In [None]:
# Experiment: LSTM with hidden size 256, trained on whatever loaders are currently in scope.
hidden_size = 256
learning_rate = 0.001
epochs = 10

model = CharLstm(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Pass the detected `device` rather than a literal 'cuda', and use `epochs`
# so the setting above actually controls the run.
lstm_results_50 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device=device, n_epochs=epochs)
total_params = sum(p.numel() for p in lstm_results_50['model'].parameters())
print(f'Total number of parameters in the model: {total_params}')
test_str = "Next character predictin"
# Use the dataset's window length for the prompt; the previous literal 30 did
# not match the sequence_length the current loaders were built with.
predicted_char = predict_next_char(lstm_results_50['model'], char_to_int, int_to_char, test_str, sequence_length)
print(f"Predicted next character: '{predicted_char}'")

Epoch 1, Training loss: 1.6963, Validation loss: 1.5258, Validation Accuracy: 53.86%
Epoch 2, Training loss: 1.4669, Validation loss: 1.4514, Validation Accuracy: 55.69%
Epoch 3, Training loss: 1.4046, Validation loss: 1.4115, Validation Accuracy: 56.83%
Epoch 4, Training loss: 1.3678, Validation loss: 1.3939, Validation Accuracy: 57.10%
Epoch 5, Training loss: 1.3425, Validation loss: 1.3769, Validation Accuracy: 57.64%
Epoch 6, Training loss: 1.3233, Validation loss: 1.3717, Validation Accuracy: 57.95%
Epoch 7, Training loss: 1.3078, Validation loss: 1.3607, Validation Accuracy: 58.16%
Epoch 8, Training loss: 1.2944, Validation loss: 1.3557, Validation Accuracy: 58.33%
Epoch 9, Training loss: 1.2842, Validation loss: 1.3551, Validation Accuracy: 58.35%
Epoch 10, Training loss: 1.2745, Validation loss: 1.3453, Validation Accuracy: 58.76%
Total number of parameters in the model: 559681
Predicted next character: 'g'


In [None]:
# Experiment: GRU with hidden size 256, trained on whatever loaders are currently in scope.
hidden_size = 256
learning_rate = 0.001
epochs = 10

model = CharGRU(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Pass the detected `device` rather than a literal 'cuda', and use `epochs`
# so the setting above actually controls the run.
gru_results_50 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device=device, n_epochs=epochs)
total_params = sum(p.numel() for p in gru_results_50['model'].parameters())
print(f'Total number of parameters in the model: {total_params}')
test_str = "Next character predictin"
# Use the dataset's window length for the prompt; the previous literal 30 did
# not match the sequence_length the current loaders were built with.
predicted_char = predict_next_char(gru_results_50['model'], char_to_int, int_to_char, test_str, sequence_length)
print(f"Predicted next character: '{predicted_char}'")