In [80]:
# Importing all the stuff
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Dataset
from sklearn.model_selection import train_test_split
from pathlib import Path

In [2]:
# Custom Dataset class
class CustomDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]

# Creating training and testing data

In [3]:
filepath = 'shakespeare.txt'
torch.manual_seed(42)

# storing the text in a variable
text = open(filepath, 'rb').read().decode(encoding='utf-8').lower()

# selecting only a portion of the text because the model takes a long time to train
text = text[300000:800000]

# creating a mapping from unique characters to integers
chars = sorted(set(text)) # set returns unique characters in text

# creating a dictionary for the mapping
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))

# filling up the sentences and the next characters lists
SEQ_LENGTH = 40
STEP_SIZE = 3

sentences = []
next_chars = []

for i in range(0, len(text) - SEQ_LENGTH, STEP_SIZE):
    sentences.append(text[i: i+SEQ_LENGTH])
    next_chars.append(text[i+SEQ_LENGTH])
    
# declaring np arrays for input and output
X = np.zeros(shape=(len(sentences), SEQ_LENGTH, len(chars)), dtype=np.float32)
y = np.zeros(shape=(len(sentences), len(chars)), dtype=np.float32)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_to_int[char]] = 1
    y[i, char_to_int[next_chars[i]]] = 1

In [4]:
# creating custom datasets and splitting it into batches
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Creating a model

In [72]:
# create a wrapper class because LSTM layer outputs a tuple and Linear can only take in a tensor
class LSTMWrapper(nn.Module):
    def __init__(self, input_size, num_layers, hidden_size):
        super(LSTMWrapper, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, num_layers=num_layers, hidden_size=hidden_size, batch_first=True)

    def forward(self, x):
        lstm_output, _ = self.lstm(x)  # Only return the `output`
        return lstm_output[:, -1, :]  # Return the last time step's output

In [136]:
# creating the model
model = nn.Sequential(
    LSTMWrapper(input_size=len(chars), num_layers=1, hidden_size=128),
    nn.Linear(in_features=128, out_features=len(chars)),
    # nn.Softmax(dim=1)
)

# creating the loss function and the optimizer
loss_fn = nn.CrossEntropyLoss()
# optimizer = optim.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

## Training the model

In [128]:
# function that trains the model
def train_model(epochs):
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        total_batches = len(train_loader)
        
        for batch_idx, (inputs, targets) in enumerate(train_loader, start=1):
            # Convert one-hot encoded targets to class indices
            targets = torch.argmax(targets, dim=1)
            
            y_pred = model(inputs)
            loss = loss_fn(y_pred, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            
            if (batch_idx) % 100 == 0:
                print(f"Epoch: {epoch+1}/{epochs} | Batch: {batch_idx+1}/{total_batches} | Loss: {loss.item():.4f}")

        scheduler.step()

In [129]:
train_model(epochs=5)

Epoch: 1/5 | Batch: 101/489 | Loss: 3.2593
Epoch: 1/5 | Batch: 201/489 | Loss: 3.0165
Epoch: 1/5 | Batch: 301/489 | Loss: 2.7164
Epoch: 1/5 | Batch: 401/489 | Loss: 2.4861
Epoch: 2/5 | Batch: 101/489 | Loss: 2.5716
Epoch: 2/5 | Batch: 201/489 | Loss: 2.3819
Epoch: 2/5 | Batch: 301/489 | Loss: 2.3319
Epoch: 2/5 | Batch: 401/489 | Loss: 2.3309
Epoch: 3/5 | Batch: 101/489 | Loss: 2.2545
Epoch: 3/5 | Batch: 201/489 | Loss: 2.1641
Epoch: 3/5 | Batch: 301/489 | Loss: 2.2286
Epoch: 3/5 | Batch: 401/489 | Loss: 2.0915
Epoch: 4/5 | Batch: 101/489 | Loss: 1.9915
Epoch: 4/5 | Batch: 201/489 | Loss: 2.2164
Epoch: 4/5 | Batch: 301/489 | Loss: 1.9649
Epoch: 4/5 | Batch: 401/489 | Loss: 2.1249
Epoch: 5/5 | Batch: 101/489 | Loss: 2.0499
Epoch: 5/5 | Batch: 201/489 | Loss: 2.0009
Epoch: 5/5 | Batch: 301/489 | Loss: 2.0494
Epoch: 5/5 | Batch: 401/489 | Loss: 1.9146


## Saving/Loading/Evaluating Model

In [130]:
# function that saves the model
def save_model(model_name : str):
    # create model path directory
    MODEL_PATH = Path("models")
    MODEL_PATH.mkdir(parents=True, exist_ok=True)
    
    # create model save path
    MODEL_NAME = model_name + ".pth"
    MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
    
    # save the model state_dict
    torch.save(obj=model.state_dict(), f=MODEL_SAVE_PATH)


# function that loads model
def load_model(MODEL_SAVE_PATH):
    model_loaded = model
    model_loaded.load_state_dict(torch.load(MODEL_SAVE_PATH))
    return model_loaded


# function that evaluates the model on the test set
def eval_model(model):
    model.eval()
    total_loss = 0.0
    
    with torch.no_grad():  # Disable gradient computation
        for inputs, targets in test_loader:
            # Forward pass: Compute predictions
            outputs = model_1(inputs)
            
            # Compute loss
            loss = loss_fn(outputs, targets)
            
            # Accumulate loss
            total_loss += loss.item()
    
    # Print average test loss
    print(f"Test Loss: {total_loss / len(test_loader)}")

In [131]:
save_model("model_v1")

In [137]:
model_v1 = load_model('models/model_v1.pth')

RuntimeError: Error(s) in loading state_dict for Sequential:
	Unexpected key(s) in state_dict: "0.lstm.weight_ih_l1", "0.lstm.weight_hh_l1", "0.lstm.bias_ih_l1", "0.lstm.bias_hh_l1". 

In [138]:
model_v2 = load_model('models/model_v2.pth')

## Generating Text

In [118]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    
    # Clip predictions to avoid log(0)
    preds = np.clip(preds, 1e-8, None)
    
    # Apply temperature scaling
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    
    # Normalize to get probabilities
    preds = exp_preds / np.sum(exp_preds)
    
    # Sample from the probability distribution
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [119]:
def generate_text(model, length, temperature):
    start_index = random.randint(0, len(text) - SEQ_LENGTH - 1)
    generated = ''
    sentence = text[start_index: start_index + SEQ_LENGTH]
    generated += sentence

    for i in range(length):
        x = np.zeros((1, SEQ_LENGTH, len(chars)))
        for t, character in enumerate(sentence):
            x[0, t, char_to_int[character]] = 1

        # Convert NumPy array to a PyTorch tensor
        x_tensor = torch.tensor(x, dtype=torch.float32)

        predictions = model(x_tensor)[0].detach().numpy()
        next_index = sample(predictions, temperature)
        next_char = int_to_char[next_index]

        generated += next_char
        sentence = sentence[1:] + next_char

    return generated

In [135]:
print(generate_text(model_v1, 300, 0.2))

ldiers, ere her native king
shall falteroaoooaoooiodydlyyyyly lyyyyyyyyylyylyyyyllyyyyyyyyyyyyyyyyyyyyyyyryylyyyyylyyyyyyyyryyyrrlayyyyyyyyyy.yyyyylyyyyyyyyyyyyyyyyryylyyyyyyyyyyyyrryayyyyyyyyylyylylyyyyyyyyyyyylyyyylryyyyyyyyyylyyylyyylyyyyyyyylyyyyrrlyyyyrlyyyyyyyylyylyyyyyyyyyyyyyyyyryyyyyyylyyylyyyyrlyyyyyylyyyrlyyyyyyyyyylyyyyyyylyyy


In [139]:
print(generate_text(model_v2, 200, 0.2))

ocence; we knew not
the doctrine of ill-sing this to my leall and hor. be i to mand be bot with a payen the dead to sords in the fare.

juliten: and be dout and bath at sear thinger wath the ponce fares and i to to butter thou heving his do
