### Credit: https://www.youtube.com/watch?v=kCc8FmEb1nY&t=6s
### This is an LLM that will be trained to generate poetry, hopefully haikus. This is a fun experiment. Haikus were created with OpenAI GPT-4

In [414]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

In [415]:
file_path = './haikus.txt'
with open(file_path, 'r') as file:
    contents = file.read()

contents

"Gentle breeze flows through,\\n\nWhispering leaves tell secrets,\\n\nNature's song, so soft.\\n\\n,\n\nMoonlight bathes the night,\\n\nSilver glow on tranquil sea,\\n\nPeace reigns in darkness.\\n\\n,\n\nCity wakes slowly,\\n\nSunrise over concrete peaks,\\n\nDay's first light glimmers.\\n\\n,\n\nLeaves turn gold and red,\\n\nAutumn's crisp breath in the air,\\n\nSeasons shift their dance.\\n\\n,\n\nPeaks touch the sky's edge,\\n\nAlone in vast wilderness,\\n\nNature's grandeur reigns.\\n\\n,\n\nStars twinkle above,\\n\nIn the vast, endless night sky,\\n\nDreams sail on moonbeams.\\n\\n,\n\nRaindrops kiss the earth,\\n\nA soft, rhythmic melody,\\n\nNature's lullaby.\\n\\n,\n\nSnow blankets the land,\\n\nWhite silence, winter's embrace,\\n\nNature sleeps in peace.\\n\\n,\n\nBlossoms greet the sun,\\n\nSpring whispers through awakening buds,\\n\nNew life in bloom.\\n\\n,\n\nSands stretch to the sky,\\n\nHeat shimmers like ocean waves,\\n\nDesert's mirage dance.\\n\\n,\n\nDeep in ocean's

In [416]:
unique_chars = sorted(list(set(contents)))
unique_chars_len = len(unique_chars)
print(''.join(unique_chars))
print(unique_chars_len)


 ',.ABCDGHILMNOPRSTUW\abcdefghiklmnopqrstuvwyz
47


In [417]:
#Lets try One Hot Encoding and pring out vocab dictionary

def one_hot_encode(data_input, char_set):
    # Create a dictionary mapping each character to its index
    char_to_index = {ch: i for i, ch in enumerate(char_set)}
    vector_set = []

    for i in list(data_input):
    # Initialize a vector of zeros with the length of the character set
        one_hot_vector = [0] * len(char_set)

        # Set the position corresponding to the character to 1
        if i in char_to_index:
            one_hot_vector[char_to_index[i]] = 1
            vector_set.append(one_hot_vector)
        else:
            raise ValueError(f"Character '{i}' not in character set")

    return vector_set


In [418]:
#Decoder
def one_hot_decode(encoded_data, char_set):
    # Create a dictionary mapping each index to its character
    index_to_char = {i: ch for i, ch in enumerate(char_set)}

    decoded_string = ""

    for vector in encoded_data:
        # Find the index of the 1 in the vector
        index = vector.index(1)

        # Append the corresponding character to the decoded string
        decoded_string += index_to_char[index]

    return decoded_string

# Example usage
char_set = ',.ABCDGHILMNOPRSTUWabcdefghiklmnopqrstuvwyz'
data_input = "Hello"
encoded_data = one_hot_encode(data_input, char_set)
print(encoded_data)


[[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]


In [419]:
#test the encoding
encoding = one_hot_encode(unique_chars, unique_chars)
print(encoding)

[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [420]:
#Test the decoding
decoding = one_hot_decode(encoding, unique_chars)
print(decoding)


 ',.ABCDGHILMNOPRSTUW\abcdefghiklmnopqrstuvwyz


In [421]:
#Let's Encode our Data
prepped_data = contents#.replace(" ", "")
prepped_data
encoded_data = one_hot_encode(prepped_data, unique_chars)
print(encoded_data)

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [422]:
#Transform encoded data into Tensor
tensor_data = torch.tensor(encoded_data, dtype=torch.float32)
print(tensor_data.shape, tensor_data.dtype)
print(tensor_data)

torch.Size([1334, 47]) torch.float32
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])


In [423]:
#Train and Validation Sets
n = int(0.8*len(tensor_data))
train = tensor_data[:n]
val = tensor_data[n:]
print(train)
print(val)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])


In [424]:
block_size = 8
train[:block_size + 1]

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [425]:
x = train[:block_size]
y = train[1:block_size+1]
for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print(f"When the input is {context}, the target is {target}")

When the input is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
When the input is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [426]:
print(train)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [427]:
#Introducing the batch dimension
torch.manual_seed(1337)
batch_size = 4
block_size = 8


def get_batch(split):
    data = train if split == 'train' else val
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix]) 
    return x, y  

xb, yb = get_batch('train')
print("inputs:")
print(xb.shape)
print(xb)
print('targets')
print(yb.shape)
print(yb)

print("-----")

for b in range(batch_size):
    for t in range(block_size):
        context = xb[b, :t+1]
        target = yb[b,t]
        print(f"when the input is {context.tolist()}, the target is: {target}")

inputs:
torch.Size([4, 8, 47])
tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
     

In [428]:
x = train[:block_size]
y = train[1:block_size+1]
for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print(f"When context is {context}, the target is {target}")

When context is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
When context is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [440]:
#RNN can handle One-Hot vectors well
class SimpleRNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNNModel, self).__init__()
        self.hidden_size = hidden_size

        # RNN layer
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)

        # Output layer
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size)
        out, hidden = self.rnn(x, h0)
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.linear(out)
        return out

   

        
model = SimpleRNNModel(unique_chars_len, 128, unique_chars_len)

In [430]:
# Loss function
loss_function = nn.CrossEntropyLoss()

# Optimizer (example: using Adam)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [431]:
class CustomDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]


In [432]:
# Assuming train_data and train_targets are your data tensors
train_dataset = CustomDataset(xb, yb)

# Create the DataLoader
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)


### Test Run

In [433]:
num_epochs = 500
previous_loss = float('inf') #somthing ridiculous to start out with

for epoch in range(num_epochs):
    for batch in train_loader:  # Assuming you have a DataLoader
        # Split batch data
        x_batch, y_batch = batch  # x_batch is input, y_batch is target labels
        y_batch = y_batch.view(-1, unique_chars_len) #Need to reshape in order to meet the expected shape of the models output
        
        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(x_batch)
        
        # Compute and print loss
        current_loss = loss_function(y_pred, y_batch)

        #Save the model if the loss is not improving
        if current_loss < previous_loss:
            previous_loss = current_loss
            torch.save(model.state_dict(), 'best_model.pth')

        
        print(f"Epoch {epoch}, Loss: {current_loss.item()}")

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()


Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 0, Loss: 3.835320472717285
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 1, Loss: 3.7992961406707764
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 2, Loss: 3.7630434036254883
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 3, Loss: 3.7252533435821533
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 4, Loss: 3.6845874786376953
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 5, Loss: 3.6395015716552734
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final

### Put it in a function

In [434]:
def train_function(epochs, train_data_loader, model, b, l):
  previous_loss = float('inf')
  loss_function = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
  for epoch in range(epochs):
      for batch in train_data_loader:  # Assuming you have a DataLoader
        # Split batch data
        x_batch, y_batch = batch  # x_batch is input, y_batch is target labels
        y_batch = y_batch.view(-1, unique_chars_len) #Need to reshape in order to meet the expected shape of the models output

        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(x_batch)

        # Compute and print loss
        current_loss = loss_function(y_pred, y_batch)

        if current_loss < previous_loss:
           previous_loss = current_loss
           torch.save(model, "best_model_batch_{}_layers_{}_epochs_{}.pth".format(b, l, epochs))

        print(f"Epoch {epoch}, Loss: {current_loss.item()}")

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()
  

### Train using different hyperparameters

In [435]:
batch_sizes = [8, 16, 32, 64]
num_epochs = [500, 1000]
layers = [128, 256, 512]


for batch in batch_sizes:
    train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
    for layer in layers:
        model = SimpleRNNModel(unique_chars_len, layer, unique_chars_len)
        for epochs in num_epochs:
            print("Size (hidden state): {} Epoch: {} Batch Size: {}".format(layer, epochs, batch))
            train_function(epochs, train_loader, model, batch, layer)

Size (hidden state): 128 Epoch: 500 Batch Size: 8
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 0, Loss: 3.8381879329681396
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 1, Loss: 3.8017258644104004
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 2, Loss: 3.7647194862365723
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 3, Loss: 3.7259387969970703
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 4, Loss: 3.683966636657715
Input shape: torch.Size([4, 8, 47])
RNN output shape: torch.Size([4, 8, 128])
Final output shape: torch.Size([32, 47])
Epoch 5, Loss: 3.637169122695923
Input shape: torch.Size([4, 8, 47]

### Define a function that can generate text

In [444]:
def generate_text(model, start_input, char_to_index, index_to_char, max_length=100):
    model.eval()  # Set the model to evaluation mode
    
    # Ensure start_input is a tensor with shape [1, input_size]
    input_seq = torch.tensor(start_input, dtype=torch.float).unsqueeze(0).unsqueeze(0)  # [1, 1, input_size]
    generated_text = ""
    hidden = torch.zeros(1, 1, model.hidden_size)  # Shape: [1, 1, hidden_size]

   

    for _ in range(max_length):
        # Forward pass
        out, hidden = model.rnn(input_seq, hidden)
        out = model.linear(out.squeeze(1))  # Remove the sequence length dimension

        # Get the character with the highest probability
        _, predicted_index = torch.max(out, dim=1)
        last_char_index = predicted_index.item()
        generated_text += index_to_char[last_char_index]

        # Prepare the next input

        input_seq = torch.zeros((1, 1, len(char_to_index)))  # Shape: [1, 1, input_size]
        input_seq[0, 0, last_char_index] = 1.0  # Set the correct character index to 1

    return generated_text


### Iterate through model files

In [437]:
model_dir = './'
pattern = "best_model_batch*.pth"
search_pattern = f"{model_dir}/{pattern}"
model_files = glob.glob(search_pattern)
print(model_files)

['./best_model_batch_8_layers_128_epochs_500.pth', './best_model_batch_32_layers_128_epochs_500.pth', './best_model_batch_32_layers_256_epochs_1000.pth', './best_model_batch_64_layers_128_epochs_500.pth', './best_model_batch_16_layers_256_epochs_1000.pth', './best_model_batch_8_layers_256_epochs_1000.pth', './best_model_batch_64_layers_256_epochs_1000.pth', './best_model_batch_16_layers_128_epochs_500.pth', './best_model_batch_16_layers_256_epochs_500.pth', './best_model_batch_64_layers_512_epochs_1000.pth', './best_model_batch_64_layers_128_epochs_1000.pth', './best_model_batch_16_layers_512_epochs_500.pth', './best_model_batch_8_layers_256_epochs_500.pth', './best_model_batch_64_layers_512_epochs_500.pth', './best_model_batch_16_layers_128_epochs_1000.pth', './best_model_batch_8_layers_512_epochs_1000.pth', './best_model_batch_32_layers_256_epochs_500.pth', './best_model_batch_16_layers_512_epochs_1000.pth', './best_model_batch_8_layers_128_epochs_1000.pth', './best_model_batch_8_lay

### Lets see which model performs the best

In [445]:
starting_vector = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] #start with a capital letter
for file in model_files:
    model = torch.load(file)
    print("Testing model: {}".format(file))
    model_text = generate_text(model, starting_vector, encoding, decoding, max_length=100)
    print(model_text)

Testing model: ./best_model_batch_8_layers_128_epochs_500.pth
he vast,
,
th,th,thhthhthhthhtth th th th th th th th th th th th th th th th th th th th th th th t
Testing model: ./best_model_batch_32_layers_128_epochs_500.pth
he vast,
,hh vv \n,,,

hm va o to,th v be t,,h he v be,o,th th vast,
,hh v  v to,th th vast,
,hh vv 
Testing model: ./best_model_batch_32_layers_256_epochs_1000.pth
he vast,
hhhv vast,
hhhv vast,
hhhv vast,
hhhvvvas,,
hhhvvvas,,
hhhvvvas,,
hhhvvvas,,
hhhvvvas,,
hhh
Testing model: ./best_model_batch_64_layers_128_epochs_500.pth
he vast,
,
,
,
,
,
,
,
,
,
,
,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Testing model: ./best_model_batch_16_layers_256_epochs_1000.pth
he vast,
,
hh v  ast,
,hhhh  vast,
,
hhd   vast,
,hhh    ast,
,hhhd   vast,
,hhh    ast,
,hhhh  v sa
Testing model: ./best_model_batch_8_layers_256_epochs_1000.pth
he vast,
t, th th th th th th th th th th th th th th th th th th th th th th th th th th th th th t
Testing model: 