### Inspiration: https://www.youtube.com/watch?v=kCc8FmEb1nY&t=6s
### This is an LLM that will be trained to generate poetry, hopefully haikus. This is a fun experiment. Haikus were created with OpenAI GPT-4

In [460]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

In [461]:
file_path = './haikus.txt'
with open(file_path, 'r') as file:
    contents = file.read()

contents

"Gentle breeze flows through,\nWhispering leaves tell secrets,\nNature's song, so soft.\n\nMoonlight bathes the night,\nSilver glow on tranquil sea,\nPeace reigns in darkness.\n\nCity wakes slowly,\nSunrise over concrete peaks,\nDay's first light glimmers.\n\nLeaves turn gold and red,\nAutumn's crisp breath in the air,\nSeasons shift their dance.\n\nPeaks touch the sky's edge,\nAlone in vast wilderness,\nNature's grandeur reigns.\n\nStars twinkle above,\nIn the vast, endless night sky,\nDreams sail on moonbeams.\n\nRaindrops kiss the earth,\nA soft, rhythmic melody,\nNature's lullaby.\n\nSnow blankets the land,\nWhite silence, winter's embrace,\nNature sleeps in peace.\n\nBlossoms greet the sun,\nSpring whispers through awakening buds,\nNew life in bloom.\n\nSands stretch to the sky,\nHeat shimmers like ocean waves,\nDesert's mirage dance.\n\nDeep in ocean's heart,\nSecrets hidden in the blue,\nSilent world below.\n\nTrees stand tall and proud,\nWhispering ancient secrets,\nIn the fore

In [462]:
unique_chars = sorted(list(set(contents)))
unique_chars_len = len(unique_chars)
print(''.join(unique_chars))
print(unique_chars_len)


 ',.ABCDGHILMNOPRSTUWabcdefghiklmnopqrstuvwyz
46


In [463]:
#Lets try One Hot Encoding and pring out vocab dictionary

def one_hot_encode(data_input, char_set):
    # Create a dictionary mapping each character to its index
    char_to_index = {ch: i for i, ch in enumerate(char_set)}
    vector_set = []

    for i in list(data_input):
    # Initialize a vector of zeros with the length of the character set
        one_hot_vector = [0] * len(char_set)

        # Set the position corresponding to the character to 1
        if i in char_to_index:
            one_hot_vector[char_to_index[i]] = 1
            vector_set.append(one_hot_vector)
        else:
            raise ValueError(f"Character '{i}' not in character set")

    return vector_set


In [464]:
#Decoder
def one_hot_decode(encoded_data, char_set):
    # Create a dictionary mapping each index to its character
    index_to_char = {i: ch for i, ch in enumerate(char_set)}

    decoded_string = ""

    for vector in encoded_data:
        # Find the index of the 1 in the vector
        index = vector.index(1)

        # Append the corresponding character to the decoded string
        decoded_string += index_to_char[index]

    return decoded_string


In [465]:
#test the encoding
encoding = one_hot_encode(unique_chars, unique_chars)
print(encoding)

[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 

In [466]:
#Test the decoding
decoding = one_hot_decode(encoding, unique_chars)
print(decoding)


 ',.ABCDGHILMNOPRSTUWabcdefghiklmnopqrstuvwyz


In [467]:
#Let's Encode our Data
prepped_data = contents#.replace(" ", "")
print(prepped_data)
encoded_data = one_hot_encode(prepped_data, unique_chars)
print(encoded_data)

Gentle breeze flows through,
Whispering leaves tell secrets,
Nature's song, so soft.

Moonlight bathes the night,
Silver glow on tranquil sea,
Peace reigns in darkness.

City wakes slowly,
Sunrise over concrete peaks,
Day's first light glimmers.

Leaves turn gold and red,
Autumn's crisp breath in the air,
Seasons shift their dance.

Peaks touch the sky's edge,
Alone in vast wilderness,
Nature's grandeur reigns.

Stars twinkle above,
In the vast, endless night sky,
Dreams sail on moonbeams.

Raindrops kiss the earth,
A soft, rhythmic melody,
Nature's lullaby.

Snow blankets the land,
White silence, winter's embrace,
Nature sleeps in peace.

Blossoms greet the sun,
Spring whispers through awakening buds,
New life in bloom.

Sands stretch to the sky,
Heat shimmers like ocean waves,
Desert's mirage dance.

Deep in ocean's heart,
Secrets hidden in the blue,
Silent world below.

Trees stand tall and proud,
Whispering ancient secrets,
In the forest's heart.

Sky ablaze at dusk,
Sunset paints 

In [468]:
#Transform encoded data into Tensor
tensor_data = torch.tensor(encoded_data, dtype=torch.float32)
print(tensor_data.shape, tensor_data.dtype)
print(tensor_data)

torch.Size([1202, 46]) torch.float32
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])


In [469]:
#Train and Validation Sets
n = int(0.8*len(tensor_data))
train = tensor_data[:n]
val = tensor_data[n:]
print(train)
print(val)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])


In [470]:
block_size = 8
train[:block_size + 1]

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.

In [471]:
x = train[:block_size]
y = train[1:block_size+1]
for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print(f"When the input is {context}, the target is {target}")

When the input is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
When the input is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       

In [472]:
print(train)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [474]:
#Introducing the batch dimension
torch.manual_seed(1337)
batch_size = 4
block_size = 8


def get_batch(split):
    data = train if split == 'train' else val
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix]) 
    return x, y  

xb, yb = get_batch('train')
print("inputs:")
print(xb.shape)
print(xb)
print('targets')
print(yb.shape)
print(yb)

print("-----")

for b in range(batch_size):
    for t in range(block_size):
        context = xb[b, :t+1]
        target = yb[b,t]
        print(f"when the input is {context.tolist()}, the target is: {target}")

inputs:
torch.Size([4, 8, 46])
tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[1., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
     

In [475]:
x = train[:block_size]
y = train[1:block_size+1]
for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print(f"When context is {context}, the target is {target}")

When context is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
When context is tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), the target is tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.,

In [476]:
#RNN can handle One-Hot vectors well
class SimpleRNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleRNNModel, self).__init__()
        self.hidden_size = hidden_size

        # RNN layer
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)

        # Output layer
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size)
        out, hidden = self.rnn(x, h0)
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.linear(out)
        return out

   

        
model = SimpleRNNModel(unique_chars_len, 128, unique_chars_len)

In [477]:
# Loss function
loss_function = nn.CrossEntropyLoss()

# Optimizer (example: using Adam)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [478]:
class CustomDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]


In [479]:
# Assuming train_data and train_targets are your data tensors
train_dataset = CustomDataset(xb, yb)

# Create the DataLoader
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)


### Test Run

In [480]:
num_epochs = 500
previous_loss = float('inf') #somthing ridiculous to start out with

for epoch in range(num_epochs):
    for batch in train_loader:  # Assuming you have a DataLoader
        # Split batch data
        x_batch, y_batch = batch  # x_batch is input, y_batch is target labels
        y_batch = y_batch.view(-1, unique_chars_len) #Need to reshape in order to meet the expected shape of the models output
        
        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(x_batch)
        
        # Compute and print loss
        current_loss = loss_function(y_pred, y_batch)

        #Save the model if the loss is not improving
        if current_loss < previous_loss:
            previous_loss = current_loss
            torch.save(model.state_dict(), 'best_model.pth')

        
        print(f"Epoch {epoch}, Loss: {current_loss.item()}")

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()


Epoch 0, Loss: 3.8453917503356934
Epoch 1, Loss: 3.81050968170166
Epoch 2, Loss: 3.776167869567871
Epoch 3, Loss: 3.7411439418792725
Epoch 4, Loss: 3.7041187286376953
Epoch 5, Loss: 3.663675308227539
Epoch 6, Loss: 3.618194580078125
Epoch 7, Loss: 3.5658059120178223
Epoch 8, Loss: 3.504486322402954
Epoch 9, Loss: 3.4325602054595947
Epoch 10, Loss: 3.3498687744140625
Epoch 11, Loss: 3.2593605518341064
Epoch 12, Loss: 3.167685031890869
Epoch 13, Loss: 3.0828158855438232
Epoch 14, Loss: 3.009718179702759
Epoch 15, Loss: 2.9486045837402344
Epoch 16, Loss: 2.8974978923797607
Epoch 17, Loss: 2.854818344116211
Epoch 18, Loss: 2.819521427154541
Epoch 19, Loss: 2.7903969287872314
Epoch 20, Loss: 2.7658884525299072
Epoch 21, Loss: 2.7445008754730225
Epoch 22, Loss: 2.7251999378204346
Epoch 23, Loss: 2.7074713706970215
Epoch 24, Loss: 2.6911425590515137
Epoch 25, Loss: 2.6761248111724854
Epoch 26, Loss: 2.6621663570404053
Epoch 27, Loss: 2.6487460136413574
Epoch 28, Loss: 2.635188579559326
Epoch 

### Put it in a function

In [481]:
def train_function(epochs, train_data_loader, model, b, l):
  previous_loss = float('inf')
  loss_function = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
  for epoch in range(epochs):
      for batch in train_data_loader:  # Assuming you have a DataLoader
        # Split batch data
        x_batch, y_batch = batch  # x_batch is input, y_batch is target labels
        y_batch = y_batch.view(-1, unique_chars_len) #Need to reshape in order to meet the expected shape of the models output

        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(x_batch)

        # Compute and print loss
        current_loss = loss_function(y_pred, y_batch)

        if current_loss < previous_loss:
           previous_loss = current_loss
           torch.save(model, "best_model_batch_{}_layers_{}_epochs_{}.pth".format(b, l, epochs))

        print(f"Epoch {epoch}, Loss: {current_loss.item()}")

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        current_loss.backward()
        optimizer.step()
  

### Train using different hyperparameters

In [483]:
batch_sizes = [8, 16, 32, 64]
num_epochs = [500, 1000, 10000]
layers = [128, 256, 512]


for batch in batch_sizes:
    train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
    for layer in layers:
        model = SimpleRNNModel(unique_chars_len, layer, unique_chars_len)
        for epochs in num_epochs:
            print("Size (hidden state): {} Epoch: {} Batch Size: {}".format(layer, epochs, batch))
            train_function(epochs, train_loader, model, batch, layer)

Size (hidden state): 128 Epoch: 500 Batch Size: 8
Epoch 0, Loss: 3.775813102722168
Epoch 1, Loss: 3.739750862121582
Epoch 2, Loss: 3.702859401702881
Epoch 3, Loss: 3.6638543605804443
Epoch 4, Loss: 3.621365547180176
Epoch 5, Loss: 3.5738892555236816
Epoch 6, Loss: 3.519770622253418
Epoch 7, Loss: 3.457456350326538
Epoch 8, Loss: 3.386073589324951
Epoch 9, Loss: 3.3064699172973633
Epoch 10, Loss: 3.222364664077759
Epoch 11, Loss: 3.140298843383789
Epoch 12, Loss: 3.066906452178955
Epoch 13, Loss: 3.0048980712890625
Epoch 14, Loss: 2.951996326446533
Epoch 15, Loss: 2.904125690460205
Epoch 16, Loss: 2.859341621398926
Epoch 17, Loss: 2.818415403366089
Epoch 18, Loss: 2.7832109928131104
Epoch 19, Loss: 2.755099058151245
Epoch 20, Loss: 2.733987331390381
Epoch 21, Loss: 2.718099594116211
Epoch 22, Loss: 2.7047526836395264
Epoch 23, Loss: 2.691671848297119
Epoch 24, Loss: 2.6777706146240234
Epoch 25, Loss: 2.6630163192749023
Epoch 26, Loss: 2.6478798389434814
Epoch 27, Loss: 2.632809638977051

### Define a function that can generate text

In [485]:
def generate_text(model, start_input, char_to_index, index_to_char, max_length=100):
    model.eval()  # Set the model to evaluation mode
    
    # Ensure start_input is a tensor with shape [1, input_size]
    input_seq = torch.tensor(start_input, dtype=torch.float).unsqueeze(0).unsqueeze(0)  # [1, 1, input_size]
    generated_text = ""
    hidden = torch.zeros(1, 1, model.hidden_size)  # Shape: [1, 1, hidden_size]

   

    for _ in range(max_length):
        # Forward pass
        out, hidden = model.rnn(input_seq, hidden)
        out = model.linear(out.squeeze(1))  # Remove the sequence length dimension

        # Get the character with the highest probability
        _, predicted_index = torch.max(out, dim=1)
        last_char_index = predicted_index.item()
        generated_text += index_to_char[last_char_index]

        # Prepare the next input

        input_seq = torch.zeros((1, 1, len(char_to_index)))  # Shape: [1, 1, input_size]
        input_seq[0, 0, last_char_index] = 1.0  # Set the correct character index to 1

    return generated_text


### Iterate through model files

In [486]:
model_dir = './'
pattern = "best_model_batch*.pth"
search_pattern = f"{model_dir}/{pattern}"
model_files = glob.glob(search_pattern)
print(model_files)

['./best_model_batch_64_layers_256_epochs_10000.pth', './best_model_batch_8_layers_128_epochs_500.pth', './best_model_batch_64_layers_128_epochs_10000.pth', './best_model_batch_32_layers_128_epochs_500.pth', './best_model_batch_32_layers_256_epochs_1000.pth', './best_model_batch_64_layers_128_epochs_500.pth', './best_model_batch_16_layers_256_epochs_10000.pth', './best_model_batch_16_layers_256_epochs_1000.pth', './best_model_batch_8_layers_256_epochs_1000.pth', './best_model_batch_16_layers_128_epochs_10000.pth', './best_model_batch_32_layers_512_epochs_10000.pth', './best_model_batch_64_layers_256_epochs_1000.pth', './best_model_batch_16_layers_128_epochs_500.pth', './best_model_batch_8_layers_256_epochs_10000.pth', './best_model_batch_8_layers_128_epochs_10000.pth', './best_model_batch_64_layers_512_epochs_10000.pth', './best_model_batch_16_layers_256_epochs_500.pth', './best_model_batch_64_layers_512_epochs_1000.pth', './best_model_batch_64_layers_128_epochs_1000.pth', './best_mode

### Lets see which model performs the best

In [488]:
starting_vector = [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
for file in model_files:
    model = torch.load(file)
    print("Loading model: {}".format(file))
    model_text = generate_text(model, starting_vector, encoding, decoding, max_length=100)
    print(model_text)

Loading model: ./best_model_batch_64_layers_256_epochs_10000.pth
o the skscroconcr ct crncr concr co crncr contt banbsksoocottt tb skskroconcr ct crncr concr concrnc
Loading model: ./best_model_batch_8_layers_128_epochs_500.pth

Sahd skkkr  rocr cr cr cr cr crocrocronconconcrncrncr cr cr crocrocronconconcrncrncr cr cr cr crocr
Loading model: ./best_model_batch_64_layers_128_epochs_10000.pth
o the skncr
cr
crncr crncr crncr crncr concr concr concr concr concr concr concr concr concr concr c
Loading model: ./best_model_batch_32_layers_128_epochs_500.pth
o the sksks sksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksksk
Loading model: ./best_model_batch_32_layers_256_epochs_1000.pth

Sands sks s sks sks sksks sks sks sksks sks sks s s s sgt t e sks sksks sks sksks sks sksks sks sks
Loading model: ./best_model_batch_64_layers_128_epochs_500.pth
o the skssskcricha banco concrncrncrncrncrncrncrncrncrncrncrncrncrncrncrncrncrncrncrncrncrncrncrncrn
Loading mode

### wooooow, such awful. but we learned a lot. Let's make this better