In [4]:
import numpy as np 

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from torch.autograd import Variable
import torch.nn.functional as F


from tqdm import tqdm # progress bar

# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

# Tensors and modules created from here on are allocated on `device` by default.
torch.set_default_device(device)

Using cuda device


In [8]:
text_file = "tiny-shakespeare.txt"
# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which would make the character vocabulary
# built from `text` machine-dependent.
with open(text_file, "r", encoding="utf-8") as file:
    text = file.read()

# Number of characters in the corpus (displayed as the cell output).
len(text)

1115393

In [5]:
# Create a character-level vocabulary
chars = sorted(set(text))
vocab_size = len(chars)
print(f'Vocabulary size: {vocab_size}')

# Create mappings from characters to indices and vice versa
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for idx, char in enumerate(chars)}

# The whole corpus as one 1-D array of vocabulary indices.
encoded_text = np.array([char_to_idx[char] for char in text], dtype=np.int64)

seq_length = 100 # Sequence length

# Each sample needs seq_length + 1 characters, because the target is the input
# shifted by one position. Using (len - 1) keeps the final target row full even
# when the corpus length is an exact multiple of seq_length (otherwise the last
# row would be one element short and tensor construction would fail).
num_samples = max((len(encoded_text) - 1) // seq_length, 0)

# Build both windows as single 2-D arrays rather than Python lists of 1-D
# arrays: torch.tensor() on a list of ndarrays takes a very slow path and
# emits a UserWarning.
covered = num_samples * seq_length
input_windows = encoded_text[:covered].reshape(num_samples, seq_length)
target_windows = encoded_text[1:covered + 1].reshape(num_samples, seq_length)

# Row i of target_sequences is row i of input_sequences shifted one character ahead.
input_sequences = torch.tensor(input_windows, dtype=torch.long)
target_sequences = torch.tensor(target_windows, dtype=torch.long)


Vocabulary size: 65


  return func(*args, **kwargs)


In [9]:
class LSTMModel(nn.Module):
    """Character-level language model: embedding -> stacked LSTM -> linear projection.

    Args:
        vocab_size: Number of distinct tokens; also the size of the output logits.
        embed_size: Dimension of the token embeddings fed to the LSTM.
        hidden_size: Number of features in each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super().__init__()
        # Keep the sizes on the instance so init_hidden() does not rely on
        # same-named notebook globals being defined (and being in sync).
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        """Run one forward pass.

        Args:
            x: Integer token indices of shape (batch, seq) (the LSTM is batch_first).
            hidden: Tuple (h, c) of LSTM states, e.g. from init_hidden().

        Returns:
            (logits, hidden): logits has shape (batch * seq, vocab_size), with the
            batch and sequence dimensions flattened together; hidden is the
            updated (h, c) tuple.
        """
        x = self.embedding(x)
        out, hidden = self.lstm(x, hidden)
        # Flatten (batch, seq, hidden) -> (batch * seq, hidden) before projecting.
        out = self.fc(out.reshape(-1, out.size(2)))
        return out, hidden

    def init_hidden(self, batch_size):
        """Return zeroed (h, c) states of shape (num_layers, batch_size, hidden_size).

        The states are created with the dtype and device of the model's parameters.
        """
        reference = next(self.parameters())
        shape = (self.num_layers, batch_size, self.hidden_size)
        return (torch.zeros(shape, dtype=reference.dtype, device=reference.device),
                torch.zeros(shape, dtype=reference.dtype, device=reference.device))


In [15]:
# Hyperparameters
embed_size = 512
hidden_size = 256
num_layers = 2
num_epochs = 50
learning_rate = 0.001
batch_size = 64

model = LSTMModel(vocab_size, embed_size, hidden_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [16]:
# Training loop
for epoch in range(num_epochs):
    hidden = model.init_hidden(batch_size)
    total_loss = 0

    for i in range(0, input_sequences.size(0) - batch_size, batch_size):
        inputs = input_sequences[i:i+batch_size]
        targets = target_sequences[i:i+batch_size]

        hidden = tuple([h.detach() for h in hidden])
        
        # Forward 
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets.view(-1))

        # Backward 
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / (input_sequences.size(0) // batch_size)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')


Epoch [1/50], Loss: 2.4487
Epoch [2/50], Loss: 1.9235
Epoch [3/50], Loss: 1.7700
Epoch [4/50], Loss: 1.6711
Epoch [5/50], Loss: 1.6023
Epoch [6/50], Loss: 1.5535
Epoch [7/50], Loss: 1.5163
Epoch [8/50], Loss: 1.4871
Epoch [9/50], Loss: 1.4637
Epoch [10/50], Loss: 1.4440
Epoch [11/50], Loss: 1.4272
Epoch [12/50], Loss: 1.4118
Epoch [13/50], Loss: 1.3982
Epoch [14/50], Loss: 1.3865
Epoch [15/50], Loss: 1.3752
Epoch [16/50], Loss: 1.3652
Epoch [17/50], Loss: 1.3553
Epoch [18/50], Loss: 1.3466
Epoch [19/50], Loss: 1.3381
Epoch [20/50], Loss: 1.3300
Epoch [21/50], Loss: 1.3220
Epoch [22/50], Loss: 1.3144
Epoch [23/50], Loss: 1.3071
Epoch [24/50], Loss: 1.3003
Epoch [25/50], Loss: 1.2935
Epoch [26/50], Loss: 1.2871
Epoch [27/50], Loss: 1.2807
Epoch [28/50], Loss: 1.2747
Epoch [29/50], Loss: 1.2686
Epoch [30/50], Loss: 1.2627
Epoch [31/50], Loss: 1.2571
Epoch [32/50], Loss: 1.2515
Epoch [33/50], Loss: 1.2464
Epoch [34/50], Loss: 1.2417
Epoch [35/50], Loss: 1.2373
Epoch [36/50], Loss: 1.2328
E

In [19]:
def generate_text(model, start_str, char_to_idx, idx_to_char, num_generate=100, temperature=1.0):
    """Sample text from `model`, one character at a time, seeded with `start_str`.

    The prompt is fed through the model in one pass; each following step feeds
    back only the previously sampled character together with the carried
    hidden state.

    Args:
        model: Module exposing init_hidden(batch_size) and returning
            (logits, hidden) when called, where the last row of logits
            scores the next character.
        start_str: Non-empty prompt; every character must be a key of char_to_idx.
        char_to_idx: Mapping from character to vocabulary index.
        idx_to_char: Mapping from vocabulary index to character.
        num_generate: Number of characters to sample after the prompt.
        temperature: Positive divisor applied to the logits before softmax;
            values below 1 sharpen the distribution, values above 1 flatten it.

    Returns:
        The prompt followed by the num_generate sampled characters.

    Raises:
        ValueError: If start_str is empty or temperature is not positive.
        KeyError: If start_str contains a character missing from char_to_idx.
    """
    if not start_str:
        raise ValueError("start_str must contain at least one character")
    if temperature <= 0:
        # Dividing logits by zero yields inf/NaN probabilities inside multinomial.
        raise ValueError("temperature must be positive")

    # Remember the current mode so sampling does not leave the model stuck in eval mode.
    was_training = model.training
    model.eval()
    try:
        # Shape (1, len(start_str)): a batch holding the single prompt sequence.
        input_eval = torch.tensor([char_to_idx[c] for c in start_str], dtype=torch.long).unsqueeze(0)
        hidden = model.init_hidden(1)

        # Collect pieces in a list and join once instead of growing a string in the loop.
        pieces = [start_str]

        with torch.no_grad():
            for _ in range(num_generate):
                output, hidden = model(input_eval, hidden)
                # The last row holds the logits for the character after the most recent input.
                probabilities = torch.softmax(output[-1] / temperature, dim=0)
                predicted_idx = torch.multinomial(probabilities, num_samples=1).item()
                # Next step's input is just the sampled character, shape (1, 1).
                input_eval = torch.tensor([[predicted_idx]], dtype=torch.long)
                pieces.append(idx_to_char[predicted_idx])
    finally:
        model.train(was_training)

    return "".join(pieces)

# Sample 300 characters from the model, seeded with a speaker tag, and show them.
start_str = "ROMEO: "
generated_text = generate_text(
    model,
    start_str,
    char_to_idx,
    idx_to_char,
    num_generate=300,
)
print(generated_text)


ROMEO: Juliet;
Ould this held: it is not saddle.

Gaolent me, if I be
not mistake,
Pardon me, Signior Baptista;
But shall hear the gods as you any enemy,
That she had ask you what I shout her reason and all
From whom, madam: Margaret's faired one:
I will not be so, no write I will conscrawns
The tire me fr
