In [4]:
import numpy as np 

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from torch.autograd import Variable
import torch.nn.functional as F


from tqdm import tqdm # progress bar

# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Tensors and modules created from here on are allocated on `device` by default.
torch.set_default_device(device)

Using cuda device


In [8]:
# Load the raw training corpus into memory as a single string.
text_file = "tiny-shakespeare.txt"
# An explicit encoding keeps the decoded characters (and therefore the
# vocabulary built from them) independent of the platform's locale default.
with open(text_file, "r", encoding="utf-8") as file:
    text = file.read()

# Displayed as the cell output: number of characters in the corpus.
len(text)

1115393

In [5]:
# Create a character-level vocabulary (sorted so the index assignment is deterministic)
chars = sorted(set(text))
vocab_size = len(chars)
print(f'Vocabulary size: {vocab_size}')

# Create mappings from characters to indices and vice versa
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for idx, char in enumerate(chars)}

# Encode the entire text as integers
encoded_text = np.array([char_to_idx[char] for char in text], dtype=np.int64)

# Define the sequence length
seq_length = 100

# Every sample needs `seq_length` input characters plus ONE extra character
# for the shifted-by-one target. Reserving that look-ahead character keeps the
# last target row full-length even when len(text) is an exact multiple of
# seq_length (otherwise the rows would be ragged and could not form a tensor).
num_samples = max(len(encoded_text) - 1, 0) // seq_length
usable_chars = num_samples * seq_length

# Row i of the inputs is encoded_text[i*seq_length : (i+1)*seq_length];
# row i of the targets is the same window shifted one character to the right.
input_array = encoded_text[:usable_chars].reshape(num_samples, seq_length)
target_array = encoded_text[1:usable_chars + 1].reshape(num_samples, seq_length)

# Convert to PyTorch tensors from contiguous arrays (building a tensor from a
# Python list of ndarrays is far slower and triggers a UserWarning).
input_sequences = torch.tensor(input_array, dtype=torch.long)
target_sequences = torch.tensor(target_array, dtype=torch.long)

assert input_sequences.shape == target_sequences.shape == (num_samples, seq_length)


Vocabulary size: 65


  return func(*args, **kwargs)


In [9]:
class LSTMModel(nn.Module):
    """Character-level language model: embedding -> stacked LSTM -> linear head.

    Args:
        vocab_size: Number of distinct tokens; size of the embedding table and
            of the output logits.
        embed_size: Dimensionality of the token embeddings.
        hidden_size: Number of features in each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        """Run one forward pass.

        Args:
            x: Integer token indices of shape (batch, seq_len); the LSTM is
                configured with ``batch_first=True``.
            hidden: ``(h, c)`` state tuple as accepted by ``nn.LSTM``.

        Returns:
            ``(logits, hidden)`` where ``logits`` has shape
            (batch * seq_len, vocab_size) -- the batch and time axes are
            flattened so the result can be fed straight to a cross-entropy
            loss -- and ``hidden`` is the updated ``(h, c)`` tuple.
        """
        x = self.embedding(x)
        out, hidden = self.lstm(x, hidden)
        # Merge (batch, seq_len) into one axis before the per-step projection.
        out = self.fc(out.reshape(out.size(0) * out.size(1), out.size(2)))
        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero ``(h, c)`` state for ``batch_size`` sequences.

        The shapes are taken from this model's own LSTM rather than from
        module-level globals, so the method works regardless of cell execution
        order and for models built with any ``num_layers`` / ``hidden_size``.
        The tensors share dtype and device with the model's parameters.
        """
        reference = next(self.parameters())
        state_shape = (self.lstm.num_layers, batch_size, self.lstm.hidden_size)
        return (reference.new_zeros(state_shape),
                reference.new_zeros(state_shape))


In [10]:
# Hyperparameters
embed_size = 128
hidden_size = 256
num_layers = 2
num_epochs = 20
learning_rate = 0.002
batch_size = 64
seed = 42

# Seed PyTorch's RNG before any weights are created so that initialisation
# (and later sampling) starts from the same state on every Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic -- confirm if
# bit-exact reproducibility is required.
torch.manual_seed(seed)

# Initialize the model, loss function, and optimizer
model = LSTMModel(vocab_size, embed_size, hidden_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [11]:
# Training loop
num_sequences = input_sequences.size(0)
# Only full batches are used: the carried hidden state is shaped for exactly
# `batch_size` rows, so a trailing partial batch would not fit it.
num_batches = num_sequences // batch_size
if num_batches == 0:
    raise ValueError(
        f"Need at least batch_size={batch_size} sequences to train, got {num_sequences}"
    )

# Make sure the model is in training mode even if an earlier cell switched it to eval.
model.train()

for epoch in range(num_epochs):
    hidden = model.init_hidden(batch_size)
    total_loss = 0.0

    # Iterating over batch indices (instead of a hand-written stop bound)
    # guarantees every full batch is visited -- including the last one when
    # num_sequences is an exact multiple of batch_size -- and that the average
    # below divides by the number of batches actually processed.
    for batch_idx in range(num_batches):
        start = batch_idx * batch_size
        inputs = input_sequences[start:start + batch_size]
        targets = target_sequences[start:start + batch_size]

        # Detach hidden state to prevent backpropagation through entire training history
        # NOTE(review): row j of the next batch is sequence start+batch_size+j,
        # which is not the textual continuation of row j in this batch, so the
        # carried state comes from a different position in the corpus. Left
        # unchanged to preserve the existing training behaviour.
        hidden = tuple(h.detach() for h in hidden)

        # Forward pass
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets.reshape(-1))

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')


Epoch [1/20], Loss: 2.3668
Epoch [2/20], Loss: 1.8128
Epoch [3/20], Loss: 1.6498
Epoch [4/20], Loss: 1.5539
Epoch [5/20], Loss: 1.4903
Epoch [6/20], Loss: 1.4469
Epoch [7/20], Loss: 1.4147
Epoch [8/20], Loss: 1.3892
Epoch [9/20], Loss: 1.3679
Epoch [10/20], Loss: 1.3492
Epoch [11/20], Loss: 1.3332
Epoch [12/20], Loss: 1.3186
Epoch [13/20], Loss: 1.3052
Epoch [14/20], Loss: 1.2926
Epoch [15/20], Loss: 1.2814
Epoch [16/20], Loss: 1.2713
Epoch [17/20], Loss: 1.2613
Epoch [18/20], Loss: 1.2524
Epoch [19/20], Loss: 1.2445
Epoch [20/20], Loss: 1.2382


In [14]:
def generate_text(model, start_str, char_to_idx, idx_to_char, num_generate=100, temperature=1.0):
    """Sample a continuation of ``start_str`` from a character-level model.

    The whole prompt is fed through the model once to prime the recurrent
    state; after that, each sampled character is fed back as the next input.

    Args:
        model: Module exposing ``init_hidden(batch_size)`` and callable as
            ``model(indices, hidden) -> (logits, hidden)``, where the last row
            of ``logits`` holds the scores for the next character.
        start_str: Non-empty prompt; every character must be a key of
            ``char_to_idx``.
        char_to_idx: Mapping from character to integer index.
        idx_to_char: Mapping from integer index back to character.
        num_generate: Number of characters to sample.
        temperature: Positive divisor applied to the logits before softmax;
            values below 1 sharpen the distribution, values above 1 flatten it.

    Returns:
        ``start_str`` followed by ``num_generate`` sampled characters.

    Raises:
        ValueError: If ``start_str`` is empty or ``temperature`` is not positive.
        KeyError: If ``start_str`` contains a character missing from ``char_to_idx``.
    """
    if not start_str:
        raise ValueError("start_str must contain at least one character")
    if temperature <= 0:
        raise ValueError(f"temperature must be positive, got {temperature}")

    # Build inputs on the model's own device; for a parameter-free model fall
    # back to the default device (device=None).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    # Remember the current mode so sampling does not silently leave the model
    # in eval mode for whatever runs afterwards (e.g. further training).
    was_training = model.training
    model.eval()
    try:
        input_eval = torch.tensor(
            [[char_to_idx[c] for c in start_str]], dtype=torch.long, device=device
        )
        hidden = model.init_hidden(1)

        # Collect pieces and join once instead of repeated string concatenation.
        pieces = [start_str]

        with torch.no_grad():
            for _ in range(num_generate):
                output, hidden = model(input_eval, hidden)
                # Only the last row (scores for the next character) is needed.
                probs = torch.softmax(output[-1] / temperature, dim=0)
                predicted_idx = torch.multinomial(probs, num_samples=1).item()
                input_eval = torch.tensor([[predicted_idx]], dtype=torch.long, device=device)
                pieces.append(idx_to_char[predicted_idx])
    finally:
        model.train(was_training)

    return "".join(pieces)

# Prime the model with a speaker tag and sample 300 further characters.
start_str = "ROMEO: "
generated_text = generate_text(
    model=model,
    start_str=start_str,
    char_to_idx=char_to_idx,
    idx_to_char=idx_to_char,
    num_generate=300,
)
print(generated_text)


ROMEO: With thee?
I am good that, here wherein.

PETRUCHIO:
He
has no rough a curst master-grief,
For sworn, to attled date. Thyself and a
madems aside.

First Sillant:
Ashout! Was thou didst mad.
Believe me, sir.

BAPTISTA:
To-mords, to
speak; and what is a palms
Upon him being sorrows be guilty.

BIONDEL
