In [1]:
import re

def preprocess_text(text):
    # Lowercase everything
    text = text.lower()

    # Keep only letters and basic punctuation
    text = re.sub(r"[^a-zA-Z0-9\s.,!?']", " ", text)

    # Collapse multiple spaces
    text = re.sub(r"\s+", " ", text).strip()

    # Tokenize into words
    words = text.split()

    vocab = sorted(set(words))
    word2idx = {w:i for i,w in enumerate(vocab)}
    idx2word = {i: w for w, i in word2idx.items()}
    
    return words, vocab, word2idx, idx2word


In [2]:
data = ""
with open("harrypotter.txt") as f:
    data = f.read()


words, vocab, word2idx, idx2word = preprocess_text(data)

In [3]:
import torch
import torch.nn as nn


# We’ll use 3 words to predict the next one
seq_len = 3

# Build training pairs: (input words → next word)
data = []
for i in range(len(words) - seq_len):
    X = [word2idx[w] for w in words[i:i+seq_len]]   # e.g. [once, upon, a]
    y = word2idx[words[i+seq_len]]                  # e.g. "time"
    data.append((torch.tensor(X), torch.tensor(y)))


In [4]:
import random
random.shuffle(data)
split = int(0.8 * len(data))
train_data = data[:split]
val_data   = data[split:]

device = 'cuda' if torch.cuda.is_available() else 'cpu'
vocab_size = len(vocab)


In [5]:
device

'cuda'

In [6]:
class RNNModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, 16)        # turn words → vectors
        self.rnn = nn.LSTM(16, 32, batch_first=True)     # process sequence
        self.fc = nn.Linear(32, vocab_size)              # predict next word

    def forward(self, x):
        x = self.embed(x)                # [batch, seq, embed]
        out, _ = self.rnn(x)             # [batch, seq, hidden]
        last = out[:, -1, :]             # take the last output
        return self.fc(last)             # predict next word

In [7]:
class TransformerModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, 16)
        layer = nn.TransformerEncoderLayer(d_model=16, nhead=2, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=2)
        self.fc = nn.Linear(16, vocab_size)

    def forward(self, x):
        x = self.embed(x)
        out = self.encoder(x)
        last = out[:, -1, :]             # last token’s info
        return self.fc(last)

In [17]:
import pyplot as plt
def train(model, name, epochs=1):
    model = model.to(device)
    opt = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = nn.CrossEntropyLoss()

    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        # Training
        total_loss = 0
        for X, y in train_data:
            X, y = X.unsqueeze(0).to(device), y.to(device)
            opt.zero_grad()
            pred = model(X)
            loss = loss_fn(pred, y.unsqueeze(0))
            loss.backward()
            opt.step()
            total_loss += loss.item()
        train_loss = total_loss / len(train_data)
        train_losses.append(train_loss)

        # Validation
        total_val = 0
        with torch.no_grad():
            for X, y in val_data:
                X, y = X.unsqueeze(0).to(device), y.to(device)
                pred = model(X)
                loss = loss_fn(pred, y.unsqueeze(0))
                total_val += loss.item()
        val_loss = total_val / len(val_data)
        val_losses.append(val_loss)

        if epoch % 20 == 0:
            print(f"{name} epoch {epoch} | train {train_loss:.3f} val {val_loss:.3f}")

    # Plot losses
    plt.plot(train_losses, label='train')
    plt.plot(val_losses, label='val')
    plt.title(f'{name} Loss per Epoch')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    return model

ModuleNotFoundError: No module named 'pyplot'

In [18]:
def predict(model, text_prefix):
    model.eval()
    words_in = re.findall(r"\w+", text_prefix.lower())[-seq_len:]
    X = torch.tensor([[word2idx[w] for w in words_in]]).to(device)
    with torch.no_grad():
        pred = model(X).argmax(-1).item()
    print(f"{text_prefix} ➜ {idx2word[pred]}")


In [19]:
rnn = train(RNNModel(), "RNN")

RNN epoch 0 | train 9.219 val 9.431


NameError: name 'plt' is not defined

In [None]:
transformer = train(TransformerModel(), "Transformer")

In [None]:

print("\nPredictions:")
predict(rnn, "once upon a")
predict(transformer, "once upon a")

In [None]:
# Save
torch.save(rnn.state_dict(), "rnn_model.pth")
torch.save(transformer.state_dict(), "transformer_model.pth")

In [None]:

# Load later
rnn_loaded = RNNModel()
rnn_loaded.load_state_dict(torch.load("rnn_model.pth"))
rnn_loaded.to(device)
rnn_loaded.eval()

transformer_loaded = TransformerModel()
transformer_loaded.load_state_dict(torch.load("transformer_model.pth"))
transformer_loaded.to(device)
transformer_loaded.eval()