**Stage 3 - Model Evaluation**

In [1]:
# Import libraries

import torch 
import torch.nn as nn
import torch.optim as optim 
import numpy as np
import pandas as pd 
import random
import os 
import re

In [2]:
# 1. Setup and load files

# Project directory holding the corpus and the model checkpoint. The original
# machine-specific location remains the default; set the CAPSTONE_BASE_DIR
# environment variable to run the notebook elsewhere without editing this cell.
BASE_DIR = os.environ.get(
    "CAPSTONE_BASE_DIR",
    r"C:\Users\Aliya Sarfaraz\OneDrive\Desktop\Softwares\git_github\100DaysOfBuildables\Capstone-Project",
)
TEXT_FILE = os.path.join(BASE_DIR, "pile_uncopyrighted_100k.txt")
MODEL_PATH = os.path.join(BASE_DIR, "char_lstm_model.pth")

# Seed every RNG the notebook draws from (batch sampling uses torch, text
# sampling uses NumPy) so a Restart & Run All starts from the same state.
# NOTE(review): GPU kernels may still be non-deterministic -- confirm if exact
# reproducibility on CUDA matters.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Load dataset (the whole corpus is read into memory as one string)
with open(TEXT_FILE, "r", encoding="utf-8") as f:
    text = f.read()

print("Dataset length:", len(text))

Using device: cpu
Dataset length: 187469564


In [3]:
# 2. Build vocab from data

# Index -> character table: every distinct character of the corpus, sorted.
itos = sorted(set(text))
# Character -> index table, the inverse mapping of `itos`.
stoi = dict(zip(itos, range(len(itos))))
vocab_size = len(stoi)
print("Vocab size:", vocab_size)

def encode(s):
    """Convert text to a list of vocabulary indices.

    Characters that are not keys of `stoi` are silently dropped, so the
    result may be shorter than `s`.
    """
    indices = []
    for ch in s:
        if ch in stoi:
            indices.append(stoi[ch])
    return indices

def decode(l):
    """Convert a sequence of vocabulary indices back to text using `itos`."""
    chars = (itos[idx] for idx in l)
    return ''.join(chars)

# Encode the whole corpus straight into a NumPy int64 buffer instead of going
# through a Python list first: for a corpus of this size the intermediate list
# is a very large temporary allocation. Every character of `text` is a key of
# `stoi` (the vocabulary was built from `text`), so no filtering is needed and
# the element count is known up front. The resulting tensor has dtype
# torch.long, exactly as before.
data = torch.from_numpy(
    np.fromiter((stoi[ch] for ch in text), dtype=np.int64, count=len(text))
)

Vocab size: 152


In [4]:
# 3. Prepare training data

# Hyperparameters used by get_batch() when sampling training windows.
batch_size = 64    # number of sequences drawn per batch
block_size = 128   # number of characters in each sequence

def get_batch():
    """Sample a random batch of (input, target) windows from `data`.

    `batch_size` start offsets are drawn uniformly; each input is a window of
    `block_size` indices and its target is the same window shifted one
    position ahead. Both stacked tensors are moved to `device`.
    """
    starts = torch.randint(len(data) - block_size - 1, (batch_size,))
    inputs, targets = [], []
    for start in starts:
        inputs.append(data[start:start + block_size])
        targets.append(data[start + 1:start + 1 + block_size])
    return torch.stack(inputs).to(device), torch.stack(targets).to(device)

In [5]:
# 4. Define model

class CharLSTM(nn.Module):
    """Character-level language model: embedding -> stacked LSTM -> linear layer."""

    def __init__(self, vocab_size, embed_size=128, hidden_size=256, num_layers=2):
        super().__init__()
        # Submodules are created in the order embed, lstm, fc.
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_size)
        self.lstm = nn.LSTM(
            input_size=embed_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=vocab_size)

    def forward(self, x, hidden=None):
        """Return per-position logits and the updated LSTM state.

        `x` holds integer token indices laid out batch-first; `hidden` is an
        optional LSTM state from a previous call (None starts from zeros, the
        nn.LSTM default). The logits' last dimension is the vocabulary size.
        """
        embedded = self.embed(x)
        lstm_out, hidden = self.lstm(embedded, hidden)
        logits = self.fc(lstm_out)
        return logits, hidden

# Instantiate the network on the selected device, then the loss and optimiser.
model = CharLSTM(vocab_size=vocab_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=3e-3)

In [None]:
# 5. Train model

epochs = 25  # you can increase later for better results
steps_per_epoch = 200  # random mini-batches drawn per epoch (not a full pass over the data)
print("Training started...")

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for _ in range(steps_per_epoch):
        x, y = get_batch()
        optimizer.zero_grad()
        output, _ = model(x)
        # Flatten (batch, position) so every character prediction is one sample.
        loss = criterion(output.view(-1, vocab_size), y.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/steps_per_epoch:.4f}")
    # Checkpoint after every epoch so an interrupted run keeps its progress
    # instead of losing all training done so far.
    torch.save(model.state_dict(), MODEL_PATH)

# Final save (also covers the epochs == 0 case, matching the original behaviour).
torch.save(model.state_dict(), MODEL_PATH)
print("Model trained and saved to:", MODEL_PATH)

Training started...
Epoch 1/25, Loss: 2.2054
Epoch 2/25, Loss: 1.7260
Epoch 3/25, Loss: 1.5868


In [None]:
# 6. Text generation function 

def clean_text(text):
    """Tidy generated text for display.

    Steps, in order:
      1. Replace the "Ä " artefact with a plain space
         (NOTE(review): presumably a mis-decoded tokenizer marker -- confirm).
      2. Collapse every run of whitespace (spaces, tabs, newlines) to a single
         space and drop leading/trailing whitespace.
      3. Remove the space in front of '.' and ','.

    Whitespace is collapsed *before* the punctuation fix so that inputs such as
    "word  ." or "word\n," are fully cleaned in one pass; this also makes the
    function idempotent (cleaning already-clean text changes nothing).

    Returns the cleaned string.
    """
    text = text.replace("Ä ", " ")
    text = ' '.join(text.split())  # collapse whitespace runs, trim both ends
    text = text.replace(" .", ".").replace(" ,", ",")
    return text.strip()

def generate_text(model, start_text="Once upon a time", length=400, temperature=0.5):
    """Sample `length` characters from `model`, continuing `start_text`.

    Args:
        model: network called as ``model(indices, hidden)`` and returning
            ``(logits, hidden)``.
        start_text: prompt; it is encoded with `encode`, so characters outside
            the vocabulary are ignored when priming the model (they still
            appear in the returned text).
        length: number of characters to sample after the prompt.
        temperature: divisor applied to the logits before softmax; values
            below 1 sharpen the distribution, values above 1 flatten it.

    Returns:
        The prompt followed by the sampled characters, passed through
        `clean_text`.

    Raises:
        ValueError: if `temperature` is not positive (dividing by it would
            produce inf/NaN probabilities), or if no character of
            `start_text` is in the vocabulary (the model would receive an
            empty sequence).
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    prompt_ids = encode(start_text)
    if not prompt_ids:
        raise ValueError("start_text must contain at least one character from the vocabulary")

    model.eval()
    input_seq = torch.tensor(prompt_ids, dtype=torch.long).unsqueeze(0).to(device)
    hidden = None
    generated = list(start_text)

    # One no_grad context for the whole loop; nothing here needs gradients.
    with torch.no_grad():
        for _ in range(length):
            output, hidden = model(input_seq, hidden)
            # Only the last position predicts the next character.
            logits = output[:, -1, :] / temperature
            probs = torch.softmax(logits, dim=-1).cpu().numpy().ravel()
            next_idx = np.random.choice(len(probs), p=probs)
            generated.append(itos[next_idx])
            # Feed just the sampled character back in; `hidden` carries the context.
            input_seq = torch.tensor([[next_idx]], dtype=torch.long, device=device)

    return clean_text(''.join(generated))

# Example usage: sample 400 characters at a fairly conservative temperature.
text_out = generate_text(
    model,
    start_text="Once upon a time",
    length=400,
    temperature=0.5,
)
print(text_out)

In [None]:
# 7. Generate examples and save results

prompts = [
    "Once upon a time",
    "The future of AI",
    "In a small village",
    "Medical technology",
    "Sports injuries",
]

# One record per prompt: the prompt itself and the text sampled from it.
examples = []
for prompt in prompts:
    # generate_text() already returns cleaned text; the extra clean_text()
    # pass is kept so the saved output is unchanged.
    text_out = clean_text(
        generate_text(model, start_text=prompt, length=300, temperature=0.7)
    )
    examples.append(dict(prompt=prompt, generated_text=text_out))

# Save results as a two-column CSV next to the other project files.
output_file = os.path.join(BASE_DIR, "evaluation_outputs.csv")
df = pd.DataFrame(examples)
df.to_csv(output_file, index=False)
print(f"Generated texts saved to {output_file}")

# Show the first prompt together with its generated continuation.
print("\nExample generated text for prompt:", prompts[0])
print(examples[0]["generated_text"])