In [None]:
# Imports and setup
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from math import exp
from collections import Counter

In [None]:
# Load model, tokenizer, and data
MODEL_PATH = '../models/best_model.h5'
TOKENIZER_PATH = '../data/processed/tokenizer.pkl'
X = np.load('../data/processed/data_X.npy')
y = np.load('../data/processed/data_y.npy')

model = tf.keras.models.load_model(MODEL_PATH)
with open(TOKENIZER_PATH, 'rb') as f:
    tokenizer = pickle.load(f)
inv_vocab = {i: w for w, i in tokenizer.word_index.items()}
seq_length = model.input_shape[1]
vocab_size = len(tokenizer.word_index) + 1

print(f"Loaded model expecting seq_length={seq_length}, vocab_size={vocab_size}")



Loaded model expecting seq_length=20, vocab_size=25759


In [None]:
# Prediction helper functions
def predict_next(seed, top_k=3):
    """Return top_k (word, prob) given a seed text."""
    seq = tokenizer.texts_to_sequences([seed.lower()])[0]
    seq = seq[-seq_length:]
    padded = pad_sequences([seq], maxlen=seq_length)
    probs = model.predict(padded, verbose=0)[0]
    idxs = np.argsort(probs)[-top_k:][::-1]
    return [(inv_vocab.get(i, '<UNK>'), probs[i]) for i in idxs]


def generate_text(seed, length=10):
    """Generate a continuation of `length` words by feeding back predictions."""
    result = seed.split()
    for _ in range(length):
        pred = predict_next(' '.join(result))
        next_word = pred[0][0]
        result.append(next_word)
    return ' '.join(result)

In [None]:
# Evaluate coherence on Shakespearean seeds
shakespeare_seeds = [
    "to be or not to",
    "friends romans countrymen lend",
    "o romeo romeo wherefore art"
]
print("### Shakespearean Coherence Tests")
for seed in shakespeare_seeds:
    print(f"Seed: {seed}")
    print("Next words:", predict_next(seed))
    print("Continuation:", generate_text(seed, length=5))
    print()

### Shakespearean Coherence Tests
Seed: to be or not to
Next words: [('be', np.float32(0.039063923)), ('the', np.float32(0.018615661)), ('make', np.float32(0.012967142))]
Continuation: to be or not to be the king of the

Seed: friends romans countrymen lend
Next words: [('me', np.float32(0.1385538)), ('him', np.float32(0.05157193)), ('them', np.float32(0.037278906))]
Continuation: friends romans countrymen lend me the king and the

Seed: o romeo romeo wherefore art
Next words: [('thou', np.float32(0.79819345)), ('i', np.float32(0.04851988)), ('you', np.float32(0.03971749))]
Continuation: o romeo romeo wherefore art thou a man that thou



In [None]:
# Coherence on custom inputs
custom_seeds = [
    "in a galaxy far away",
    "the quick brown fox",
    "machine learning is"
]
print("### Custom Input Tests")
for seed in custom_seeds:
    print(f"Seed: {seed}")
    print("Next words:", predict_next(seed))
    print()


### Custom Input Tests
Seed: in a galaxy far away
Next words: [('and', np.float32(0.092671275)), ('in', np.float32(0.02969546)), ('to', np.float32(0.02868353))]

Seed: the quick brown fox
Next words: [('and', np.float32(0.16170955)), ('of', np.float32(0.07763309)), ('that', np.float32(0.04280298))]

Seed: machine learning is
Next words: [('the', np.float32(0.056452557)), ('not', np.float32(0.0493109)), ('in', np.float32(0.02187959))]



In [None]:
# Generalization: compute perplexity on held-out split
split = int(0.9 * len(X))
X_val, y_val = X[split:], y[split:]

def compute_perplexity(X_data, y_data):
    log_probs = []
    # batch inference
    preds = model.predict(X_data, verbose=0)
    for i, true_idx in enumerate(y_data):
        prob = preds[i, true_idx]
        log_probs.append(-np.log(prob + 1e-10))
    return exp(np.mean(log_probs))

ppl = compute_perplexity(X_val[:10000], y_val[:10000])  # sample 10k for speed
print(f"Perplexity on validation (10k samples): {ppl:.2f}")

Perplexity on validation (10k samples): 541.74


In [None]:
# Rare word analysis
word_counts = tokenizer.word_counts  # OrderedDict(word -> count)
rare_words = [w for w, c in word_counts.items() if c < 5]
print(f"Found {len(rare_words)} rare words (<5 occurrences). Sampling 5." )
for w in rare_words[:5]:
    print(f"Word: {w}")
    example_seq = f"{w} " + ' '.join(['the']*(seq_length-1))
    print("Predictions after rare word:", predict_next(example_seq))
    print()


Found 17161 rare words (<5 occurrences). Sampling 5.
Word: shaken
Predictions after rare word: [('king', np.float32(0.006242735)), ('duke', np.float32(0.0036731032)), ('fifth', np.float32(0.0029947849))]

Word: wan
Predictions after rare word: [('king', np.float32(0.010814611)), ('duke', np.float32(0.007911079)), ('day', np.float32(0.00595105))]

Word: pant
Predictions after rare word: [('king', np.float32(0.0067404183)), ('duke', np.float32(0.004088267)), ('day', np.float32(0.0031066607))]

Word: commenced
Predictions after rare word: [('king', np.float32(0.005773588)), ('duke', np.float32(0.0036536434)), ('fourth', np.float32(0.0034841537))]

Word: strands
Predictions after rare word: [('king', np.float32(0.010009347)), ('duke', np.float32(0.007444155)), ('day', np.float32(0.005769807))]



In [None]:
# OOV handling test
oov_seed = "flibbertigibbet"
print("### OOV Test")
print("Seed:", oov_seed)
print("Predictions:", predict_next(oov_seed))


### OOV Test
Seed: flibbertigibbet
Predictions: [('of', np.float32(0.15067743)), ('and', np.float32(0.09390331)), ('with', np.float32(0.03431334))]


In [10]:
# Summary of findings
from IPython.display import Markdown
display(Markdown("""
## Evaluation Summary

- **Shakespearean Coherence:** Predictions generally align with expected high-frequency follow-up words and produce plausible 5-word continuations.
- **Custom Inputs:** The model falls back to common English words when context is unfamiliar.
- **Perplexity:** ~[insert] on 10k validation samples indicates moderate model uncertainty, typical for large vocabularies.
- **Rare Words:** Performance after rare tokens shows the model defaults to high-frequency words, struggling to predict truly infrequent terms.
- **OOV Behavior:** Unknown tokens are ignored, and predictions rely solely on last valid tokens.

*Strengths:* captures local syntax and high-frequency patterns.
*Weaknesses:* limited by sequence length, vocabulary size, and inability to generate rare or unseen words.
"""))


## Evaluation Summary

- **Shakespearean Coherence:** Predictions generally align with expected high-frequency follow-up words and produce plausible 5-word continuations.
- **Custom Inputs:** The model falls back to common English words when context is unfamiliar.
- **Perplexity:** ~[insert] on 10k validation samples indicates moderate model uncertainty, typical for large vocabularies.
- **Rare Words:** Performance after rare tokens shows the model defaults to high-frequency words, struggling to predict truly infrequent terms.
- **OOV Behavior:** Unknown tokens are ignored, and predictions rely solely on last valid tokens.

*Strengths:* captures local syntax and high-frequency patterns.
*Weaknesses:* limited by sequence length, vocabulary size, and inability to generate rare or unseen words.
