In [2]:

# Cell 1: Imports
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from IPython.display import display, Markdown



In [3]:
# Cell 2: Load trained model and tokenizer
# Artifact locations, relative to the directory the notebook is run from.
MODEL_PATH = '../models/best_model.h5'
TOKENIZER_PATH = '../data/processed/tokenizer.pkl'

print(f"Loading model from {MODEL_PATH} and tokenizer from {TOKENIZER_PATH}...")
model = tf.keras.models.load_model(MODEL_PATH)

# NOTE: unpickling runs whatever code the file contains, so only load
# tokenizer files that were produced by this project.
with open(TOKENIZER_PATH, 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# The second axis of the model's input shape is the window length the
# network consumes; every prompt is cut/padded to this many token ids.
seq_length = model.input_shape[1]

# Forward (word -> id) and reverse (id -> word) vocabulary lookups.
vocab = tokenizer.word_index
inv_vocab = {token_id: word for word, token_id in vocab.items()}

print(f"Model expects input sequence length: {seq_length}")


Loading model from ../models/best_model.h5 and tokenizer from ../data/processed/tokenizer.pkl...




Model expects input sequence length: 20


In [4]:
# Cell 3: Prediction function
def predict_next_words(seed_text, top_k=3):
    """
    Predict the highest-scoring next words for a seed phrase.

    The seed is lower-cased and stripped of surrounding whitespace, turned
    into token ids by the notebook-level ``tokenizer``, cut down to its last
    ``seq_length`` ids, left-padded to ``seq_length`` and fed to the
    notebook-level ``model``.

    Args:
        seed_text (str): Phrase whose continuation should be predicted.
        top_k (int): Maximum number of candidates to return. Defaults to 3.

    Returns:
        list[tuple[str, float]]: At most ``top_k`` ``(word, score)`` pairs,
        ordered from highest to lowest model output (the score is the raw
        model output for that index; presumably a softmax probability --
        confirm against the model definition). Indices that have no entry
        in ``inv_vocab`` are reported as ``'<UNK>'``. An empty list is
        returned when ``top_k`` is 0.

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")
    if top_k == 0:
        # A slice of [-0:] is [0:] and would select the *entire* output
        # vector, so answer directly (and skip the forward pass).
        return []

    # Lower-case and trim the seed; any further cleaning is whatever the
    # tokenizer itself applies.
    text = seed_text.lower().strip()
    seq = tokenizer.texts_to_sequences([text])[0]
    # Keep only the most recent seq_length tokens
    seq = seq[-seq_length:]
    # Left-pad so the sequence has exactly seq_length entries
    seq_padded = pad_sequences([seq], maxlen=seq_length, padding='pre')
    # Single-sample batch -> take the first (only) row of scores
    preds = model.predict(seq_padded, verbose=0)[0]
    # argsort is ascending: take the last top_k indices and reverse them.
    # Slicing caps the result at len(preds) when top_k is larger.
    top_indices = np.argsort(preds)[-top_k:][::-1]
    return [(inv_vocab.get(int(idx), '<UNK>'), float(preds[idx])) for idx in top_indices]


In [5]:
# Cell 4: Single-run CLI testing
# One-shot demo: read a single seed phrase, rank next-word candidates, print them.
display(Markdown("### Next-Word Prediction CLI"))
seed = input("Enter a seed phrase: ")
# No loop and no 'exit' keyword here -- re-run the cell to try another phrase.
predictions = predict_next_words(seed, top_k=3)

# Assemble the report first, then emit it in one go.
report_lines = ["Top-3 predictions:"]
report_lines.extend(f"  {word} — {prob:.2%}" for word, prob in predictions)
print("\n".join(report_lines))

### Next-Word Prediction CLI

Top-3 predictions:
  of — 4.81%
  and — 2.92%
  ii — 2.35%
