In [6]:
import nltk
from nltk import trigrams
from nltk.corpus import brown
from collections import defaultdict
import random

# Fetch the NLTK data packages this notebook asks for, one call per package.
# NOTE(review): nothing in the visible code calls a tokenizer, so 'punkt'
# looks unused here - confirm before dropping it.
for _package in ('brown', 'punkt'):
    nltk.download(_package)

# Create a model based on trigrams
def train_trigram_model(corpus):
    """Estimate next-word probabilities from trigram counts.

    Each sentence is framed with two ``'<s>'`` start markers and one
    ``'</s>'`` end marker, so the first real word is predicted from the
    context ``('<s>', '<s>')`` and the sentence end is a predictable token.

    Args:
        corpus: Object whose ``sents()`` method returns an iterable of
            sentences, each an iterable of tokens (e.g. ``nltk.corpus.brown``).

    Returns:
        A nested ``defaultdict`` mapping each observed context ``(w1, w2)``
        to a mapping ``{w3: probability}``. The probabilities of one context
        sum to 1. Because the result is a ``defaultdict``, indexing a
        missing context inserts an empty entry; use ``.get()`` for
        read-only lookups.
    """
    model = defaultdict(lambda: defaultdict(int))

    for sentence in corpus.sents():
        padded = ['<s>', '<s>', *sentence, '</s>']

        # The explicit markers above are the only padding wanted. Padding a
        # second time (as nltk's pad_left/pad_right would) adds None tokens
        # and spurious contexts such as (None, None) or (word, '</s>').
        for w1, w2, w3 in zip(padded, padded[1:], padded[2:]):
            model[(w1, w2)][w3] += 1

    # Normalise each context's counts into relative frequencies. Only values
    # are reassigned, so iterating the keys while updating is safe.
    for followers in model.values():
        total_count = sum(followers.values())
        for w3 in followers:
            followers[w3] /= total_count

    return model

# Predict the next word
def predict_next_word(model, w1, w2):
    """Sample a next word for the two-word context ``(w1, w2)``.

    Args:
        model: Mapping from ``(w1, w2)`` tuples to a mapping of candidate
            next words to their weights (probabilities).
        w1: First word of the context.
        w2: Second word of the context.

    Returns:
        A word drawn at random in proportion to its weight, or ``None``
        when the model holds no candidates for the context.
    """
    # Use .get() rather than indexing: on a defaultdict, indexing would
    # insert an empty entry for every unseen context that is queried, and
    # on a plain dict it would raise KeyError.
    candidates = model.get((w1, w2))
    if not candidates:
        return None

    words = list(candidates)
    weights = list(candidates.values())
    return random.choices(words, weights=weights)[0]

# Build the trigram model from the Brown corpus
model = train_trigram_model(brown)

# Demo: sample a word to follow "The dog"; print a notice instead when the
# sampler returns nothing usable
predicted_word = predict_next_word(model, 'The', 'dog')
print(predicted_word or "No prediction available for the given context.")


[nltk_data] Downloading package brown to
[nltk_data]     /Users/eshithahemakumar/nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/eshithahemakumar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


refused
