In [21]:
import re
import random

In [22]:
def tokenize(text):
    """Split ``text`` into a flat list of lowercase tokens.

    The text is lowercased first. A token is then either

    * a maximal run of word characters (``\\w+`` - letters, digits and
      underscore), or
    * one single character that is neither a word character nor whitespace
      (i.e. each punctuation mark becomes its own token).

    Whitespace is never returned. Because an apostrophe is punctuation,
    a contraction such as ``can't`` yields ``['can', "'", 't']``.

    Args:
        text: The string to tokenize.

    Returns:
        list[str]: Tokens in their order of appearance.
    """
    lowered = text.lower()
    # First alternative: a whole word delimited by word boundaries (\b).
    # Second alternative: exactly one non-word, non-space character.
    token_pattern = r'\b\w+\b|[^\w\s]'
    return re.findall(token_pattern, lowered)

In [23]:
def create_n_gram_model(tokens, n):
    """Build conditional-probability tables for every N-gram order from 1 to ``n``.

    For each order, every window of ``order`` consecutive tokens is counted,
    together with its context (the window minus its last token). The
    probability stored for an N-gram is

        count(N-gram) / count(context)

    where ``count(context)`` is the number of windows of that order that start
    with the context. For order 1 the context is the empty tuple, so the
    value is the token's relative frequency.

    Args:
        tokens: Sequence of tokens (sliced and converted to tuples).
        n: Highest N-gram order to build. Values below 1 give an empty result.

    Returns:
        dict: ``{order: {ngram_tuple: probability}}``. Within each table the
        keys appear in order of first occurrence in ``tokens``.
    """
    models_by_order = {}

    for order in range(1, n + 1):
        gram_freq = {}
        prefix_freq = {}
        window_count = len(tokens) - order + 1  # <= 0 when tokens is too short

        for start in range(window_count):
            gram = tuple(tokens[start:start + order])
            prefix = gram[:-1]  # everything except the predicted (last) token
            gram_freq[gram] = gram_freq.get(gram, 0) + 1
            prefix_freq[prefix] = prefix_freq.get(prefix, 0) + 1

        # Turn raw counts into P(last token | context).
        probabilities = {}
        for gram, freq in gram_freq.items():
            probabilities[gram] = freq / prefix_freq[gram[:-1]]
        models_by_order[order] = probabilities

    return models_by_order

In [24]:
def generate_text(models, order, start_words, num_words):
    """Generate text by sampling from the N-gram model of the given order.

    Starting from ``start_words``, the next word is repeatedly drawn with
    ``random.choices`` from all N-grams whose context equals the last
    ``order - 1`` words, weighted by the stored probabilities. Generation
    stops early when the current context has no continuation in the model.

    Args:
        models: Mapping ``{order: {ngram_tuple: probability}}``.
        order: Which N-gram table in ``models`` to sample from.
        start_words: Seed context, either a whitespace-separated string or a
            sequence of words. Must contain exactly ``order - 1`` words
            (an empty string for a unigram model).
        num_words: Target length of the result in words, seed included.

    Returns:
        str: The seed followed by the generated words, joined with single
        spaces. May be shorter than ``num_words`` if a dead end is reached.

    Raises:
        ValueError: If ``start_words`` does not hold exactly ``order - 1`` words.
        KeyError: If words must be generated and ``order`` is not in ``models``.
    """
    if isinstance(start_words, str):
        start_words = tuple(start_words.split())
    if len(start_words) != order - 1:
        raise ValueError(f"Start words must consist of exactly {order - 1} words for a {order}-gram model.")

    text = list(start_words)
    words_to_generate = num_words - len(text)
    if words_to_generate <= 0:
        # Nothing to sample; the model is deliberately not touched.
        return ' '.join(text)

    # Index the model by context once, instead of rescanning every N-gram
    # (twice) for each generated word. Iteration order of the model is kept,
    # so random.choices receives the same candidates/weights as a full scan.
    continuations = {}
    for gram, probability in models[order].items():
        words, weights = continuations.setdefault(gram[:-1], ([], []))
        words.append(gram[-1])
        weights.append(probability)

    # The seed holds exactly order - 1 words, so it is the initial context
    # (the empty tuple for a unigram model).
    current_context = tuple(text)

    for _ in range(words_to_generate):
        candidates = continuations.get(current_context)
        if candidates is None:
            break  # dead end: this context never occurred in the model

        words, weights = candidates
        next_word = random.choices(words, weights=weights, k=1)[0]
        text.append(next_word)
        if order > 1:
            # Slide the context window forward by one word.
            current_context = current_context[1:] + (next_word,)

    return ' '.join(text)

In [25]:
# Read the entire corpus file into one string.
# NOTE(review): the path is absolute and machine-specific, and no encoding is
# given, so the platform default is used - confirm the file's real encoding
# before making it explicit.
with open(
    r"C:\Users\Hamza\Desktop\NLP\J. K. Rowling - Harry Potter 4 - The Goblet of Fire.txt",
    mode="r",
) as file:
    corpus = file.read()

In [26]:
# Turn the raw corpus into tokens, then fit the unigram, bigram and trigram
# probability tables in a single call (orders 1 through 3).
tokens = tokenize(text=corpus)
n_gram_models = create_n_gram_model(tokens=tokens, n=3)

In [27]:
# Generate one 100-word sample per model order.
# Sampling uses the global `random` generator; seed it first so that
# re-running this cell (or Restart & Run All) reproduces the same samples.
SEED = 42
random.seed(SEED)

unigram_text = generate_text(n_gram_models, 1, "", 100)        # no context needed
bigram_text = generate_text(n_gram_models, 2, "he", 100)       # one seed word
trigram_text = generate_text(n_gram_models, 3, "he had", 100)  # two seed words

In [28]:
# Show the sample drawn from the unigram model.
print(f"Unigram Text: {unigram_text}")

Unigram Text: standing in , vernon their " as . hostages very long . fistful go had and of dormitory the about you bodies , , to floor the mcgonagall ! good ' were silence set meeting the like . moody became . then polite ' diggory the to t ' , through the only harry he them snowball ll all moment . , . . soon of were ' nearly at his horrible and went so then of . had wands a goblet shaking response that the he had along together " . help thick . began treasurer of " get


In [29]:
# Show the sample drawn from the bigram model.
print(f"Bigram Text: {bigram_text}")

Bigram Text: he lowered his mind you , whoever conjured here he had you have happened to get there ' t , " sure you in the class . " " he watched harry ? " she was mr . " the bulgarian minister , spilling your first assuring them looking forward . " he remained where is not been able to be testing them please . " you really surprised but the conclusion was speaking to nasty feeling . grinning faces . weasley glanced up his magnificent though they is an idiot , finnigan , " hermione . " said ,


In [30]:
# Show the sample drawn from the trigram model.
print(f"Trigram Text: {trigram_text}")

Trigram Text: he had gotten the point of them had believed their story , anyway - dumbledore , you really should be fooled by an impartial judge who had already struggled through one friday ' s shop with hagrid . there was a very decent thing you can ' t it ? " said dumbledore coolly . " remember ? but why . . and harry potter , " said the old madman attacked me , and held up only when they looked like a wounded hippo , uncle vernon screwed up against , the goblet of fire ' s come in
