## ***Aim: Implement an N-Gram model for the given text input***

## ***Yash Ashok Shirsath BE AI & DS 65***

In [64]:
import re
from collections import defaultdict

In [65]:
def tokenize_text(text):
    """
    Split raw text into word tokens.

    Every character that is neither a word character nor whitespace is
    deleted (not replaced by a space), so "India's" becomes "Indias".
    The remaining text is then split on runs of whitespace.

    Args:
        text (str): The input text.

    Returns:
        list: The words of ``text`` in their original order and casing.
    """
    # Strip punctuation first, then let str.split() collapse the whitespace.
    return re.sub(r'[^\w\s]', '', text).split()

In [66]:
def build_ngram_model(sentences, n):
    """
    Build an N-Gram model from the input sentences.

    Each sentence is tokenized on its own, so no n-gram spans two entries of
    ``sentences``. Every window of ``n`` consecutive tokens contributes one
    count: the first ``n - 1`` tokens form the context and the last token is
    the word that followed it.

    Args:
        sentences (list): A list of sentences.
        n (int): The order of the N-Gram model. Must be at least 1; with
            ``n == 1`` every word is counted under the empty context ``()``.

    Returns:
        dict: A nested ``defaultdict`` mapping each context tuple (length
        ``n - 1``) to a mapping of next word -> occurrence count.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # With n <= 0 the window arithmetic below would use negative offsets
        # and silently record meaningless context/word pairs.
        raise ValueError(f"n must be a positive integer, got {n!r}")

    model = defaultdict(lambda: defaultdict(int))

    for sentence in sentences:
        tokens = tokenize_text(sentence)
        # Sentences with fewer than n tokens give an empty range and are skipped.
        for start in range(len(tokens) - n + 1):
            *context, next_word = tokens[start:start + n]
            model[tuple(context)][next_word] += 1

    return model

In [67]:
def predict_next_word(model, context):
    """
    Predict the next word given the context.

    The prediction is the word with the highest recorded count for
    ``context``. When several words share the highest count, the one that
    was inserted into the mapping first is returned.

    Args:
        model (dict): The N-Gram model, mapping context tuples to a mapping
            of next word -> count.
        context (tuple): The context words. Its length must match the
            context length used to build the model, otherwise it will not
            be found.

    Returns:
        str: The predicted next word, or "unknown" if the context is not in
        the model or has no recorded followers.
    """
    # .get() avoids inserting a new empty entry when the model is a defaultdict.
    next_words = model.get(context)
    if not next_words:
        # Covers both a missing context and an empty follower mapping, where
        # max() would otherwise raise ValueError.
        return "unknown"

    return max(next_words, key=next_words.get)

In [68]:
# Demo: train a bigram model on a small corpus and query it.
n = 2
sentences = [
    "Indian Railways is a statutory body under the ownership of the Ministry of Railways of the Government of India.",
    "It operates India's national railway system. The railways play a crucial role in the transportation infrastructure of the country.",
    "The Indian Railways network is one of the largest in the world and serves millions of passengers every day.",
]

model = build_ngram_model(sentences, n)

# Model keys are tuples of n - 1 words, so with n = 2 only the one-word
# contexts can match; the two-word contexts fall back to "unknown".
contexts = [
    ("Indian",),
    ("It",),
    ("The", "Indian"),
    ("railways", "play"),
    ("of", "the"),
]

for context in contexts:
    next_word = predict_next_word(model, context)
    print(f"Context: {context}, Predicted next word: {next_word}")

Context: ('Indian',), Predicted next word: Railways
Context: ('It',), Predicted next word: operates
Context: ('The', 'Indian'), Predicted next word: unknown
Context: ('railways', 'play'), Predicted next word: unknown
Context: ('of', 'the'), Predicted next word: unknown
