In [1]:
import math
from nltk.tokenize import word_tokenize
from nltk import bigrams

In [4]:
def read_corpus(file_path, encoding="utf-8"):
    """Read a whole text file and return its contents as a single string.

    Args:
        file_path: Path of the corpus file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The file contents as a string, or None when the file does not exist
        (an error message is printed in that case).
    """
    try:
        # An explicit encoding keeps the decoded text identical across machines.
        with open(file_path, 'r', encoding=encoding) as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None

In [5]:
# Load the corpus text once at notebook level. `corpus` is None when "corp.txt"
# cannot be found (read_corpus prints the error and returns None).
corpus = read_corpus("corp.txt")

In [6]:
def tokenize_corpus(corpus):
    """Lower-case the corpus text and split it into tokens with `word_tokenize`.

    Args:
        corpus: The raw corpus text as a string.

    Returns:
        Whatever `word_tokenize` returns for the lower-cased text.
    """
    lowered_text = corpus.lower()
    return word_tokenize(lowered_text)

In [7]:
def count_elements(tokens):
    """Tally how many times each item occurs in `tokens`.

    Args:
        tokens: An iterable of hashable items.

    Returns:
        A dict mapping each distinct item to its number of occurrences, with
        keys in order of first appearance.
    """
    tally = {}
    for token in tokens:
        if token in tally:
            tally[token] += 1
        else:
            # First sighting of this item.
            tally[token] = 1
    return tally

In [8]:
def generate_bigrams(tokens):
    """Materialise the output of `bigrams(tokens)` as a list.

    Args:
        tokens: The token sequence handed to `bigrams`.

    Returns:
        A list holding every item produced by `bigrams(tokens)`.
    """
    pair_iter = bigrams(tokens)
    return [pair for pair in pair_iter]

In [9]:
def count_bigrams(bigrams_list):
    """Tally how many times each bigram occurs in `bigrams_list`.

    Args:
        bigrams_list: An iterable of hashable bigram items.

    Returns:
        A dict mapping each distinct bigram to its number of occurrences, with
        keys in order of first appearance.
    """
    tally = {}
    for pair in bigrams_list:
        # Ensure the key exists before incrementing it.
        tally.setdefault(pair, 0)
        tally[pair] += 1
    return tally

In [10]:
def get_user_input(prompt):
    """Prompt the user and return the reply normalised for count lookups.

    The reply is used as an exact dictionary key against lower-cased corpus
    tokens, so it is lower-cased and surrounding whitespace is removed; a
    stray leading or trailing space would otherwise silently yield a count
    of zero.

    Args:
        prompt: Text shown to the user before reading the reply.

    Returns:
        The reply with surrounding whitespace stripped, in lower case.
    """
    return input(prompt).strip().lower()

In [11]:
def calculate_probability(p_v, p_n, v, n):
    """Turn two (numerator, denominator) count pairs into ratios.

    Args:
        p_v: Numerator for the verb ratio.
        p_n: Numerator for the noun ratio.
        v: Denominator for the verb ratio.
        n: Denominator for the noun ratio.

    Returns:
        A tuple `(p_v / v, p_n / n)`; a ratio is 0 whenever its denominator
        is not positive.
    """
    # Default to 0 so a non-positive denominator never triggers a division.
    verb_ratio = 0
    noun_ratio = 0
    if v > 0:
        verb_ratio = p_v / v
    if n > 0:
        noun_ratio = p_n / n
    return (verb_ratio, noun_ratio)

In [12]:
def calculate_lambda(prob_v, prob_n):
    """Compute the base-2 log-likelihood ratio used to pick an attachment.

    The score is ``log2(prob_v * (1 - prob_n) / prob_n)``. A positive score
    favours the verb, a negative score favours the noun, and zero means the
    evidence does not favour either side.

    Degenerate inputs are mapped to the limit of the ratio instead of
    raising a math domain error:

    * ``prob_n == 0`` and ``prob_v > 0``  -> ``+inf`` (only verb evidence)
    * ``prob_n == 0`` and ``prob_v == 0`` -> ``0.0``  (no evidence at all)
    * ``prob_v == 0`` or ``prob_n == 1`` (with ``prob_n > 0``) -> ``-inf``

    Args:
        prob_v: Estimated probability associated with the verb.
        prob_n: Estimated probability associated with the noun.

    Returns:
        The score as a float (possibly ``inf`` or ``-inf``).

    Raises:
        ValueError: If the ratio is negative, which only happens for inputs
            outside the range [0, 1].
    """
    numerator = prob_v * (1 - prob_n)

    if prob_n == 0:
        # Division by zero: the ratio is unbounded when there is verb
        # evidence, and undefined (0/0) when there is none either.
        return float('inf') if numerator > 0 else 0.0

    if numerator == 0:
        # log2(0) is undefined for math.log2; its limit is negative infinity.
        return float('-inf')

    # math.log2 is more accurate than math.log(x, 2).
    return math.log2(numerator / prob_n)

In [13]:
def determine_attachment(_lambda):
    """Translate the sign of the score into a human-readable verdict.

    Args:
        _lambda: The numeric score to classify.

    Returns:
        A message naming the verb when the score is positive, the noun when
        it is negative, and an "undetermined" message otherwise.
    """
    # Start from the fallback and overwrite it when the sign is decisive.
    verdict = "The Preposition attachment cannot be determined."
    if _lambda > 0:
        verdict = "The Preposition is attached with Verb."
    elif _lambda < 0:
        verdict = "The Preposition is attached with Noun."
    return verdict

In [14]:
def main():
    """Interactively decide whether a preposition attaches to a verb or a noun.

    Uses the notebook-level `corpus` text: tokenises it, counts single tokens
    and adjacent token pairs, asks the user for a noun, a verb and a
    preposition, and prints the verdict derived from the resulting score.
    Does nothing when `corpus` is None or empty.
    """
    if not corpus:
        return

    tokens = tokenize_corpus(corpus)
    element_counts = count_elements(tokens)
    bigrams_list = generate_bigrams(tokens)
    bigram_counts = count_bigrams(bigrams_list)

    noun = get_user_input("Enter the Noun: ")
    verb = get_user_input("Enter the Verb: ")
    prep = get_user_input("Enter the Preposition: ")

    # Unigram counts are the denominators of the two ratios.
    n = element_counts.get(noun, 0)
    v = element_counts.get(verb, 0)

    # A preposition that attaches to a head word follows it in the text
    # ("walked down", "pizza with"), so the evidence is the
    # (head, preposition) bigram. Looking up (preposition, head) instead
    # counts the preposition *preceding* the head and is almost always zero.
    p_n = bigram_counts.get((noun, prep), 0)
    p_v = bigram_counts.get((verb, prep), 0)

    prob_v, prob_n = calculate_probability(p_v, p_n, v, n)
    _lambda = calculate_lambda(prob_v, prob_n)

    result = determine_attachment(_lambda)
    print(result)

In [15]:
# Start the interactive session only when this code runs as the main program
# (e.g. a notebook cell or script), not when it is imported as a module.
if __name__ == "__main__":
    main()

Enter the Noun:  city
Enter the Verb:  walked
Enter the Preposition:  down


The Preposition is attached with Verb.
