<a href="https://colab.research.google.com/github/Vishal-113/NLP-2/blob/main/Programming_Bigram_Language_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from collections import defaultdict

# Training corpus: each sentence is a token list wrapped in <s> ... </s> markers
corpus = [
    ["<s>", "I", "love", "NLP", "</s>"],
    ["<s>", "I", "love", "deep", "learning", "</s>"],
    ["<s>", "deep", "learning", "is", "fun", "</s>"]
]

# -------------------------------
# Step 1: Count unigrams & bigrams
# -------------------------------
# Both tables default to 0 for keys that have not been counted yet.
unigram_counts = defaultdict(int)
bigram_counts = defaultdict(int)

for tokens in corpus:
    # Every token contributes one unigram observation.
    for token in tokens:
        unigram_counts[token] += 1
    # Pairing the sentence with itself shifted by one yields each adjacent
    # (previous, next) token pair; the last token has no successor.
    for pair in zip(tokens, tokens[1:]):
        bigram_counts[pair] += 1

# Helper: total count of bigrams that start with prev_word (the MLE denominator)
def unigram_total(prev_word):
    """Return the summed count of all recorded bigrams that start with *prev_word*.

    Scans every entry of the module-level ``bigram_counts`` table and adds up
    the counts whose first element equals *prev_word*. Returns 0 when no
    recorded bigram starts with that word.
    """
    total = 0
    for pair, freq in bigram_counts.items():
        if pair[0] == prev_word:
            total += freq
    return total

# -------------------------------
# Step 2: Bigram probability (MLE)
# -------------------------------
def bigram_prob(w1, w2):
    """Return the maximum-likelihood estimate of P(w2 | w1).

    The estimate is the count of the bigram ``(w1, w2)`` divided by the total
    count of bigrams that start with ``w1`` (as reported by
    ``unigram_total``).

    Args:
        w1: The preceding (context) token.
        w2: The token whose conditional probability is requested.

    Returns:
        The probability as a float. ``0.0`` is returned when no counted
        bigram starts with ``w1`` (avoiding a division by zero) and when the
        bigram ``(w1, w2)`` itself was never counted.
    """
    count_prev = unigram_total(w1)
    if count_prev == 0:
        return 0.0
    # Look the bigram up with .get() instead of indexing: bigram_counts is a
    # defaultdict, so indexing a missing key would insert a zero-count entry
    # and make this read-only query mutate the count table.
    return bigram_counts.get((w1, w2), 0) / count_prev

# -------------------------------
# Step 3: Sentence probability
# -------------------------------
def sentence_prob(sentence):
    """Return the probability of *sentence* under the bigram model.

    Multiplies ``bigram_prob`` over every pair of adjacent tokens in
    *sentence*. A sentence with fewer than two tokens has no pairs, so the
    result is 1.0.
    """
    product = 1.0
    # Zipping the sentence with itself shifted by one walks the adjacent pairs.
    for prev_tok, next_tok in zip(sentence, sentence[1:]):
        product *= bigram_prob(prev_tok, next_tok)
    return product

# -------------------------------
# Step 4: Test sentences
# -------------------------------
s1 = ["<s>", "I", "love", "NLP", "</s>"]
s2 = ["<s>", "I", "love", "deep", "learning", "</s>"]

# Score both test sentences under the bigram model.
p1, p2 = sentence_prob(s1), sentence_prob(s2)

# -------------------------------
# Step 5: Print results
# -------------------------------
# Collect the report lines first and emit them with a single print; joining
# on newlines produces the same output as printing each line separately.
report_lines = [
    "Bigram Probabilities:",
    f"P(<s> I) = {bigram_prob('<s>', 'I'):.3f}",
    f"P(I love) = {bigram_prob('I', 'love'):.3f}",
    f"P(love NLP) = {bigram_prob('love', 'NLP'):.3f}",
    f"P(NLP </s>) = {bigram_prob('NLP', '</s>'):.3f}",
    f"P(love deep) = {bigram_prob('love', 'deep'):.3f}",
    f"P(deep learning) = {bigram_prob('deep', 'learning'):.3f}",
    f"P(learning </s>) = {bigram_prob('learning', '</s>'):.3f}",
    "\nSentence Probabilities:",
    f"P(<s> I love NLP </s>) = {p1:.6f}",
    f"P(<s> I love deep learning </s>) = {p2:.6f}",
]
print("\n".join(report_lines))

# The first sentence is reported only when it scores strictly higher;
# otherwise the second sentence is reported.
verdict = (
    "\nModel prefers: <s> I love NLP </s> because it has higher probability."
    if p1 > p2
    else "\nModel prefers: <s> I love deep learning </s> because it has higher probability."
)
print(verdict)

Bigram Probabilities:
P(<s> I) = 0.667
P(I love) = 1.000
P(love NLP) = 0.500
P(NLP </s>) = 1.000
P(love deep) = 0.500
P(deep learning) = 1.000
P(learning </s>) = 0.500

Sentence Probabilities:
P(<s> I love NLP </s>) = 0.333333
P(<s> I love deep learning </s>) = 0.166667

Model prefers: <s> I love NLP </s> because it has higher probability.
