### WRITE A PROGRAM TO FIND OUT THE FREQUENCIES OF DISTINCT WORDS, GIVEN A SENTENCE USING N-GRAMS.

In [8]:
from collections import Counter

def generate_ngrams(sentence, n):
    """Count every contiguous n-gram in a whitespace-tokenised sentence.

    Args:
        sentence: Text to tokenise; it is split with ``str.split()``.
        n: Number of words per gram; must be at least 1.

    Returns:
        A ``Counter`` mapping each n-gram (a tuple of ``n`` words) to the
        number of times it occurs. The counter is empty when the sentence
        has fewer than ``n`` words.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Without this guard n == 0 produces len(words) + 1 empty tuples and a
    # negative n produces meaningless slices instead of real n-grams.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")
    words = sentence.split()
    return Counter(tuple(words[i:i + n]) for i in range(len(words) - n + 1))

def print_ngrams(sentence, max_n=3):
    """Print an n-gram frequency table for every size from 1 up to max_n.

    For each size a "<n>-grams:" header is printed, followed by one line per
    distinct n-gram: its words joined by spaces, left-aligned in a
    30-character column, then an arrow and the count.
    """
    for size in range(1, max_n + 1):
        table = generate_ngrams(sentence, size)
        print(f"\n{size}-grams:")
        for gram in table:
            # Tuples are flattened to plain text so the column lines up.
            label = " ".join(gram)
            print(f"  {label:<30} → {table[gram]}")

# Example usage: tabulate 1-gram through 4-gram frequencies for a sample
# sentence (max_n=4 overrides print_ngrams' default of 3).
sentence = "I love Python and I love programming in Python"
print_ngrams(sentence, max_n=4)


1-grams:
  I                              → 2
  love                           → 2
  Python                         → 2
  and                            → 1
  programming                    → 1
  in                             → 1

2-grams:
  I love                         → 2
  love Python                    → 1
  Python and                     → 1
  and I                          → 1
  love programming               → 1
  programming in                 → 1
  in Python                      → 1

3-grams:
  I love Python                  → 1
  love Python and                → 1
  Python and I                   → 1
  and I love                     → 1
  I love programming             → 1
  love programming in            → 1
  programming in Python          → 1

4-grams:
  I love Python and              → 1
  love Python and I              → 1
  Python and I love              → 1
  and I love programming         → 1
  I love programming in          → 1
  love programming in Python     → 1

### ADDITIONAL PROGRAMS: Program to Calculate Probabilities of Each N‑gram

In [3]:
from collections import Counter

def generate_ngrams(sentence, n):
    """Return the contiguous n-grams of a sentence in order of appearance.

    Args:
        sentence: Text to tokenise; it is split with ``str.split()``.
        n: Number of words per gram; must be at least 1.

    Returns:
        A list of tuples, each holding ``n`` consecutive words. Duplicates are
        kept, and the list is empty when the sentence has fewer than ``n``
        words.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Without this guard n == 0 produces len(words) + 1 empty tuples and a
    # negative n produces meaningless slices instead of real n-grams.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")
    words = sentence.split()
    return [tuple(words[i:i + n]) for i in range(len(words) - n + 1)]

def ngram_probabilities(sentence, n):
    """Map each distinct n-gram of the sentence to its relative frequency.

    The relative frequency is the gram's count divided by the total number
    of n-grams returned by generate_ngrams. A sentence that yields no
    n-grams gives an empty dict.
    """
    grams = generate_ngrams(sentence, n)
    denominator = len(grams)
    probabilities = {}
    for gram, occurrences in Counter(grams).items():
        probabilities[gram] = occurrences / denominator
    return probabilities

def print_all_ngram_probabilities(sentence, max_n=3):
    """Print the n-gram probability table for every size from 1 up to max_n.

    Each size gets a "<n>-gram Probabilities:" header followed by one line
    per distinct n-gram: its words joined by spaces, left-aligned in a
    30-character column, then an arrow and the probability to three decimals.
    """
    for size in range(1, max_n + 1):
        print(f"\n{size}-gram Probabilities:")
        table = ngram_probabilities(sentence, size)
        for gram in table:
            label = " ".join(gram)
            print(f"  {label:<30} → {table[gram]:.3f}")

# Example usage: every word in this sentence is distinct, so all n-grams of a
# given size share the same probability (1 / number of n-grams of that size).
sentence = "I am Ayush Kumar Singh"
print_all_ngram_probabilities(sentence, max_n=4)


1-gram Probabilities:
  I                              → 0.200
  am                             → 0.200
  Ayush                          → 0.200
  Kumar                          → 0.200
  Singh                          → 0.200

2-gram Probabilities:
  I am                           → 0.250
  am Ayush                       → 0.250
  Ayush Kumar                    → 0.250
  Kumar Singh                    → 0.250

3-gram Probabilities:
  I am Ayush                     → 0.333
  am Ayush Kumar                 → 0.333
  Ayush Kumar Singh              → 0.333

4-gram Probabilities:
  I am Ayush Kumar               → 0.500
  am Ayush Kumar Singh           → 0.500


### ADDITIONAL PROGRAMS: Program to Generate N‑grams in Reverse Order

In [4]:
def generate_reverse_ngrams(sentence, n):
    """Return the sentence's n-grams ordered from the last one to the first.

    The words inside each gram keep their original left-to-right order; only
    the order of the grams is reversed.

    Args:
        sentence: Text to tokenise; it is split with ``str.split()``.
        n: Number of words per gram; must be at least 1.

    Returns:
        A list of tuples, each holding ``n`` consecutive words, starting with
        the gram that ends the sentence. The list is empty when the sentence
        has fewer than ``n`` words.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Without this guard n == 0 produces len(words) + 1 empty tuples and a
    # negative n produces meaningless slices instead of real n-grams.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")
    words = sentence.split()
    # Walk the start positions from last to first so no separate reversal
    # pass over the finished list is needed.
    return [tuple(words[start:start + n])
            for start in range(len(words) - n, -1, -1)]

# Example usage: list the trigrams of a four-word sentence, last trigram first.
sentence = "I am Ayush Kumar"
print("Reverse Trigrams:")
for gram in generate_reverse_ngrams(sentence, 3):
    # Each gram is a tuple of words; join it for display.
    print(" ".join(gram))

Reverse Trigrams:
am Ayush Kumar
I am Ayush


## ADDITIONAL PROG (don't write): WAP that builds a bigram language model from a given corpus and computes the perplexity of a given test sentence using the bigram probabilities

In [5]:
from collections import Counter
import math

def build_bigram_model(corpus):
    """Collect the counts a bigram model needs from a whitespace-split corpus.

    Returns:
        A 3-tuple ``(unigram_counts, bigram_counts, V)``: a Counter of single
        words, a Counter of adjacent ``(word, next_word)`` pairs, and the
        number of distinct words in the corpus.
    """
    tokens = corpus.split()
    vocabulary_size = len(set(tokens))

    word_counts = Counter(tokens)
    # Pairing the token list with itself shifted by one yields every
    # adjacent (word, next_word) pair.
    pair_counts = Counter(zip(tokens, tokens[1:]))

    return word_counts, pair_counts, vocabulary_size

def bigram_probability(w1, w2, unigram_counts, bigram_counts, V):
    """Return the add-one smoothed estimate of P(w2 | w1).

    Computed as ``(count(w1, w2) + 1) / (count(w1) + V)``, where the counts
    are looked up by indexing ``bigram_counts`` and ``unigram_counts``.
    """
    pair_count = bigram_counts[(w1, w2)]
    context_count = unigram_counts[w1]
    return (pair_count + 1) / (context_count + V)

def sentence_probability(sentence, unigram_counts, bigram_counts, V):
    """Multiply the bigram probabilities of every adjacent word pair.

    The sentence is split on whitespace. A sentence with fewer than two
    words has no pairs, so the result is 1.0.
    """
    tokens = sentence.split()
    product = 1.0
    for previous, current in zip(tokens, tokens[1:]):
        product *= bigram_probability(previous, current,
                                      unigram_counts, bigram_counts, V)
    return product

def perplexity(sentence, unigram_counts, bigram_counts, V):
    """Return the sentence probability raised to the power -1 / word count.

    The probability comes from sentence_probability. The exponent uses the
    total number of whitespace-separated words in the sentence, so an empty
    sentence raises ZeroDivisionError.
    """
    token_count = len(sentence.split())
    likelihood = sentence_probability(sentence, unigram_counts, bigram_counts, V)
    exponent = -1 / token_count
    return likelihood ** exponent

# Example usage: build the bigram counts from a small lowercase corpus, then
# score a test sentence whose words all occur in that corpus.
corpus = "i love python i love programming in python"
test_sentence = "i love python"

# The three returned values are passed straight through to perplexity().
unigram_counts, bigram_counts, V = build_bigram_model(corpus)
pp = perplexity(test_sentence, unigram_counts, bigram_counts, V)

print("Perplexity of test sentence:", pp)

Perplexity of test sentence: 2.013793539326758


## Python program that implements an N‑gram backoff language model. It uses trigrams when available, backs off to bigrams if unseen, and finally falls back to unigrams when both are unavailable. Laplace smoothing is applied at each level to avoid zero probabilities.


In [7]:
from collections import Counter
import math

def build_ngram_model(corpus):
    """Collect unigram, bigram and trigram counts from a whitespace-split corpus.

    Returns:
        A 4-tuple ``(unigram_counts, bigram_counts, trigram_counts, V)``:
        Counters of single words, adjacent pairs and adjacent triples, plus
        the number of distinct words in the corpus.
    """
    tokens = corpus.split()
    vocabulary_size = len(set(tokens))

    word_counts = Counter(tokens)
    # Zipping the token list with shifted copies of itself yields every run
    # of two (or three) consecutive words.
    pair_counts = Counter(zip(tokens, tokens[1:]))
    triple_counts = Counter(zip(tokens, tokens[1:], tokens[2:]))

    return word_counts, pair_counts, triple_counts, vocabulary_size

def backoff_probability(w1, w2, w3, unigram_counts, bigram_counts, trigram_counts, V):
    """Estimate P(w3 | w1 w2) from the highest-order n-gram that was observed.

    The trigram (w1, w2, w3) is used if its count is positive; otherwise the
    bigram (w2, w3) if its count is positive; otherwise the unigram w3. Each
    level adds 1 to the numerator count and V to the denominator count.

    Returns:
        A ``(probability, source)`` pair where ``source`` is "Trigram",
        "Bigram" or "Unigram", naming the level that produced the estimate.
    """
    trigram_hits = trigram_counts[(w1, w2, w3)]
    if trigram_hits > 0:
        return (trigram_hits + 1) / (bigram_counts[(w1, w2)] + V), "Trigram"

    # Trigram unseen: drop the oldest context word and try the bigram.
    bigram_hits = bigram_counts[(w2, w3)]
    if bigram_hits > 0:
        return (bigram_hits + 1) / (unigram_counts[w2] + V), "Bigram"

    # Nothing with context was seen: fall back to the word's own frequency
    # relative to the total number of counted words.
    total_tokens = sum(unigram_counts.values())
    return (unigram_counts[w3] + 1) / (total_tokens + V), "Unigram"

def sentence_probability(sentence, unigram_counts, bigram_counts, trigram_counts, V):
    """Multiply the backoff probabilities of each word given its two predecessors.

    Prints a header, then one line per scored word showing the conditional
    probability to four decimals and which n-gram level supplied it. Scoring
    starts at the third word, so a sentence with fewer than three words
    prints only the header and returns 1.0.
    """
    tokens = sentence.split()
    product = 1.0
    print("\nWord Probabilities with Backoff:")
    for first, second, target in zip(tokens, tokens[1:], tokens[2:]):
        step_prob, level = backoff_probability(
            first, second, target,
            unigram_counts, bigram_counts, trigram_counts, V,
        )
        print(f"P({target} | {first} {second}) = {step_prob:.4f}  [{level}]")
        product *= step_prob
    return product

def perplexity(sentence, unigram_counts, bigram_counts, trigram_counts, V):
    """Return the backoff sentence probability raised to the power -1 / word count.

    The probability comes from sentence_probability, which also prints the
    per-word breakdown. The exponent uses the total number of
    whitespace-separated words in the sentence, so an empty sentence raises
    ZeroDivisionError.
    """
    token_count = len(sentence.split())
    likelihood = sentence_probability(
        sentence, unigram_counts, bigram_counts, trigram_counts, V
    )
    exponent = -1 / token_count
    return likelihood ** exponent

# Example usage: build unigram/bigram/trigram counts from a small lowercase
# corpus, then score a test sentence with the backoff model.
corpus = "i love python i love programming in python python is great and i love it"
test_sentence = "i love python is great"

# perplexity() prints the per-word backoff breakdown as a side effect of
# calling sentence_probability().
unigram_counts, bigram_counts, trigram_counts, V = build_ngram_model(corpus)
pp = perplexity(test_sentence, unigram_counts, bigram_counts, trigram_counts, V)

print("\nPerplexity of test sentence:", pp)


Word Probabilities with Backoff:
P(python | i love) = 0.1667  [Trigram]
P(is | love python) = 0.1667  [Bigram]
P(great | python is) = 0.2000  [Trigram]

Perplexity of test sentence: 2.825234500494767
