In [1]:
import nltk
from nltk import word_tokenize, ngrams
from collections import Counter

In [2]:
def generate_ngrams(text, n):
    """Return the n-grams of *text* as a list.

    The text is lower-cased before being split with NLTK's
    ``word_tokenize``; the resulting tokens are grouped into
    consecutive runs of length *n* by ``nltk.ngrams``.

    Args:
        text: The raw input string.
        n: Size of each n-gram (1 = unigram, 2 = bigram, ...).

    Returns:
        A list with one entry per n-gram, in order of appearance.
    """
    lowered = text.lower()
    words = word_tokenize(lowered)
    grams = ngrams(words, n)
    return list(grams)

def calculate_ngram_probabilities(ngrams_list):
    """Map every distinct n-gram to its relative frequency.

    Each probability is the n-gram's occurrence count divided by the
    total number of n-grams in *ngrams_list* (a plain relative
    frequency, not a conditional probability).

    Args:
        ngrams_list: Iterable of hashable n-grams (e.g. tuples of tokens).

    Returns:
        A dict ``{ngram: count / total}`` in first-seen order; empty
        when *ngrams_list* is empty.
    """
    frequencies = Counter(ngrams_list)
    denominator = sum(frequencies.values())
    result = {}
    # With no n-grams the loop body never runs, so the zero
    # denominator is never used.
    for gram, occurrences in frequencies.items():
        result[gram] = occurrences / denominator
    return result

In [3]:
# Toy corpus used for the n-gram demo.
sentences = [
    "I love programming.",
    "Programming is fun.",
    "I love coding and programming.",
]
# All sentences are merged into one space-separated string, so n-grams
# built from `text` can straddle sentence boundaries.
text = " ".join(sentences)

In [4]:
# Build the unigram, bigram and trigram lists from the combined text.
unigrams, bigrams, trigrams = (
    generate_ngrams(text, order) for order in (1, 2, 3)
)

# Convert each n-gram list into a {ngram: relative frequency} table.
unigram_prob, bigram_prob, trigram_prob = map(
    calculate_ngram_probabilities, (unigrams, bigrams, trigrams)
)

In [5]:
# Print each probability table under its heading, one n-gram per line,
# with probabilities rounded to four decimal places.
for heading, table in (
    ("\nUnigram Probabilities:", unigram_prob),
    ("\nBigram Probabilities:", bigram_prob),
    ("\nTrigram Probabilities:", trigram_prob),
):
    print(heading)
    for gram, prob in table.items():
        print(f"{gram}: {prob:.4f}")


Unigram Probabilities:
('i',): 0.1429
('love',): 0.1429
('programming',): 0.2143
('.',): 0.2143
('is',): 0.0714
('fun',): 0.0714
('coding',): 0.0714
('and',): 0.0714

Bigram Probabilities:
('i', 'love'): 0.1538
('love', 'programming'): 0.0769
('programming', '.'): 0.1538
('.', 'programming'): 0.0769
('programming', 'is'): 0.0769
('is', 'fun'): 0.0769
('fun', '.'): 0.0769
('.', 'i'): 0.0769
('love', 'coding'): 0.0769
('coding', 'and'): 0.0769
('and', 'programming'): 0.0769

Trigram Probabilities:
('i', 'love', 'programming'): 0.0833
('love', 'programming', '.'): 0.0833
('programming', '.', 'programming'): 0.0833
('.', 'programming', 'is'): 0.0833
('programming', 'is', 'fun'): 0.0833
('is', 'fun', '.'): 0.0833
('fun', '.', 'i'): 0.0833
('.', 'i', 'love'): 0.0833
('i', 'love', 'coding'): 0.0833
('love', 'coding', 'and'): 0.0833
('coding', 'and', 'programming'): 0.0833
('and', 'programming', '.'): 0.0833


# Flow diagram

```text
+-------------------+
|    Input Text     |
+-------------------+
          |
          v
+-------------------+
|   Tokenization    |
+-------------------+
          |
          v
+-------------------------------------+
| Generate N-grams (Uni, Bi, Tri)     |
+-------------------------------------+
          |
          v
+-------------------------------------+
| Calculate N-gram Probabilities      |
+-------------------------------------+
          |
          v
+-----------------------------+
| Display N-gram Probabilities|
+-----------------------------+
          |
          v
+-------------------+
|  Test Sentence    |
+-------------------+
          |
          v
+-------------------------------------+
| Compute Total Probability (Bi/Tri)  |
+-------------------------------------+
          |
          v
+-------------------------------+
| Output: Total Probabilities   |
|  (Bigram & Trigram)           |
+-------------------------------+
```
