In [1]:
from collections import defaultdict, Counter
import pprint

In [2]:
# Toy POS-tagged training corpus. Each entry is one sentence written as
# whitespace-separated tokens of the form "word_TAG" (e.g. "cat_NOUN").
sentences = [
    "The_DET cat_NOUN sleeps_VERB",
    "A_DET dog_NOUN barks_VERB",
    "The_DET dog_NOUN sleeps_VERB",
    "My_DET dog_NOUN runs_VERB fast_ADV",
    "A_DET cat_NOUN meows_VERB loudly_ADV",
    "Your_DET cat_NOUN runs_VERB",
    "The_DET bird_NOUN sings_VERB sweetly_ADV",
    "A_DET bird_NOUN chirps_VERB"
]

In [3]:
# Empty tallies, filled in while scanning the corpus.
# tag_counts[tag]                   -> number of tokens carrying that tag
tag_counts = Counter()
# emission_counts[tag][word]        -> times `word` was seen with `tag`
emission_counts = defaultdict(Counter)
# transition_counts[prev_tag][tag]  -> times `tag` directly followed `prev_tag`
transition_counts = defaultdict(Counter)

In [4]:
# Reset the tallies first so that re-running this cell (for example after
# editing `sentences`) recounts from scratch instead of adding on top of the
# counts left over from a previous run.
transition_counts.clear()
emission_counts.clear()
tag_counts.clear()

for sentence in sentences:
    tokens = sentence.split()
    # Every sentence starts from the synthetic "<START>" state.
    prev_tag = "<START>"

    for token in tokens:
        # Split on the LAST underscore so a word that itself contains "_"
        # keeps its full text; only the trailing piece is taken as the tag.
        word, tag = token.rsplit("_", 1)
        emission_counts[tag][word] += 1
        tag_counts[tag] += 1
        transition_counts[prev_tag][tag] += 1
        prev_tag = tag

    # Close the sentence with a transition into the synthetic "<END>" state.
    transition_counts[prev_tag]["<END>"] += 1

In [5]:
def normalize(counter):
    """Convert a mapping of counts into relative frequencies.

    Args:
        counter: Mapping from key to a numeric count (e.g. a ``Counter``).

    Returns:
        A new dict with the same keys, where each value is the key's count
        divided by the sum of all counts. If the counts sum to zero, every
        key maps to ``0.0`` instead of raising ``ZeroDivisionError``. An
        empty mapping yields an empty dict.
    """
    total = sum(counter.values())
    if total == 0:
        # No mass to distribute: avoid dividing by zero.
        return dict.fromkeys(counter, 0.0)
    return {k: v / total for k, v in counter.items()}

# Per-state probability tables: each inner counter is rescaled by `normalize`.
# transition_probs[prev_tag][next_tag], keyed by the tag being left.
transition_probs = {
    state: normalize(transition_counts[state]) for state in transition_counts
}
# emission_probs[tag][word], keyed by the tag emitting the word.
emission_probs = {
    state: normalize(emission_counts[state]) for state in emission_counts
}

In [6]:
# Show each table under its heading; pformat yields the same text that
# pprint.pprint would write, and sep="\n" puts it on the line after the title.
print("Transition Probabilities:", pprint.pformat(transition_probs), sep="\n")

print("\nEmission Probabilities:", pprint.pformat(emission_probs), sep="\n")

Transition Probabilities:
{'<START>': {'DET': 1.0},
 'ADV': {'<END>': 1.0},
 'DET': {'NOUN': 1.0},
 'NOUN': {'VERB': 1.0},
 'VERB': {'<END>': 0.625, 'ADV': 0.375}}

Emission Probabilities:
{'ADV': {'fast': 0.3333333333333333,
         'loudly': 0.3333333333333333,
         'sweetly': 0.3333333333333333},
 'DET': {'A': 0.375, 'My': 0.125, 'The': 0.375, 'Your': 0.125},
 'NOUN': {'bird': 0.25, 'cat': 0.375, 'dog': 0.375},
 'VERB': {'barks': 0.125,
          'chirps': 0.125,
          'meows': 0.125,
          'runs': 0.25,
          'sings': 0.125,
          'sleeps': 0.25}}
