# **Markov Chain Poetry Generator**

**1. Importing Required Libraries**

In [1]:
import random
import string
from google.colab import files

**2. Uploading and Reading the Input File**

In [2]:
# Ask the user to upload the corpus through the Colab file picker, then read
# it line by line. On any "no usable file" outcome we fall back to an empty
# corpus so the remaining cells still run.
try:
    uploaded = files.upload()

    # If nothing was uploaded (e.g. the dialog was dismissed) there is no
    # first key to read; route that case through the same handler as a
    # missing file instead of letting an IndexError escape.
    if not uploaded:
        raise FileNotFoundError("no file was uploaded")

    # Only the first uploaded file is used.
    filename = next(iter(uploaded))

    # Explicit encoding so the result does not depend on the platform default.
    with open(filename, 'r', encoding='utf-8') as f:
        lines = f.readlines()

except FileNotFoundError:
    print("Error: File not found!")
    lines = []

Saving robert_frost.txt to robert_frost.txt


**3. Text Cleaning and Preparing Processed Lines**


In [4]:
# Trim surrounding whitespace from every raw line and drop the ones that end
# up empty (blank lines between stanzas, trailing newlines, ...).
processed_lines = [text for text in (raw.strip() for raw in lines) if text]

# Preview the first five cleaned lines as a quick sanity check.
for preview in processed_lines[:5]:
    print(preview)

Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;


**4. Building Markov Chain Frequency Tables**

In [5]:
# Raw occurrence counts; a later cell converts them into probabilities.
#   initial_probabilities:    first word of a line -> count
#   first_order_transitions:  word -> {next word -> count}
#   second_order_transitions: (word, next word) -> {word after that -> count}
initial_probabilities = {}
first_order_transitions = {}
second_order_transitions = {}

for line in processed_lines:
    # Whitespace-tokenise and append a sentinel so the model also learns
    # where lines stop.
    tokens = line.strip().split()
    tokens.append("<END>")

    # Count the word that opens this line.
    opener = tokens[0]
    initial_probabilities[opener] = initial_probabilities.get(opener, 0) + 1

    # Count every adjacent (word, next word) pair, including (last, "<END>").
    for current, following in zip(tokens, tokens[1:]):
        successors = first_order_transitions.setdefault(current, {})
        successors[following] = successors.get(following, 0) + 1

    # Count every (word, word) -> word triple; "<END>" can only appear as
    # the predicted word, never inside the two-word context.
    for first, second, third in zip(tokens, tokens[1:], tokens[2:]):
        successors = second_order_transitions.setdefault((first, second), {})
        successors[third] = successors.get(third, 0) + 1

In [6]:
# Show the tokenised form of the first five cleaned lines.
print("First 5 lines")
for text in processed_lines[:5]:
    tokens = text.strip().split()
    print(tokens)

# First five entries of the opening-word table, in insertion order.
print("\nInitial Probabilities")
for word in list(initial_probabilities)[:5]:
    print(f"{word}: {initial_probabilities[word]}")

# Both transition tables are printed the same way, so share one loop.
for heading, table in (
    ("\nFirst-order Transitions (sample)", first_order_transitions),
    ("\nSecond-order Transitions (sample)", second_order_transitions),
):
    print(heading)
    for context in list(table)[:5]:
        print(f"{context} -> {table[context]}")

First 5 lines
['Two', 'roads', 'diverged', 'in', 'a', 'yellow', 'wood,']
['And', 'sorry', 'I', 'could', 'not', 'travel', 'both']
['And', 'be', 'one', 'traveler,', 'long', 'I', 'stood']
['And', 'looked', 'down', 'one', 'as', 'far', 'as', 'I', 'could']
['To', 'where', 'it', 'bent', 'in', 'the', 'undergrowth;']

Initial Probabilities
Two: 6
And: 122
To: 49
Then: 10
Because: 1

First-order Transitions (sample)
Two -> {'roads': 2, 'old-believers.': 1, 'winds': 1, 'of': 1, 'at': 1}
roads -> {'diverged': 2}
diverged -> {'in': 2}
in -> {'a': 13, 'the': 45, 'fire,': 1, 'ice.': 1, 'rain': 2, 'rain.': 3, 'it': 4, 'somewhere': 1, 'March': 1, 'Warren.': 1, 'their': 2, 'his': 3, 'Grafton': 1, "folks'": 1, 'it.': 1, 'every': 1, 'bed,': 1, 'bed.': 1, 'flight,': 1, 'stones': 1, 'one': 2, 'another,': 1, 'my': 1, 'heaven': 1, 'at': 2, 'winter': 1, 'spring': 1, 'me': 1, 'life': 1, 'all': 1, "bed.'": 1, 'her': 3, 'New': 1, 'Bow,': 1, 'town': 1, 'grove': 1, 'detail-': 1, 'Starkness.': 1, 'trees,': 1, 'such'

**5. Normalizing Frequency Tables into Probability Distributions**

In [7]:
def normalize_dict(d):
    """Scale the values of ``d`` so that they sum to 1.

    Args:
        d: Mapping from key to a numeric weight (here: occurrence counts).

    Returns:
        A new dict with the same keys in the same order, each value divided
        by the sum of all values. If the sum is zero (including the empty
        mapping) every key maps to 0 instead, avoiding a division by zero.
    """
    total = sum(d.values())
    if total:
        return {key: weight / total for key, weight in d.items()}
    return dict.fromkeys(d, 0)

# Turn the opening-word counts into a probability distribution.
initial_probabilities = normalize_dict(initial_probabilities)

# Each context (a word, or a pair of words) gets its own distribution over
# successors. Only existing keys are reassigned, so the dicts can safely be
# updated while they are being iterated.
for table in (first_order_transitions, second_order_transitions):
    for context, counts in table.items():
        table[context] = normalize_dict(counts)

In [8]:
# First five opening-word probabilities, rounded to three decimals.
print("Initial Probabilities (sample)")
for word in list(initial_probabilities)[:5]:
    print(f"{word}: {initial_probabilities[word]:.3f}")

# One row per context word: "word -> {next:prob, next:prob, ...}".
print("\nFirst-order Transitions (sample)")
for word in list(first_order_transitions)[:5]:
    entries = ", ".join(
        f"{successor}:{prob:.3f}"
        for successor, prob in first_order_transitions[word].items()
    )
    print(f"{word} -> {{{entries}}}")

# Same layout, but the context is a two-word tuple shown as "(w1, w2)".
print("\nSecond-order Transitions (sample)")
for (first, second), distribution in list(second_order_transitions.items())[:5]:
    entries = ", ".join(
        f"{successor}:{prob:.3f}" for successor, prob in distribution.items()
    )
    print(f"({first}, {second}) -> {{{entries}}}")

Initial Probabilities (sample)
Two: 0.004
And: 0.085
To: 0.034
Then: 0.007
Because: 0.001

First-order Transitions (sample)
Two -> {roads:0.333, old-believers.:0.167, winds:0.167, of:0.167, at:0.167}
roads -> {diverged:1.000}
diverged -> {in:1.000}
in -> {a:0.098, the:0.338, fire,:0.008, ice.:0.008, rain:0.015, rain.:0.023, it:0.030, somewhere:0.008, March:0.008, Warren.:0.008, their:0.015, his:0.023, Grafton:0.008, folks':0.008, it.:0.008, every:0.008, bed,:0.008, bed.:0.008, flight,:0.008, stones:0.008, one:0.015, another,:0.008, my:0.008, heaven:0.008, at:0.015, winter:0.008, spring:0.008, me:0.008, life:0.008, all:0.008, bed.':0.008, her:0.023, New:0.008, Bow,:0.008, town:0.008, grove:0.008, detail-:0.008, Starkness.:0.008, trees,:0.008, such:0.023, it?':0.008, trains:0.008, among:0.008, across:0.008, dialect.:0.008, this:0.008, sunshine.':0.008, building:0.008, air.:0.008, snow.:0.008, position:0.008, your:0.008, dough.:0.008, earnest.:0.008, creation:0.008, than:0.008, that:0.015

**6. Cumulative Probability Selection Function**

In [9]:
def choose_cumulative(prob_dict):
    """Sample one key from ``prob_dict`` by walking its cumulative sum.

    Args:
        prob_dict: Mapping from candidate to its probability. The values are
            expected to sum to (approximately) 1.

    Returns:
        The first candidate, in iteration order, whose running probability
        total reaches a uniform random draw from ``random.random()``. If no
        running total reaches the draw (for example because rounding left the
        sum slightly below 1), a key is picked uniformly at random instead.
    """
    threshold = random.random()
    running_total = 0.0
    for candidate in prob_dict:
        running_total += prob_dict[candidate]
        if running_total >= threshold:
            return candidate
    # The cumulative sum never reached the draw; fall back to a uniform pick.
    return random.choice(list(prob_dict))

**7. Generating a Line of Poetry Using the Markov Model**

In [10]:
def generate_line(max_words=50):
    """Generate one line of text from the Markov tables built above.

    The first word is drawn from ``initial_probabilities``. Each following
    word is drawn from ``second_order_transitions`` when the last two words
    form a known context, otherwise from ``first_order_transitions`` for the
    last word alone. Generation stops when the ``"<END>"`` sentinel is drawn,
    when no table knows the current context, or when ``max_words`` words have
    been produced.

    Args:
        max_words: Upper bound on the number of words in the returned line.

    Returns:
        The generated words joined by single spaces. The ``"<END>"`` sentinel
        is never part of the result. Returns an empty string when
        ``max_words`` is less than 1 or when the model is empty (no training
        lines were available).
    """
    # Nothing to sample from, or nothing requested.
    if max_words < 1 or not initial_probabilities:
        return ''

    line = [choose_cumulative(initial_probabilities)]

    while len(line) < max_words:
        pair = tuple(line[-2:])

        # Prefer the two-word context; back off to the one-word context.
        if len(line) >= 2 and pair in second_order_transitions:
            candidates = second_order_transitions[pair]
        elif line[-1] in first_order_transitions:
            candidates = first_order_transitions[line[-1]]
        else:
            break

        next_word = choose_cumulative(candidates)

        # The sentinel ends the line at every position, including right
        # after the first word, so it can never leak into the output.
        if next_word == "<END>":
            break

        line.append(next_word)

    return ' '.join(line)

**8. Generating and Displaying Multiple Lines of Poetry**

In [15]:
# Emit a four-line stanza; every line is sampled independently, and the
# output changes from run to run because no random seed is set.
stanza_length = 4
for _ in range(stanza_length):
    verse = generate_line()
    print(verse)

"I wonder what those signals are
To tell the truth, suppose the time had come.
And kick with two legs like an army mule.
They might be good for garden things
