In [1]:
# --- Standard library ---
import math
import re
from collections import Counter, defaultdict

# --- Third-party ---
import nltk
import pandas as pd
from nltk.lm import MLE, Laplace, StupidBackoff, Vocabulary
from nltk.lm.preprocessing import padded_everygram_pipeline
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.util import ngrams

# Make sure the Punkt tokenizer data used by NLTK's tokenizers is available.
nltk.download('punkt')

# Never truncate cell contents when a DataFrame is displayed, so that whole
# poems stay readable in the output.
pd.set_option('display.max_colwidth', None)

# Read the raw Kaggle poem dataset, discard every metadata column, and expose
# the remaining text column under the name "poem".
df_kaggle_poem_dataset = (
    pd.read_csv("../data_raw/kaggle_poem_dataset.csv")
    .drop(columns=["Unnamed: 0", "Author", "Title", "Poetry Foundation ID"])
    .rename(columns={"Content": "poem"})
)

# Peek at the last few rows as a quick sanity check of the load.
df_kaggle_poem_dataset.tail(3)

[nltk_data] Downloading package punkt to /home/akaagi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Unnamed: 0,poem
15649,"(A fortune cookie)\nOminous inscrutable Chinese news\nto get just before Christmas,\nconsidering my reasonable health,\nmarriage spicy as moo-goo-gai-pan,\ncareer running like a not-too-old Chevrolet.\nNot bad, considering what can go wrong:\nthe bony finger of Uncle Sam\nmight point out my husband,\nmy own national guard,\nand set him in Afghanistan;\nmy boss could take a personal interest;\nthe pain in my left knee could spread to my right.\nStill, as the old year tips into the new,\nI insist on the infant hope, gooing and kicking\nhis legs in the air. I won't give in\nto the dark, the sub-zero weather, the fog,\nor even the neighbors' Nativity.\nTheir four-year-old has arranged\nhis whole legion of dinosaurs\nso they, too, worship the child,\njoining the cow and sheep. Or else,\nultimate mortals, they've come to eat\nox and camel, Mary and Joseph,\nthen savor the newborn babe."
15650,1\nOur last night in the house was not our last.\nWith two cats in the yard. Our movers took\nthe furniture in the morning.A country where\nthey turned back time.
15651,"If your house\nis a dress\nit’ll fit like\nLos Angeles\nred sun\nburning west,\ndeserts, fields,\nfor certain it will\ndrape even\na boy no less\nboy in disrepair\nwandering from shore\nto crest, others\nmistake his\nsearching for\ndespair, no,\nnever, but\nfor thirst,\ncloaked as\nhe is, warm,\nradiant in a\nhouse dress."


In [2]:
def fit_ngram_model(model, order, tokenized_corpus):
    """Fit ``model`` on padded everygrams (orders 1..``order``) of a corpus.

    ``padded_everygram_pipeline`` returns lazy iterators that are consumed by
    ``fit``, so a fresh pipeline is built on every call rather than shared
    between models.

    Args:
        model: An untrained ``nltk.lm`` language model created with ``order``.
        order: Highest n-gram order to extract from the corpus.
        tokenized_corpus: Sequence of token lists, one list per document.

    Returns:
        The same ``model`` instance after fitting.
    """
    train_data, vocab = padded_everygram_pipeline(order, tokenized_corpus)
    model.fit(train_data, vocab)
    return model


# Generation settings shared by every model and every n-gram order.
NUM_GENERATED_WORDS = 200
GENERATION_SEED = 42  # fixed seed so each run reproduces the same samples

# Tokenize each poem once; the token lists are reused for every model below.
tokenized_text = [word_tokenize(poem) for poem in df_kaggle_poem_dataset['poem']]

# Define seed words for text generation
seed_words = ['Long', 'live', 'the', 'king']
seed_prefix = " ".join(seed_words)

# Iterate over different n-gram sizes
for n in range(1, 7):
    print(f"Generating text for n-gram size: {n}")

    # Train one model of each kind at the current order.
    mle_lm = fit_ngram_model(MLE(n), n, tokenized_text)
    sb_lm = fit_ngram_model(StupidBackoff(order=n), n, tokenized_text)
    laplace_lm = fit_ngram_model(Laplace(n), n, tokenized_text)

    # Generate text using each model, continuing from the same seed words.
    mle_text = mle_lm.generate(
        NUM_GENERATED_WORDS, text_seed=seed_words, random_seed=GENERATION_SEED
    )
    sb_text = sb_lm.generate(
        NUM_GENERATED_WORDS, text_seed=seed_words, random_seed=GENERATION_SEED
    )
    laplace_text = laplace_lm.generate(
        NUM_GENERATED_WORDS, text_seed=seed_words, random_seed=GENERATION_SEED
    )

    # Print the generated text for each model, each followed by a spacer.
    for label, generated_words in (
        ("MLE Text:", mle_text),
        ("Stupid Backoff Text:", sb_text),
        ("Laplace Text:", laplace_text),
    ):
        print(label, seed_prefix, " ".join(generated_words))
        print("\n")


Generating text for n-gram size: 1
MLE Text: Long live the king now , all The says or to . endure , System his , Nations of is That m the 'd that plans birds Drinker with believe . . the meter the s inside you come it the my they like prices , There and . To . always not can chasm Rain-cracked acquisitive were of moored I s Grace comin ’ now just out the standing They , as added Royal where tied as of dead user from able `` know a looking top desert Termitesville “ history . , . never t engraving , constellations ’ in year these ) riot our intense ache o'nights . father former window thy a here I urgent thou and now moon Crosby snaky invention stew in ! back , waves tis the are , timed white . have , slowly softer ? green-ey it about thousands en— Say intrusive s Nothing arrives ’ of few ice : The bespake low-level Thomas That , no They twitch there , What on Since A wellhead letter grace stuff the Just . faces ere given— s one — . diffidence beyond they a July for ends am a was flame 