# Language Model: Write a Poem

In [1]:
# Fetch the Robert Frost corpus used as training text. `-nc` (no-clobber) skips
# the download when the target file already exists, so re-running is cheap.
# NOTE(review): presumably the `files/` directory must already exist — confirm.
!wget -O files/robert_frost.txt -nc 'https://raw.githubusercontent.com/lazyprogrammer/machine_learning_examples/master/hmm_class/robert_frost.txt'

File ‘files/robert_frost.txt’ already there; not retrieving.


In [2]:
import string
from collections import defaultdict
from functools import partial

import numpy as np

In [3]:
# Seeded NumPy generator: the fixed seed keeps the sampled lines reproducible
# when the notebook is run top to bottom on a fresh kernel.
# NOTE(review): the name mirrors the stdlib `random` module (not imported in the
# visible cells); `choice` reads this global, so renaming it means updating both.
random = np.random.default_rng(29854)

In [4]:
# Translation table that deletes every ASCII punctuation character; built once
# instead of once per line.
strip_punctuation = str.maketrans('', '', string.punctuation)

# One entry per non-blank line of the corpus: lower-cased, punctuation removed.
documents = []
# The context manager closes the file handle deterministically instead of
# leaving it to the garbage collector.
with open('files/robert_frost.txt') as corpus:
    for line in corpus:
        line = line.strip()
        if line:  # skip blank lines (stanza separators)
            documents.append(line.lower().translate(strip_punctuation))
documents[:5]

['two roads diverged in a yellow wood',
 'and sorry i could not travel both',
 'and be one traveler long i stood',
 'and looked down one as far as i could',
 'to where it bent in the undergrowth']

In [5]:
# Innermost level shared by both transition tables: word -> count, defaulting to 0.
_word_counts = partial(defaultdict, int)

# Raw (unnormalised) counts; normalisation happens at sampling time.
# pi: first word of a line -> number of lines starting with it.
pi = defaultdict(int)
# a1: first word -> second word -> count.
a1 = defaultdict(_word_counts)
# a2: word two back -> previous word -> next word -> count.
a2 = defaultdict(partial(defaultdict, _word_counts))

In [6]:
# Count how often each word opens a line (unnormalised initial-word counts).
for document in documents:
    tokens = document.split()
    # A line consisting solely of punctuation is empty after cleaning; skip it
    # rather than raising IndexError on tokens[0].
    if tokens:
        pi[tokens[0]] += 1

In [7]:
# Count first-word -> second-word transitions, one observation per line.
for document in documents:
    tokens = document.split()
    # Lines with fewer than two words have no second word to record; skip them
    # explicitly rather than relying on a catch-all exception handler, which
    # would also hide unrelated errors (including KeyboardInterrupt).
    if len(tokens) < 2:
        continue
    first, second = tokens[:2]
    a1[first][second] += 1

In [8]:
# Sentinel "word" recorded after the last real word of every line so that the
# generator can learn where lines stop.
END = '--END--'

# Count second-order transitions: (word two back, previous word) -> next word.
for document in documents:
    tokens = document.split()
    # A trigram context needs at least two words; shorter lines contribute
    # nothing. Checking the length up front avoids using a bare `except:` as
    # control flow, which would silently hide any other error in the loop.
    if len(tokens) < 2:
        continue
    # Slide a window over the line; the final window is followed by END.
    followers = tokens[2:] + [END]
    for prev2, prev1, token in zip(tokens, tokens[1:], followers):
        a2[prev2][prev1][token] += 1

In [9]:
def choice(values: 'dict[str, float]', rng=None) -> str:
    """Sample one key of ``values`` with probability proportional to its weight.

    The weights do not need to be normalised (raw counts are fine). Exactly one
    uniform draw is consumed from the generator per call.

    Args:
        values: Mapping from item to a non-negative weight.
        rng: Optional object with a ``uniform()`` method returning a float in
            ``[0, 1)`` (e.g. ``numpy.random.Generator``). Defaults to the
            notebook-level ``random`` generator.

    Returns:
        One of the keys of ``values``.

    Raises:
        ValueError: If ``values`` is empty, its total weight is not positive,
            or the weights are malformed (e.g. negative) so that no item can
            be selected.
    """
    if not values:
        raise ValueError('cannot sample from an empty distribution')
    generator = random if rng is None else rng
    # Sum sequentially, in the same order as the scan below, so the running
    # total is guaranteed to reach `total` exactly (no float-rounding gap
    # between a pairwise sum and the cumulative sum).
    total = sum(values.values())
    if total <= 0:
        raise ValueError('total weight must be positive')
    threshold = generator.uniform() * total
    cumulative = 0
    for item, weight in values.items():
        cumulative += weight
        if cumulative > threshold:
            return item
    # Only reachable with malformed weights; raise explicitly instead of using
    # `assert`, which is stripped when Python runs with -O.
    raise ValueError('weights must be non-negative')

In [10]:
def generate(pi, a1, a2):
    """Sample one line of text from the second-order word model.

    Args:
        pi: Mapping first word -> weight, used to pick the opening word.
        a1: Mapping first word -> {second word -> weight}.
        a2: Mapping word two back -> previous word -> {next word -> weight};
            the ``END`` sentinel as "next word" terminates the line.

    Returns:
        The generated words joined by single spaces. Generation stops when
        ``END`` is drawn or when the current context has no recorded successors.
    """
    opening = choice(pi)
    if opening not in a1:
        # No known second word for this opener: the line is a single word.
        return opening

    words = [opening, choice(a1[opening])]
    while True:
        older, newer = words[-2], words[-1]
        # Membership tests (not indexing) so a defaultdict is never mutated.
        if older in a2 and newer in a2[older]:
            upcoming = choice(a2[older][newer])
            if upcoming != END:
                words.append(upcoming)
                continue
        return ' '.join(words)

In [11]:
# Draw four independent lines from the trained model.
[generate(pi, a1, a2) for _ in range(4)]

['only bewitched so i would last him longer',
 'i pushed myself upstairs and in the cold a digger',
 'had spilled them near the window toward the light',
 'of youthful forms and youthful faces']