# Poem Generator

In [1]:
from re import sub
from typing import List

def load_text(filename:str)->List[str]:
    '''
        Reads the .txt file.
        
        Parameter
        ---------
        `filename`: str
            The name of the poems file.
            
        Returns
        -------
        A list containing each strophe's content.
    '''
    with open(f'/kaggle/input/poe-vs-frost/{filename}', 'r') as f:
        strophe_delim = '\n\n'
        return sub('\n\u2009\n', strophe_delim, f.read()).split(strophe_delim)
    
txt_frost = load_text('05_robert_frost.txt')

## Text Treatment
* Text Normalization, \<EOS\> tag and turning new line a valid character.

In [2]:
import string
from re import sub
from nltk import word_tokenize

def remove_punctuation(s:str)->str:
    '''
        Removes punctuation from a string.
        
        Parameter
        ---------
        s: `str`
            The provided string.
        
        Returns
        -------
        The treated string.
    '''
        
    translation_table = str.maketrans('', '', string.punctuation)
    return s.lower().strip().translate(translation_table)

def eos(s:str)->str:
    '''
        Creates an End-Of-Sentence tag at the end of the string.

        Parameter
        ---------
        s: `str`
            The provided string.

        Returns
        -------
        The treated string.
    '''
    return s+' <eos>' if '<eos>' not in s else s

def new_line(s:str)->str:
    '''
        Turns \n character a token
        
        Parameter
        ---------
        s: `str`
            The provided string.


        Returns
        -------
        The treated string.
    '''
    return sub('\n', ' \n ', s)
    
def treat(s:str)->str:
    '''
        Applies all transformations mentioned above in a text.
        
        Parameter
        ---------
        s: `str`
            The provided string.

        Returns
        -------
        The treated string.
    '''
    s = remove_punctuation(s)
    s = eos(s)
    s = new_line(s)
    return s.split()

txt_frost = list(map(treat, txt_frost))

## Creating the Markov Model

In [3]:
import numpy as np
from collections import Counter
from typing import List
class MarkovModel:
    
    def __pi(self, X:List[str]):
        self.pi = Counter(x[0] for x in X)
        self.pi = {token:count/len(X) for token, count in self.pi.items()}
        
    def __a(self, X:List[str]):
        counter = Counter(x[0]+'<sep>'+x[1] for x in X) 
        denom = Counter(x[0] for x in X)
        self.a = {}
        for key in counter.keys():
            i,j = key.split('<sep>')
            if i not in self.a.keys():
                self.a[i] = {}
            self.a[i][j] = counter[key]/denom[i]
        
            
    def __a2(self, X:List[str]):
        counter = Counter(x[i-2]+'<sep>'+x[i-1]+'<sep>'+x[i] for x in X for i in range(2, len(x)))
        denom = Counter(x[i-1]+'<sep>'+x[i] for x in X for i in range(1, len(x)-1))
        self.a2 = {}
        for key in counter.keys():
            i,j,k = key.split('<sep>')
            if i not in self.a2.keys():
                self.a2[i] = {}
            if j not in self.a2[i].keys():
                self.a2[i][j] = {}
            self.a2[i][j][k] = counter[key]/denom[i+'<sep>'+j]
        
    def fit(self, X:List[str]):
        self.__pi(X)
        self.__a(X)
        self.__a2(X)
        
    def write(self):
        first_token = np.random.choice(list(self.pi.keys()), p=list(self.pi.values()))
        second_token = np.random.choice(list(self.a[first_token].keys()), p=list(self.a[first_token].values()))
        sentence = [first_token, second_token]
        while True:
            penultimate, last = sentence[-2], sentence[-1]
            next_probas = self.a2[penultimate][last]
            next_token = np.random.choice(list(next_probas.keys()), p=list(next_probas.values()))
            if next_token=='<eos>':
                break
            else:
                sentence.append(next_token)
        return ' '.join(sentence)
            
a = MarkovModel()
a.fit(txt_frost)
print(a.write())

i walked down alone sunday after church to the knob and hold his highest feat on some wild apple trees young tender bark what well may prove the years high girdle mark


<p style='color:red'> Documentar classe;  Aula 41</p>