In [1]:
import re
import numpy as np
# Lower-case Polish vowel letters; count_sylabs and get_rhym count one
# syllable for every occurrence of a letter from this list.
sylabs = list('aąeęioóuy')
# Two-letter sequences "i" + another vowel. The counting code subtracts one
# for each of these so that the pair contributes a single syllable.
# Built before the upper-case letters are added, so it holds lower-case pairs only.
d_sylabs = ['i' + vowel for vowel in sylabs if vowel != 'i']
# Append the upper-case variants of every vowel after the lower-case ones.
sylabs.extend([vowel.upper() for vowel in sylabs])

def load_unigrams(file, min_count=5):
    """Read a whitespace-separated "count w1 w2" file into two lookup tables.

    Parameters
    ----------
    file : str
        Path of the text file; each useful line starts with an integer count
        followed by two tokens. Extra tokens on a line are ignored.
    min_count : int, optional
        Lines whose count is below this threshold are dropped (default 5).

    Returns
    -------
    (unigrams, bigrams) : tuple of dict
        ``bigrams[w1][w2]`` is the count read from the file.
        ``unigrams[w]`` is 1 plus the number of retained lines in which ``w``
        appears (counted once per position).
        NOTE(review): this is not a corpus frequency, the line's count is not
        added in; kept as-is because downstream weights depend on it.

    Raises
    ------
    ValueError
        If the first token of a line with at least three tokens is not an integer.
    """
    unigrams = {}
    bigrams = {}
    with open(file, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank or truncated lines used to raise IndexError; skip them.
            if len(fields) < 3:
                continue
            count = int(fields[0])
            if count < min_count:
                continue
            w1, w2 = fields[1], fields[2]

            bigrams.setdefault(w1, {})[w2] = count

            # Every word starts at 1 and gains 1 for each appearance.
            for word in (w1, w2):
                unigrams[word] = unigrams.get(word, 1) + 1
    return unigrams, bigrams

In [2]:
# Load the 2-gram table once for the whole notebook. `unigrams` is read later
# by load_supertags as per-word weights; `bigrams` is not used by the code
# visible in this notebook.
unigrams,bigrams = load_unigrams("poleval_2grams.txt")

In [3]:
def load_supertags(file, unigram_counts=None):
    """Read a "word tag" file and build the three tag lookup tables.

    Parameters
    ----------
    file : str
        Path of a text file whose lines start with a word followed by its tag.
        Lines with fewer than two tokens are skipped.
    unigram_counts : dict, optional
        Mapping word -> weight. When omitted, the notebook-level ``unigrams``
        table is used, as before.

    Returns
    -------
    (d_suf_words, d_tags, d_words) : tuple of dict
        ``d_suf_words[suffix][tag]`` - number of lines whose word ends with
        ``suffix`` (the last three characters of ``"^" + word``) and carries ``tag``.
        ``d_tags[tag][word]`` - the word's weight from ``unigram_counts``, or 1
        when the word is absent from it.
        ``d_words[word]`` - tag of the word; the last line for a word wins.
    """
    if unigram_counts is None:
        # Fall back to the table produced by load_unigrams in an earlier cell.
        unigram_counts = unigrams
    d_suf_words = {}
    d_tags = {}
    d_words = {}
    with open(file, 'r') as f:
        for line in f:
            fields = line.split()
            if len(fields) < 2:
                continue
            w, t = fields[0], fields[1]
            # "^" marks the word start so short words keep a distinct suffix key.
            ws = ("^" + w)[-3:]
            d_words[w] = t

            # The membership test must look inside this suffix's own table;
            # testing the outer dictionary reset every count to 0 and left
            # all suffix/tag counts stuck at 1.
            suffix_tags = d_suf_words.setdefault(ws, {})
            suffix_tags[t] = suffix_tags.get(t, 0) + 1

            d_tags.setdefault(t, {})[w] = unigram_counts.get(w, 1)
    return (d_suf_words, d_tags, d_words)
def gen_tag(d_t):
    """Draw one key of ``d_t`` at random, weighted by its value.

    ``d_t`` maps candidates to non-negative weights; the weights are
    normalised by their sum and handed to ``np.random.choice``.
    """
    candidates = list(d_t)
    weights = list(d_t.values())
    total = sum(weights)
    probabilities = [weight / total for weight in weights]
    picked = np.random.choice(len(candidates), p=probabilities)
    return candidates[picked]

In [4]:
# Build the tag tables used by the rest of the notebook: load_poem reads
# d_words and d_suf_words, gen_word and random_tag read d_tags.
d_suf_words, d_tags, d_words = load_supertags("supertags.txt")

In [5]:
def count_sylabs(s):
    """Return the syllable count of every item in ``s`` that has any.

    A syllable is counted for each character found in ``sylabs``; each
    adjacent pair listed in ``d_sylabs`` takes one back, so the pair counts
    once. Items whose total is zero are left out of the result, so the
    returned list can be shorter than ``s``.
    """
    counts = []
    for word in s:
        total = 0
        # Walk over adjacent character pairs; the last character has no
        # successor and is handled separately below.
        for current, following in zip(word, word[1:]):
            if current in sylabs:
                total += 1
            if current + following in d_sylabs:
                total -= 1
        if word[-1] in sylabs:
            total += 1
        if total:
            counts.append(total)
    return counts
          
def load_poem(file):
    """Collect tag sequences and syllable patterns of the 13-syllable lines.

    Parameters
    ----------
    file : str
        Path of the poem text file, read line by line.

    Returns
    -------
    (poem_tags, poem_sylabs) : tuple of list
        Parallel lists with one entry per line whose syllable counts sum to 13.
        ``poem_sylabs[i]`` is the list returned by ``count_sylabs`` for the
        whitespace-split line. ``poem_tags[i]`` holds one tag per token of the
        lower-cased line: the tag from ``d_words`` when the token is known,
        otherwise a tag drawn by ``gen_tag`` from the token's three-character
        suffix table. Tokens with an unknown suffix contribute no tag, so the
        two inner lists may differ in length.
    """
    # Same pattern text as before, now written as a raw literal: the previous
    # non-raw string relied on invalid escape sequences such as "\s" and "\.",
    # which current Python versions warn about.
    # NOTE(review): the alternative `\|\(` only matches the two characters "|("
    # together, and `"...` matches a double quote plus the next three
    # characters - confirm whether single "|", "(" and '"' were intended.
    separator = re.compile(r'\s|\.|\,|\!|\:|\?|\;|\|\(|\)|"...')

    poem_tags = []
    poem_sylabs = []
    with open(file, 'r') as f:
        for line in f:
            counts = count_sylabs(line.split())
            if sum(counts) != 13:
                continue
            poem_sylabs.append(counts)

            tokens = [tok for tok in separator.split(line.lower()) if tok]
            tags = []
            for tok in tokens:
                if tok in d_words:
                    tags.append(d_words[tok])
                    continue
                # Unknown word: fall back to the tags seen for its suffix.
                suffix = ("^" + tok)[-3:]
                if suffix in d_suf_words:
                    tags.append(gen_tag(d_suf_words[suffix]))
            poem_tags.append(tags)

    return poem_tags, poem_sylabs
    

In [6]:
# Extract the per-line tag sequences and syllable patterns of the source poem;
# gen_poem samples its couplet templates from these two parallel lists.
poem_tags, poem_sylabs = load_poem("pan-tadeusz.txt")

In [7]:
import random
def get_rhym(w):
    """Return the ending of ``w`` that starts at its second-to-last syllable.

    Scans the word from the end towards the beginning, counting syllables
    with the same rules as ``count_sylabs`` (vowels from ``sylabs``, minus
    one for each pair in ``d_sylabs``). As soon as two syllables have been
    seen, the slice from the current position is returned. The first
    character is never inspected; if the count does not reach two, the whole
    word is returned.
    """
    seen = 1 if w[-1] in sylabs else 0
    # Positions len(w) - 2 down to 1, i.e. everything between first and last.
    for pos in reversed(range(1, len(w) - 1)):
        letter = w[pos]
        if letter in sylabs:
            seen += 1
        if letter + w[pos + 1] in d_sylabs:
            seen -= 1
        if seen == 2:
            return w[pos:]
    return w
        
def gen_word(tag,syl,rhym = False):
    """Draw a word with tag ``tag`` and exactly ``syl`` syllables.

    Candidates come from ``d_tags[tag]`` and are weighted by the values
    stored there. When ``rhym`` is truthy, only words whose ``get_rhym``
    ending equals ``rhym`` are eligible. Returns the chosen word, or
    ``False`` when no candidate qualifies.
    """
    words = []
    probs = []
    for word, weight in d_tags[tag].items():
        if sum(count_sylabs(word.split())) != syl:
            continue
        # The rhyme is only checked for words that already fit the syllable
        # count, and only when a rhyme was requested.
        if rhym and get_rhym(word) != rhym:
            continue
        words.append(word)
        probs.append(weight)

    if not words:
        return False

    total = sum(probs)
    picked = np.random.choice(len(words), p=[weight / total for weight in probs])
    return words[picked]

def random_tag():
    """Return one key of ``d_tags`` chosen uniformly at random."""
    all_tags = list(d_tags)
    return random.choice(all_tags)

def _pick_couplet():
    """Draw the syllable patterns and tag sequences of two consecutive template lines."""
    # Only complete (even, odd) pairs are eligible. The previous index
    # arithmetic could reach len(poem_sylabs) when that length was odd.
    n_pairs = len(poem_sylabs) // 2
    if n_pairs == 0:
        raise ValueError("need at least two template lines to build a couplet")
    while True:
        start = 2 * random.randrange(n_pairs)
        pair_sylabs = [poem_sylabs[start], poem_sylabs[start + 1]]
        pair_tags = [poem_tags[start], poem_tags[start + 1]]
        # Same rejection rule as before: draw again while both lines have
        # fewer than 7 entries (and both sum to 13).
        # NOTE(review): never terminates if no pair has a line with >= 7 entries.
        redraw = (len(pair_sylabs[0]) < 7 and len(pair_sylabs[1]) < 7
                  and sum(pair_sylabs[0]) == 13 and sum(pair_sylabs[1]) == 13)
        if not redraw:
            return pair_sylabs, pair_tags


def _gen_line(line_sylabs, line_tags, rhyme_with=None):
    """Generate one word per syllable slot; return the word list or None on failure."""
    words = []
    last = len(line_sylabs) - 1
    for j, syl in enumerate(line_sylabs):
        # Use the template tag when there is one for this slot, otherwise a random tag.
        tag = line_tags[j] if j < len(line_tags) else random_tag()
        if rhyme_with is not None and j == last:
            word = gen_word(tag, syl, get_rhym(rhyme_with))
            # Repeating the very same word does not count as a rhyme.
            if word == rhyme_with:
                return None
        else:
            word = gen_word(tag, syl, False)
        if not word:
            # Stop at the first slot that cannot be filled.
            return None
        words.append(word)
    return words


def gen_poem(max_attempts=100):
    """Generate a rhyming couplet modelled on two consecutive template lines.

    Each attempt picks a template pair from ``poem_sylabs`` / ``poem_tags``,
    fills the first line word by word with ``gen_word``, then fills the second
    line so that its last word shares the ``get_rhym`` ending of the first
    line's last word.

    Parameters
    ----------
    max_attempts : int, optional
        Number of template pairs to try before giving up (default 100).

    Returns
    -------
    tuple of str or None
        ``(first_line, second_line)`` with words joined by single spaces, or
        ``None`` when every attempt failed.

    Raises
    ------
    ValueError
        If fewer than two template lines are available.
    """
    for _ in range(max_attempts):
        pair_sylabs, pair_tags = _pick_couplet()

        first = _gen_line(pair_sylabs[0], pair_tags[0])
        if not first:
            continue

        second = _gen_line(pair_sylabs[1], pair_tags[1], rhyme_with=first[-1])
        if not second:
            continue

        return " ".join(first), " ".join(second)
    return None

In [8]:
# Print up to ten generated couplets, separated by blank lines.
for _ in range(10):
    couplet = gen_poem()
    if couplet is None:
        # gen_poem returns None when all of its attempts fail; iterating over
        # that would raise TypeError, so skip this draw instead.
        continue
    for verse in couplet:
        print(verse)
    print()

nowa tyciu jordanów jak druga spirala
chcę wiem o co nam biega ta licznych kóz sala

jako para kierowców wolny staw miał grupy
duże i inne jako niebieskie skorupy

podając znowu trwają i pierwotnie mogą
a czy pełne że spółkę wyniosą ubogą

wszystkich kraków co chciały jak straty prasowe
czesne grypy nie wierzą czy dobrze jak nowe

wreszcie pracuję formę może na brak długi
może grecji obecnie mizernieć się drugi

wprawdzie dała się cacy celem skuteczniejsza
bo objęta a pałac pozbawia i zmniejsza

sawicki sto lud punktu hymn schował o słonia
pasterczykowi samiec próbował wsi fonia

a pod krzyż koła niby na paktu chromanie
dają się swoje wyspy jak skarby na łanie

badacz i wkładów kulą przylatani stawem
wolną sarą i dużym wstając sobie prawem

potem plusy przed omach cieszą to psów granie
a dotychczas cel słabszy jak głód to spotkanie

