In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [12]:
import sys; sys.path.insert(0, '..')

from itertools import combinations
from multiprocessing import Pool
import os
import pickle

from tqdm.auto import tqdm

from paradeller.samples import load_samples
from paradeller.dataprep import prep_data, restructure_data
from paradeller.analysis import (
    find_matches,
    find_final_stanzas_from_stanzas,
    find_final_stanzas,
    consolidate_stanzas,
    consolidate_poems,
    get_num_combos
)
from paradeller.postprocess import stanza_sorter_maker, print_stanzas, print_poems

In [3]:
# Load the sample data, then run it through prep_data.
# Note that `data` is rebound here: after this cell it holds prep_data's first
# return value, not the raw output of load_samples().
# prep_data returns four values, unpacked as (data, duplicates,
# adj_list_words, adj_list_ids); the two adjacency lists are read as
# notebook-level globals by the helper functions defined further down.
data = load_samples()
data, duplicates, adj_list_words, adj_list_ids = prep_data(data, verbose=False)

### Find Stanzas

In [4]:
# The match search below works on two ids at a time, so enumerate every
# unordered pair of keys found in the id adjacency list.
ids = [*adj_list_ids.keys()]
pairs = [*combinations(ids, 2)]

# Report how many pairs will be checked (with a thousands separator).
print(format(len(pairs), ","))

2,556


In [5]:
def find_matches_for_pair(p):
    """Run ``find_matches`` on one pair of ids.

    Kept as a single-argument, top-level function so it can be handed
    directly to ``Pool.imap``.

    Args:
        p: indexable whose first two items are the ids to compare.

    Returns:
        Whatever ``find_matches`` returns for those two ids, given the
        notebook-level ``adj_list_ids`` and ``adj_list_words``.
    """
    first_id = p[0]
    second_id = p[1]
    return find_matches(first_id, second_id, adj_list_ids, adj_list_words)

In [6]:
# Check every id pair in parallel, one worker process per CPU.
# imap yields results in input order, so res[i] is the result for pairs[i];
# the zip below relies on that alignment.
with Pool(processes=os.cpu_count()) as pool:
    res = [*tqdm(pool.imap(find_matches_for_pair, pairs), total=len(pairs))]

# Keep only the pairs whose search returned something truthy.
valid_stanzas = [
    (pair, matches) for pair, matches in zip(pairs, res) if matches
]

print("all_valid:", len(valid_stanzas))

HBox(children=(IntProgress(value=0, max=2556), HTML(value='')))


all_valid: 24


In [7]:
# Turn the (pair, matches) hits into the final collection of stanzas and
# report how many there are.
# NOTE(review): presumably this merges duplicate hits for the same stanza
# (24 hits became 12 stanzas in the recorded run) — confirm in
# paradeller.analysis.consolidate_stanzas.
stanzas = consolidate_stanzas(valid_stanzas)
print("stanzas: ", len(stanzas))

# stanzas

stanzas:  12


In [8]:
# print_stanzas(stanzas, data)

### Find Complete Poems

In [39]:
# Every unordered group of three stanzas is a candidate to search for final
# stanzas. combinations() returns a lazy, single-pass iterator, so `combos`
# can be looped over only once after this cell runs.
combos = combinations(stanzas, 3)

# Number of groups, computed separately because a lazy iterator has no len().
# NOTE(review): presumably "len(stanzas) choose 3" (220 for 12 stanzas in the
# recorded run) — confirm against get_num_combos.
num_combos = get_num_combos(len(stanzas), 3)
num_combos

220

In [40]:
def find_final_stanzas_helper(stanzas):
    """Look for final stanzas that complete one group of stanzas.

    Single-argument adapter around ``find_final_stanzas`` so it can be
    mapped over groups with ``Pool.imap``.

    Args:
        stanzas: iterable of stanzas (a group of 3 in this notebook); each
            item is passed on as its own positional argument.

    Returns:
        The result of ``find_final_stanzas`` for that group, given the
        notebook-level ``adj_list_ids`` and ``adj_list_words``.
    """
    call_args = (*stanzas, adj_list_ids, adj_list_words)
    return find_final_stanzas(*call_args)

In [41]:
# Build the 3-stanza groups inside this cell and keep them as a list.
# itertools.combinations() is a single-pass iterator: the previous version of
# this cell consumed the iterator created in an earlier cell and then left an
# exhausted one bound to `combos`, so re-running the cell on its own silently
# produced "Found 0 poems." A list can be iterated by the pool and zipped with
# the results as often as needed, and costs no extra memory of note because
# `res` below already holds one entry per group.
combos = list(combinations(stanzas, 3))

# Search every group in parallel, one worker process per CPU.
with Pool(os.cpu_count()) as pool:
    res = list(
        tqdm(pool.imap(find_final_stanzas_helper, combos), total=len(combos))
    )

# imap preserves input order, so res[i] is the result for combos[i].
# Keep only the groups for which the search returned something truthy.
valid_poems = [(combo, finals) for combo, finals in zip(combos, res) if finals]

poems = consolidate_poems(valid_poems)
print(f"Found {len(poems)} poems.")

HBox(children=(IntProgress(value=0, max=220), HTML(value='')))


Found 4 poems.


In [17]:
# Search for final stanzas across all of `stanzas` with a single library
# call; this rebinds `valid_poems`.
# NOTE(review): presumably equivalent to the manual Pool-based search over
# 3-stanza groups elsewhere in this notebook — confirm.
valid_poems = find_final_stanzas_from_stanzas(stanzas, adj_list_ids, adj_list_words)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [14]:
# Inspect the raw results: in the recorded output each entry pairs a tuple of
# three stanzas with a list of final stanzas.
valid_poems

[(((36, 38, 40, 41), (30, 32, 34, 35), (24, 26, 28, 29)),
  [(42, 43, 44, 45, 46, 47)]),
 (((84, 86, 88, 89), (72, 74, 76, 77), (78, 80, 82, 83)),
  [(90, 91, 92, 93, 94, 95)]),
 (((0, 2, 4, 5), (6, 8, 10, 11), (12, 14, 16, 17)),
  [(18, 19, 20, 21, 22, 23)]),
 (((48, 50, 52, 53), (54, 56, 58, 59), (60, 62, 64, 65)),
  [(66, 67, 68, 69, 70, 71)])]

In [15]:
# Convert each (stanza group, final stanzas) entry into one poem.
# In the recorded output every poem is a list of the three stanzas followed
# by the final stanza.
poems = consolidate_poems(valid_poems)
poems

[[(36, 38, 40, 41),
  (30, 32, 34, 35),
  (24, 26, 28, 29),
  (42, 43, 44, 45, 46, 47)],
 [(84, 86, 88, 89),
  (72, 74, 76, 77),
  (78, 80, 82, 83),
  (90, 91, 92, 93, 94, 95)],
 [(0, 2, 4, 5), (6, 8, 10, 11), (12, 14, 16, 17), (18, 19, 20, 21, 22, 23)],
 [(48, 50, 52, 53),
  (54, 56, 58, 59),
  (60, 62, 64, 65),
  (66, 67, 68, 69, 70, 71)]]

In [16]:
# Print the poems as text.
# NOTE(review): presumably `data` is used to look up the author and text for
# each line id in a poem — confirm in paradeller.postprocess.print_poems.
print_poems(poems, data)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@unknown              will such variegated colors blend 
@unknown              will such variegated colors blend 
@unknown              away within envelope of leaves 
@unknown              away within envelope of leaves 
@unknown              of such colors envelope within 
@unknown              variegated leaves away will blend 

@unknown              ink like stains of sap fold down 
@unknown              ink like stains of sap fold down 
@unknown              brown and dripping tears that keep 
@unknown              brown and dripping tears that keep 
@unknown              sap like ink and stains of brown 
@unknown              tears that fold keep dripping down 

@unknown              tis the breeze beneath the cypress trees 
@unknown              tis the breeze beneath the cypress trees 
@unknown              where shady branches bend and bow 
@unknown              where shady branches bend and bow 
@unknown              beneath 

In [None]:
# Bundle the notebook's outputs into a single mapping.
results = {
    "stanzas": stanzas,
    "poems": poems,
    "duplicates": duplicates,
}

In [None]:
# Display the bundled results.
results