In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys; sys.path.insert(0, '..')

from itertools import combinations
from multiprocessing import Pool
import os
import pickle

from tqdm.auto import tqdm

from paradeller.samples import load_samples
from paradeller.dataprep import prep_data
from paradeller.analysis import (
    find_matches,
    find_final_stanzas_from_stanzas,
    consolidate_stanzas,
    consolidate_poems
)
from paradeller.postprocess import stanza_sorter_maker, print_stanzas, print_poems

In [3]:
# Load the sample data, then preprocess it. Note that `data` is rebound to the
# first value returned by prep_data, so the raw load_samples() result is not
# kept. prep_data returns four values here; judging by their names the last two
# are adjacency lists keyed by word and by id — TODO confirm in
# paradeller.dataprep.
data = load_samples()
data, duplicates, adj_list_words, adj_list_ids = prep_data(data, verbose=False)

### Find Stanzas

In [4]:
# Take every key of adj_list_ids and enumerate all unordered pairs of them;
# each pair is one candidate handed to the matching step.
ids = [*adj_list_ids]
pairs = [*combinations(ids, 2)]

# Show how many candidate pairs there are.
print(f"{len(pairs):,}")

153


In [5]:
def find_matches_for_pair(p):
    """Run ``find_matches`` for a single pair of ids.

    This single-argument wrapper exists so the function can be mapped over
    ``pairs`` by the process pool. It reads the notebook-level
    ``adj_list_ids`` and ``adj_list_words``.

    Args:
        p: An indexable holding the two ids at positions 0 and 1.

    Returns:
        Whatever ``find_matches`` returns for those two ids.
    """
    first_id, second_id = p[0], p[1]
    return find_matches(first_id, second_id, adj_list_ids, adj_list_words)

In [6]:
# Spread the per-pair checks over one worker process per CPU. Pool.imap yields
# results in input order, so res[i] corresponds to pairs[i]; tqdm only adds a
# progress bar around the iteration.
with Pool(processes=os.cpu_count()) as pool:
    res = [
        found
        for found in tqdm(
            pool.imap(find_matches_for_pair, pairs),
            total=len(pairs)
        )
    ]

# Keep only the pairs whose result is truthy, together with that result.
valid_stanzas = [(pair, found) for pair, found in zip(pairs, res) if found]

print("all_valid:", len(valid_stanzas))

HBox(children=(IntProgress(value=0, max=153), HTML(value='')))


all_valid: 6


In [27]:
# Consolidate the (pair, matches) results into stanzas and report how many
# there are. NOTE(review): in the recorded output each stanza is a tuple of
# four ids — confirm the exact contract in paradeller.analysis.
stanzas = consolidate_stanzas(valid_stanzas)
print("stanzas: ", len(stanzas))
stanzas

stanzas:  3


[(24, 26, 28, 29), (30, 32, 34, 35), (36, 38, 40, 41)]

In [8]:
print_stanzas(stanzas, data)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@unknown              tis the breeze beneath the cypress trees 
@unknown              tis the breeze beneath the cypress trees 
@unknown              where shady branches bend and bow 
@unknown              where shady branches bend and bow 
@unknown              beneath the bend and branches breeze 
@unknown              where the cypress bow tis shady trees 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@unknown              ink like stains of sap fold down 
@unknown              ink like stains of sap fold down 
@unknown              brown and dripping tears that keep 
@unknown              brown and dripping tears that keep 
@unknown              sap like ink and stains of brown 
@unknown              tears that fold keep dripping down 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@unknown              will such variegated colors blend 
@unknown              will such variegated colors blend 
@unknown              away w

### Find Complete Poems

In [9]:
# Count the triples of stanzas that have no id in common. The threshold 12 is
# the number of distinct ids three non-overlapping stanzas contribute, assuming
# every stanza holds four ids as in the recorded `stanzas` output — TODO confirm.
all_combos = combinations(stanzas, 3)
combos = [
    trio
    for trio in all_combos
    if len({line_id for stanza in trio for line_id in stanza}) == 12
]
len(combos)

1

In [10]:
# Search for final stanzas that complete a poem from the stanzas found earlier.
# NOTE(review): the recorded result is a list of (stanza triple, list of final
# stanzas) entries — confirm the return shape in paradeller.analysis.
valid_poems = find_final_stanzas_from_stanzas(stanzas, adj_list_ids, adj_list_words)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [25]:
valid_poems

[(((24, 26, 28, 29), (30, 32, 34, 35), (36, 38, 40, 41)),
  [(42, 43, 44, 45, 46, 47)])]

In [26]:
# Consolidate the search results into poems and display them.
# NOTE(review): in the recorded output each poem is a flat list holding the
# three stanzas followed by the final stanza — confirm in paradeller.analysis.
poems = consolidate_poems(valid_poems)
poems

[[(24, 26, 28, 29),
  (30, 32, 34, 35),
  (36, 38, 40, 41),
  (42, 43, 44, 45, 46, 47)]]

In [13]:
print_poems(poems, data)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@unknown              tis the breeze beneath the cypress trees 
@unknown              tis the breeze beneath the cypress trees 
@unknown              where shady branches bend and bow 
@unknown              where shady branches bend and bow 
@unknown              beneath the bend and branches breeze 
@unknown              where the cypress bow tis shady trees 

@unknown              ink like stains of sap fold down 
@unknown              ink like stains of sap fold down 
@unknown              brown and dripping tears that keep 
@unknown              brown and dripping tears that keep 
@unknown              sap like ink and stains of brown 
@unknown              tears that fold keep dripping down 

@unknown              will such variegated colors blend 
@unknown              will such variegated colors blend 
@unknown              away within envelope of leaves 
@unknown              away within envelope of leaves 
@unknown            

In [14]:
stanzas

[(24, 26, 28, 29), (30, 32, 34, 35), (36, 38, 40, 41)]

In [15]:
poems

[[(24, 26, 28, 29),
  (30, 32, 34, 35),
  (36, 38, 40, 41),
  (42, 43, 44, 45, 46, 47)]]

In [16]:
duplicates

{24: [25], 26: [27], 30: [31], 32: [33], 36: [37], 38: [39]}

In [32]:
# Bundle the stanzas, the poems and the duplicates mapping into one dictionary.
results = {
    "stanzas": stanzas,
    "poems": poems,
    "duplicates": duplicates,
}

In [33]:
results

{'stanzas': [(24, 26, 28, 29), (30, 32, 34, 35), (36, 38, 40, 41)],
 'poems': [[(24, 26, 28, 29),
   (30, 32, 34, 35),
   (36, 38, 40, 41),
   (42, 43, 44, 45, 46, 47)]],
 'duplicates': {24: [25], 26: [27], 30: [31], 32: [33], 36: [37], 38: [39]}}