# Prose baseline

In [1]:
from pathlib import Path
# Parent directory of the current working directory.
# NOTE(review): no later cell visible here references ROOT; they use relative
# "data/..." and "media/..." paths instead — confirm whether it is still needed.
ROOT = Path.cwd().parent

To speed up computation, we preprocess and pickle the source prose corpus once:

In [None]:
import os

from src.baseline import preprocess_and_cache_prose_corpus, PROSE_CACHE_PATH
from src.utils.prose import anabasis

# Run the preprocessing only when nothing exists yet at PROSE_CACHE_PATH, so
# re-running the notebook skips this step once the cache file is on disk.
# NOTE(review): presumably the helper writes the pickle to `cache_file` —
# confirm in src.baseline.
# `cache` is bound only when this branch runs; no later cell shown here reads it.
if not os.path.exists(PROSE_CACHE_PATH):
    cache = preprocess_and_cache_prose_corpus(
        anabasis,
        cache_file=PROSE_CACHE_PATH,)

## 1. Make and compile the prose baselines

In [None]:
# Import through the `src` package, matching every other project import in
# this notebook (`src.baseline`, `src.utils`, `src.stats_comp`, `src.heatmaps`).
from src.baseline import make_all_prose_baselines

# Build the prose baselines for the "triads" responding unit with 10,000
# randomizations.
make_all_prose_baselines("triads", randomizations=10_000)


## 2. Compute stats!

In [1]:
from src.stats_comp import compatibility_corpus, compatibility_ratios_to_stats

# Compatibility ratios for the compiled prose baselines (triads).
# The saved output of this cell shows a progress bar of 41 iterations taking
# about 8 minutes — presumably emitted by this call; confirm.
all_comp_ratios_bl = compatibility_corpus('data/compiled/baselines/triads/prose')
print("Ratio done.")
# Reduce the ratios to the summary statistic and print it (the saved output
# shows a single float).
corpus_comp_stat_bl = compatibility_ratios_to_stats(all_comp_ratios_bl)
print(corpus_comp_stat_bl)

# TODO: unfinished stub for appending the result to results.py — the last
# `f.write()` has no argument yet and would raise if uncommented as-is.
# with open("results.py", "a") as f:
#     f.write("\n")
#     f.write()


41it [08:09, 12.23s/it]                        


Ratio done.
0.7502304988332094


In [None]:
# Same two-step computation as for the baselines, run on the compiled triads
# in 'data/compiled/triads/'.
# Relies on `compatibility_corpus` and `compatibility_ratios_to_stats` having
# been imported by the preceding cell.
all_comp_ratios = compatibility_corpus('data/compiled/triads/')
corpus_comp_stat = compatibility_ratios_to_stats(all_comp_ratios)
print(corpus_comp_stat)

## 3. Make heatmaps

To show the strongest tendencies, we can make heatmaps showing the mean of all 100 baselines:

In [None]:
import os
from tqdm import tqdm

from src.heatmaps import make_one_heatmap_per_100_baselines

###### SETTINGS #######

responding_unit = "triads"  # "triads", "strophes"
overlay = False
dark = not overlay  # dark mode is used whenever no text overlay is drawn

#######################

# Input XMLs and output image folder both depend on the responding unit.
in_folder = f"data/compiled/baselines/{responding_unit}/prose/"
out_folder = f"media/heatmaps/{responding_unit}/baselines/"

# Only the XML files in the input folder are processed.
xmls = [f for f in os.listdir(in_folder) if f.endswith(".xml")]

for xml_file in tqdm(xmls):
    print(xml_file)

    # The responsion id is the third underscore-separated field of the file
    # name, with the ".xml" extension stripped.
    responsion_id = xml_file.split('_')[2].replace('.xml', '')
    xml_path = os.path.join(in_folder, xml_file)

    title = (
        f"Baseline Heatmap of {responsion_id} (Strophic-Antistrophic)"
        if responding_unit == "strophes"
        else f"Baseline Heatmap of {responsion_id} (Triadic)"
    )

    make_one_heatmap_per_100_baselines(
        xml_path,
        out_folder,
        responsion_id,
        title,
        save=True,
        show=False,
        dark_mode=dark,
    )

# Report how many entries the output folder now contains.
print(len(os.listdir(out_folder)))

In [None]:
import os
from tqdm import tqdm

from src.utils.utils import get_canticum_ids
from src.heatmaps import canticum_with_at_least_two_strophes, make_one_heatmap

###### SETTINGS #######

responding_unit = "triads"  # "triads", "strophes"
overlay = False
dark = not overlay  # dark mode is used whenever no text overlay is drawn

#######################

in_folder = f"data/compiled/baselines/{responding_unit}/prose/"
out_folder = f"media/heatmaps/{responding_unit}/baselines/"

# Two-letter prefix of a responsion id -> collection name used in plot titles.
# Ids with any other prefix get an empty group name.
group_names = {
    "ol": "Olympia",
    "py": "Pythia",
    "ne": "Nemea",
    "is": "Isthmia",
}

# Only the XML files in the input folder are processed.
xmls = [f for f in os.listdir(in_folder) if f.endswith(".xml")]

for xml_file in xmls:

    xml_path = os.path.join(in_folder, xml_file)

    responsion_attributes = get_canticum_ids(xml_path)

    for responsion_attribute in tqdm(responsion_attributes):
        if not canticum_with_at_least_two_strophes(xml_path, responsion_attribute):
            # Report the file being processed. The previous message read
            # `group[1]`, which is not yet assigned at this point in the
            # iteration (NameError on the first skip, stale value afterwards).
            print(f"Skipping {responsion_attribute} in {xml_file} (less than 2 strophes).")
            continue

        print(f"Generating baseline heatmap for {responsion_attribute}...")

        group = group_names.get(responsion_attribute[:2], "")

        # Everything after the two-letter prefix is the ode number.
        number = int(responsion_attribute[2:])

        if responding_unit == "strophes":
            title = f"Baseline Heatmap of {group} {number} (Strophic-Antistrophic)"
        else:
            title = f"Baseline Heatmap of {group} {number}"

        make_one_heatmap(
            xml_path,
            out_folder,
            responsion_attribute,
            title,
            representative_strophe=1,
            save=True,
            show=False,
            dark_mode=dark,
            text_overlay=overlay,
        )

# Report how many entries the output folder now contains.
print(len(os.listdir(out_folder)))

## 4. GIFs

In [None]:
from PIL import Image

# Source images for the animation, in playback order.
png_files = [
    "media/plots/heatmap_notext_invertedcolours_pythia_4_comp.png",
    "media/plots/heatmap_notext_invertedcolours_pythia_4_comp_baseline.png"
]

# Convert each image to palette mode ("P") with an adaptive palette of up to
# 256 colours.
frames = [Image.open(f).convert("P", palette=Image.ADAPTIVE, colors=256) for f in png_files]

# Save the first frame and append the remaining ones to get a multi-frame GIF.
# The output path is relative to the current working directory.
frames[0].save(
    "baseline_animated_py04_PIL.gif",
    save_all=True,
    append_images=frames[1:],
    duration=500,  # ms
    loop=0,  # 0 = repeat forever
    optimize=False
)