In [84]:
import ast
import json
import os
from collections import OrderedDict
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import wandb
from pytorch_lightning import Trainer
from spacy import displacy

from musicparser.data_loading import JTBDataset, TSDataset
from musicparser.data_loading import JTBDataModule
from musicparser.data_loading import parse_jht_to_dep_tree
from musicparser.models import ArcPredictionLightModel
from musicparser.postprocessing import eisner_fast

# Best and worst examples

In [88]:
# Load the per-piece result table (results/JHT result table.csv) into a DataFrame
df = pd.read_csv("results/JHT result table.csv")

# Pieces tied for the highest head accuracy (with postprocessing)
df.loc[df["MuDeP Head accuracy"].eq(df["MuDeP Head accuracy"].max())]

Unnamed: 0,title,Har Arc accuracy,Har Span Accuracy,MuDeP Span Accuracy,MuDeP Arc Accuracy,MuDeP Head Accuracy (no postp),MuDeP Head accuracy,len
1,Equinox,1.0,1.0,1.0,1.0,1.0,1.0,12.0
2,"Jody Grind, The",1.0,1.0,1.0,1.0,1.0,1.0,12.0
3,Mr. P.C.,1.0,1.0,1.0,1.0,1.0,1.0,12.0
27,On The Trail,0.933333,0.933333,1.0,1.0,1.0,1.0,16.0
36,Freight Train,0.5,0.8,1.0,1.0,1.0,1.0,21.0
76,Hot House,0.903226,0.903226,1.0,1.0,1.0,1.0,32.0
86,What Is This Thing Called Love,0.903226,0.903226,1.0,1.0,1.0,1.0,32.0


In [89]:
# Pieces tied for the lowest head accuracy (with postprocessing)
df.loc[df["MuDeP Head accuracy"].eq(df["MuDeP Head accuracy"].min())]

Unnamed: 0,title,Har Arc accuracy,Har Span Accuracy,MuDeP Span Accuracy,MuDeP Arc Accuracy,MuDeP Head Accuracy (no postp),MuDeP Head accuracy,len
69,Idle Moments,0.758621,0.551724,0.482759,0.448276,0.432703,0.466073,30.0


In [105]:
# Per-piece gain in head accuracy brought by postprocessing
# (stored in a new "diff" column of df), then the piece(s) with the largest gain
df["diff"] = df["MuDeP Head accuracy"].sub(df["MuDeP Head Accuracy (no postp)"])
df.loc[df["diff"].eq(df["diff"].max())]

Unnamed: 0,title,Har Arc accuracy,Har Span Accuracy,MuDeP Span Accuracy,MuDeP Arc Accuracy,MuDeP Head Accuracy (no postp),MuDeP Head accuracy,len,diff
133,Hackensack,0.647059,0.676471,0.911765,0.911765,0.857026,0.914216,35.0,0.05719


# Tree rendering

**You don't need to rerun this code, all files are already in the repo.**

This code:
1. Downloads the trees from wandb
2. Produces an SVG representation of each tree
3. Creates a .md file to visualize all of them

In [None]:
def _load_run_table_row(api, run_id, table_name):
    """Download the wandb table `table_name` logged by run `run_id` and return its first row.

    The artifact is fetched as
    ``fosfrancesco/sweeps_JTB/run-<run_id>-<table_name>:latest`` and the row is
    read from ``<table_name>.table.json`` inside the downloaded folder.
    """
    artifact = api.artifact(f'fosfrancesco/sweeps_JTB/run-{run_id}-{table_name}:latest', type='run_table')
    folder_path = artifact.download()
    with open(Path(folder_path, f"{table_name}.table.json")) as f:
        return json.load(f)["data"][0]


api = wandb.Api()
all_data = {}
sweep = api.sweep("fosfrancesco/sweeps_JTB/ekr597pd")

# load the json dataset (to get the exact chords)
with open(Path("data/jazz_tb/treebank.json")) as f:
    treebank_data = json.load(f)
# keep only the entries that have a "trees" field
treebank_data = [e for e in treebank_data if e.get("trees") is not None]
tree_dicts = [e["trees"][0]["complete_constituent_tree"] for e in treebank_data]
titles = [e["title"] for e in treebank_data]

for run in sweep.runs:
    # the last dash-separated token of the run name is used as a 1-based piece number
    piece = run.name.split("-")[-1]
    piece_ix = int(piece) - 1

    # The table cells are strings. They are parsed with ast.literal_eval rather
    # than eval so that downloaded text is never executed as code.
    # NOTE(review): assumes every cell holds a plain Python literal
    # (lists / tuples / strings / numbers) - TODO confirm.
    head_row = _load_run_table_row(api, run.id, "test_head_seqs")
    head_predicted = ast.literal_eval(head_row[0])[1:]  # first element is for the root, we discard it
    head_predicted_postp = ast.literal_eval(head_row[1])[1:]
    head_truth = ast.literal_eval(head_row[2])[1:]

    ctree_row = _load_run_table_row(api, run.id, "test_ctrees")
    ctree_predicted = ast.literal_eval(ctree_row[0])
    ctree_truth = ast.literal_eval(ctree_row[1])

    # load the chord sequence and title
    arcs, chord_sequence = parse_jht_to_dep_tree(tree_dicts[piece_ix])
    title = titles[piece_ix]
    assert len(chord_sequence) == len(head_predicted_postp) , f"Chord sequence and head predicted have different length for piece {piece}, {len(chord_sequence)}!={len(head_predicted_postp)}"

    # save everything
    # (the key was previously misspelled "arcs:"; files cached before this
    # fix still carry the old key)
    all_data[title] = {"head_predicted" : head_predicted,
                       "head_predicted_postp" : head_predicted_postp,
                       "head_truth" : head_truth,
                       "ctree_predicted" : ctree_predicted,
                       "ctree_truth" : ctree_truth,
                       "chords" : chord_sequence,
                       "arcs": arcs}

In [96]:
# Cache all_data on disk so the wandb artifacts do not have to be downloaded again

predicted_json_path = Path('results/predicted_JHT.json')
with predicted_json_path.open('w') as outfile:
    outfile.write(json.dumps(all_data))

In [None]:
# Reload the cached dict from disk (you can restart from this cell)

predicted_json_path = Path('results/predicted_JHT.json')
with predicted_json_path.open() as f:
    all_data = json.loads(f.read())

In [94]:
# Worked example: render the ground-truth dependency tree of one piece inline

piece_data = all_data["Red Clay"]

# one displaCy "word" per chord, with an empty tag
spacy_words = [dict(text=chord, tag="") for chord in piece_data["chords"]]

# head_truth[i] is the head of chord i, shifted by one; 0 marks the root,
# which gets no arc. displaCy arcs need start <= end, so the pair is sorted
# and the original orientation is recorded in "dir".
spacy_arcs = [
    {
        "start": min(dep_ix, head - 1),
        "end": max(dep_ix, head - 1),
        "label": "",
        "dir": "right" if dep_ix < head - 1 else "left",
    }
    for dep_ix, head in enumerate(piece_data["head_truth"])
    if head != 0
]

spacy_dict = dict(words=spacy_words, arcs=spacy_arcs)
displacy.render(
    spacy_dict,
    style='dep',
    jupyter=True,
    manual=True,
    options={"compact": False, "distance": 80, "arrow_stroke":1, "arrow_width":6, "word_spacing":20},
)


In [100]:
## save all svg files

def _heads_to_displacy_arcs(heads):
    """Convert a head sequence into the arc dicts expected by displaCy's manual mode.

    `heads[i]` is the head of element `i`, shifted by one; the value 0 marks
    the root and produces no arc. Each arc has "start" <= "end", and "dir" is
    "right" when the element precedes its head and "left" otherwise.
    """
    spacy_arcs = []
    for start_ix, end_ix in enumerate(heads):
        if end_ix == 0:
            continue # skip root
        if start_ix < end_ix-1:
            spacy_arcs.append({"start": start_ix, "end": end_ix-1, "label": "", "dir": "right"})
        else:
            spacy_arcs.append({"start": end_ix-1, "end": start_ix, "label": "", "dir": "left"})
    return spacy_arcs


def _save_dependency_svgs(all_data, heads_key, subfolder):
    """Render one SVG per piece from `piece_data[heads_key]`.

    Files are written to
    ./results/rendered_JHT/dependency_trees/<subfolder>/<title>.svg
    """
    render_options = {"compact": False, "distance": 80, "arrow_stroke":1, "arrow_width":6, "word_spacing":20}
    for title, piece_data in all_data.items():
        spacy_words = [{"text": chord, "tag": ""} for chord in piece_data["chords"]]
        spacy_dict = {"words": spacy_words, "arcs": _heads_to_displacy_arcs(piece_data[heads_key])}
        rendered_tree = displacy.render(spacy_dict, style='dep', jupyter=False, manual=True, options=render_options)
        output_path = Path(f"./results/rendered_JHT/dependency_trees/{subfolder}/{title}.svg")
        # make sure the target folder exists so the cell also works on a fresh checkout
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with output_path.open("w", encoding="utf-8") as f:
            f.write(rendered_tree)


# ground truth, predicted without postprocessing, predicted with postprocessing
for heads_key, subfolder in [
    ("head_truth", "ground_truth"),
    ("head_predicted", "predicted_no_postprocessing"),
    ("head_predicted_postp", "predicted_postprocessing"),
]:
    _save_dependency_svgs(all_data, heads_key, subfolder)

In [102]:
# generate the markdown automatically
# for each piece use the template
# ## Title
# ### Ground Truth
# <img src="../results/rendered_JHT/dependency_trees/ground_truth/title.svg">
#
# ### Predicted with no postprocessing
# <img src="../results/rendered_JHT/dependency_trees/predicted_no_postprocessing/title.svg">
#
# ### Predicted with postprocessing
# <img src="../results/rendered_JHT/dependency_trees/predicted_postprocessing/title.svg">

# (section heading, sub-folder holding the corresponding svg files)
tree_variants = [
    ("Ground Truth", "ground_truth"),
    ("Predicted with no postprocessing", "predicted_no_postprocessing"),
    ("Predicted with postprocessing", "predicted_postprocessing"),
]

# pieces are listed in reverse order of their appearance in all_data
markdown_parts = []
for title in reversed(list(all_data)):
    markdown_parts.append(f"## {title}\n")
    for heading, subfolder in tree_variants:
        markdown_parts.append(f"### {heading}\n")
        markdown_parts.append(f"<img src=\"../results/rendered_JHT/dependency_trees/{subfolder}/{title}.svg\"> \n \n")
markdown = "".join(markdown_parts)

# save the markdown in the result folder
# (utf-8, like the svg files, so titles are written identically on every platform)
with open(Path("results/rendered_JHT.md"), "w", encoding="utf-8") as f:
    f.write(markdown)