In [None]:
import pandas as pd
import numpy as np

In [None]:
from pathlib import Path

from harmonic_inference.data.corpus_reading import aggregate_annotation_dfs

# Input directory of annotations and the output directory handed to the
# aggregation step. Both are relative to the notebook's working directory.
ANNOTATIONS_PATH = Path('../corpora/annotations')
OUT_DIR = Path('corpus_data')

# Presumably writes the aggregated tables into OUT_DIR; the loading cell
# reads its TSV files from 'corpus_data' -- TODO confirm against
# aggregate_annotation_dfs.
aggregate_annotation_dfs(ANNOTATIONS_PATH, OUT_DIR)

In [None]:
from pathlib import Path

from harmonic_inference.data.corpus_reading import read_dump

# Directory that holds the four TSV dumps read below.
corpus_dir = Path('corpus_data')

files_df = read_dump(corpus_dir / 'files.tsv', index_col=0)
measures_df = read_dump(corpus_dir / 'measures.tsv')
chords_df = read_dump(corpus_dir / 'chords.tsv', low_memory=False)
notes_df = read_dump(corpus_dir / 'notes.tsv')

# Keep handles on the frames exactly as loaded. These are plain aliases
# (no copy is made), so they only stay "original" as long as later code
# rebinds the working names instead of mutating the frames in place.
files_df_orig, measures_df_orig, notes_df_orig, chords_df_orig = (
    files_df,
    measures_df,
    notes_df,
    chords_df,
)

In [None]:
import contextlib
import importlib
import logging
import time

from harmonic_inference.utils import corpus_utils as cu

# Reload so that edits to corpus_utils are picked up without a kernel restart.
importlib.reload(cu)

logging.basicConfig(filename='parsing.log', level=logging.INFO, filemode='w')


@contextlib.contextmanager
def _timed(label):
    """Print ``label`` followed by the seconds spent inside the ``with`` body.

    Uses ``time.perf_counter()`` (a monotonic clock) so the measured interval
    cannot be distorted by system clock adjustments. Nothing is printed when
    the body raises.
    """
    start = time.perf_counter()
    yield
    print(f'{label}: {time.perf_counter() - start}')


# NOTE(review): this cell rebinds measures_df / notes_df / chords_df, so
# running it a second time feeds already-processed frames back into
# remove_unmatched, merge_ties and add_chord_metrical_data. The *_orig
# aliases from the loading cell still point at the frames as loaded.

with _timed('Remove measures'):
    # Remove measure repeats. Only done while the first row's `next` value is
    # still a tuple.
    if isinstance(measures_df.iloc[0].next, tuple):
        measures_df = cu.remove_repeats(measures_df, remove_unreachable=True)

with _timed('Remove unmatched'):
    # Remove unmatched
    notes_df = cu.remove_unmatched(notes_df, measures_df)
    chords_df = cu.remove_unmatched(chords_df, measures_df)
    # Also drop chord rows whose numeral is the '@none' marker or missing.
    no_numeral = (chords_df.numeral == '@none') | chords_df.numeral.isnull()
    chords_df = chords_df.drop(chords_df.loc[no_numeral].index)

with _timed('Add note offsets'):
    # Add offsets, unless both offset columns are already present.
    if not {'offset_beat', 'offset_mc'}.issubset(notes_df.columns):
        notes_df = cu.add_note_offsets(notes_df, measures_df)

with _timed('Merge ties'):
    # Merge ties
    notes_df = cu.merge_ties(notes_df)

with _timed('Add chords metrical info'):
    # Add chord metrical info
    chords_df = cu.add_chord_metrical_data(chords_df, measures_df)

In [None]:
# Show files_df as this cell's output.
files_df

In [None]:
# Show notes_df as this cell's output (as rebound by the preprocessing cell
# when that cell has been run).
notes_df

In [None]:
# Show measures_df as this cell's output (as rebound by the preprocessing
# cell when that cell has been run).
measures_df

In [None]:
# Show chords_df as this cell's output (as rebound by the preprocessing cell
# when that cell has been run).
chords_df

In [None]:
import importlib

import harmonic_inference.data.datasets as ds

# Reload so that edits to the datasets module are picked up without a
# kernel restart.
importlib.reload(ds)

# Dataset classes handed to get_dataset_splits.
dataset_classes = [ds.ChordTransitionDataset, ds.ChordClassificationDataset]

# Fixed seed keeps the split reproducible across runs.
# NOTE(review): the three fractions presumably map to train/valid/test in
# that order -- confirm against get_dataset_splits.
dataset_splits = ds.get_dataset_splits(
    files_df,
    measures_df,
    chords_df,
    notes_df,
    dataset_classes,
    splits=[0.8, 0.1, 0.1],
    seed=0,
)

In [None]:
import eval_utils as eu
import harmonic_utils as hu
import matplotlib.pyplot as plt

# NOTE(review): `labels` and `outputs` are not assigned in any cell visible
# in this notebook; they must already exist in the kernel for this to run.
label_strings = hu.get_one_hot_labels()
conf_mat = eu.get_conf_mat(labels, outputs)

# One tick per label string, shared by both axes.
tick_positions = list(range(len(label_strings)))

fig, ax = plt.subplots(figsize=(30, 30))
image = ax.imshow(conf_mat, interpolation='none')
fig.colorbar(image, ax=ax)

# x labels are rotated so the long tick strings do not overlap.
ax.set_xticks(tick_positions)
ax.set_xticklabels(label_strings, rotation=90, fontsize=10)
ax.set_yticks(tick_positions)
ax.set_yticklabels(label_strings, fontsize=10)
plt.show()

In [None]:
import eval_utils as eu

# NOTE(review): `labels` and `outputs` are not assigned in any cell visible
# in this notebook; they must already exist in the kernel.
correct, incorrect = eu.get_correct_and_incorrect_indexes(labels, outputs)

# Report how many entries landed in each group.
print(f'Correct: {len(correct)}')
print(f'Incorrect: {len(incorrect)}')

In [None]:
import eval_utils as eu

# Look at the first entry of `incorrect` (built by the cell that calls
# get_correct_and_incorrect_indexes).
first_incorrect = incorrect[0]
eu.print_result(first_incorrect, labels, outputs, limit=10, prob=False)

In [None]:
import eval_utils as eu

# NOTE(review): `datasets` and `data` are not assigned in any cell visible
# in this notebook (the split cell binds `dataset_splits`); they must
# already exist in the kernel.
test_dataset = datasets[data]['test']
chord, onset_notes, all_notes = eu.get_input_df_rows(incorrect[0], test_dataset)

# Dump the chord, then the two note tables, each under its own heading.
print(chord)
print("USED NOTES:", onset_notes, sep="\n")
print()
print("ALL NOTES:", all_notes, sep="\n")

In [None]:
import matplotlib.pyplot as plt
import eval_utils as eu

correct_ranks, indexes_by_rank = eu.get_correct_ranks(labels, outputs)

# One bar per position in an output row; bar height is the number of
# indexes collected for that rank.
rank_positions = range(len(outputs[0]))
rank_counts = list(map(len, indexes_by_rank))

fig, ax = plt.subplots(figsize=(30, 30))
ax.bar(rank_positions, rank_counts)

In [None]:
import importlib

import eval_utils as eu

# Reload so that edits to eval_utils are picked up without a kernel restart.
importlib.reload(eu)

# NOTE(review): `labels`, `outputs`, `datasets` and `data` are not assigned
# in any cell visible in this notebook; they must already exist in the kernel.
test_dataset = datasets[data]['test']
eval_df = eu.get_eval_df(labels, outputs, test_dataset)

# Show the resulting frame as this cell's output.
eval_df

In [None]:
import importlib

import ablation

# Reload so that edits to the ablation module are picked up without a
# kernel restart.
importlib.reload(ablation)

# The loader receives `prefix` without its final character, or None when
# `prefix` is empty.
# NOTE(review): `prefix` is not assigned in any cell visible in this
# notebook; the dropped character is presumably a trailing separator --
# confirm.
results_prefix = None if len(prefix) == 0 else prefix[:-1]
dfs = ablation.load_all_ablated_dfs(directory='results', prefix=results_prefix)

# Only the names are used here; the masks themselves are discarded.
_, mask_names = ablation.get_masks_and_names()

In [None]:
import os

import pandas as pd

# Read one log per mask name from results/<prefix><mask_name>.log, parsed
# as CSV, keeping the same order as mask_names.
logs = []
for mask_name in mask_names:
    log_path = os.path.join('results', prefix + mask_name + '.log')
    logs.append(pd.read_csv(log_path))

In [None]:
# For every ablation: percentage of rows flagged correct, then the last row
# of the matching log.
for df, log, mask_name in zip(dfs, logs, mask_names):
    accuracy_pct = 100 * df.correct.sum() / len(df)
    print(f"{mask_name} Acc: {accuracy_pct}")

    final_log_row = log.iloc[-1]
    print(final_log_row)
In [None]:
import eval_utils as eu

# Load the three saved evaluation frames, in the order global / local / none.
global_df, local_df, none_df = map(
    eu.load_eval_df,
    (
        'results/global_no_ablation.csv',
        'results/local_no_ablation.csv',
        'results/no_ablation.csv',
    ),
)

In [None]:
# Show global_df (loaded from results/global_no_ablation.csv) as this
# cell's output.
global_df

In [None]:
def _accuracy_by_chord(eval_df):
    """Summarise the ``correct`` column per ``correct_chord`` value.

    Returns a frame with a ``mean`` and a ``count`` column for each
    ``correct_chord`` group, sorted by ``count`` in descending order.
    """
    per_chord = eval_df.groupby(['correct_chord'])['correct'].agg(['mean', 'count'])
    return per_chord.sort_values('count', ascending=False)


global_counts = _accuracy_by_chord(global_df)
local_counts = _accuracy_by_chord(local_df)
none_counts = _accuracy_by_chord(none_df)

In [None]:
import matplotlib.pyplot as plt

# (per-chord summary, marker colour, legend label) for each plotted series.
scatter_series = (
    (global_counts, 'red', 'Global key'),
    (local_counts, 'blue', 'Local key'),
    (none_counts, 'yellow', 'No transposition'),
)

fig, ax = plt.subplots(figsize=(12, 5))
for counts, color, label in scatter_series:
    # x: number of rows for the chord, y: mean of `correct` for the chord.
    ax.scatter(counts['count'], counts['mean'], color=color, label=label)

# NOTE(review): the title names only the global-key run although all three
# series are drawn -- confirm this is intended.
ax.set_title('Global key transposed')
ax.set_xlabel('Count')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()