In [None]:
import logging
from pathlib import Path

from tqdm import tqdm
import pandas as pd

from harmonic_inference.data.data_types import PitchType, NO_REDUCTION
from harmonic_inference.data.piece import get_score_piece_from_data_frames
from harmonic_inference.data.corpus_reading import load_clean_corpus_dfs
import harmonic_inference.utils.harmonic_utils as hu

# Set the root logger to DEBUG so that the errors logged by the export loop
# further down (and any debug messages from imported libraries) are shown.
logging.basicConfig(level=logging.DEBUG)

In [None]:
# Load the cleaned corpus from 'corpus_data'. The loader's return value is
# unpacked into four tables: files, measures, chords and notes.
files_df, measures_df, chords_df, notes_df = load_clean_corpus_dfs('corpus_data')

In [None]:
# Inspect the files table: its index supplies the `file_id` values and its
# `corpus_name` / `file_name` columns are used to name the exported files.
files_df

In [None]:
# Inspect the measures table (sliced per piece with `.loc[file_id]` in the export loop).
measures_df

In [None]:
# Inspect the chords table (sliced per piece with `.loc[file_id]` in the export loop).
chords_df

In [None]:
# Inspect the notes table (sliced per piece with `.loc[file_id]` in the export loop).
notes_df

In [None]:
# Composer label = the part of each corpus name before its first '-',
# stripped of surrounding whitespace; duplicates removed, then sorted.
composers = sorted(
    {
        corpus.split('-', 1)[0].strip()
        for corpus in files_df.corpus_name.unique()
    }
)
composers

In [None]:
# Make the output directory that the export loop writes its per-piece files into.
# `parents=True, exist_ok=True` keeps this cell safe to re-run: missing parent
# directories are created and an already-existing directory is not an error.
base_dir = Path("Selim_data")
base_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Export one tab-separated chord table per piece into `base_dir`.
#
# Every piece is built twice from the same note/chord/measure slices -- once
# with `use_relative=True` and once with `use_relative=False` -- and the two
# chord sequences are walked in parallel to fill one output row per chord.
# Pieces that fail to build are logged (with traceback) and skipped.
chord_reduction = NO_REDUCTION
use_inversions = True

# Output columns, in the order they are written to the file.
CHORD_COLUMNS = [
    "root",  # MIDI, Absolute
    "relative_root",  # MIDI, Absolute
    "relative_mode",  # Categorical
    "type",  # Categorical
    "inversion",  # Categorical
    "tonic",  # MIDI, Absolute
    "mode",  # Categorical
]

for file_id, file_row in tqdm(files_df.iterrows(), desc="Loading pieces", total=len(files_df)):
    name = file_row["corpus_name"] + ".." + file_row["file_name"]

    try:
        # Same construction twice; only `use_relative` differs. The frames are
        # re-sliced for each build so the two pieces never share a slice object.
        relative_piece, local_piece = (
            get_score_piece_from_data_frames(
                notes_df.loc[file_id],
                chords_df.loc[file_id],
                measures_df.loc[file_id],
                chord_reduction=chord_reduction,
                use_inversions=use_inversions,
                use_relative=use_relative,
                name=name,
                use_suspensions=False,
            )
            for use_relative in (True, False)
        )
    except Exception:
        # Deliberate best-effort: one bad piece must not abort the whole export.
        # `logging.exception` records the message at ERROR level plus the traceback.
        logging.exception("No data created for file_id %s", file_id)
        continue

    relative_chords = relative_piece.get_chords()
    local_chords = local_piece.get_chords()

    # The two chord sequences are paired positionally below, so they must align.
    assert len(relative_chords) == len(local_chords), (
        f"Chord count mismatch for {name}: "
        f"{len(relative_chords)} relative vs {len(local_chords)} local"
    )

    chord_rows = []
    for chord, local_chord in zip(relative_chords, local_chords):
        # Convert both chords to the MIDI pitch type before reading their fields.
        chord = chord.to_pitch_type(PitchType.MIDI)
        local_chord = local_chord.to_pitch_type(PitchType.MIDI)

        chord_rows.append(
            {
                # Fields read from the `use_relative=True` piece.
                "root": chord.root,
                "relative_root": chord.key_tonic,
                "relative_mode": chord.key_mode,
                "type": hu.hc.CHORD_TYPE_TO_STRING[chord.chord_type],
                "inversion": chord.inversion,
                # Fields read from the `use_relative=False` piece.
                "tonic": local_chord.key_tonic,
                "mode": local_chord.key_mode,
            }
        )

    # Passing `columns` fixes the column order and keeps the header row even
    # when a piece has no chords.
    df = pd.DataFrame(chord_rows, columns=CHORD_COLUMNS)
    df.to_csv(str(base_dir / name), sep="\t", index=False)