In [None]:
import pandas as pd
import numpy as np
import corpus_reading

In [None]:
# Read 'data/selected_files.tsv' with the project-local `corpus_reading.read_dump`
# (passing `index_col=0`) and display the resulting object as the cell output.
files = corpus_reading.read_dump('data/selected_files.tsv', index_col=0)
files

In [None]:
# Read 'data/chord_list.tsv' with `corpus_reading.read_dump` (no index columns are
# specified here) and display the result as the cell output.
chords = corpus_reading.read_dump('data/chord_list.tsv')
chords

In [None]:
# Read 'data/note_list.tsv' with `corpus_reading.read_dump`, passing `index_col=[0,1,2]`,
# and display the result. Later cells address rows of `notes` with 3-tuples such as
# (113, 0, 1857), i.e. one value per index column.
notes = corpus_reading.read_dump('data/note_list.tsv', index_col=[0,1,2])
notes

In [None]:
# Read 'data/measure_list.tsv' with `corpus_reading.read_dump` (no index columns are
# specified here) and display the result as the cell output.
measures = corpus_reading.read_dump('data/measure_list.tsv')
measures

In [None]:
def get_block(note_list, start, end, cut_durations=False, staff=None, merge_ties=True):
    """ Whereas get_slice() gets sounding notes at a point, get_block() retrieves
    sounding notes within a range.

    The function adds the column `overlapping` whose values follow the same logic as `tied`:
        <NA> for events that lie entirely within the block.
        -1   for events crossing `start` and ending within the block.
        1    for events crossing `end`.
        0    for events that start before and end after the block.
    Notes that begin exactly on `start` but are tied from a previous note, and notes
    that end exactly on `end` but are tied to a following note, are marked in the same
    way as notes crossing the respective boundary.

    Parameters
    ----------
    note_list : :obj:`pandas.DataFrame`
        Note list from which to retrieve the block. The columns `mc`, `onset`, `duration`
        and `tied` are read; `staff` is read if `staff` is passed. If ties are merged,
        the columns required by merge_tied_notes() are needed as well.
    start, end : :obj:`tuple` of (:obj:int, numerical)
        Pass (mc, onset) tuples. `end` is exclusive. Onsets are converted to
        :obj:`fractions.Fraction`.
    cut_durations : :obj:`bool`, optional
        Set to True if the original note durations should be cut at the block boundaries.
    staff : :obj:`int`, optional
        Return block from this staff only.
    merge_ties : :obj:`bool`, optional
        By default, tied notes are merged so that they do not appear as two different onsets.

    Returns
    -------
    :obj:`pandas.DataFrame`
        Copy of the selected rows with the additional nullable-integer column `overlapping`.
        `note_list` itself is not modified.

    Raises
    ------
    AssertionError
        If `start` lies after `end`.
    """
    # Local import: the notebook never brings a fraction type into scope.
    from fractions import Fraction

    def start_tie(S):
        """Mark events as continuing beyond the block: <NA> -> 1, -1 -> 0."""
        return S.fillna(1).replace({-1: 0, 0: 0})

    def end_tie(S):
        """Mark events as continuing from before the block: <NA> -> -1, 1 -> 0."""
        return S.fillna(-1).replace({0: 0, 1: 0})

    a_mc, a_onset = start
    b_mc, b_onset = end
    a_mc, b_mc = int(a_mc), int(b_mc)
    assert a_mc <= b_mc, f"Start MC ({a_mc}) needs to be at most end MC ({b_mc})."
    a_onset, b_onset = Fraction(a_onset), Fraction(b_onset)
    if a_mc == b_mc:
        assert a_onset <= b_onset, "Start onset needs to be at most end onset."

    # Pre-select the measures touched by the block (and the staff, if requested).
    res = note_list[(note_list.mc >= a_mc) & (note_list.mc <= b_mc)]
    if staff is not None:
        res = res[res.staff == staff]

    in_a = (res.mc == a_mc)
    in_b = (res.mc == b_mc)
    endpoint = res.onset + res.duration
    crossing_left = in_a & (res.onset < a_onset) & (a_onset < endpoint)
    on_onset = in_a & (res.onset == a_onset)
    crossing_right = in_b & (endpoint > b_onset)

    if a_mc == b_mc:
        in_between = in_a & (res.onset >= a_onset) & (res.onset < b_onset)
    else:
        onset_in_a = in_a & (res.onset >= a_onset)
        onset_in_b = in_b & (res.onset < b_onset)
        # The last term selects whole measures lying strictly between start and end MC.
        in_between = onset_in_a | onset_in_b | ((res.mc > a_mc) & (res.mc < b_mc))

    keep = crossing_left | in_between
    res = res[keep].copy()
    # Restrict every mask to the retained rows so that it shares the index of `res`
    # and no implicit index alignment is needed below.
    crossing_left = crossing_left[keep]
    crossing_right = crossing_right[keep]
    on_onset = on_onset[keep]
    in_b = in_b[keep]
    endpoint = endpoint[keep]

    res['overlapping'] = pd.array([pd.NA] * len(res), dtype='Int64')

    if crossing_left.any():
        res.loc[crossing_left, 'overlapping'] = end_tie(res.loc[crossing_left, 'overlapping'])

    if crossing_right.any():
        res.loc[crossing_right, 'overlapping'] = start_tie(res.loc[crossing_right, 'overlapping'])

    if res.tied.notna().any():
        # Notes touching a boundary exactly may still continue across it via a tie.
        tied_from_left = on_onset & res.tied.isin([-1, 0])
        if tied_from_left.any():
            res.loc[tied_from_left, 'overlapping'] = end_tie(res.loc[tied_from_left, 'overlapping'])
        tied_to_right = in_b & (endpoint == b_onset) & res.tied.isin([0, 1])
        if tied_to_right.any():
            res.loc[tied_to_right, 'overlapping'] = start_tie(res.loc[tied_to_right, 'overlapping'])

    if cut_durations:
        if crossing_left.any():
            # Remove the part sounding before the block and move the onset to the block start.
            # `mc` needs no update: notes crossing `start` are in measure `a_mc` by definition.
            res.loc[crossing_left, 'duration'] = res.loc[crossing_left, 'duration'] - a_onset + res.loc[crossing_left, 'onset']
            res.loc[crossing_left, 'onset'] = a_onset
        if crossing_right.any():
            # Uses the (possibly just updated) onset, so notes spanning the whole block
            # end up with the block's length.
            res.loc[crossing_right, 'duration'] = b_onset - res.loc[crossing_right, 'onset']

    if merge_ties and res.tied.notna().any():
        merged, changes = merge_tied_notes(res, return_changed=True)
        if len(changes) > 0:
            # A merged note continues beyond the block if the last note absorbed into it did.
            new = [ix for ix, index_list in changes.items()
                   if len(index_list) > 0
                   and na_to_dummy(res.at[index_list[-1], 'overlapping'], None) in [0, 1]]
            tie_over = merged.index.isin(new)
            if tie_over.any():
                merged.loc[tie_over, 'overlapping'] = start_tie(merged.loc[tie_over, 'overlapping'])
            res = merged

    return res
    
    
    
def merge_tied_notes(df, return_changed=False):
    """ Merge tied notes of a note list into single events with accumulated durations.

    Every note with `tied == 1` absorbs the following tied notes (`tied` 0 or -1) that
    share its `midi` and `staff`: their durations are added to the first note and their
    rows are dropped. The search for a chain stops after a note with `tied == -1` or at
    the end of the list.

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Note list with the columns ['duration', 'tied', 'midi', 'staff', 'voice'].
        It is not modified; a copy is returned.
    return_changed : :obj:`bool`, optional
        Pass True to additionally get a dictionary mapping the index of every tie
        start to the list of indices that have been merged into it.

    Returns
    -------
    :obj:`pandas.DataFrame` or (:obj:`pandas.DataFrame`, :obj:`dict`)
    """
    result = df.copy()
    # Snapshot of all notes taking part in ties; durations are read from here.
    tied_rows = result[result.tied.notna()]
    n_tied = len(tied_rows)
    tie_starts = tied_rows[tied_rows.tied == 1]

    def collect_chain(pos, midi, staff, voice):
        """Scan `tied_rows` from position `pos` onwards and return the summed duration
        and the index labels of the notes continuing the tie. `voice` is accepted but
        deliberately not used for matching."""
        durations, labels = [], []
        while pos < n_tied:
            candidate = tied_rows.iloc[pos]
            pos += 1
            if candidate.tied == 1 or candidate.midi != midi or candidate.staff != staff:
                continue
            durations.append(candidate.duration)
            labels.append(candidate.name)
            if candidate.tied == 0:
                # The tie continues: keep looking for the next segment.
                continue
            break
        # Sum from the tail of the chain towards its head, i.e. d1 + (d2 + (d3 + ...)).
        total = 0
        for value in reversed(durations):
            total = value + total
        return total, labels

    absorbed = []
    for start_ix, start_row in tie_starts.iterrows():
        first_candidate = tied_rows.index.get_loc(start_ix) + 1
        extra, labels = collect_chain(first_candidate, start_row.midi, start_row.staff, start_row.voice)
        result.loc[start_ix, 'duration'] += extra
        absorbed.append(labels)

    to_drop = []
    for labels in absorbed:
        to_drop.extend(labels)
    result.drop(to_drop, inplace=True)

    if not return_changed:
        return result
    return result, dict(zip(tie_starts.index.to_list(), absorbed))
        
        
        
def na_to_dummy(val, dummy):
    """ Make a possibly missing scalar comparable.

    Returns `dummy` if `val` is a null value according to `pd.isnull` (e.g. pd.NA,
    NaN or None); any other `val` is handed back unchanged.
    """
    if pd.isnull(val):
        return dummy
    return val

In [None]:
# Import the project-local `corpus_utils` module and reload it, so that edits made to the
# module since its first import take effect without restarting the kernel.
import corpus_utils
import importlib
importlib.reload(corpus_utils)

# Compute the two offset objects from the note list and the measure list. A later cell
# attaches them to `notes` as the columns `offset_mc` and `offset_beat`.
offset_mc, offset_beat = corpus_utils.get_offsets(notes, measures)

In [None]:
# Reload `corpus_utils` so that this cell can be re-run on its own after editing the module.
import corpus_utils
import importlib
importlib.reload(corpus_utils)

# Attach the previously computed offsets to the note list as columns.
notes = notes.assign(offset_mc=offset_mc, offset_beat=offset_beat)

# `iloc` optionally restricts the merge to the first `iloc` rows; None uses the full note list.
iloc = None
notes_iloc = notes if iloc is None else notes.iloc[:iloc]
merged_notes = corpus_utils.merge_ties(notes_iloc, measures=measures)

In [None]:
# Show the rows of the merged note list that still carry a `tied` value.
merged_notes[merged_notes.tied.notna()]

In [None]:
# Inspect one reference note: show all notes of the same sub-frame that share its MIDI
# pitch and lie in the measures around it.
index = (113, 0, 1857)  # full 3-level index label of the reference note

# Look the reference row up once, in the same frame that is filtered below
# (`notes_iloc`), instead of mixing `notes` and `notes_iloc`.
reference = notes_iloc.loc[index]
midi = reference.midi
# Both bounds are exclusive in the comparison below.
min_mc = reference.mc - 2
max_mc = reference.mc + 10

# Select the sub-frame addressed by the first two index levels and filter it.
indexed_df = notes_iloc.loc[(index[0], index[1])]
indexed_df.loc[(indexed_df.midi == midi) & (indexed_df.mc > min_mc) & (indexed_df.mc < max_mc)]