In [14]:
%load_ext autoreload
%autoreload 2
from jh import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
# Toggle for this cell: True recomputes every derived table from the raw note and
# measure lists and rewrites the cached TSV files; False loads those cached files.
compute_anew = False
# Directory holding all TSV inputs and outputs used below (relative path).
data = 'data/tsv/'
# NOTE(review): the readers are accessed as `jh.<name>` although only `from jh import *`
# is visible above; confirm that `jh`, `os` and `pd` are bound on a fresh kernel.
note_list = jh.read_note_list(os.path.join(data,'note_list_complete.tsv'), index_col=[0,1,2])
measure_list = jh.read_measure_list(os.path.join(data,'measure_list_complete.tsv'), index_col=[0,1])

if compute_anew:
    # Called with inplace=True and the result is not assigned, so this presumably
    # adds the beat information to note_list itself -- TODO confirm in jh.
    compute_beat_column(note_list, measure_list, inplace=True)
    transposed = transpose_to_C(note_list, measure_list)
    # Derived columns computed from the tpc, midi and beats columns of the transposed notes.
    transposed['note_names'] = tpc2name(transposed.tpc)
    transposed['octaves'] = midi2octave(transposed.midi)
    transposed[['beat', 'subbeat']] = split_beats(transposed.beats)
    transposed.to_csv(os.path.join(data,'transposed_schubert.tsv'), sep='\t')
    # The bass table is built from the notes on staff 2 only.
    bass = apply_to_pieces(bass_per_beat, transposed[transposed.staff==2], resolution=1/8)
    bass = apply_to_pieces(add_chord_boundaries, bass, measure_list, next_ids='segment_id', multiple_pieces=True)
    bass.to_csv(os.path.join(data,'schubert_bass.tsv'), sep='\t')
    # Segments are formed from the bass table and the transposed notes, keyed by 'segment_id'.
    schubert_segments =  apply_to_pieces(all_chord_notes, bass, transposed, by='segment_id', multiple_pieces=True)
    schubert_segments.to_csv(os.path.join(data,'schubert_segmented.tsv'), sep='\t')
    # One summarize_ints result per (id, segment_id); unstack + droplevel turn the
    # innermost index level of that result into plain columns.
    segment_features = schubert_segments.groupby(level=['id','segment_id']).apply(summarize_ints)
    segment_features = pd.DataFrame(segment_features).unstack().droplevel(0, axis=1)
    segment_features.to_csv(os.path.join(data,'schubert_segment_features.tsv'), sep='\t')
else:
    # Load the tables written by the branch above; `frac` is applied as converter to
    # the listed columns when reading.
    transposed = jh.read_note_list(os.path.join(data,'transposed_schubert.tsv'), index_col=[0,1,2], converters={'beatsize': frac, 'subbeat': frac})
    bass = jh.read_note_list(os.path.join(data,'schubert_bass.tsv'), index_col=[0,1,2,3], dtypes={'next_id': 'Int64'}, converters={'beatsize': frac, 'subbeat': frac, 'onset_next': frac, })
    schubert_segments = jh.read_note_list(os.path.join(data,'schubert_segmented.tsv'), index_col=[0,1,2,3])
    # The 'intervals' column is stored as the text of a tuple, e.g. "('M3', 'P5')".
    # The converter strips parentheses, quotes and commas and rebuilds a tuple of
    # strings; the text "()" becomes the empty tuple.
    segment_features = pd.read_csv(os.path.join(data,'schubert_segment_features.tsv'), sep='\t', index_col=[0,1],
                             converters={'intervals': lambda t: tuple(i.strip("\',") for i in t.strip("() ").split(", ") if i != '')})

In [16]:
# Drop every note whose `volta` equals 1 or whose measure number `mn` equals 0
# (presumably first endings and upbeat measures -- confirm against the data).
keep_note = transposed['volta'].ne(1) & transposed['mn'].ne(0)
df = transposed[keep_note]

# Split the remaining notes by staff: staff 1 is `right`, staff 2 is `left`.
right = df[df['staff'].eq(1)]
left = df[df['staff'].eq(2)]

# Number of distinct measure numbers per piece id, and their sum over all pieces.
n_measures_per_piece = df.groupby(['id'])['mn'].nunique()
n_total_measures = n_measures_per_piece.sum()
n_measures_per_piece


def get_pattern_list(onset_patterns, n_most_frequent=None, occurring_in_min=None):
    """Tabulate how often each pattern occurs and in how many pieces.

    Parameters
    ----------
    onset_patterns : pd.Series
        One pattern per row. The index must contain a level named 'id'
        that identifies the piece a row belongs to.
    n_most_frequent : int, optional
        If given, only the first `n_most_frequent` rows of the table (the most
        frequent patterns) are returned. Takes precedence over `occurring_in_min`.
    occurring_in_min : int, optional
        If given (and `n_most_frequent` is None), only patterns that occur in
        at least this many pieces are returned.

    Returns
    -------
    pd.DataFrame
        Indexed by pattern in descending order of frequency, with the columns
        'total' (number of occurrences) and 'n_pieces' (number of distinct
        'id' values in which the pattern occurs).
    """
    # rename() + to_frame() instead of DataFrame(counts, columns=['total']):
    # since pandas 2.0 the counts Series is named 'count', and selecting the
    # non-existent column 'total' from it would produce a column of NaN.
    pattern_list = onset_patterns.value_counts().rename('total').to_frame()

    # Count distinct piece ids per pattern in a single grouped pass instead of
    # filtering and grouping the whole Series once for every pattern.
    piece_ids = onset_patterns.index.get_level_values('id')
    pieces_per_pattern = (
        pd.Series(piece_ids.to_numpy())
        .groupby(onset_patterns.to_numpy())
        .nunique()
    )
    pattern_list['n_pieces'] = pieces_per_pattern.reindex(pattern_list.index).to_numpy()

    if n_most_frequent is not None:
        return pattern_list.iloc[:n_most_frequent]
    if occurring_in_min is not None:
        return pattern_list[pattern_list.n_pieces >= occurring_in_min]
    return pattern_list

def _patterns_per_measure(notes):
    """Apply `os_pattern` to the notes of every (id, mn) group."""
    return notes.groupby(['id', 'mn']).apply(os_pattern)

# One pattern per measure: both staves together, then each staff on its own.
onset_patterns = _patterns_per_measure(df)
onset_patterns_left = _patterns_per_measure(left)
onset_patterns_right = _patterns_per_measure(right)

# Keep only the patterns that occur in at least 3 pieces.
pattern_list = get_pattern_list(onset_patterns, occurring_in_min=3)
pattern_list_left = get_pattern_list(onset_patterns_left, occurring_in_min=3)
pattern_list_right = get_pattern_list(onset_patterns_right, occurring_in_min=3)

id   mn
1    1        TaTaTimgi
     2        TaTaTimgi
     3       TaTitiTiti
     4        TaTaTimgi
     5        TaTaTimgi
               ...     
435  12        TaTaTiti
     13        TitiTaTa
     14    TitiTitiTiti
     15    TitiTitiTiti
     16             Tao
Length: 8495, dtype: object

In [32]:
# Display the pattern table for both staves together: one row per pattern with
# its `total` count and the number of pieces (`n_pieces`) it occurs in.
pattern_list

Unnamed: 0,total,n_pieces
TitiTitiTiti,1665,234
TaTaTa,1621,224
TaTitiTa,579,96
TaTitiTiti,506,115
TaTa,458,219
...,...,...
TitigiTitiTitigi,4,3
TigimTi,3,3
Tigitigi,3,3
Taiti,3,3


In [34]:
def create_os_features(onset_patterns):
    """Build a piece-by-pattern table of relative pattern frequencies.

    `onset_patterns` is a Series of patterns whose index contains a level
    named 'id'. For every 'id' the share of each pattern among that piece's
    rows is computed. The result has one row per 'id' and one column per
    pattern; patterns that do not occur in a piece are NaN.
    """
    def relative_frequencies(piece_patterns):
        """Share of each pattern among the rows of a single piece."""
        tally = piece_patterns.value_counts()
        return tally / tally.sum()

    # Series indexed by (id, pattern) ...
    per_piece = onset_patterns.groupby('id').apply(relative_frequencies)
    # ... whose pattern level is moved into the columns; the helper column
    # level introduced by the DataFrame wrapper is dropped again.
    return pd.DataFrame(per_piece).unstack().droplevel(0, axis=1)

# Export one feature table per note selection. Every selection is restricted to
# its own list of frequent patterns and written to its own file, so that the
# three exports do not overwrite each other. The files are tab-separated, like
# the other .tsv files written in this notebook.
os_feature_exports = {
    'os_patterns.tsv': (onset_patterns, pattern_list),
    'os_patterns_left.tsv': (onset_patterns_left, pattern_list_left),
    'os_patterns_right.tsv': (onset_patterns_right, pattern_list_right),
}
for fname, (patterns, frequent) in os_feature_exports.items():
    create_os_features(patterns[patterns.isin(frequent.index)])\
        .to_csv(os.path.join(data, fname), sep='\t')

Unnamed: 0_level_0,Ta,TaTa,TaTaTa,TaTaTi,TaTaTigitigi,TaTaTimgi,TaTaTimgiri,TaTaTiti,TaTaTriole,TaTao,...,TitigiTigitigi,TitigiTiti,TitigiTitiTigitigi,TitigiTitiTiti,TitigiTitiTitigi,TitigiTitigi,TitigiTitigiTitigi,TrioleTa,TrioleTaTa,no
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,0.062500,,,,0.625000,,,,,...,,,,,,,,,,
2,,0.050000,,,,,,,,,...,,,0.05,0.1,,,,,,
3,,0.062500,0.187500,,,0.562500,,,,,...,,,,,,,,,,
4,,0.050000,,,,,,,,,...,,,,,,,,,,
5,,0.071429,0.071429,,,0.357143,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,,0.040000,0.440000,,,,,,,,...,,,,,,,,,,
432,,,0.062500,,,,,0.0625,,,...,,,,,,,,,,
433,,0.083333,0.041667,,,,,,,,...,,,,,,,,,,
434,,,,,,,,,,,...,,,,,,,,,,


In [18]:
# List the pattern names kept in `pattern_list`, in the table's row order.
pattern_list.index.to_list()

['TitiTitiTiti',
 'TaTaTa',
 'TaTitiTa',
 'TaTitiTiti',
 'TaTa',
 'TitiTiti',
 'TitiTitiTa',
 'Tao',
 'TaTaTiti',
 'TitiTaTa',
 'TaoTa',
 'TaTaTimgi',
 'Ta',
 'TaTiti',
 'TitiTa',
 'TaTaTriole',
 'TitiTitiTitigi',
 'TaoTiti',
 'TitiTaTiti',
 'TaTi',
 'TaitiTa',
 'TaTrioleTa',
 'TaTaTi',
 'TigitigiTigitigiTigitigi',
 'TimgiTaTa',
 'Taoa',
 'TaTimgiTa',
 'Titi',
 'TaTao',
 'TitigiTitigi',
 'TitigiTitiTiti',
 'TaoTimgi',
 'TitiTitiTigitigi',
 'TaTitigiTigitigi',
 'TimgiTitiTiti',
 'TigitigiTiti',
 'TimgiTa',
 'TaitiTiti',
 'TrioleTaTa',
 'TaTaTigitigi',
 'TaTaTimgiri',
 'TitigiTiti',
 'TaTimgiTimgi',
 'TitiTi',
 'TimgiTimgiTimgi',
 'TigitigiTa',
 'TimgiTaTiti',
 'TaTimgiTiti',
 'TrioleTa',
 'TigitigiTigitigi',
 'TimgiTaTigitigi',
 'Timgi',
 'TitigiTigitigi',
 'TigimTitiTiti',
 'TitiTitigiTigitigi',
 'TimgiTaTimgi',
 'TitiTitiTimgi',
 'TaoTriole',
 'TitigiTa',
 'Tai',
 'TigitigiTitiTiti',
 'TitiTaTimgi',
 'TitigiTitiTigitigi',
 'TaoTigitigi',
 'TimgiTiti',
 'TimgiriTa',
 'TigitigiTi',
 'Ti

In [None]:
# Extend the segment features via add_previous_ix (applied per piece) and
# add_previous_vals; presumably these attach information about the preceding
# segment -- TODO confirm in jh.
# NOTE(review): this reassigns `segment_features` from itself, so re-running the
# cell applies both helpers a second time.
segment_features = apply_to_pieces(add_previous_ix, segment_features)
segment_features = add_previous_vals(segment_features)
# Inspection: the 50 most frequent interval tuples, and the non-empty columns of
# all segments whose intervals are exactly ('P1', 'm3').
segment_features.intervals.value_counts().iloc[:50]
segment_features.loc[segment_features.intervals == ('P1', 'm3')].dropna(axis=1)
# Start with an all-NaN label column of object dtype: the labels written into
# it later are strings, and writing strings into a float64 column is a
# deprecated silent upcast in pandas >= 2.1.
segment_features['label'] = np.nan
segment_features['label'] = segment_features['label'].astype(object)

# Maps an interval tuple (as found in a segment's `intervals` field) to a function
# that builds the chord label from a row `r`. Only `r.bass`, a note name, is read.
# Most entries convert the bass name with name2tpc, add a fixed offset, and render
# the result with tpc2rn after a "/"; when the bass equals the note name tested in
# the entry (G, B, D or F) the "/..." suffix is omitted.
# Every entry converts the bass name with name2tpc before calling tpc2rn,
# including the ('M2', 'M7', 'P4', 'P5') entry.
type2label = {
    ('M3', 'P5'):             lambda r: name2rn(r.bass),
    ('M2', 'M3', 'P5'):       lambda r: name2rn(r.bass), ###
    ('M6', 'P4'):             lambda r: f"V(64)/{tpc2rn(name2tpc(r.bass) - 1)}" if r.bass != 'G' else 'V(64)',
    ('P5', 'm3'):             lambda r: name2rn(r.bass).lower(),
    ('M3', 'P5', 'm7'):       lambda r: f"V7/{tpc2rn(name2tpc(r.bass) - 1)}" if r.bass != 'G' else 'V7',
    ('M2', 'M3', 'P5', 'm7'): lambda r: f"V7/{tpc2rn(name2tpc(r.bass) - 1)}" if r.bass != 'G' else 'V7', ###
    ('M3', 'M6', 'P5', 'm7'): lambda r: f"V7/{tpc2rn(name2tpc(r.bass) - 1)}" if r.bass != 'G' else 'V7', ###
    ('M3', 'm7'):             lambda r: f"V7/{tpc2rn(name2tpc(r.bass) - 1)}" if r.bass != 'G' else 'V7',
    ('M2', 'M3', 'm7'):       lambda r: f"V7/{tpc2rn(name2tpc(r.bass) - 1)}" if r.bass != 'G' else 'V7',
    ('m3', 'm6'):             lambda r: f"{tpc2rn(name2tpc(r.bass)-4)}6",
    ('P4', 'm6'):             lambda r: f"V(64)/{tpc2rn(name2tpc(r.bass) - 1).lower()}" if r.bass != 'G' else 'V(64)',
    #('m6'): lambda r: f"{tpc2rn(name2tpc(r.bass)-4)}6",
    ('M3', 'M6'):             lambda r: f"{tpc2rn(name2tpc(r.bass)-4).lower()}6",
    #('M6'): lambda r: f"{tpc2rn(name2tpc(r.bass)-4).lower()}6",
    ('D5', 'm3', 'm6'):       lambda r: f"V65/{tpc2rn(name2tpc(r.bass) - 4)}" if r.bass != 'B' else 'V65',
    ('D5', 'm6'):             lambda r: f"V65/{tpc2rn(name2tpc(r.bass) - 4)}" if r.bass != 'B' else 'V65',
    ('M6', 'P4', 'm3'):       lambda r: f"V43/{tpc2rn(name2tpc(r.bass) - 2)}" if r.bass != 'D' else 'V43',
    ('P4', 'm3'):             lambda r: f"V43/{tpc2rn(name2tpc(r.bass) - 2)}" if r.bass != 'D' else 'V43',
    ('A4', 'M2', 'M6'):       lambda r: f"V2/{tpc2rn(name2tpc(r.bass) + 1)}" if r.bass != 'F' else 'V2',
    ('A4', 'M2'):             lambda r: f"V2/{tpc2rn(name2tpc(r.bass) + 1)}" if r.bass != 'F' else 'V2',
    ('M3', 'M6', 'P5'):       lambda r: f"ii65/{tpc2rn(name2tpc(r.bass) + 1)}" if r.bass != 'F' else 'ii65',
    ('M2', 'M7', 'P4', 'P5'): lambda r: f"{tpc2rn(name2tpc(r.bass))}(742)",
    ('D5', 'D7', 'm3'):       lambda r: f"viio7/{tpc2rn(name2tpc(r.bass) - 5)}" if r.bass != 'B' else 'viio7',
    ('M6', 'm3'):             lambda r: f"viio6/{tpc2rn(name2tpc(r.bass) - 2)}" if r.bass != 'D' else 'viio6',
}

def hard_labeling(segment_features, labelers=None):
    """Write a label into the 'label' column of every row with a known chord type.

    Parameters
    ----------
    segment_features : pd.DataFrame
        Must have an `intervals` column. The frame is modified in place:
        for every row whose `intervals` equal a key of `labelers`, the 'label'
        cell is set to the value the corresponding function returns for that row.
    labelers : dict, optional
        Maps an interval tuple to a function taking a row and returning its
        label. Defaults to the module-level `type2label`.

    Returns
    -------
    None
    """
    if labelers is None:
        labelers = type2label
    for ints, make_label in labelers.items():
        # Build the row selection once and reuse it for reading and writing.
        mask = segment_features.intervals == ints
        if not mask.any():
            # No row has this chord type: nothing to label, and applying the
            # function to an empty selection would not yield a label Series.
            continue
        segment_features.loc[mask, 'label'] = segment_features[mask].apply(make_label, axis=1)
# Fill the 'label' column of segment_features, then look at the first 50 rows
# stored under key 1 of schubert_segments.
hard_labeling(segment_features)
schubert_segments.loc[1].iloc[:50]

# Attach the per-segment features to every row of schubert_segments, matching
# on (id, segment_id).
feature_cols = ['bass', 'intervals', 'label', 'prev_ints', 'chord_length', 'offbeat']
status = schubert_segments.join(segment_features[feature_cols], on=['id', 'segment_id'])

# Inspections (top 50 each): what precedes ('M2', 'M6', 'P4') segments, how long
# segments consisting only of ('M3',) are, and what precedes the offbeat ones.
is_bare_third = status.intervals == ('M3',)
status.loc[status.intervals == ('M2', 'M6', 'P4'), 'prev_ints'].value_counts().iloc[:50]
status.loc[is_bare_third, 'chord_length'].value_counts().iloc[:50]
status.loc[is_bare_third & status.offbeat, 'prev_ints'].value_counts().iloc[:50]

# Interval tuples, including the empty one, that are each mapped to None.
# Judging by the name these are interval sets considered too sparse to identify
# a chord -- confirm the intended use.
# NOTE(review): ('D5', 'm6') is also a key of `type2label`.
underspecified = dict.fromkeys([
    (),
    ('M3',),
    ('P5',),
    ('m3',),
    ('P5', 'm7'),
    ('m7',),
    ('P4',),
    ('M2',),
    ('M6', 'P4', 'P5'),
    ('M7',),
    ('D5', 'm6'),
])



