# Preprocessing

In [1]:
%load_ext autoreload
%autoreload 2

# External imports
import os
import pandas
import pickle

# Internal imports
from tools.ms3 import *
from tools.helpers import *
from tools import xcor as cx

In [2]:
# Configuration: every dataset lives in a sub-folder of the root data directory
data = "data"
data_ms3, data_tsv = (os.path.join(data, sub) for sub in ("MuseScore_3", "tsv"))

In [3]:
# Load the datasets
def _parse_mc_list(cell):
    """Parse one textual cell of the ``next`` column into a list of ints.

    The cell is a bracketed, comma-separated list such as ``"[2, 3]"``.
    Surrounding brackets are stripped, blank items are ignored (so ``"[]"``
    yields an empty list) and whitespace around items is tolerated, so
    ``"[2,3]"`` and ``"[2, 3]"`` parse identically.
    """
    return [int(mc) for mc in cell.strip("[]").split(",") if mc.strip()]

# NOTE(review): `frac` is not defined in this notebook; it presumably comes from
# one of the wildcard `tools` imports and parses fraction strings -- confirm.
# `pandas` is referenced by its explicitly imported name rather than through a
# `pd` alias that is only available via those wildcard imports.

# Piece metadata, indexed by the first column of the file
files = pandas.read_csv(os.path.join(data_ms3, 'merged_ids.tsv'), sep='\t', index_col=0)

# Note list with a three-level index; "tied"/"volta" use the nullable Int64 dtype
note_list = pandas.read_csv(os.path.join(data_tsv, "note_list_complete.tsv"), sep='\t', index_col=[0,1,2],
                            dtype={"tied": "Int64",
                                   "volta": "Int64"},
                            converters={"onset":frac,
                                        "duration":frac,
                                        "nominal_duration":frac,
                                        "scalar":frac})

# Measure list with a two-level index; "next" is converted to a list of ints
measure_list = pandas.read_csv(os.path.join(data_tsv, "measure_list_complete.tsv"), sep="\t", index_col=[0,1],
                               dtype={"volta": "Int64",
                                      "numbering_offset": "Int64",
                                      "dont_count": "Int64"},
                               converters={"duration": frac,
                                           "act_dur": frac,
                                           "offset": frac,
                                           "next": _parse_mc_list})

# Section order, with the "object" column renamed to "sections"
section_order = pandas.read_csv(os.path.join(data_tsv, "section_order_complete.tsv"), sep="\t", index_col = [0])\
                      .rename(columns={"object": "sections"})

## Cross-correlations with `harmorhythm` product

In [4]:
# Location of the pickled cross-correlation cache, and a switch to bypass it
path_xcors         = os.path.join(data_ms3, 'xcors.dat')
force_compute_xcor = False

if force_compute_xcor or not os.path.exists(path_xcors):
    # No usable cache (or recomputation requested): build one CrossCorrelation
    # per first-level index group of the note list, then persist the result
    xcors = dict()
    for pid, notes in note_list.groupby(level=0):
        print(".", end="", flush=True)  # one dot per group as a progress indicator
        measures = (meas for _, meas in iter_measures(notes, volta=-1))
        xcors[pid] = cx.CrossCorrelation(cx.product_harmorhythm, measures)
    with open(path_xcors, "wb") as cache_file:
        pickle.dump(xcors, cache_file, protocol=-1, fix_imports=False)
else:
    # Reuse the cached cross-correlations
    # NOTE(review): unpickling can execute arbitrary code -- only load a cache
    # file that was produced by this notebook.
    with open(path_xcors, "rb") as cache_file:
        xcors = pickle.load(cache_file, fix_imports=False)

# Analysis

## Dance structures

In [5]:
# Map trigger level to human-readable name
def trigger_to_name(trig):
    """Translate a numeric trigger level into a confidence label.

    Each label applies to levels strictly below its upper bound:
    below 0.2 "wild guess", below 0.4 "unsure", below 0.6 "presumably",
    below 0.8 "confident"; anything else is "certain".
    """
    upper_bounds = (
        (0.2, "wild guess"),
        (0.4, "unsure"),
        (0.6, "presumably"),
        (0.8, "confident"),
    )
    # Bounds are ascending, so the first one exceeding `trig` decides the label
    for bound, label in upper_bounds:
        if trig < bound:
            return label
    return "certain"

# Automated structure detection: print one aligned report line per piece
for pid, xcor in xcors.items():
    pinfo = files.loc[pid]
    name = "D{!s} {!s} n°{!s}:".format(pinfo["D"], pinfo["dance"], pinfo["no"])
    try:
        struct, trigger = cx.detect_structure(xcor)
        # Fewer than two parts is reported through the same failure path
        if len(struct) < 2:
            raise RuntimeError("Could not find at least 2 distinct parts")
        value = "{!s} ({!s})".format("".join(struct), trigger_to_name(trigger))
    except Exception as err:
        # Best effort: report the failure for this piece and carry on
        value = "FAIL: {!s}".format(err)
    # Pad the label to 27 columns so that the results line up
    print(name.ljust(27) + value)

D41 menuett n°1:           AABA (confident)
D41 trio n°1:              AAAAABAC (wild guess)
D41 menuett n°2:           ABAA (wild guess)
D41 trio n°2:              AAABAAAA (unsure)
D41 menuett n°3:           FAIL: Could not find at least 2 distinct parts
D41 trio n°3:              ABAAAB (wild guess)
D41 menuett n°4:           ABAC (presumably)
D41 trio n°4:              FAIL: Could not find at least 2 distinct parts
D41 menuett n°5:           AABA (certain)
D41 trio n°5:              ABAA (wild guess)
D41 menuett n°6:           FAIL: Could not find at least 2 distinct parts
D41 trio n°6:              ABA (presumably)
D41 menuett n°7:           FAIL: Could not find at least 2 distinct parts
D41 trio n°7:              FAIL: Could not find at least 2 distinct parts
D41 menuett n°8:           AABA (presumably)
D41 trio n°8:              ABA (presumably)
D41 menuett n°9:           FAIL: Could not find at least 2 distinct parts
D41 trio n°9:              ABAAACAD (unsure)
D41 menuett n°10

D420 deutscher n°12:       ABCBD (certain)
D421 ecossaise n°2:        AABAAA (wild guess)
D421 ecossaise n°3:        ABAA (wild guess)
D421 ecossaise n°4:        FAIL: Could not find at least 2 distinct parts
D421 ecossaise n°5:        FAIL: Could not find at least 2 distinct parts
D421 ecossaise n°6:        AABA (presumably)
D511 ecossaise n°1:        ABAAACA (unsure)
D529 ecossaise n°1:        AABAC (unsure)
D529 ecossaise n°2:        ABAAAC (unsure)
D529 ecossaise n°4:        AABAC (presumably)
D529 ecossaise n°5:        ABCBD (confident)
D529 ecossaise n°6:        ABA (presumably)
D529 ecossaise n°7:        ABACAA (unsure)
D529 ecossaise n°8:        ABAB (unsure)
D600 menuett n°1:          ABAA (unsure)
D610 trio n°1:             ABBBBCBCC (unsure)
D643 deutscher n°1:        FAIL: Could not find at least 2 distinct parts
D643 ecossaise n°1:        ABCBD (unsure)
D681 ländler n°1:          FAIL: Could not find at least 2 distinct parts
D681 ländler n°2:          ABCB (presumably)
D6

# _Playground_

In [6]:
# Auto-correlation plots, limited to the first 10 cross-correlations
slides = []
for xcor in list(xcors.values())[:10]:
    # Keep only the non-negative offsets of the slide
    slide = {off: val for off, val in xcor.slide() if off >= 0}
    slides.append(slide)
    pandas.DataFrame.from_dict(slide, orient="index", columns=["slide"]).plot()

In [7]:
# Spike detection: list the detected spikes for each collected slide
for pid, slide in enumerate(slides):
    spikes = tuple(cx.detect_spikes(iter(slide.items())))
    print("{:d}: {!r}".format(pid, spikes))

0: (4, 8, 12)
1: (2, 4, 6, 8, 10, 14, 16)
2: (6, 8, 12)
3: (2, 5, 7, 9, 12, 14, 16, 18)
4: (5, 10, 12, 15)
5: (2, 6, 8, 10, 12)
6: (2, 4, 11, 13, 15, 17)
7: (2, 10, 12)
8: (4, 8, 12)
9: (5, 8, 12)
