# Preprocessing

In [1]:
%load_ext autoreload
%autoreload 2

# External imports
import os
import pandas

# Internal imports
from tools.ms3 import *
from tools.helpers import *
from tools import xcor as cx

In [2]:
# Configuration: every path is relative and rooted at the `data` folder.
data = "data"
# Derive both sub-folders from the same root in one step.
data_ms3, data_tsv = (os.path.join(data, subdir) for subdir in ("MuseScore_3", "tsv"))

In [3]:
# Load the datasets
#
# The tables are read through the explicitly imported `pandas` module. The
# notebook never imports a `pd` alias itself, so relying on `pd` would only
# work as long as one of the wildcard imports from `tools.*` happens to
# export it.

# Note list: tab-separated, the first three columns form the (multi-)index.
# "Int64" is pandas' nullable integer dtype, so missing values in `tied` and
# `volta` do not force the columns to float.
# `frac` comes from the project's wildcard imports; presumably it parses the
# textual fraction stored in the file -- TODO confirm against tools.
note_list = pandas.read_csv(
    os.path.join(data_tsv, "note_list_complete.tsv"),
    sep="\t",
    index_col=[0, 1, 2],
    dtype={"tied": "Int64",
           "volta": "Int64"},
    converters={"onset": frac,
                "duration": frac,
                "nominal_duration": frac,
                "scalar": frac},
)

# Measure list: tab-separated, the first two columns form the (multi-)index.
# The `next` column is stored as text such as "[2, 5]"; the converter strips
# the brackets and turns every non-empty item into an int, so "[]" yields an
# empty list.
measure_list = pandas.read_csv(
    os.path.join(data_tsv, "measure_list_complete.tsv"),
    sep="\t",
    index_col=[0, 1],
    dtype={"volta": "Int64",
           "numbering_offset": "Int64",
           "dont_count": "Int64"},
    converters={"duration": frac,
                "act_dur": frac,
                "offset": frac,
                "next": lambda raw: [int(mc) for mc in raw.strip("[]").split(", ") if mc != ""]},
)

# Section order: the first column is the index; the column called "object"
# in the file is exposed as "sections".
section_order = pandas.read_csv(
    os.path.join(data_tsv, "section_order_complete.tsv"),
    sep="\t",
    index_col=[0],
).rename(columns={"object": "sections"})

# Playground (for now)

In [4]:
# One CrossCorrelation per first-level index label 1..9 of `note_list`
# (presumably one per piece -- TODO confirm). Each object receives
# `cx.product_harmorhythm` and a lazy stream of the second item of every pair
# yielded by `iter_measures` for that slice of the note list.
xcors = tuple(
    cx.CrossCorrelation(
        cx.product_harmorhythm,
        (measure for _, measure in iter_measures(note_list.loc[piece_id])),
    )
    for piece_id in range(1, 10)
)

In [5]:
# For every cross-correlation keep only the non-negative offsets of its
# slide, remember the resulting {offset: value} mapping and plot it.
slides = []
for xcor in xcors:
    slide = {offset: value for offset, value in xcor.slide() if offset >= 0}
    slides.append(slide)
    # One row per offset, a single column named "slide".
    pandas.DataFrame.from_dict(slide, orient="index", columns=["slide"]).plot()

In [6]:
# Spike detection: print, per slide, the tuple of values produced by
# `cx.detect_spikes` when fed the slide's (offset, value) pairs.
# NOTE(review): `pid` is the 0-based position in `slides`, whereas the
# cross-correlations were built from index labels 1..9 -- confirm which
# numbering is wanted in the printout.
for pid, slide in enumerate(slides):
    spikes = tuple(cx.detect_spikes(iter(slide.items())))
    print("%d: %r" % (pid, spikes))

0: (4, 8, 12)
1: (2, 4, 6, 8, 10, 14, 16)
2: (6, 8, 12)
3: (2, 5, 7, 9, 12, 14, 16, 18)
4: (5, 10, 12, 15)
5: (2, 6, 8, 10, 12)
6: (2, 4, 9, 11, 13, 15, 17)
7: (2, 10, 12)
8: (4, 8, 12)


In [7]:
# Structure discovery: `cx.detect_structure` returns a sequence of string
# labels and a second value (named `trigger` here); print the labels
# concatenated, followed by that value in parentheses.
for xcor in xcors:
    struct, trigger = cx.detect_structure(xcor)
    labels = "".join(struct)
    print("%s (%s)" % (labels, trigger))

AABA (0.6)
AAAAABAC (0.1)
ABAA (0.1)
AAABAAAA (0.2)
A (0.9)
ABAAAB (0.1)
ABCAB (0.2)
A (0.9)
AABA (0.8)
