This code parses through the original MusicNet dataset computing the relevant features for training an expression model on melodic Motifs.

After [obtaining the raw dataset](https://homes.cs.washington.edu/~thickstn/musicnet.html), extract it or symlink according to the path given by variables `labelsdir` and `audiodir`.

In [3]:
import re
import os

import numpy as np
import pandas as pd
from IPython.display import clear_output

import expression_modeling as m

def preprocess(labelsDir, audioDir, instrument, csvname):

    # filter only pieces which inlude the desired instrument
    dataset = [csv for csv in os.listdir(labelsDir) if re.search(r'^(.*?,){2}\s*?' + str(instrument) + '\s*?,(.*?,){3}', open(os.path.join(labelsDir, csv), 'r').read(), re.MULTILINE)]
    #dataset = dataset[0:4]
    outfile = None
    for csv in dataset:

        #  load levels (generated by "Levels computation" notebook)
        levels = np.load('data/levels/' + csv.replace('.csv', '_global_lvls.npy'))

        # load the symbolic information from the dataset
        notearray = np.genfromtxt(os.path.join(labelsDir, csv), delimiter=',', names=True, dtype=['i', 'i', 'i', 'i', 'f', 'f', '|U40'])

        # piece key estimation (only major and minor for now)
        isMajor, key, llhoodM, llhoodm = m.estimateKey(notearray['note'])
        mode = m.Mode.major if isMajor else m.Mode.minor

        piece = m.Piece(key=key, mode=mode, name=csv)
        piece.dynMean = np.mean(levels)
        piece.dynStd = np.std(levels)
        piece.parts = m.buildNoteParts(notearray, (levels - piece.dynMean)/piece.dynStd, 44100, {instrument})
        m.computeTimingDev(piece)
        d, lvls = m.toMotifDataframe(piece, instrument)
        np.save('data/levels/'+ csvname + csv.replace('.csv', 'lvls.npy'), np.array(lvls))
        df = pd.DataFrame(data=d)
        if outfile is None:
            outfile = open('data/' + csvname + '.csv', 'w+')
            df.to_csv(outfile)
        else:
            df.to_csv(outfile, header=False)
    outfile.close

# print('Begin processing violin training set')
# preprocess('./data/musicnet/train_labels', './data/musicnet/train_data', 41, 'violin')
#clear_output()
#print('Begin processing violin test set')
#preprocess('./data/musicnet/test_labels', './data/musicnet/test_data', 41, 'violintest')
clear_output()
print('Begin processing viola training set')
preprocess('./data/musicnet/train_labels', './data/musicnet/train_data', 42, 'viola')
# clear_output()
print('Begin processing viola test set')
preprocess('./data/musicnet/test_labels', './data/musicnet/test_data', 42, 'violatest')
# clear_output()
#print('Begin processing cello training set')
#preprocess('./data/musicnet/train_labels', './data/musicnet/train_data', 43, 'cello')
#clear_output()
#print('Begin processing cello test set')
#preprocess('./data/musicnet/test_labels', './data/musicnet/test_data', 43, 'cellotest')
# clear_output()
print('Begin processing flute training set')
preprocess('./data/musicnet/train_labels', './data/musicnet/train_data', 74, 'flute')
# clear_output()
print('Begin processing flute test set')
preprocess('./data/musicnet/test_labels', './data/musicnet/test_data', 74, 'flutetest')
clear_output()
# print('Begin processing clarinet training set')
# preprocess('./data/musicnet/train_labels', './data/musicnet/train_data', 72, 'clarinet')
# clear_output()
# print('Begin processing clarinet test set')
# preprocess('./data/musicnet/test_labels', './data/musicnet/test_data', 72, 'clarinettest')


Begin processing viola training set
piece=2241.csv, 9 motifs
piece=1788.csv, 105 motifs
piece=2242.csv, 20 motifs
piece=1824.csv, 42 motifs
piece=2451.csv, 40 motifs
piece=2243.csv, 15 motifs
piece=1789.csv, 73 motifs
piece=2131.csv, 331 motifs
piece=1835.csv, 83 motifs
piece=2482.csv, 175 motifs
piece=2497.csv, 258 motifs
piece=2483.csv, 129 motifs
piece=1822.csv, 193 motifs
piece=2481.csv, 31 motifs
piece=2480.csv, 70 motifs
piece=2494.csv, 163 motifs
piece=2127.csv, 80 motifs
piece=1918.csv, 37 motifs
piece=2168.csv, 43 motifs
piece=2154.csv, 214 motifs
piece=2140.csv, 76 motifs
piece=2381.csv, 243 motifs
piece=2431.csv, 146 motifs
piece=2155.csv, 299 motifs
piece=2169.csv, 80 motifs
piece=1931.csv, 209 motifs
piece=1919.csv, 205 motifs
piece=1933.csv, 216 motifs
piece=1728.csv, 55 motifs
piece=2180.csv, 326 motifs
piece=2157.csv, 324 motifs
piece=2433.csv, 19 motifs
piece=2432.csv, 140 motifs
piece=2368.csv, 426 motifs
piece=2383.csv, 154 motifs
piece=2156.csv, 67 motifs
piece=1729

  for n in notes if n.durBeats > 0.125]  # thirty-seconds and faster disconsidered


piece=2147.csv

  rhContour = np.array([n.ioiBeats / n.prevNote.ioiBeats if n.prevNote is not None else 1 for n in motif])


, 174 motifs
piece=2621.csv, 132 motifs
piece=1923.csv, 204 motifs
piece=2186.csv, 101 motifs
piece=2151.csv, 142 motifs
piece=2179.csv, 195 motifs
piece=2384.csv, 45 motifs
piece=2178.csv, 521 motifs
piece=2622.csv, 161 motifs
piece=2150.csv, 129 motifs
piece=2149.csv, 116 motifs
piece=1859.csv, 37 motifs
piece=2377.csv, 132 motifs
piece=2376.csv, 288 motifs
piece=2148.csv, 234 motifs
piece=2177.csv, 539 motifs
piece=1730.csv, 196 motifs
piece=2167.csv, 30 motifs
piece=2403.csv, 205 motifs
piece=2365.csv, 99 motifs
piece=2166.csv, 75 motifs
piece=2562.csv, 93 motifs
piece=1916.csv, 240 motifs
piece=2560.csv, 40 motifs
piece=1727.csv, 525 motifs
piece=2366.csv, 192 motifs
piece=1812.csv, 57 motifs
piece=2314.csv, 161 motifs
piece=2315.csv, 78 motifs
piece=1813.csv, 140 motifs
piece=1807.csv, 51 motifs
piece=1742.csv, 58 motifs
piece=1811.csv, 45 motifs
piece=1805.csv, 94 motifs
piece=1790.csv, 59 motifs
piece=2104.csv, 104 motifs
piece=2138.csv, 46 motifs
piece=2313.csv, 146 motifs
pie

AttributeError: 'NoneType' object has no attribute 'close'