This code parses the original MusicNet dataset and computes the features needed to train an expression model on melodic motifs.

After [obtaining the raw dataset](https://homes.cs.washington.edu/~thickstn/musicnet.html), extract it (or symlink it) to the paths given by the variables `labelsDir` and `audioDir`.

In [1]:
import re
import os

import numpy as np
import essentia.standard as ess
import arff

import expression_modeling as m

labelsDir = './data/musicnet/train_labels'
audioDir = './data/musicnet/train_data'

# Filter only the pieces which include the violin: a label file is kept when
# at least one of its rows matches the pattern below (instrument code 41).
# NOTE(review): `.*?` may also consume commas, so the pattern does not strictly
# pin 41 to the third column -- confirm this is fine for the label files.
violinRow = re.compile(r'^(.*?,){2}\s*?41\s*?,(.*?,){3}', re.MULTILINE)
dataset = []
for csv in os.listdir(labelsDir):
    # a context manager closes each label file right after it has been scanned
    with open(os.path.join(labelsDir, csv), 'r') as labelsFile:
        if violinRow.search(labelsFile.read()):
            dataset.append(csv)
#dataset = dataset[0:4]

# The ARFF output is opened lazily on the first piece, so nothing is created
# when no piece matched; `finally` guarantees it is closed (and flushed) even
# if processing one of the pieces fails.
outfile = None
try:
    for csv in dataset:

        #  load audio
        loader = ess.AudioLoader(filename=os.path.join(audioDir, csv.replace('.csv', '.wav')))
        audio, srate = loader()[0:2]

        # compute loudness as a dynamics estimate
        loudnessAlg = ess.LoudnessEBUR128(hopSize=0.1, sampleRate=srate)
        levels = loudnessAlg(audio)[0]

        # load the symbolic information from the dataset
        notearray = np.genfromtxt(os.path.join(labelsDir, csv), delimiter=',', names=True, dtype=['i', 'i', 'i', 'i', 'f', 'f', '|U40'])

        # piece key estimation (only major and minor for now)
        isMajor, key, llhoodM, llhoodm = m.estimateKey(notearray['note'])
        mode = m.Mode.major if isMajor else m.Mode.minor

        piece = m.Piece(key=key, mode=mode, name=csv)
        piece.parts = m.buildNoteParts(notearray, levels, srate)
        m.computeTimingDev(piece)
        d = m.toMotifDataset(piece, 41)

        # per-piece side output: the dataset's `moreVals` saved as a .npy file
        np.save('data/levels/'+ csv.replace('.csv', 'lvls.npy'), np.array(d.moreVals))

        if outfile is None:
            # first piece: create the file and write the full ARFF document
            outfile = open('data/allviolin.arff', 'w+')
            outfile.write(arff.dumps(d.toArffDict()))
        else:
            # later pieces are appended through toCsv
            # (presumably data rows only -- verify against expression_modeling)
            d.toCsv(outfile)
finally:
    # the original `outfile.close` only referenced the method without calling
    # it, and would raise AttributeError on None when no piece was processed
    if outfile is not None:
        outfile.close()

piece=2282.csv, 21 motifs
piece=2241.csv, 53 motifs
piece=2335.csv, 80 motifs
piece=2334.csv, 166 motifs
piece=2283.csv, 99 motifs
piece=1788.csv, 180 motifs
piece=2242.csv, 99 motifs
piece=1824.csv, 24 motifs
piece=2336.csv, 121 motifs
piece=2451.csv, 71 motifs
piece=2243.csv, 25 motifs
piece=1789.csv, 73 motifs
piece=2284.csv, 39 motifs


  for n in notes if n.durBeats > 0.125]  # thirty-seconds and faster disconsidered


piece=2131.csv, 53 motifs
piece=1835.csv, 106 motifs
piece=2482.csv, 221 motifs
piece=2497.csv, 114 motifs


  for n in notes if n.durBeats > 0.125]  # thirty-seconds and faster disconsidered


piece=2483.csv, 227 motifs
piece=2285.csv, 112 motifs
piece=2244.csv, 176 motifs
piece=1822.csv, 152 motifs
piece=2481.csv, 84 motifs
piece=2330.csv, 159 motifs
piece=2480.csv, 114 motifs
piece=2494.csv, 126 motifs
piece=2127.csv, 54 motifs
piece=1918.csv, 34 motifs
piece=2168.csv, 37 motifs
piece=2154.csv, 191 motifs
piece=2140.csv, 83 motifs
piece=2626.csv, 145 motifs
piece=2381.csv, 194 motifs
piece=1893.csv, 20 motifs
piece=2342.csv, 121 motifs
piece=2431.csv, 107 motifs
piece=2627.csv, 27 motifs
piece=2155.csv, 74 motifs
piece=2169.csv, 80 motifs
piece=1931.csv, 56 motifs
piece=1919.csv, 176 motifs
piece=1933.csv, 82 motifs
piece=1728.csv, 65 motifs
piece=2180.csv, 85 motifs
piece=2157.csv, 133 motifs
piece=2341.csv, 80 motifs
piece=2433.csv, 19 motifs
piece=2432.csv, 90 motifs
piece=2368.csv, 206 motifs
piece=2397.csv, 172 motifs
piece=2383.csv, 101 motifs
piece=2156.csv, 73 motifs
piece=1729.csv, 136 motifs
piece=1932.csv, 40 motifs
piece=1922.csv, 34 motifs
piece=1739.csv, 174 

<function TextIOWrapper.close()>

In [None]:
# number of label files that passed the violin filter used to build `dataset`
len(dataset)