# Invoking music21 to extract features for 30-second intervals

In [41]:
import os
import pickle as pkl

import music21
import music21.features as features
from music21 import midi
from music21 import stream
from helper_functions import *
from numpy.random import randint

import numpy as np

In [6]:
# Build the lookup of *.mid file paths for each composer.
# NOTE(review): make_composer_dict comes from helper_functions; it presumably
# returns {composer: list of file paths} (the result is indexed by composer
# name and passed to len() below) — confirm against the helper.

Composers = ['Bach','Brahms','Beethoven','Schubert']

midi_files = make_composer_dict(Composers,data_dir = './')

# Print the number of files found for each composer.

for composer in Composers:
    print(f'There are {len(midi_files[composer])} compositions by {composer}')

There are 17 compositions by Bach
There are 20 compositions by Brahms
There are 132 compositions by Beethoven
There are 25 compositions by Schubert


In [8]:
# Load one sample MIDI file (index 5 of the Schubert list) and translate it
# into a music21 stream; the next cell plays it.

audio_path = midi_files['Schubert'][5]

mf = midi.MidiFile()
mf.open(audio_path) # path='abc.midi'
try:
    mf.read()
finally:
    # Always release the file handle, even if the file cannot be parsed.
    mf.close()
s = midi.translate.midiFileToStream(mf)

In [10]:
# Print the path of the file that was loaded, then play the stream by
# calling s.show('midi').

print(audio_path)

s.show('midi')

./Schubert/Piano Sonata in C minor_D958_1759_d958-3.mid


## Extract 30-second streams from each composition

In [29]:
# Extract excerpts lasting just over T seconds: n_clips randomly placed
# excerpts from each of the first n_tracks MIDI files of a composer.

def _load_midi_stream(path):
    """Read the MIDI file at ``path`` and translate it into a music21 stream."""
    mf = midi.MidiFile()
    mf.open(path)
    try:
        mf.read()
    finally:
        # Release the file handle even when the file cannot be parsed.
        mf.close()
    return midi.translate.midiFileToStream(mf)


def extract_samples(composer,T=30,n_tracks=10,n_clips=3,show_streams=False,seed=None):
    """Cut random excerpts of more than ``T`` seconds from a composer's MIDI files.

    For each of the first ``n_tracks`` paths in the global
    ``midi_files[composer]``, ``n_clips`` start measures are drawn at random.
    Each excerpt is then lengthened one measure at a time (from 5 up to 49
    measures past the start) until its duration exceeds ``T`` seconds.  A start
    for which no excerpt exceeds ``T`` produces no clip.

    Args:
        composer: key into the global ``midi_files`` dictionary.
        T: minimum clip duration in seconds.
        n_tracks: number of files (taken from the front of the list) to sample.
        n_clips: number of random start measures drawn per file.
        show_streams: if true, call ``show('midi')`` on every accepted excerpt.
        seed: optional seed for a private random generator, making the chosen
            start measures reproducible.  With ``None`` (the default) the
            global numpy random state is used, as before.

    Returns:
        A list of tuples
        ``(composer, audio_path, start, stop, clip_time, excerpt_stream)``.
    """
    delta_measures = range(5, 50)

    # A private generator keeps seeded runs independent of global random state.
    draw = randint if seed is None else np.random.RandomState(seed).randint

    clips = []

    for audio_path in midi_files[composer][:n_tracks]:

        s = _load_midi_stream(audio_path)

        # Duration reported by the first entry of the stream's secondsMap.
        total_time = s.secondsMap[0]['durationSeconds']

        # NOTE(review): len(s[0]) is the element count of the first sub-stream
        # and is used here as the number of measures — TODO confirm.
        number_of_measures = len(s[0])

        print(audio_path,'lasts approx ',total_time,' seconds')

        # Start measures are drawn from [1, number_of_measures // 2); that
        # range is empty for very short pieces and randint would raise.
        latest_start = number_of_measures // 2
        if latest_start <= 1:
            print(audio_path,'is too short to sample from; skipping')
            continue

        # pick n_clips random measures to start clipping from
        start_measures = draw(1, latest_start, size=n_clips)

        for i, start in enumerate(start_measures):

            for delta in delta_measures:

                stop = start + delta

                excerpt_stream = s.measures(start, stop)

                clip_time = excerpt_stream.secondsMap[0]['durationSeconds']

                # keep the first excerpt that lasts longer than T seconds
                if clip_time > T:
                    print('clip: ',i,' --> ','t = ',clip_time,'start = ',start,'stop =',stop,' delta = ',delta)

                    clips.append((composer, audio_path, start, stop, clip_time, excerpt_stream))

                    if show_streams:
                        excerpt_stream.show('midi')

                    break

    return clips

In [37]:
# Sample 2 clips (each longer than 30 s) from each of the first 5 Bach files.
Bach_clips = extract_samples('Bach',T=30,n_tracks=5,n_clips=2)

./Bach/Cello Suite 3_BWV1009_2222_cs3-6gig.mid lasts approx  178.0581818181818  seconds
clip:  0  -->  t =  30.75 start =  50 stop = 90  delta =  40
clip:  1  -->  t =  30.75 start =  64 stop = 104  delta =  40
./Bach/Violin Sonata in B minor_BWV1014_2284_vhs1_3.mid lasts approx  151.34728996720207  seconds
clip:  0  -->  t =  32.0 start =  8 stop = 23  delta =  15
clip:  1  -->  t =  32.0 start =  7 stop = 22  delta =  15
./Bach/Violin Sonata No 1 in G minor_BWV1001_2243_vs1_3.mid lasts approx  186.63523499970867  seconds
clip:  0  -->  t =  52.658292383292384 start =  4 stop = 9  delta =  5
clip:  1  -->  t =  46.658292383292384 start =  3 stop = 8  delta =  5
./Bach/Violin Partita No 3 in E major_BWV1006_2186_vs6_1.mid lasts approx  213.22971249764726  seconds
clip:  0  -->  t =  30.271089779785434 start =  25 stop = 44  delta =  19
clip:  1  -->  t =  30.286363636363642 start =  42 stop = 61  delta =  19
./Bach/Violin Sonata No 1 in G minor_BWV1001_2242_vs1_2.mid lasts approx  329.

In [40]:
# Check that a stored clip can be accessed and played: the excerpt stream is
# the last element of each clip tuple returned by extract_samples.

clip_stream = Bach_clips[2][-1]

print()

clip_stream.show('midi')




## Now use the feature extraction modules in music21 to get a set of numeric features describing each clip

In [45]:
def features_from_stream(s,output=False):
    """Compute a vector of single-valued jSymbolic features for a stream.

    Every extractor registered in ``features.jSymbolic.extractorsById`` is
    tried, except those that are not implemented (absent from
    ``features.jSymbolic.featureExtractors``) and those whose name contains one
    of the excluded keywords below.  Extractors that raise, or whose result
    vector does not hold exactly one value, are skipped.

    Note that the length of the returned vector therefore depends on which
    extractors succeed for the given stream.

    Args:
        s: music21 stream to analyse.
        output: if true, print one line per kept feature (and per extractor
            that failed).

    Returns:
        1-D numpy array with one entry per kept feature, in registry order.
    """
    # Narrowing down useful features to use: skip any extractor whose name
    # contains one of these keywords.
    matches = ["Fraction", "Instrument", "Meter", "Tempo", "Voice"]

    implemented = features.jSymbolic.featureExtractors

    feature_vector = []

    for k, extractors in features.jSymbolic.extractorsById.items():
        for i, extractor in enumerate(extractors):
            # Some slots in the registry are None; others are declared but
            # not implemented. Neither can be run.
            if extractor is None or extractor not in implemented:
                continue

            # throw away the trailing 'Feature' from the class name
            name = extractor.__name__[:-7]

            if any(x in name for x in matches):
                continue

            fe = extractor(s)

            try:
                val = fe.extract().vector
            except Exception as err:
                # Best effort: a feature that cannot be computed for this
                # stream is left out, but Ctrl-C still interrupts the run.
                if output: print(f'{k} {i} {name} skipped: {err}')
                continue

            # take single valued features to make life easier
            if len(val) != 1:
                continue

            feature_vector.append(val[0])
            if output: print(f'{k} {i} {name} {val}')

    return np.array(feature_vector)

In [46]:
# Extract features for the sample clip, printing each kept feature as it is computed.
feature_vector = features_from_stream(clip_stream,output=True)

In [49]:
# Display the feature vector extracted for the sample clip.
feature_vector

array([2.91242363e+00, 2.00000000e+00, 1.00000000e+00, 3.46232179e-01,
       8.11764706e-01, 2.00000000e+00, 2.56619145e-01, 3.86965377e-02,
       2.81059063e-01, 6.27291242e-01, 1.38492872e-01, 3.46232179e-02,
       1.83299389e-02, 3.46232179e-02, 3.94736842e-01, 1.76428571e+00,
       5.25000000e+00, 1.03202847e-01, 1.72597865e-01, 8.79310345e-01,
       8.04123711e-01, 2.00000000e+00, 2.00000000e+00, 2.00000000e+00,
       4.80000000e+01, 1.20000000e+01, 4.90000000e+01, 7.80000000e+01,
       6.85765125e+01, 1.74377224e-01, 3.20284698e-01, 5.05338078e-01,
       6.00000000e+00, 0.00000000e+00, 1.62500000e+01, 1.77483974e-01,
       1.04069697e-01, 1.12500000e+00, 1.25000000e-01, 0.00000000e+00,
       1.28528226e-01, 2.29474644e-02, 3.20000000e+01])

# Construct data set 

In [52]:
# Build the feature matrix for a list of clips

def extract_data(clips):
    """Extract features from every clip and stack them into one array.

    Args:
        clips: sequence of clip tuples whose last element is the music21
            stream to analyse (the layout produced by extract_samples).

    Returns:
        Array from ``np.vstack`` with one row of features per clip.
    """
    print(f"Beginning Feature Extraction for {len(clips)} clips")

    rows = []

    for index, clip in enumerate(clips):
        print(f"extracting clip {index}...")
        # the excerpt stream is stored as the last element of the clip tuple
        rows.append(features_from_stream(clip[-1], output=False))

    return np.vstack(rows)

In [53]:
# Feature matrix for the Bach clips (one row per clip).
X_Bach = extract_data(Bach_clips)

Beginning Feature Extraction for 10 clips
extracting clip 0...
extracting clip 1...
extracting clip 2...
extracting clip 3...
extracting clip 4...
extracting clip 5...
extracting clip 6...
extracting clip 7...
extracting clip 8...
extracting clip 9...


In [59]:
# Display the extracted Bach features.
X_Bach

array([3.74264706e+00, 2.00000000e+00, 1.00000000e+00, 3.45588235e-01,
       5.53191489e-01, 2.00000000e+00, 3.16176471e-01, 6.61764706e-02,
       1.91176471e-01, 5.36764706e-01, 1.54411765e-01, 7.35294118e-03,
       7.35294118e-03, 2.20588235e-02, 4.14285714e-01, 1.86666667e+00,
       6.96000000e+00, 1.28654971e-01, 1.87134503e-01, 6.81818182e-01,
       9.68750000e-01, 2.00000000e+00, 2.00000000e+00, 1.00000000e+00,
       2.50000000e+01, 1.10000000e+01, 3.10000000e+01, 7.20000000e+01,
       7.05789474e+01, 0.00000000e+00, 6.43274854e-01, 3.56725146e-01,
       0.00000000e+00, 0.00000000e+00, 3.32202471e+00, 4.87027084e-01,
       2.54149891e-01, 1.20042120e+00, 1.25000000e-01, 0.00000000e+00,
       4.24125880e-01, 2.13585888e-01, 4.66582924e+01])

In [60]:
# Same sampling and feature extraction for Brahms: 2 clips from each of the first 5 files.
Brahms_clips = extract_samples('Brahms',T=30,n_tracks=5,n_clips=2)

X_Brahms = extract_data(Brahms_clips)

./Brahms/String Sextet No 2 in G major_OP36_2147_br36m4.mid lasts approx  555.7777490470717  seconds
clip:  0  -->  t =  31.999931647300066 start =  27 stop = 39  delta =  12
clip:  1  -->  t =  31.85835686053077 start =  2 stop = 14  delta =  12
./Brahms/Piano Quartet No 1 in G minor_OP25_2151_br25m4.mid lasts approx  480.1360988926574  seconds
clip:  0  -->  t =  30.918881118881117 start =  148 stop = 180  delta =  32
clip:  1  -->  t =  30.021153846153847 start =  40 stop = 69  delta =  29
./Brahms/Horn Trio in E-flat major_OP40_2158_bra40_1.mid lasts approx  484.7308941926688  seconds
clip:  0  -->  t =  31.0 start =  43 stop = 73  delta =  30
clip:  1  -->  t =  31.0 start =  15 stop = 45  delta =  30
./Brahms/Piano Quartet No 1 in G minor_OP25_2150_br25m3.mid lasts approx  641.6591291851082  seconds
clip:  0  -->  t =  30.080036630036627 start =  62 stop = 78  delta =  16
clip:  1  -->  t =  31.5 start =  17 stop = 37  delta =  20
./Brahms/Clarinet Sonata No 1 in F minor_OP120NO1

In [62]:
# Same sampling and feature extraction for Beethoven: 2 clips from each of the first 5 files.
Beethoven_clips = extract_samples('Beethoven',T=30,n_tracks=5,n_clips=2)

X_Beethoven = extract_data(Beethoven_clips)

./Beethoven/String Quartet No 1 in F Major_OP18NO1_2403_qt01_4.mid lasts approx  315.7202498935679  seconds
clip:  0  -->  t =  30.34081742570864 start =  122 stop = 156  delta =  34
clip:  1  -->  t =  30.16111375629909 start =  140 stop = 174  delta =  34
./Beethoven/Piano Sonata No 20 in C minor_OP111_2531_ps32_02.mid lasts approx  951.9290715488472  seconds
clip:  0  -->  t =  31.63888888888889 start =  33 stop = 54  delta =  21
clip:  1  -->  t =  31.155405405405407 start =  65 stop = 74  delta =  9
./Beethoven/Piano Sonata No 8 in C minor_OP13_2576_ps08_03.mid lasts approx  263.80465302836416  seconds
clip:  0  -->  t =  32.0 start =  99 stop = 114  delta =  15
clip:  1  -->  t =  32.0 start =  83 stop = 98  delta =  15
./Beethoven/Piano Sonata No 7 in D major_OP10NO3_2611_ps07_02.mid lasts approx  439.44140007437244  seconds
clip:  0  -->  t =  30.115877965877967 start =  83 stop = 95  delta =  12
clip:  1  -->  t =  31.28333333333333 start =  55 stop = 79  delta =  24
./Beethov

In [63]:
# Same sampling and feature extraction for Schubert: 2 clips from each of the first 5 files.
Schubert_clips = extract_samples('Schubert',T=30,n_tracks=5,n_clips=2)

X_Schubert = extract_data(Schubert_clips)

./Schubert/4 Impromptus_OP142_1766_scbt1424.mid lasts approx  406.6597554277911  seconds
clip:  0  -->  t =  30.006364241155516 start =  31 stop = 70  delta =  39
clip:  1  -->  t =  30.68493566972694 start =  52 stop = 93  delta =  41
./Schubert/Piano Sonata in C minor_D958_1757_d958-1.mid lasts approx  602.4348325782179  seconds
clip:  0  -->  t =  30.028041451448242 start =  47 stop = 62  delta =  15
clip:  1  -->  t =  30.267071506779363 start =  112 stop = 132  delta =  20
./Schubert/Piano Sonata in A minor_D784_1756_sy_sps53.mid lasts approx  267.58643175879286  seconds
clip:  0  -->  t =  30.5 start =  50 stop = 69  delta =  19
clip:  1  -->  t =  30.110580937154957 start =  12 stop = 40  delta =  28
./Schubert/4 Impromptus_OP142_1765_scbt1423.mid lasts approx  588.7714057000203  seconds
clip:  0  -->  t =  32.0 start =  19 stop = 34  delta =  15
clip:  1  -->  t =  32.0 start =  73 stop = 88  delta =  15
./Schubert/Piano Quintet in A major_OP114_1727_schubert_op114_2.mid lasts 

In [68]:
# Class labels, one per feature row, stored as floats:
# 0 = Bach, 1 = Beethoven, 2 = Brahms, 3 = Schubert.
# Note: this numbering does not follow the order of the Composers list.
y_Bach = np.full(len(X_Bach), 0.0)
y_Beethoven = np.full(len(X_Beethoven), 1.0)
y_Brahms = np.full(len(X_Brahms), 2.0)
y_Schubert = np.full(len(X_Schubert), 3.0)

# Show each label vector on its own line.
print(y_Bach, y_Beethoven, y_Brahms, y_Schubert, sep='\n')

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]


In [75]:
# Assemble the training set; features and labels are stacked in the same
# composer order (Bach, Beethoven, Brahms, Schubert) so rows stay aligned.
feature_blocks = [X_Bach, X_Beethoven, X_Brahms, X_Schubert]
label_blocks = [y_Bach, y_Beethoven, y_Brahms, y_Schubert]

train_x = np.vstack(feature_blocks)
train_y = np.hstack(label_blocks)

print(train_x.shape, train_y.shape, sep='\n')

(40, 43)
(40,)


In [79]:

# Location of the pickled training set, stored as [features, labels].
dataset_path = './DataStreamFeatures/TrainData.pkl'

# open(..., "wb") does not create missing parent folders, so make sure the
# output directory exists before writing.
os.makedirs(os.path.dirname(dataset_path), exist_ok=True)

# Save the feature matrix and the label vector together in one file.
with open(dataset_path, "wb") as f:
    pkl.dump([train_x, train_y], f)



In [82]:
# Load the [features, labels] pair back from disk.
# NOTE: unpickling can execute arbitrary code; only load files you wrote
# yourself or otherwise trust.
with open(dataset_path, "rb") as f:
    train_X, train_Y = pkl.load(f)

In [84]:
# Sanity check: inspect the type of the reloaded feature matrix.
type(train_X)

numpy.ndarray

In [86]:
# Sanity check: shape of the reloaded feature matrix.
train_X.shape

(40, 43)

In [87]:
# Sanity check: shape of the reloaded label vector.
train_Y.shape

(40,)