# Invoking music21 to extract features for 30 second intervals

In [46]:
import music21
import music21.features as features
from music21 import midi
from music21 import stream
from helper_functions import *
from numpy.random import randint

import numpy as np

from collections import namedtuple

In [2]:
# get dictionary paths to *.mid files for each composer 

Composers = ['Bach','Brahms','Beethoven','Schubert']

midi_files = make_composer_dict(Composers,data_dir = './')

# print number of files each folder

for composer in Composers:
    print(f'There are {len(midi_files[composer])} compositions by {composer}')

There are 17 compositions by Bach
There are 20 compositions by Brahms
There are 132 compositions by Beethoven
There are 25 compositions by Schubert


In [3]:
# play sample stream from MIDI file
 
audio_path = midi_files['Bach'][12]

mf = midi.MidiFile()
mf.open(audio_path) # path='abc.midi'
mf.read()
mf.close()
s = midi.translate.midiFileToStream(mf)

In [4]:
# play the stream by calling s.show('midi')

print(audio_path)

s.show('midi')

./Bach/Violin Partita No 1 in B minor_BWV1002_2288_vs2_2.mid


## Extract 30 second duration streams from each composition

In [5]:
# for seeding random samples 

from numpy.random import Generator, PCG64
rng = Generator(PCG64())

In [6]:
# function to extract n streams, target duration T seconds for composer 

# extracts n_clips each from n_tracks

# will use namedtuples as convient way to store clips, composer info

Clip = namedtuple('Clip',['composer','path','start','stop','seconds','stream'])

def extract_samples(composer,T=30,n_tracks=10,n_clips=3,show_streams=False):
    delta_measures = list(range(5,50,1))

    clips = []
    
    for audio_path in midi_files[composer][:n_tracks]:

        mf = midi.MidiFile()
        mf.open(audio_path) # path='abc.midi'
        mf.read()
        mf.close()
        s = midi.translate.midiFileToStream(mf)

        total_time = s.secondsMap[0]['durationSeconds']

        number_of_measures = len(s[0])  # think this is total number of measures in the stream?

        print(audio_path,'lasts approx ',total_time,' seconds')
        
        #pick n_clips random measures to start clipping from 
        
        #start_measures = randint(1,number_of_measures//2,size=n_clips)
        start_measures = rng.integers(1,number_of_measures//2,size=n_clips)
        
        for i,start in enumerate(start_measures):
            
            for delta in delta_measures:

                stop = start+delta

                excerpt_stream = s.measures(start,stop)

                clip_time = excerpt_stream.secondsMap[0]['durationSeconds']

                if clip_time>T: # clip sample streams until time is longer than T
                    print('clip: ',i,' --> ','t = ',clip_time,'start = ',start,'stop =',stop,' delta = ',delta)

                    #clip = (composer,audio_path,start,stop,clip_time,excerpt_stream)
                    
                    clip = Clip(composer=composer,
                                path=audio_path,
                                start=start,
                                stop=stop,
                                seconds=clip_time,
                                stream = excerpt_stream)
                    
                    clips.append(clip)
                    
                    if show_streams: excerpt_stream.show('midi')
                        
                    break
                    
    
    return clips

## Extract Single clip

In [7]:
Bach_clips = extract_samples('Bach',T=30,n_tracks=1,n_clips=1)

./Bach/Cello Suite 3_BWV1009_2222_cs3-6gig.mid lasts approx  178.0581818181818  seconds
clip:  0  -->  t =  30.75 start =  25 stop = 65  delta =  40


In [8]:
# check that can access and play the clip streams

clip_stream = Bach_clips[0].stream

print()

clip_stream.show('midi')




# Batch Processing and Feature Extraction

- should now be ready to loop over all files, extracting desired number of samples clips of target length

- will use music21 library methods to extract list of ponetially useful single value numeric features from the streams

In [9]:
target_time = 30 # desired time in seconds for audio clips

n_clips = 4 # since clips range from 2 - 4 minutes, will get few different slices with some but not a lot of overlap

n_tracks = 15 # this will give 4*15 = 60 per composer , 240 in total

# keep track of extracted clips lists in a dictionary

composer_clips = dict()

In [10]:
#return list of clips for give composer 

# will go one at a time, could change to loop over list if were more than 4 composers to workwith 

composer = Composers[0] # Bach

composer_clips[composer] = extract_samples(composer,T=target_time,n_tracks=n_tracks,n_clips=n_clips)   

./Bach/Cello Suite 3_BWV1009_2222_cs3-6gig.mid lasts approx  178.0581818181818  seconds
clip:  0  -->  t =  30.75 start =  98 stop = 138  delta =  40
clip:  1  -->  t =  30.75 start =  22 stop = 62  delta =  40
clip:  2  -->  t =  30.75 start =  13 stop = 53  delta =  40
clip:  3  -->  t =  30.75 start =  4 stop = 44  delta =  40
./Bach/Violin Sonata in B minor_BWV1014_2284_vhs1_3.mid lasts approx  151.34728996720207  seconds
clip:  0  -->  t =  32.0 start =  5 stop = 20  delta =  15
clip:  1  -->  t =  32.0 start =  8 stop = 23  delta =  15
clip:  2  -->  t =  32.0 start =  10 stop = 25  delta =  15
clip:  3  -->  t =  32.0 start =  2 stop = 17  delta =  15
./Bach/Violin Sonata No 1 in G minor_BWV1001_2243_vs1_3.mid lasts approx  186.63523499970867  seconds
clip:  0  -->  t =  52.2032292032292 start =  8 stop = 13  delta =  5
clip:  1  -->  t =  52.2032292032292 start =  8 stop = 13  delta =  5
clip:  2  -->  t =  48.478229203229205 start =  6 stop = 11  delta =  5
clip:  3  -->  t = 

In [11]:
print(composer,len(composer_clips['Bach']))

Bach 60


In [12]:
composer = Composers[1] # Brahms

composer_clips[composer] = extract_samples(composer,T=target_time,n_tracks=n_tracks,n_clips=n_clips)

./Brahms/String Sextet No 2 in G major_OP36_2147_br36m4.mid lasts approx  555.7777490470717  seconds
clip:  0  -->  t =  32.20447710184552 start =  111 stop = 123  delta =  12
clip:  1  -->  t =  32.149188311688306 start =  16 stop = 28  delta =  12
clip:  2  -->  t =  32.18961038961039 start =  58 stop = 70  delta =  12
clip:  3  -->  t =  32.47142857142857 start =  90 stop = 103  delta =  13
./Brahms/Piano Quartet No 1 in G minor_OP25_2151_br25m4.mid lasts approx  480.1360988926574  seconds
clip:  0  -->  t =  30.59388111888112 start =  66 stop = 97  delta =  31
clip:  1  -->  t =  30.457342657342657 start =  106 stop = 136  delta =  30
clip:  2  -->  t =  30.833216783216784 start =  18 stop = 50  delta =  32
clip:  3  -->  t =  31.0 start =  235 stop = 265  delta =  30
./Brahms/Horn Trio in E-flat major_OP40_2158_bra40_1.mid lasts approx  484.7308941926688  seconds
clip:  0  -->  t =  30.82793262032223 start =  120 stop = 133  delta =  13
clip:  1  -->  t =  31.0 start =  7 stop = 3



./Brahms/Serenade No 1 in D Major_OP11_2127_bra11-2.mid lasts approx  468.0  seconds
clip:  0  -->  t =  31.5 start =  48 stop = 68  delta =  20
clip:  1  -->  t =  31.5 start =  185 stop = 205  delta =  20
clip:  2  -->  t =  31.5 start =  24 stop = 44  delta =  20
clip:  3  -->  t =  31.5 start =  234 stop = 254  delta =  20
./Brahms/String Sextet No 1 in B-flat major_OP18_2157_br18m4.mid lasts approx  510.7348094861472  seconds
clip:  0  -->  t =  30.461106135244062 start =  33 stop = 62  delta =  29
clip:  1  -->  t =  30.461106135244062 start =  33 stop = 62  delta =  29
clip:  2  -->  t =  30.28330721003135 start =  129 stop = 158  delta =  29
clip:  3  -->  t =  30.262695924764888 start =  39 stop = 68  delta =  29
./Brahms/String Quartet in C minor_OP51NO1_2138_br51n1m2.mid lasts approx  405.79187784659587  seconds
clip:  0  -->  t =  32.01031991744066 start =  7 stop = 25  delta =  18
clip:  1  -->  t =  31.144142616480753 start =  18 stop = 29  delta =  11
clip:  2  -->  t = 

In [13]:
print('Brahms',len(composer_clips['Brahms']))

Brahms 60


In [14]:
composer = Composers[2] # Beethoven    # There's much more data for Beethoven, possible to use ??? will keep same number for now

composer_clips[composer] = extract_samples(composer,T=target_time,n_tracks=n_tracks,n_clips=n_clips)

./Beethoven/String Quartet No 1 in F Major_OP18NO1_2403_qt01_4.mid lasts approx  315.7202498935679  seconds
clip:  0  -->  t =  30.52992889463477 start =  112 stop = 144  delta =  32
clip:  1  -->  t =  30.104935009346775 start =  181 stop = 213  delta =  32
clip:  2  -->  t =  30.537367441779203 start =  183 stop = 216  delta =  33
clip:  3  -->  t =  30.64022767310439 start =  43 stop = 80  delta =  37
./Beethoven/Piano Sonata No 20 in C minor_OP111_2531_ps32_02.mid lasts approx  951.9290715488472  seconds
clip:  0  -->  t =  30.238537644787645 start =  196 stop = 222  delta =  26
clip:  1  -->  t =  30.29584942084942 start =  231 stop = 261  delta =  30
clip:  2  -->  t =  30.561904761904763 start =  57 stop = 63  delta =  6
clip:  3  -->  t =  30.948412698412696 start =  247 stop = 290  delta =  43
./Beethoven/Piano Sonata No 8 in C minor_OP13_2576_ps08_03.mid lasts approx  263.80465302836416  seconds
clip:  0  -->  t =  32.0 start =  8 stop = 23  delta =  15
clip:  1  -->  t =  30



./Beethoven/Trio in E-flat major_OP38_2320_bh38m3.mid lasts approx  208.03564131022506  seconds
clip:  0  -->  t =  30.394107557259606 start =  59 stop = 77  delta =  18
clip:  1  -->  t =  30.947441804711502 start =  54 stop = 72  delta =  18
clip:  2  -->  t =  30.394107557259606 start =  59 stop = 77  delta =  18
clip:  3  -->  t =  30.432206207899878 start =  31 stop = 49  delta =  18
./Beethoven/Piano Sonata No 17 in D minor_OP31NO2_2393_ps17_03.mid lasts approx  472.18839394562553  seconds
clip:  0  -->  t =  30.480282884873354 start =  134 stop = 170  delta =  36
clip:  1  -->  t =  30.75 start =  212 stop = 252  delta =  40
clip:  2  -->  t =  30.193548387096772 start =  84 stop = 116  delta =  32
clip:  3  -->  t =  30.09375 start =  96 stop = 133  delta =  37
./Beethoven/Piano Sonata No 24 in F-sharp major_OP78_2550_ps24_02.mid lasts approx  151.16511959712494  seconds
clip:  0  -->  t =  30.1850421395543 start =  31 stop = 69  delta =  38
clip:  1  -->  t =  30.6850421395543

In [15]:
print('Beethoven',len(composer_clips['Beethoven']))

Beethoven 60


In [16]:
composer = Composers[3] # Schubert

composer_clips[composer] = extract_samples(composer,T=target_time,n_tracks=n_tracks,n_clips=n_clips)

./Schubert/4 Impromptus_OP142_1766_scbt1424.mid lasts approx  406.6597554277911  seconds
clip:  0  -->  t =  30.75 start =  19 stop = 59  delta =  40
clip:  1  -->  t =  30.399221384012655 start =  40 stop = 80  delta =  40
clip:  2  -->  t =  30.458745193536465 start =  73 stop = 112  delta =  39
clip:  3  -->  t =  30.75 start =  228 stop = 268  delta =  40
./Schubert/Piano Sonata in C minor_D958_1757_d958-1.mid lasts approx  602.4348325782179  seconds
clip:  0  -->  t =  30.321215884832593 start =  79 stop = 97  delta =  18
clip:  1  -->  t =  30.322739170358894 start =  183 stop = 201  delta =  18
clip:  2  -->  t =  30.5421832628865 start =  57 stop = 73  delta =  16
clip:  3  -->  t =  31.364131308216525 start =  151 stop = 167  delta =  16
./Schubert/Piano Sonata in A minor_D784_1756_sy_sps53.mid lasts approx  267.58643175879286  seconds
clip:  0  -->  t =  30.5 start =  116 stop = 135  delta =  19
clip:  1  -->  t =  31.5 start =  88 stop = 108  delta =  20
clip:  2  -->  t =  



./Schubert/Piano Trio in B-flat major_OP99_1739_sb99m4.mid lasts approx  562.3590469635146  seconds
clip:  0  -->  t =  30.56923076923077 start =  86 stop = 116  delta =  30
clip:  1  -->  t =  31.0 start =  81 stop = 111  delta =  30
clip:  2  -->  t =  30.56923076923077 start =  98 stop = 134  delta =  36
clip:  3  -->  t =  31.86787330316742 start =  246 stop = 263  delta =  17
./Schubert/Piano Sonata in A minor_D845_1750_sy_sps12.mid lasts approx  580.0833333333335  seconds
clip:  0  -->  t =  30.58712121212121 start =  139 stop = 158  delta =  19
clip:  1  -->  t =  31.48484848484849 start =  98 stop = 117  delta =  19
clip:  2  -->  t =  30.041666666666668 start =  37 stop = 78  delta =  41
clip:  3  -->  t =  30.3125 start =  1 stop = 17  delta =  16
./Schubert/Piano Sonata in C minor_D958_1758_d958-2.mid lasts approx  454.2446316725714  seconds
clip:  0  -->  t =  46.83627675279883 start =  20 stop = 25  delta =  5
clip:  1  -->  t =  50.2468593147759 start =  23 stop = 28  del

In [17]:
print('Schubert',len(composer_clips['Schubert']))

Schubert 60


In [22]:
# Complete Data set of clips : 

n_samples = 0

for composer in Composers: 
    
    n_composer = len(composer_clips[composer])
    
    print(composer,n_composer)
    
    n_samples += n_composer
    
print(f"Total of {n_samples} clip streams parsed...\n\n\n")

Bach 60
Brahms 60
Beethoven 60
Schubert 60
Total of 240 clip streams parsed...





## Now Use Feature Extraction Modules in Music21 to get set of numeric features describing each clip

- the following function is based on documentation from music21 [music21 feature extraction docs](https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html)

- music21 offers a huge library of feature choices to extract from stream objects

- will narrow it down:

        - many of the features are about which instrument used, but seems irrelevant to who composed the piece 
        
        - will focus on features that only return single numeric value
        
        - later could be nice to try incorporating the histogram features provided 

In [40]:
# Function takes a stream object for a clip, searchs through full library of feature options, discarding unwanted features

# set output = False to supress printing progress

# returns a numpy array with shape (number of examples, number of features)

def features_from_stream(s,output=False):
    
    feature_vector = []
    
    # Narrowing down useful features to use... 

    matches = ["Fraction", "Instrument","Meter","Tempo","Voice"]

    stop = 5

    fs = features.jSymbolic.extractorsById
    
    for k in fs:
        for i in range(len(fs[k])):
              if fs[k][i] is not None:
                    n = fs[k][i].__name__
                    if fs[k][i] not in features.jSymbolic.featureExtractors:
                        n += ' (not implemented)'
                        continue

                    name = n[:-7] # throw away 'Feaure' from method name when printing 

                    if any(x in name for x in matches):
                        continue

                    extractor = getattr(features.jSymbolic,n) 

                    fe = extractor(s)

                    try:
                        val = fe.extract().vector
                    except:
                        continue

                    # take single valued features to make life easier     
                    if len(val)>1:
                        continue
                    
                    feature_vector.append(val[0])
                    if output: print(f'{k} {i} {name} {val}')
                    
    feature_vector = np.array(feature_vector)
    
    return feature_vector

In [24]:
# access streams via [composer][index].stream

clip_stream = composer_clips['Bach'][0].stream

feature_vector = features_from_stream(clip_stream,output=True)

M 2 AverageMelodicInterval [3.169230769230769]
M 3 MostCommonMelodicInterval [2]
M 4 DistanceBetweenMostCommonMelodicIntervals [1]
M 5 MostCommonMelodicIntervalPrevalence [0.35384615384615387]
M 6 RelativeStrengthOfMostCommonIntervals [0.5217391304347826]
M 7 NumberOfCommonMelodicIntervals [3]
M 8 AmountOfArpeggiation [0.38461538461538464]
M 9 RepeatedNotes [0.15384615384615385]
M 10 ChromaticMotion [0.18461538461538463]
M 11 StepwiseMotion [0.5384615384615384]
M 12 MelodicThirds [0.13846153846153847]
M 13 MelodicFifths [0.03076923076923077]
M 14 MelodicTritones [0.005128205128205128]
M 15 MelodicOctaves [0.015384615384615385]
M 17 DirectionOfMotion [0.5212121212121212]
M 18 DurationOfMelodicArcs [1.7934782608695652]
M 19 SizeOfMelodicArcs [6.6521739130434785]
P 1 MostCommonPitchPrevalence [0.12626262626262627]
P 2 MostCommonPitchClassPrevalence [0.16161616161616163]
P 3 RelativeStrengthOfTopPitches [0.84]
P 4 RelativeStrengthOfTopPitchClasses [0.90625]
P 5 IntervalBetweenStrongestPitc

In [25]:
feature_vector

array([3.16923077e+00, 2.00000000e+00, 1.00000000e+00, 3.53846154e-01,
       5.21739130e-01, 3.00000000e+00, 3.84615385e-01, 1.53846154e-01,
       1.84615385e-01, 5.38461538e-01, 1.38461538e-01, 3.07692308e-02,
       5.12820513e-03, 1.53846154e-02, 5.21212121e-01, 1.79347826e+00,
       6.65217391e+00, 1.26262626e-01, 1.61616162e-01, 8.40000000e-01,
       9.06250000e-01, 5.00000000e+00, 2.00000000e+00, 5.00000000e+00,
       2.20000000e+01, 1.10000000e+01, 2.90000000e+01, 5.70000000e+01,
       5.37878788e+01, 4.94949495e-01, 5.05050505e-01, 0.00000000e+00,
       7.00000000e+00, 0.00000000e+00, 6.43902439e+00, 1.64141414e-01,
       7.87498289e-02, 5.00000000e-01, 1.25000000e-01, 0.00000000e+00,
       1.59505208e-01, 7.12437130e-02, 3.07500000e+01])

# Construct data set 

In [26]:
# pull features from all streams

# clips = named tuples with composer, audio_path to original file, start measure, stop measure, and stream object

def extract_data(clips):
    
    print(f"Beginning Feature Extraction for {len(clips)} clips")
    
    vectors = []
    
    for i in range(len(clips)):
        
        print(f"extracting clip {(i+1)}/{len(clips)}...")
        feature_vector = features_from_stream(clips[i].stream,output=False)
        
        vectors.append(feature_vector)
        
    return np.vstack(vectors)

In [27]:
# Create dictionary to store numpy arrays of  X = samples, features for each composer

X_composers = dict()

In [28]:
composer = Composers[0]

X_composers[composer] = extract_data(composer_clips[composer])

Beginning Feature Extraction for 60 clips
extracting clip 1/60...
extracting clip 2/60...
extracting clip 3/60...
extracting clip 4/60...
extracting clip 5/60...
extracting clip 6/60...
extracting clip 7/60...
extracting clip 8/60...
extracting clip 9/60...
extracting clip 10/60...
extracting clip 11/60...
extracting clip 12/60...
extracting clip 13/60...
extracting clip 14/60...
extracting clip 15/60...
extracting clip 16/60...
extracting clip 17/60...
extracting clip 18/60...
extracting clip 19/60...
extracting clip 20/60...
extracting clip 21/60...
extracting clip 22/60...
extracting clip 23/60...
extracting clip 24/60...
extracting clip 25/60...
extracting clip 26/60...
extracting clip 27/60...
extracting clip 28/60...
extracting clip 29/60...
extracting clip 30/60...
extracting clip 31/60...
extracting clip 32/60...
extracting clip 33/60...
extracting clip 34/60...
extracting clip 35/60...
extracting clip 36/60...
extracting clip 37/60...
extracting clip 38/60...
extracting clip 3

In [29]:
composer = Composers[1]

print(composer)

X_composers[composer] = extract_data(composer_clips[composer])

Brahms
Beginning Feature Extraction for 60 clips
extracting clip 1/60...
extracting clip 2/60...
extracting clip 3/60...
extracting clip 4/60...
extracting clip 5/60...
extracting clip 6/60...
extracting clip 7/60...
extracting clip 8/60...
extracting clip 9/60...
extracting clip 10/60...
extracting clip 11/60...
extracting clip 12/60...
extracting clip 13/60...
extracting clip 14/60...
extracting clip 15/60...
extracting clip 16/60...
extracting clip 17/60...
extracting clip 18/60...
extracting clip 19/60...
extracting clip 20/60...
extracting clip 21/60...
extracting clip 22/60...
extracting clip 23/60...
extracting clip 24/60...
extracting clip 25/60...
extracting clip 26/60...
extracting clip 27/60...
extracting clip 28/60...
extracting clip 29/60...
extracting clip 30/60...
extracting clip 31/60...
extracting clip 32/60...
extracting clip 33/60...
extracting clip 34/60...
extracting clip 35/60...
extracting clip 36/60...
extracting clip 37/60...
extracting clip 38/60...
extracting

In [30]:
composer = Composers[2]

print(composer)

X_composers[composer] = extract_data(composer_clips[composer])

Beethoven
Beginning Feature Extraction for 60 clips
extracting clip 1/60...
extracting clip 2/60...
extracting clip 3/60...
extracting clip 4/60...
extracting clip 5/60...
extracting clip 6/60...
extracting clip 7/60...
extracting clip 8/60...
extracting clip 9/60...
extracting clip 10/60...
extracting clip 11/60...
extracting clip 12/60...
extracting clip 13/60...
extracting clip 14/60...
extracting clip 15/60...
extracting clip 16/60...
extracting clip 17/60...
extracting clip 18/60...
extracting clip 19/60...
extracting clip 20/60...
extracting clip 21/60...
extracting clip 22/60...
extracting clip 23/60...
extracting clip 24/60...
extracting clip 25/60...
extracting clip 26/60...
extracting clip 27/60...
extracting clip 28/60...
extracting clip 29/60...
extracting clip 30/60...
extracting clip 31/60...
extracting clip 32/60...
extracting clip 33/60...
extracting clip 34/60...
extracting clip 35/60...
extracting clip 36/60...
extracting clip 37/60...
extracting clip 38/60...
extract

In [31]:
composer = Composers[3] # schubert 

print(composer)

X_composers[composer] = extract_data(composer_clips[composer])

Schubert
Beginning Feature Extraction for 60 clips
extracting clip 1/60...
extracting clip 2/60...
extracting clip 3/60...
extracting clip 4/60...
extracting clip 5/60...
extracting clip 6/60...
extracting clip 7/60...
extracting clip 8/60...
extracting clip 9/60...
extracting clip 10/60...
extracting clip 11/60...
extracting clip 12/60...
extracting clip 13/60...
extracting clip 14/60...
extracting clip 15/60...
extracting clip 16/60...
extracting clip 17/60...
extracting clip 18/60...
extracting clip 19/60...
extracting clip 20/60...
extracting clip 21/60...
extracting clip 22/60...
extracting clip 23/60...
extracting clip 24/60...
extracting clip 25/60...
extracting clip 26/60...
extracting clip 27/60...
extracting clip 28/60...
extracting clip 29/60...
extracting clip 30/60...
extracting clip 31/60...
extracting clip 32/60...
extracting clip 33/60...
extracting clip 34/60...
extracting clip 35/60...
extracting clip 36/60...
extracting clip 37/60...
extracting clip 38/60...
extracti

In [32]:
y_Bach=np.zeros(shape=(len(X_composers['Bach']),))
y_Beethoven=1*np.ones(shape=(len(X_composers['Beethoven']),))
y_Brahms=2*np.ones(shape=(len(X_composers['Brahms']),))
y_Schubert=3*np.ones(shape=(len(X_composers['Schubert']),))

print(y_Bach)
print(y_Beethoven)
print(y_Brahms)
print(y_Schubert)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
[3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]


In [33]:
train_x = np.vstack((X_composers['Bach'],X_composers['Beethoven'],X_composers['Brahms'],X_composers['Schubert']))

train_y = np.hstack((y_Bach,y_Beethoven,y_Brahms,y_Schubert))

print(train_x.shape)
print(train_y.shape)

(240, 43)
(240,)


In [34]:

dataset_path = './DataStreamFeatures/TrainData.pkl'

import pickle as pkl

#to save it
with open(dataset_path, "wb") as f:
    pkl.dump([train_x, train_y], f)



In [35]:
#to load it
with open(dataset_path, "rb") as f:
    train_X, train_Y = pkl.load(f)

In [36]:
type(train_X)

numpy.ndarray

In [37]:
train_X.shape

(240, 43)

In [38]:
train_Y.shape

(240,)

## Now to do a little bit of feature exploration, selection, then on to Classification!!!

In [45]:
# will be helpful to keep list of feature names for later intrepretation

# return list of feature names for DataFrame column names
def feature_names(s):
    
    names = []
    
    # Narrowing down useful features to use... 

    matches = ["Fraction", "Instrument","Meter","Tempo","Voice"]

    fs = features.jSymbolic.extractorsById
    
    for k in fs:
        for i in range(len(fs[k])):
              if fs[k][i] is not None:
                    n = fs[k][i].__name__
                    if fs[k][i] not in features.jSymbolic.featureExtractors:
                        n += ' (not implemented)'
                        continue

                    name = n[:-7] # throw away 'Feaure' from method name when printing 

                    if any(x in name for x in matches):
                        continue

                    extractor = getattr(features.jSymbolic,n) 

                    fe = extractor(s)

                    try:
                        val = fe.extract().vector
                    except:
                        continue

                    # take single valued features to make life easier     
                    if len(val)>1:
                        continue
                    
                    names.append(name)
                    print(f'{k} {i} {name} {val}')
    
    return names

In [47]:
clip_stream = composer_clips['Bach'][0].stream

column_names = feature_names(clip_stream)

M 2 AverageMelodicInterval [3.169230769230769]
M 3 MostCommonMelodicInterval [2]
M 4 DistanceBetweenMostCommonMelodicIntervals [1]
M 5 MostCommonMelodicIntervalPrevalence [0.35384615384615387]
M 6 RelativeStrengthOfMostCommonIntervals [0.5217391304347826]
M 7 NumberOfCommonMelodicIntervals [3]
M 8 AmountOfArpeggiation [0.38461538461538464]
M 9 RepeatedNotes [0.15384615384615385]
M 10 ChromaticMotion [0.18461538461538463]
M 11 StepwiseMotion [0.5384615384615384]
M 12 MelodicThirds [0.13846153846153847]
M 13 MelodicFifths [0.03076923076923077]
M 14 MelodicTritones [0.005128205128205128]
M 15 MelodicOctaves [0.015384615384615385]
M 17 DirectionOfMotion [0.5212121212121212]
M 18 DurationOfMelodicArcs [1.7934782608695652]
M 19 SizeOfMelodicArcs [6.6521739130434785]
P 1 MostCommonPitchPrevalence [0.12626262626262627]
P 2 MostCommonPitchClassPrevalence [0.16161616161616163]
P 3 RelativeStrengthOfTopPitches [0.84]
P 4 RelativeStrengthOfTopPitchClasses [0.90625]
P 5 IntervalBetweenStrongestPitc

In [50]:
column_names

print(len(column_names))

43


In [51]:
names_path = './DataStreamFeatures/TrainColumnNames.pkl'

import pickle as pkl

#to save it
with open(names_path, "wb") as f:
    pkl.dump(column_names, f)