# Invoking music21 to extract features for 30 second intervals

In [1]:
import music21
import music21.features as features
from music21 import midi
from music21 import stream
from helper_functions import *
from numpy.random import randint

import numpy as np

from collections import namedtuple

In [2]:
# get dictionary paths to *.mid files for each composer 

Composers = ['Bach','Beethoven','Brahms','Schubert']

data_dir_musicnet = './MusicNet_KaggleData/'

midi_files_musicnet = make_composer_dict(Composers,data_dir = data_dir_musicnet)

# print number of files each folder

for composer in Composers:
    print(f'There are {len(midi_files_musicnet[composer])} compositions by {composer}')

There are 65 compositions by Bach
There are 155 compositions by Beethoven
There are 24 compositions by Brahms
There are 30 compositions by Schubert


In [3]:
# play sample stream from MIDI file
 
audio_path = midi_files_musicnet['Beethoven'][11]

print(audio_path)

mf = midi.MidiFile()
mf.open(audio_path) # path='abc.midi'
mf.read()
mf.close()

s = midi.translate.midiFileToStream(mf)



./MusicNet_KaggleData/Beethoven/2379_qt08_4.mid


In [4]:
# play the stream by calling s.show('midi')
print(audio_path)
s.show('midi')

./MusicNet_KaggleData/Beethoven/2379_qt08_4.mid


## CHECK WHICH FILES I ALREADY HAVE FROM ORIGINAL PROBLEM SET DATA

In [5]:
midi_files_original = make_composer_dict(Composers,data_dir = './')

# print number of files each folder

for composer in Composers:
    print(f'There are {len(midi_files_original[composer])} compositions by {composer}')

There are 17 compositions by Bach
There are 132 compositions by Beethoven
There are 20 compositions by Brahms
There are 25 compositions by Schubert


In [6]:
import re

song_ids_original = dict()

for composer in Composers:
    song_ids_original[composer]=[]
    
for composer in Composers:
    
    for filename in midi_files_original[composer]:
    
        ID = re.findall(r'_(\d\d\d\d)_',filename)[0]
    
        song_ids_original[composer].append(ID)

In [7]:
song_ids_musicnet = dict()

for composer in Composers:
    song_ids_musicnet[composer]=[]
    
for composer in Composers:
    
    for filename in midi_files_musicnet[composer]:
    
        ID = re.findall(r'\/(\d\d\d\d)_',filename)[0]
    
        song_ids_musicnet[composer].append(ID)

In [8]:
for composer in Composers: 
    
    song_id_list_musicnet = song_ids_musicnet[composer]
    
    song_id_list_original = song_ids_original[composer]
    
    new_songs = set(song_id_list_musicnet)-set(song_id_list_original)
    
    print(composer,len(new_songs))

Bach 48
Beethoven 24
Brahms 4
Schubert 5


In [9]:
for composer in Composers: 
    
    for filename in midi_files_musicnet[composer]:
        
        ID = re.findall(r'\/(\d\d\d\d)_',filename)[0]
        
        if ID not in new_songs:
            midi_files_musicnet[composer].pop(midi_files_musicnet[composer].index(filename))
            
for composer in Composers:
    print(f'There are {len(midi_files_musicnet[composer])} UNSEEN DATA by {composer} in MUSICNET folder')

There are 32 UNSEEN DATA by Bach in MUSICNET folder
There are 77 UNSEEN DATA by Beethoven in MUSICNET folder
There are 12 UNSEEN DATA by Brahms in MUSICNET folder
There are 17 UNSEEN DATA by Schubert in MUSICNET folder


## Extract 30 second duration streams from each composition

In [10]:
# for seeding random samples 

from numpy.random import Generator, PCG64
rng = Generator(PCG64())

In [11]:
# function to extract n streams, target duration T seconds for composer 

# extracts n_clips each from n_tracks

# will use namedtuples as convient way to store clips, composer info

Clip = namedtuple('Clip',['composer','path','start','stop','seconds','stream'])

def extract_samples(composer,T=30,n_tracks=10,n_clips=3,show_streams=False):
    delta_measures = list(range(5,50,1))

    clips = []
    
    for audio_path in midi_files_musicnet[composer][:n_tracks]:

        mf = midi.MidiFile()
        mf.open(audio_path) # path='abc.midi'
        mf.read()
        mf.close()
        s = midi.translate.midiFileToStream(mf)

        total_time = s.secondsMap[0]['durationSeconds']

        number_of_measures = len(s[0])  # think this is total number of measures in the stream?

        print(audio_path,'lasts approx ',total_time,' seconds')
        
        #pick n_clips random measures to start clipping from 
        
        #start_measures = randint(1,number_of_measures//2,size=n_clips)
        start_measures = rng.integers(1,number_of_measures//2,size=n_clips)
        
        for i,start in enumerate(start_measures):
            
            for delta in delta_measures:

                stop = start+delta

                excerpt_stream = s.measures(start,stop)

                clip_time = excerpt_stream.secondsMap[0]['durationSeconds']

                if clip_time>T: # clip sample streams until time is longer than T
                    print('clip: ',i,' --> ','t = ',clip_time,'start = ',start,'stop =',stop,' delta = ',delta)

                    #clip = (composer,audio_path,start,stop,clip_time,excerpt_stream)
                    
                    clip = Clip(composer=composer,
                                path=audio_path,
                                start=start,
                                stop=stop,
                                seconds=clip_time,
                                stream = excerpt_stream)
                    
                    clips.append(clip)
                    
                    if show_streams: excerpt_stream.show('midi')
                        
                    break                 
    
    return clips

## Extract Single clip

In [12]:
midi_files_musicnet['Brahms'][:15]

['./MusicNet_KaggleData/Brahms/2114_jb38_3.mid',
 './MusicNet_KaggleData/Brahms/2149_br25m2.mid',
 './MusicNet_KaggleData/Brahms/2117_brahms_sonata_opus_120-2.mid',
 './MusicNet_KaggleData/Brahms/2118_brahms_sonata_opus_120-3.mid',
 './MusicNet_KaggleData/Brahms/2156_br18m3.mid',
 './MusicNet_KaggleData/Brahms/2151_br25m4.mid',
 './MusicNet_KaggleData/Brahms/2131_bra11-6.mid',
 './MusicNet_KaggleData/Brahms/2155_br18m2.mid',
 './MusicNet_KaggleData/Brahms/2154_br18m1.mid',
 './MusicNet_KaggleData/Brahms/2158_bra40_1.mid',
 './MusicNet_KaggleData/Brahms/2112_jb38_1.mid',
 './MusicNet_KaggleData/Brahms/2161_bra40_4.mid']

In [13]:
Bach_clips = extract_samples('Brahms',T=30,n_tracks=15,n_clips=1)

./MusicNet_KaggleData/Brahms/2114_jb38_3.mid lasts approx  367.5870634530251  seconds
clip:  0  -->  t =  32.0 start =  67 stop = 82  delta =  15
./MusicNet_KaggleData/Brahms/2149_br25m2.mid lasts approx  514.5827643166542  seconds
clip:  0  -->  t =  31.5 start =  20 stop = 33  delta =  13
./MusicNet_KaggleData/Brahms/2117_brahms_sonata_opus_120-2.mid lasts approx  141.92857142857142  seconds
clip:  0  -->  t =  31.0 start =  15 stop = 45  delta =  30
./MusicNet_KaggleData/Brahms/2118_brahms_sonata_opus_120-3.mid lasts approx  249.5  seconds
clip:  0  -->  t =  31.5 start =  21 stop = 41  delta =  20
./MusicNet_KaggleData/Brahms/2156_br18m3.mid lasts approx  221.9384362852206  seconds
clip:  0  -->  t =  30.83595109791997 start =  76 stop = 104  delta =  28
./MusicNet_KaggleData/Brahms/2151_br25m4.mid lasts approx  480.1360988926574  seconds
clip:  0  -->  t =  30.91013986013986 start =  17 stop = 49  delta =  32




./MusicNet_KaggleData/Brahms/2131_bra11-6.mid lasts approx  315.4285714285714  seconds
clip:  0  -->  t =  31.0 start =  93 stop = 123  delta =  30
./MusicNet_KaggleData/Brahms/2155_br18m2.mid lasts approx  473.7170802419302  seconds
clip:  0  -->  t =  30.00067155067155 start =  36 stop = 52  delta =  16
./MusicNet_KaggleData/Brahms/2154_br18m1.mid lasts approx  696.1934786300759  seconds
clip:  0  -->  t =  30.82142857142857 start =  98 stop = 120  delta =  22
./MusicNet_KaggleData/Brahms/2158_bra40_1.mid lasts approx  484.7308941926688  seconds
clip:  0  -->  t =  30.203125 start =  71 stop = 88  delta =  17
./MusicNet_KaggleData/Brahms/2112_jb38_1.mid lasts approx  811.6363636363636  seconds
clip:  0  -->  t =  32.0 start =  28 stop = 43  delta =  15
./MusicNet_KaggleData/Brahms/2161_bra40_4.mid lasts approx  389.24536539595204  seconds
clip:  0  -->  t =  31.5 start =  131 stop = 151  delta =  20


In [14]:
# check that can access and play the clip streams

clip_tuple = Bach_clips[0]  # Using Bach Sonata B minor , as above 

clip_stream = clip_tuple.stream

print(*clip_tuple)

clip_stream.show('midi')

# note, the clip sounds correct, but it's also render with piano audio only for first part, render missing harpsicohrd part 

Brahms ./MusicNet_KaggleData/Brahms/2114_jb38_3.mid 67 82 32.0 <music21.stream.Score 0x7f1d7efb1f60>


# Batch Processing and Feature Extraction

- should now be ready to loop over all files, extracting desired number of samples clips of target length

- will use music21 library methods to extract list of ponetially useful single value numeric features from the streams

In [15]:
#MORE DATA FROM KAGGLE DATA REPOSITORY -   MUSIC NET 

target_time = 15 # desired time in seconds for audio clips

n_clips = 8 # since clips range from 2 - 4 minutes, will get few different slices with some but not a lot of overlap

n_tracks = 15 # this will give 8*15 = 120 per composer , 480 in total

# keep track of extracted clips lists in a dictionary

composer_clips = dict()

In [16]:
#return list of clips for give composer 

# will go one at a time, could change to loop over list if were more than 4 composers to workwith 

composer = Composers[0] # Bach

composer_clips[composer] = extract_samples(composer,T=target_time,n_tracks=n_tracks,n_clips=n_clips)   

./MusicNet_KaggleData/Bach/2247_fugue9.mid lasts approx  101.93138981561442  seconds
clip:  0  -->  t =  16.0 start =  7 stop = 14  delta =  7
clip:  1  -->  t =  16.0 start =  13 stop = 20  delta =  7
clip:  2  -->  t =  16.0 start =  7 stop = 14  delta =  7
clip:  3  -->  t =  16.0 start =  4 stop = 11  delta =  7
clip:  4  -->  t =  16.0 start =  12 stop = 19  delta =  7
clip:  5  -->  t =  16.0 start =  11 stop = 18  delta =  7
clip:  6  -->  t =  16.0 start =  8 stop = 15  delta =  7
clip:  7  -->  t =  16.0 start =  3 stop = 10  delta =  7
./MusicNet_KaggleData/Bach/2213_fugue2.mid lasts approx  113.23200285073294  seconds
clip:  0  -->  t =  16.0 start =  3 stop = 10  delta =  7
clip:  1  -->  t =  16.0 start =  11 stop = 18  delta =  7
clip:  2  -->  t =  16.0 start =  12 stop = 19  delta =  7
clip:  3  -->  t =  16.0 start =  14 stop = 21  delta =  7
clip:  4  -->  t =  16.0 start =  13 stop = 20  delta =  7
clip:  5  -->  t =  16.0 start =  13 stop = 20  delta =  7
clip:  6  

clip:  3  -->  t =  16.5 start =  10 stop = 20  delta =  10
clip:  4  -->  t =  16.5 start =  13 stop = 23  delta =  10
clip:  5  -->  t =  16.5 start =  10 stop = 20  delta =  10
clip:  6  -->  t =  16.5 start =  12 stop = 22  delta =  10
clip:  7  -->  t =  16.5 start =  11 stop = 21  delta =  10


In [17]:
composer = Composers[1] #Beethoven

composer_clips[composer] = extract_samples(composer,T=target_time,n_tracks=n_tracks,n_clips=n_clips)

./MusicNet_KaggleData/Beethoven/2342_vns08_3.mid lasts approx  195.44565299810276  seconds
clip:  0  -->  t =  15.182934712346478 start =  7 stop = 23  delta =  16
clip:  1  -->  t =  15.477394861503727 start =  41 stop = 57  delta =  16
clip:  2  -->  t =  15.09146735617324 start =  1 stop = 18  delta =  17
clip:  3  -->  t =  15.402364792070674 start =  111 stop = 126  delta =  15
clip:  4  -->  t =  15.146103896103897 start =  84 stop = 101  delta =  17
clip:  5  -->  t =  16.0 start =  104 stop = 119  delta =  15
clip:  6  -->  t =  16.0 start =  97 stop = 112  delta =  15
clip:  7  -->  t =  15.644061528170395 start =  45 stop = 62  delta =  17
./MusicNet_KaggleData/Beethoven/2611_ps07_02.mid lasts approx  439.44140007437244  seconds
clip:  0  -->  t =  15.100000000000001 start =  70 stop = 77  delta =  7
clip:  1  -->  t =  16.214285714285715 start =  5 stop = 13  delta =  8
clip:  2  -->  t =  15.55 start =  63 stop = 75  delta =  12
clip:  3  -->  t =  15.4 start =  60 stop = 7



./MusicNet_KaggleData/Beethoven/2318_bh38m1.mid lasts approx  656.9019237429434  seconds
clip:  0  -->  t =  16.0 start =  120 stop = 127  delta =  7
clip:  1  -->  t =  16.0 start =  31 stop = 38  delta =  7
clip:  2  -->  t =  16.0 start =  179 stop = 186  delta =  7
clip:  3  -->  t =  16.0 start =  151 stop = 158  delta =  7
clip:  4  -->  t =  16.119402985074625 start =  113 stop = 122  delta =  9
clip:  5  -->  t =  16.55039760963674 start =  12 stop = 19  delta =  7
clip:  6  -->  t =  15.868087979934398 start =  131 stop = 139  delta =  8
clip:  7  -->  t =  16.0 start =  59 stop = 66  delta =  7
./MusicNet_KaggleData/Beethoven/2379_qt08_4.mid lasts approx  364.3004004033066  seconds
clip:  0  -->  t =  16.0 start =  204 stop = 211  delta =  7
clip:  1  -->  t =  15.555555555555555 start =  164 stop = 173  delta =  9
clip:  2  -->  t =  16.0 start =  184 stop = 191  delta =  7
clip:  3  -->  t =  16.0 start =  171 stop = 178  delta =  7
clip:  4  -->  t =  16.0 start =  71 stop



./MusicNet_KaggleData/Beethoven/2572_bevs7c.mid lasts approx  199.59844322344318  seconds
clip:  0  -->  t =  16.0 start =  10 stop = 17  delta =  7
clip:  1  -->  t =  16.0 start =  4 stop = 11  delta =  7
clip:  2  -->  t =  16.0 start =  63 stop = 70  delta =  7
clip:  3  -->  t =  16.0 start =  68 stop = 75  delta =  7
clip:  4  -->  t =  16.0 start =  10 stop = 17  delta =  7
clip:  5  -->  t =  16.0 start =  55 stop = 62  delta =  7
clip:  6  -->  t =  16.0 start =  4 stop = 11  delta =  7
clip:  7  -->  t =  16.0 start =  11 stop = 18  delta =  7
./MusicNet_KaggleData/Beethoven/2555_ps30_01.mid lasts approx  181.45020602555917  seconds
clip:  0  -->  t =  15.635709800155148 start =  33 stop = 48  delta =  15
clip:  1  -->  t =  15.169776962168267 start =  54 stop = 68  delta =  14
clip:  2  -->  t =  15.135709800155148 start =  36 stop = 50  delta =  14
clip:  3  -->  t =  16.0 start =  46 stop = 61  delta =  15
clip:  4  -->  t =  15.624181011097365 start =  22 stop = 34  delta

In [18]:
composer = Composers[2] # Brahms 

composer_clips[composer] = extract_samples(composer,T=target_time,n_tracks=n_tracks,n_clips=n_clips)

./MusicNet_KaggleData/Brahms/2114_jb38_3.mid lasts approx  367.5870634530251  seconds
clip:  0  -->  t =  16.0 start =  2 stop = 9  delta =  7
clip:  1  -->  t =  16.0 start =  75 stop = 82  delta =  7
clip:  2  -->  t =  16.0 start =  96 stop = 103  delta =  7
clip:  3  -->  t =  16.0 start =  46 stop = 53  delta =  7
clip:  4  -->  t =  16.0 start =  55 stop = 62  delta =  7
clip:  5  -->  t =  16.0 start =  87 stop = 94  delta =  7
clip:  6  -->  t =  16.0 start =  13 stop = 20  delta =  7
clip:  7  -->  t =  16.0 start =  30 stop = 37  delta =  7
./MusicNet_KaggleData/Brahms/2149_br25m2.mid lasts approx  514.5827643166542  seconds
clip:  0  -->  t =  15.75 start =  32 stop = 38  delta =  6
clip:  1  -->  t =  15.75 start =  154 stop = 160  delta =  6
clip:  2  -->  t =  15.75 start =  78 stop = 84  delta =  6
clip:  3  -->  t =  15.75 start =  153 stop = 159  delta =  6
clip:  4  -->  t =  15.75 start =  19 stop = 25  delta =  6
clip:  5  -->  t =  15.947268907563025 start =  15 st

In [19]:
composer = Composers[3] # Schubert

composer_clips[composer] = extract_samples(composer,T=target_time,n_tracks=n_tracks,n_clips=n_clips)

./MusicNet_KaggleData/Schubert/1775_sy_sps21.mid lasts approx  511.0926449787836  seconds
clip:  0  -->  t =  16.0 start =  44 stop = 51  delta =  7
clip:  1  -->  t =  16.0 start =  55 stop = 62  delta =  7
clip:  2  -->  t =  16.0 start =  127 stop = 134  delta =  7
clip:  3  -->  t =  16.0 start =  80 stop = 87  delta =  7
clip:  4  -->  t =  16.0 start =  122 stop = 129  delta =  7
clip:  5  -->  t =  16.0 start =  28 stop = 35  delta =  7
clip:  6  -->  t =  16.0 start =  84 stop = 91  delta =  7
clip:  7  -->  t =  16.0 start =  45 stop = 52  delta =  7
./MusicNet_KaggleData/Schubert/1755_sy_sps52.mid lasts approx  149.6299803644374  seconds
clip:  0  -->  t =  16.0 start =  12 stop = 19  delta =  7
clip:  1  -->  t =  16.0 start =  19 stop = 26  delta =  7
clip:  2  -->  t =  16.0 start =  10 stop = 17  delta =  7
clip:  3  -->  t =  16.0 start =  13 stop = 20  delta =  7
clip:  4  -->  t =  16.0 start =  5 stop = 12  delta =  7
clip:  5  -->  t =  15.897435897435898 start =  31

clip:  7  -->  t =  39.75 start =  29 stop = 34  delta =  5
./MusicNet_KaggleData/Schubert/1765_scbt1423.mid lasts approx  588.7714057000203  seconds
clip:  0  -->  t =  16.808082573017174 start =  68 stop = 73  delta =  5
clip:  1  -->  t =  18.6089629654406 start =  61 stop = 68  delta =  7
clip:  2  -->  t =  16.0 start =  5 stop = 12  delta =  7
clip:  3  -->  t =  16.0 start =  10 stop = 17  delta =  7
clip:  4  -->  t =  16.0 start =  4 stop = 11  delta =  7
clip:  5  -->  t =  16.0 start =  16 stop = 23  delta =  7
clip:  6  -->  t =  17.03129685873146 start =  102 stop = 107  delta =  5
clip:  7  -->  t =  16.0 start =  19 stop = 26  delta =  7


In [20]:
# Complete Data set of clips : 

n_samples = 0

for composer in Composers: 
    
    n_composer = len(composer_clips[composer])
    
    print(composer,n_composer)
    
    n_samples += n_composer
    
print(f"Total of {n_samples} clip streams parsed...\n\n\n")

Bach 120
Beethoven 120
Brahms 96
Schubert 120
Total of 456 clip streams parsed...





## Now Use Feature Extraction Modules in Music21 to get set of numeric features describing each clip

- the following function is based on documentation from music21 [music21 feature extraction docs](https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html)

- music21 offers a huge library of feature choices to extract from stream objects

- will narrow it down:

        - many of the features are about which instrument used, but seems irrelevant to who composed the piece 
        
        - will focus on features that only return single numeric value
        
        - later could be nice to try incorporating the histogram features provided 

In [25]:
# Function takes a stream object for a clip, searchs through full library of feature options, discarding unwanted features

# set output = False to supress printing progress

# returns a numpy array with shape (number of examples, number of features)

def features_from_stream(s,output=False):
    
    feature_vector = []
    
    # Narrowing down useful features to use... 

    matches = ["Fraction", "Instrument","Meter","Tempo","Voice","Histogram","Motion","Register"]

    stop = 5

    fs = features.jSymbolic.extractorsById
    
    for k in fs:
        
        if k in ['D','I','T']:
            continue # don't need these categories of features 
        
        for i in range(len(fs[k])):
              if fs[k][i] is not None:
                    n = fs[k][i].__name__
                    if fs[k][i] not in features.jSymbolic.featureExtractors:
                        n += ' (not implemented)'
                        continue

                    name = n[:-7] # throw away 'Feaure' from method name when printing 

                    if any(x in name for x in matches):
                        continue

                    extractor = getattr(features.jSymbolic,n) 

                    fe = extractor(s)

                    try:
                        val = fe.extract().vector
                    except:
                        continue

                    # take single valued features to make life easier     
                    if len(val)>1:
                        continue
                    
                    feature_vector.append(val[0])
                    if output: print(f'{k} {i} {name} {val}')
                    
    feature_vector = np.array(feature_vector)
    
    return feature_vector

In [26]:
def essential_features_from_stream(s,features_list,output=False):
    
    feature_values_vector = []
    
    for name in features_list: 
        
        extractor = getattr(features.jSymbolic,name+'Feature') 

        fe = extractor(s)
        
        val = fe.extract().vector
        
        feature_values_vector.append(val[0])
        
        if output: print(f'{name} {val}')
            
    return feature_values_vector

In [27]:
# speed up data extraction by preselecting only 15 needed features

features_list=['Range',
 'PitchVariety',
 'NumberOfCommonPitches',
 'NoteDensity',
 'AmountOfArpeggiation',
 'IntervalBetweenStrongestPitches',
 'MostCommonPitchClassPrevalence',
 'MostCommonPitchPrevalence',
 'RelativeStrengthOfTopPitchClasses',
 'RepeatedNotes',
 'MelodicOctaves',
 'SizeOfMelodicArcs',
 'VariabilityOfNoteDuration',
 'MelodicTritones',
 'PitchClassVariety']

In [28]:
# access streams via [composer][index].stream

clip_stream = composer_clips['Bach'][0].stream

feature_vector = essential_features_from_stream(clip_stream,features_list,output=True)

Range [39]
PitchVariety [30]
NumberOfCommonPitches [3]
NoteDensity [13.75]
AmountOfArpeggiation [0.22119815668202766]
IntervalBetweenStrongestPitches [2]
MostCommonPitchClassPrevalence [0.14545454545454545]
MostCommonPitchPrevalence [0.10454545454545454]
RelativeStrengthOfTopPitchClasses [0.96875]
RepeatedNotes [0.013824884792626729]
MelodicOctaves [0.004608294930875576]
SizeOfMelodicArcs [5.34020618556701]
VariabilityOfNoteDuration [0.17188321730920092]
MelodicTritones [0.018433179723502304]
PitchClassVariety [11]


In [None]:
feature_vector

# Construct data set 

In [40]:
# pull features from all streams

# clips = named tuples with composer, audio_path to original file, start measure, stop measure, and stream object

def extract_data(clips,features_list):
    
    print(f"Beginning Feature Extraction for {len(clips)} clips")
    
    vectors = []
    
    for i in range(3,50):
        
        print(f"extracting clip {(i+1)}/{len(clips)}...")
        #feature_vector = features_from_stream(clips[i].stream,output=False)
        
        feature_vector = essential_features_from_stream(clips[i].stream,features_list,output=False)
        
        vectors.append(feature_vector)
        
    return np.vstack(vectors)

In [30]:
# Create dictionary to store numpy arrays of  X = samples, features for each composer

X_composers = dict()

In [31]:
composer = Composers[0]

X_composers[composer] = extract_data(composer_clips[composer],features_list)

Beginning Feature Extraction for 120 clips
extracting clip 1/120...
extracting clip 2/120...
extracting clip 3/120...
extracting clip 4/120...
extracting clip 5/120...
extracting clip 6/120...
extracting clip 7/120...
extracting clip 8/120...
extracting clip 9/120...
extracting clip 10/120...
extracting clip 11/120...
extracting clip 12/120...
extracting clip 13/120...
extracting clip 14/120...
extracting clip 15/120...
extracting clip 16/120...
extracting clip 17/120...
extracting clip 18/120...
extracting clip 19/120...
extracting clip 20/120...
extracting clip 21/120...
extracting clip 22/120...
extracting clip 23/120...
extracting clip 24/120...
extracting clip 25/120...
extracting clip 26/120...
extracting clip 27/120...
extracting clip 28/120...
extracting clip 29/120...
extracting clip 30/120...
extracting clip 31/120...
extracting clip 32/120...
extracting clip 33/120...
extracting clip 34/120...
extracting clip 35/120...
extracting clip 36/120...
extracting clip 37/120...
extr

In [41]:
composer = Composers[1]

print(composer)

X_composers[composer] = extract_data(composer_clips[composer],features_list)

Beethoven
Beginning Feature Extraction for 120 clips
extracting clip 4/120...
extracting clip 5/120...
extracting clip 6/120...
extracting clip 7/120...
extracting clip 8/120...
extracting clip 9/120...
extracting clip 10/120...
extracting clip 11/120...
extracting clip 12/120...
extracting clip 13/120...
extracting clip 14/120...
extracting clip 15/120...
extracting clip 16/120...
extracting clip 17/120...
extracting clip 18/120...
extracting clip 19/120...
extracting clip 20/120...
extracting clip 21/120...
extracting clip 22/120...
extracting clip 23/120...
extracting clip 24/120...
extracting clip 25/120...
extracting clip 26/120...
extracting clip 27/120...
extracting clip 28/120...
extracting clip 29/120...
extracting clip 30/120...
extracting clip 31/120...
extracting clip 32/120...
extracting clip 33/120...
extracting clip 34/120...
extracting clip 35/120...
extracting clip 36/120...
extracting clip 37/120...
extracting clip 38/120...
extracting clip 39/120...
extracting clip 4

In [34]:
composer = Composers[2]

print(composer)

X_composers[composer] = extract_data(composer_clips[composer],features_list)

Brahms
Beginning Feature Extraction for 96 clips
extracting clip 1/96...
extracting clip 2/96...
extracting clip 3/96...
extracting clip 4/96...
extracting clip 5/96...
extracting clip 6/96...
extracting clip 7/96...
extracting clip 8/96...
extracting clip 9/96...
extracting clip 10/96...
extracting clip 11/96...
extracting clip 12/96...
extracting clip 13/96...
extracting clip 14/96...
extracting clip 15/96...
extracting clip 16/96...
extracting clip 17/96...
extracting clip 18/96...
extracting clip 19/96...
extracting clip 20/96...
extracting clip 21/96...
extracting clip 22/96...
extracting clip 23/96...
extracting clip 24/96...
extracting clip 25/96...
extracting clip 26/96...
extracting clip 27/96...
extracting clip 28/96...
extracting clip 29/96...
extracting clip 30/96...
extracting clip 31/96...
extracting clip 32/96...
extracting clip 33/96...
extracting clip 34/96...
extracting clip 35/96...
extracting clip 36/96...
extracting clip 37/96...
extracting clip 38/96...
extracting

In [39]:
composer = Composers[3] # schubert 

print(composer)

X_composers[composer] = extract_data(composer_clips[composer],features_list)

Schubert
Beginning Feature Extraction for 120 clips
extracting clip 1/120...
extracting clip 2/120...
extracting clip 3/120...
extracting clip 4/120...
extracting clip 5/120...
extracting clip 6/120...
extracting clip 7/120...
extracting clip 8/120...
extracting clip 9/120...
extracting clip 10/120...
extracting clip 11/120...
extracting clip 12/120...
extracting clip 13/120...
extracting clip 14/120...
extracting clip 15/120...
extracting clip 16/120...
extracting clip 17/120...
extracting clip 18/120...
extracting clip 19/120...
extracting clip 20/120...
extracting clip 21/120...
extracting clip 22/120...
extracting clip 23/120...
extracting clip 24/120...
extracting clip 25/120...
extracting clip 26/120...
extracting clip 27/120...
extracting clip 28/120...
extracting clip 29/120...
extracting clip 30/120...
extracting clip 31/120...
extracting clip 32/120...
extracting clip 33/120...
extracting clip 34/120...
extracting clip 35/120...
extracting clip 36/120...
extracting clip 37/12

In [43]:
y_Bach=np.zeros(shape=(len(X_composers['Bach']),))
y_Beethoven=1*np.ones(shape=(len(X_composers['Beethoven']),))
y_Brahms=2*np.ones(shape=(len(X_composers['Brahms']),))
y_Schubert=3*np.ones(shape=(len(X_composers['Schubert']),))

print(y_Bach)
print(y_Beethoven)
print(y_Brahms)
print(y_Schubert)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
[3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3.]


In [44]:
train_x = np.vstack((X_composers['Bach'],X_composers['Beethoven'],X_composers['Brahms'],X_composers['Schubert']))

train_y = np.hstack((y_Bach,y_Beethoven,y_Brahms,y_Schubert))

print(train_x.shape)
print(train_y.shape)

(313, 15)
(313,)


In [45]:

#First DataSet , using 30 second target for length, and using 4 per sample  4*15 = 60 each
#dataset_path = './DataStreamFeatures/TrainData.pkl'

#second , larger data set of 15 second clips 
dataset_path = './DataStreamFeatures/TrainData_MusicNet_15sec_313.pkl'

import pickle as pkl

#to save it
with open(dataset_path, "wb") as f:
    pkl.dump([train_x, train_y], f)



In [None]:
#to load it
with open(dataset_path, "rb") as f:
    train_X, train_Y = pkl.load(f)

In [None]:
type(train_X)

In [None]:
train_X.shape

In [None]:
train_Y.shape