In [222]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display   #for loading and visualizing audio files
import IPython.display as ipd   #to play audio

plt.style.use('ggplot')

%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import time 
import pickle

## Collecting audio time series | ATS

- Extracting via librosa
- Storing as numpy arrays in the corresponding folder on the hard drive
- Collecting track_id and associated file name - identifying corrupt tracks with error list
- Storing this in a dictionary file with pickle
- Limit iteration procedure to separate in collection chunks (1st 50 folders, 2nd 50 etc) - modify for loop to allow flexible range adjustment
- Set number of folders upper limit : 100 folders ~ 16,000 tracks

**CAUTION:** running the cell below a second time will overwrite the files that have already been saved!

In [8]:
import time 
import pickle

In [4]:
# Extract the audio time series (ATS) of every track in an inclusive range of
# dataset folders, save each series as a .npy file, and checkpoint (after every
# folder) a dictionary of the collected track ids plus a list of failed tracks.
fp_main = '/Volumes/Extreme SSD/CAPSTONE_DATA/fma_medium/'
saved_root = '/Volumes/Extreme SSD/CAPSTONE_DATA/saved/'

# Inclusive folder range for this collection chunk - change these two values
# (e.g. '000' / '050') to collect another chunk.
first_folder, last_folder = '051', '100'
chunk_tag = '{}_{}'.format(int(first_folder), int(last_folder))   # '051','100' -> '51_100'

ats_dir = saved_root + 'ATS_' + chunk_tag + '/'
dict_fp = saved_root + 'dictionaries/dict_ATS_' + chunk_tag + '.pkl'
errors_fp = saved_root + 'dictionaries/errors_' + chunk_tag + '.txt'   # pickled despite the .txt suffix

# os.listdir() returns entries in arbitrary order, so sort before slicing by
# index; otherwise the range below is not guaranteed to be first..last folder.
folders = sorted(os.listdir(fp_main))

numbers = ['0','1','2','3','4','5','6','7','8','9']

dictionary = {'track_id': [], 'folder': []} #filled with one entry per successfully saved track
error_list = []                             #(folder, track_id) of every track that failed

for i in range(folders.index(first_folder), folders.index(last_folder) + 1):
    folder = folders[i]

    if not any(x in folder for x in numbers):  #skip non-folder entries e.g 'README.txt' or 'checksums'
        continue

    print("folder :", folder)
    fp_main_new = fp_main + folder + '/'  #not forget the '/' at the end

    for track_id in sorted(os.listdir(fp_main_new)):
        if '_' in track_id:   #skip entries with an underscore in the name (not regular track files)
            continue

        track_name = track_id.split('.')[0]
        try:     #some clips are corrupted e.g '001486.mp3'
            y, sr = librosa.load(fp_main_new + track_id, duration = 30) #clips are 30secs
            np.save(ats_dir + track_name + '.npy', y)
        except Exception as exc:
            # Best effort: record the failure and carry on, but show why it
            # failed so a systematic problem is not silently logged as
            # "corrupt track". Ctrl-C is no longer swallowed.
            print("  failed :", track_id, repr(exc))
            error_list.append((folder, track_name))
        else:
            dictionary['track_id'].append(track_name)
            dictionary['folder'].append(folder)

    # Checkpoint after every folder so an interrupted run keeps its progress.
    with open(dict_fp, "wb") as a_dict:
        pickle.dump(dictionary, a_dict)

    with open(errors_fp, "wb") as a_txt:
        pickle.dump(error_list, a_txt)

- Displaying error txt file, dictionary with ATS track_id's and ATS extracted numpy array for 1 track

In [294]:
# Load the pickled error lists ((folder, track_id) tuples) written during ATS extraction.
# `with` guarantees both files are closed once loaded.
with open('/Volumes/Extreme SSD/CAPSTONE_DATA/saved/dictionaries/errors_0_50.txt', "rb") as a_txt:
    errors_0_50 = pickle.load(a_txt)
with open('/Volumes/Extreme SSD/CAPSTONE_DATA/saved/dictionaries/errors_51_100.txt', "rb") as a_txt_51_100:
    errors_51_100 = pickle.load(a_txt_51_100)

print("Errors found in folders 0 - 50:", errors_0_50)
print("Errors found in folders 51 - 100:",errors_51_100)

Errors found in folders 0 - 50: [('001', '001486'), ('005', '005574')]
Errors found in folders 51 - 100: [('065', '065753'), ('080', '080391'), ('098', '098558'), ('098', '098559'), ('098', '098560'), ('098', '098571'), ('099', '099134')]


Loading dictionaries for 0 - 50 & 51 - 100 containing track audio files and corresponding folder of each track.

In [6]:
# Load the {'track_id': [...], 'folder': [...]} dictionaries saved during ATS extraction.
# `with` guarantees both files are closed once loaded.
with open("/Volumes/Extreme SSD/CAPSTONE_DATA/saved/dictionaries/dict_ATS_0_50.pkl", "rb") as a_file:
    dict_ATS_0_50 = pickle.load(a_file)

with open("/Volumes/Extreme SSD/CAPSTONE_DATA/saved/dictionaries/dict_ATS_51_100.pkl", "rb") as a_file_51_100:
    dict_ATS_51_100 = pickle.load(a_file_51_100)

#print(dict_ATS_0_50)
#print(dict_ATS_51_100)

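Note that the length of each audio time series caps at 661500 samples: the extraction above limits every track to 30 seconds, and at librosa's default sample rate of 22,050 Hz that is 30 × 22,050 = 661,500 samples. Some audio files are slightly shorter than 30 seconds, so their arrays are slightly shorter too.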

In [327]:
# Print the shape of the stored ATS array for the first 10 collected tracks.
for track_id in dict_ATS_0_50['track_id'][0:10]:
    with open('/Volumes/Extreme SSD/CAPSTONE_DATA/saved/ATS_0_50/{}.npy'.format(track_id), "rb") as a_array:
        ats_array = np.load(a_array)   # a different track on every iteration
    print("ATS array shape : ", ats_array.shape)

ATS array shape :  (661248,)
ATS array shape :  (661500,)
ATS array shape :  (661500,)
ATS array shape :  (661248,)
ATS array shape :  (661500,)
ATS array shape :  (661248,)
ATS array shape :  (661248,)
ATS array shape :  (661248,)
ATS array shape :  (661248,)
ATS array shape :  (661248,)


Checking the number of tracks collected so far, and checking that the number of ATS arrays saved so far matches it.

In [328]:
# Build one DataFrame per chunk from the saved track dictionaries and report
# how many tracks each chunk holds. (The original mid-cell `.head()` call was
# dropped: its result was discarded because it was not the last expression.)
df_files_0_50 = pd.DataFrame(dict_ATS_0_50)
df_files_51_100 = pd.DataFrame(dict_ATS_51_100)
print("No. of tracks in folders 000 - 050 :", len(df_files_0_50.track_id))
print("No. of tracks in folders 051 - 100 :", len(df_files_51_100.track_id))

No. of tracks in folders 000 - 050 : 7949
No. of tracks in folders 051 - 100 : 8073


In [332]:
# Count the directory entries in each ATS output folder so they can be
# compared against the track counts above.
array_ATS_0_50_fp, array_ATS_51_100_fp = (
    '/Volumes/Extreme SSD/CAPSTONE_DATA/saved/ATS_0_50',
    '/Volumes/Extreme SSD/CAPSTONE_DATA/saved/ATS_51_100',
)

arrays_ATS_0_50, arrays_ATS_51_100 = [
    os.listdir(folder_fp) for folder_fp in (array_ATS_0_50_fp, array_ATS_51_100_fp)
]

for label, entries in (("No. of arrays in folders 000 - 050 :", arrays_ATS_0_50),
                       ("No. of arrays in folders 051 - 100 :", arrays_ATS_51_100)):
    print(label, len(entries))

No. of arrays in folders 000 - 050 : 7949
No. of arrays in folders 051 - 100 : 8073


## Feature Extraction options
1. Save all feature arrays for a given track_id in one dictionary (~16,000 dictionaries, each {1 track : 25 features})
2. Save all tracks' arrays for a given feature in one dictionary (~25 dictionaries, each {16,000 tracks : same feature})

### Classification methods

**A.** Descriptive stats on each feature ---> model entirely on this ---> predictions...etc.

**B.** PCA each feature (spectrogram) ---> model on each one individually ---> majority vote predictions...etc.

**B.** PCA black & white, spectrogram ---> model on this entirely ---> predictions...etc.

**C.** PCA each feature ---> model entirely on this ---> predictions...etc.

***Which extraction option benefits which classification method?***

Method A: 1

Method B : 2

Method C: 1 or 2 ?

Option 1 is better to iterate over: each track's dictionary is saved as soon as its 25 features are extracted, so an interrupted run does not lose the work already done.



In [333]:
import time 
import pickle

def extract_features_array(y, track_id, save = True, display = False,
                           save_dir = '/Volumes/Extreme SSD/CAPSTONE_DATA/saved/features_dictionaries'):
    """Extract spectral, rhythm and decomposition features from one audio time series.

    Parameters
    ----------
    y : np.ndarray
        Audio time series of a single track (as returned by ``librosa.load``).
    track_id : str
        Identifier of the track; stored under the ``'track_id'`` key and used
        in the pickle file name.
    save : bool, default True
        When True, pickle the feature dictionary to
        ``<save_dir>/dict_feat_<track_id>.pkl`` (an existing file is overwritten).
    display : bool, default False
        When True, return the feature dictionary; otherwise return None.
    save_dir : str
        Folder the pickle is written to. Defaults to the project's
        ``features_dictionaries`` folder.

    Returns
    -------
    dict or None
        ``{'track_id', 'yharm', 'melspec', 'chroma_stft_S1', 'chroma_cens',
        'mfcc', 'rms', 'spec_centroid', 'spec_bw', 'contrast', 'flatness',
        'rolloff', 'poly', 'tonnetz', 'ZCR', 'tempo', 'H', 'P'}`` when
        ``display`` is True, else None.
    """
    # One complex STFT (librosa defaults: n_fft=2048, hop_length=512) serves
    # both the power spectrogram and the harmonic/percussive decomposition;
    # previously the identical transform was computed three times, once unused.
    stft_complex = librosa.stft(y=y, n_fft=2048, hop_length=512)
    S1 = np.abs(stft_complex)**2   #power spectrogram - intermediate, not saved

    #Spectral
    yharm = librosa.effects.harmonic(y)

    melspec = librosa.feature.melspectrogram(y = y, hop_length=512)
    chroma_stft_S1 = librosa.feature.chroma_stft(S = S1, n_chroma=12)
    chroma_cens = librosa.feature.chroma_cens(y = y, n_chroma=12)
    mfcc = librosa.feature.mfcc(y = y, n_mfcc = 12)
    # rms takes either `y` or a magnitude spectrogram `S`; a mel spectrogram is
    # not a valid `S`, so only `y` is passed (RMS is computed from the samples).
    rms = librosa.feature.rms(y = y)
    spec_centroid = librosa.feature.spectral_centroid(y = y)

    spec_bw = librosa.feature.spectral_bandwidth(y=y)
    contrast = librosa.feature.spectral_contrast(S = S1)
    flatness = librosa.feature.spectral_flatness(y=y)
    rolloff = librosa.feature.spectral_rolloff(y=y)
    poly = librosa.feature.poly_features(S = S1, order=0)
    tonnetz = librosa.feature.tonnetz(y= yharm)
    ZCR = librosa.feature.zero_crossing_rate(y = y)

    #Rhythm
    oenv = librosa.onset.onset_strength(y=y, hop_length=512) #intermediate, not saved
    # NOTE(review): newer librosa releases expose this as librosa.feature.tempo -
    # confirm against the installed version before upgrading.
    tempo = librosa.beat.tempo(onset_envelope = oenv, hop_length=512)[0]

    #Spectrogram decomposition into harmonic (H) and percussive (P) components
    H, P = librosa.decompose.hpss(S = stft_complex)

    dictionary = {
        'track_id': track_id, 'yharm': yharm, 'melspec': melspec,
        'chroma_stft_S1': chroma_stft_S1, 'chroma_cens': chroma_cens,
        'mfcc': mfcc, 'rms': rms, 'spec_centroid': spec_centroid,
        'spec_bw': spec_bw, 'contrast': contrast, 'flatness': flatness,
        'rolloff': rolloff, 'poly': poly, 'tonnetz': tonnetz,
        'ZCR': ZCR, 'tempo': tempo, 'H': H, 'P': P,
    }

    if save:
        # `with` closes the file even if pickling fails part-way through.
        with open(os.path.join(save_dir, 'dict_feat_{}.pkl'.format(track_id)), "wb") as feat_dict:
            pickle.dump(dictionary, feat_dict)

    if display:
        return dictionary
    return None

Test above function, displaying feature array extracted for a given track using the ATS numpy array

In [4]:
#test above function on a single stored ATS array
with open('/Volumes/Extreme SSD/CAPSTONE_DATA/saved/ATS_0_50/050993.npy', "rb") as a_array:
    ATS_TEST = np.load(a_array)   #file is closed as soon as the array is loaded

extract_features_array(ATS_TEST, '050993', save = False, display=True) #not saving only displaying

{'track_id': '050993',
 'yharm': array([-0.07391745, -0.08061884, -0.08036947, ...,  0.        ,
         0.        ,  0.        ], dtype=float32),
 'melspec': array([[3.8379120e+01, 1.3121475e+01, 1.6021961e+01, ..., 1.9974895e-01,
         4.9430913e-01, 1.3632834e-01],
        [6.3905707e+02, 6.2784198e+02, 6.4522736e+02, ..., 5.5490702e-01,
         1.0584415e+00, 3.8813183e-01],
        [1.9068240e+02, 2.5939569e+02, 3.2135941e+02, ..., 3.5227637e+00,
         3.8852777e+00, 1.2624512e+00],
        ...,
        [1.5188937e-02, 1.3670291e-02, 1.3535739e-02, ..., 6.7426945e-04,
         5.9079559e-04, 2.7281829e-04],
        [1.0077956e-02, 5.4014931e-03, 3.2646512e-03, ..., 1.7821505e-04,
         1.0034796e-04, 3.0683008e-05],
        [3.0446649e-04, 1.4727877e-04, 6.6611567e-05, ..., 1.3548350e-05,
         8.7201297e-06, 5.9221502e-06]], dtype=float32),
 'chroma_stft_S1': array([[1.        , 0.9077362 , 0.7660032 , ..., 0.9297181 , 1.        ,
         1.        ],
        [0.61

After running above function, each stored dictionary created takes ~ 25 MB. Estimate total 16,000 tracks required space : 400 GB

***EXTRACTING*** 

Getting feature dictionaries for tracks in folders 0 - 50 and saving to hard drive (*Do not run twice*)

In [10]:
# Extract and save a feature dictionary for every ATS array of folders 0 - 50.
ATS_0_50_fp = '/Volumes/Extreme SSD/CAPSTONE_DATA/saved/ATS_0_50'
# Keep only real .npy arrays (skip hidden/system entries the OS may add) and
# sort so the processing order is deterministic.
ATS_0_50 = sorted(f for f in os.listdir(ATS_0_50_fp)
                  if f.endswith('.npy') and not f.startswith('.'))

for ATS in ATS_0_50:
    track_id = ATS.split('.')[0]
    with open(os.path.join(ATS_0_50_fp, ATS), "rb") as a_array:
        y = np.load(a_array)   #file is closed before the long feature extraction starts
    extract_features_array(y, track_id, save = True, display = False) #automatically saves each dictionary



Checking total No. of feature dictionaries matches total No. of tracks and ATS numpy arrays (7,949)

In [12]:
# Folder holding the pickled feature dictionaries; count its entries.
feat_dict_0_50_fp = '/Volumes/Extreme SSD/CAPSTONE_DATA/saved/features_dictionaries'
feat_dict_0_50 = os.listdir(feat_dict_0_50_fp)

n_feat_dicts = len(feat_dict_0_50)
print("No. of feature dictionaries in folders 000 - 050 :", n_feat_dicts)

No. of feature dictionaries in folders 000 - 050 : 7949


Getting feature dictionaries for tracks in folders 51 - 100 and saving to hard drive (*Do not run twice*)

In [335]:
# Extract and save a feature dictionary for every ATS array of folders 51 - 100.
ATS_51_100_fp = '/Volumes/Extreme SSD/CAPSTONE_DATA/saved/ATS_51_100'
# Keep only real .npy arrays (skip hidden/system entries the OS may add) and
# sort so the processing order is deterministic.
ATS_51_100 = sorted(f for f in os.listdir(ATS_51_100_fp)
                    if f.endswith('.npy') and not f.startswith('.'))

for ATS in ATS_51_100:
    track_id = ATS.split('.')[0]
    with open(os.path.join(ATS_51_100_fp, ATS), "rb") as a_array:
        y = np.load(a_array)   #file is closed before the long feature extraction starts
    extract_features_array(y, track_id, save = True, display = False) #automatically saves each dictionary

  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]
  n_fft, y.shape[-1]


After extracting the feature dictionaries for folders 51 - 100, verify that the total in the folder 'features_dictionaries' has increased to 7949 + 8073 = 16022:

In [339]:
# Folder holding the pickled feature dictionaries; count its entries again
# now that both chunks have been processed.
feat_dict_0_100_fp = '/Volumes/Extreme SSD/CAPSTONE_DATA/saved/features_dictionaries'
feat_dict_0_100 = os.listdir(feat_dict_0_100_fp)

n_feat_dicts_total = len(feat_dict_0_100)
print("No. of feature dictionaries in folders 000 - 100 :", n_feat_dicts_total)

No. of feature dictionaries in folders 000 - 100 : 16022
