# This notebook contains code to extract audio features


1. Librosa is used to extract features. 
2. The list of extracted features could be expanded. 

**List of created features**
1. Energy (RMS)
2. Mel Spectrogram, 40 bands
3. MFCC, 13 coefficients
4. Spectral Centroid
5. Spectral Bandwidth
6. Spectral Contrast, 7 bands
7. Spectral Rolloff
8. Zero Crossing Rate

**Total number of features created = 65** (1 + 40 + 13 + 1 + 1 + 7 + 1 + 1)

In [2]:
import librosa
import numpy as np
import pandas as pd
from datetime import timedelta
import glob
import natsort

In [3]:
def ExtractFeatures(soundData, samplingFrequency, frameLength, hopLength):
    """Compute frame-wise audio features with librosa and return them as a table.

    Parameters
    ----------
    soundData
        Audio samples, passed as ``y`` to every librosa feature call.
    samplingFrequency
        Sampling rate of ``soundData``, passed as ``sr``.
    frameLength
        Frame length in samples used for the energy (RMS) and zero-crossing
        rate. The spectral features are not given this value and therefore
        use librosa's default FFT window size.
    hopLength
        Number of samples between the starts of consecutive frames; used by
        every feature.

    Returns
    -------
    pandas.DataFrame
        One row per frame and one column per feature: ``Energy``,
        ``melspectrum_0..39``, ``mfcc_0..12``, ``Spectral_Centroid``,
        ``Spectral_Bandwidth``, seven ``Spectral_Contrast_*`` bands,
        ``Spectral_Rolloff`` and ``Zero_Crossing_Rate``.
    """
    # Every feature must be framed the same way, otherwise the rows of the
    # stacked matrix would not describe the same stretch of audio.  MFCC
    # forwards extra keyword arguments to the mel spectrogram, so it accepts
    # ``center=False`` like the other features.
    framing = dict(hop_length=hopLength, center=False)

    energy = librosa.feature.rms(y=soundData, frame_length=frameLength, **framing)
    mel_spectrum = librosa.feature.melspectrogram(y=soundData, sr=samplingFrequency,
                                                  n_mels=40, **framing)
    mfcc = librosa.feature.mfcc(y=soundData, sr=samplingFrequency, n_mfcc=13, **framing)
    spec_centr = librosa.feature.spectral_centroid(y=soundData, sr=samplingFrequency, **framing)
    spec_bandwidth = librosa.feature.spectral_bandwidth(y=soundData, sr=samplingFrequency,
                                                        **framing)
    spec_contrast = librosa.feature.spectral_contrast(y=soundData, sr=samplingFrequency,
                                                      **framing)
    spec_rolloff = librosa.feature.spectral_rolloff(y=soundData, sr=samplingFrequency,
                                                    roll_percent=0.90, **framing)
    zcr = librosa.feature.zero_crossing_rate(y=soundData, frame_length=frameLength, **framing)

    # (column names, values) pairs, in the order the columns appear in the result.
    feature_blocks = [
        (['Energy'], energy),
        (['melspectrum_{}'.format(i) for i in range(mel_spectrum.shape[0])], mel_spectrum),
        (['mfcc_{}'.format(i) for i in range(mfcc.shape[0])], mfcc),
        (['Spectral_Centroid'], spec_centr),
        (['Spectral_Bandwidth'], spec_bandwidth),
        (['Spectral_Contrast_0_200',
          'Spectral_Contrast_200_400',
          'Spectral_Contrast_400_800',
          'Spectral_Contrast_800_1600',
          'Spectral_Contrast_1600_3200',
          'Spectral_Contrast_3200_6400',
          'Spectral_Contrast_6400_12800'], spec_contrast),
        (['Spectral_Rolloff'], spec_rolloff),
        (['Zero_Crossing_Rate'], zcr),
    ]

    # Energy/ZCR are framed with ``frameLength`` while the spectral features
    # use librosa's default window, so the number of complete frames can
    # differ between them.  Keep only the frames that every feature produced.
    frame_count = min(values.shape[-1] for _, values in feature_blocks)
    numpy_array_of_features = np.vstack(
        [values[..., :frame_count] for _, values in feature_blocks])

    # Flat list of names: a nested list would make pandas build a MultiIndex.
    column_names = [name for names, _ in feature_blocks for name in names]

    # Transpose so that rows are frames and columns are features.
    oneSecondFeatures = pd.DataFrame(numpy_array_of_features.T, columns=column_names)

    return oneSecondFeatures

In [4]:
# `filelist` must be defined before this cell runs: assign it the list of
# sound-file paths whose features are to be extracted, for example
#     filelist = glob.glob('<directory with recordings>/*.wav')
# The list is then put into natural sort order with natsort.
filelist = natsort.natsorted(filelist)

In [5]:
# Extract the features of every file in `filelist` and stack them, in file
# order, into the single DataFrame `featuresFile`.
perFileFeatures = []

for soundFile in filelist:
    # sr=None asks librosa to keep the file's own sampling rate.
    soundData, samplingFrequency = librosa.load(soundFile, sr=None)
    # Frames are one second long (frameLength == sampling rate).
    # NOTE(review): the hop is one sample longer than the frame, so one sample
    # is skipped between consecutive frames -- confirm this is intended.
    oneFileFeatures = ExtractFeatures(soundData, samplingFrequency,
                                      frameLength=samplingFrequency,
                                      hopLength=samplingFrequency + 1)
    perFileFeatures.append(oneFileFeatures)

# One concat at the end replaces DataFrame.append (removed in pandas 2.0) and
# avoids re-copying the accumulated frame on every iteration.  pd.concat
# rejects an empty list, so fall back to an empty frame when there are no files.
featuresFile = pd.concat(perFileFeatures) if perFileFeatures else pd.DataFrame()

In [None]:
# Preview the first rows of the combined feature table.
featuresFile.head()