## Imports

In [3]:
import librosa
import numpy as np
import pandas as pd
import pickle

## Paths

In [4]:
# File-system locations used by the cells below:
#   data_paths -> CSV read with pd.read_csv; its `path` and `labels` columns are used later.
#   save_path  -> directory the final features-and-labels CSV is written into.
paths = dict(
    data_paths="../../../assets/audio_sentiment_data_v2/data_features/data_paths_final.csv",
    save_path="../../../assets/audio_sentiment_data_v2/data_features",
)

## Extracting features from the audio files
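The extracted features are: RMS energy (`rmse`), spectral centroid, spectral bandwidth, spectral rolloff, zero-crossing rate, chroma STFT (12 bins) and MFCCs (20 coefficients). Each feature is averaged over all frames of a clip, so every audio file yields one row of 37 values.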

In [5]:
# Build one row of frame-averaged audio features per file listed in the
# reference CSV. Produces:
#   ref     -> the reference table (its `path` column is iterated here)
#   df      -> one row per audio file, 37 numeric feature columns
#   counter -> number of files processed (kept for any later cell that reads it)
ref = pd.read_csv(paths['data_paths'])

# Output schema: 5 scalar descriptors, 12 chroma bins, 20 MFCCs.
# Names are generated so they cannot drift out of sync with the values below.
feature_columns = (
    ['rmse',
     'spectral_centroid',
     'spectral_bandwidth',
     'rolloff',
     'zero_crossing_rate']
    + [f'chroma_stft_{i}' for i in range(1, 13)]
    + [f'mfcc_{i}' for i in range(1, 21)]
)

# Collect plain rows and build the DataFrame once at the end; growing a frame
# with `df.loc[n] = ...` re-allocates on every row.
rows = []

for path in ref.path:
    # Load at most the first 3 seconds, resampled to 44.1 kHz.
    y, sr = librosa.load(path, res_type='kaiser_fast', duration=3, sr=22050*2)
    # Force every clip to exactly 3 s * 44100 Hz = 132300 samples (shorter
    # clips are padded, longer ones trimmed). `size` must be passed by keyword:
    # it is keyword-only in librosa >= 0.10.
    y = librosa.util.fix_length(y, size=132300)

    # Scalar descriptors: mean over all frames.
    rmse = np.mean(librosa.feature.rms(y=y))
    spec_cent = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=y))

    # Vector descriptors: mean over the frame axis, one value per bin/coefficient.
    # The sizes are passed explicitly (they equal the library defaults) so the
    # number of values always matches `feature_columns`.
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=12), axis=1)
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20), axis=1)

    rows.append([rmse, spec_cent, spec_bw, rolloff, zcr, *chroma_stft, *mfcc])

df = pd.DataFrame(rows, columns=feature_columns)
counter = len(df)

In [6]:
# Put the `labels` column from the reference table next to the extracted
# features (rows are matched on index), then preview the first rows.
labels = ref['labels']
df_out = pd.concat((df, labels), axis='columns')
df_out.head()

Unnamed: 0,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,chroma_stft_1,chroma_stft_2,chroma_stft_3,chroma_stft_4,chroma_stft_5,...,mfcc_12,mfcc_13,mfcc_14,mfcc_15,mfcc_16,mfcc_17,mfcc_18,mfcc_19,mfcc_20,labels
0,0.140959,909.294473,1097.042536,1603.267525,0.016943,0.436693,0.511925,0.512334,0.480641,0.466939,...,-8.729561,-1.838324,0.268404,0.789445,-0.929519,0.485638,4.871945,1.404775,-2.436951,angry
1,0.137117,823.033604,1124.201441,1512.728304,0.014537,0.593529,0.679138,0.635324,0.558787,0.490128,...,-8.156523,2.750646,2.75798,-3.322827,-1.114958,3.73191,3.305622,-2.032364,-3.192875,angry
2,0.101072,408.188142,786.675134,695.048564,0.008665,0.340911,0.372148,0.440176,0.464147,0.476713,...,-2.824953,1.463743,1.082735,-0.288053,-0.498972,2.284548,2.159682,-0.455052,1.833002,angry
3,0.131034,844.784238,1207.130092,1459.020006,0.018104,0.464754,0.471661,0.502144,0.530103,0.574767,...,-5.333557,0.292711,0.349077,-0.785189,-1.125823,0.974832,5.092385,1.155617,-2.456316,angry
4,0.145784,1077.861061,1402.87964,1818.599557,0.022972,0.490705,0.475587,0.528495,0.549517,0.582952,...,-8.897561,0.499078,0.726909,-2.18348,-0.722497,3.656292,5.761038,-0.600574,-4.145806,angry


## Saving the extracted features and labels into a CSV file

In [7]:
# Write features + labels as a single CSV inside the configured save directory;
# the DataFrame index is not written.
output_csv = f"{paths['save_path']}/data_features_and_labels_final.csv"
df_out.to_csv(output_csv, index=False)