## Imports

In [1]:
import librosa
import numpy as np
import pandas as pd

## Paths

In [2]:
# Filesystem locations used by this notebook:
#   data_paths - CSV handed to pd.read_csv as the list of audio files
#   save_path  - directory that receives the extracted-features CSV
paths = dict(
    data_paths="../../../assets/audio_sentiment_data_v2/data_features/data_paths.csv",
    save_path="../../../assets/audio_sentiment_data_v2/data_features",
)

## Extracting features from the audio files
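The features extracted are: RMS energy, spectral centroid, spectral bandwidth, spectral rolloff, zero-crossing rate, chroma STFT (12 bins) and MFCCs (20 coefficients). Each feature is computed on the first 3 seconds of the audio file and averaged over time, giving one row of 37 values per file.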

In [3]:
# Feature layout of one row: 5 scalar descriptors, then one column per
# chroma bin, then one column per MFCC coefficient.
N_CHROMA = 12
N_MFCC = 20
CLIP_SECONDS = 3  # only the first 3 seconds of every file are analysed

feature_columns = (
    ['rmse', 'spectral_centroid', 'spectral_bandwidth', 'rolloff', 'zero_crossing_rate']
    + [f'chroma_stft_{i}' for i in range(1, N_CHROMA + 1)]
    + [f'mfcc_{i}' for i in range(1, N_MFCC + 1)]
)


def extract_features(path):
    """Compute the time-averaged feature vector of one audio file.

    Parameters
    ----------
    path : str
        Location of the audio file, passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``5 + N_CHROMA + N_MFCC`` whose entries follow
        the order of ``feature_columns``.
    """
    y, sr = librosa.load(path, res_type='kaiser_fast', duration=CLIP_SECONDS)

    # Scalar descriptors: mean over all frames.
    rmse = np.mean(librosa.feature.rms(y=y))
    spec_cent = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=y))

    # Vector descriptors: mean over the frame axis, one value per bin/coefficient.
    # The bin counts are passed explicitly so the vector always matches
    # feature_columns instead of silently relying on library defaults.
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=N_CHROMA), axis=1)
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC), axis=1)

    return np.concatenate(([rmse, spec_cent, spec_bw, rolloff, zcr], chroma_stft, mfcc))


# The CSV location is stored under paths["data_paths"]; the bare name
# `data_paths` is not defined anywhere and raised a NameError on a fresh kernel.
ref = pd.read_csv(paths["data_paths"])

# Collect every row first and build the frame once; assigning df.loc[i] inside
# the loop re-allocates the frame on each insert.
feature_rows = [extract_features(path) for path in ref['path']]
df = pd.DataFrame(feature_rows, columns=feature_columns)

In [4]:
# Put the label column next to the feature columns. pd.concat aligns the two
# pieces on their index, so row i of `df` is paired with label i of `ref`.
labels = ref['labels']
df_out = pd.concat([df, labels], axis="columns")

# Preview the first rows of the combined table.
df_out.head()

Unnamed: 0,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,chroma_stft_1,chroma_stft_2,chroma_stft_3,chroma_stft_4,chroma_stft_5,...,mfcc_12,mfcc_13,mfcc_14,mfcc_15,mfcc_16,mfcc_17,mfcc_18,mfcc_19,mfcc_20,labels
0,0.147862,999.945276,1063.106734,1771.520057,0.032531,0.39294,0.452334,0.430173,0.39506,0.396081,...,-1.112167,-1.467015,-3.402173,4.430283,-0.306186,-3.866104,5.422969,-2.040354,-6.495267,angry
1,0.142524,933.697225,1095.933846,1756.446815,0.028598,0.439112,0.569759,0.621076,0.533659,0.460831,...,-0.099838,-5.589032,1.361362,3.755712,-4.464936,-4.113828,1.086978,-5.010358,-3.381076,angry
2,0.150466,618.05001,965.148032,1064.697266,0.024664,0.454376,0.495002,0.592909,0.640397,0.641311,...,0.299027,-2.144394,0.935727,3.672377,-1.924671,3.592155,1.81914,-2.990866,0.25146,angry
3,0.152425,966.90785,1149.58966,1705.046469,0.037887,0.454727,0.448029,0.461427,0.493463,0.525605,...,-1.327947,-2.816006,-1.966254,5.995186,-0.272795,-3.719632,6.069865,-2.670578,-4.123085,angry
4,0.151927,1133.777922,1211.799547,1970.867826,0.044028,0.433203,0.399038,0.441396,0.462289,0.523994,...,-1.514057,-3.801479,1.634403,6.919915,-2.334688,-4.425612,6.456119,-1.603486,-4.420147,angry


## Saving the extracted features and labels into a CSV file

In [5]:
# Write features + labels into the configured save directory; index=False
# keeps the DataFrame index out of the file.
output_csv = f"{paths['save_path']}/data_features_and_labels.csv"
df_out.to_csv(output_csv, index=False)