In [31]:
import os
import librosa
import numpy as np
import pandas as pd


In [32]:
# Root folder that is searched recursively for .wav files.
# Set the RAVDESS_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset (or empty) the original local path is used.
data_path = (
    os.environ.get("RAVDESS_DATA_DIR")
    or r"D:\speech_emotion\speech-emotion-classification\data"
)



In [33]:
# Recursively collect the path of every .wav file below data_path.
# The result is sorted so that the file order (and therefore the row order of
# the feature table built from it) does not depend on directory listing order.
files = sorted(
    os.path.join(root, name)
    for root, _dirs, names in os.walk(data_path)
    for name in names
    if name.lower().endswith(".wav")  # lower() so ".WAV" files are not missed
)

# Report files whose base name occurs more than once in the tree. Nothing is
# dropped here; the count is only a hint that the same recording may have been
# picked up from several sub-folders, which would put duplicate rows in the
# feature table.
duplicate_count = len(files) - len({os.path.basename(path) for path in files})
if duplicate_count:
    print("Warning:", duplicate_count, "files share a file name with another file (possible duplicates)")

print("Total audio files found:", len(files))
files[:5]


Total audio files found: 2880


['D:\\speech_emotion\\speech-emotion-classification\\data\\Actor_01\\03-01-01-01-01-01-01.wav',
 'D:\\speech_emotion\\speech-emotion-classification\\data\\Actor_01\\03-01-01-01-01-02-01.wav',
 'D:\\speech_emotion\\speech-emotion-classification\\data\\Actor_01\\03-01-01-01-02-01-01.wav',
 'D:\\speech_emotion\\speech-emotion-classification\\data\\Actor_01\\03-01-01-01-02-02-01.wav',
 'D:\\speech_emotion\\speech-emotion-classification\\data\\Actor_01\\03-01-02-01-01-01-01.wav']

In [34]:
def extract_features(file_path):
    """Turn one audio file into a single flat feature vector.

    The file is read with ``librosa.load(file_path, sr=None)`` and five
    librosa features are computed from the loaded signal. Every feature is
    reduced to time-independent numbers by averaging.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.

    Returns
    -------
    numpy.ndarray
        ``np.hstack`` of, in this order: the per-coefficient means of the
        40 MFCCs, the per-row means of the chromagram, and the overall means
        of the spectral centroid, zero-crossing rate and spectral roll-off.
    """
    signal, rate = librosa.load(file_path, sr=None)

    def row_means(matrix):
        # Transpose, then average over the first axis: one value per row of
        # the feature matrix as returned by librosa.
        return np.mean(matrix.T, axis=0)

    mfcc_part = row_means(librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40))
    chroma_part = row_means(librosa.feature.chroma_stft(y=signal, sr=rate))

    # The remaining features are collapsed to one scalar each.
    centroid_part = np.mean(librosa.feature.spectral_centroid(y=signal, sr=rate))
    zcr_part = np.mean(librosa.feature.zero_crossing_rate(signal))
    rolloff_part = np.mean(librosa.feature.spectral_rolloff(y=signal, sr=rate))

    pieces = [mfcc_part, chroma_part, centroid_part, zcr_part, rolloff_part]
    return np.hstack(pieces)


In [35]:
def get_emotion_from_path(path):
    """Return the emotion label encoded in an audio file's name.

    The file name (last path component) is split on ``"-"``; the third field
    is read as an integer code and translated to a label.

    Parameters
    ----------
    path : str
        Path of a file named like ``03-01-05-01-01-01-01.wav``.

    Returns
    -------
    str
        One of "neutral", "calm", "happy", "sad", "angry", "fear",
        "disgust", "surprise" for codes 1 through 8.

    Raises
    ------
    IndexError
        If the file name has fewer than three dash-separated fields.
    ValueError
        If the third field is not an integer.
    KeyError
        If the code is outside 1..8.
    """
    labels = [
        "neutral",
        "calm",
        "happy",
        "sad",
        "angry",
        "fear",
        "disgust",
        "surprise",
    ]
    # Codes are 1-based: 1 -> "neutral", ..., 8 -> "surprise".
    label_by_code = dict(enumerate(labels, start=1))

    _folder, name = os.path.split(path)
    fields = name.split("-")
    code = int(fields[2])

    return label_by_code[code]


In [36]:
# Build one row per audio file: [path, emotion label, feature values...].
data = []
failed_files = []  # paths that raised during processing, kept for the summary

for file in files:
    try:
        features = extract_features(file)
        emotion = get_emotion_from_path(file)
    except Exception as e:
        # Best effort: a single unreadable or oddly named file should not
        # abort the whole run, but it is recorded so the loss is visible.
        print("Error processing:", file, "Error:", e)
        failed_files.append(file)
    else:
        data.append([file, emotion, *features])

# Individual error lines are easy to miss in a long run; summarise them.
if failed_files:
    print("Skipped", len(failed_files), "of", len(files), "files due to errors")


In [37]:
# Column names, in the same order as the values of each row in `data`:
# two identifying columns, then 40 MFCC means, 12 chroma means and the three
# scalar spectral features.
columns = ["file", "emotion"]
columns.extend(f"mfcc_{i}" for i in range(40))
columns.extend(f"chroma_{i}" for i in range(12))
columns.extend(["centroid", "zcr", "rolloff"])

df = pd.DataFrame(data, columns=columns)
df.head()


Unnamed: 0,file,emotion,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,...,chroma_5,chroma_6,chroma_7,chroma_8,chroma_9,chroma_10,chroma_11,centroid,zcr,rolloff
0,D:\speech_emotion\speech-emotion-classificatio...,neutral,-726.217224,68.54142,3.293398,12.2053,5.510278,13.66741,-2.983828,3.098029,...,0.617627,0.6303,0.641174,0.643986,0.623945,0.6339,0.629672,7416.297748,0.050476,13285.735887
1,D:\speech_emotion\speech-emotion-classificatio...,neutral,-719.128296,70.201569,1.168397,13.122541,7.83695,14.41129,-4.11136,4.468973,...,0.629304,0.59825,0.602427,0.638104,0.65011,0.663277,0.638956,7135.571471,0.052904,13191.643371
2,D:\speech_emotion\speech-emotion-classificatio...,neutral,-714.995728,69.689346,3.924564,11.92419,6.421723,11.011614,-2.878103,4.509558,...,0.595411,0.606914,0.611433,0.634772,0.586808,0.578905,0.612411,7239.265648,0.046627,13279.137826
3,D:\speech_emotion\speech-emotion-classificatio...,neutral,-710.975281,67.56488,5.78224,13.230727,6.190846,12.628252,-1.675169,5.657494,...,0.64686,0.619975,0.611885,0.633945,0.59796,0.60211,0.619935,7008.958169,0.053835,13272.074245
4,D:\speech_emotion\speech-emotion-classificatio...,calm,-759.921753,75.783524,6.023605,14.557394,6.454188,14.631508,-3.004551,4.62097,...,0.620046,0.594329,0.596532,0.616511,0.638365,0.670592,0.602803,6997.31181,0.045929,12649.543486


In [38]:
# Write the feature table to CSV (without the index column).
output_csv = r"D:\speech_emotion\speech-emotion-classification\features\ravdess_features.csv"

# to_csv does not create missing folders, so make sure the target folder
# exists; exist_ok=True keeps re-runs from failing when it is already there.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

df.to_csv(output_csv, index=False)
print("Features saved successfully!")


Features saved successfully!
