In [1]:
import librosa
import soundfile
import os
import glob
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC



In [2]:
# Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read()
        result = np.array([])
        sample_rate = sound_file.samplerate

        mfccs = np.mean(librosa.feature.mfcc(
            y=X, sr=sample_rate, n_mfcc=40).T, axis=0)

        result = np.hstack((result, mfccs))

        stft = np.abs(librosa.stft(X))
        chroma = np.mean(librosa.feature.chroma_stft(
            S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))

        mel = np.mean(librosa.feature.melspectrogram(
            y=X, sr=sample_rate).T, axis=0)

        result = np.hstack((result, mel))

    return result


In [3]:
# Emotions in the RAVDESS dataset
emotions = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}

# Emotions to observe
observed_emotions = ['calm', 'happy', 'fearful', 'disgust']


In [None]:
# Load the data and extract features for each sound file
def load_data():
    x, y = [], []
    for file in glob.glob("C:\\Users\\Abhay\\Downloads\\dataset\\*\\*.wav"):
        file_name = os.path.basename(file)
        emotion = emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        feature = extract_feature(file)
        x.append(feature)
        y.append(emotion)
    return np.array(x),y


In [None]:
df = pd.DataFrame(x,y).reset_index().rename({'index':'Emotion'}, axis=1)

In [None]:
for i in range(180):
    if i<40:
        name = f"mfcc_{i}"
    elif i<52:
        name = f"chroma_{i-40}"
    else:
        name = f"mel_{i-52}"
    df.rename({i : name},axis = 1,inplace = True)

df.head()

Unnamed: 0,Emotion,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,...,mel_118,mel_119,mel_120,mel_121,mel_122,mel_123,mel_124,mel_125,mel_126,mel_127
0,calm,-709.056928,55.734301,2.66831,16.362571,3.344781,-1.124849,-5.818394,-8.999178,-9.009145,...,1.5e-05,7e-06,7e-06,5e-06,6e-06,5e-06,5e-06,3e-06,2e-06,1.469757e-06
1,calm,-695.383726,61.305755,-0.609379,14.27059,4.689146,-2.75907,-7.084949,-8.106919,-8.401125,...,1.1e-05,9e-06,4e-06,4e-06,3e-06,2e-06,3e-06,3e-06,2e-06,7.818131e-07
2,calm,-687.338556,57.978223,0.120546,13.901187,1.862312,1.50366,-6.355483,-8.991887,-8.013114,...,7.1e-05,3.7e-05,3.5e-05,5e-05,2e-05,2.3e-05,1.5e-05,1e-05,4e-06,1.583408e-06
3,calm,-684.747655,62.274807,-0.803503,15.693714,2.540188,1.151811,-6.517996,-9.151276,-7.50337,...,3.8e-05,2.5e-05,2.3e-05,1.5e-05,1.2e-05,1.4e-05,3.1e-05,1.4e-05,5e-06,1.657712e-06
4,calm,-717.279709,63.705902,2.185004,15.600433,3.973435,-2.098758,-5.965471,-5.629254,-8.025695,...,5e-06,3e-06,2e-06,3e-06,3e-06,3e-06,3e-06,2e-06,1e-06,6.484342e-07


In [None]:
df.to_csv("Extracted Feature.csv", index=False)

In [4]:
df = pd.read_csv("/content/Extracted Feature.csv")
df.head()

Unnamed: 0,Emotion,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,...,mel_118,mel_119,mel_120,mel_121,mel_122,mel_123,mel_124,mel_125,mel_126,mel_127
0,calm,-709.056928,55.734301,2.66831,16.362571,3.344781,-1.124849,-5.818394,-8.999178,-9.009145,...,1.5e-05,7e-06,7e-06,5e-06,6e-06,5e-06,5e-06,3e-06,2e-06,1.469757e-06
1,calm,-695.383726,61.305755,-0.609379,14.27059,4.689146,-2.75907,-7.084949,-8.106919,-8.401125,...,1.1e-05,9e-06,4e-06,4e-06,3e-06,2e-06,3e-06,3e-06,2e-06,7.818131e-07
2,calm,-687.338556,57.978223,0.120546,13.901187,1.862312,1.50366,-6.355483,-8.991887,-8.013114,...,7.1e-05,3.7e-05,3.5e-05,5e-05,2e-05,2.3e-05,1.5e-05,1e-05,4e-06,1.583408e-06
3,calm,-684.747655,62.274807,-0.803503,15.693714,2.540188,1.151811,-6.517996,-9.151276,-7.50337,...,3.8e-05,2.5e-05,2.3e-05,1.5e-05,1.2e-05,1.4e-05,3.1e-05,1.4e-05,5e-06,1.657712e-06
4,calm,-717.279709,63.705902,2.185004,15.600433,3.973435,-2.098758,-5.965471,-5.629254,-8.025695,...,5e-06,3e-06,2e-06,3e-06,3e-06,3e-06,3e-06,2e-06,1e-06,6.484342e-07


In [9]:
y = df["Emotion"]
x = df.drop(["Emotion"], axis = 1)

In [16]:
# scaling features

from sklearn.preprocessing import MinMaxScaler
mm = MinMaxScaler()
transformer = mm.fit(x)
x_scaled = transformer.transform(x)


In [46]:
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2)

# SVM Model
---

In [20]:
model_svm = SVC()
model_svm.fit(x_train, y_train)

In [24]:
y_pred_svm = model_svm.predict(x_test)
print(f"Accuracy Score : {accuracy_score(y_true = y_test, y_pred = y_pred_svm)*100:.2f}%")

Accuracy Score : 62.34%


# MLP Classifier
---

In [72]:
from sklearn.neural_network import MLPClassifier

model_mlp = MLPClassifier(alpha=0.01, batch_size=180, epsilon=1e-9, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=900)

In [73]:
model_mlp.fit(x_train, y_train)

In [74]:
y_pred_mlp = model_mlp.predict(x_test)
print(f"Accuracy Score : {accuracy_score(y_true = y_test, y_pred = y_pred_mlp)*100:.2f}%")

Accuracy Score : 76.62%


# Tensorflow Model
---

In [75]:
emotions_rev = {
    'calm':0,
    'happy':1,
    'fearful':2,
    'disgust':3
}

y_num = np.array([emotions_rev[x] for x in y])
y_num[:5]

array([0, 0, 0, 0, 0])

In [125]:
x_train_tf, x_test_tf, y_train_tf, y_test_tf = train_test_split(x_scaled , y_num, test_size= 0.25, random_state=10)
y_train_tf[:5]

array([1, 0, 2, 0, 1])

In [79]:
import tensorflow as tf
from tensorflow import keras

In [134]:
model_tf = keras.Sequential([
    keras.layers.Dense(256,input_dim=180,activation='tanh'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(128,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(64,activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4,activation='softmax')
])

model_tf.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3), metrics=['accuracy'], loss='sparse_categorical_crossentropy')

In [135]:
model_tf.fit(x_train_tf, y_train_tf, epochs=300, batch_size=120)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<keras.src.callbacks.History at 0x79a7436b5450>

In [136]:
model_tf.evaluate(x_test_tf, y_test_tf)



[1.5538921356201172, 0.7291666865348816]