# Models
---
- [SVM Model](#scrollTo=1BwXNcTQKNHn)<br>
`Accuracy : 55.84%`
- [MLP Model](#scrollTo=CT9IT_cQOsZg)<br>
`Accuracy : 76.62%`
- [Tensorflow Model-1](#scrollTo=sTJgnYR3B_VM)<br>
`Accuracy : 67.09%`
- [Tensorflow Model-2](#scrollTo=g2i_LKV3CMrh)<br>
`Accuracy : 68.39%`
- [Final Model](#scrollTo=QKN0Wfdejomm)<br>
Combines the predictions of all the models above to make the final prediction<br>
`Accuracy : 81.82%`


In [None]:
import librosa
import soundfile
import os
import glob
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

In [None]:
# Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name):
    """Return one flat feature vector describing a sound file.

    The vector is the concatenation, in this order, of the time-averaged
    MFCCs (``n_mfcc=40``), the time-averaged chromagram computed from the
    magnitude STFT, and the time-averaged mel spectrogram.

    Parameters
    ----------
    file_name : str or path-like
        Audio file readable by ``soundfile.SoundFile``.

    Returns
    -------
    numpy.ndarray
        1-D float64 array ``[mfcc..., chroma..., mel...]``.
    """
    # Only the raw samples and the sample rate are needed from the file, so
    # release the handle before the (slower) feature computation starts.
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read()
        sample_rate = sound_file.samplerate

    # soundfile returns (frames, channels) for multi-channel audio, while the
    # librosa calls below are used on a single 1-D signal. Average the
    # channels so stereo recordings yield the same feature layout as mono.
    if X.ndim > 1:
        X = X.mean(axis=1)

    mfccs = np.mean(librosa.feature.mfcc(
        y=X, sr=sample_rate, n_mfcc=40).T, axis=0)

    stft = np.abs(librosa.stft(y=X))
    chroma = np.mean(librosa.feature.chroma_stft(
        S=stft, sr=sample_rate).T, axis=0)

    mel = np.mean(librosa.feature.melspectrogram(
        y=X, sr=sample_rate).T, axis=0)

    # The original accumulated into an empty float64 array; keep that dtype.
    return np.hstack((mfccs, chroma, mel)).astype(np.float64, copy=False)


In [None]:
# Emotions in the RAVDESS dataset, keyed by the two-digit code that
# load_data() reads from the third dash-separated field of each file name.
_EMOTION_LABELS = (
    'neutral', 'calm', 'happy', 'sad',
    'angry', 'fearful', 'disgust', 'surprised',
)
emotions = {
    f"{code:02d}": label
    for code, label in enumerate(_EMOTION_LABELS, start=1)
}

# Emotions to observe (every other emotion is skipped when loading)
observed_emotions = 'calm happy fearful disgust'.split()


In [None]:
# Load the data and extract features for each sound file
def load_data(data_glob="C:\\Users\\Abhay\\Downloads\\dataset\\*\\*.wav"):
    """Extract features and labels for every observed-emotion recording.

    Parameters
    ----------
    data_glob : str, optional
        Glob pattern selecting the ``.wav`` files. The default is the
        location originally hard-coded in this notebook; pass your own
        pattern to run on another machine.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` is a NumPy array holding one
        ``extract_feature`` row per kept file and ``y`` is the list of the
        matching emotion names.

    Notes
    -----
    The emotion code is the third dash-separated field of the file name.
    Files whose name has no such field, or whose code is not a key of
    ``emotions``, are skipped instead of raising, as are files whose emotion
    is not in ``observed_emotions``.
    """
    x, y = [], []
    # glob order depends on the filesystem; sorting makes the row order (and
    # therefore any seeded train/test split downstream) reproducible.
    for file in sorted(glob.glob(data_glob)):
        fields = os.path.basename(file).split("-")
        emotion = emotions.get(fields[2]) if len(fields) > 2 else None
        if emotion not in observed_emotions:
            continue
        x.append(extract_feature(file))
        y.append(emotion)
    return np.array(x), y


In [None]:
# Run the feature extraction. Without this call `x` and `y` are undefined on a
# fresh kernel, because nothing else in the notebook invokes load_data().
x, y = load_data()

# One row per recording: the emotion labels become the index, which
# reset_index() turns into a regular column that is then named 'Emotion'.
df = pd.DataFrame(data=x, index=y).reset_index().rename({'index':'Emotion'}, axis=1)

In [None]:
# Replace the positional column labels 0..179 with descriptive names that follow
# the layout of the feature vector: 40 MFCC columns, then 12 chroma columns,
# then 128 mel columns.
feature_names = (
    [f"mfcc_{k}" for k in range(40)]
    + [f"chroma_{k}" for k in range(12)]
    + [f"mel_{k}" for k in range(128)]
)
df.rename(dict(enumerate(feature_names)), axis=1, inplace=True)

df.head()

Unnamed: 0,Emotion,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,...,mel_118,mel_119,mel_120,mel_121,mel_122,mel_123,mel_124,mel_125,mel_126,mel_127
0,calm,-709.056928,55.734301,2.66831,16.362571,3.344781,-1.124849,-5.818394,-8.999178,-9.009145,...,1.5e-05,7e-06,7e-06,5e-06,6e-06,5e-06,5e-06,3e-06,2e-06,1.469757e-06
1,calm,-695.383726,61.305755,-0.609379,14.27059,4.689146,-2.75907,-7.084949,-8.106919,-8.401125,...,1.1e-05,9e-06,4e-06,4e-06,3e-06,2e-06,3e-06,3e-06,2e-06,7.818131e-07
2,calm,-687.338556,57.978223,0.120546,13.901187,1.862312,1.50366,-6.355483,-8.991887,-8.013114,...,7.1e-05,3.7e-05,3.5e-05,5e-05,2e-05,2.3e-05,1.5e-05,1e-05,4e-06,1.583408e-06
3,calm,-684.747655,62.274807,-0.803503,15.693714,2.540188,1.151811,-6.517996,-9.151276,-7.50337,...,3.8e-05,2.5e-05,2.3e-05,1.5e-05,1.2e-05,1.4e-05,3.1e-05,1.4e-05,5e-06,1.657712e-06
4,calm,-717.279709,63.705902,2.185004,15.600433,3.973435,-2.098758,-5.965471,-5.629254,-8.025695,...,5e-06,3e-06,2e-06,3e-06,3e-06,3e-06,3e-06,2e-06,1e-06,6.484342e-07


In [None]:
# Cache the extracted features on disk (without the DataFrame index) so the
# cells below can start from the CSV instead of re-reading the audio files.
df.to_csv("Extracted Feature.csv", index=False)

In [None]:
# Reload the cached features from the same relative path they were written to
# above. The previous hard-coded "/content/..." location only resolves when the
# working directory happens to be /content.
df = pd.read_csv("Extracted Feature.csv")
df.head()

Unnamed: 0,Emotion,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,...,mel_118,mel_119,mel_120,mel_121,mel_122,mel_123,mel_124,mel_125,mel_126,mel_127
0,calm,-709.056928,55.734301,2.66831,16.362571,3.344781,-1.124849,-5.818394,-8.999178,-9.009145,...,1.5e-05,7e-06,7e-06,5e-06,6e-06,5e-06,5e-06,3e-06,2e-06,1.469757e-06
1,calm,-695.383726,61.305755,-0.609379,14.27059,4.689146,-2.75907,-7.084949,-8.106919,-8.401125,...,1.1e-05,9e-06,4e-06,4e-06,3e-06,2e-06,3e-06,3e-06,2e-06,7.818131e-07
2,calm,-687.338556,57.978223,0.120546,13.901187,1.862312,1.50366,-6.355483,-8.991887,-8.013114,...,7.1e-05,3.7e-05,3.5e-05,5e-05,2e-05,2.3e-05,1.5e-05,1e-05,4e-06,1.583408e-06
3,calm,-684.747655,62.274807,-0.803503,15.693714,2.540188,1.151811,-6.517996,-9.151276,-7.50337,...,3.8e-05,2.5e-05,2.3e-05,1.5e-05,1.2e-05,1.4e-05,3.1e-05,1.4e-05,5e-06,1.657712e-06
4,calm,-717.279709,63.705902,2.185004,15.600433,3.973435,-2.098758,-5.965471,-5.629254,-8.025695,...,5e-06,3e-06,2e-06,3e-06,3e-06,3e-06,3e-06,2e-06,1e-06,6.484342e-07


In [None]:
# Separate the feature columns (x) from the label column (y).
x = df.drop(columns=["Emotion"])
y = df["Emotion"]

In [None]:
# scaling features

from sklearn.preprocessing import MinMaxScaler

# encode y
emotions_rev = {
    'calm':0,
    'happy':1,
    'fearful':2,
    'disgust':3
}

# `label` (not `x`) as the loop variable, so the feature frame's name is not reused.
y_num = np.array([emotions_rev[label] for label in y])

# Split BEFORE fitting the scaler. Fitting MinMaxScaler on the full data set
# lets the test rows influence the per-feature min/max used for training
# (data leakage) and makes the held-out accuracy optimistic. The split
# settings (test_size, random_state, shuffle) are unchanged.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y_num, test_size=0.2, random_state=10, shuffle=True)

mm = MinMaxScaler()
transformer = mm.fit(x_train_raw)

# Test features are scaled with the training min/max, so they may fall
# slightly outside [0, 1]; that is expected.
x_train = transformer.transform(x_train_raw)
x_test = transformer.transform(x_test_raw)

# Whole data set in the same scale, kept for the cross-validation cells below.
# NOTE(review): for leak-free cross-validation, wrap the scaler and estimator
# in a Pipeline instead of passing pre-scaled data.
x_scaled = transformer.transform(x)

# SVM Model
---

In [None]:
# Baseline: support-vector classifier with scikit-learn's default
# hyperparameters, fitted on the scaled training split.
model_svm = SVC()
model_svm.fit(x_train, y_train)

In [None]:
# Score the SVM on the held-out split and report the accuracy as a percentage.
y_pred_svm = model_svm.predict(x_test)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print(f"Accuracy Score : {svm_accuracy * 100:.2f}%")

Accuracy Score : 55.84%


# MLP Classifier
---

In [None]:
from sklearn.neural_network import MLPClassifier

# Four hidden layers (400 -> 250 -> 100 -> 25).
# random_state pins the weight initialisation and batch shuffling so the
# reported accuracy can be reproduced; it reuses the seed of the train/test split.
# NOTE: learning_rate='adaptive' is only honoured by solver='sgd'; with the
# default solver it has no effect and is kept here only for
# backward compatibility with the original configuration.
model_mlp = MLPClassifier(
    alpha=0.001,
    batch_size=350,
    epsilon=1e-7,
    hidden_layer_sizes=(400, 250, 100, 25),
    learning_rate='adaptive',
    max_iter=1500,
    random_state=10,
)

In [None]:
# Show how many CPUs the runtime reports. Informational only: the value is
# displayed as the cell output and not stored. (`os` is already imported in
# the first code cell; this re-import is harmless.)
import os
os.cpu_count()

2

In [None]:
# Train the MLP on the training split, then report its held-out accuracy as a percentage.
model_mlp.fit(x_train, y_train)
y_pred_mlp = model_mlp.predict(x_test)
mlp_accuracy = accuracy_score(y_test, y_pred_mlp)
print(f"Accuracy Score : {mlp_accuracy * 100:.2f}%")

Accuracy Score : 76.62%


In [None]:
from sklearn.model_selection import ShuffleSplit
# Cross-validation scheme shared by the search below: 5 independent random
# 80/20 splits, seeded so the same splits are drawn on every run.
cv=ShuffleSplit(n_splits=5, test_size=0.2, random_state=8)


# Disabled experiment: cross-validated accuracy of the hand-tuned MLP.
# scores = cross_val_score(model_mlp, x_scaled, y_num, cv=cv)
# print(scores)
# print(f"Accuracy : {np.mean(scores)*100:.2f}%")


# Using RandomizedSearchCV to find the best model
# Disabled experiment: run the search on only the first half of the data.
# x_half = x_scaled[:x_scaled.shape[0]//2,:]
# y_half = y_num[:y_num.shape[0]//2]
from sklearn.model_selection import RandomizedSearchCV
def gsv(model, params, n_iter=10, random_state=None, x=None, y=None):
    """Run a randomized hyper-parameter search and print the best result.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn estimator to tune.
    params : dict
        Parameter name -> list of candidate values (or distribution).
    n_iter : int, optional
        Number of parameter settings sampled (default 10, as before).
    random_state : int or None, optional
        Seed for the sampling of parameter settings. ``None`` keeps the
        original unseeded behaviour; pass an int for a reproducible search.
    x, y : array-like, optional
        Data to search on. Default to the notebook globals ``x_scaled`` and
        ``y_num``, which is what the original implementation always used.

    Returns
    -------
    RandomizedSearchCV
        The fitted search object, so callers can read ``best_estimator_``
        and ``cv_results_``. Earlier versions returned ``None``.
    """
    features = x_scaled if x is None else x
    targets = y_num if y is None else y

    # Named `search` rather than `gsv`, so the function's own name is not
    # shadowed inside its body.
    search = RandomizedSearchCV(
        model, params, cv=cv, n_iter=n_iter, n_jobs=-1, random_state=random_state)
    search.fit(features, targets)
    print(search.best_params_ ,' : ',search.best_score_)
    return search


# Candidate values for the randomized search over MLPClassifier settings.
# Only one architecture is listed under 'hidden_layer_sizes', so the search
# effectively varies alpha, batch_size, epsilon and max_iter.
params_mlp ={
    'alpha': [0.001,0.01, 0.1,0.2,0.3] ,
    'batch_size':[350,450],
    'epsilon':[1e-7, 1e-8,1e-9],
    'hidden_layer_sizes':[(400,150,50)],
    'max_iter':[800, 1000,1500]
}

# Runs the search on the full scaled data set (x_scaled, y_num) and prints the
# best parameter combination together with its mean cross-validated score.
gsv(MLPClassifier(), params_mlp)

{'max_iter': 1500, 'hidden_layer_sizes': (400, 150, 50), 'epsilon': 1e-07, 'batch_size': 350, 'alpha': 0.001}  :  0.7584415584415585


# Tensorflow Model
---

In [None]:
import tensorflow as tf
from tensorflow import keras

# Tensorflow Model-1

In [None]:
# Dense classifier over the 180 input features: four hidden layers, each
# followed by dropout, ending in a 4-way softmax (one unit per emotion).
model_tf_1 = keras.Sequential()
model_tf_1.add(keras.layers.Dense(200, input_dim=180, activation='tanh'))
model_tf_1.add(keras.layers.Dropout(0.5))
model_tf_1.add(keras.layers.Dense(100, activation='relu'))
model_tf_1.add(keras.layers.Dropout(0.7))
model_tf_1.add(keras.layers.Dense(60, activation='relu'))
model_tf_1.add(keras.layers.Dropout(0.5))
model_tf_1.add(keras.layers.Dense(25, activation='relu'))
model_tf_1.add(keras.layers.Dropout(0.5))
model_tf_1.add(keras.layers.Dense(4, activation='softmax'))

# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
model_tf_1.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

# First training stage: 250 epochs, then evaluate on the held-out split
# (the cell output is the list returned by evaluate()).
h = model_tf_1.fit(x_train, y_train, epochs=250)
model_tf_1.evaluate(x_test, y_test)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

[1.2209243774414062, 0.5844155550003052]

In [None]:

# Second training stage for the same model_tf_1 object: re-compile with a much
# smaller Adam learning rate and train for another 500 epochs without progress output.
fine_tune_optimizer = keras.optimizers.Adam(learning_rate=0.00001)
model_tf_1.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=fine_tune_optimizer,
    metrics=['accuracy'],
)

h = model_tf_1.fit(x_train, y_train, verbose=0, epochs=500)
model_tf_1.evaluate(x_test, y_test)



[2.197688341140747, 0.6709956526756287]

# Tensorflow Model-2

In [None]:
# Second dense classifier: four hidden blocks of Dense -> BatchNormalization
# -> Dropout over the 180 input features, followed by a 4-way softmax.
# Each entry is (units, activation, dropout rate) for one hidden block.
_hidden_blocks = [
    (300, 'tanh', 0.7),
    (200, 'relu', 0.7),
    (100, 'relu', 0.5),
    (50, 'relu', 0.7),
]

model_tf = keras.Sequential()
for _position, (_units, _activation, _drop_rate) in enumerate(_hidden_blocks):
    # Only the very first layer declares the input width.
    _first_layer_kwargs = {'input_dim': 180} if _position == 0 else {}
    model_tf.add(keras.layers.Dense(_units, activation=_activation, **_first_layer_kwargs))
    model_tf.add(keras.layers.BatchNormalization())
    model_tf.add(keras.layers.Dropout(_drop_rate))

model_tf.add(keras.layers.Dense(4, activation='softmax'))

In [None]:
# Train model_tf in 10 consecutive rounds of 70 epochs each, re-compiling the
# model before every round. Rounds 0-5 use SGD with a fixed learning rate of
# 0.001; rounds 6-9 switch to Adam with the decayed `lr` computed below.
# NOTE(review): every compile() call is given a newly constructed optimizer, so
# optimizer state presumably does not carry over between rounds — confirm this
# is intended.
for i in range(10):

  # lr starts at 0.001 and drops by 0.00008 per round; it is only consumed by
  # the Adam branch, so its values for rounds 0-5 are unused.
  lr = 0.001 - 0.0001*(4*i/5)
  if i>=6:
    model_tf.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), metrics=['accuracy'], loss='sparse_categorical_crossentropy')
  else:
    model_tf.compile(optimizer=keras.optimizers.SGD(learning_rate=0.001), metrics=['accuracy'], loss='sparse_categorical_crossentropy')
  # `history` is overwritten every round, so only the last round's result is
  # still available after the loop.
  history = model_tf.fit(x_train, y_train, epochs=70)


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/7

In [None]:
# Evaluate model_tf on the held-out split; with metrics=['accuracy'] compiled
# in, the displayed list is [loss, accuracy].
model_tf.evaluate(x_test, y_test)



[1.012639045715332, 0.6839826703071594]

# Final Model

`It combines the predictions of all the models to give the final prediction` <br>
`Accuracy: 81.82%`

In [None]:
# Final model for prediction
# It combines the predictions of four models: the MLP, Tensorflow model 2,
# Tensorflow model 1 and the SVM.

class final_model:
  '''
  Voting ensemble over the already-trained notebook globals `model_mlp`,
  `model_tf`, `model_tf_1` and `model_svm`.

  The class holds no state of its own; the four models are looked up when a
  prediction is requested, so they must be defined (and trained) first.
  '''

  def __init__(self):
    pass

  @staticmethod
  def _vote(pred1, pred2, pred3, pred4):
    '''
    Resolve four class predictions into one.

    The votes are listed in priority order (MLP, Tensorflow model 2,
    Tensorflow model 1, SVM). The first prediction, in that order, that is
    shared with at least one later model wins; if all four disagree the
    highest-priority prediction (pred1) is returned.
    '''
    if pred1 in (pred2, pred3, pred4):
      return pred1
    if pred2 in (pred3, pred4):
      return pred2
    # pred1 and pred2 are both unmatched here, so only pred3/pred4 can agree.
    if pred3 == pred4:
      return pred3
    return pred1

  def predict_1(self, x):
    '''
    predict for one value of x(all features of one file)
    '''
    return self.predict(np.asarray(x).reshape(1, -1))[0]

  def predict(self, x):
    '''
    predict for list of x(list of files)

    Returns a list with one predicted class id per row of x.
    '''
    features = np.asarray(x)
    if len(features) == 0:
      return []
    # One flat feature row per sample, mirroring the per-sample reshape(1, -1)
    # of the original implementation.
    features = features.reshape(len(features), -1)

    # Query every model once for the whole batch instead of once per sample;
    # per-sample predict() calls dominate the run time, especially for Keras.
    mlp_pred = model_mlp.predict(features)
    tf2_pred = np.argmax(model_tf.predict(features, verbose=0), axis=1)
    tf1_pred = np.argmax(model_tf_1.predict(features, verbose=0), axis=1)
    svm_pred = model_svm.predict(features)

    return [self._vote(*votes)
            for votes in zip(mlp_pred, tf2_pred, tf1_pred, svm_pred)]

  def test_score(self, x_test, y_test):
    '''
    accuracy of the combined prediction on x_test against the labels y_test
    '''
    pred = self.predict(x_test)
    return accuracy_score(y_true = y_test, y_pred=pred)


# Build the voting ensemble; it uses the four models trained in the cells above.
model = final_model()

# Accuracy of the combined prediction on the held-out split, shown as a percentage.
accuracy = model.test_score(x_test, y_test)
print(f"Accuracy : {accuracy*100:.2f}%")

Accuracy : 81.82%
