# <center>Real-time emotion detection</center>

Authors : DOUET Marie, GRINDEL Brice, MARTIN Lucas, SOUVANNAVONG Elise

## Global imports

In [15]:
import numpy as np
import os
import soundfile
import librosa
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm_notebook
import pandas as pd

## Global functions

In [2]:
def extract_feature(file_name, mfcc=True, chroma=True, mel=True):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")

        sample_rate = sound_file.samplerate

        if chroma:
            stft = np.abs(librosa.stft(X))
        result=np.array([])

        # MFCC Criterias (short-term power spectrum of a sound)
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))

        # Pertains to the 12 diffrent pitch classes
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
            result = np.hstack((result, chroma))
        
        # MEL Spectrogram Frequency
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
            result = np.hstack((result, mel))
    return result

## Benchmark of datasets

### RAVDESS

In [3]:
emotions = {
    '01': "neutral", 
    '02': "calm", 
    '03': "happy", 
    '04': "sad", 
    '05': "angry", 
    '06': "fearful", 
    '07': "disgust", 
    '08': "surprised"
}

# Removed 'calm' emotion
observed_emotions = ['neutral', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised']

In [4]:
def load_data(folder_path, emotions, excluded_files=[], test_size=0.2):
    x,y = [],[]
    files = os.listdir(folder_path)
    
    # iterations with the progress bar
    for i in tqdm_notebook(range(len(files)-len(excluded_files))):
        file = files[i]
        
        if file not in excluded_files:
            file_name = os.path.basename(file)
            emotion = emotions[file_name.split("-")[2]]

            if emotion not in observed_emotions:
                continue

            feature = extract_feature(folder_path + "/" + file)

            x.append(feature)
            y.append(emotion)
    return np.array(x), np.array(y)

In [5]:
ravdess_folder = "./data_samples/RAVDESS"
excluded_files = ["README.md","03-01-06-01-01-02-20.wav", "03-01-08-01-02-02-01.wav", "03-01-03-01-02-01-20.wav"]

x,y = load_data(ravdess_folder, emotions=emotions, excluded_files=excluded_files)
x_train, x_test, y_train, y_test = train_test_split(x, y, shuffle=True, test_size=0.25, random_state=42)

  0%|          | 0/1437 [00:00<?, ?it/s]

## Preprocessing

### MFCC

## Benchmark of models

### 1 - Support Vector Machine (SVM) (Gridsearch)

In [6]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

param_grid = {
    'C': [0.1, 1, 10, 100, 1000],  
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
    'kernel': ['rbf','linear']
}

grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose=0)
grid.fit(x_train, y_train) 

print(grid.best_params_)

{'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}


In [18]:
# Select best hyper-parameters for SVM model
# C=100, gamma=0.0001, kernel=rbf 
C_opti, gamma_opti, kernel_opti = grid.best_params_['C'], grid.best_params_['gamma'], grid.best_params_['kernel']

svc = svm.SVC(C=C_opti, gamma=gamma_opti, kernel=kernel_opti)
svc.fit(x_train, y_train)

y_pred = svc.predict(x_test)

accuracy = round(accuracy_score(y_true=y_test,y_pred=y_pred)*100,2)
print("Accuracy : ", accuracy,"%")

Accuracy :  62.06 %


### 2 - Multi-layer Perceptron Classifer (MLPC)

In [19]:
from sklearn.neural_network import MLPClassifier

mlpc = MLPClassifier(alpha=0.01, batch_size=256, epsilon=1e-08, hidden_layer_sizes=(300,), learning_rate='adaptive', max_iter=500)
mlpc.fit(x_train,y_train)

y_pred = mlpc.predict(x_test)

accuracy = round(accuracy_score(y_true=y_test,y_pred=y_pred)*100,2)
print("Accuracy : ", accuracy,"%")

Accuracy :  55.63 %


### 3 - Random Forest

In [20]:
from sklearn.ensemble import RandomForestClassifier

rforest = RandomForestClassifier()
rforest.fit(x_train, y_train)

y_pred = rforest.predict(x_test)

accuracy = round(accuracy_score(y_true=y_test,y_pred=y_pred)*100,2)
print("Accuracy : ", accuracy,"%")

Accuracy :  55.31 %


### Linear Prediction Cepstral Coefficient (LPCC)

## Results of the benchmark

### Cross validation

In [38]:
from sklearn.model_selection import cross_val_score

svm_scores = cross_val_score(svc, x,y, cv=5)
mlpc_scores = cross_val_score(mlpc, x,y, cv=5)
rforest_scores = cross_val_score(rforest, x,y, cv=5)

names = ["SVM", "MLPC", "Random Forest"]
means = [np.mean(svm_scores), np.mean(mlpc_scores), np.mean(rforest_scores)]
stds = [np.std(svm_scores), np.std(mlpc_scores), np.std(rforest_scores)]
mins = [min(svm_scores), min(mlpc_scores), min(rforest_scores)]
maxs = [max(svm_scores), max(mlpc_scores), max(rforest_scores)]

In [None]:
data = {
    'name': names,
    'min': mins,
    'max': maxs,
    'mean': means,
    'var': stds
}

df = pd.DataFrame(data=data)
df

Unnamed: 0,name,min,max,mean,var
0,SVM,55.24,68.15,60.596,4.550475
1,MLPC,46.77,57.26,51.492,3.805782
2,Random Forest,52.82,61.69,57.694,2.869764
