#### Importing the required packages

In [None]:
import librosa
import pandas as pd
import os
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import pickle

#### Setting some constants

In [11]:
train_path = 'SERDataset/SERDataset/Train'
test_path = 'SERDataset/SERDataset/Test'

# Maps the split name stored in the `Type` column to that split's root directory.
dataset_paths = {
    "Train": train_path,
    "Test": test_path
}

# Each sub-directory of the Train folder is treated as one emotion class.
# os.listdir() returns entries in arbitrary order, and numeric labels are later
# taken from an emotion's position in this list, so the list is sorted to keep
# label indices stable across runs and machines. Hidden entries (e.g.
# ".ipynb_checkpoints", ".DS_Store") and plain files are skipped so they are
# never mistaken for an emotion.
emotions = sorted(
    entry for entry in os.listdir(path=train_path)
    if not entry.startswith('.') and os.path.isdir(os.path.join(train_path, entry))
)

#### Creating a dataframe with audio filenames and their labels

In [12]:
# Index every recording as one row: (file name, emotion folder, split).
# All rows are collected first and the DataFrame is built once, instead of
# re-concatenating a growing frame on every loop iteration (quadratic copying).
# Row order matches the per-emotion layout: Train rows, then Test rows.
# Directory listings are sorted because os.listdir() order is arbitrary.
ids = pd.DataFrame(
    [
        {"RecId": recording, "Emotion": emotion, "Type": split}
        for emotion in emotions
        for split, root in (("Train", train_path), ("Test", test_path))
        for recording in sorted(os.listdir(path=f"{root}/{emotion}"))
    ],
    # Explicit columns keep the schema intact even when no recordings are found.
    columns=["RecId", "Emotion", "Type"]
)
ids

Unnamed: 0,RecId,Emotion,Type
0,SM1_F10_A010.wav,Angry,Train
1,SM1_F11_A011.wav,Angry,Train
2,SM1_F12_A012.wav,Angry,Train
3,SM1_F13_A013.wav,Angry,Train
4,SM1_F14_A014.wav,Angry,Train
...,...,...,...
440,SM26_F2_S092.wav,Sad,Test
441,SM27_F1_S093.wav,Sad,Test
442,SM27_F2_S094.wav,Sad,Test
443,SM27_F3_S095.wav,Sad,Test


#### Creating a function to extract features for a single audio signal. The features used are the MFCCs and the spectral centroid.

In [13]:
def extract_features(signal_sr, n_mfcc=40):
    """Build one feature vector for a single audio clip.

    Parameters
    ----------
    signal_sr : tuple
        ``(signal, sr)`` pair: the audio samples and their sampling rate,
        in the order returned by ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients requested from librosa (default 40).

    Returns
    -------
    numpy.ndarray
        float64 array holding the per-coefficient MFCC means followed by the
        mean spectral centroid.
    """
    signal, sr = signal_sr[0], signal_sr[1]

    # Transpose + mean over axis 0 collapses librosa's trailing axis, leaving
    # one averaged value per feature row.
    mfccs = np.mean(
        librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc).T, axis=0
    )

    # The audio must be passed as `y=`: librosa >= 0.10 made it keyword-only,
    # so the previous positional call raises TypeError there.
    spectral_centroids = np.mean(
        librosa.feature.spectral_centroid(y=signal, sr=sr).T, axis=0
    )

    # Cast to float64 to match the dtype produced when stacking onto an
    # empty float64 seed array.
    return np.hstack((mfccs, spectral_centroids)).astype(np.float64)

#### Creating a function to load an audio file as a signal, using a 16,000 Hz sampling rate

In [14]:
def load_file(file_path, sr=16000):
    """Load an audio file through ``librosa.load``.

    Parameters
    ----------
    file_path : str
        Path of the audio file to read.
    sr : int, optional
        Sampling rate forwarded to ``librosa.load`` (default 16000).

    Returns
    -------
    tuple
        Whatever ``librosa.load`` returns; callers unpack it as
        ``(signal, sampling_rate)``.
    """
    return librosa.load(file_path, sr=sr)

#### The function below extracts features and assigns numeric labels for all Train/Test records

In [16]:
def add_features(dataset_type='Train'):
    """Load audio, extract features and attach numeric labels for one split.

    Parameters
    ----------
    dataset_type : str, optional
        Split to process; must be a key of ``dataset_paths`` and match the
        values in the ``Type`` column of ``ids`` (default ``'Train'``).

    Returns
    -------
    pandas.DataFrame
        The rows of ``ids`` for the requested split, with three added columns:
        ``SignalSR`` (result of ``load_file``), ``X`` (result of
        ``extract_features``) and ``y`` (index of the emotion in ``emotions``).

    Raises
    ------
    KeyError
        If ``dataset_type`` is not a key of ``dataset_paths``.
    """
    # Resolve the split's root directory once, up front, so an unknown split
    # name is reported immediately rather than during per-row processing.
    root = dataset_paths[dataset_type]

    # Filter first and copy only the selected rows; the new columns are added
    # to this independent frame and `ids` itself is left untouched.
    files = ids.loc[ids['Type'] == dataset_type].copy()

    files['SignalSR'] = files.apply(
        lambda row: load_file(f"{root}/{row['Emotion']}/{row['RecId']}"),
        axis=1
    )
    files['X'] = files['SignalSR'].apply(extract_features)
    # Numeric label = position of the emotion name in the `emotions` list.
    files['y'] = files['Emotion'].apply(lambda name: emotions.index(name))
    return files

#### Function to train the model using the features and labels. I have used a multilayer perceptron with the 'adam' optimizer. The trained model is saved as a pickle file.

In [18]:
def train_model(x_train, y_train, model_name, random_state=None):
    """Fit an MLP classifier and save it to disk as a pickle.

    Parameters
    ----------
    x_train : list
        Feature vectors, one per training example.
    y_train : list
        Labels aligned with ``x_train``.
    model_name : str
        Path of the file the fitted classifier is pickled to.
    random_state : int or None, optional
        Forwarded to ``MLPClassifier``; pass an int for reproducible training.
        The default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    None
        The fitted model is only written to ``model_name``.
    """
    clf = MLPClassifier(
        solver='adam',
        hidden_layer_sizes=(200,),
        random_state=random_state
    )
    clf.fit(x_train, y_train)

    # Context manager guarantees the file is flushed and closed, even if
    # pickling fails part-way through.
    with open(model_name, "wb") as model_file:
        pickle.dump(clf, model_file)

#### The function below loads the saved model and predicts labels for the given test feature vectors

In [19]:
def get_prediction(x_test, model_name):
    """Load a pickled model and return its predictions for ``x_test``.

    Parameters
    ----------
    x_test : list
        Feature vectors to classify, passed unchanged to the model's
        ``predict`` method.
    model_name : str
        Path of the pickle file holding the saved model.

    Returns
    -------
    Whatever the unpickled object's ``predict(x_test)`` returns.
    """
    # SECURITY NOTE: pickle.load can execute arbitrary code contained in the
    # file. Only load model files that were produced by a trusted source.
    # The context manager closes the file handle deterministically.
    with open(model_name, "rb") as model_file:
        clf = pickle.load(model_file)
    return clf.predict(x_test)

#### Main execution of the SER

In [26]:
model_name = "ser_classifier_mlp.model"

# --- Training: featurise the Train split, fit the model and persist it ---
train = add_features(dataset_type='Train')
train_features = train['X'].tolist()
train_labels = train['y'].tolist()
train_model(train_features, train_labels, model_name)

# --- Evaluation: featurise the Test split and predict with the saved model ---
test = add_features(dataset_type='Test')
test_features = test['X'].tolist()
test['PredictedY'] = get_prediction(test_features, model_name)
# Translate predicted label indices back into emotion names.
test['PredictedEmotion'] = test['PredictedY'].apply(lambda label: emotions[label])

accuracy_percent = accuracy_score(test['y'], test['PredictedY'])*100
print("Accuracy:", accuracy_percent)

# Persist the per-recording predictions alongside the true emotion.
output_columns = ['RecId', 'Emotion', 'PredictedEmotion']
test[output_columns].to_csv('Predictions.csv', index=False)

Accuracy: 98.33333333333333
