In [None]:
import os, glob, pickle, random

import matplotlib.pyplot as plt
import numpy as np
import numpy as snp  # original alias (likely a typo for `np`); kept so nothing relying on it breaks
import seaborn as sns
from sklearn.model_selection import train_test_split

sns.set()

In [None]:
# Emotion code (as it appears in a RAVDESS file name) -> emotion label.
emotions = dict([
    ('03', 'happy'),
    ('04', 'sad'),
    ('05', 'angry'),
    ('06', 'fearful'),
    ('07', 'disgust'),
    ('08', 'surprised'),
])

# Labels kept for training and plotting; clips with any other label are skipped.
observed_emotions = [
    'happy',
    'sad',
    'angry',
    'fearful',
    'disgust',
    'surprised',
]

In [None]:
#DataFlair - Extract features (mfcc, chroma, mel) from a sound file
import librosa
import soundfile

SAMPLERATE = 16000  # Hz; every clip is resampled to this rate before feature extraction


def extract_feature(file_name):
    """Read one audio file and summarise it as a single flat feature vector.

    The waveform is read as float32, down-mixed to mono if it has several
    channels, and resampled to ``SAMPLERATE`` when the file uses another rate.
    Each frame-level feature is then averaged over time and the averages are
    concatenated in this order: MFCC (40 coefficients), chroma, mel
    spectrogram, tonnetz, spectral bandwidth, spectral flatness (one scalar)
    and spectral contrast (one scalar, averaged over bands and frames).

    Parameters
    ----------
    file_name : str
        Path of the audio file to read.

    Returns
    -------
    tuple
        ``(X, result)`` where ``X`` is the (possibly resampled) waveform and
        ``result`` is the 1-D feature vector built by ``np.hstack``.
    """
    # Only the read needs the file handle; release it before the heavy work.
    with soundfile.SoundFile(file_name) as sound_file:
        sample_rate = sound_file.samplerate
        X = sound_file.read(dtype="float32")

    # soundfile yields (frames, channels) for multi-channel files, while the
    # librosa calls below expect a mono signal: average the channels.
    if X.ndim > 1:
        X = X.mean(axis=1)

    if sample_rate != SAMPLERATE:
        # Keyword arguments: recent librosa releases reject positional rates.
        X = librosa.resample(X, orig_sr=sample_rate, target_sr=SAMPLERATE)
        sample_rate = SAMPLERATE

    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=X, sr=sample_rate).T, axis=0)
    bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=X, sr=sample_rate).T, axis=0)
    flatness = np.mean(librosa.feature.spectral_flatness(y=X))
    # Pass the real rate: without it librosa assumes its own default rate and
    # places the contrast bands at the wrong frequencies for this signal.
    contrast = np.mean(librosa.feature.spectral_contrast(y=X, sr=sample_rate))

    result = np.hstack((mfccs, chroma, mel, tonnetz, bandwidth, flatness, contrast))
    return (X, result)

In [None]:
#DataFlair - Load the data and extract features for each sound file
from tqdm.autonotebook import tqdm

cwd = os.getcwd()


def load_data(glob_pattern=cwd+"/dataset/Actor_*/*.wav"):
    """Load every matching audio file and extract its features.

    The emotion code is the third dash-separated field of the file's base
    name. Files are skipped when the name has fewer than three fields, when
    the code is not a key of ``emotions`` (this covers the codes '01' and
    '02'), or when the mapped label is not in ``observed_emotions``.

    Parameters
    ----------
    glob_pattern : str
        Glob pattern selecting the audio files. The default is built from the
        working directory at the time this function is defined.

    Returns
    -------
    tuple of lists
        ``(X, x, y)``: waveforms, feature vectors and emotion labels, aligned
        by index.
    """
    X, x, y = [], [], []
    # Sort the matches: glob order depends on the file system, and the seeded
    # sampling/splitting done later is only reproducible on a stable order.
    for file in tqdm(sorted(glob.glob(glob_pattern))):
        fields = os.path.basename(file).split("-")
        # .get() returns None for unknown codes instead of raising KeyError.
        emotion = emotions.get(fields[2]) if len(fields) > 2 else None
        if emotion is None or emotion not in observed_emotions:
            continue
        Xo, feature = extract_feature(file)
        X.append(Xo)
        x.append(feature)
        y.append(emotion)
    return X, x, y

In [None]:
# Load the dataset: X = waveforms, x = feature vectors, y = emotion labels.
X, x, y = load_data()

In [None]:
import librosa.display

def plot_emo_specs(x, y, emos, n, sr=None):
    """Plot a grid of mel spectrograms: one row per emotion, ``n`` clips per row.

    Parameters
    ----------
    x : sequence
        Waveforms, aligned by index with ``y``.
    y : sequence
        Emotion label of each waveform.
    emos : sequence of str
        Emotions to plot, one grid row each, in the given order.
    n : int
        Number of randomly sampled clips per emotion (grid columns). When an
        emotion has fewer than ``n`` clips, all of them are shown and the
        remaining panels of that row are hidden.
    sr : int, optional
        Sampling rate of the waveforms; defaults to ``SAMPLERATE``.
    """
    if sr is None:
        sr = SAMPLERATE
    nemos = len(emos)
    # squeeze=False keeps `axs` two-dimensional even for a 1x1 grid.
    fig, axs = plt.subplots(nemos, n, figsize=(n*4, nemos*3), sharey=True, squeeze=False,
                            gridspec_kw={'wspace': 0.01, 'hspace': 0.15})
    props = dict(boxstyle='round', facecolor='white', alpha=0.95)

    # Iterate the emotions that were asked for, so rows match the grid size.
    for row, emo in enumerate(emos):
        waves = [xi for xi, yi in zip(x, y) if yi == emo]
        picks = random.sample(waves, min(n, len(waves)))
        for col, wave in enumerate(picks):
            ax = axs[row, col]
            # Use the real sampling rate so the mel bands and both axes are
            # scaled for this audio rather than for librosa's default rate.
            M = librosa.feature.melspectrogram(y=wave, sr=sr)
            librosa.display.specshow(librosa.power_to_db(M, ref=np.max), sr=sr,
                                     y_axis='mel', x_axis='time', ax=ax)
        # Hide panels that received no clip.
        for col in range(len(picks), n):
            axs[row, col].set_visible(False)
        # Label each row once, on its first panel.
        if picks:
            first = axs[row, 0]
            first.text(0.05, 0.95, emo.capitalize(), transform=first.transAxes, fontsize=14,
                       verticalalignment='top', bbox=props)

    plt.tight_layout()
    plt.show()

In [None]:
# Three randomly chosen mel spectrograms for each observed emotion.
plot_emo_specs(X, y, emos=observed_emotions, n=3)

In [None]:
# Hold out 20% of the samples for testing. The split seed is drawn from
# Python's RNG right after seeding it with 4, so it is the same on every run.
random.seed(4)
split_seed = random.randint(1, 25)
x_train, x_test, y_train, y_test = train_test_split(
    np.array(x),
    y,
    test_size=0.2,
    random_state=split_seed,
)

In [None]:
# Number of samples in the training and testing sets, printed as a pair.
split_sizes = tuple(part.shape[0] for part in (x_train, x_test))
print(split_sizes)

In [None]:
# Length of one feature vector (number of columns of the training matrix).
message = f'Features extracted: {x_train.shape[1]}'
print(message)

In [None]:
from sklearn import neighbors
from sklearn import svm
from sklearn import tree
from sklearn import naive_bayes
from sklearn import linear_model
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

In [None]:
# Classifiers: MLP, k-nearest neighbours, support vector machine, decision
# tree, Gaussian naive Bayes and logistic regression.

mlp = MLPClassifier(alpha=0.01, batch_size=128, epsilon=1e-08, hidden_layer_sizes=(500,250,125,), learning_rate='adaptive', max_iter=2500)
# n=20 seems to be optimal
knn = neighbors.KNeighborsClassifier(20)
svm_clf = svm.SVC()
dtc = tree.DecisionTreeClassifier(max_depth=4)
nb = naive_bayes.GaussianNB()

# Newton-cg handles multinomial loss (for multiclass problems)
lr = linear_model.LogisticRegression(solver='newton-cg')

# Cross-validation splitters.
# All four share one seed: re-seeding with 1 before every draw, as was done
# before, always produced the same number, so it is drawn once here.
random.seed(1)
cv_seed = random.randint(1, 25)

# random_state only has an effect when shuffle=True; recent scikit-learn
# releases raise ValueError if it is set while shuffle is left at False.
kf5 = KFold(n_splits=5, shuffle=True, random_state=cv_seed)
skf5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=cv_seed)
kf10 = KFold(n_splits=10, shuffle=True, random_state=cv_seed)
skf10 = StratifiedKFold(n_splits=10, shuffle=True, random_state=cv_seed)

In [None]:
# Evaluation
import pandas as pd

models = [("MLP", mlp), ("K-NearestNeighbors", knn), ("SupportVectorMachine", svm_clf),
          ("DecisionTreeClassifier", dtc), ("NaiveBayesGaussian", nb), ("LogisticRegression", lr)]

validators = [("5-fold", kf5)]
# To compare every splitter instead, use:
# validators = [("5-fold", kf5), ("Stratified 5-fold", skf5), ("10-fold", kf10), ("Stratified 10-fold", skf10)]

# Validator name -> DataFrame with one row per (classifier, fold) accuracy.
val_res = {}

for name, validator in tqdm(validators):
    entries = []
    for model_name, model in models:
        print(name, model_name)
        # Accuracy of this classifier on every fold of the training set.
        scores = cross_val_score(model, x_train, y_train, cv=validator)
        entries.extend((model_name, fold_idx, score) for fold_idx, score in enumerate(scores))
    val_res[name] = pd.DataFrame(entries, columns=['clf_name', 'fold_idx', 'accuracy'])

# One box plot per validator: fold accuracies grouped by classifier.
for name, val_df in val_res.items():
    val_df.boxplot(column='accuracy', by='clf_name', rot=45, figsize=(14, 6))

In [None]:
# Box plot of the 5-fold accuracies per classifier, with the individual fold
# scores drawn on top as jittered points.
fold_scores = val_res['5-fold']
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(x='clf_name', y='accuracy', data=fold_scores, ax=ax)
sns.stripplot(x='clf_name', y='accuracy', data=fold_scores,
              size=5, jitter=True, linewidth=2, ax=ax)

In [None]:
import pickle
# cross_val_score only fits clones of the estimators it is given, so the
# objects themselves are still untrained: fit them on the training set first.
mlp.fit(x_train, y_train)
knn.fit(x_train, y_train)

# NOTE(review): this used to pickle `model`, the variable left over from the
# evaluation loop (its last entry, not the MLP). The MLP is saved explicitly
# instead - confirm that this is the model expected in model.pickle.
for path, estimator in (('model.pickle', mlp), ('knn.pickle', knn)):
    with open(path, 'wb') as f:
        pickle.dump(estimator, f)

In [None]:
from sklearn.exceptions import NotFittedError
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.utils.validation import check_is_fitted

# Cross-validation fits clones only, so train the MLP here unless an earlier
# cell has already fitted it.
try:
    check_is_fitted(mlp)
except NotFittedError:
    mlp.fit(x_train, y_train)

# Pass the same label order to the matrix and to the tick labels. Giving only
# display labels renames the ticks of a matrix that is ordered by sorted class
# name, which attaches the wrong emotion to each row and column.
cm = confusion_matrix(y_test, mlp.predict(x_test),
                      labels=observed_emotions, normalize='pred')

# ConfusionMatrixDisplay replaces plot_confusion_matrix, which newer
# scikit-learn releases no longer provide. `fig` is the display object.
fig = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=observed_emotions).plot(
    cmap=plt.cm.Blues, xticks_rotation='vertical', values_format='.2f')
fig.ax_.set_title("MLP: Emotion detection confusion matrix")