In [1]:
import os

import librosa, librosa.display
import IPython.display as ipd
from random import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import signal
from sklearn import svm, metrics
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import csv

PROCESSED_DATA_PICKLE_PATH = '../data/df_all.pkl'
PROCESSED_DATA_PICKLE_PATH_N = '../data/df_normal.pkl'

EMOTIONS = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised']

MFCC_LENGTH = 125

In [2]:
df = pd.read_pickle(PROCESSED_DATA_PICKLE_PATH)
df.head()

Unnamed: 0,path,source,actor,gender,gender_label,statement,repetition,intensity,emotion,emotion_label,mfcc
0,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,0,1,0,0,neutral,"[[-857.3094533443688, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,0,2,0,0,neutral,"[[-864.8902862773604, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,1,1,0,0,neutral,"[[-849.4454325616318, 9.397479238778757, 9.257..."
3,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,1,2,0,0,neutral,"[[-832.7343966188961, 11.492822043371124, 0.14..."
4,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,0,1,0,1,calm,"[[-902.4064116162402, 6.517241898027468, 6.427..."


In [3]:
df_normal = pd.read_pickle(PROCESSED_DATA_PICKLE_PATH_N)
df_normal.head()

Unnamed: 0,path,source,actor,gender,gender_label,statement,repetition,intensity,emotion,emotion_label,augmentation,mfcc
0,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,0,1,0,0,neutral,none,"[[-857.3094533443688, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,0,2,0,0,neutral,none,"[[-864.8902862773604, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,1,1,0,0,neutral,none,"[[-849.4454325616318, 9.397479238778757, 9.257..."
3,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,1,2,0,0,neutral,none,"[[-832.7343966188961, 11.492822043371124, 0.14..."
4,../data/Audio_Speech_Actors_01-24/Actor_01/03-...,1,1,1,male,0,1,0,1,calm,none,"[[-902.4064116162402, 6.517241898027468, 6.427..."


In [4]:
def load_data(data_path):
    df = pd.read_pickle(data_path)

    # # clean up data
    # vals = []
    # for i, val in enumerate(df["mfcc"].to_numpy()):
    #     if len(val) < MFCC_LENGTH:
    #         vals.append(i)
    #
    # df = df.drop(vals)
    # df = df.reset_index(drop=True)

    X = np.stack(df["mfcc"].to_numpy())
    y = np.stack(df["emotion"].to_numpy())

    return X, y

In [5]:
def prepare_datasets(test_size):

    # load data
    X, y = load_data(PROCESSED_DATA_PICKLE_PATH_N)

    new_X = []
    for i in X:
        avg_mfcc = []
        for j in i:
            avg_mfcc.append(j.mean())
        new_X.append(avg_mfcc)
    X = np.array(new_X)

    # create train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)

    return X_train, X_test, y_train, y_test


In [14]:
def run_training_all():
    X_train, X_test, y_train, y_test = prepare_datasets(0.2)

    clf_lin = svm.SVC(kernel='linear', decision_function_shape='ovo')
    clf_sig = svm.SVC(kernel='sigmoid', decision_function_shape='ovo')
    clf_pol = svm.SVC(kernel='poly', decision_function_shape='ovo')
    clf_rbf = svm.SVC(kernel='rbf', decision_function_shape='ovo')

    clf_lin.fit(X_train, y_train)
    clf_sig.fit(X_train, y_train)
    clf_pol.fit(X_train, y_train)
    clf_rbf.fit(X_train, y_train)

    pred_lin = clf_lin.predict(X_test)
    pred_sig = clf_sig.predict(X_test)
    pred_pol = clf_pol.predict(X_test)
    pred_rbf = clf_rbf.predict(X_test)

    print("linear: {}".format(metrics.accuracy_score(y_test, pred_lin)))
    print("sigmoid: {}".format(metrics.accuracy_score(y_test, pred_sig)))
    print("poly: {}".format(metrics.accuracy_score(y_test, pred_pol)))
    print("rbf: {}".format(metrics.accuracy_score(y_test, pred_rbf)))

def run_training_rbf():
    csv_file = '../data/rbf_performance_final.csv'
    csv_columns = ['Run', 'C-Value', 'Gamma Value', 'Accuracy']

    c_values = list(range(1, 51))
    gamma_values = ['scale', 0.01, 0.001, 0.0001]

    run_count = 1
    run_data = []


    for i in tqdm(range(1, 51)):
        X_train, X_test, y_train, y_test = prepare_datasets(0.2)
        clf_rbf = svm.SVC(kernel='rbf', C=5, gamma=0.001, decision_function_shape='ovo')
        clf_rbf.fit(X_train, y_train)
        pred_rbf = clf_rbf.predict(X_test)
        acc = metrics.accuracy_score(y_test, pred_rbf)
        run_data.append({
            'Run': run_count,
            'C-Value': 10,
            'Gamma Value': 'scale',
            'Accuracy': acc
        })
        run_count += 1
        print("precision: {}".format(acc))

    try:
        with open(csv_file, 'w') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=csv_columns)
            writer.writeheader()
            for data in run_data:
                writer.writerow(data)

    except IOError:
        print("I/O Error")

In [15]:
run_training_rbf()

100%|██████████| 50/50 [01:30<00:00,  1.81s/it]


precision: 0.3472222222222222
precision: 0.3402777777777778
precision: 0.3645833333333333
precision: 0.3506944444444444
precision: 0.3888888888888889
precision: 0.3993055555555556
precision: 0.3993055555555556
precision: 0.3645833333333333
precision: 0.3993055555555556
precision: 0.3923611111111111
precision: 0.375
precision: 0.375
precision: 0.3611111111111111
precision: 0.3888888888888889
precision: 0.4027777777777778
precision: 0.3541666666666667
precision: 0.3993055555555556
precision: 0.4201388888888889
precision: 0.4097222222222222
precision: 0.3715277777777778
precision: 0.3472222222222222
precision: 0.40625
precision: 0.4166666666666667
precision: 0.375
precision: 0.4340277777777778
precision: 0.4513888888888889
precision: 0.4513888888888889
precision: 0.40625
precision: 0.3958333333333333
precision: 0.3819444444444444
precision: 0.4270833333333333
precision: 0.4236111111111111
precision: 0.3611111111111111
precision: 0.3680555555555556
precision: 0.4097222222222222
precision: 

In [12]:
X_train, X_test, y_train, y_test = prepare_datasets(0.2)

clf_poly = svm.SVC(kernel='poly', degree=17, decision_function_shape='ovo')
# clf = svm.SVC(kernel='rbf', decision_function_shape='ovo') # 0.343
clf_rbf = svm.SVC(C=5, kernel='rbf', gamma=0.001, decision_function_shape='ovo')
clf_rbf2 = svm.SVC(C=15.7, kernel='rbf', gamma='scale', decision_function_shape='ovo')
# # clf = svm.SVC(kernel='rbf', decision_function_shape='ovo') # 0.392
clf_poly.fit(X_train, y_train)
clf_rbf.fit(X_train, y_train)
clf_rbf2.fit(X_train, y_train)
#
# print(X_test.shape)
#
y_pred_poly = clf_poly.predict(X_test)
y_pred_rbf = clf_rbf.predict(X_test)
y_pred_rbf2 = clf_rbf.predict(X_test)

print("Poly Accuracy: {}".format(metrics.accuracy_score(y_test, y_pred_poly)))
print("RBF Accuracy: {}".format(metrics.accuracy_score(y_test, y_pred_rbf)))
print("RBF Accuracy2: {}".format(metrics.accuracy_score(y_test, y_pred_rbf2)))

Poly Accuracy: 0.2638888888888889
RBF Accuracy: 0.3645833333333333
RBF Accuracy2: 0.3645833333333333


In [13]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

X_train, X_test, y_train, y_test = prepare_datasets(0.2)

# Set the parameters by cross-validation
tuned_parameters = [{'kernel': ['rbf'], 'gamma': ['scale', 1e-2, 1e-3, 1e-4],
                     'C': [1, 5, 10, 50, 100]},
                    # {'kernel': ['poly'], 'gamma': [1e-3, 1e-4], 'degree':[1, 10, 50],
                    #  'coef0': [0.0, 0.0001, 0.001, 0.01, 0.1, 1],
                    #  'C': [1, 10, 100]}
                    ]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(
        svm.SVC(), tuned_parameters, scoring='%s_macro' % score
    )
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()


# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'C': 5, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on development set:

0.322 (+/-0.037) for {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
0.018 (+/-0.000) for {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}
0.443 (+/-0.089) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.423 (+/-0.094) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.364 (+/-0.017) for {'C': 5, 'gamma': 'scale', 'kernel': 'rbf'}
0.018 (+/-0.000) for {'C': 5, 'gamma': 0.01, 'kernel': 'rbf'}
0.454 (+/-0.030) for {'C': 5, 'gamma': 0.001, 'kernel': 'rbf'}
0.391 (+/-0.008) for {'C': 5, 'gamma': 0.0001, 'kernel': 'rbf'}
0.350 (+/-0.028) for {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
0.018 (+/-0.000) for {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
0.454 (+/-0.030) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.382 (+/-0.039) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.363 (+/-0.065) for {'C': 50, 'gamma': 'scale', 'kernel': 'rbf'}
0.01

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr