In [2]:
import librosa
import librosa.display
import os
import pickle
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import re
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.ensemble import GradientBoostingClassifier
from imblearn.under_sampling import ClusterCentroids
from xgboost import XGBClassifier

import keras
from keras import regularizers
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint

In [3]:
# Load the pickled object stored in 'X_train_2.pkl' and bind it to X_train.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load pickles that come from a trusted source.
with open('X_train_2.pkl', 'rb') as f:
    X_train = pickle.load(f)

In [4]:
# Load the pickled object stored in 'y_train_2.pkl' and bind it to y_train.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load pickles that come from a trusted source.
with open('y_train_2.pkl', 'rb') as f:
    y_train = pickle.load(f)

In [5]:
# Load the pickled object stored in 'X_val_2.pkl' and bind it to X_val.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load pickles that come from a trusted source.
with open('X_val_2.pkl', 'rb') as f:
    X_val = pickle.load(f)

In [6]:
# Load the pickled object stored in 'y_val_2.pkl' and bind it to y_val.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load pickles that come from a trusted source.
with open('y_val_2.pkl', 'rb') as f:
    y_val = pickle.load(f)

In [57]:
# Display the dimensions of the training feature matrix (rows, columns).
X_train.shape

(3000, 216)

In [58]:
# Display the dimensions of the training labels.
# NOTE(review): this cell's execution count (58) is later than the argmax
# cell's (9), so the 1-D shape shown reflects labels that were already
# collapsed to class indices, not the array as loaded from disk.
y_train.shape

(3000,)

In [8]:
# Standardize every feature column using statistics computed on the training
# split only, then apply the same transform to the validation split so no
# validation information leaks into the scaling.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)

# A feature that is constant across the training rows has std == 0; dividing
# by it would fill that column with NaN/inf and poison every downstream model.
# Fall back to a unit scale so such columns are only mean-centred.
std = np.where(std == 0, 1.0, std)

X_train = (X_train - mean)/std
X_val = (X_val - mean)/std

## Gradient Boosting Classifier

In [9]:
# Collapse 2-D label arrays (presumably one-hot rows -- TODO confirm against
# the preprocessing step) to 1-D integer class indices, which is the form the
# scikit-learn / XGBoost classifiers below are fitted with.
# The ndim guard makes the cell safe to re-run: once the labels are already
# 1-D, calling argmax(axis=1) again would raise an axis-out-of-bounds error.
if y_train.ndim > 1:
    y_train = y_train.argmax(axis=1)
if y_val.ndim > 1:
    y_val = y_val.argmax(axis=1)

In [17]:
# Build a gradient-boosting classifier with 500 boosting stages and a fixed
# seed, then fit it on X_train / y_train.
# The bare fit() call is the last expression of the cell, so the fitted
# estimator's repr is what the notebook displays.
clf = GradientBoostingClassifier(random_state=42, n_estimators=500)
clf.fit(X_train, y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=500,
                           n_iter_no_change=None, presort='auto',
                           random_state=42, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [18]:
# Report the gradient-boosting model's score on each split:
# training first, validation second.
for features, labels in ((X_train, y_train), (X_val, y_val)):
    print(clf.score(features, labels))

0.998
0.3164218958611482


In [19]:
# Predict class indices for the validation features with the gradient-boosting
# model. NOTE: y_pred is reassigned by later cells for the other models.
y_pred = clf.predict(X_val)

In [20]:
# Per-class precision / recall / F1 on the validation split for the
# predictions currently held in y_pred.
report_text = classification_report(y_val, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.07      0.08      0.07        26
           1       0.09      0.12      0.10        25
           2       0.27      0.35      0.30       179
           3       0.62      0.33      0.43       441
           4       0.11      0.31      0.16        78

    accuracy                           0.32       749
   macro avg       0.23      0.24      0.21       749
weighted avg       0.45      0.32      0.35       749



In [21]:
# Confusion matrix for the predictions currently held in y_pred
# (scikit-learn convention: rows are true classes, columns are predicted).
conf_mat = confusion_matrix(y_val, y_pred)
print(conf_mat)

[[  2   1   7  10   6]
 [  0   3   8   6   8]
 [  3  11  62  47  56]
 [ 21  18 129 146 127]
 [  3   2  24  25  24]]


In [22]:
# Restore the label-encoding object saved in the 'labels' pickle (presumably a
# fitted LabelEncoder -- the next cell relies on its inverse_transform).
# The previous throw-away `lb = LabelEncoder()` was removed: it was overwritten
# immediately by the unpickled object and never used.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('labels', 'rb') as f:
    lb = pickle.load(f)

In [23]:
# Map the integer class indices 0..4 back to their original label names.
lb.inverse_transform(list(range(5)))

array(['disgust', 'fear', 'happy', 'neutral', 'sad'], dtype=object)

## Random Forests

In [24]:
from sklearn.ensemble import RandomForestClassifier

In [44]:
# Build a 100-tree random forest with a fixed seed and fit it on
# X_train / y_train.
# The bare fit() call is the last expression of the cell, so the fitted
# estimator's repr is what the notebook displays.
clf2 = RandomForestClassifier(random_state=42, n_estimators=100)
clf2.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)

In [45]:
# Report the random-forest model's score on each split:
# training first, validation second.
for features, labels in ((X_train, y_train), (X_val, y_val)):
    print(clf2.score(features, labels))

0.998
0.3417890520694259


In [46]:
# Predict class indices for the validation features with the random forest.
# NOTE: this overwrites the y_pred produced by the previous model's cell.
y_pred = clf2.predict(X_val)

In [47]:
# Per-class precision / recall / F1 on the validation split for the
# predictions currently held in y_pred.
report_text = classification_report(y_val, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.08      0.04      0.05        26
           1       0.09      0.08      0.08        25
           2       0.28      0.37      0.32       179
           3       0.66      0.35      0.45       441
           4       0.14      0.44      0.21        78

    accuracy                           0.34       749
   macro avg       0.25      0.25      0.22       749
weighted avg       0.47      0.34      0.37       749



In [48]:
# Confusion matrix for the predictions currently held in y_pred
# (scikit-learn convention: rows are true classes, columns are predicted).
conf_mat = confusion_matrix(y_val, y_pred)
print(conf_mat)

[[  1   0   6  10   9]
 [  0   2  10   3  10]
 [  3  11  66  46  53]
 [  7   8 132 153 141]
 [  1   2  20  21  34]]


## XGBoost

In [50]:
# Build an XGBoost classifier with a fixed seed (all other hyperparameters are
# left at the library defaults) and fit it on X_train / y_train.
# The bare fit() call is the last expression of the cell, so the fitted
# estimator's repr is what the notebook displays.
clf3 = XGBClassifier(random_state=42)
clf3.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='multi:softprob', random_state=42,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [51]:
# Report the XGBoost model's score on each split:
# training first, validation second.
for features, labels in ((X_train, y_train), (X_val, y_val)):
    print(clf3.score(features, labels))

0.743
0.2937249666221629


In [54]:
# Predict class indices for the validation features with the XGBoost model.
# NOTE: this overwrites the y_pred produced by the previous model's cell.
y_pred = clf3.predict(X_val)

In [55]:
# Per-class precision / recall / F1 on the validation split for the
# predictions currently held in y_pred.
report_text = classification_report(y_val, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.06      0.08      0.07        26
           1       0.00      0.00      0.00        25
           2       0.28      0.36      0.32       179
           3       0.63      0.27      0.38       441
           4       0.13      0.42      0.20        78

    accuracy                           0.29       749
   macro avg       0.22      0.23      0.19       749
weighted avg       0.45      0.29      0.32       749



In [56]:
# Confusion matrix for the predictions currently held in y_pred
# (scikit-learn convention: rows are true classes, columns are predicted).
conf_mat = confusion_matrix(y_val, y_pred)
print(conf_mat)

[[  2   1   5   8  10]
 [  1   0   4   6  14]
 [  2   8  65  40  64]
 [ 23  26 137 120 135]
 [  5   3  21  16  33]]


## Saving The Random Forest Classifier

In [60]:
# Persist the fitted random forest (clf2) to disk with pickle.
# The file is opened in a `with` block so the handle is flushed and closed
# deterministically; passing a bare open() to pickle.dump left it to the
# garbage collector, which can leave a truncated file on some interpreters.
filename = 'finalModel.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(clf2, model_file)

In [62]:
# Round-trip check: reload the saved model and confirm it reproduces the
# validation score of the in-memory classifier.
# The file is opened in a `with` block so the handle is closed promptly
# instead of leaking until garbage collection.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
result = loaded_model.score(X_val, y_val)
print(result)

0.3417890520694259
