In [32]:
import pandas as pd
import numpy as np

## For plotting
import matplotlib.pyplot as plt
import seaborn as sns

## This sets the plot style
## to have a grid on a dark background
sns.set_style("whitegrid")

In [33]:

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA

from sklearn.metrics import accuracy_score, confusion_matrix




In [34]:
# Read in your csv file that has the mid features. 

#data = pd.read_csv('../Feature Extraction/midFeaturesTrainSet.csv')
data = pd.read_csv('midFeaturesTrainFinal.csv')


data.head()

print(data.shape)

(4876, 140)


In [35]:
# Split the data into train and test set, stratified by Emotion.


data_train, data_test = train_test_split(data.copy(),
                                   shuffle=True,
                                   random_state=608,
                                   stratify=data.Emotion,
                                   test_size=0.2
                                   )

In [36]:
data.sample(5)

Unnamed: 0,FileID,actorID,Emotion,SentenceID,zcr_mean,energy_mean,energy_entropy_mean,spectral_centroid_mean,spectral_spread_mean,spectral_entropy_mean,...,delta chroma_4_std,delta chroma_5_std,delta chroma_6_std,delta chroma_7_std,delta chroma_8_std,delta chroma_9_std,delta chroma_10_std,delta chroma_11_std,delta chroma_12_std,delta chroma_std_std
1392,1024_WSI_NEU_XX,1024,NEU,WSI,0.135906,0.030477,2.901268,0.194981,0.1828,0.711852,...,0.030656,0.016327,0.035657,0.01757,0.003064,0.020542,0.03571,0.015258,0.017597,0.014659
4368,1083_IWL_HAP_XX,1083,HAP,IWL,0.099215,0.008852,2.862445,0.191436,0.210406,0.760735,...,0.019861,0.024212,0.024375,0.02264,0.008443,0.011371,0.014012,0.018641,0.009317,0.007234
3635,1071_IWL_SAD_XX,1071,SAD,IWL,0.069836,0.032944,2.860838,0.13036,0.165496,0.277765,...,0.021475,0.022369,0.021624,0.014359,0.0065,0.006294,0.003946,0.014643,0.0296,0.009393
3735,1072_WSI_FEA_XX,1072,FEA,WSI,0.092087,0.045357,2.914424,0.196379,0.200513,0.551055,...,0.048005,0.035567,0.002362,0.001716,0.002727,0.003243,0.003031,0.00759,0.005105,0.011353
2592,1050_TSI_NEU_XX,1050,NEU,TSI,0.254775,0.013199,2.864431,0.314598,0.223688,0.927317,...,0.031233,0.013112,0.011738,0.022424,0.001299,0.00991,0.012473,0.030144,0.01051,0.009509


In [37]:
# Check the percentages of the different emotion categories in the training set

data_train.Emotion.value_counts(normalize=True)

NEU    0.178718
HAP    0.164359
FEA    0.164359
ANG    0.164359
SAD    0.164103
DIS    0.164103
Name: Emotion, dtype: float64

In [38]:
# Check the percentages of the different emotion categories in the test set

data_test.Emotion.value_counts(normalize=True)

NEU    0.179303
DIS    0.164959
ANG    0.163934
SAD    0.163934
FEA    0.163934
HAP    0.163934
Name: Emotion, dtype: float64

In [39]:
y_train = data_train[['Emotion']]
y_test  = data_test[['Emotion']]

X_train = data_train.drop(columns  = ['FileID','actorID', 'Emotion', 'SentenceID'])
X_test  = data_test.drop(columns   = ['FileID','actorID', 'Emotion', 'SentenceID'])

In [40]:
y_test

Unnamed: 0,Emotion
2589,DIS
628,DIS
3624,ANG
334,ANG
2033,SAD
...,...
4030,ANG
614,SAD
2150,ANG
4016,NEU


In [41]:
# Add a column to the y vectors encoding each of the emotions.

y_train_dummies = pd.get_dummies(y_train)
y_train         = pd.concat([y_train, y_train_dummies], axis=1)

y_test_dummies  = pd.get_dummies(y_test)
y_test          = pd.concat([y_test, y_test_dummies], axis=1)


In [42]:
# Example: try to train a support vector machine with Gaussian radial kernel to distinguish between instances
# where emotion is 'HAP' and instances where emotion is 'NEU'. 

# Get the rows of X_train, X_test corresponding to just the emotions 'HAP' and 'NEU'

X_train_hap = X_train.loc[(y_train.Emotion == 'HAP') | (y_train.Emotion == 'NEU')]
X_test_hap  = X_test.loc[(y_test.Emotion == 'HAP') | (y_test.Emotion == 'NEU')]


# Get the Emotion_ANG column of the ys, with only the rows corresponding to 'ANG' and 'HAP'

y_train_hap = y_train.loc[(y_train.Emotion == 'HAP') | (y_train.Emotion == 'NEU')].Emotion_HAP
y_test_hap  = y_test.loc[(y_test.Emotion == 'HAP') | (y_test.Emotion == 'NEU')].Emotion_HAP

In [43]:
X_train_ang = X_train.loc[(y_train.Emotion == 'ANG') | (y_train.Emotion == 'NEU')]
X_test_ang  = X_test.loc[(y_test.Emotion == 'ANG') | (y_test.Emotion == 'NEU')]

y_train_ang = y_train.loc[(y_train.Emotion == 'ANG') | (y_train.Emotion == 'NEU')].Emotion_ANG
y_test_ang  = y_test.loc[(y_test.Emotion == 'ANG') | (y_test.Emotion == 'NEU')].Emotion_ANG

In [44]:
X_train_sad = X_train.loc[(y_train.Emotion == 'SAD') | (y_train.Emotion == 'NEU')]
X_test_sad  = X_test.loc[(y_test.Emotion == 'SAD') | (y_test.Emotion == 'NEU')]

y_train_sad = y_train.loc[(y_train.Emotion == 'SAD') | (y_train.Emotion == 'NEU')].Emotion_SAD
y_test_sad  = y_test.loc[(y_test.Emotion == 'SAD') | (y_test.Emotion == 'NEU')].Emotion_SAD

In [45]:
X_train_dis = X_train.loc[(y_train.Emotion == 'DIS') | (y_train.Emotion == 'NEU')]
X_test_dis  = X_test.loc[(y_test.Emotion == 'DIS') | (y_test.Emotion == 'NEU')]

y_train_dis = y_train.loc[(y_train.Emotion == 'DIS') | (y_train.Emotion == 'NEU')].Emotion_DIS
y_test_dis  = y_test.loc[(y_test.Emotion == 'DIS') | (y_test.Emotion == 'NEU')].Emotion_DIS

In [46]:
X_train_fea = X_train.loc[(y_train.Emotion == 'FEA') | (y_train.Emotion == 'NEU')]
X_test_fea  = X_test.loc[(y_test.Emotion == 'FEA') | (y_test.Emotion == 'NEU')]

y_train_fea = y_train.loc[(y_train.Emotion == 'FEA') | (y_train.Emotion == 'NEU')].Emotion_FEA
y_test_fea  = y_test.loc[(y_test.Emotion == 'FEA') | (y_test.Emotion == 'NEU')].Emotion_FEA

In [47]:
#X_train_sub = X_train
#X_test_sub  = X_test

#y_train_sub = y_train
#y_test_sub  = y_test

In [48]:
print("X_train_sub",  X_train_hap.shape)
print("y_train_sub",  y_train_hap.shape)

X_train_sub (1338, 136)
y_train_sub (1338,)


In [49]:
np.unique(y_test_hap)

array([0, 1], dtype=uint8)

In [50]:
# Build pipeline to first scale the mid feature data, then apply the SVC

pipe = Pipeline([('scale', StandardScaler()),
                 ('svc', SVC(kernel='rbf'))])


# Fit the model to the training data

pipe.fit(X_train_hap, y_train_hap)
pipe.fit(X_train_ang, y_train_ang)
pipe.fit(X_train_sad, y_train_sad)
pipe.fit(X_train_dis, y_train_dis)
pipe.fit(X_train_fea, y_train_fea)

    
# Get the model's prediction on the test data

pred_hap = pipe.predict(X_test_hap)
pred_ang = pipe.predict(X_test_ang)
pred_sad = pipe.predict(X_test_sad)
pred_dis = pipe.predict(X_test_dis)
pred_fea = pipe.predict(X_test_fea)




In [51]:
# Look at the confusion matrix for the test data :

print("confusion matrix for the happy/neutral test set is:")
print(confusion_matrix(y_test_hap, pred_hap))
print()

# Look at the confusion matrix for the training data:
pred_train_hap = pipe.predict(X_train_hap)
print("confusion matrix for the happy/netural train set is:")
print(confusion_matrix(y_train_hap, pred_train_hap))


confusion matrix for the happy/neutral test set is:
[[141  34]
 [ 69  91]]

confusion matrix for the happy/netural train set is:
[[659  38]
 [273 368]]


In [52]:
# Look at the confusion matrix for the test data :

print("confusion matrix for the angry/neutral test set is:")
print(confusion_matrix(y_test_ang, pred_ang))
print()

# Look at the confusion matrix for the training data:
pred_train_ang = pipe.predict(X_train_ang)
print("confusion matrix for the angry/netural train set is:")
print(confusion_matrix(y_train_ang, pred_train_ang))


confusion matrix for the angry/neutral test set is:
[[141  34]
 [ 59 101]]

confusion matrix for the angry/netural train set is:
[[659  38]
 [220 421]]


In [53]:
# Look at the confusion matrix for the test data :

print("confusion matrix for the sad/neutral test set is:")
print(confusion_matrix(y_test_sad, pred_sad))
print()

# Look at the confusion matrix for the training data:
pred_train_sad = pipe.predict(X_train_sad)
print("confusion matrix for the sad/netural train set is:")
print(confusion_matrix(y_train_sad, pred_train_sad))


confusion matrix for the sad/neutral test set is:
[[141  34]
 [ 91  69]]

confusion matrix for the sad/netural train set is:
[[659  38]
 [366 274]]


In [54]:
# Look at the confusion matrix for the test data :

print("confusion matrix for the disgust/neutral test set is:")
print(confusion_matrix(y_test_dis, pred_dis))
print()

# Look at the confusion matrix for the training data:
pred_train_dis = pipe.predict(X_train_dis)
print("confusion matrix for the disgust/netural train set is:")
print(confusion_matrix(y_train_dis, pred_train_dis))


confusion matrix for the disgust/neutral test set is:
[[141  34]
 [118  43]]

confusion matrix for the disgust/netural train set is:
[[659  38]
 [442 198]]


In [55]:
# Look at the confusion matrix for the test data :

print("confusion matrix for the fear/neutral test set is:")
print(confusion_matrix(y_test_fea, pred_fea))
print()

# Look at the confusion matrix for the training data:
pred_train_fea = pipe.predict(X_train_fea)
print("confusion matrix for the fear/netural train set is:")
print(confusion_matrix(y_train_fea, pred_train_fea))


confusion matrix for the fear/neutral test set is:
[[141  34]
 [ 42 118]]

confusion matrix for the fear/netural train set is:
[[659  38]
 [ 47 594]]
