In [1]:
import pandas as pd
import numpy as np

## For plotting
import matplotlib.pyplot as plt
import seaborn as sns

## This sets the plot style
## to have a grid on a dark background
sns.set_style("whitegrid")

In [2]:

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix


In [3]:
# Read in your csv file that has the mid features. 

#data = pd.read_csv('../Feature Extraction/midFeaturesTrainSet.csv')
data = pd.read_csv('midFeaturesTrainFinal.csv')
print(data.shape)

(4876, 140)


In [4]:
data.head()

Unnamed: 0,FileID,actorID,Emotion,SentenceID,zcr_mean,energy_mean,energy_entropy_mean,spectral_centroid_mean,spectral_spread_mean,spectral_entropy_mean,...,delta chroma_4_std,delta chroma_5_std,delta chroma_6_std,delta chroma_7_std,delta chroma_8_std,delta chroma_9_std,delta chroma_10_std,delta chroma_11_std,delta chroma_12_std,delta chroma_std_std
0,1001_DFA_ANG_XX,1001,ANG,DFA,0.159956,0.012981,2.988679,0.25049,0.224054,1.342449,...,0.024107,0.014803,0.017961,0.013412,0.008655,0.010352,0.009738,0.0106,0.004328,0.009167
1,1001_DFA_DIS_XX,1001,DIS,DFA,0.175069,0.006502,2.930843,0.258174,0.209151,1.423432,...,0.022395,0.01551,0.008768,0.014533,0.009661,0.002533,0.004223,0.007513,0.003662,0.007296
2,1001_DFA_FEA_XX,1001,FEA,DFA,0.199849,0.016796,2.999322,0.27257,0.202443,1.399237,...,0.007043,0.003129,0.006915,0.007791,0.013899,0.005247,0.003474,0.014306,0.005781,0.00825
3,1001_DFA_HAP_XX,1001,HAP,DFA,0.148663,0.00796,2.880264,0.23575,0.211387,1.292553,...,0.021737,0.005675,0.009277,0.026797,0.010147,0.010658,0.017229,0.013203,0.010011,0.007488
4,1001_DFA_NEU_XX,1001,NEU,DFA,0.174283,0.010704,2.833565,0.256034,0.201942,1.413561,...,0.03197,0.012929,0.017969,0.037496,0.013379,0.008354,0.005615,0.008907,0.007483,0.013592


In [5]:
# Split the data into train and test set, stratified by Emotion.
data_train, data_test = train_test_split(data.copy(),
                                   shuffle=True,
                                   random_state=608,
                                   stratify=data.Emotion,
                                   test_size=0.2
                                   )

In [6]:
data.sample(5)

Unnamed: 0,FileID,actorID,Emotion,SentenceID,zcr_mean,energy_mean,energy_entropy_mean,spectral_centroid_mean,spectral_spread_mean,spectral_entropy_mean,...,delta chroma_4_std,delta chroma_5_std,delta chroma_6_std,delta chroma_7_std,delta chroma_8_std,delta chroma_9_std,delta chroma_10_std,delta chroma_11_std,delta chroma_12_std,delta chroma_std_std
2984,1057_TIE_ANG_XX,1057,ANG,TIE,0.133594,0.115822,3.092731,0.238804,0.214965,1.09562,...,0.023818,0.026959,0.032593,0.01921,0.008168,0.016165,0.021449,0.013443,0.00714,0.010815
129,1002_WSI_FEA_XX,1002,FEA,WSI,0.292524,0.006919,2.873871,0.35202,0.219322,1.512138,...,0.007702,0.014563,0.003555,0.010754,0.011958,0.004994,0.012436,0.025269,0.010054,0.007276
654,1012_TSI_HAP_XX,1012,HAP,TSI,0.259459,0.017099,2.747317,0.309768,0.221264,1.165105,...,0.014664,0.034863,0.024736,0.014519,0.008411,0.009071,0.011992,0.014498,0.018951,0.011698
3824,1074_ITH_SAD_XX,1074,SAD,ITH,0.104086,0.02271,2.845487,0.174065,0.176616,0.802384,...,0.014727,0.015466,0.017787,0.016991,0.006119,0.017857,0.02051,0.018147,0.006925,0.007298
3612,1071_IOM_ANG_XX,1071,ANG,IOM,0.088413,0.024278,2.944547,0.16759,0.184146,0.584587,...,0.031727,0.013099,0.020678,0.029603,0.00959,0.00714,0.016084,0.015565,0.012171,0.014149


In [7]:
# Check the percentages of the different emotion categories in the training set

data_train.Emotion.value_counts(normalize=True)

NEU    0.178718
HAP    0.164359
FEA    0.164359
ANG    0.164359
SAD    0.164103
DIS    0.164103
Name: Emotion, dtype: float64

In [8]:
# Check the percentages of the different emotion categories in the test set

data_test.Emotion.value_counts(normalize=True)

NEU    0.179303
DIS    0.164959
ANG    0.163934
SAD    0.163934
FEA    0.163934
HAP    0.163934
Name: Emotion, dtype: float64

In [9]:
y_train = data_train[['Emotion']]
y_test  = data_test[['Emotion']]

X_train = data_train.drop(columns  = ['FileID','actorID', 'Emotion', 'SentenceID'])
X_test  = data_test.drop(columns   = ['FileID','actorID', 'Emotion', 'SentenceID'])

In [10]:
y_test

Unnamed: 0,Emotion
2589,DIS
628,DIS
3624,ANG
334,ANG
2033,SAD
...,...
4030,ANG
614,SAD
2150,ANG
4016,NEU


In [25]:
for k in np.arrange (.8, .95, .05):
    pca = PCA(n_components = round(k, 1)

SyntaxError: unexpected EOF while parsing (703819594.py, line 2)

In [29]:
for k in np.arange (.8, .95, .05):
    pca = PCA(n_components = round(k, 1))
    pca.fit(X_train)

    X_train_transform[k] = pca.transform(X_train)
    X_test_transform[k]  = pca.transform(X_test)

    print(X_train_transform.shape)
    print(X_test_transform.shape)

    #X_train_sub = X_train_transform
    for i in range (len(X_train_sub[0])):
        X_train["comp_" + str(i+1)] = X_train_transform[:,i][k]
        X_test ["comp_" + str(i+1)] = X_test_transform[:,i][k]

    
    X_train_sub[k]  = X_train.iloc[: , -(len(X_train_transform[0])+1):]
    X_test_sub[k]   = X_test.iloc[: , -(len(X_test_transform[0])+1):]    

    

SyntaxError: cannot assign to function call (1861668315.py, line 5)

In [12]:

pca = PCA(n_components=.95)
pca.fit(X_train)

X_train_transform = pca.transform(X_train)
X_test_transform  = pca.transform(X_test)

print(X_train_transform.shape)
print(X_test_transform.shape)

X_train_sub = X_train_transform
for i in range (len(X_train_sub[0])):
    X_train["comp_" + str(i+1)] = X_train_transform[:,i]
    X_test ["comp_" + str(i+1)] = X_test_transform[:,i]

    
X_train_sub  = X_train.iloc[: , -(len(X_train_transform[0])+1):]
X_test_sub   = X_test.iloc[: , -(len(X_test_transform[0])+1):]



(3900, 10)
(976, 10)


In [None]:
# Add a column to the y vectors encoding each of the emotions.

y_train_dummies = pd.get_dummies(y_train)
y_train         = pd.concat([y_train, y_train_dummies], axis=1)
y_train["Emotion_ALL"]  = 1*y_train["Emotion_NEU"] + 2*y_train["Emotion_ANG"] + 3*y_train["Emotion_HAP"] + 4*y_train["Emotion_SAD"] + 5*y_train["Emotion_FEA"] + 6*y_train["Emotion_DIS"]
y_train = y_train.drop(columns  = ["Emotion", "Emotion_NEU", "Emotion_ANG", "Emotion_HAP", "Emotion_SAD", "Emotion_FEA", "Emotion_DIS"])

y_test_dummies = pd.get_dummies(y_test)
y_test         = pd.concat([y_test, y_test_dummies], axis=1)
y_test["Emotion_ALL"]  = 1*y_test["Emotion_NEU"] + 2*y_test["Emotion_ANG"] + 3*y_test["Emotion_HAP"] + 4*y_test["Emotion_SAD"] + 5*y_test["Emotion_FEA"] + 6*y_test["Emotion_DIS"]
y_test  = y_test.drop(columns   = ["Emotion", "Emotion_NEU", "Emotion_ANG", "Emotion_HAP", "Emotion_SAD", "Emotion_FEA", "Emotion_DIS"])


In [None]:
np.unique(y_test)

In [None]:
# Build pipeline to first scale the mid feature data, then apply the SVC

pipe = Pipeline([('scale', StandardScaler()),
                 ('svc', SVC(kernel='rbf'))])

classifier = pipe.fit(X_train_sub, y_train)
pred       = pipe.predict(X_test_sub)




In [None]:
# Look at the confusion matrix for the test data :
cnf_matrix_test = confusion_matrix(y_test, pred)

print("confusion matrix for all six emotions of the test set is:")
print(cnf_matrix_test)
print()

# Look at the confusion matrix for the training data:
pred_train       = pipe.predict(X_train_sub)
cnf_matrix_train = confusion_matrix(y_train, pred_train)

print("confusion matrix for all six emotions of the train set is:")
print(cnf_matrix_train)


In [None]:
from sklearn.metrics import plot_confusion_matrix
class_names = ["NEU", "ANG", "HAP", "SAD", "FEA", "DIS"]

In [None]:
disp = plot_confusion_matrix(classifier, X_test_sub, y_test,
                                 display_labels=class_names,
                                 cmap=plt.cm.Blues)
plt.show()

In [None]:

FP = cnf_matrix_test.sum(axis=0) - np.diag(cnf_matrix_test) 
FN = cnf_matrix_test.sum(axis=1) - np.diag(cnf_matrix_test)
TP = np.diag(cnf_matrix_test)
TN = cnf_matrix_test.sum() - (FP + FN + TP)

In [None]:
FP = FP.astype(float)
FN = FN.astype(float)
TP = TP.astype(float)
TN = TN.astype(float)
# Sensitivity, hit rate, recall, or true positive rate
TPR = TP/(TP+FN)
# Specificity or true negative rate
TNR = TN/(TN+FP) 
# Precision or positive predictive value
PPV = TP/(TP+FP)
# Negative predictive value
NPV = TN/(TN+FN)
# Fall out or false positive rate
FPR = FP/(FP+TN)
# False negative rate
FNR = FN/(TP+FN)
# False discovery rate
FDR = FP/(TP+FP)
# Overall accuracy for each class
ACC = (TP+TN)/(TP+FP+FN+TN)

In [None]:
ACC