# Introduction to Machine Learning
# Emotion analysis from EEG data

#### Barr Morgenstein Gauri Nagavkar

Import Libraries

In [34]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn import svm
from sklearn import linear_model, preprocessing
from sklearn.naive_bayes import GaussianNB

import pickle

from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings("ignore")

List of 32 .dat files, corresponding to 32 participants.

Every .dat file has EEG data and Emotion labels data.

In [2]:
dat_list = ['s01.dat', 's02.dat', 's03.dat', 's04.dat', 's05.dat', 's06.dat', 's07.dat', 's08.dat', 's09.dat', 's10.dat',
            's11.dat', 's12.dat', 's13.dat', 's14.dat', 's15.dat', 's16.dat', 's17.dat', 's18.dat', 's19.dat', 's20.dat',
            's21.dat', 's22.dat', 's23.dat', 's24.dat', 's25.dat', 's26.dat', 's27.dat', 's28.dat', 's29.dat', 's30.dat',
            's31.dat', 's32.dat']

Function to extract data from .dat files in the form of X and y.

In [3]:
def dat_to_dict(dat_file):
    infile= open(dat_file, 'rb')
    data_temp= pickle.load(infile, encoding='latin1')
    infile.close()
    
    X_temp= data_temp["data"]
    X_temp= X_temp[:, :-8]
    y_temp= data_temp["labels"]  
    
    return X_temp, y_temp

Create arrays X (EEG data) and y (emotion labels)

In [4]:
X, y= dat_to_dict(dat_list[0])

for i in range(1,32):
    X1, y1= dat_to_dict(dat_list[i])
    
    X= np.vstack((X, X1))
    y= np.vstack((y, y1))
    
    #print(i)

Scale X and y

Remove mean and divide by standard deviation

In [5]:
y_mean= np.mean(y, axis=0)
y_n= (y-y_mean)/np.std(y)

y_n= np.float32(y_n)


X_mean= np.mean(X, axis=0)
X_n= (X-X_mean)/np.std(X)

X_n= np.float32(X_n)

Reshape normalized X and convert it to a 2-D array

In [6]:
nsamp, nchan, nt = X_n.shape
p= nchan*nt
X_n = X_n.reshape((nsamp, p))

Separate y into its different label components

Convert the y components into 4 classes

In [112]:
#Valence y
y0 = y_n[:,0] 

y0= np.where((y0>=0.5),4, y0)
y0= np.where((y0>= -0.5) & (y0<0),2, y0)
y0= np.where((y0>= 0) & (y0<0.5),3, y0)
y0= np.where((y0<0.5),1, y0)

#Arousal y
y1 = y_n[:,1] 

y1= np.where((y1>=0.5),4, y1)
y1= np.where((y1>= -0.5) & (y1<0),2, y1)
y1= np.where((y1>= 0) & (y1<0.5),3, y1)
y1= np.where((y1<0.5),1, y1)

#Dominance y
y2 = y_n[:,2] 

y2= np.where((y2>=0.5),4, y2)
y2= np.where((y2>= -0.5) & (y2<0),2, y2)
y2= np.where((y2>= 0) & (y2<0.5),3, y2)
y2= np.where((y2<0.5),1, y2)

#Liking y
y3 = y_n[:,3]

y3= np.where((y3>=0.5),4, y3)
y3= np.where((y3>= -0.5) & (y3<0),2, y3)
y3= np.where((y3>= 0) & (y3<0.5),3, y3)
y3= np.where((y3<0.5),1, y3)

Split normalized X and y into train and test

In [113]:
X0tr, X0ts, y0tr, y0ts = train_test_split(X_n, y0, test_size=0.33)
X1tr, X1ts, y1tr, y1ts = train_test_split(X_n, y1, test_size=0.33)
X2tr, X2ts, y2tr, y2ts = train_test_split(X_n, y2, test_size=0.33)
X3tr, X3ts, y3tr, y3ts = train_test_split(X_n, y3, test_size=0.33)

PCA function for obtaining principle components

In [9]:
def pc(X, ncomp):
    pca= PCA(n_components= ncomp, svd_solver= 'randomized', whiten= True)
    pca.fit(X)
    Z= pca.transform(X)
    
    return Z

Function for Support Vector Machine (SVC)

In [106]:
def svcc(Xtr, ytr, Xts, yts):
    #clf = svm.linearSVC(mu;ti_class='ovr')
    clf = svm.SVC()
    clf.fit(Xtr, ytr)

    yhat= clf.predict(Xts)

    acc = np.mean(yhat == yts)
    
    return yhat, acc

Function for Logistic regression

In [107]:
def lr(Xtr, ytr, Xts, yts):
    clf = linear_model.LogisticRegression()
    clf.fit(Xtr, ytr)

    yhat= clf.predict(Xts)

    acc = np.mean(yhat == yts)
    
    return yhat, acc

 Function for Gaussian Naive Bayes classifier

In [108]:
def gnb(Xtr, ytr, Xts, yts):
    clf = GaussianNB()
    clf.fit(Xtr,ytr)
    
    yhat= clf.predict(Xts)
    
    acc = np.mean(yhat == yts)
    
    return yhat, acc

Valence

In [110]:
ncomp_test= np.linspace(300,400,10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z0tr= pc(X0tr, ncomp)

    Z0ts= pc(X0ts, ncomp)

    y0hat, acc[icomp]= svcc(Z0tr, y0tr, Z0ts, y0ts)
    
    #print(icomp, ncomp, acc[icomp])

In [111]:
imax = np.argmax(acc
print(f'The maximum accuracy for Valence using SVM is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Valence using SVM is 68.79 %, obtained using 300 principle components


In [114]:
ncomp_test= np.linspace(250, 300, 10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z0tr= pc(X0tr, ncomp)

    Z0ts= pc(X0ts, ncomp)

    y0hat, acc[icomp]= lr(Z0tr, y0tr, Z0ts, y0ts)
    
    #print(icomp, ncomp, acc[icomp])

In [115]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Valence using Logistic regression is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Valence using Logistic regression is 55.56 %, obtained using 255 principle components


In [66]:
ncomp_test= np.linspace(350,400, 10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z0tr= pc(X0tr, ncomp)

    Z0ts= pc(X0ts, ncomp)

    y0hat, acc[icomp]= gnb(Z0tr, y0tr, Z0ts, y0ts)
    
    #print(icomp, ncomp, acc[icomp])

In [67]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Valence using Gaussian model is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Valence using Gaussian model is 48.23 %, obtained using 383 principle components


Arousal

In [116]:
ncomp_test= np.linspace(300,350,10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z1tr= pc(X1tr, ncomp)

    Z1ts= pc(X1ts, ncomp)

    y1hat, acc[icomp]= svcc(Z1tr, y1tr, Z1ts, y1ts)
    
    #print(icomp, ncomp, acc[icomp])

In [117]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Arousal using SVM is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Arousal using SVM is 67.14 %, obtained using 300 principle components


In [118]:
ncomp_test= np.linspace(65,120,10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z1tr= pc(X1tr, ncomp)

    Z1ts= pc(X1ts, ncomp)

    y1hat, acc[icomp]= lr(Z1tr, y1tr, Z1ts, y1ts)
    
    #print(icomp, ncomp, acc[icomp])

In [119]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Arousal using Logistic regression is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Arousal using Logistic regression is 63.36 %, obtained using 65 principle components


In [69]:
ncomp_test= np.linspace(250, 330, 10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z1tr= pc(X1tr, ncomp)

    Z1ts= pc(X1ts, ncomp)

    y1hat, acc[icomp]= gnb(Z1tr, y1tr, Z1ts, y1ts)
    
    #print(icomp, ncomp, acc[icomp])

In [70]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Arousal using Gaussian model is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Arousal using Gaussian model is 53.19 %, obtained using 312 principle components


Dominance

In [120]:
ncomp_test= np.linspace(60,120,10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z2tr= pc(X2tr, ncomp)

    Z2ts= pc(X2ts, ncomp)

    y2hat, acc[icomp]= svcc(Z2tr, y2tr, Z2ts, y2ts)
    
    #print(icomp, ncomp, acc[icomp])

In [121]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Dominance using SVM is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Dominance using SVM is 65.96 %, obtained using 60 principle components


In [62]:
ncomp_test= np.linspace(200,260,10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z2tr= pc(X2tr, ncomp)

    Z2ts= pc(X2ts, ncomp)

    y2hat, acc[icomp]= lr(Z2tr, y2tr, Z2ts, y2ts)
    
    #print(icomp, ncomp, acc[icomp])

In [63]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Dominance using Logistic regression is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Dominance using Logistic regression is 57.45 %, obtained using 220 principle components


In [122]:
ncomp_test= np.linspace(350,400, 10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z2tr= pc(X2tr, ncomp)

    Z2ts= pc(X2ts, ncomp)

    y2hat, acc[icomp]= gnb(Z2tr, y2tr, Z2ts, y2ts)
    
    #print(icomp, ncomp, acc[icomp])

In [129]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Dominance using Gaussian model is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Dominance using Gaussian model is 51.34 %, obtained using 10 principle components


Liking

In [124]:
ncomp_test= np.linspace(10,70,10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z3tr= pc(X3tr, ncomp)

    Z3ts= pc(X3ts, ncomp)

    y3hat, acc[icomp]= svcc(Z3tr, y3tr, Z3ts, y3ts)
    
    #print(icomp, ncomp, acc[icomp])

In [125]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Liking using SVM is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Liking using SVM is 71.87 %, obtained using 10 principle components


In [126]:
ncomp_test= np.linspace(10,70,10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z3tr= pc(X3tr, ncomp)

    Z3ts= pc(X3ts, ncomp)

    y3hat, acc[icomp]= lr(Z3tr, y3tr, Z3ts, y3ts)
    
    #print(icomp, ncomp, acc[icomp])

In [127]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Liking using Logistic Regression is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Liking using Logisti is 71.63 %, obtained using 10 principle components


In [130]:
ncomp_test= np.linspace(10,70,10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

 # TODO: Fit the PCA on the scaled training data
    Z3tr= pc(X3tr, ncomp)

    Z3ts= pc(X3ts, ncomp)

    y3hat, acc[icomp]= gnb(Z3tr, y3tr, Z3ts, y3ts)
    
    #print(icomp, ncomp, acc[icomp])

In [132]:
imax = np.argmax(acc)
print(f'The maximum accuracy for Liking using Gaussian model is {np.around(acc[imax]*100,2)} %, obtained using {ncomp_test[imax]} principle components')

The maximum accuracy for Liking using Gaussian model is 50.66 %, obtained using 70 principle components
