# Introduction to Machine Learning
# Emotion analysis from EEG data

#### Barr Morgenstein Gauri Nagavkar

Import Libraries

In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn import svm
from sklearn import linear_model, preprocessing

import pickle

from sklearn.decomposition import PCA

List of 32 .dat files, corresponding to 32 participants.

Every .dat file has EEG data and Emotion labels data.

In [2]:
# File names 's01.dat' through 's32.dat', one per participant, in order.
dat_list = ['s{:02d}.dat'.format(participant) for participant in range(1, 33)]

Function to extract data from .dat files in the form of X and y.

In [3]:
def dat_to_dict(dat_file):
    """Load one pickled .dat file and return its data and labels.

    Parameters
    ----------
    dat_file : str or path-like
        Path to a pickle file containing a mapping with "data" and
        "labels" entries.

    Returns
    -------
    X_temp : array
        The "data" entry with the last 8 positions along axis 1 removed.
        # presumably these are the non-EEG channels -- TODO confirm
    y_temp : array
        The "labels" entry, unchanged.

    Raises
    ------
    OSError
        If the file cannot be opened.
    KeyError
        If the "data" or "labels" entry is missing.
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only call this on files from a trusted source.
    # The context manager guarantees the handle is closed even if
    # unpickling raises.
    with open(dat_file, 'rb') as infile:
        data_temp = pickle.load(infile, encoding='latin1')

    # Keep everything except the last 8 entries along axis 1.
    X_temp = data_temp["data"][:, :-8]
    y_temp = data_temp["labels"]

    return X_temp, y_temp

Create arrays X (EEG data) and y (emotion labels)

In [4]:
# Load every file listed in dat_list once, then stack in a single pass.
# Stacking inside the loop would re-copy the whole accumulated array on
# every iteration.
loaded = [dat_to_dict(dat_file) for dat_file in dat_list]

# Rows of each file are appended in dat_list order.
X = np.vstack([X_part for X_part, _ in loaded])
y = np.vstack([y_part for _, y_part in loaded])

# Release the per-file arrays; only the stacked X and y are needed below.
del loaded

Scale X and y

Remove mean and divide by standard deviation

In [5]:
# Standardize each label column: subtract the column mean and divide by the
# column standard deviation. The std must use the same axis as the mean,
# otherwise every column is divided by one global spread value.
y_mean = np.mean(y, axis=0)
y_std = np.std(y, axis=0)
# Constant columns would divide by zero; leave them centred at 0 instead.
y_std = np.where(y_std == 0, 1.0, y_std)
y_n = np.float32((y - y_mean) / y_std)

# Same per-position standardization for X (statistics taken over axis 0).
X_mean = np.mean(X, axis=0)
X_std = np.std(X, axis=0)
X_std = np.where(X_std == 0, 1.0, X_std)
X_n = np.float32((X - X_mean) / X_std)

Reshape normalized X and convert it to a 2-D array

In [6]:
# Flatten the last two axes of X_n so each row becomes a single feature
# vector of length p.
# NOTE: X_n is rebound here, so re-running this cell without re-running the
# scaling cell fails at the 3-way unpack below.
nsamp, nchan, nt = np.shape(X_n)
p = nchan * nt
X_n = np.reshape(X_n, (nsamp, p))

Separate y into its different label components

Convert each y component into a binary class by hard-thresholding the normalized value at 0, i.e. at the label's mean: class 1 if above the mean, class 0 otherwise.

In [7]:
# Columns 0..3 of y_n are, in order: valence, arousal, dominance, liking.
# Each becomes a binary class vector: 1 where the normalized value is
# strictly positive, 0 otherwise.
y0, y1, y2, y3 = (np.where(y_n[:, col] > 0, 1, 0) for col in range(4))

Split normalized X and y into train and test

In [8]:
# Fixed seed so the partition (and every accuracy reported below) is
# reproducible after Restart & Run All. Using the same seed for all four
# calls also gives every label the same train/test partition of X_n.
SPLIT_SEED = 0

X0tr, X0ts, y0tr, y0ts = train_test_split(X_n, y0, test_size=0.33, random_state=SPLIT_SEED)
X1tr, X1ts, y1tr, y1ts = train_test_split(X_n, y1, test_size=0.33, random_state=SPLIT_SEED)
X2tr, X2ts, y2tr, y2ts = train_test_split(X_n, y2, test_size=0.33, random_state=SPLIT_SEED)
X3tr, X3ts, y3tr, y3ts = train_test_split(X_n, y3, test_size=0.33, random_state=SPLIT_SEED)

PCA function for obtaining principal components

In [9]:
def pc(X, ncomp):
    """Project X onto its own first `ncomp` principal components.

    A whitened PCA using the randomized SVD solver is fitted on X and then
    applied to that same X.

    Parameters
    ----------
    X : array-like
        Data passed to sklearn's PCA.
    ncomp : int
        Number of components to keep.

    Returns
    -------
    The transformed X as returned by PCA.transform.

    Note: the fitted PCA object is discarded, so the projection learned here
    cannot be re-applied to other data.
    """
    reducer = PCA(n_components=ncomp, svd_solver='randomized', whiten=True)
    return reducer.fit(X).transform(X)

Function to select the optimal number of PCA components for a given X and y pair and a specific model, using the one-standard-deviation rule

In [10]:
# Disabled draft: the text below is a bare string literal, so nothing in it
# is executed and `pc_optimal` is NOT defined by this cell.
# The draft stops after `clf= model` (no fit, scoring, or return) and refers
# to `Xtr`, which is not a parameter of the drafted function.
"""
def pc_optimal(model, range_start, range_stop, nsteps):
    ncomp_test= np.linspace(range_start, range_stop, nsteps, endpoint=True, dtype=int)
    num_nc = len(ncomp_test)
    
    acc = np.zeros((num_nc))

# Loop over number of components to test
    for icomp, ncomp in enumerate(ncomp_test):

#Fit the PCA on the scaled training data
        Z = pc(Xtr, ncomp)
        
        clf= model
"""

'\ndef pc_optimal(model, range_start, range_stop, nsteps):\n    ncomp_test= np.linspace(range_start, range_stop, nsteps, endpoint=True, dtype=int)\n    num_nc = len(ncomp_test)\n    \n    acc = np.zeros((num_nc))\n\n# Loop over number of components to test\n    for icomp, ncomp in enumerate(ncomp_test):\n\n#Fit the PCA on the scaled training data\n        Z = pc(Xtr, ncomp)\n        \n        clf= model\n'

Function for Support Vector Machine (SVC)

In [11]:
def svcc(Xtr, ytr, Xts, yts):
    """Train a default-parameter SVC and score it on held-out data.

    Parameters
    ----------
    Xtr, ytr : training features and labels passed to `fit`.
    Xts, yts : test features passed to `predict`, and the labels the
        predictions are compared against.

    Returns
    -------
    yhat : predictions for Xts.
    acc : fraction of predictions equal to yts.
    """
    classifier = svm.SVC().fit(Xtr, ytr)
    yhat = classifier.predict(Xts)
    return yhat, np.mean(yhat == yts)

Function for Logistic regression

In [12]:
def lr(Xtr, ytr, Xts, yts):
    """Train a default-parameter LogisticRegression and score it on held-out data.

    Parameters
    ----------
    Xtr, ytr : training features and labels passed to `fit`.
    Xts, yts : test features passed to `predict`, and the labels the
        predictions are compared against.

    Returns
    -------
    yhat : predictions for Xts.
    acc : fraction of predictions equal to yts.
    """
    classifier = linear_model.LogisticRegression().fit(Xtr, ytr)
    yhat = classifier.predict(Xts)
    return yhat, np.mean(yhat == yts)

Function for Gaussian Naive Bayes classifier

In [13]:
def gnb(Xtr, ytr, Xts, yts):
    """Train a Gaussian Naive Bayes classifier and score it on held-out data.

    Parameters
    ----------
    Xtr, ytr : training features and labels passed to `fit`.
    Xts, yts : test features passed to `predict`, and the labels the
        predictions are compared against.

    Returns
    -------
    yhat : predictions for Xts.
    acc : fraction of predictions equal to yts.
    """
    # Imported here because the notebook's import cell does not bring
    # GaussianNB into scope.
    from sklearn.naive_bayes import GaussianNB

    clf = GaussianNB()
    clf.fit(Xtr, ytr)

    yhat = clf.predict(Xts)

    # Accuracy was previously returned without ever being computed.
    acc = np.mean(yhat == yts)

    return yhat, acc

Valence

In [None]:
# Candidate numbers of principal components to evaluate.
ncomp_test = np.linspace(300, 400, 10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

# acc[icomp] is the test accuracy obtained with ncomp = ncomp_test[icomp].
acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

    # Learn the projection from the training split only, then apply that SAME
    # projection to the test split. Fitting a second PCA on the test split
    # would place train and test features in unrelated bases.
    pca = PCA(n_components=ncomp, svd_solver='randomized', whiten=True)
    Z0tr = pca.fit(X0tr).transform(X0tr)
    Z0ts = pca.transform(X0ts)

    y0hat, acc[icomp] = svcc(Z0tr, y0tr, Z0ts, y0ts)

    print(icomp, ncomp, acc[icomp])

In [27]:
# Candidate numbers of principal components to evaluate.
ncomp_test = np.linspace(250, 300, 10, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

# acc[icomp] is the test accuracy obtained with ncomp = ncomp_test[icomp].
acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

    # Learn the projection from the training split only, then apply that SAME
    # projection to the test split. Fitting a second PCA on the test split
    # would place train and test features in unrelated bases.
    pca = PCA(n_components=ncomp, svd_solver='randomized', whiten=True)
    Z0tr = pca.fit(X0tr).transform(X0tr)
    Z0ts = pca.transform(X0ts)

    y0hat, acc[icomp] = lr(Z0tr, y0tr, Z0ts, y0ts)

    print(icomp, ncomp, acc[icomp])



TypeError: cannot unpack non-iterable NoneType object

In [None]:
# Candidate numbers of principal components to evaluate.
ncomp_test = np.linspace(10, 400, 20, endpoint=True, dtype=int)
num_nc = len(ncomp_test)

# acc[icomp] is the test accuracy obtained with ncomp = ncomp_test[icomp].
acc = np.zeros((num_nc))

# Loop over number of components to test
for icomp, ncomp in enumerate(ncomp_test):

    # Learn the projection from the training split only, then apply that SAME
    # projection to the test split. Fitting a second PCA on the test split
    # would place train and test features in unrelated bases.
    pca = PCA(n_components=ncomp, svd_solver='randomized', whiten=True)
    Z0tr = pca.fit(X0tr).transform(X0tr)
    Z0ts = pca.transform(X0ts)

    # Store into acc[icomp]; assigning to `acc` itself would replace the
    # whole results array with a single scalar.
    y0hat, acc[icomp] = gnb(Z0tr, y0tr, Z0ts, y0ts)

    print(icomp, ncomp, acc[icomp])