# Processing all session data and predict subject emotion with LDA

## Set up

In [1]:
import os
import glob
import time
import matplotlib.pyplot as plt
from matplotlib import rcParams

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from get_feature_response import *

# figure size in inches: 
# https://stackoverflow.com/questions/31594549/
# how-to-change-the-figure-size-of-a-seaborn-axes-or-figure-level-plot
rcParams['figure.figsize'] = 11.7,8.27

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-k4g1hz_2 because the default path (/home/chc012/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:
CATEGORIES = {
    (0, 0) : "lvla",
    (0, 1) : "lvha",
    (1, 0) : "hvla",
    (1, 1) : "hvha"
}

min_record_len = 35

### Get preprocessed data

In [3]:
%%capture 
start_time = time.time()

raw_path = "raw"
all_sessions = os.listdir(raw_path)

loaded_raws = []

for session in all_sessions:
    session_path = os.path.join(raw_path, session)
    raw = mne.io.Raw(session_path, preload=True)
    
    loaded_raws.append(raw)

In [4]:
print("Took %ss to finish." % (time.time() - start_time))

Took 3.127345561981201s to finish.


### Get emotion response

In [5]:
dataset_path = "/net2/expData/affective_eeg/mahnob_dataset/Sessions"
meta_data_path = "session.xml"
all_session_nums = os.listdir(dataset_path) # List of all session names

session_nums = [] # Sessions with bdf recordings

# Get current working directory to change back later
curr_dir = os.getcwd()

# From data manual, bdf file may not exist if "the trials is missing due to 
# technical difficulties" (pg 15).
# Skip all sessions with no bdf recordings
for session in all_session_nums:
    session_path = os.path.join(dataset_path, session)
    os.chdir(session_path)
    bdf_list = glob.glob("*.bdf")
    
    if (len(bdf_list) == 1):
        session_nums.append(session)
    
    elif (len(bdf_list) > 1):
        raise ValueError("Cannot handle multiple bdf files in one session.")

# Change back to notebook directory as a precaution
os.chdir(curr_dir)
print("Back to directory: ", os.getcwd())

response_list = []

for session in session_nums:
    xml_path = os.path.join(dataset_path, session, meta_data_path)
    resp = get_affect(xml_path)
    response_list.append(resp)

response_array = np.array(response_list)

Back to directory:  /home/jovyan/mahnob/attended-gaze/full-session


In [6]:
print("response_array shape:", np.shape(response_array))

response_array shape: (527, 2)


## 1. Alpha+Beta band & 2.5 sec window

In [7]:
FREQ_BANDS = ["alpha", "beta"]
CHANNELS = ["F3", "Fz", "F4", "FC1", "FC2"]
WINDOW = 2.5

### Feature extraction

__Warning: Long run time.__

In [8]:
%%capture
start_time = time.time()

features_list = []
features_red_dim_list = []

for raw in loaded_raws:
    _features, _features_red_dim = bdf_to_features(raw=raw, 
                                                   duration=min_record_len,
                                                   freq_bands=FREQ_BANDS, 
                                                   chs=CHANNELS,
                                                   window=WINDOW)
    features_list.append(_features)
    features_red_dim_list.append(_features_red_dim)

features_array = np.array(features_list)
features_red_dim_array = np.array(features_red_dim_list)

In [9]:
print("Took %ss to finish." % (time.time() - start_time))
print("features_list shape:", np.shape(features_list))
print("features_red_dim_list shape:", np.shape(features_red_dim_list))

Took 19.51746892929077s to finish.
features_list shape: (527, 2, 5, 14)
features_red_dim_list shape: (527, 140)


### Prediction

#### LDA

In [10]:
lda_v = LDA()
lda_v_1_scores = cross_val_score(lda_v, features_red_dim_list, response_array[:,0], cv=5)
print(lda_v_1_scores)

[0.52830189 0.55660377 0.51428571 0.56190476 0.54285714]


In [11]:
lda_a = LDA()
lda_a_1_scores = cross_val_score(lda_a, features_red_dim_list, response_array[:,1], cv=5)
print(lda_a_1_scores)

[0.58490566 0.53773585 0.51428571 0.4952381  0.44761905]


#### SVC

In [12]:
svc_v = SVC()
svc_v_1_scores = cross_val_score(svc_v, features_red_dim_list, response_array[:,0], cv=5)
print(svc_v_1_scores)

[0.64150943 0.63207547 0.64761905 0.63809524 0.63809524]


In [13]:
svc_a = SVC()
svc_a_1_scores = cross_val_score(svc_a, features_red_dim_list, response_array[:,1], cv=5)
print(svc_a_1_scores)

[0.61320755 0.61320755 0.61904762 0.60952381 0.60952381]


## 2. Alpha+beta band & 0.5 sec window

In [14]:
FREQ_BANDS = ["alpha", "beta"]
CHANNELS = ["F3", "Fz", "F4", "FC1", "FC2"]
WINDOW = 0.5

### Feature extraction

__Warning: Long run time.__

In [15]:
%%capture
start_time = time.time()

features_list = []
features_red_dim_list = []

for raw in loaded_raws:
    _features, _features_red_dim = bdf_to_features(raw=raw, 
                                                   duration=min_record_len, 
                                                   freq_bands=FREQ_BANDS, 
                                                   chs=CHANNELS,
                                                   window=WINDOW)
    features_list.append(_features)
    features_red_dim_list.append(_features_red_dim)

features_array = np.array(features_list)
features_red_dim_array = np.array(features_red_dim_list)

In [16]:
print("Took %ss to finish." % (time.time() - start_time))
print("features_list shape:", np.shape(features_list))
print("features_red_dim_list shape:", np.shape(features_red_dim_list))

Took 13.123753547668457s to finish.
features_list shape: (527, 2, 5, 70)
features_red_dim_list shape: (527, 700)


### Prediction

#### LDA

In [17]:
lda_v = LDA()
lda_v_2_scores = cross_val_score(lda_v, features_red_dim_list, response_array[:,0], cv=5)
print(lda_v_2_scores)

[0.60377358 0.52830189 0.6        0.53333333 0.59047619]


In [18]:
lda_a = LDA()
lda_a_2_scores = cross_val_score(lda_a, features_red_dim_list, response_array[:,1], cv=5)
print(lda_a_2_scores)

[0.55660377 0.48113208 0.56190476 0.56190476 0.43809524]


#### SVC

In [19]:
svc_v = SVC()
svc_v_2_scores = cross_val_score(svc_v, features_red_dim_list, response_array[:,0], cv=5)
print(svc_v_2_scores)

[0.64150943 0.64150943 0.64761905 0.63809524 0.63809524]


In [20]:
svc_v = make_pipeline(StandardScaler(), SVC())
svc_a_2_scores = cross_val_score(svc_a, features_red_dim_list, response_array[:,1], cv=5)
print(svc_a_2_scores)

[0.61320755 0.62264151 0.61904762 0.61904762 0.60952381]


## 3. Alpha+beta+theta band & 2.5 sec window

In [21]:
FREQ_BANDS = ["alpha", "beta", "theta"]
CHANNELS = ["F3", "Fz", "F4", "FC1", "FC2"]
WINDOW = 2.5

### Feature extraction

__Warning: Long run time.__

In [None]:
%%capture
start_time = time.time()

features_list = []
features_red_dim_list = []

for raw in loaded_raws:
    _features, _features_red_dim = bdf_to_features(raw=raw, 
                                                   duration=min_record_len, 
                                                   freq_bands=FREQ_BANDS, 
                                                   chs=CHANNELS,
                                                   window=WINDOW)
    features_list.append(_features)
    features_red_dim_list.append(_features_red_dim)

features_array = np.array(features_list)
features_red_dim_array = np.array(features_red_dim_list)

In [None]:
print("Took %ss to finish." % (time.time() - start_time))
print("features_list shape:", np.shape(features_list))
print("features_red_dim_list shape:", np.shape(features_red_dim_list))

### Prediction

#### LDA

In [None]:
lda_v = LDA()
lda_v_3_scores = cross_val_score(lda_v, features_red_dim_list, response_array[:,0], cv=5)
print(lda_v_3_scores)

In [None]:
lda_a = LDA()
lda_a_3_scores = cross_val_score(lda_a, features_red_dim_list, response_array[:,1], cv=5)
print(lda_a_3_scores)

#### SVC

In [None]:
svc_v = make_pipeline(StandardScaler(), SVC())
svc_v_3_scores = cross_val_score(svc_v, features_red_dim_list, response_array[:,0], cv=5)
print(svc_v_3_scores)

In [None]:
svc_v = make_pipeline(StandardScaler(), SVC())
svc_a_3_scores = cross_val_score(svc_a, features_red_dim_list, response_array[:,1], cv=5)
print(svc_a_3_scores)

## 4. Alpha+beta band & 2.5 sec window & more frontal channel

In [None]:
FREQ_BANDS = ["alpha", "beta"]
CHANNELS = ["F3", "Fz", "F4", "AF3", "AF4"]
WINDOW = 2.5

### Feature extraction

__Warning: Long run time.__

In [None]:
%%capture
start_time = time.time()

features_list = []
features_red_dim_list = []

for raw in loaded_raws:
    _features, _features_red_dim = bdf_to_features(raw=raw, 
                                                   duration=min_record_len, 
                                                   freq_bands=FREQ_BANDS, 
                                                   chs=CHANNELS,
                                                   window=WINDOW)
    features_list.append(_features)
    features_red_dim_list.append(_features_red_dim)

features_array = np.array(features_list)
features_red_dim_array = np.array(features_red_dim_list)

In [None]:
print("Took %ss to finish." % (time.time() - start_time))
print("features_list shape:", np.shape(features_list))
print("features_red_dim_list shape:", np.shape(features_red_dim_list))

### Prediction

#### LDA

In [None]:
lda_v = LDA()
lda_v_4_scores = cross_val_score(lda_v, features_red_dim_list, response_array[:,0], cv=5)
print(lda_v_4_scores)

In [None]:
lda_a = LDA()
lda_a_4_scores = cross_val_score(lda_a, features_red_dim_list, response_array[:,1], cv=5)
print(lda_a_4_scores)

#### SVC

In [None]:
svc_v = make_pipeline(StandardScaler(), SVC())
svc_v_4_scores = cross_val_score(svc_v, features_red_dim_list, response_array[:,0], cv=5)
print(svc_v_4_scores)

In [None]:
svc_v = make_pipeline(StandardScaler(), SVC())
svc_a_4_scores = cross_val_score(svc_a, features_red_dim_list, response_array[:,1], cv=5)
print(svc_a_4_scores)

## Evaluation

The four models:
1. Alpha+Beta band & 2.5 sec window
2. Alpha+beta band & 0.5 sec window
3. Alpha+beta+theta band & 2.5 sec window
4. Alpha+beta band & 2.5 sec window & more frontal channel (replace FC1 and FC2 with AF3 and AF4)

### Valence

In [None]:
# Adapted from https://matplotlib.org/stable/gallery/lines_bars_and_markers/barchart.html
labels = ['Model 1', 'Model 2', 'Model 3', 'Model 4']

lda_v_means = [np.mean(lda_v_1_scores), np.mean(lda_v_2_scores), 
               np.mean(lda_v_3_scores), np.mean(lda_v_4_scores)]
svc_v_means = [np.mean(svc_v_1_scores), np.mean(svc_v_2_scores), 
               np.mean(svc_v_3_scores), np.mean(svc_v_4_scores)]

x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, lda_v_means, width, label='LDA')
rects2 = ax.bar(x + width/2, svc_v_means, width, label='SVC')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Scores')
ax.set_title('Average cross validation scores of LDA and SVC for valence prediction')
ax.set_xticks(x, labels)
ax.legend()

ax.bar_label(rects1, padding=3)
ax.bar_label(rects2, padding=3)

fig.tight_layout()

plt.show()

### Arousal

In [None]:
# Adapted from https://matplotlib.org/stable/gallery/lines_bars_and_markers/barchart.html
labels = ['Model 1', 'Model 2', 'Model 3', 'Model 4']

lda_v_means = [np.mean(lda_a_1_scores), np.mean(lda_a_2_scores), 
               np.mean(lda_a_3_scores), np.mean(lda_a_4_scores)]
svc_v_means = [np.mean(svc_a_1_scores), np.mean(svc_a_2_scores), 
               np.mean(svc_a_3_scores), np.mean(svc_a_4_scores)]

x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, lda_v_means, width, label='LDA')
rects2 = ax.bar(x + width/2, svc_v_means, width, label='SVC')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Scores')
ax.set_title('Average cross validation scores of LDA and SVC for arousal prediction')
ax.set_xticks(x, labels)
ax.legend()

ax.bar_label(rects1, padding=3)
ax.bar_label(rects2, padding=3)

fig.tight_layout()

plt.show()