# Verify that SVC classification is using information from EEG features

## Set up

In [None]:
import os
import glob
import time
import numpy as np

from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

from get_feature_response import *

From the `prediction_model` notebook, we know the value of `min_record_len`:

In [None]:
# Value carried over from the prediction_model notebook; passed as `duration`
# to bdf_to_features further down.
# NOTE(review): presumably the shortest recording length, in seconds — confirm.
min_record_len = 35

### Get preprocessed data

In [None]:
%%capture 
# Start the wall-clock timer; the next cell reports the elapsed time.
start_time = time.time()

raw_path = "raw"
all_sessions = os.listdir(raw_path)

# Read every entry found in `raw_path` with preload=True, keeping the order
# returned by os.listdir.
# NOTE(review): os.listdir gives no ordering guarantee, and `mne` is not
# imported explicitly here — presumably it comes in through the star import
# from get_feature_response; confirm.
loaded_raws = [
    mne.io.Raw(os.path.join(raw_path, session), preload=True)
    for session in all_sessions
]

In [None]:
# Report how long the loading cell took (start_time is set there).
elapsed = time.time() - start_time
print("Took %ss to finish." % elapsed)

### Get emotion response

In [None]:
dataset_path = "/net2/expData/affective_eeg/mahnob_dataset/Sessions"
meta_data_path = "session.xml"
all_session_nums = os.listdir(dataset_path) # List of all session names

session_nums = [] # Sessions with bdf recordings

# Remember the notebook's working directory so it can always be restored
curr_dir = os.getcwd()

# From data manual, bdf file may not exist if "the trials is missing due to
# technical difficulties" (pg 15).
# Skip all sessions with no bdf recordings
try:
    for session in all_session_nums:
        session_path = os.path.join(dataset_path, session)
        os.chdir(session_path)
        bdf_list = glob.glob("*.bdf")

        if len(bdf_list) == 1:
            session_nums.append(session)
        elif len(bdf_list) > 1:
            raise ValueError("Cannot handle multiple bdf files in one session.")
finally:
    # Restore the working directory even when the scan raises, so a failure
    # does not leave the kernel inside a dataset folder.
    os.chdir(curr_dir)

print("Back to directory: ", os.getcwd())

# Collect one get_affect result per session that has a recording, reading
# that session's session.xml.
response_list = []

for session in session_nums:
    xml_path = os.path.join(dataset_path, session, meta_data_path)
    resp = get_affect(xml_path, cutoff=5)
    response_list.append(resp)

# Later cells index columns 0 and 1 of this array as classification targets.
response_array = np.array(response_list)

In [None]:
# Sanity check: show the dimensions of the stacked responses.
print("response_array shape:", response_array.shape)

## Check SVC classification accuracy using irrelevant data

### Use occipital lobe data

In [None]:
# Settings handed to bdf_to_features in the extraction cell below.
# Frequency bands, passed as `freq_bands`.
FREQ_BANDS = ["alpha", "beta"]
# Channels, passed as `chs`; this section deliberately uses occipital-lobe
# electrodes as "irrelevant" data.
CHANNELS = ["PO3", "O1", "Oz", "O2", "PO4"]
# Passed as `window`.
# NOTE(review): presumably a window length in seconds — confirm in bdf_to_features.
WINDOW = 2.5

__Warning: Long run time.__

In [None]:
%%capture
# Restart the timer; the next cell reports how long extraction took.
start_time = time.time()

features_list, features_red_dim_list = [], []

# Options shared by every bdf_to_features call.
feature_kwargs = dict(
    duration=min_record_len,
    freq_bands=FREQ_BANDS,
    chs=CHANNELS,
    window=WINDOW,
)

# bdf_to_features returns a pair per recording; keep both parts in
# parallel lists, in the same order as loaded_raws.
for raw in loaded_raws:
    _features, _features_red_dim = bdf_to_features(raw=raw, **feature_kwargs)
    features_list.append(_features)
    features_red_dim_list.append(_features_red_dim)

features_array = np.array(features_list)
features_red_dim_array = np.array(features_red_dim_list)

In [None]:
# Report the extraction time and the dimensions of both feature sets.
elapsed = time.time() - start_time
print("Took %ss to finish." % elapsed)
print("features_list shape:", features_array.shape)
print("features_red_dim_list shape:", features_red_dim_array.shape)

In [None]:
# 5-fold cross-validation of a StandardScaler + SVC pipeline on the
# reduced-dimension occipital features, with column 0 of response_array as
# the target.
# NOTE(review): column 0 is presumably valence, going by the `_v` naming —
# confirm against get_affect.
svc_v = make_pipeline(StandardScaler(), SVC())
svc_v_1_scores = cross_val_score(
    svc_v,
    features_red_dim_list,
    response_array[:, 0],
    cv=5,
)
print(svc_v_1_scores)

In [None]:
# 5-fold cross-validation of a StandardScaler + SVC pipeline on the
# reduced-dimension occipital features, with column 1 of response_array as
# the target.
# NOTE(review): column 1 is presumably arousal, going by the `_a` naming —
# confirm against get_affect.
svc_a = make_pipeline(StandardScaler(), SVC())
svc_a_1_scores = cross_val_score(
    svc_a,
    features_red_dim_list,
    response_array[:, 1],
    cv=5,
)
print(svc_a_1_scores)

## Check SVC classification accuracy using random data

In [None]:
# Control input: uniform noise in [0, 100) with one row per response, so the
# sample count always matches the labels passed to cross_val_score instead of
# relying on a hard-coded 527.
# NOTE(review): 140 columns presumably mirrors the width of the
# reduced-dimension features — confirm. The draw is unseeded, so scores vary
# between runs.
n_samples = response_array.shape[0]
n_features = 140
random_data = np.random.uniform(0, 100, (n_samples, n_features))

In [None]:
# Control run: 5-fold cross-validation of a bare SVC on the random matrix,
# with column 0 of response_array as the target.
# NOTE(review): unlike the feature-based run, no StandardScaler is applied
# here — confirm that the comparison is meant to differ in this way.
svc_v = SVC()
svc_v_2_scores = cross_val_score(
    svc_v,
    random_data,
    response_array[:, 0],
    cv=5,
)
print(svc_v_2_scores)

In [None]:
# Control run: 5-fold cross-validation of a bare SVC on the random matrix,
# with column 1 of response_array as the target.
svc_a = SVC()
svc_a_2_scores = cross_val_score(svc_a, random_data, response_array[:,1], cv=5)
# Show the scores computed in this cell; printing svc_a_1_scores here would
# just repeat the feature-based result and make the two runs look identical.
print(svc_a_2_scores)

__So it appears that the SVC is not classifying properly.__

## Check LDA accuracy using random data

In [None]:
# 5-fold cross-validation of LDA on the random matrix, with column 0 of
# response_array as the target; print per-fold scores and their mean.
lda_v = LDA()
lda_v_1_scores = cross_val_score(
    lda_v,
    random_data,
    response_array[:, 0],
    cv=5,
)
print(lda_v_1_scores)
mean_score = np.mean(lda_v_1_scores)
print("Mean CV score: ", mean_score)

In [None]:
# 5-fold cross-validation of LDA on the random matrix, with column 1 of
# response_array as the target — the same column the svc_a cells use.
# Using column 0 here would only repeat the lda_v run.
lda_a = LDA()
lda_a_1_scores = cross_val_score(lda_a, random_data, response_array[:,1], cv=5)
print(lda_a_1_scores)
print("Mean CV score: ", np.mean(lda_a_1_scores))

In [None]:
# Draw a fresh uniform-noise matrix in [0, 100) for a second LDA control run,
# with one row per response so the sample count always matches the labels
# instead of relying on a hard-coded 527.
# NOTE(review): 140 columns presumably mirrors the width of the
# reduced-dimension features — confirm. The draw is unseeded, so scores vary
# between runs.
n_samples = response_array.shape[0]
n_features = 140
random_data = np.random.uniform(0, 100, (n_samples, n_features))

In [None]:
# Repeat the LDA control on the current random matrix with column 0 of
# response_array as the target. This rebinds lda_v_1_scores, replacing any
# earlier value held under that name.
lda_v = LDA()
lda_v_1_scores = cross_val_score(
    lda_v,
    random_data,
    response_array[:, 0],
    cv=5,
)
print(lda_v_1_scores)
mean_score = np.mean(lda_v_1_scores)
print("Mean CV score: ", mean_score)

In [None]:
# Repeat the LDA control on the current random matrix with column 1 of
# response_array as the target — the same column the svc_a cells use.
# Using column 0 here would only repeat the lda_v run.
lda_a = LDA()
lda_a_1_scores = cross_val_score(lda_a, random_data, response_array[:,1], cv=5)
print(lda_a_1_scores)
print("Mean CV score: ", np.mean(lda_a_1_scores))