In [None]:
# Helper notebooks are executed first so the explicit imports below take precedence
# over any names they define.
%run ../preprocessing/Artifact_Removal/preprocecssing_helpers.ipynb
%run ../preprocessing/StimCodes.ipynb
%run ../Classification/ConcatEpochTrails.ipynb
%run ../PCA/Emmanuil-PCA.ipynb

import os
import time

import matplotlib.pyplot as plt
import mne
import numpy as np
from mne.decoding import UnsupervisedSpatialFilter
from mne.decoding import Vectorizer
from mne.viz import tight_layout
from scipy.stats import skew, kurtosis
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import ShuffleSplit  # noqa
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, Normalizer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC  # noqa

In [None]:
# Directory holding the saved epoch files. Raw string: '\E' is not a valid
# escape sequence, so the plain literal triggers an invalid-escape warning.
path = r'E:\EpochedEEG'
# os.listdir returns entries in arbitrary order; sort them so that index 2
# always selects the same file.
epoch_files = sorted(os.listdir(path))
file = os.path.join(path, epoch_files[2])

In [None]:
# Read the saved Epochs object from the selected file.
# preload=True asks MNE to load the data into memory when reading.
epoch = mne.read_epochs(file, preload= True)

In [None]:
# Quick visual check of the loaded epochs (3 channels, 10 epochs per view).
# The trailing ';' suppresses the repr of the returned value.
epoch.plot(n_channels=3, n_epochs=10);

In [None]:
# Get the indices of the trials in the epoch object.
# get_trial_index_list is not defined in this notebook; presumably it comes
# from one of the %run helper notebooks — verify there.
trial_index_list = get_trial_index_list(epoch_object= epoch)

In [None]:
# Build the new events for the epoch object from the trial indices and the
# stimulus combinations. Both the helper and `stim_combinations` are defined
# outside this notebook (presumably in the %run notebooks — verify there).
new_event_list = convert_event_ids_to_stim_combinations(epoch_object=epoch,
                                                        trial_index_list = trial_index_list,
                                                        stim_combinations = stim_combinations)

In [None]:
# Compare the shape of the current events array with the shape of the
# replacement events before swapping them in (next cell).
print(epoch.events.shape)
print(new_event_list.shape)

In [None]:
# Assign the new events to the current epoch object.
# A copy is stored because a later cell edits epoch.events in place; without
# the copy that edit would also rewrite new_event_list.
epoch.events = np.array(new_event_list)

# Assign the new event_ids to the current epoch object (dictionary
# found in ../Classification/ConcatEpochTrails.ipynb).
epoch.event_id = modality_lexicality_event_ids

In [None]:
# classification time!

# Collapse the stimulus codes in the last events column into two classes:
# codes < 700 are audio, codes >= 700 are visual.
# audio : 100 , visual : 101
AUDIO_LABEL, VISUAL_LABEL = 100, 101
MODALITY_THRESHOLD = 700

event_codes = epoch.events[:, -1]
# Both class labels are themselves below the threshold, so relabelling an
# already-relabelled column would turn every epoch into "audio". Only relabel
# while the column still contains something other than the two class labels,
# which makes this cell safe to re-run.
if not np.isin(event_codes, (AUDIO_LABEL, VISUAL_LABEL)).all():
    epoch.events[:, -1] = np.where(event_codes < MODALITY_THRESHOLD,
                                   AUDIO_LABEL, VISUAL_LABEL)
# NOTE(review): epoch.event_id is not updated here, so it no longer matches
# the codes stored in epoch.events — confirm nothing downstream relies on it.

In [None]:
# Normalisation pipeline: Vectorizer followed by StandardScaler, fitted on the
# full epoch data array. The fitted pipeline is the cell's displayed value.
norm_pipe = make_pipeline(
    Vectorizer(),
    StandardScaler(),
)
norm_pipe.fit(epoch.get_data())

In [None]:
# Apply the fitted normalisation pipeline to the epoch data and display the
# shape of the result.
scaled_data = norm_pipe.transform(epoch.get_data())
scaled_data.shape

In [None]:
# Scratch shape checks. Only the last expression of a cell is displayed, so
# the result of the first line is computed but not shown.
scaled_data[:,1].shape
epoch.get_data()[:,:20,].shape

In [None]:
# Scatter of scaled feature column 1 against column 2 (one point per epoch).
# The trailing ';' suppresses the repr of the returned artist.
plt.scatter(scaled_data[:,1],scaled_data[:,2]);

In [None]:
# Do PCA to reduce dimensions and the amount of data needed for classification.
N_PCA_COMPONENTS = 28  # number of principal components kept

pca = UnsupervisedSpatialFilter(PCA(N_PCA_COMPONENTS), average=False)

epoch_data = epoch.get_data()
# NOTE(review): the PCA is fitted on all epochs here, i.e. before the
# cross-validation splits in the classification cells, so held-out epochs
# contribute to the fitted components.
pca_data = pca.fit_transform(epoch_data)

In [None]:
# Display the shape of the PCA-transformed data.
pca_data.shape

In [None]:
# Classification pipeline on the PCA-transformed epochs:
# Vectorizer -> StandardScaler -> L1-penalised logistic regression,
# evaluated with 5-fold stratified cross-validation.
start = time.time()
clf = make_pipeline(Vectorizer(),
                    StandardScaler(),
                    # penalty='l1' needs a solver that supports it. scikit-learn's
                    # current default solver ('lbfgs') does not, so liblinear is
                    # named explicitly.
                    LogisticRegression(penalty='l1', solver='liblinear'))
end = time.time()
# This timing covers only the construction of the pipeline object.
print("clf elapsed time: {0}".format(end - start))

start_master = time.time()
cv = StratifiedKFold(n_splits=5, shuffle = True, random_state = 42)

labels = epoch.events[:,-1]
# Out-of-fold predictions: each fold fills in the entries of its test indices.
# Allocated with the labels' dtype so predictions are not stored as floats.
preds = np.empty(len(pca_data), dtype=labels.dtype)
for train, test in cv.split(pca_data, labels):
    start = time.time()
    clf.fit(pca_data[train], labels[train])
    preds[test] = clf.predict(pca_data[test])
    end = time.time()
    print("kfold elapsed time: {0}".format(end - start))
end = time.time()
print("classification elapsed time: {0}".format(end - start_master))


start = time.time()
target_names = ['Audio', 'Visual']
report = classification_report(labels, preds, target_names=target_names)
print(report)
end = time.time()
print("reporting elapsed time: {0}".format(end - start))

In [None]:
# Confusion matrix of the cross-validated predictions, normalised per true
# class (each row is divided by its row sum).
cm3 = confusion_matrix(labels, preds)
cm3_normalized = cm3.astype(float) / cm3.sum(axis=1)[:, np.newaxis]

# Plot confusion matrix
plt.imshow(cm3_normalized, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Audio vs. Visual using Logistic Regression')
plt.colorbar()
tick_marks = np.arange(len(target_names))
plt.xticks(tick_marks, target_names, rotation=45)
plt.yticks(tick_marks, target_names)
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Lay the figure out only after the axis labels are set, so they are taken
# into account when the spacing is computed.
plt.tight_layout()
plt.show()

In [None]:
# Classification pipeline on the PCA-transformed epochs:
# Vectorizer -> StandardScaler -> L1-penalised logistic regression,
# evaluated with 5-fold stratified cross-validation.
# NOTE(review): this cell repeats the earlier PCA-data classification cell
# with the same inputs and settings.
start = time.time()
clf = make_pipeline(Vectorizer(),
                    StandardScaler(),
                    # penalty='l1' needs a solver that supports it. scikit-learn's
                    # current default solver ('lbfgs') does not, so liblinear is
                    # named explicitly.
                    LogisticRegression(penalty='l1', solver='liblinear'))
end = time.time()
# This timing covers only the construction of the pipeline object.
print("clf elapsed time: {0}".format(end - start))

start_master = time.time()
cv = StratifiedKFold(n_splits=5, shuffle = True, random_state = 42)

labels = epoch.events[:,-1]
# Out-of-fold predictions: each fold fills in the entries of its test indices.
# Allocated with the labels' dtype so predictions are not stored as floats.
preds = np.empty(len(pca_data), dtype=labels.dtype)
for train, test in cv.split(pca_data, labels):
    start = time.time()
    clf.fit(pca_data[train], labels[train])
    preds[test] = clf.predict(pca_data[test])
    end = time.time()
    print("kfold elapsed time: {0}".format(end - start))
end = time.time()
print("classification elapsed time: {0}".format(end - start_master))


start = time.time()
target_names = ['Audio', 'Visual']
report = classification_report(labels, preds, target_names=target_names)
print(report)
end = time.time()
print("reporting elapsed time: {0}".format(end - start))

In [None]:
# Reduce dimensions by converting the 180 ms of time points into 3 values: variance, skewness, and kurtosis.

In [None]:
# Summarise the PCA-transformed data with get_mean_band (defined outside this
# notebook; presumably in one of the %run notebooks — verify there).
mean_statisitcs = get_mean_band(pca_data)

In [None]:
# Swap axes 1 and 2 of the summary statistics. The value is derived from
# pca_data rather than from mean_statisitcs itself, so re-running this cell
# gives the same result instead of swapping the axes back.
mean_statisitcs = get_mean_band(pca_data).swapaxes(1, 2)

In [None]:
# Classification pipeline on the summary statistics of the PCA data:
# Vectorizer -> StandardScaler -> L1-penalised logistic regression,
# evaluated with 5-fold stratified cross-validation.
start = time.time()
clf = make_pipeline(Vectorizer(),
                    StandardScaler(),
                    # penalty='l1' needs a solver that supports it. scikit-learn's
                    # current default solver ('lbfgs') does not, so liblinear is
                    # named explicitly.
                    LogisticRegression(penalty='l1', solver='liblinear'))
end = time.time()
# This timing covers only the construction of the pipeline object.
print("clf elapsed time: {0}".format(end - start))

start_master = time.time()
cv = StratifiedKFold(n_splits=5, shuffle = True, random_state = 42)

labels = epoch.events[:,-1]
# Out-of-fold predictions: each fold fills in the entries of its test indices.
# Allocated with the labels' dtype so predictions are not stored as floats.
preds = np.empty(len(mean_statisitcs), dtype=labels.dtype)
for train, test in cv.split(mean_statisitcs, labels):
    start = time.time()
    clf.fit(mean_statisitcs[train], labels[train])
    preds[test] = clf.predict(mean_statisitcs[test])
    end = time.time()
    print("kfold elapsed time: {0}".format(end - start))
end = time.time()
print("classification elapsed time: {0}".format(end - start_master))


start = time.time()
target_names = ['Audio', 'Visual']
report = classification_report(labels, preds, target_names=target_names)
print(report)
end = time.time()
print("reporting elapsed time: {0}".format(end - start))

In [None]:
# Confusion matrix of the cross-validated predictions, normalised per true
# class (each row is divided by its row sum).
cm3 = confusion_matrix(labels, preds)
cm3_normalized = cm3.astype(float) / cm3.sum(axis=1)[:, np.newaxis]

# Plot confusion matrix
plt.imshow(cm3_normalized, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Audio vs. Visual using Logistic Regression with statistics')
plt.colorbar()
tick_marks = np.arange(len(target_names))
plt.xticks(tick_marks, target_names, rotation=45)
plt.yticks(tick_marks, target_names)
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Lay the figure out only after the axis labels are set, so they are taken
# into account when the spacing is computed.
plt.tight_layout()
plt.show()

In [None]:
# Try the summary-statistics classification again on the raw epoch data (without PCA).

In [None]:
# Summary statistics of the raw (non-PCA) epoch data, with axes 1 and 2
# swapped, computed in a single expression.
epoch_mean_statistics = get_mean_band(epoch.get_data()).swapaxes(1, 2)

In [None]:
# Classification pipeline on the summary statistics of the raw epoch data:
# Vectorizer -> StandardScaler -> L1-penalised logistic regression,
# evaluated with 5-fold stratified cross-validation.
start = time.time()
clf = make_pipeline(Vectorizer(),
                    StandardScaler(),
                    # penalty='l1' needs a solver that supports it. scikit-learn's
                    # current default solver ('lbfgs') does not, so liblinear is
                    # named explicitly.
                    LogisticRegression(penalty='l1', solver='liblinear'))
end = time.time()
# This timing covers only the construction of the pipeline object.
print("clf elapsed time: {0}".format(end - start))

start_master = time.time()
cv = StratifiedKFold(n_splits=5, shuffle = True, random_state = 42)

labels = epoch.events[:,-1]
# Out-of-fold predictions: each fold fills in the entries of its test indices.
# Allocated with the labels' dtype so predictions are not stored as floats.
preds = np.empty(len(epoch_mean_statistics), dtype=labels.dtype)
for train, test in cv.split(epoch_mean_statistics, labels):
    start = time.time()
    clf.fit(epoch_mean_statistics[train], labels[train])
    preds[test] = clf.predict(epoch_mean_statistics[test])
    end = time.time()
    print("kfold elapsed time: {0}".format(end - start))
end = time.time()
print("classification elapsed time: {0}".format(end - start_master))


start = time.time()
target_names = ['Audio', 'Visual']
report = classification_report(labels, preds, target_names=target_names)
print(report)
end = time.time()
print("reporting elapsed time: {0}".format(end - start))

In [None]:
# Confusion matrix of the cross-validated predictions, normalised per true
# class (each row is divided by its row sum).
cm3 = confusion_matrix(labels, preds)
cm3_normalized = cm3.astype(float) / cm3.sum(axis=1)[:, np.newaxis]

# Plot confusion matrix
plt.imshow(cm3_normalized, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Audio vs. Visual - No PCA using Logistic Regression with statistics', size = 15)
plt.colorbar()
tick_marks = np.arange(len(target_names))
plt.xticks(tick_marks, target_names, rotation=45)
plt.yticks(tick_marks, target_names)
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Lay the figure out only after the axis labels are set, so they are taken
# into account when the spacing is computed.
plt.tight_layout()
plt.show()

In [None]:
# Display the shape of the raw-epoch summary statistics.
epoch_mean_statistics.shape

In [None]:
# Stand-alone preprocessing pipeline (no classifier): Vectorizer followed by
# StandardScaler.
preprocess = make_pipeline(
    Vectorizer(),
    StandardScaler(),
)

In [None]:
# Fit the preprocessing pipeline on all raw-epoch summary statistics and
# transform them in one step.
# NOTE(review): the scaler is fitted on every epoch before the train/test
# split in the next cell, so test epochs influence the scaling — confirm this
# is acceptable.
scaled_epoch_mean_statistics = preprocess.fit_transform(epoch_mean_statistics)

In [None]:
# Hold out 30% of the epochs for testing. stratify=labels keeps the
# audio/visual proportions the same in both parts, consistent with the
# stratified folds used by the cross-validated cells in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_epoch_mean_statistics, labels,
    test_size=0.3, random_state=42,
    stratify=labels)

In [None]:
# Plain logistic regression with scikit-learn's default settings (unlike the
# pipelines above, no penalty argument is passed).
lgr_clf = LogisticRegression()

In [None]:
# Fit the classifier on the training split.
lgr_clf.fit(X_train, y_train)

In [None]:
# Score the fitted classifier on the held-out test split.
lgr_clf.score(X_test, y_test)

In [None]:
# Peek at the first 15 class labels (last column of the events array).
epoch.events[:,-1][:15]

In [None]:
# Plot the epochs (20 channels, 9 epochs per view) with a descriptive title.
# The trailing ';' suppresses the repr of the returned value.
epoch.plot(n_channels= 20, n_epochs = 9, title = "Preprocessed and Epoched Data");