# Classification Experiment - Lexicality (Language vs. Non-language)
By: Shateesh Bhugwansing



### Experiment 1 - Logistic Regression

__Data__: EpochedEEG/20131216_1441_epo.fif (Epoched data from Emmanuil's batch script)

__Problem__: Classify language vs. Non-language epochs 

__Procedure__:
1. filter the epoched data for epochs 1-24 
2. label events by either language or non-language, using the dictionary that Emmanuil created (preprocessing/StimCodes
3. Run Logistic Regression classifier on the labeled data. 


In [1]:
%run ../preprocessing/StimCodes.ipynb
import mne
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
matplotlib.rcParams.update({'font.size': 11})

In [None]:
modal_lexi

In [None]:
# read in data 
path = '/Volumes/SB/EpochedEEG/20131216_1441_epo.fif'
epoch = mne.read_epochs(path, preload=True)

In [None]:
# filter data for stim codes 1-24 
filtered_epoch = epoch[(epoch.events[:,-1] >= 1) & (epoch.events[:,-1] <= 24)]

In [None]:
filtered_epoch.events.shape

In [None]:
# get epoch data 
filtered_epoch_data = filtered_epoch.get_data()
filtered_epoch_data.shape

In [None]:
# create labels 
language  = [v for k, v in modal_lexi.items() if k[-1] == 'L']

In [None]:
language


In [None]:
non_language = [v for k, v in modal_lexi.items() if k[-1] == 'N']

In [None]:
non_language

In [None]:
# language = [ids for row in language for tup in row for ids in tup]
# ignore this cell

In [132]:
# NOTE: By inspection of the language and non_language lists, you can see that the last event_id, which
# indicates language vs. non-language, corresponds to the following:
# language: 4, 1, 16, 13
# non_language: 10, 7, 22, 19

# create a labels list accordingly: 
labels = []
for ids in filtered_epoch.events[:,-1]:
    if ids in (1,4, 13, 16):
        labels.append(1) # 1 = language
    
    if ids in (7,10,19,22):
        labels.append(0) # 0 = non-language

In [133]:
filtered_epoch_lex = filtered_epoch[(filtered_epoch.events[:,-1] == 1) | (filtered_epoch.events[:,-1] == 4) |
                                    (filtered_epoch.events[:,-1] == 13) | (filtered_epoch.events[:,-1] == 16) |
                                   (filtered_epoch.events[:,-1] == 7) | (filtered_epoch.events[:,-1] == 10) |
                                   (filtered_epoch.events[:,-1] == 19) | (filtered_epoch.events[:,-1] == 22)]

In [134]:
# get imports used for classification algos 

from mne.decoding import Vectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.svm import SVC  # noqa
from sklearn.model_selection import ShuffleSplit  # noqa

from sklearn.metrics import confusion_matrix
from mne.viz import tight_layout

In [135]:
# perform logistic regression, use cross validation

# clf = make_pipeline(
#                     Vectorizer(),
#                     MinMaxScaler(),
#                     LogisticRegression(penalty='l1'))

cv = StratifiedKFold(n_splits = 10, shuffle = True, random_state = 42)
vectorizer = Vectorizer()
data = vectorizer.fit_transform(filtered_epoch_lex._data)
clf = LogisticRegression()


In [136]:
preds = np.empty(len(filtered_epoch_lex._data))

for train, test in cv.split(data, labels):
    clf.fit(data[train], labels[train])
    preds[test] = clf.predict(data[test])
    
target_names = [ 'L', 'N']
report = classification_report(labels, preds, target_names=target_names)
print(report)

TypeError: only integer scalar arrays can be converted to a scalar index