# Classification Experiment - Lexicality (Language vs. Non-language)
By: Shateesh Bhugwansing



### Experiment 1 - Logistic Regression

__Data__: EpochedEEG/20131216_1441_epo.fif (Epoched data from Emmanuil's batch script)

__Problem__: Classify epochs as language vs. non-language.

__Procedure__:
1. Filter the epoched data, keeping only epochs with stim codes 1-24.
2. Label events as either language or non-language, using the dictionary that Emmanuil created (preprocessing/StimCodes.ipynb).
3. Run a Logistic Regression classifier on the labeled data.


In [1]:
%run ../preprocessing/StimCodes.ipynb
import mne
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
matplotlib.rcParams.update({'font.size': 11})

In [3]:
# Display the modal_lexi dictionary: condition key -> list of stim-code tuples.
# NOTE(review): presumably defined by the %run of StimCodes.ipynb in the setup cell -- confirm.
modal_lexi

{'AALL': [(5, 6, 4), (17, 18, 16)],
 'AALN': [(11, 6, 10), (23, 18, 22)],
 'AANL': [(5, 12, 4), (17, 24, 16)],
 'AANN': [(11, 12, 10), (23, 24, 22)],
 'AVLL': [(2, 6, 1), (14, 18, 13)],
 'AVNN': [(8, 12, 7), (20, 24, 19)],
 'VALL': [(5, 3, 4), (17, 15, 16)],
 'VANN': [(11, 9, 10), (23, 21, 22)],
 'VVLL': [(2, 3, 1), (14, 15, 13)],
 'VVLN': [(8, 3, 7), (20, 15, 19)],
 'VVNL': [(2, 9, 1), (14, 21, 13)],
 'VVNN': [(8, 9, 7), (20, 21, 19)]}

In [4]:
# Read the pre-epoched EEG recording from disk.
# NOTE: the path is an absolute location on an external volume; adjust it when
# running on another machine.
path = '/Volumes/SB/EpochedEEG/20131216_1441_epo.fif'
# preload=True loads all epochs into memory up front instead of on first access.
epoch = mne.read_epochs(path, preload=True)

Reading /Volumes/SB/EpochedEEG/20131216_1441_epo.fif ...
    Found the data of interest:
        t =    -199.22 ...     500.00 ms
        0 CTF compensation matrices available
5424 matching events found
Applying baseline correction (mode: mean)
5424 matching events found
Applying baseline correction (mode: mean)
Not setting metadata
0 projection items activated


In [5]:
# Keep only the epochs whose stim code (last column of the events array)
# lies in the inclusive range 1..24.
stim_codes = epoch.events[:, -1]
filtered_epoch = epoch[np.logical_and(stim_codes >= 1, stim_codes <= 24)]

In [11]:
# Inspect the shape of the events array that remains after filtering
# (one row per retained epoch).
filtered_epoch.events.shape

(1920, 3)

In [12]:
# Extract the filtered epochs as a plain NumPy array; MNE documents the
# layout as (n_epochs, n_channels, n_times).
filtered_epoch_data = filtered_epoch.get_data()
# Display the array shape as a sanity check on the number of retained epochs.
filtered_epoch_data.shape

(1920, 129, 180)

In [33]:
# Collect the stim-code tuple lists of every condition whose key ends in 'L'
# (the "language" conditions), in dictionary order.
language = [modal_lexi[condition] for condition in modal_lexi if condition[-1] == 'L']

In [34]:
# Display the stim-code tuple lists selected for the language conditions.
language


[[(5, 6, 4), (17, 18, 16)],
 [(5, 12, 4), (17, 24, 16)],
 [(2, 6, 1), (14, 18, 13)],
 [(5, 3, 4), (17, 15, 16)],
 [(2, 3, 1), (14, 15, 13)],
 [(2, 9, 1), (14, 21, 13)]]

In [16]:
# Collect the stim-code tuple lists of every condition whose key ends in 'N'
# (the "non-language" conditions), in dictionary order.
non_language = [modal_lexi[condition] for condition in modal_lexi if condition[-1] == 'N']

In [17]:
# Display the stim-code tuple lists selected for the non-language conditions.
non_language

[[(11, 6, 10), (23, 18, 22)],
 [(11, 12, 10), (23, 24, 22)],
 [(8, 12, 7), (20, 24, 19)],
 [(11, 9, 10), (23, 21, 22)],
 [(8, 3, 7), (20, 15, 19)],
 [(8, 9, 7), (20, 21, 19)]]

In [64]:
# language = [ids for row in language for tup in row for ids in tup]


In [66]:
# NOTE: By inspection of the language and non_language lists, the last event id
# of each tuple is the one that indicates language vs. non-language:
#   language:     1, 4, 13, 16
#   non_language: 7, 10, 19, 22

# Single mapping from each of those stim codes to its class label, so the
# code sets live in one place and each epoch is looked up exactly once.
code_to_label = {
    1: 'L', 4: 'L', 13: 'L', 16: 'L',
    7: 'N', 10: 'N', 19: 'N', 22: 'N',
}

# Build one label per epoch whose stim code appears in the mapping, in epoch
# order. Epochs carrying any other stim code are skipped, so `labels` only
# lines up with an epochs object restricted to these eight codes -- it is
# shorter than the full `filtered_epoch`.
labels = [
    code_to_label[code]
    for code in filtered_epoch.events[:, -1]
    if code in code_to_label
]

In [91]:
# Restrict the epochs to those whose stim code (last column of the events
# array) is one of the eight codes that mark language (1, 4, 13, 16) or
# non-language (7, 10, 19, 22) events. np.isin builds the boolean mask in a
# single call instead of OR-ing eight separate equality comparisons.
lexicality_codes = [1, 4, 13, 16, 7, 10, 19, 22]
filtered_epoch_lex = filtered_epoch[
    np.isin(filtered_epoch.events[:, -1], lexicality_codes)
]