In [1]:
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 29 14:00:37 2015

@author: alexandrebarachant

Beat the benchmark with CSP and Logistic regression.

General Idea :

The goal of this challenge is to detect events related to hand movements. Hand
movements are characterized by changes in signal power in the mu (~10Hz) and
beta (~20Hz) frequency bands over the sensorimotor cortex. CSP spatial filters
are trained to enhance the signal coming from this brain area; instantaneous
power is extracted and smoothed, and then fed into a logistic regression.

Preprocessing :

Signals are bandpass-filtered between 7 and 30 Hz to capture most of the signal
of interest. 4 CSP spatial filters are then applied to the signal, resulting in
4 new time series. In order to train the CSP spatial filters, the EEG is epoched
using a window of 2 seconds before and after the 'HandStart' event. CSP training
needs two classes: the epochs after the 'HandStart' event are assumed to contain
patterns corresponding to hand movement, and the epochs before it are assumed to
contain resting state.

Feature extraction :

Preprocessing is applied, the spatially filtered signals are then rectified and
convolved with a 0.5 second rectangular window for smoothing. Then a logarithm
is applied. The result is a vector of dimension 4 for each time sample.

Classification :

For each of the 6 event types, a logistic regression is trained. For training
only, features are downsampled in order to speed up the process. Predictions are
the probabilities of the logistic regression.

"""

print(__doc__)

import numpy as np
import pandas as pd
from mne.io import RawArray
from mne.channels import read_montage
from mne.epochs import concatenate_epochs
from mne import create_info, find_events, Epochs, concatenate_raws, pick_types
from mne.decoding import CSP

from sklearn.linear_model import LogisticRegression
from glob import glob

from scipy.signal import butter, lfilter, convolve, boxcar
from joblib import Parallel, delayed

def creat_mne_raw_object(fname, read_events=True, sfreq=500.0):
    """Create a mne raw instance from a csv file.

    Parameters
    ----------
    fname : str
        Path to the EEG csv file. The first column is skipped (presumably a
        sample id -- confirm against the data); every other column is loaded
        as one EEG channel named after its column header.
    read_events : bool
        When True, the companion events file is also read. Its path is
        ``fname`` with the last occurrence of '_data' replaced by '_events'.
        Every column of that file except the first is appended to the
        recording as a 'stim' channel.
    sfreq : float
        Sampling frequency handed to ``create_info``. Defaults to 500.0.

    Returns
    -------
    raw : RawArray
        Raw object with the EEG channels first, followed by the stim channels
        when ``read_events`` is True.
    """
    # Read EEG file
    data = pd.read_csv(fname)

    # get channel names (all columns but the first)
    ch_names = list(data.columns[1:])

    # read EEG standard montage from mne; this has to happen before the stim
    # channel names are appended to ch_names below
    montage = read_montage('standard_1005', ch_names)

    ch_type = ['eeg'] * len(ch_names)
    data = np.array(data[ch_names]).T

    if read_events:
        # events file: swap only the trailing '_data' so that a directory
        # name containing '_data' is left untouched
        ev_fname = '_events'.join(fname.rsplit('_data', 1))
        events = pd.read_csv(ev_fname)
        events_names = list(events.columns[1:])
        events_data = np.array(events[events_names]).T

        # one stim channel per event column, so that ch_type and ch_names
        # always have the same length whatever the events file contains
        ch_type.extend(['stim'] * len(events_names))
        ch_names.extend(events_names)
        # stack the event rows below the EEG rows
        data = np.concatenate((data, events_data))

    # create and populate MNE info structure
    info = create_info(ch_names, sfreq=sfreq, ch_types=ch_type,
                       montage=montage)
    info['filename'] = fname

    # create raw object
    raw = RawArray(data, info, verbose=False)

    return raw

# Subject ids to process. They are 1-based because each id is substituted
# into the 'subj%d' part of the training file name inside the loop.
subjects = range(1, 2)
ids_tot = []
pred_tot = []

# design a butterworth bandpass filter
# The cut-offs are divided by 250.0, i.e. half of the 500 Hz sampling rate
# used when the raw object is created, to get normalised frequencies.
freqs = [7, 30]
b, a = butter(5, np.array(freqs) / 250.0, btype='bandpass')

# Switch for the causal band-pass step in the loop below. It is off by
# default, so the CSP filters are trained on unfiltered data; set it to True
# to filter the EEG channels with (b, a) before epoching.
apply_bandpass = False

# CSP parameters
# Number of spatial filters to use
nfilters = 4

# convolution
# window (in samples) for smoothing features
nwin = 250

# training subsample
subsample = 10

# submission file
submission_file = 'beat_the_benchmark.csv'
cols = ['HandStart', 'FirstDigitTouch',
        'BothStartLoadPhase', 'LiftOff',
        'Replace', 'BothReleased']

for subject in subjects:
    epochs_tot = []
    y = []

    ################ READ DATA ################################################
    # Only the first series of the current subject is loaded.
    fname = '../train/subj%d_series1_data.csv' % subject
    raw = creat_mne_raw_object(fname)
    # pick eeg signal
    picks = pick_types(raw.info, eeg=True)

    # Filter data for alpha frequency and beta band
    # Note that MNE implements a zero phase (filtfilt) filtering not
    # compatible with the rule of future data.
    # Here we use a left (causal) filter compatible with this constraint.
    # The call is parallelized over channels to speed up the script.
    if apply_bandpass:
        raw._data[picks] = np.array(
            Parallel(n_jobs=-1)(delayed(lfilter)(b, a, raw._data[i])
                                for i in picks))

    ################ CSP Filters training #####################################
    # get event position corresponding to HandStart
    events = find_events(raw, stim_channel='HandStart', verbose=False)

    # epochs signal for 2 seconds after the event
    epochs = Epochs(raw, events, {'during': 1}, 0, 2, proj=False,
                    picks=picks, baseline=None, preload=True,
                    add_eeg_ref=False, verbose=False)

    epochs_tot.append(epochs)
    y.extend([1] * len(epochs))

    # epochs signal for 2 seconds before the event, this corresponds to the
    # rest period.
    epochs_rest = Epochs(raw, events, {'before': 1}, -2, 0, proj=False,
                         picks=picks, baseline=None, preload=True,
                         add_eeg_ref=False, verbose=False)

    # Workaround to be able to concatenate epochs with MNE: both sets must
    # share the same time axis
    epochs_rest.times = epochs.times

    y.extend([-1] * len(epochs_rest))
    epochs_tot.append(epochs_rest)

    # Concatenate all epochs
    epochs = concatenate_epochs(epochs_tot)

    # get data and the matching labels (1 = after the event, -1 = before)
    X = epochs.get_data()
    y = np.array(y)

    # train CSP
    csp = CSP(n_components=nfilters, reg='lws')
    csp.fit(X, y)
    


Created on Mon Jun 29 14:00:37 2015

@author: alexandrebarachant

Beat the benchmark with CSP and Logisitic regression.

General Idea :

The goal of this challenge is to detect events related to hand movements. Hand 
movements are caracterized by change in signal power in the mu (~10Hz) and beta
(~20Hz) frequency band over the sensorimotor cortex. CSP spatial filters are
trained to enhance signal comming from this brain area, instantaneous power is
extracted and smoothed, and then feeded into a logisitic regression.

Preprocessing :

Signal are bandpass-filtered between 7 and 30 Hz to catch most of the signal of
interest. 4 CSP spatial filters are then applied to the signal, resutlting to
4 new time series.  In order to train CSP spatial filters, EEG are epoched 
using a window of 2 second before and after the event 'HandStart'. CSP training
needs two classes. the epochs after Replace event are assumed to contain 
patterns corresponding to hand movement, and epochs before are assumed 

In [2]:
csp.filters_.shape

(32, 32)

In [28]:
raw._data[:,68]

array([ 234.,  470.,  248.,  203.,  264.,  516.,  499.,  482.,   50.,
        177.,  582., -229.,  213.,  169.,  541.,  707.,  196.,   20.,
        291.,  456.,  266.,  350.,  556.,  385.,  441.,  127.,  564.,
        314.,  530.,  275.,  184.,  695.,    0.,    0.,    0.,    0.,
          0.,    0.])

In [43]:
X[33:36]

array([[[ 103.,   87.,   79., ...,   65.,   93.,   95.],
        [ 343.,  337.,  272., ...,  269.,  332.,  380.],
        [ 450.,  424.,  434., ...,  381.,  344.,  355.],
        ..., 
        [ -74.,  -45.,  -60., ..., -106., -205., -205.],
        [-112.,  -73.,  -92., ..., -161., -253., -252.],
        [  45.,   93.,   17., ...,  122.,   45.,    7.]],

       [[ 234.,  194.,  214., ...,  493.,  474.,  435.],
        [ 470.,  540.,  547., ...,  506.,  342.,  206.],
        [ 248.,  233.,  361., ...,  696.,  758.,  762.],
        ..., 
        [ 275.,  261.,  282., ...,   -4., -104.,  -46.],
        [ 184.,  151.,  171., ...,  -60., -110.,  -71.],
        [ 695.,  650.,  669., ...,  436.,  400.,  449.]],

       [[ -30.,   15.,   49., ...,  465.,  516.,  554.],
        [ 610.,  628.,  599., ...,  617.,  520.,  443.],
        [ 604.,  537.,  513., ...,  525.,  535.,  522.],
        ..., 
        [ 404.,  420.,  394., ...,  209.,  173.,  156.],
        [ 414.,  430.,  404., ...,  335., 