# Survival analysis with LFP spectral features

### Stops are events, pops are censored

In [1]:
import numpy as np
import pandas as pd
import physutils
import dbio
import os
from __future__ import division
import matplotlib.pyplot as plt
%matplotlib inline

plt.style.use('ggplot')
np.random.seed(123456)

## Load Data

In [2]:
# which patient, dataset to plot
dtup = 18, 1

In [3]:
# load data
dbname = os.path.expanduser('data/bart.hdf5')
lfpraw = dbio.fetch_all_such_LFP(dbname, *dtup)

## Preprocess Data

In [4]:
# remove global mean across all channels at each time then set each channel to mean 0
lfp = lfpraw.demean_global().demean()

In [5]:
filters = ['delta', 'theta', 'alpha', 'beta', 'gamma']
lfp = lfp.bandlimit(filters)

In [6]:
# decimate to 40 Hz, get instantaneous power, censor, and z-score each channel
# lfp = lfp.decimate(5).instpwr().censor().zscore()

In [7]:
# decimate to 10 Hz
lfp = lfp.decimate(5).decimate(4).instpwr().zscore()

In [12]:
# get events
evt = dbio.fetch(dbname, 'events', *dtup)
cols = ['banked', 'popped', 'start inflating', 'trial_type']

if 'is_control' in evt.columns:
    evt_tmp = evt.query('is_control == False')[cols]
else:
    evt_tmp = evt.loc[:, cols]

# add a binary column (1 = voluntary stop)    
evt_tmp['event'] = np.isnan(evt_tmp['popped']).astype('int')

# add a column for stop time (regardless of cause)
evt_tmp['stop'] = evt.loc[:, ['banked', 'popped']].mean(axis=1)

# drop unneeded columns
evt_tmp = evt_tmp.drop(['banked', 'popped'], axis=1)
evt_tmp = evt_tmp.rename(columns={'start inflating': 'start'})

evt_tmp.head()

Unnamed: 0_level_0,start,trial_type,event,stop
trial,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,43.691,1,1,44.229
1,46.797,3,1,47.26
2,51.387,2,1,58.188
3,61.828,2,1,67.751
4,72.42,3,0,75.99


## Remove unneeded data

- take only non-control trials
- get only time points between trial start and event (pop or stop)

In [47]:
chunks = []
event_idx = list(evt_tmp.columns).index('event')
for trial, row in evt_tmp.iterrows():
    start, stop = row['start'], row['stop']
    this_chunk = lfp.loc[start:stop].copy()
    this_chunk['event'] = 0  # no event until the last bin
    this_chunk.iloc[-1, event_idx] = int(row['event'])  # set last bin correctly
    this_chunk['ttype'] = int(row['trial_type'])

    chunks.append(this_chunk)

# concatenate chunks, make non-power events their series    
meanpwr = pd.concat(chunks)
event = meanpwr['event']
ttype = pd.get_dummies(meanpwr['ttype'])
ttype.columns = ['ttype' + str(idx) for idx in ttype.columns]
meanpwr = meanpwr.drop(['event', 'ttype'], axis=1)

In [42]:
# make interaction terms and squares
int_terms = []
for i in range(len(meanpwr.columns)):
    for j in range(i + 1):
        if i == j:
            col = meanpwr.iloc[:, i] ** 2
            band, chan = col.name.split('.')
            col.name = "{}.{}.{}.{}".format(band, chan, band, chan)
        else:
            icol = meanpwr.iloc[:, i]
            jcol = meanpwr.iloc[:, j]
            col = icol * jcol
            iband, ichan = icol.name.split('.')
            jband, jchan = jcol.name.split('.')
            col.name = "{}.{}.{}.{}".format(iband, ichan, jband, jchan)

        int_terms.append(col)

In [49]:
trainset = pd.concat([event, ttype, meanpwr] + int_terms, axis=1, join='inner')
# trainset = trainset.dropna()  # can't send glmnet any row with a NaN
trainset.head()

Unnamed: 0_level_0,event,ttype1,ttype2,ttype3,delta.17,delta.18,delta.19,delta.20,delta.21,delta.22,...,gamma.48.gamma.39,gamma.48.gamma.40,gamma.48.gamma.41,gamma.48.gamma.42,gamma.48.gamma.43,gamma.48.gamma.44,gamma.48.gamma.45,gamma.48.gamma.46,gamma.48.gamma.47,gamma.48.gamma.48
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
43.7,0,1,0,0,4.034722,-0.369205,-0.482158,-0.475573,-0.508196,-0.328304,...,-0.052026,6.254969,0.656085,23.5708,0.171102,10.420873,11.669609,0.311388,3.041516,19.577036
43.8,0,1,0,0,2.402771,-0.728572,-0.567529,-0.351588,0.977441,-0.150758,...,0.17119,6.244879,-0.83885,31.223646,0.587477,10.156744,5.967136,-0.013543,3.954699,20.072959
43.9,0,1,0,0,4.951854,1.148581,0.177016,-0.080601,-0.062901,0.957006,...,-0.051558,9.474805,-0.396304,21.944122,0.288479,12.909397,10.272966,0.078048,2.534273,20.409032
44.0,0,1,0,0,1.750929,4.407977,1.816575,0.451829,1.077129,-0.18289,...,0.043384,4.09001,0.127207,22.00634,0.235666,7.467125,7.640104,0.225138,1.556562,14.090097
44.1,0,1,0,0,-0.597566,2.012673,1.106268,0.485475,-0.40486,-0.896854,...,0.082642,4.279624,-0.656881,12.618432,0.340466,4.718906,3.831619,-0.041174,2.396388,15.669234


In [50]:
print trainset.shape

(9076, 13044)


## Run sparse regression in R

In [None]:
%load_ext rpy2.ipython

In [30]:
%%R
set.seed(77654)
suppressMessages(library(glmnet))
source('glm_helpers.R')

In [35]:
%Rpush trainset
%R fitobj <- run_glm(data.matrix(trainset), measure='auc');

<img src="auc.svg"/>

In [32]:
%%R -o auc
source('helpers.R')

# get performance
glmobj <- fitobj$glmobj
min.ind <- which(glmobj$lambda == glmobj$lambda.1se)
auc <- glmobj$cvm[min.ind]

In [33]:
print "Area Under the Curve = {}".format(auc)

Area Under the Curve = [ 0.71054498]
