# Process the raw submissions

This notebook loads all raw submissions and computes the scores.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import date
import spikefinder_eval as se

# Select seaborn's 'whitegrid' style for any figure drawn later in the notebook.
sns.set_style('whitegrid')

%matplotlib inline

In [2]:
# Names of the submissions to score; each one is used as the sub-folder of
# data/raw/ from which that algorithm's predictions are read.
algos = [
    'stm',
    'chenkov',
    'deneux',
    'friedrich',
    'machado',
    'mineault',
    'pachitariu',
    'ringach',
    'rupprecht',
    'speiser',
    'bolte',
    'oopsi',
]

# Dataset identifiers of the form '<number>.<split>': the five test sets
# first, followed by the five training sets.
dsets = ['{}.{}'.format(number, split)
         for split in ('test', 'train')
         for number in range(1, 6)]


In [3]:
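# Quick-run override: uncomment both assignments to score a single algorithm
# on a single dataset instead of the full lists defined above.
#algos =  ['stm']

#dsets = ['1.test']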


In [4]:
# Show the dataset identifiers that the scoring loop will iterate over.
dsets

['1.test',
 '2.test',
 '3.test',
 '4.test',
 '5.test',
 '1.train',
 '2.train',
 '3.train',
 '4.train',
 '5.train']

In [12]:
# Score every submission on every dataset and collect the results in `tab`,
# a long-format table with one row per (algorithm, dataset, measure, cell).
#
# Reads   : `algos`, `dsets` (lists of names defined in an earlier cell) and
#           the CSV files data/raw/truth/<dset>.spikes.csv and
#           data/raw/<algo>/<dset>.spikes.csv, loaded via se.load.
# Produces: `tab` with columns value, algo, dset, split, measure, cell.
#           The row index restarts at 0 for every (algo, dset, measure) group.

factor = 4  # downsampling factor passed to every se.score call

# Constants of the per-algorithm pre-processing, values unchanged from the
# original analysis: `offset` is added to the machado and chenkov predictions,
# and the first `burn_in` rows are excluded from the chenkov statistics.
offset = .01
burn_in = 10

columns = ['value', 'algo', 'dset', 'split', 'measure', 'cell']
frames = []  # one small frame per (algo, dset, measure); concatenated once at the end

for dset in dsets:
    # '1.test' -> dataset id '1' and split 'test'; splitting on the dot also
    # handles ids with more than one character.
    dset_id, split = dset.split('.', 1)

    truth = se.load('data/raw/truth/{}.spikes.csv'.format(dset))

    for algo in algos:
        print(dset, algo)

        data = se.load('data/raw/{}/{}.spikes.csv'.format(algo, dset))

        if algo == 'machado':
            # Drop the first column of this submission and shift by the offset.
            # NOTE(review): the first column is presumably a row index written
            # by the submitter - confirm against the raw file.
            data = data.iloc[:, 1:] + offset

        if algo == 'chenkov':
            # Rescale each column by its own minimum and population standard
            # deviation, both computed without the first `burn_in` rows.
            # axis=0 / ddof=0 are spelled out because np.min / np.std applied
            # to a DataFrame defer to the pandas methods with axis=None, whose
            # meaning depends on the pandas version (recent versions reduce
            # over the whole frame for min).
            reference = data.iloc[burn_in:, :]
            data = (data - reference.min(axis=0)) / reference.std(axis=0, ddof=0) + offset
            # Anything still negative after rescaling is set to the offset.
            data[data < 0] = offset

        # One array per measure; se.score presumably returns one value per
        # cell (column) - TODO confirm against spikefinder_eval.
        corr = np.array(se.score(data, truth, method='corr', downsample=factor))
        rank = np.array(se.score(data, truth, method='rank', downsample=factor))
        info = np.array(se.score(data, truth, method='info', downsample=factor))
        auc = np.array(se.score(data, truth, method='auc', downsample=factor))

        measures = [
            ('corr', corr),
            # correlation re-expressed in bits: -log2(1 - r^2)
            ('corr_bits', -np.log2(1 - corr**2)),
            ('rank', rank),
            ('info', info),
            ('auc', auc),
        ]

        # Every measure is labelled with the same cell numbering, derived from
        # the correlation result; a measure of different length raises here.
        cells = np.arange(len(corr))

        for measure, values in measures:
            frames.append(pd.DataFrame({'value': values,
                                        'algo': algo,
                                        'dset': dset_id,
                                        'split': split,
                                        'measure': measure,
                                        'cell': cells},
                                       columns=columns))

# A single concat keeps the work linear in the number of frames; with nothing
# to score the result is an empty table that still has the expected columns.
tab = pd.concat(frames) if frames else pd.DataFrame(columns=columns)









1.test stm
1.test chenkov
1.test deneux
1.test friedrich
1.test machado
1.test mineault
1.test pachitariu
1.test ringach
1.test rupprecht


  warn('Some firing rate predictions are smaller than zero.')


1.test speiser
1.test bolte
1.test oopsi


  c /= stddev[:, None]
  c /= stddev[None, :]
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)
  return mu >= 0


2.test stm
2.test chenkov
2.test deneux
2.test friedrich
2.test machado
2.test mineault
2.test pachitariu
2.test ringach
2.test rupprecht
2.test speiser
2.test bolte
2.test oopsi
3.test stm
3.test chenkov
3.test deneux
3.test friedrich
3.test machado
3.test mineault
3.test pachitariu
3.test ringach
3.test rupprecht
3.test speiser
3.test bolte
3.test oopsi
4.test stm
4.test chenkov
4.test deneux
4.test friedrich
4.test machado
4.test mineault
4.test pachitariu
4.test ringach
4.test rupprecht
4.test speiser
4.test bolte
4.test oopsi
5.test stm
5.test chenkov
5.test deneux
5.test friedrich
5.test machado
5.test mineault
5.test pachitariu
5.test ringach
5.test rupprecht
5.test speiser
5.test bolte
5.test oopsi
1.train stm
1.train chenkov
1.train deneux
1.train friedrich
1.train machado
1.train mineault
1.train pachitariu
1.train ringach
1.train rupprecht
1.train speiser
1.train bolte
1.train oopsi
2.train stm
2.train chenkov
2.train deneux
2.train friedrich
2.train machado
2.train mineault

In [13]:
# Display the assembled score table.
tab

Unnamed: 0,algo,cell,dset,measure,split,value
0,stm,0,1,corr,test,0.524731
1,stm,1,1,corr,test,0.486117
2,stm,2,1,corr,test,0.454547
3,stm,3,1,corr,test,0.512064
4,stm,4,1,corr,test,0.560148
0,stm,0,1,corr_bits,test,0.464629
1,stm,1,1,corr_bits,test,0.388941
2,stm,2,1,corr_bits,test,0.333904
3,stm,3,1,corr_bits,test,0.438718
4,stm,4,1,corr_bits,test,0.543227


In [6]:
# Preview the first 20 score rows that belong to the 'bolte' submission.
tab.loc[tab['algo'] == 'bolte'].head(20)


Unnamed: 0,algo,cell,dset,measure,split,value
0,bolte,0,1,corr,test,0.516207
1,bolte,1,1,corr,test,0.574513
2,bolte,2,1,corr,test,0.539393
3,bolte,3,1,corr,test,0.558185
4,bolte,4,1,corr,test,0.557982
0,bolte,0,1,corr_bits,test,0.447072
1,bolte,1,1,corr_bits,test,0.577908
2,bolte,2,1,corr_bits,test,0.496031
3,bolte,3,1,corr_bits,test,0.53862
4,bolte,4,1,corr_bits,test,0.538143


In [14]:
# Move the row index into an 'index' column and write the table to disk.
# reset_index() is applied only once: re-running this cell on an already
# reset table would otherwise add a 'level_0' column (and raise on a third
# run), which would change the CSV that gets written.
if 'index' not in tab.columns:
    tab = tab.reset_index()

tab.to_csv('data/results_23_01_18_25Hz.csv')

In [8]:
# Display the score table again, as it stands after the save cell above.
tab

Unnamed: 0,index,algo,cell,dset,measure,split,value
0,0,stm,0,1,corr,test,0.524731
1,1,stm,1,1,corr,test,0.486117
2,2,stm,2,1,corr,test,0.454547
3,3,stm,3,1,corr,test,0.512064
4,4,stm,4,1,corr,test,0.560148
5,0,stm,0,1,corr_bits,test,0.464629
6,1,stm,1,1,corr_bits,test,0.388941
7,2,stm,2,1,corr_bits,test,0.333904
8,3,stm,3,1,corr_bits,test,0.438718
9,4,stm,4,1,corr_bits,test,0.543227
