# Process the raw submissions

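This notebook loads the raw spike-prediction submissions of every algorithm, scores them against the ground-truth spikes (correlation, rank and information measures, each also expressed relative to the `stm` baseline), and saves the combined table to a CSV file.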

In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import date
import spikefinder_eval as se

# Apply seaborn's 'whitegrid' style to figures drawn in this session.
sns.set_style('whitegrid')

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [7]:
# Algorithms to score. 'stm' comes first because the scoring loop uses it as
# the baseline for the relative values of every other algorithm.
algos = [
    'stm',
    'chenkov',
    'deneux',
    'friedrich',
    'machado',
    'mineault',
    'pachitariu',
    'ringach',
    'rupprecht',
    'speiser',
    'bolte',
    'oopsi',
]

# Dataset identifiers of the form '<number>.<split>': all five test sets
# first, followed by the five training sets.
dsets = [
    '{}.{}'.format(number, split)
    for split in ('test', 'train')
    for number in range(1, 6)
]


In [8]:
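# Debugging shortcut: uncomment both assignments below to restrict the run to
# a single algorithm and a single dataset.
#algos =  ['stm']

#dsets = ['1.test']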


In [9]:
# Echo the dataset list to confirm which datasets the scoring loop will process.
dsets

['1.test',
 '2.test',
 '3.test',
 '4.test',
 '5.test',
 '1.train',
 '2.train',
 '3.train',
 '4.train',
 '5.train']

In [4]:
# Build `tab`, a long-format table with one row per
# (dataset, algorithm, measure, cell).
#
# For every dataset the ground truth is loaded once; each algorithm's
# submission is then scored with three `se.score` methods ('corr', 'rank',
# 'info') plus 'corr_bits', which is derived from the correlation as
# -log2(1 - c**2).
#
# 'rel value' compares each algorithm with the baseline algorithm ('stm') on
# the same dataset: a plain difference for 'corr' and 'rank', a rounded
# percent change for 'corr_bits' and 'info'. The baseline's own rows carry NaN.

TAB_COLUMNS = ['value', 'algo', 'dset', 'split', 'measure', 'rel value', 'cell']
BASELINE_ALGO = 'stm'
DOWNSAMPLE = 4  # downsampling factor passed to every se.score call


def _relative_percent(values, baseline):
    """Return the percent change of `values` w.r.t. `baseline`, rounded to whole percent."""
    return np.round(100 * (values - baseline) / baseline)


# Collect the per-measure frames in a list and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loop copies it on every step.
frames = []

for dset in dsets:
    # '1.test' -> dataset id '1', split 'test'
    dset_id, split = dset.split('.', 1)

    truth = se.load('data/raw/truth/{}.spikes.csv'.format(dset))

    # Reset per dataset so a baseline from a previous dataset can never be
    # reused silently.
    baseline = None

    for algo in algos:
        print(dset, algo)

        data = se.load('data/raw/{}/{}.spikes.csv'.format(algo, dset))

        if algo == 'machado':
            # Drop the first column of this submission before scoring.
            # NOTE(review): presumably an extra index column in the file - confirm.
            data = data.iloc[:, 1:]

        c = np.array(se.score(data, truth, method='corr', downsample=DOWNSAMPLE))
        c_bits = -np.log2(1 - c**2)
        s = np.array(se.score(data, truth, method='rank', downsample=DOWNSAMPLE))
        info = np.array(se.score(data, truth, method='info', downsample=DOWNSAMPLE))

        scores = [('corr', c), ('corr_bits', c_bits), ('rank', s), ('info', info)]

        if algo == BASELINE_ALGO:
            baseline = dict(scores)
            # np.nan instead of np.NaN: the latter alias was removed in NumPy 2.0.
            undefined = np.full(c.shape, np.nan)
            relative = {measure: undefined for measure, _ in scores}
        else:
            if baseline is None:
                raise ValueError(
                    "'{}' must be listed before '{}' in `algos`: relative values "
                    "need its scores on dataset '{}'".format(BASELINE_ALGO, algo, dset)
                )
            relative = {
                'corr': c - baseline['corr'],
                'corr_bits': _relative_percent(c_bits, baseline['corr_bits']),
                'rank': s - baseline['rank'],
                'info': _relative_percent(info, baseline['info']),
            }

        cells = np.arange(len(c))

        for measure, values in scores:
            frames.append(pd.DataFrame({'value': values,
                                        'rel value': relative[measure],
                                        'algo': algo,
                                        'dset': dset_id,
                                        'split': split,
                                        'measure': measure,
                                        'cell': cells}))

# Each frame keeps its own 0..n-1 index (no ignore_index), so the index still
# repeats per (dataset, algorithm, measure) group as before.
if frames:
    tab = pd.concat(frames)[TAB_COLUMNS]
else:
    tab = pd.DataFrame(columns=TAB_COLUMNS)









1.test stm


  return loglik, loglik + entropy


In [5]:
# Inspect the assembled score table.
tab


Unnamed: 0,algo,cell,dset,measure,rel value,split,value
0,stm,0,1,corr,,test,0.524731
1,stm,1,1,corr,,test,0.486117
2,stm,2,1,corr,,test,0.454547
3,stm,3,1,corr,,test,0.512064
4,stm,4,1,corr,,test,0.560148
0,stm,0,1,corr_bits,,test,0.464629
1,stm,1,1,corr_bits,,test,0.388941
2,stm,2,1,corr_bits,,test,0.333904
3,stm,3,1,corr_bits,,test,0.438718
4,stm,4,1,corr_bits,,test,0.543227


In [9]:
# Spot-check the first rows of one non-baseline algorithm, whose relative
# values should be filled in.
is_bolte = tab['algo'] == "bolte"
tab.loc[is_bolte].head()


Unnamed: 0,algo,cell,dset,measure,rel value,split,value
0,bolte,0,1,corr,-0.008524,test,0.516207
1,bolte,1,1,corr,0.088396,test,0.574513
2,bolte,2,1,corr,0.084846,test,0.539393
3,bolte,3,1,corr,0.046121,test,0.558185
4,bolte,4,1,corr,-0.002166,test,0.557982


In [10]:
# Move the repeating per-frame index into an ordinary 'index' column so the
# table gets a fresh running index.
# Note: this rebinds `tab`, so re-running the cell without rebuilding the
# table applies the reset a second time.
tab = tab.reset_index(drop=False)

# Write the table, including its running index, to the results CSV.
tab.to_csv(path_or_buf='data/results_24_07_17.csv')