In [6]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from pathlib import Path
from itertools import product

# Root folder that holds one "<dataset>-<pipeline>/" directory per benchmark run.
# Kept as a plain string with a trailing slash so string concatenation onto it keeps working.
bids_root = str(Path.home()) + '/Documents/muniverse-data/Benchmarks/'

# Datasets and pipelines whose report cards are loaded below.
datasetnames = ['Caillet_et_al_2023']

pipelinenames = ['cbss', 'scd']

# Collect the per-dataset tables in lists and concatenate once per pipeline.
# Growing a DataFrame with pd.concat inside the loop re-copies all accumulated
# rows on every iteration, and seeding it with an empty DataFrame is something
# newer pandas versions warn about.
global_frames = {pipeline: [] for pipeline in pipelinenames}
source_frames = {pipeline: [] for pipeline in pipelinenames}

for dataset in datasetnames:
    for pipeline in pipelinenames:
        path = bids_root + dataset + '-' + pipeline + '/'
        dataset_global_rc = pd.read_csv(path + 'report_card_globals.tsv', delimiter='\t')
        dataset_source_rc = pd.read_csv(path + 'report_card_sources.tsv', delimiter='\t')
        global_frames[pipeline].append(dataset_global_rc)
        source_frames[pipeline].append(dataset_source_rc)

# pipeline name -> table built from every 'report_card_globals.tsv' read above.
# pd.concat raises on an empty list, so fall back to an empty frame when
# nothing was loaded (e.g. `datasetnames` is empty).
global_rc = {
    pipeline: pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    for pipeline, frames in global_frames.items()
}
# pipeline name -> table built from every 'report_card_sources.tsv' read above.
source_rc = {
    pipeline: pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    for pipeline, frames in source_frames.items()
}
       

In [None]:
# Datasets to summarise; the table gets one row per "<dataset>-<pipeline>" pair.
dataset_of_interest = ['Caillet_et_al_2023']

rows = [f"{d}-{a}" for d, a in product(dataset_of_interest, pipelinenames)]
columns = ['n_source', 'n_source_std', 'exp_var', 'exp_var_std','runtime', 'runtime_std']
# Pre-filled with NaN so that pairs which are never processed stay visibly empty.
summary = pd.DataFrame(np.nan, index=rows, columns=columns)

# A source is counted only if it passes all three thresholds:
# 'sil' above sil_th, 'cov_isi' below cov_th and 'n_spikes' above min_num_spikes.
sil_th = 0.9
cov_th = 0.3
min_num_spikes = 50

for dataset in dataset_of_interest:
    for pipeline in pipelinenames:
        row = f"{dataset}-{pipeline}"

        # Boolean indexing already returns new objects, so no explicit copy is needed.
        gdf = global_rc[pipeline]
        gdf = gdf[gdf['datasetname'] == dataset]
        sdf = source_rc[pipeline]
        sdf = sdf[sdf['datasetname'] == dataset]

        # Apply the quality thresholds once for the whole dataset instead of
        # rebuilding the same masks for every file.
        accepted = sdf[(sdf['sil'] > sil_th) &
                       (sdf['cov_isi'] < cov_th) &
                       (sdf['n_spikes'] > min_num_spikes)]

        # Number of accepted sources per file, aligned with the order of the
        # files in the global report card; files without any accepted source
        # are absent from the counts and therefore filled with 0.
        files = list(gdf['filename'])
        n_sources = (accepted['filename']
                     .value_counts()
                     .reindex(files, fill_value=0)
                     .to_numpy(dtype=float))

        # np.std defaults to ddof=0 (population standard deviation).
        summary.loc[row, 'n_source'] = np.mean(n_sources)
        summary.loc[row, 'n_source_std'] = np.std(n_sources)

        # ddof=0 is spelled out so these columns use the same convention as
        # the source-count spread above (pandas' own default would be ddof=1).
        summary.loc[row, 'runtime'] = gdf['runtime'].mean()
        summary.loc[row, 'runtime_std'] = gdf['runtime'].std(ddof=0)
        summary.loc[row, 'exp_var'] = gdf['explained_var'].mean()
        summary.loc[row, 'exp_var_std'] = gdf['explained_var'].std(ddof=0)

# Show the finished table as the cell output.
summary


KeyboardInterrupt: 