In [1]:
%load_ext autoreload
%autoreload 2

from mountaintools import client as mt
import os
import pandas as pd
import numpy as np
import json

In [2]:
mt.configDownloadFrom('spikeforest.kbucket')

In [25]:
output_ids=[
    "paired_boyden32c","paired_crcns","paired_kampff","paired_mea64c",
    "synth_visapy","synth_magland","synth_mearec_tetrode",
    "manual_franklab","synth_bionet","synth_mearec_neuronexus"
]
# output_ids=[
#     "paired_boyden32c","paired_crcns","paired_kampff","paired_mea64c",
#     "synth_visapy","synth_magland","synth_mearec_tetrode",
#     "manual_franklab","synth_mearec_neuronexus"
# ]


In [26]:
print('******************************** LOADING ANALYSIS OUTPUT OBJECTS...')
studies = []
study_sets = []
recordings = []
sorting_results = []
for output_id in output_ids:
    print('Loading output object: {}'.format(output_id))
    output_path = ('key://pairio/spikeforest/spikeforest_analysis_results.{}.json').format(output_id)
    obj = mt.loadObject(path=output_path)
    if obj:
        studies = studies + obj['studies']
        study_sets = study_sets + obj.get('study_sets', [])
        recordings = recordings + obj['recordings']
        sorting_results = sorting_results + obj['sorting_results']
    else:
        print('Unable to load object: '+output_path)

******************************** LOADING ANALYSIS OUTPUT OBJECTS...
Loading output object: paired_boyden32c
Loading output object: paired_crcns
Loading output object: paired_kampff
Loading output object: paired_mea64c
Loading output object: synth_visapy
Loading output object: synth_magland
Loading output object: synth_mearec_tetrode
Loading output object: manual_franklab
Loading output object: synth_bionet
Loading output object: synth_mearec_neuronexus


In [33]:
round(1.05234, 2)

1.05

In [27]:
study_names = sorted(list(set([rec['study'] for rec in recordings])))
print(study_names)

['manual_tetrode_1200s', 'manual_tetrode_2400s', 'manual_tetrode_600s', 'mea_c30', 'paired_boyden32c', 'paired_crcns', 'paired_kampff', 'paired_mea64c', 'synth_bionet_drift', 'synth_bionet_shuffle', 'synth_bionet_static', 'synth_magland_noise10_K10_C4', 'synth_magland_noise10_K10_C8', 'synth_magland_noise10_K20_C4', 'synth_magland_noise10_K20_C8', 'synth_magland_noise20_K10_C4', 'synth_magland_noise20_K10_C8', 'synth_magland_noise20_K20_C4', 'synth_magland_noise20_K20_C8', 'synth_mearec_neuronexus_noise10_K10_C32', 'synth_mearec_neuronexus_noise10_K20_C32', 'synth_mearec_neuronexus_noise10_K40_C32', 'synth_mearec_neuronexus_noise20_K10_C32', 'synth_mearec_neuronexus_noise20_K20_C32', 'synth_mearec_neuronexus_noise20_K40_C32', 'synth_mearec_tetrode_noise10_K10_C4', 'synth_mearec_tetrode_noise10_K20_C4', 'synth_mearec_tetrode_noise20_K10_C4', 'synth_mearec_tetrode_noise20_K20_C4']


In [28]:
sorter_names = sorted(list(set([sr['sorter']['name'] for sr in sorting_results])))

In [31]:
def _assemble_study_analysis_result(*, study_name, recordings, sorting_results, sorter_names):
    true_units = dict()
    recording_names = []
    for irec, rec in enumerate(recordings):
        if rec['study'] == study_name:
            recording_names.append(rec['name'])
            true_units_info = mt.loadObject(path=rec['summary']['true_units_info'])
            for unit_info in true_units_info:
                id0=unit_info['unit_id']
                true_units[study_name+'/'+rec['name']+'/{}'.format(id0)] = dict(
                    unit_id = id0,
                    recording_index = irec,
                    snr=unit_info['snr'],
                    firing_rate=unit_info['firing_rate'],
                    num_events=unit_info['num_events'],
                    sorting_results=dict()
                )
    for sr in sorting_results:
        rec = sr['recording']
        if rec['study'] == study_name:
            if sr.get('comparison_with_truth', None):
                comparison_with_truth = mt.loadObject(path=sr['comparison_with_truth']['json'])
                if comparison_with_truth is None:
                    print(sr)
                    raise Exception('Unable to retrieve comparison with truth object for sorting result.')
                sorter_name=sr['sorter']['name']
                for unit_result in comparison_with_truth.values():
                    id0=unit_result['unit_id']
                    n_match = unit_result['num_matches']
                    n_fp = unit_result['num_false_positives']
                    n_fn = unit_result['num_false_negatives']
                    accuracy = n_match / (n_match + n_fp + n_fn)
                    if n_match + n_fp > 0:
                        precision = n_match / (n_match + n_fp)
                    else:
                        precision = 0
                    recall = n_match / (n_match + n_fn)
                    true_units[study_name+'/'+rec['name']+'/{}'.format(id0)]['sorting_results'][sorter_name]=dict(
                        accuracy=accuracy,
                        precision=precision,
                        recall=recall
                    )

    keys0 = sorted(true_units.keys())
    true_units_list = [true_units[key] for key in keys0]
    
    snrs = [x['snr'] for x in true_units_list]
    firing_rates = [x['firing_rate'] for x in true_units_list]
    num_events = [x['num_events'] for x in true_units_list]
    recording_indices = [x['recording_index'] for x in true_units_list]
    unit_ids = [x['unit_id'] for x in true_units_list]
    
    study_analysis_result=dict(
        study_name=study_name,
        recording_names=recording_names,
        snrs=snrs,
        firing_rates=firing_rates,
        num_events=num_events,
        recording_indices=recording_indices,
        unit_ids=unit_ids,
        sorting_results=dict()
    )
    for sorter_name in sorter_names:
        accuracies = [x['sorting_results'].get(sorter_name, {}).get('accuracy') for x in true_units_list]
        precisions = [x['sorting_results'].get(sorter_name, {}).get('precision') for x in true_units_list]
        recalls = [x['sorting_results'].get(sorter_name, {}).get('recall') for x in true_units_list]
        study_analysis_result['sorting_results'][sorter_name]=dict(
            accuracies=accuracies,
            precisions=precisions,
            recalls=recalls
        )
        
    
    return study_analysis_result

study_analysis_results = [_assemble_study_analysis_result(study_name=study['name'], recordings=recordings, sorting_results=sorting_results, sorter_names=sorter_names) for study in studies]

In [32]:
mt.realizeFile(path=mt.saveObject(object=study_analysis_results))

'/home/magland/sha1-cache-new/1/7c/17c63cfaa8c47de72eb6adb5a8ac03f62c297ebd'

In [23]:
[study['name'] for study in studies]

['paired_boyden32c',
 'paired_crcns',
 'paired_kampff',
 'paired_mea64c',
 'mea_c30',
 'synth_magland_noise10_K10_C4',
 'synth_magland_noise10_K10_C8',
 'synth_magland_noise10_K20_C4',
 'synth_magland_noise10_K20_C8',
 'synth_magland_noise20_K10_C4',
 'synth_magland_noise20_K10_C8',
 'synth_magland_noise20_K20_C4',
 'synth_magland_noise20_K20_C8',
 'synth_mearec_tetrode_noise10_K10_C4',
 'synth_mearec_tetrode_noise10_K20_C4',
 'synth_mearec_tetrode_noise20_K10_C4',
 'synth_mearec_tetrode_noise20_K20_C4',
 'manual_tetrode_600s',
 'manual_tetrode_1200s',
 'manual_tetrode_2400s',
 'synth_mearec_neuronexus_noise10_K10_C32',
 'synth_mearec_neuronexus_noise10_K20_C32',
 'synth_mearec_neuronexus_noise10_K40_C32',
 'synth_mearec_neuronexus_noise20_K10_C32',
 'synth_mearec_neuronexus_noise20_K20_C32',
 'synth_mearec_neuronexus_noise20_K40_C32']

In [17]:
[x['study_name'] for x in study_analysis_results]

['paired_boyden32c',
 'paired_crcns',
 'paired_kampff',
 'paired_mea64c',
 'mea_c30',
 'synth_magland_noise10_K10_C4',
 'synth_magland_noise10_K10_C8',
 'synth_magland_noise10_K20_C4',
 'synth_magland_noise10_K20_C8',
 'synth_magland_noise20_K10_C4',
 'synth_magland_noise20_K10_C8',
 'synth_magland_noise20_K20_C4',
 'synth_magland_noise20_K20_C8',
 'synth_mearec_tetrode_noise10_K10_C4',
 'synth_mearec_tetrode_noise10_K20_C4',
 'synth_mearec_tetrode_noise20_K10_C4',
 'synth_mearec_tetrode_noise20_K20_C4',
 'manual_tetrode_600s',
 'manual_tetrode_1200s',
 'manual_tetrode_2400s',
 'synth_mearec_neuronexus_noise10_K10_C32',
 'synth_mearec_neuronexus_noise10_K20_C32',
 'synth_mearec_neuronexus_noise10_K40_C32',
 'synth_mearec_neuronexus_noise20_K10_C32',
 'synth_mearec_neuronexus_noise20_K20_C32',
 'synth_mearec_neuronexus_noise20_K40_C32']

In [72]:
true_units_by_recording = dict()
for rec in recordings:
    snrs=[]
    firing_rates=[]
    num_events=[]
    snr_by_id=dict()
    firing_rate_by_id=dict()
    num_events_by_id=dict()
    true_units_info = mt.loadObject(path=rec['summary']['true_units_info'])
    for unit_info in true_units_info:
        id0=unit_info['unit_id']
        snr_by_id[id0]=unit_info['snr']
        firing_rate_by_id[id0]=unit_info['firing_rate']
        num_events_by_id[id0]=unit_info['num_events']
    true_units_by_recording[rec['study']+'/'+rec['name']]=dict(
        snr_by_id=snr_by_id,
        firing_rate_by_id=firing_rate_by_id,
        num_events_by_id=num_events_by_id
    )

study_sorter_results = []
for study in studies:
    total_true_units = 0
    for rec in recordings:
        if rec['study'] == study['name']:
            true_units_info = true_units_by_recording[rec['study']+'/'+rec['name']]
            total_true_units = total_true_units + len(true_units_info['snr_by_id'].keys())
            
    for sorter_name in sorter_names:
        srs = [sr for sr in sorting_results if (sr['sorter']['name'] == sorter_name) and (sr['recording']['study'] == study['name'])]
        accuracies = []
        precisions = []
        recalls = []
        snrs = []
        firing_rates = []
        num_events = []
        for sr in srs:
            if sr.get('comparison_with_truth', None):
                comparison_with_truth = mt.loadObject(path=sr['comparison_with_truth']['json'])
                if comparison_with_truth is None:
                    print(sr)
                    raise Exception('Unable to retrieve comparison with truth object for sorting result.')
                true_units_info = true_units_by_recording[sr['recording']['study']+'/'+sr['recording']['name']]
                for unit_result in comparison_with_truth.values():
                    id0=unit_result['unit_id']
                    n_match = unit_result['num_matches']
                    n_fp = unit_result['num_false_positives']
                    n_fn = unit_result['num_false_negatives']
                    accuracy = n_match / (n_match + n_fp + n_fn)
                    if n_match + n_fp > 0:
                        precision = n_match / (n_match + n_fp)
                    else:
                        precision = 0
                    recall = n_match / (n_match + n_fn)
                    accuracies.append(accuracy)
                    precisions.append(precision)
                    recalls.append(recall)
                    snrs.append(true_units_info['snr_by_id'][id0])
                    firing_rates.append(true_units_info['firing_rate_by_id'][id0])
                    num_events.append(true_units_info['num_events_by_id'][id0])
        study_sorter_results.append(dict(
            study=study['name'],
            sorter=sorter_name,
            accuracies=accuracies,
            precisions=precisions,
            recalls=recalls,
            snrs=snrs,
            firing_rates=firing_rates,
            num_events=num_events,
            total_true_units=total_true_units,
            missed_true_units=total_true_units-len(snrs)
        ))

In [73]:
mt.realizeFile(mt.saveObject(object=study_sorter_results))

'/home/magland/sha1-cache-new/f/6f/f6f4e1869fa0188c6d940d30e80c244988e80d79'

In [64]:
for ssr in study_sorter_results:
    if ssr['missed_true_units']:
        if ssr['sorter'] != 'HerdingSpikes2':
            print(ssr['missed_true_units'], ssr['total_true_units'], ssr['study'], ssr['sorter'])

4 19 paired_boyden32c KiloSort2
74 93 paired_crcns KiloSort2
1 15 paired_kampff SpykingCircus
18 18 paired_mea64c SpykingCircus
100 100 synth_magland_noise10_K10_C4 KiloSort2
200 200 synth_magland_noise10_K20_C4 KiloSort2
100 100 synth_magland_noise20_K10_C4 KiloSort2
200 200 synth_magland_noise20_K20_C4 KiloSort2
100 100 synth_mearec_tetrode_noise10_K10_C4 KiloSort2
200 200 synth_mearec_tetrode_noise10_K20_C4 KiloSort2
100 100 synth_mearec_tetrode_noise20_K10_C4 KiloSort2
200 200 synth_mearec_tetrode_noise20_K20_C4 KiloSort2
104 104 manual_tetrode_600s KiloSort2
52 52 manual_tetrode_1200s KiloSort2
26 26 manual_tetrode_2400s KiloSort2
8496 8496 synth_bionet_static SpykingCircus
1416 8496 synth_bionet_drift KiloSort2
8496 8496 synth_bionet_drift SpykingCircus
708 8496 synth_bionet_shuffle KiloSort2
8496 8496 synth_bionet_shuffle SpykingCircus
30 100 synth_mearec_neuronexus_noise20_K10_C32 KiloSort2
160 200 synth_mearec_neuronexus_noise20_K20_C32 KiloSort
400 400 synth_mearec_neuronexus

In [17]:
sorting_results[0]

{'recording': {'name': 'rec_16c_600s_11',
  'study': 'hybrid_drift_16c_600s',
  'directory': 'kbucket://15734439d8cf/groundtruth/hybrid_drift/rec_16c_600s_11',
  'description': 'One of the recordings in the hybrid_drift_16c_600s study',
  'summary': {'computed_info': {'samplerate': 30000.0,
    'num_channels': 16,
    'duration_sec': 600.0},
   'plots': {},
   'true_units_info': 'sha1://1106e8dba8d6119b8c6c050bbc9e7650cdfeea85/true_units_info.json'}},
 'sorter': {'code': 'ms4',
  'name': 'MountainSort4-thr3',
  'processor_name': 'MountainSort4',
  'params': {'detect_sign': -1, 'adjacency_radius': 50, 'detect_threshold': 3},
  'compute_resource': 'default'},
 'firings_true': 'kbucket://15734439d8cf/groundtruth/hybrid_drift/rec_16c_600s_11/firings_true.mda',
 'processor_name': 'MountainSort4',
 'processor_version': '4.2.0',
 'execution_stats': {'start_time': 1553546477.836693,
  'end_time': 1553546611.6152449,
  'elapsed_sec': 133.7785518169403},
 'console_out': 'sha1://3754f1f0faf757532

In [18]:
obj = dict(studies=studies, recordings=recordings, sorting_results=sorting_results)

In [19]:
a=mt.saveObject(object=obj, share_id='kbucket')
print(a)

Uploading to kachery --- (166.2 KiB): /home/magland/sha1-cache-new/tmp_mountainclient_koqfueomtv.txt -> http://kbucket.flatironinstitute.org:8080/set/sha1/ab9d36084bcd8c6f68cd674ee6bf496aeb8bf066?signature=cbc646c23494e1f977c961bf5b89aca3a053e1b2
File uploaded (166.2 KiB) in 0.012670516967773438 sec
sha1://ab9d36084bcd8c6f68cd674ee6bf496aeb8bf066/object.json


In [11]:
from copy import deepcopy

In [12]:
deepcopy(3)

3

In [5]:
study_sets_by_study = dict()
for study in studies:
    study_sets_by_study[study['name']] = study['study_set']

In [6]:
pd.DataFrame(data=studies, columns=['study_set', 'name', 'directory'])

Unnamed: 0,study_set,name,directory
0,mearec_neuronexus,mearec_neuronexus_noise10_K10_C32,kbucket://15734439d8cf/groundtruth/mearec_synt...
1,mearec_neuronexus,mearec_neuronexus_noise10_K20_C32,kbucket://15734439d8cf/groundtruth/mearec_synt...
2,mearec_neuronexus,mearec_neuronexus_noise10_K40_C32,kbucket://15734439d8cf/groundtruth/mearec_synt...
3,mearec_neuronexus,mearec_neuronexus_noise20_K10_C32,kbucket://15734439d8cf/groundtruth/mearec_synt...
4,mearec_neuronexus,mearec_neuronexus_noise20_K20_C32,kbucket://15734439d8cf/groundtruth/mearec_synt...
5,mearec_neuronexus,mearec_neuronexus_noise20_K40_C32,kbucket://15734439d8cf/groundtruth/mearec_synt...
6,visapy_mea,visapy_mea,kbucket://15734439d8cf/groundtruth/visapy_mea
7,magland_synth,magland_synth_noise10_K10_C4,kbucket://15734439d8cf/groundtruth/magland_syn...
8,magland_synth,magland_synth_noise10_K10_C8,kbucket://15734439d8cf/groundtruth/magland_syn...
9,magland_synth,magland_synth_noise10_K20_C4,kbucket://15734439d8cf/groundtruth/magland_syn...


In [7]:
study_set='visapy_mea'
recordings2 = [dict(study=rec['study'], name=rec['name'], **rec['summary']['computed_info']) for rec in recordings]
recordings2 = [rec for rec in recordings2 if study_sets_by_study[rec['study']]==study_set]
pd.DataFrame(data=recordings2, columns=['study', 'name', 'samplerate', 'num_channels', 'duration_sec'])

Unnamed: 0,study,name,samplerate,num_channels,duration_sec
0,visapy_mea,set1,32000.0,30,300.0
1,visapy_mea,set2,32000.0,30,300.0
2,visapy_mea,set3,32000.0,30,300.0
3,visapy_mea,set4,32000.0,30,300.0
4,visapy_mea,set5,32000.0,30,300.0
5,visapy_mea,set6,32000.0,30,300.0


In [8]:
sorter_codes

NameError: name 'sorter_codes' is not defined

In [9]:
all_elapsed = [SR['execution_stats']['elapsed_sec'] for SR in sorting_results]
total_elapsed_sec = np.sum(all_elapsed)
total_elapsed_hr = total_elapsed_sec/60/60
print('Total elapsed: {:.2f} hours -- {} sortings'.format(total_elapsed_hr, len(all_elapsed)))

Total elapsed: 111.28 hours -- 1904 sortings


In [10]:
sorter_codes=list(set([SR['sorter']['code'] for SR in sorting_results]))
for code in sorter_codes:
    all_elapsed = [SR['execution_stats']['elapsed_sec'] for SR in sorting_results if SR['sorter']['code']==code]
    total_elapsed_sec = np.sum(all_elapsed)
    total_elapsed_hr = total_elapsed_sec/60/60
    print('Total elapsed for {}: {:.2f} hours -- {} recordings'.format(code, total_elapsed_hr, len(all_elapsed)))

Total elapsed for sc: 45.78 hours -- 388 recordings
Total elapsed for ks: 8.10 hours -- 352 recordings
Total elapsed for ms4: 26.21 hours -- 388 recordings
Total elapsed for irc: 14.50 hours -- 388 recordings
Total elapsed for yass: 16.69 hours -- 388 recordings


In [11]:
study_sets = list(set(study_sets_by_study.values()))
for study_set in study_sets:
    print('STUDY SET: {}'.format(study_set))
    sorting_results2 = [SR for SR in sorting_results if study_sets_by_study[SR['recording']['study']]==study_set]
    for code in sorter_codes:
        all_elapsed = [SR['execution_stats']['elapsed_sec'] for SR in sorting_results2 if SR['sorter']['code']==code]
        total_elapsed_sec = np.sum(all_elapsed)
        total_elapsed_hr = total_elapsed_sec/60/60
        print('Total elapsed for {}: {:.2f} hours -- {:.2f} minutes per recording -- {} recordings'.format(code, total_elapsed_hr, total_elapsed_hr/len(all_elapsed)*60, len(all_elapsed)))

-----------------------------------------
STUDY SET: magland_synth
Total elapsed for sc: 4.93 hours -- 3.70 minutes per recording -- 80 recordings
Total elapsed for ks: 1.32 hours -- 0.99 minutes per recording -- 80 recordings
Total elapsed for ms4: 1.53 hours -- 1.15 minutes per recording -- 80 recordings
Total elapsed for irc: 1.48 hours -- 1.11 minutes per recording -- 80 recordings
Total elapsed for yass: 0.68 hours -- 0.51 minutes per recording -- 80 recordings
-----------------------------------------
STUDY SET: manual_tetrode
Total elapsed for sc: 2.32 hours -- 6.63 minutes per recording -- 21 recordings
Total elapsed for ks: 0.45 hours -- 1.29 minutes per recording -- 21 recordings
Total elapsed for ms4: 0.30 hours -- 0.86 minutes per recording -- 21 recordings
Total elapsed for irc: 0.48 hours -- 1.38 minutes per recording -- 21 recordings
Total elapsed for yass: 0.30 hours -- 0.86 minutes per recording -- 21 recordings
-----------------------------------------
STUDY SET: bion

  # Remove the CWD from sys.path while we load stuff.
