## Assemble website data

This notebook assembles the following collections and saves each one as a .json file in the website_data/ directory:

* StudySets.json
* Studies.json
* Recordings.json
* TrueUnits.json
* SortingResults.json
* UnitResults.json
* Sorters.json

## Schema

StudySet
* name (str)
* [type (str) -- synthetic, real, hybrid, etc.]
* [description (str)]
    
Study
* name (str)
* studySet (str)
* description (str)
* sorterNames (array of str)

Note: study name is unique, even across study sets
    
Recording
* name (str)
* study (str)
* directory (str) -- i.e., kbucket address
* description (str)
* sampleRateHz (float)
* numChannels (int)
* durationSec (float)
* numTrueUnits (int)
* [fileSizeBytes (int)]
* spikeSign (int) [Hard-coded for now. In future, grab from params.json]

TrueUnit
* unitId (int)
* recording (str)
* recordingExt (str) -- study name and recording name joined by ':'
* study (str)
* meanFiringRateHz (float)
* numEvents (int)
* peakChannel (int)
* snr (float)

SortingResult
* recording (str)
* recordingExt (str)
* study (str)
* sorter (str)
* cpuTimeSec (float)
* [runtime_info (object): timestamps, wall time, CPU time, RAM usage, error status]
* [firingsOutputUrl (str)] TODO: jfm (two weeks)

UnitResult
* unitId (int)
* recording (str)
* recordingExt (str)
* study (str)
* sorter (str)
* numMatches (int)
* numFalsePositives (int)
* numFalseNegatives (int)
* checkAccuracy (float)
* checkRecall (float)
* checkPrecision (float)
* bestSortedUnitId (int)
* spikeSprayUrl (str) TODO: jfm to make this (next week)

Sorter
* name (str)
* algorithm (str)
* [algorithmVersion (str)] - future
* processorName (str)
* processorVersion (str)
* sortingParameters (object)

In [1]:
%load_ext autoreload
%autoreload 2

from mountaintools import client as mt
import os

In [2]:
# Point mountaintools at the read-only 'spikeforest' collection and kbucket share;
# every loadObject call below reads from this remote.
mt.configRemoteReadonly(
    collection='spikeforest',
    share_id='spikeforest.spikeforest2',
)

MOUNTAIN CONFIG: remote database spikeforest (readonly); remote kb-share 69432e9201d0 (readonly)


In [3]:
# Identifiers of the result batches to assemble; each one is passed as the
# subkey when the 'spikeforest_results' objects are loaded.
output_ids = [
    'mearec_neuronexus',
    'visapy_mea',
    'magland_synth',
    'paired',
    'mearec_tetrode',
    'manual_tetrode',
    'bionet',
]

In [4]:
# Load one result object per output id from the 'spikeforest_results' key.
result_objects = []
for output_id in output_ids:
    result_object = mt.loadObject(
        key=dict(
            name='spikeforest_results'
        ),
        subkey=output_id
    )
    # Fail early with a readable message rather than an opaque TypeError in the
    # flattening comprehensions below.
    # NOTE(review): assumes loadObject returns None when nothing is found -- confirm.
    if result_object is None:
        raise RuntimeError('Unable to load spikeforest_results for output id: {}'.format(output_id))
    result_objects.append(result_object)

# Flatten the per-output lists into notebook-wide collections used by later cells.
studies = [study for X in result_objects for study in X['studies']]
recordings = [recording for X in result_objects for recording in X['recordings']]
sorting_results = [sorting_result for X in result_objects for sorting_result in X['sorting_results']]

# Every later cell writes its .json file here; exist_ok keeps re-runs safe and
# avoids the check-then-create gap of os.path.exists + os.mkdir.
os.makedirs('website_data', exist_ok=True)

In [5]:
### STUDY SETS

# Collapse the studies down to one record per distinct study-set name
# (dict keys de-duplicate while keeping first-seen order).
study_sets_by_name = {
    study['study_set']: dict(name=study['study_set'])
    for study in studies
}

# Only the name is exported for now.
StudySets = [
    dict(name=entry['name'])
    for entry in study_sets_by_name.values()
]

mt.saveObject(
    object=StudySets,
    dest_path=os.path.abspath(os.path.join('website_data', 'StudySets.json'))
)
print(StudySets)

[{'name': 'mearec_neuronexus'}, {'name': 'visapy_mea'}, {'name': 'magland_synth'}, {'name': 'paired'}, {'name': 'mearec_tetrode'}, {'name': 'manual_tetrode'}, {'name': 'bionet'}]


In [6]:
### RECORDINGS and TRUE UNITS

# Build one Recordings row per recording and one TrueUnits row per ground-truth
# unit; the per-unit details are loaded from each recording's summary.
Recordings = []
TrueUnits = []
for recording in recordings:
    rec_name = recording['name']
    rec_study = recording['study']
    true_units_info = mt.loadObject(path=recording['summary']['true_units_info'])
    TrueUnits.extend(
        dict(
            unitId=unit_info['unit_id'],
            recording=rec_name,
            # study-qualified recording name
            recordingExt=rec_study + ':' + rec_name,
            study=rec_study,
            meanFiringRateHz=unit_info['firing_rate'],
            numEvents=unit_info['num_events'],
            peakChannel=unit_info['peak_channel'],
            snr=unit_info['snr'],
        )
        for unit_info in true_units_info
    )
    rec_info = recording['summary']['computed_info']
    Recordings.append(dict(
        name=rec_name,
        study=rec_study,
        directory=recording['directory'],
        description=recording['description'],
        sampleRateHz=rec_info['samplerate'],
        numChannels=rec_info['num_channels'],
        durationSec=rec_info['duration_sec'],
        numTrueUnits=len(true_units_info),
        spikeSign=-1  # hard-coded for now (see schema note)
    ))

for collection, file_name in ((Recordings, 'Recordings.json'), (TrueUnits, 'TrueUnits.json')):
    mt.saveObject(object=collection, dest_path=os.path.abspath(os.path.join('website_data', file_name)))
print('Num recordings:', len(Recordings))
print('Num true units:', len(TrueUnits))
print('studies for recordings:', {row['study'] for row in Recordings})

KeyboardInterrupt: 

In [15]:
### UNIT RESULTS and SORTING RESULTS

def _safe_ratio(numerator, denominator):
    """Return numerator/denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0

# One SortingResults row per sorting result that has a comparison with ground
# truth, and one UnitResults row per true unit in that comparison.
# sorter_names_by_study (study name -> sorted list of sorter names) is consumed
# by the STUDIES cell.
UnitResults=[]
SortingResults=[]
sorter_names_by_study=dict()
for sr in sorting_results:
    sorter_name=sr['sorter']['name']
    study_name=sr['recording']['study']
    recording_name=sr['recording']['name']
    recording_ext=study_name+':'+recording_name

    if not sr.get('comparison_with_truth'):
        # The placeholders were previously printed literally because the
        # string was never formatted.
        print('Warning: comparison with truth not found for sorting result: {} {}/{}'.format(sorter_name, study_name, recording_name))
        continue

    # Register the sorter once per sorting result, so a comparison that
    # contains no units still counts toward the study's sorter list.
    sorter_names_by_study.setdefault(study_name, set()).add(sorter_name)

    SortingResults.append(dict(
        recording=recording_name,
        recordingExt=recording_ext,  # listed in the SortingResult schema
        study=study_name,
        sorter=sorter_name,
        # Tolerate a missing or empty execution_stats entry.
        cpuTimeSec=(sr.get('execution_stats') or {}).get('elapsed_sec',None)
    ))

    comparison_with_truth=mt.loadObject(path=sr['comparison_with_truth']['json'])
    for unit_result in comparison_with_truth.values():
        n_match=unit_result['num_matches']
        n_fp=unit_result['num_false_positives']
        n_fn=unit_result['num_false_negatives']
        UnitResults.append(dict(
            unitId=unit_result['unit_id'],
            recording=recording_name,
            recordingExt=recording_ext,
            study=study_name,
            sorter=sorter_name,
            numMatches=n_match,
            numFalsePositives=n_fp,
            numFalseNegatives=n_fn,
            # _safe_ratio guards against units whose counts are all zero,
            # which previously raised ZeroDivisionError.
            checkAccuracy=_safe_ratio(n_match, n_match+n_fp+n_fn),
            checkPrecision=_safe_ratio(n_match, n_match+n_fp),
            checkRecall=_safe_ratio(n_match, n_match+n_fn),
            bestSortedUnitId=unit_result['best_unit']
        ))

# Convert the sets to sorted lists so they serialize to JSON deterministically.
sorter_names_by_study={
    study: sorted(names)
    for study, names in sorter_names_by_study.items()
}
mt.saveObject(object=UnitResults, dest_path=os.path.abspath(os.path.join('website_data', 'UnitResults.json')))
mt.saveObject(object=SortingResults, dest_path=os.path.abspath(os.path.join('website_data', 'SortingResults.json')))
print('Num unit results:',len(UnitResults))

Num unit results: 90945


In [16]:
### SORTERS

# Keep one sorter record per distinct sorter name (the last one seen wins).
sorters_by_name=dict()
for sr in sorting_results:
    sorters_by_name[sr['sorter']['name']]=sr['sorter']

Sorters=[]
for name,sorter in sorters_by_name.items():
    Sorters.append(dict(
        name=sorter['name'],
        algorithm=sorter['processor_name'], # right now the algorithm is the same as the processor name
        processorName=sorter['processor_name'],
        processorVersion='0', # placeholder: jfm needs to provide this
        # Key renamed from 'sorting_parameters' to match the documented Sorter
        # schema and the camelCase used by every other exported field.
        # Note: parameter names are not guaranteed to be shared between
        # sorters; params is an arbitrary json object.
        sortingParameters=sorter['params']
    ))

mt.saveObject(object=Sorters, dest_path=os.path.abspath(os.path.join('website_data', 'Sorters.json')))
print([S['name'] for S in Sorters])

['MountainSort4-thr3', 'SpykingCircus', 'Yass', 'IronClust-static']


In [17]:
### STUDIES

# One Studies row per study; sorterNames comes from the mapping built in the
# UNIT RESULTS cell.
Studies=[]
for study in studies:
    Studies.append(dict(
        name=study['name'],
        studySet=study['study_set'],
        description=study['description'],
        # A study with no successfully compared sorting result has no entry in
        # sorter_names_by_study; export an empty list instead of raising KeyError.
        sorterNames=sorter_names_by_study.get(study['name'], [])
        # the following can be obtained from the other collections
        # numRecordings, sorters, etc...
    ))

mt.saveObject(object=Studies, dest_path=os.path.abspath(os.path.join('website_data', 'Studies.json')))
print([S['name'] for S in Studies])

['mearec_neuronexus_noise10_K10_C32', 'mearec_neuronexus_noise10_K20_C32', 'mearec_neuronexus_noise10_K40_C32', 'mearec_neuronexus_noise20_K10_C32', 'mearec_neuronexus_noise20_K20_C32', 'mearec_neuronexus_noise20_K40_C32', 'visapy_mea', 'magland_synth_noise10_K10_C4', 'magland_synth_noise10_K10_C8', 'magland_synth_noise10_K20_C4', 'magland_synth_noise10_K20_C8', 'magland_synth_noise20_K10_C4', 'magland_synth_noise20_K10_C8', 'magland_synth_noise20_K20_C4', 'magland_synth_noise20_K20_C8', 'paired_boyden32c', 'paired_crcns', 'paired_mea64c', 'paired_neuropix32c', 'mearec_tetrode_noise10_K10_C4', 'mearec_tetrode_noise10_K20_C4', 'mearec_tetrode_noise20_K10_C4', 'mearec_tetrode_noise20_K20_C4', 'manual_tetrode_600s', 'manual_tetrode_1200s', 'manual_tetrode_2400s', 'bionet_static', 'bionet_drift', 'bionet_shuffle']


In [18]:
# Scratch check: int(1.3) evaluates to 1 (the fractional part is dropped).
int(1.3)

1

In [28]:
def _adjust_srun_opts_for_num_jobs(srun_opts, num_workers, num_jobs):
    vals = srun_opts.split()
    for i in range(len(vals)):
        if vals[i] == '-n' and (i+1<len(vals)):
            nval = int(vals[i+1])
            if num_jobs <= nval:
                nval = num_jobs
                num_workers = 1
            elif num_jobs <= nval * (num_workers-1):
                num_workers = int((num_jobs-1)/nval) + 1
            vals[i+1] = str(nval)
    return ' '.join(vals), num_workers

In [31]:
# Sanity check: 37 jobs exceed both 12 and 12 * (4 - 1) = 36, so neither the
# '-n' value nor the worker count should change.
a, b = _adjust_srun_opts_for_num_jobs(
    srun_opts='-n 12',
    num_workers=4,
    num_jobs=37,
)
print(a, b)

-n 12 4
