## Assemble website data

This notebook assembles the following collections and saves each one as a .json file in the website_data/ directory:

* StudySets.json
* Studies.json
* Recordings.json
* TrueUnits.json
* UnitResults.json
* Sorters.json

## Schema

StudySet
* name (str)
    
Study
* name (str)
* studySet (str)
* description (str)
    
Recording
* name (str)
* study (str)
* directory (str)
* description (str)
* sampleRate (float)
* numChannels (int)
* durationSec (float)
* numTrueUnits (int)

TrueUnit
* unitId (int)
* recording (str)
* study (str)
* firingRate (float)
* numEvents (int)
* peakChannel (int)
* snr (float)

UnitResult
* unitId (int)
* recording (str)
* study (str)
* sorter (str)
* numMatches (int)
* numFalsePositives (int)
* numFalseNegatives (int)
* bestUnitId (int)
* matchedUnitId (int)

Sorter
* name (str)
* algorithm (str)
* processorName (str)
* processorVersion (str)
* sortingParameters (object)

In [93]:
%load_ext autoreload
%autoreload 2

from mountaintools import client as mt
import os

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [94]:
# Configure mountaintools for read-only access to the remote 'spikeforest'
# collection, using the share identified by the 'spikeforest.spikeforest2' alias.
mt.configRemoteReadonly(
    collection='spikeforest',
    share_id='spikeforest.spikeforest2',
)

Resolved share_id 69432e9201d0 from alias spikeforest.spikeforest2
CAIRIO CONFIG: remote database spikeforest (readonly); remote kb-share 69432e9201d0 (readonly)


In [95]:
# Identifiers of the result sets to assemble; each one is used as a subkey
# when loading the 'spikeforest_results' objects.
output_ids=[
    'visapy_mea',
    'magland_synth',
    'mearec_neuronexus',
]

In [96]:
# Load one 'spikeforest_results' object per output id (the id is the subkey).
result_objects=[
    mt.loadObject(
        key=dict(
            name='spikeforest_results'
        ),
        subkey=output_id
    )
    for output_id in output_ids
]

# Flatten the per-output lists into single lists spanning all loaded outputs.
studies=[study for X in result_objects for study in X['studies']]
recordings=[recording for X in result_objects for recording in X['recordings']]
sorting_results=[sorting_result for X in result_objects for sorting_result in X['sorting_results']]

# Ensure the output directory exists. exist_ok=True makes this a no-op when the
# directory is already present and avoids the check-then-create race of
# os.path.exists() followed by os.mkdir().
os.makedirs('website_data', exist_ok=True)

In [97]:
### STUDY SETS

# Key by study-set name so that each study set is recorded once, in order of
# first appearance among the studies.
study_sets_by_name={
    study['study_set']: dict(name=study['study_set'])
    for study in studies
}

# One record per distinct study set; only the name is stored.
StudySets=[
    dict(name=entry['name'])
    for entry in study_sets_by_name.values()
]

mt.saveObject(
    object=StudySets,
    dest_path=os.path.abspath(os.path.join('website_data', 'StudySets.json')),
)
print(StudySets)

[{'name': 'visapy_mea'}, {'name': 'magland_synth'}, {'name': 'mearec_neuronexus'}]


In [98]:
### STUDIES

# One record per study. Derived values (numRecordings, sorters, etc.) are
# intentionally left out because they can be obtained from the other collections.
Studies=[
    dict(
        name=study['name'],
        studySet=study['study_set'],
        description=study['description'],
    )
    for study in studies
]

mt.saveObject(
    object=Studies,
    dest_path=os.path.abspath(os.path.join('website_data', 'Studies.json')),
)
print([S['name'] for S in Studies])

['visapy_mea', 'magland_synth_noise10_K10_C4', 'magland_synth_noise10_K10_C8', 'magland_synth_noise10_K20_C4', 'magland_synth_noise10_K20_C8', 'magland_synth_noise20_K10_C4', 'magland_synth_noise20_K10_C8', 'magland_synth_noise20_K20_C4', 'magland_synth_noise20_K20_C8', 'mearec_neuronexus_noise10_K10_C32', 'mearec_neuronexus_noise10_K20_C32', 'mearec_neuronexus_noise10_K40_C32', 'mearec_neuronexus_noise20_K10_C32', 'mearec_neuronexus_noise20_K20_C32', 'mearec_neuronexus_noise20_K40_C32']


In [99]:
### RECORDINGS and TRUE UNITS

Recordings=[]
TrueUnits=[]
for recording in recordings:
    # The per-unit info is a separate object, loaded from the path stored in
    # the recording's summary.
    true_units_info=mt.loadObject(path=recording['summary']['true_units_info'])

    # One TrueUnit record per entry, tagged with its recording and study.
    TrueUnits.extend(
        dict(
            unitId=unit_info['unit_id'],
            recording=recording['name'],
            study=recording['study'],
            firingRate=unit_info['firing_rate'],
            numEvents=unit_info['num_events'],
            peakChannel=unit_info['peak_channel'],
            snr=unit_info['snr'],
        )
        for unit_info in true_units_info
    )

    # One Recording record, combining descriptive fields with the computed summary info.
    computed_info=recording['summary']['computed_info']
    Recordings.append(dict(
        name=recording['name'],
        study=recording['study'],
        directory=recording['directory'],
        description=recording['description'],
        sampleRate=computed_info['samplerate'],
        numChannels=computed_info['num_channels'],
        durationSec=computed_info['duration_sec'],
        numTrueUnits=len(true_units_info),
    ))

mt.saveObject(
    object=Recordings,
    dest_path=os.path.abspath(os.path.join('website_data', 'Recordings.json')),
)
mt.saveObject(
    object=TrueUnits,
    dest_path=os.path.abspath(os.path.join('website_data', 'TrueUnits.json')),
)
print('Num recordings:',len(Recordings))
print('Num true units:',len(TrueUnits))
print('studies for recordings:',{rec['study'] for rec in Recordings})

Num recordings: 146
Num true units: 2696
studies for recordings: {'magland_synth_noise20_K10_C8', 'magland_synth_noise20_K10_C4', 'mearec_neuronexus_noise10_K20_C32', 'magland_synth_noise10_K20_C4', 'magland_synth_noise10_K10_C8', 'magland_synth_noise20_K20_C4', 'magland_synth_noise20_K20_C8', 'mearec_neuronexus_noise20_K20_C32', 'mearec_neuronexus_noise20_K40_C32', 'mearec_neuronexus_noise10_K10_C32', 'magland_synth_noise10_K20_C8', 'magland_synth_noise10_K10_C4', 'mearec_neuronexus_noise20_K10_C32', 'visapy_mea', 'mearec_neuronexus_noise10_K40_C32'}


In [100]:
### UNIT RESULTS

UnitResults=[]
for sr in sorting_results:
    # The comparison is a separate JSON object; only its values are used here.
    comparison_with_truth=mt.loadObject(path=sr['comparison_with_truth']['json'])

    # One UnitResult record per compared unit, tagged with recording, study and sorter.
    UnitResults.extend(
        dict(
            unitId=unit['unit_id'],
            recording=sr['recording']['name'],
            study=sr['recording']['study'],
            sorter=sr['sorter']['name'],
            numMatches=unit['num_matches'],
            numFalsePositives=unit['num_false_positives'],
            numFalseNegatives=unit['num_false_negatives'],
            bestUnitId=unit['best_unit'],
            matchedUnitId=unit['matched_unit'],
        )
        for unit in comparison_with_truth.values()
    )

mt.saveObject(
    object=UnitResults,
    dest_path=os.path.abspath(os.path.join('website_data', 'UnitResults.json')),
)
print('Num unit results:',len(UnitResults))

Num unit results: 10784


In [101]:
### SORTERS

# Index sorters by name so each sorter is recorded once even though it is
# referenced by many sorting results (a later occurrence replaces an earlier one).
sorters_by_name=dict()
for sr in sorting_results:
    sorters_by_name[sr['sorter']['name']]=sr['sorter']

Sorters=[]
for sorter in sorters_by_name.values():
    Sorters.append(dict(
        name=sorter['name'],
        algorithm=sorter['processor_name'], # right now the algorithm is the same as the processor name
        processorName=sorter['processor_name'],
        processorVersion='0', # TODO (jfm): placeholder until the real processor version is provided
        # The key is camelCase ('sortingParameters') to match the documented Sorter
        # schema and the other fields; it was previously written as 'sorting_parameters'.
        # Note for consumers (Liz): even though most sorters have similar parameter
        # names, it won't always be like that. The params is an arbitrary json object.
        sortingParameters=sorter['params']
    ))

mt.saveObject(object=Sorters, dest_path=os.path.abspath(os.path.join('website_data', 'Sorters.json')))
print([S['name'] for S in Sorters])

['MountainSort4-thr3', 'SpykingCircus', 'Yass', 'IronClust-static']
