<a href="https://colab.research.google.com/github/magland/spikeforest_batch_run/blob/master/notebooks/assemble_website_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Assemble website data

This notebook assembles the data for the website.


This is the information that was sent to Liz on 11/16/18:


Here's the data for the website:
```
kb.loadObject(
    key=dict(
        target='spikeforest_website_dev',
        name='studies'
    )
)

kb.loadObject(
    key=dict(
        target='spikeforest_website_dev',
        name='recordings'
    )
)

kb.loadObject(
    key=dict(
        target='spikeforest_website_dev',
        name='true_units'
    )
)

kb.loadObject(
    key=dict(
        target='spikeforest_website_dev',
        name='sorters'
    )
)
```


In [0]:
# Only run this cell if you are running this on a hosted runtime that does not have these packages installed
# Consider connecting to a local runtime
%%capture
!pip install spikeforest

In [1]:
from kbucket import client as kb
import spikeforest as sf
import spikeinterface as si
import json
import numpy as np

vdomr: using colab because of VDOMR_MODE environment variable


In [2]:
# Select the read/write kbucket configuration used by the rest of this notebook.
# With ask_password=True the password is requested when the cell runs
# (see the "Enter password" prompt in the cell output).
sf.kbucketConfigRemote(
    ask_password=True,
    name='spikeforest1-readwrite'
)

Enter password: ··········
Pairio user set to spikeforest. Test succeeded.


In [0]:
# Names of the recording collections passed to SF.loadRecordings
recording_collection_names=[
  'spikeforest_magland_synth_recordings',
  'spikeforest_bionet_recordings',
  'spikeforest_mearec_tetrode_recordings',
]

# Names of the processing batches passed to SF.loadProcessingBatch.
# The order is significant only in that it is the order in which they are loaded.
batch_names=[
  # recording summaries
  'summarize_recordings_magland_synth',
  'summarize_recordings_bionet',
  'summarize_recordings_mearec_tetrode',
  # sorting runs on magland_synth
  'ms4_magland_synth',
  'irc_magland_synth',
  'sc_magland_synth_dev4',
  'ks_magland_synth',
  # sorting runs on bionet
  'ms4_bionet',
  'irc_bionet',
  'sc_bionet',
  'ks_bionet',
  # sorting runs on mearec_tetrode
  'ms4_mearec_tetrode',
  'irc_mearec_tetrode',
  'sc_mearec_tetrode',
  'ks_mearec_tetrode',
]

In [42]:
## Load the SpikeForest data
# The recording collections are loaded first, then the job results of every
# processing batch are loaded into the same SFData object.
SF=sf.SFData()

for collection_name in recording_collection_names:
  print(collection_name)
  SF.loadRecordings(key={'name':collection_name})

for batch_name in batch_names:
  print(batch_name)
  SF.loadProcessingBatch(key={'batch_name':batch_name,'name':'job_results'})

spikeforest_magland_synth_recordings
spikeforest_bionet_recordings
spikeforest_mearec_tetrode_recordings
summarize_recordings_magland_synth
Loaded 0 sorting results and 80 recording summary results
summarize_recordings_bionet
Loaded 0 sorting results and 36 recording summary results
summarize_recordings_mearec_tetrode
Loaded 0 sorting results and 40 recording summary results
ms4_magland_synth
Loaded 80 sorting results and 0 recording summary results
irc_magland_synth
Loaded 80 sorting results and 0 recording summary results
sc_magland_synth_dev4
Loaded 80 sorting results and 0 recording summary results
ks_magland_synth
Loaded 80 sorting results and 0 recording summary results
ms4_bionet
Loaded 36 sorting results and 0 recording summary results
irc_bionet
Loaded 36 sorting results and 0 recording summary results
sc_bionet
Loaded 36 sorting results and 0 recording summary results
ks_bionet
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/f/8b/f8ba630ebfe5

In [0]:
def min_max_range(a):
  """Return the smallest and largest value of ``a`` as a list ``[min, max]``.

  ``a`` may be any iterable, including a one-shot iterator or generator: the
  values are materialized once so that the minimum and the maximum are both
  computed from the same data (calling ``min`` directly on an iterator would
  exhaust it before ``max`` runs).

  Raises:
    ValueError: if ``a`` contains no values.
  """
  values=list(a)
  return [min(values),max(values)]

def compute_recording_ranges(study):
  """Summarize the recordings of ``study`` as [min, max] ranges.

  For every recording name returned by ``study.recordingNames()`` this reads
  ``duration_sec``, ``num_channels`` and ``samplerate`` from the
  ``computed_info`` entry of the recording's summary object, counts the entries
  of ``trueUnitsInfo(format='json')``, and asks kbucket for the size of
  ``<recording directory>/raw.mda``.

  Returns a dict with the keys ``duration_sec``, ``num_channels``,
  ``samplerate_hz``, ``file_size_bytes`` and ``num_ground_truth_units``; each
  value is the two-element list produced by ``min_max_range``.
  """
  names=study.recordingNames()

  summaries=[]
  for name in names:
    summaries.append(study.recording(name).getSummaryObject())
  dirnames=[]
  for name in names:
    dirnames.append(study.recording(name).directory())

  infos=[summary['computed_info'] for summary in summaries]
  durations=[float(info['duration_sec']) for info in infos]
  channel_counts=[int(info['num_channels']) for info in infos]
  samplerates=[int(info['samplerate']) for info in infos]
  true_unit_counts=[
    int(len(study.recording(name).trueUnitsInfo(format='json')))
    for name in names
  ]
  file_sizes=[int(kb.getFileSize(dirname+'/raw.mda')) for dirname in dirnames]

  return {
    'duration_sec':min_max_range(durations),
    'num_channels':min_max_range(channel_counts),
    'samplerate_hz':min_max_range(samplerates),
    'file_size_bytes':min_max_range(file_sizes),
    'num_ground_truth_units':min_max_range(true_unit_counts)
  }

In [44]:
## Load the studies
# Build one summary dict per study; every dict is stored both in a list
# (all_studies) and in a by-name index (studies_by_name).
print('Loading studies')
all_studies=[]
studies_by_name={}
for study_name in SF.studyNames():
  print(study_name)
  study_entry={
    'name':study_name,
    'sorters':[], # starts empty
    'num_recordings':len(SF.study(study_name).recordingNames()),
    'recording_ranges':compute_recording_ranges(SF.study(study_name))
  }
  all_studies.append(study_entry)
  studies_by_name[study_name]=study_entry

Loading studies
magland_synth_noise10_K10_C4
magland_synth_noise10_K10_C8
magland_synth_noise10_K20_C4
magland_synth_noise10_K20_C8
magland_synth_noise20_K10_C4
magland_synth_noise20_K10_C8
magland_synth_noise20_K20_C4
magland_synth_noise20_K20_C8
bionet_drift
bionet_shuffle
bionet_static
mearec_tetrode_noise10_K10_C4
mearec_tetrode_noise10_K20_C4
mearec_tetrode_noise20_K10_C4
mearec_tetrode_noise20_K20_C4


In [45]:
## Load the recordings
# Collect the object of every recording, study by study.
print('Loading recordings')
all_recordings=[]
for study_name in SF.studyNames():
  study_obj=SF.study(study_name)
  all_recordings.extend(
    study_obj.recording(recording_name).getObject()
    for recording_name in study_obj.recordingNames()
  )

Loading recordings


In [46]:
## Load the units
# Every true unit is tagged with its study and recording, gets an empty
# 'sorting_results' dict, and is indexed under "<study>---<recording>---<unit_id>".
all_true_units=[]
unit_lookup={}
for study_name in SF.studyNames():
  study_obj=SF.study(study_name)
  for recording_name in study_obj.recordingNames():
    units_info=study_obj.recording(recording_name).trueUnitsInfo(format='json')
    for true_unit in units_info:
      true_unit['study']=study_name
      true_unit['recording']=recording_name
      true_unit['sorting_results']={}
      all_true_units.append(true_unit)
      unit_key=study_name+'---'+recording_name+'---'+str(true_unit['unit_id'])
      unit_lookup[unit_key]=true_unit
print('Found {} true units'.format(len(all_true_units)))

Found 4921 true units


In [47]:
## Load the sorting results
# For every sorting result: record the sorter on its study, remember the sorter
# description, and attach the per-unit comparison to the matching true unit.
print('Loading sorting results')
count=0
sorters_by_name={}
for study_name in SF.studyNames():
  study_obj=SF.study(study_name)
  for recording_name in study_obj.recordingNames():
    recording_obj=study_obj.recording(recording_name)
    for result_name in recording_obj.sortingResultNames():
      sorting_result=recording_obj.sortingResult(result_name)
      result=sorting_result.getObject()
      study_sorters=studies_by_name[study_name]['sorters']
      if sorting_result.sorterName() not in study_sorters:
        study_sorters.append(sorting_result.sorterName())
      comparison=sorting_result.comparisonWithTruth(format='json')
      sorters_by_name[sorting_result.sorterName()]={
        'name':result['sorter_name'],
        'processor_name':result['sorting_processor_name'],
        'processor_version':result['sorting_processor_version'],
        'params':result['sorting_params']
      }
      for unit_id in comparison:
        unit_key=study_name+'---'+recording_name+'---'+str(unit_id)
        # comparison entries without a matching true unit are ignored
        if unit_key not in unit_lookup:
          continue
        unit_lookup[unit_key]['sorting_results'][sorting_result.sorterName()]=comparison[unit_id]
        count+=1
print('Loaded {} sorted units'.format(count))


Loading sorting results
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/5/7c/57cc82a4287b12a0ba08f883720ef4b702b18b7a -> /home/magland/kbucket_cache/5/7c/57cc82a4287b12a0ba08f883720ef4b702b18b7a
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/7/83/783e9b9cd02dab1aa999560cbc4cdf6f90aed014 -> /home/magland/kbucket_cache/7/83/783e9b9cd02dab1aa999560cbc4cdf6f90aed014
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/d/eb/deb9c57f91f43e07c36be53bed0a39e36497078a -> /home/magland/kbucket_cache/d/eb/deb9c57f91f43e07c36be53bed0a39e36497078a
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/b/38/b387bbd0f073a6285f0c3a57641c3c5ad09cfbae -> /home/magland/kbucket_cache/b/38/b387bbd0f073a6285f0c3a57641c3c5ad09cfbae
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/e/dc/edc8f8722f6ecbf5cc66aaa962606babcc6572de -> /home/magland/kbucket_cac

In [49]:
target='spikeforest_website_dev'

all_sorters=list(sorters_by_name.values())
print('Found {} sorters'.format(len(all_sorters)))

# One row per saved object: (object name, progress message, payload).
# Each payload is stored under key {target, name} as {<object name>: payload}.
for object_name,message,items in [
  ('studies','Saving {} studies'.format(len(all_studies)),all_studies),
  ('recordings','Saving {} recordings'.format(len(all_recordings)),all_recordings),
  ('true_units','Saving {} true units'.format(len(all_true_units)),all_true_units),
  ('sorters','Saving {} sorters'.format(len(all_sorters)),all_sorters)
]:
  print(message)
  kb.saveObject(
    key={'target':target,'name':object_name},
    object={object_name:items}
  )

Found 5 sorters
Saving 15 studies
Already on server (*)
Saving 156 recordings
Already on server.
Saving 4921 true units
Already on server (*)
Saving 5 sorters
Already on server.


In [50]:
# Read each saved object back and pretty-print its first entry as a sanity check.
for heading,object_name in [
  ('Study:','studies'),
  ('Recording:','recordings'),
  ('Unit:','true_units'),
  ('Sorter:','sorters')
]:
  print(heading)
  obj=kb.loadObject(
    key={'target':'spikeforest_website_dev','name':object_name}
  )
  print(json.dumps(obj[object_name][0],indent=4))

Study:
{
    "name": "magland_synth_noise10_K10_C4",
    "sorters": [
        "MountainSort4-thr3",
        "IronClust-tetrode",
        "SpykingCircus",
        "KiloSort"
    ],
    "num_recordings": 10,
    "recording_ranges": {
        "duration_sec": [
            600.0,
            600.0
        ],
        "num_channels": [
            4,
            4
        ],
        "samplerate_hz": [
            30000,
            30000
        ],
        "file_size_bytes": [
            288000020,
            288000020
        ],
        "num_ground_truth_units": [
            10,
            10
        ]
    }
}
Recording:
{
    "name": "001_synth",
    "study": "magland_synth_noise10_K10_C4",
    "directory": "kbucket://15734439d8cf/groundtruth/magland_synth/datasets_noise10_K10_C4/001_synth",
    "description": "One of the recordings in the magland_synth_noise10_K10_C4 study"
}
Unit:
{
    "unit_id": 1,
    "snr": 25.396783859187707,
    "peak_channel": 0,
    "num_events": 1398,
    "f

In [65]:
# Print what kb.findFile reports for each saved object when only the remote
# side is searched (local=False, remote=True).
for prefix,object_name in [
  ('studies: ','studies'),
  ('recordings: ','recordings'),
  ('units: ','true_units'),
  ('sorters: ','sorters')
]:
  location=kb.findFile(
    key={'target':'spikeforest_website_dev','name':object_name},
    local=False,
    remote=True
  )
  print(prefix+location)

studies: http://132.249.245.245:24351/7317cea8265b/download/c/7a/c7afb64466b4bf5aed2dcc8b176d6215ebc15c01
recordings: http://132.249.245.245:24351/7317cea8265b/download/f/37/f377f635b25dbc1b2a3bad6b9f98f3d3856e7545
units: http://132.249.245.245:24351/7317cea8265b/download/9/c4/9c4e5906f4ebf28c651bfe45874733c269ed7b5e
sorters: http://132.249.245.245:24351/7317cea8265b/download/3/e8/3e8e627d69c194eb3b3fa4bad3aec5b7adba8187


In [0]:
## This was the old method

# Split the job results of every batch into sorting results and recording
# summaries; summaries lacking 'true_units_info' are reported and skipped.
all_sorting_results=[]
all_summarize_recording_results=[]
for bname in batch_names:
  print('Loading '+bname)
  obj=kb.loadObject(key={'batch_name':bname,'name':'job_results'})
  for res in obj['job_results']:
    command=res['job']['command']
    if command=='sort_recording':
      all_sorting_results.append(res)
      continue
    if command!='summarize_recording':
      continue
    if 'true_units_info' not in res['result']:
      print('WARNING: no field, true_units_info, skipping.')
      display(res)
      continue
    all_summarize_recording_results.append(res)

import json
def load_json(fname):
  """Resolve ``fname`` through ``kb.realizeFile`` and return its parsed JSON content."""
  local_path=kb.realizeFile(fname)
  with open(local_path) as json_file:
    parsed=json.load(json_file)
  return parsed

def use_study(name):
  """Return True when the study name starts with 'magland' or 'bionet', else False."""
  return name.startswith(('magland','bionet'))

## Load the studies
# Every study gets an empty 'sorters' list and is indexed by name;
# only the studies accepted by use_study are kept in all_studies.
print('Loading studies')
obj=kb.loadObject(key={'name':'spikeforest_recordings'})
studies=obj['studies']
all_studies=[]
studies_by_name={}
for study in studies:
  study_name=study['name']
  study['sorters']=[] # initialize
  studies_by_name[study_name]=study
  if not use_study(study_name):
    continue
  all_studies.append(study)

## Load the recordings
# Keep the 'result' object of every recording summary whose study is accepted by use_study.
print('Loading recordings')
all_recordings=[]
for res in all_summarize_recording_results:
  recording=res['result']
  study=recording['study']
  if not use_study(study):
    continue
  all_recordings.append(recording)
    
## Load the units
# Units are read from the JSON file referenced by each recording summary,
# tagged with study/recording, and indexed under "<study>---<recording>---<unit_id>".
all_true_units=[]
unit_lookup={}
print('Loading summarize recording results')
for res in all_summarize_recording_results:
  job_recording=res['job']['recording']
  study=job_recording['study']
  recording=job_recording['name']
  if not use_study(study):
    continue
  obj=load_json(res['result']['true_units_info'])
  for unit in obj:
    unit['study']=study
    unit['recording']=recording
    unit['sorting_results']={}
    all_true_units.append(unit)
    code=study+'---'+recording+'---'+str(unit['unit_id'])
    unit_lookup[code]=unit
  #res['result']['true_units_info_data']=obj
print('Found {} true units'.format(len(all_true_units)))

## Load the sorting results
# The sorter is recorded on its study for every result; the comparison with
# truth is only loaded for studies accepted by use_study.
print('Loading sorting results')
count=0
sorters_by_name={}
for res in all_sorting_results:
  job=res['job']
  study=job['recording']['study']
  recording=job['recording']['name']
  sorter=job['sorter']['name']
  study_sorters=studies_by_name[study]['sorters']
  if sorter not in study_sorters:
    study_sorters.append(sorter)
  if not use_study(study):
    continue
  obj=load_json(res['result']['comparison_with_truth']['json'])
  sorters_by_name[sorter]=job['sorter']
  for unit_id in obj:
    code=study+'---'+recording+'---'+str(unit_id)
    # comparison entries without a matching true unit are ignored
    if code not in unit_lookup:
      continue
    unit_lookup[code]['sorting_results'][sorter]=obj[unit_id]
    count+=1
print('Loaded {} sorted units'.format(count))

all_sorters=list(sorters_by_name.values())
print('Found {} sorters'.format(len(all_sorters)))

# One row per saved object: (object name, progress message, payload).
# Each payload is stored under key {target, name} as {<object name>: payload}.
for object_name,message,items in [
  ('studies','Saving {} studies'.format(len(all_studies)),all_studies),
  ('recordings','Saving {} recordings'.format(len(all_recordings)),all_recordings),
  ('true_units','Saving units',all_true_units),
  ('sorters','Saving sorters',all_sorters)
]:
  print(message)
  kb.saveObject(
    key={'target':'spikeforest_website','name':object_name},
    object={object_name:items}
  )

  
#print('Loading summarize recording results')
#for res in all_summarize_recording_results:
#  obj=load_json(res['result']['true_units_info'])
#  res['result']['true_units_info_data']=obj

Loading studies
Loading recordings
Loading summarize recording results
Found 26688 true units
Loading sorting results
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/a/65/a65d172d96d74c8b92e9b4125152eb0bee2296d2 -> /home/magland/kbucket_cache/a/65/a65d172d96d74c8b92e9b4125152eb0bee2296d2
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/9/7f/97fe8157e6c38915aadf5b9013952a4b0191a366 -> /home/magland/kbucket_cache/9/7f/97fe8157e6c38915aadf5b9013952a4b0191a366
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/f/37/f375280367ae2e171790c6be34fcdd0cf9a84e17 -> /home/magland/kbucket_cache/f/37/f375280367ae2e171790c6be34fcdd0cf9a84e17
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/3/1a/31ad16e4f4ed9d0d1e14671ca67e38796d146216 -> /home/magland/kbucket_cache/3/1a/31ad16e4f4ed9d0d1e14671ca67e38796d146216
Downloading file --- (0.0 MB): http://132.249.245.245:24351/73