# Dandiset Manifest
### This notebook shows all of the assets within this dandiset, organized by their transgenic lines and recording locations

In [1]:
from dandi.dandiapi import DandiAPIClient

In [2]:
import numpy as np
import pandas as pd
import pynwb
from tqdm import tqdm
from joblib import Parallel, delayed

import warnings

In [3]:
# Identifier of the dandiset whose assets are catalogued in this notebook;
# it is passed to `client.get_dandiset` below.
dandiset_id = '000039'

In [4]:
# Client object used to query the DANDI API.
client = DandiAPIClient()
# NOTE(review): `dandisets` is not referenced by any later cell visible here.
# Presumably this enumerates every dandiset the client can see, which costs
# extra requests -- confirm it is still needed.
dandisets = list(client.get_dandisets())

In [5]:
# Handle to the dandiset selected by `dandiset_id`; its assets are listed next.
ds = client.get_dandiset(dandiset_id)

In [6]:
# Materialise the asset listing once so it can be counted and iterated.
asset_list = list(ds.get_assets())
number_assets = len(asset_list)
# The `manifest` table itself is assembled in a later cell from the per-asset
# records (`manifest = pd.DataFrame(result)`).

def to_struct(asset):
    """Collect the manifest fields for a single asset.

    Session and subject fields are taken from the asset's raw metadata. The
    recording location is read from the NWB file, which is opened from the
    asset's S3 content URL with ``driver='ros3'`` and closed again before
    returning.

    Parameters
    ----------
    asset
        Object providing ``get_raw_metadata()`` and
        ``get_content_url(regex=...)``.

    Returns
    -------
    dict
        Keys, in insertion order: ``session_id``, ``specimen_id``,
        ``genotype``, ``sex``, ``age``, ``path``, ``date``, ``area``,
        ``imaging_depth``. ``area`` and ``imaging_depth`` are the second
        space-separated token of the first and second comma-separated parts
        of the ``imaging_plane_1`` location string.

    Raises
    ------
    KeyError, IndexError
        If an expected metadata field or imaging plane is missing, or the
        location string does not have the expected comma/space layout.
    """
    md = asset.get_raw_metadata()
    session = md['wasGeneratedBy'][0]
    subject = md['wasAttributedTo'][0]
    record = {
        'session_id': session['identifier'],
        'specimen_id': subject['identifier'],
        'genotype': subject['genotype'],
        'sex': subject['sex']['name'],
        'age': subject['age']['value'],
        'path': md['path'],
        'date': session['startDate'],
    }

    s3_url = asset.get_content_url(regex='s3')
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # The context manager closes the file handle even if reading fails;
        # the location string is pulled out while the file is still open.
        with pynwb.NWBHDF5IO(s3_url, mode='r', load_namespaces=True, driver='ros3') as io:
            location = io.read().imaging_planes['imaging_plane_1'].location

    # NOTE(review): presumably the location looks like "<label> <area>,<label> <depth> ..."
    # with no space after the comma -- confirm against the NWB files.
    parts = location.split(',')
    record['area'] = parts[0].split(' ')[1]
    record['imaging_depth'] = parts[1].split(' ')[1]
    return record

# Run `to_struct` on every asset across 10 joblib workers; tqdm wraps the
# iterable that feeds tasks to the pool.
result = Parallel(n_jobs=10)(
    delayed(to_struct)(asset) for asset in tqdm(asset_list)
)

  0%|          | 0/100 [00:00<?, ?it/s]

 10%|█         | 10/100 [00:00<00:02, 32.21it/s]

 10%|█         | 10/100 [00:19<00:02, 32.21it/s]

 20%|██        | 20/100 [00:25<01:58,  1.48s/it]

 30%|███       | 30/100 [00:50<02:17,  1.96s/it]

 40%|████      | 40/100 [01:08<01:52,  1.88s/it]

 50%|█████     | 50/100 [01:28<01:36,  1.92s/it]

 60%|██████    | 60/100 [01:46<01:15,  1.88s/it]

 70%|███████   | 70/100 [02:04<00:56,  1.88s/it]

 80%|████████  | 80/100 [02:25<00:38,  1.93s/it]

 90%|█████████ | 90/100 [02:45<00:19,  1.98s/it]

100%|██████████| 100/100 [03:05<00:00,  1.97s/it]

100%|██████████| 100/100 [03:05<00:00,  1.85s/it]




In [7]:
# One row per asset, built from the list of dicts returned by `to_struct`.
manifest = pd.DataFrame(result)

In [8]:
# Preview the first five rows of the manifest.
manifest.head(n=5)

Unnamed: 0,session_id,specimen_id,genotype,sex,age,path,date,area,imaging_depth
0,698273664,673647168,Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,Male,P128D,sub-673647168/sub-673647168_ses-698273664_beha...,2018-05-16T15:18:38.587000-07:00,VISp,175
1,792319003,760940732,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,Female,P136D,sub-760940732/sub-760940732_ses-792319003_beha...,2018-12-10T16:37:40.325000-08:00,VISp,275
2,791556785,760940732,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,Female,P136D,sub-760940732/sub-760940732_ses-791556785_beha...,2018-12-07T16:38:34.105000-08:00,VISp,275
3,791125374,760940732,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,Female,P136D,sub-760940732/sub-760940732_ses-791125374_beha...,2018-12-06T16:40:45.901000-08:00,VISal,275
4,793874958,760940732,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,Female,P136D,sub-760940732/sub-760940732_ses-793874958_beha...,2018-12-12T16:37:43.216000-08:00,VISl,275


This dataframe has a row for each asset, describing some key metadata about the animal and recording location. We can explore the dataframe to identify assets by genotype, recording location (i.e. area), or other parameters, and find the path for each asset.

In [9]:
# Distinct genotypes present in the manifest.
manifest['genotype'].unique()

array(['Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt',
       'Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt',
       'Cux2-CreERT2/wt;Camk2a-tTA/wt;Ai93(TITL-GCaMP6f)/wt',
       'Rbp4-Cre_KL100/wt;Camk2a-tTA/wt;Ai93(TITL-GCaMP6f)/wt',
       'Rorb-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-GCaMP6f)/wt',
       'Cux2-CreERT2/wt;Camk2a-CreERT2/wt;Ai93(TITL-GCaMP6f)/wt',
       'Ntsr1-Cre_GN220/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt'], dtype=object)

In [10]:
# Distinct recording areas present in the manifest.
manifest['area'].unique()

array(['VISp', 'VISal', 'VISl', 'VISpm', 'VISam', 'VISrl'], dtype=object)

In [11]:
# Number of assets recorded from female animals.
is_female = manifest['sex'] == 'Female'
int(is_female.sum())

64

In [12]:
# Count of assets for every genotype (rows) x area (columns) combination;
# combinations with no assets are shown as 0.
manifest.pivot_table(
    values=['path'],
    index=['genotype'],
    columns=['area'],
    aggfunc='count',
    fill_value=0,
)

Unnamed: 0_level_0,path,path,path,path,path,path
area,VISal,VISam,VISl,VISp,VISpm,VISrl
genotype,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Cux2-CreERT2/wt;Camk2a-CreERT2/wt;Ai93(TITL-GCaMP6f)/wt,1,0,0,1,0,1
Cux2-CreERT2/wt;Camk2a-tTA/wt;Ai93(TITL-GCaMP6f)/wt,5,6,6,8,5,2
Ntsr1-Cre_GN220/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,0,0,0,6,0,0
Rbp4-Cre_KL100/wt;Camk2a-tTA/wt;Ai93(TITL-GCaMP6f)/wt,0,0,0,4,0,0
Rorb-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-GCaMP6f)/wt,4,0,4,4,4,0
Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,9,0,8,8,8,0
Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,0,0,0,6,0,0
