# Dandiset Manifest
### This notebook lists every asset in this dandiset with its key metadata, so that assets can be organized by transgenic line and recording location

In [1]:
from dandi.dandiapi import DandiAPIClient

In [2]:
import numpy as np
import pandas as pd
import pynwb
from tqdm import tqdm
from joblib import Parallel, delayed

import warnings
warnings.simplefilter("ignore", category=UserWarning)

In [3]:
# Identifier of the dandiset whose assets are catalogued in this notebook.
dandiset_id = '000039'

In [4]:
# Client used for every DANDI archive API request in this notebook.
client = DandiAPIClient()
# NOTE(review): `dandisets` is not referenced again in the visible notebook, and
# this call appears to enumerate every dandiset on the archive — confirm it is
# still needed before keeping the cost of this request.
dandisets = list(client.get_dandisets())

In [5]:
# Handle to the target dandiset; its assets are enumerated in the next cell.
ds = client.get_dandiset(dandiset_id)

In [6]:
# Materialize the asset listing once so it can be counted and iterated.
# The manifest DataFrame itself is built later, directly from the per-asset
# records, so no empty frame is pre-allocated here.
asset_list = list(ds.get_assets())
number_assets = len(asset_list)

def to_struct(asset):
    """Collect the manifest record for a single dandiset asset.

    Subject and session fields are taken from the asset's raw metadata. The
    recording area and imaging depth are parsed from the ``location`` string
    of ``imaging_plane_1`` in the asset's NWB file, which is opened remotely
    from its S3 URL rather than downloaded.

    Parameters
    ----------
    asset
        Object exposing ``get_raw_metadata()`` and
        ``get_content_url(regex=...)``.

    Returns
    -------
    dict
        Keys, in order: ``session_id``, ``specimen_id``, ``genotype``,
        ``sex``, ``age``, ``path``, ``date``, ``area``, ``imaging_depth``.

    Raises
    ------
    KeyError, IndexError
        If the metadata or the location string does not have the expected
        layout.

    Notes
    -----
    The file is opened with the h5py ``ros3`` driver, so h5py must be built
    with ROS3 support; otherwise opening fails with
    ``ValueError: h5py was built without ROS3 support, can't use ros3 driver``.
    """
    md = asset.get_raw_metadata()
    session = md['wasGeneratedBy'][0]
    subject = md['wasAttributedTo'][0]
    record = {
        'session_id': session['identifier'],
        'specimen_id': subject['identifier'],
        'genotype': subject['genotype'],
        'sex': subject['sex']['name'],
        'age': subject['age']['value'],
        'path': md['path'],
        'date': session['startDate'],
    }

    s3_url = asset.get_content_url(regex='s3')
    # The context manager closes the remote file even if reading fails, so
    # handles are not leaked across the many assets processed by each worker.
    with pynwb.NWBHDF5IO(s3_url, mode='r', load_namespaces=True, driver='ros3') as io:
        location = io.read().imaging_planes['imaging_plane_1'].location

    # `location` is comma-separated; within each of the first two parts the
    # second space-delimited token is kept as the value.
    parts = location.split(',')
    record['area'] = parts[0].split(' ')[1]
    record['imaging_depth'] = parts[1].split(' ')[1]
    return record

# Run to_struct over every asset on 10 joblib workers. tqdm wraps the task
# generator, so the bar advances as Parallel consumes tasks from it.
result = Parallel(n_jobs=10)(
    delayed(to_struct)(asset) for asset in tqdm(asset_list)
)

  0%|          | 0/100 [00:00<?, ?it/s]

 10%|█         | 10/100 [00:00<00:02, 30.21it/s]

 20%|██        | 20/100 [00:05<00:25,  3.09it/s]

 30%|███       | 30/100 [00:06<00:14,  4.95it/s]

ValueError: h5py was built without ROS3 support, can't use ros3 driver

In [None]:
# Build the manifest table: each dict returned by to_struct becomes one row,
# with the dict keys as columns.
manifest = pd.DataFrame(result)

In [None]:
# Preview the first rows of the manifest.
manifest.head()

This dataframe has a row for each asset, describing some key metadata about the animal and recording location. We can explore the dataframe to identify assets by genotype, recording location (i.e. area), or other parameters, and find the path for each asset.

In [None]:
# Distinct genotype values found across the assets.
manifest.genotype.unique()

In [None]:
# Distinct recording-area values found across the assets.
manifest.area.unique()

In [None]:
# Number of assets whose subject sex is recorded as 'Female'.
len(manifest[manifest.sex=='Female'])

In [None]:
# Count of assets for each genotype (rows) and recording area (columns);
# combinations with no assets are shown as 0.
pd.pivot_table(
    manifest,
    index=['genotype'],
    columns=['area'],
    values=['path'],
    aggfunc='count',
    fill_value=0,
)