In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, output_file
from bokeh.layouts import column, row
from bokeh.models import OpenURL, TapTool, CustomJS, Div, Callback
from bokeh import events

output_notebook()

## Define utility functions

In [2]:
from dandi.dandiapi import DandiAPIClient
import json
import pandas as pd
import numpy as np
from urllib.parse import quote, unquote

In [3]:
def assets_to_df(ds):
    """Tabulate the assets of a dandiset, one row per asset.

    The file name of every asset is split on ``_`` and each ``key-value``
    token becomes a column. Additional columns record the enclosing
    ``sub-*`` directory (``subdir``), the full ``path``, the trailing
    suffix of the file name (``modality``), the ``extension`` and the
    ``modified`` attribute of the asset.

    Parameters
    ----------
    ds : object
        Anything exposing ``get_assets()``; the yielded items must provide
        ``path`` and ``modified`` attributes.

    Returns
    -------
    tuple
        ``(df, assets)`` where ``assets`` is the materialised asset list and
        ``df`` has one row per asset, in the same order.
    """
    assets = list(ds.get_assets())
    records = []
    for asset in assets:
        *parent_dirs, filename = asset.path.split("/")
        stem, _, extension = filename.partition(".")

        # key-value entities encoded in the file name, e.g. "sample-127"
        record = {}
        for token in stem.split("_"):
            if "-" in token:
                key, _, value = token.partition("-")
                record[key] = value

        # When several parent directories start with "sub-", the deepest wins.
        subject_dirs = [part for part in parent_dirs if part.startswith("sub-")]
        if subject_dirs:
            subdir = subject_dirs[-1].split("sub-")[1]
            if subdir:
                record["subdir"] = subdir

        record["path"] = asset.path

        # A modality is only recorded for subject files that live below a
        # directory, i.e. there is a "/" somewhere after the first "sub-".
        if "_" in filename and "sub-" in filename:
            after_first_sub = asset.path.partition("sub-")[2]
            if "/" in after_first_sub:
                record["modality"] = filename.split("_")[-1].split(".")[0]

        record["extension"] = extension
        record["modified"] = asset.modified
        records.append(record)

    return pd.DataFrame(records), assets

In [4]:
# Neuroglancer shader source shared by every cubehelix variant. The variants
# differ only in which colour channel receives each of the three terms, so the
# channel letters are the only placeholders (%s). "%%f" collapses to a literal
# "%f" once the channels are filled in; that remaining placeholder is the
# default value of the brightness slider and is filled in when building
# ``ng_colormap`` below.
_CUBEHELIX_SHADER = """
#uicontrol float brightness slider(min=0.0, max=100.0, default=%%f)
void main() {
    float x = clamp(toNormalized(getDataValue()) * brightness, 0.0, 1.0);
    float angle = 2.0 * 3.1415926 * (4.0 / 3.0 + x);
    float amp = x * (1.0 - x) / 2.0;
    vec3 result;
    float cosangle = cos(angle);
    float sinangle = sin(angle);
    result.%s = -0.14861 * cosangle + 1.78277 * sinangle;
    result.%s = -0.29227 * cosangle + -0.90649 * sinangle;
    result.%s = 1.97294 * cosangle;
    result = clamp(x + amp * result, 0.0, 1.0);
    emitRGB(result);
}
"""


def _cubehelix_shader(first, second, third):
    """Return the shader template with the three terms routed to the given channels.

    ``first``, ``second`` and ``third`` are the channel letters (``"r"``,
    ``"g"`` or ``"b"``) assigned, in order, to the three ``result.<channel>``
    lines. The returned string still contains one ``%f`` placeholder for the
    default brightness.
    """
    return _CUBEHELIX_SHADER % (first, second, third)


cubehelix_template = _cubehelix_shader("r", "g", "b")
cubehelix2_template = _cubehelix_shader("g", "r", "b")
cubehelix3_template = _cubehelix_shader("b", "g", "r")
# Same channel assignment as ``cubehelix_template``; kept as a separate name
# because it is referenced independently below.
cubehelix4_template = _cubehelix_shader("r", "g", "b")
cubehelix5_template = _cubehelix_shader("g", "b", "r")
cubehelix6_template = _cubehelix_shader("b", "r", "g")

# Default position of the brightness slider for every stain.
DEFAULT_BRIGHTNESS = 50

# Stain name -> ready-to-use shader source.
ng_colormap = {"LEC": cubehelix_template % DEFAULT_BRIGHTNESS,
               "YO": cubehelix2_template % DEFAULT_BRIGHTNESS,
               "NN": cubehelix3_template % DEFAULT_BRIGHTNESS,
               "CR": cubehelix4_template % DEFAULT_BRIGHTNESS,
               'NPY': cubehelix5_template % DEFAULT_BRIGHTNESS,
               'IBA1': cubehelix6_template % DEFAULT_BRIGHTNESS,
               'SST': cubehelix4_template % DEFAULT_BRIGHTNESS}


def get_ng_url(zarrmap, sub, sample):
    """Build a Neuroglancer URL showing one image layer per stain.

    Parameters
    ----------
    zarrmap : dict
        Maps a stain name to the source(s) of its layer. Every stain must be
        a key of ``ng_colormap``, which supplies the layer's shader.
    sub, sample
        Only used to build the layer names.

    Returns
    -------
    str
        The viewer address followed by ``#!`` and the percent-encoded JSON
        state.
    """
    layers = [
        {
            "source": sources,
            "type": "image",
            "shader": ng_colormap[stain],
            "name": f"{sub}-{sample}-{stain}-{len(sources)}",
            "tab": 'rendering',
        }
        for stain, sources in zarrmap.items()
    ]

    state = {
        "dimensions": {
            "t": [1, "s"],
            "z": [0.000002285, "m"],
            "y": [0.0000032309999999999996, "m"],
            "x": [0.000002285, "m"],
        },
        "displayDimensions": ["z", "y", "x"],
        "crossSectionScale": 50,
        "projectionScale": 500000,
        "layers": layers,
        "showDefaultAnnotations": False,
        # The layer list is only worth showing when there is a choice to make.
        "layerListPanel": {'visible': len(layers) > 1},
        "layout": "yz",
    }

    ng_url = "https://neuroglancer-demo.appspot.com/"
    return ng_url + "#!" + quote(json.dumps(state))
    

In [5]:
# Fill colours for the coverage grids, indexed by the position of a category
# (modality or stain) in its axis range.
# NOTE(review): the ten hues after the leading grey look like the ColorBrewer
# "Paired" palette - confirm before relying on that.
colormap = [
    "#444444",
    "#a6cee3", "#1f78b4",
    "#b2df8a", "#33a02c",
    "#fb9a99", "#e31a1c",
    "#fdbf6f", "#ff7f00",
    "#cab2d6", "#6a3d9a",
]

In [6]:
def sequence_plot(data, x_range, y_range, title, x_rect, y_rect, color_rect, ncols=60, tooltips=None, callback=None,
                  x_axis_location="below", mf=None):
    """Draw a categorical grid of coloured rectangles as a bokeh figure.

    Parameters
    ----------
    data : dict
        Column data handed to ``p.rect`` as ``source``.
    x_range, y_range : sequence
        Categories of the horizontal and vertical axes.
    title : str
        Figure title.
    x_rect, y_rect, color_rect : str
        Names of the ``data`` columns holding the x position, the y position
        and the fill colour of each rectangle.
    ncols : int
        Nominal number of columns used for sizing. If ``x_range`` fills less
        than 80% of it, the actual number of categories is used instead.
    tooltips : list, optional
        Hover tooltips; when given, a hover tool is added.
    callback : optional
        Assigned to the tap tool; when given, a tap tool is added.
    x_axis_location : str
        Forwarded to ``figure``.
    mf : float, optional
        Multiplier applied to the figure and font sizes. Defaults to
        ``60 / ncols``.

    Returns
    -------
    The configured bokeh figure.
    """
    nsamples = len(x_range)
    if (nsamples / ncols) < 0.8:
        ncols = nsamples
    if mf is None:
        # An empty x_range makes ncols 0; fall back to a neutral scale rather
        # than dividing by zero.
        mf = 60 / ncols if ncols else 1

    use_tap = bool(callback)
    tools = "save"
    if tooltips:
        tools += ",hover"
    if use_tap:
        tools += ",tap"

    p = figure(title=title,
               x_axis_location=x_axis_location, tools=tools,
               x_range=x_range, y_range=y_range,
               tooltips=tooltips)
    # Lower bounds keep small grids legible.
    p.width = max(int(15 * ncols * mf), 400)
    p.height = max(int(len(y_range) * 20 * mf), 125)
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = f"{int(8*mf)}px"
    p.title.text_font_size = f"{int(10*mf)}px"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = np.pi / 3
    p.output_backend = "svg"

    p.rect(x_rect, y_rect, 0.9, 0.9, source=data,
           color=color_rect, line_color=None,
           hover_line_color='red')
    if use_tap:
        # The tap tool only exists when it was requested in ``tools`` above.
        taptool = p.select(type=TapTool)
        taptool.callback = callback
    return p

def paged_sequence_plot(data, x_range, y_range, title, x_rect, y_rect, color_rect, ncols=60, tooltips=None, callback=None,
                        x_axis_location="below", mf=None):
    """Split a wide grid into pages of at most ``ncols`` columns, stacked vertically.

    Each page is drawn with ``sequence_plot`` over a consecutive slice of
    ``x_range``; its title is ``title`` followed by the first and last
    category of the slice. All other arguments are forwarded unchanged.

    Parameters
    ----------
    x_range : sequence
        All categories of the horizontal axis, in display order.
    ncols : int
        Maximum number of categories per page.

    Returns
    -------
    A bokeh ``column`` layout holding one figure per page (empty when
    ``x_range`` is empty).
    """
    page_size = ncols
    plots = []
    # Page offsets always advance by the fixed page size, independently of how
    # many columns an individual page ends up being drawn with.
    for start in range(0, len(x_range), page_size):
        mod_xrange = x_range[start:start + page_size]
        # A page that is less than 80% full is sized to its own content.
        if len(mod_xrange) / page_size < 0.8:
            page_ncols = len(mod_xrange)
        else:
            page_ncols = page_size
        p = sequence_plot(data, mod_xrange, y_range,
                          title=f"{title} Coverage: {mod_xrange[0]} -- {mod_xrange[-1]}",
                          x_rect=x_rect, y_rect=y_rect,
                          color_rect=color_rect,
                          x_axis_location=x_axis_location,
                          ncols=page_ncols,
                          tooltips=tooltips,
                          callback=callback,
                          mf=mf)
        plots.append(p)
    return column(plots)

## Work on a dashboard for a specific dandiset

In [7]:
# Identifier of the dandiset the dashboard is built for. Later cells branch on
# this value; swap it with the commented line below to target the other
# dandiset those cells know about.
dandiset = "000108"
# dandiset = "000026"

# Client for the DANDI archive API and a handle on the selected dandiset.
api = DandiAPIClient("https://api.dandiarchive.org/api")
ds = api.get_dandiset(dandiset)

# `df` has one row per asset; `assets` holds the asset objects in row order.
df, assets = assets_to_df(ds)
# Cleared here so that the alteration cell below takes a fresh snapshot of `df`.
df_orig = None
df.head()

Unnamed: 0,path,extension,modified,sub,subdir,modality,ses,sample,stain,run,chunk
0,dataset_description.json,json,2022-07-13 21:40:28.737065+00:00,,,,,,,,
1,samples.tsv,tsv,2022-07-13 21:41:07.535853+00:00,,,,,,,,
2,sub-MITU01/sub-MITU01_sessions.tsv,tsv,2022-07-13 21:43:56.549551+00:00,MITU01,MITU01,sessions,,,,,
3,sub-MITU01/ses-20210720h20m19s32/micr/sub-MITU...,ome.zarr,2022-07-13 21:44:25.190403+00:00,MITU01,MITU01,SPIM,20210720h20m19s32,127.0,YO,1.0,8.0
4,sub-MITU01/ses-20210720h20m19s32/micr/sub-MITU...,ome.zarr,2022-07-13 21:44:25.226338+00:00,MITU01,MITU01,SPIM,20210720h20m19s32,127.0,YO,1.0,9.0


### Make dandiset-specific alterations to the dataframe

In [8]:
# Preview: assets under "derivatives" whose path mentions neither EPIC nor STER.
(
    df.path.str.contains("derivatives")
    & ~df.path.str.contains("EPIC")
    & ~df.path.str.contains("STER")
)

0       False
1       False
2       False
3       False
4       False
        ...  
6853    False
6854    False
6855    False
6856    False
6857    False
Name: path, Length: 6858, dtype: bool

In [9]:
# Keep a pristine snapshot on the first run and restore it on every later run,
# so re-executing this cell never stacks alterations on top of each other.
if df_orig is None:
    df_orig = df.copy()
else:
    df = df_orig.copy()

if dandiset == "000108":
    # Long stain names -> the abbreviations used as keys elsewhere.
    remap = dict(calretinin='CR', npy='NPY')
    def sample_to_int(x):
        """Return the integer part of a sample label such as ``"12R"``.

        Strings are cut at the first ``"R"``; other non-missing values are
        converted with ``int``; missing values are returned unchanged.
        """
        if isinstance(x, str):
            return int(x.split('R')[0])
        if not np.isnan(x):
            return int(x)
        return x
    df["stain"] = df["stain"].apply(lambda x: remap.get(x, x))
    #df['sample'] = df['sample'].apply(sample_to_int).astype(pd.Int64Dtype())
if dandiset == "000026":
    # Drop derivative assets, except those whose path mentions EPIC or STER.
    is_derivative = df.path.str.contains("derivatives")
    is_kept_derivative = df.path.str.contains("EPIC") | df.path.str.contains("STER")
    df = df[~(is_derivative & ~is_kept_derivative)]
df.head()

Unnamed: 0,path,extension,modified,sub,subdir,modality,ses,sample,stain,run,chunk
0,dataset_description.json,json,2022-07-13 21:40:28.737065+00:00,,,,,,,,
1,samples.tsv,tsv,2022-07-13 21:41:07.535853+00:00,,,,,,,,
2,sub-MITU01/sub-MITU01_sessions.tsv,tsv,2022-07-13 21:43:56.549551+00:00,MITU01,MITU01,sessions,,,,,
3,sub-MITU01/ses-20210720h20m19s32/micr/sub-MITU...,ome.zarr,2022-07-13 21:44:25.190403+00:00,MITU01,MITU01,SPIM,20210720h20m19s32,127.0,YO,1.0,8.0
4,sub-MITU01/ses-20210720h20m19s32/micr/sub-MITU...,ome.zarr,2022-07-13 21:44:25.226338+00:00,MITU01,MITU01,SPIM,20210720h20m19s32,127.0,YO,1.0,9.0


#### Subject x modality

In [10]:
# Distinct (subject, modality) pairs, available both as index levels and as
# ordinary columns.
df_sub = (
    df[['sub', 'modality']]
    .dropna()
    .groupby(['sub', 'modality'])
    .count()
    .index
    .to_frame()
)
df_sub

Unnamed: 0_level_0,Unnamed: 1_level_0,sub,modality
sub,modality,Unnamed: 2_level_1,Unnamed: 3_level_1
MITU01,SPIM,MITU01,SPIM
MITU01,photo,MITU01,photo
MITU01,sessions,MITU01,sessions
MITU01h3,SPIM,MITU01h3,SPIM
MITU01h3,corr,MITU01h3,corr


In [22]:
# Column data for the subject x modality grid.
data_sub = {name: df_sub[name].tolist() for name in df_sub.columns}
x_range = sorted(df_sub['sub'].unique().tolist())
y_range = sorted(df_sub.modality.unique().tolist())

# One colour per modality, chosen by the modality's position on the y axis.
data_sub["colors"] = [colormap[y_range.index(modality)]
                      for modality in data_sub["modality"]]

In [24]:
# Overview grid: which modalities exist for which subject.
p = sequence_plot(
    data=data_sub,
    x_range=x_range,
    y_range=y_range,
    title="Subjects and modalities",
    x_rect="sub",
    y_rect="modality",
    color_rect="colors",
    x_axis_location="above",
    mf=1 if dandiset == "000108" else 2,
)
show(p)

In [20]:
# One paged sample x stain coverage grid per subject, collected in `plots`.
# The loop deliberately avoids the name `p`, which holds the subject x
# modality overview that the final dashboard cell places above these grids.
plots = []
for subject, _ in df.groupby("subdir"):
    is_subject = df["sub"] == subject
    # SPIM OME-Zarr assets of this subject; reused for the counts and the URLs.
    spim = df[is_subject & (df.modality == "SPIM") & df.extension.str.contains("ome")]
    mi_chunk = spim.groupby(['sample', 'stain', 'ses']).chunk.count()
    if mi_chunk.empty:
        continue
    dfcat = pd.concat((mi_chunk.index.to_frame(), mi_chunk), axis=1)
    stains = dfcat["stain"].unique().tolist()

    # Default x axis: samples in order of appearance.
    samples = dfcat["sample"].unique().tolist()
    if dandiset == "000108":
        # Sample labels may carry an "R..." suffix; order by the leading number.
        samples = sorted(samples, key=lambda x: int(x.split("R")[0]))
        if subject == "MITU01":
            # Show the full 1..180 range so missing samples appear as gaps.
            samples = sorted(set([str(val) for val in range(1, 181)]).union(set(samples)),
                             key=lambda x: int(x.split("R")[0]))

    # Sample -> URL of its photo. The row label doubles as the position of the
    # asset in `assets`.
    photos = {}
    for idx, photo_sample in df[is_subject & (df.modality == "photo")]["sample"].items():
        photos[photo_sample] = assets[idx].get_content_url(regex='s3')

    if dandiset == "000108":
        # sample -> stain -> zarr sources ordered by chunk number
        zarrmap = {}
        for (sample, stain), rows in spim.groupby(['sample', 'stain']):
            chunks = rows["chunk"].values
            if len(chunks) != len(np.unique(chunks)):
                # The same chunk id occurs more than once; skip this layer.
                print(f"multiple chunks detected for sample: {sample} stain: {stain}")
                continue
            ordered = sorted([assets[idx] for idx in rows.index],
                             key=lambda x: int(x.path.split("_chunk-")[1].split("_")[0]))
            zarrmap.setdefault(sample, {})[stain] = [f"zarr://{val.get_content_url(regex='s3')}"
                                                     for val in ordered]
        ngurls = {key: unquote(get_ng_url(sources_by_stain, subject, key))
                  for key, sources_by_stain in zarrmap.items()}

    data = dict(
        samples=dfcat['sample'].tolist(),
        stains=dfcat['stain'].tolist(),
        colors=[colormap[stains.index(stain)] for stain in dfcat['stain'].tolist()],
        sessions=dfcat['ses'].tolist(),
        chunks=dfcat['chunk'].tolist(),
        photo=[f'<img src="{photos.get(val)}" width="100px"/>' for val in dfcat['sample'].tolist()]
    )
    if dandiset == "000108":
        data["url"] = [f'{ngurls.get(val)}' for val in dfcat['sample'].tolist()]

    subject_plot = paged_sequence_plot(data, samples, stains,
                    title=f"Sub {subject} samples",
                    x_rect="samples", y_rect="stains",
                    color_rect="colors", x_axis_location="below",
                    tooltips = [('sample', '@samples'),
                                ('chunks', '@chunks'),
                                ('sessions', '@sessions'),
                               ],
                    callback=OpenURL(url="@url") if dandiset == "000108" else None,
                    mf=1, ncols=62)
    plots.append(subject_plot)

multiple chunks detected for sample: 10 stain: LEC
multiple chunks detected for sample: 10 stain: NN
multiple chunks detected for sample: 10 stain: YO
multiple chunks detected for sample: 20 stain: LEC
multiple chunks detected for sample: 20 stain: NN
multiple chunks detected for sample: 20 stain: YO
multiple chunks detected for sample: 73 stain: LEC
multiple chunks detected for sample: 73 stain: NN
multiple chunks detected for sample: 73 stain: YO
multiple chunks detected for sample: 97 stain: LEC
multiple chunks detected for sample: 97 stain: NN
multiple chunks detected for sample: 97 stain: YO
multiple chunks detected for sample: 15 stain: LEC
multiple chunks detected for sample: 15 stain: NN
multiple chunks detected for sample: 15 stain: YO
multiple chunks detected for sample: 16 stain: YO
multiple chunks detected for sample: 20 stain: LEC
multiple chunks detected for sample: 20 stain: NN
multiple chunks detected for sample: 20 stain: YO
multiple chunks detected for sample: 4 stain

In [25]:
# Write the dashboard to a standalone HTML file named after the dandiset:
# the plot held in `p` on top, followed by everything collected in `plots`.
output_file(f"{dandiset}-dashboard.html")
dashboard = column([p, *plots])
show(dashboard)