### Organizing training data for visualization

In [1]:
import warnings
from datetime import datetime
from pathlib import Path

import pandas as pd
from tqdm import tqdm

In [2]:
# Root directory that is scanned for one sub-directory per animal.
training_data_path = Path('/data/caitlin/training-vids/')
# Directory where snapshots of the session table are written and read back.
df_dir_path = Path('/data/caitlin/data-frames/')

In [9]:
# Empty session table; its columns describe one recording session each.
df = pd.DataFrame(
    columns=[
        "animal_id",
        "session_id",
        "mat_file",
        "session_vids",
        "notes",
    ]
)

In [10]:
# Show the table to check its column layout.
df

Unnamed: 0,animal_id,session_id,mat_file,session_vids,notes


In [11]:
# Build the session table: one row per <animal>/<session> directory below
# training_data_path.
#
# The rows are collected in a list and the frame is created once at the end,
# so re-running this cell rebuilds the table instead of appending duplicates.
session_records = []
for animal_dir in sorted(training_data_path.glob('*')):
    if not animal_dir.is_dir():
        continue  # ignore stray files lying next to the animal directories
    for session_dir in sorted(animal_dir.glob('*')):
        if not session_dir.is_dir():
            continue  # ignore stray files lying next to the session directories
        session_records.append(
            {
                # .name (not .stem) keeps directory names that contain a dot
                # intact, so the path can be rebuilt from these two ids later.
                "animal_id": animal_dir.name,
                "session_id": session_dir.name,
                "mat_file": session_dir.joinpath('jaaba.mat'),
                "session_vids": sorted(session_dir.glob('*.avi')),
                "notes": None,
            }
        )

df = pd.DataFrame(
    session_records,
    columns=["animal_id", "session_id", "mat_file", "session_vids", "notes"],
)

In [12]:
# Write the table to a new timestamped snapshot file in df_dir_path.
# NOTE(review): save_df is defined in a later cell of this notebook, so that
# cell has to be executed before this one on a fresh kernel.
save_df(df, df_dir_path)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['animal_id', 'session_id', 'mat_file', 'session_vids', 'notes'], dtype='object')]

  df.to_hdf(f'{df_dir_path}/{datetime.now().isoformat()}', key='df')


In [4]:
# Rows belonging to a single animal.
df.loc[df['animal_id'] == 'M234Slc17a7_Gtacr2']

NameError: name 'df' is not defined

In [6]:
def save_df(df, df_dir_path):
    """Write ``df`` to a new HDF file inside ``df_dir_path``.

    The file is named after the current local time in ISO format and the
    frame is stored under the key ``'df'``.
    """
    timestamp = datetime.now().isoformat()
    destination = f'{df_dir_path}/{timestamp}'
    df.to_hdf(destination, key='df')
def load_df(df_dir_path):
    """Load the last snapshot (by sorted file name) found in ``df_dir_path``.

    Parameters
    ----------
    df_dir_path : str or pathlib.Path
        Directory that holds the files written by ``save_df``.

    Returns
    -------
    The object returned by ``pd.read_hdf`` for the selected file.

    Raises
    ------
    FileNotFoundError
        If the directory contains no files at all.
    """
    # Only regular files can be snapshots; sub-directories are skipped.
    snapshots = sorted(p for p in Path(df_dir_path).glob('*') if p.is_file())
    if not snapshots:
        raise FileNotFoundError(f"no saved DataFrame found in {df_dir_path}")
    return pd.read_hdf(snapshots[-1])

In [13]:
# Reload the table from the last snapshot (by sorted file name) in df_dir_path.
df = load_df(df_dir_path)

In [14]:
# Show the reloaded table.
df

Unnamed: 0,animal_id,session_id,mat_file,session_vids,notes
0,M235Slc17a7_Gtacr2,20170424,/data/caitlin/training-vids/M235Slc17a7_Gtacr2...,[/data/caitlin/training-vids/M235Slc17a7_Gtacr...,
1,M235Slc17a7_Gtacr2,20170425,/data/caitlin/training-vids/M235Slc17a7_Gtacr2...,[/data/caitlin/training-vids/M235Slc17a7_Gtacr...,
2,M236Slc17a7_Gtacr2,20170501,/data/caitlin/training-vids/M236Slc17a7_Gtacr2...,[/data/caitlin/training-vids/M236Slc17a7_Gtacr...,
3,M236Slc17a7_Gtacr2,20170503,/data/caitlin/training-vids/M236Slc17a7_Gtacr2...,[/data/caitlin/training-vids/M236Slc17a7_Gtacr...,
4,M238Slc17a7_Chr2,20170605,/data/caitlin/training-vids/M238Slc17a7_Chr2/2...,[/data/caitlin/training-vids/M238Slc17a7_Chr2/...,
5,M238Slc17a7_Chr2,20170825,/data/caitlin/training-vids/M238Slc17a7_Chr2/2...,[/data/caitlin/training-vids/M238Slc17a7_Chr2/...,
6,M239Slc17a7_Gtacr2,20170623,/data/caitlin/training-vids/M239Slc17a7_Gtacr2...,[/data/caitlin/training-vids/M239Slc17a7_Gtacr...,
7,M239Slc17a7_Gtacr2,20170902,/data/caitlin/training-vids/M239Slc17a7_Gtacr2...,[/data/caitlin/training-vids/M239Slc17a7_Gtacr...,
8,M242Slc17a7_Chr2BPN_DCN,20170911,/data/caitlin/training-vids/M242Slc17a7_Chr2BP...,[/data/caitlin/training-vids/M242Slc17a7_Chr2B...,
9,M242Slc17a7_Chr2BPN_DCN,20170912,/data/caitlin/training-vids/M242Slc17a7_Chr2BP...,[/data/caitlin/training-vids/M242Slc17a7_Chr2B...,


In [8]:
from scipy.io import loadmat, whosmat
from typing import *
from copy import deepcopy
from functools import lru_cache
import numpy as np

In [9]:
@lru_cache(maxsize=4)
def _load_jaaba_mat(mat_path):
    """Parse a ``.mat`` file once so later trials of the same file reuse it."""
    return loadmat(mat_path)


@lru_cache
def get_ethogram(trial_index: int, mat_path):
    """Return the ethogram of one trial from a JAABA ``.mat`` file.

    Parameters
    ----------
    trial_index : int
        Zero-based trial index; it is matched against the ``trial`` field of
        the ``data`` struct as ``trial_index + 1``.
    mat_path : str or pathlib.Path
        Path of the ``.mat`` file. Must be hashable because results are cached.

    Returns
    -------
    False
        If no entry of ``data`` has ``trial == trial_index + 1``.
    (numpy.ndarray, list of str)
        Otherwise: the ``<Behavior>_postprocessed`` fields of the trial stacked
        with ``np.hstack`` and transposed, plus the lower-cased behavior names
        in the same order. Assumes each field is a column vector, giving one
        row per behavior; confirm against the data.

    Notes
    -----
    Results are cached and the returned array is shared between calls, so
    callers should not modify it in place.
    """
    m = _load_jaaba_mat(mat_path)
    field_names = m['data'].dtype.names

    # Behaviors are discovered through their "<Behavior>_scores" fields.
    behaviors = {name.split('_')[0] for name in field_names if 'scores' in name}

    # Fixed display order; only behaviors present in this file are kept.
    all_behaviors = [
        "Lift",
        "Handopen",
        "Grab",
        "Sup",
        "Atmouth",
        "Chew"
    ]

    sorted_behaviors = [b for b in all_behaviors if b in behaviors]

    mat_trial_index = np.argwhere(m["data"]["trial"].ravel() == (trial_index + 1))
    # Trial not found in JAABA data
    if mat_trial_index.size == 0:
        return False

    # .item() raises if more than one entry matches the trial number.
    mat_trial_index = mat_trial_index.item()

    ethograms = []
    for b in sorted_behaviors:
        behavior_index = field_names.index(f'{b}_postprocessed')
        ethograms.append(m['data'][mat_trial_index][0][behavior_index])

    sorted_behaviors = [b.lower() for b in sorted_behaviors]

    return np.hstack(ethograms).T, sorted_behaviors

In [10]:
# Columns that stay in the table but are left out of the grid view.
hide_columns = ["mat_file", "session_vids", "notes"]

columns = df.columns

In [11]:
# Keep only the columns that are not listed in hide_columns.
visible_columns = [name for name in columns if name not in hide_columns]
df_show = df[visible_columns]

In [12]:
# Show the reduced table that is handed to the grid.
df_show

Unnamed: 0,animal_id,session_id
0,M234Slc17a7_Gtacr2,20170328
1,M234Slc17a7_Gtacr2,20170404
2,M235Slc17a7_Gtacr2,20170424
3,M235Slc17a7_Gtacr2,20170425
4,M236Slc17a7_Gtacr2,20170501
5,M236Slc17a7_Gtacr2,20170503
6,M238Slc17a7_Chr2,20170605
7,M238Slc17a7_Chr2,20170825
8,M239Slc17a7_Gtacr2,20170623
9,M239Slc17a7_Gtacr2,20170902


In [13]:
from ipydatagrid import DataGrid

In [14]:
# Column widths passed to the grid, keyed by column name.
default_widths = dict(animal_id=200, session_id=200)

In [15]:
# Grid view of the visible session columns with cell-wise selection.
grid_options = {
    "selection_mode": "cell",
    "layout": {"height": "250px", "width": "750px"},
    "base_row_size": 24,
    "index_name": "index",
    "column_widths": default_widths,
}
datagrid = DataGrid(df_show, **grid_options)

  schema = pd.io.json.build_table_schema(dataframe)


In [16]:
from fastplotlib import ImageWidget

In [17]:
# Render the grid widget.
datagrid

DataGrid(auto_fit_params={'area': 'all', 'padding': 30, 'numCols': None}, base_row_size=24, column_widths={'an…

In [18]:
# Start with the first row selected, replacing any existing selection.
datagrid.select(
    row1=0,
    column1=0,
    row2=0,
    column2=len(df_show.columns),
    clear_mode='all',
)

In [19]:
def row_change(*args):
    """React to a change of the grid selection.

    Registered as an observer on ``datagrid``; the observer arguments are
    ignored and the selection is read from ``datagrid`` directly. Only
    single-row selections are accepted; anything else is reported with a
    warning and otherwise ignored.
    """
    selections = datagrid.selections
    if not selections:
        # Selection was cleared; there is no row to act on.
        return

    r1 = selections[0]["r1"]
    r2 = selections[0]["r2"]

    if r1 != r2:
        warnings.warn("Only single row selection is currently allowed")
        return

    # Index value of the selected row as shown in the (visible) grid data.
    # Not used yet; kept for the viewer hook below.
    index = datagrid.get_visible_data().index[r1]

    # make_viewer()
    

In [20]:
# Call row_change whenever the grid's "selections" trait changes.
datagrid.observe(row_change, names="selections")

In [29]:
# Read the currently selected grid row and look up its index value.
# TODO: create a datagrid with the trials of the selected session; selecting a
# trial should trigger an update of the video/ethogram viewer (see trial_change).
# NOTE(review): selections[0] raises IndexError when nothing is selected.
r1 = datagrid.selections[0]["r1"]
index = datagrid.get_visible_data().index[r1]

In [30]:
# Show the index value of the selected row.
index

0

In [31]:
from ipywidgets import Select

In [32]:
# One-row DataFrame for the selected session (list indexer keeps it a frame).
# NOTE(review): `index` is taken from the grid data's index but used here as a
# position; this presumably only matches while df has a default 0..n-1 index — confirm.
selected_row = df.iloc[[index]]

In [33]:
# Show the selected session row.
selected_row

Unnamed: 0,animal_id,session_id,mat_file,session_vids,notes
0,M234Slc17a7_Gtacr2,20170328,/data/caitlin/training-vids/M234Slc17a7_Gtacr2...,[/data/caitlin/training-vids/M234Slc17a7_Gtacr...,


In [41]:
# Directory of the selected session: <training root>/<animal_id>/<session_id>.
session_dir = training_data_path / selected_row['animal_id'].item() / selected_row['session_id'].item()

In [34]:
# Video paths stored for the selected session (.item() requires exactly one row).
trial_videos = selected_row['session_vids'].item()

In [35]:
# File names without extension, used as the entries of the trial selector.
options = [video.stem for video in trial_videos]

In [36]:
from mesmerize_core.arrays import LazyVideo
from fastplotlib import Plot

In [42]:
from tqdm import tqdm

In [43]:
def get_ethogram_shape() -> Tuple[int, int]:
    """Return the largest ethogram extent over all trials in ``options``.

    For every entry of ``options`` the trial number is parsed from its last
    three characters (minus one, giving the index ``get_ethogram`` expects)
    and the ethogram is loaded from the first ``.mat`` file found in
    ``session_dir``. Trials that are missing from the JAABA data are skipped.

    Returns
    -------
    (int, int)
        Maximum of each of the two ethogram dimensions, ``(0, 0)`` if no
        ethogram was found.
    """
    # Looked up once; the result does not depend on the trial.
    mat_files = list(session_dir.glob("*.mat"))

    d0, d1 = (0, 0)
    for o in tqdm(options):
        ix = int(o[-3:]) - 1
        result = get_ethogram(ix, mat_files[0])
        if result is False:
            # get_ethogram signals "trial not in the JAABA data" with False.
            continue
        eth = result[0].shape
        d0, d1 = (max(eth[0], d0), max(eth[1], d1))
    return d0, d1

In [44]:
# Largest ethogram shape over all trials of the selected session.
get_ethogram_shape()

100%|███████████████████████████████████████████████████████████████████████████████| 80/80 [00:05<00:00, 13.88it/s]


(6, 2498)

In [45]:
# dtype of the ethogram arrays, taken from trial index 0.
# NOTE(review): fails if that trial is missing, because get_ethogram then
# returns False instead of a tuple.
eth_dtype = get_ethogram(0, list(session_dir.glob("*.mat"))[0])[0].dtype

In [46]:
# initial iw
# Start with the first video of the selected session.
vid_path_init = selected_row['session_vids'].item()[0]
# get_ethogram expects a zero-based trial index; the number in the video name
# is presumably one-based (get_ethogram_shape subtracts 1 as well).
ethogram_init = get_ethogram(int(vid_path_init.stem.split('_v')[-1]) - 1, selected_row['mat_file'].item())
iw = ImageWidget(data=LazyVideo(vid_path_init))
plot = Plot()

# The heatmap is sized for the largest ethogram of the session so that every
# trial fits; shorter trials leave the remainder at zero.
eth_shape = get_ethogram_shape()
heatmap_init = np.zeros(eth_shape, dtype=eth_dtype)
if ethogram_init is not False:
    # Show the ethogram that belongs to the initially displayed video.
    eth_init = ethogram_init[0]
    heatmap_init[:eth_init.shape[0], :eth_init.shape[1]] = eth_init
eth_heatmap = plot.add_heatmap(data=heatmap_init)
eth_selector = eth_heatmap.add_linear_selector()

def update_frame(ev):
    """Move the video's "t" slider to the index picked on the ethogram selector."""
    ix = ev.pick_info["selected_index"]
    iw.sliders["t"].value = ix

eth_selector.selection.add_event_handler(update_frame)

# List widget with one entry per trial video of the session.
trial_selector = Select(options=options)

RFBOutputContext()

  warn("min not implemented for LazyTiff, returning min of 0th index")
  warn("max not implemented for LazyTiff, returning min of 0th index")


RFBOutputContext()

100%|████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 23219.45it/s]


In [47]:
from ipywidgets import VBox, HBox

In [48]:
# Video on top of the ethogram plot, with the trial selector to the right.
viewer_column = VBox([iw.show(), plot.show()])
HBox([viewer_column, trial_selector])

HBox(children=(VBox(children=(VBox(children=(VBox(children=(JupyterWgpuCanvas(), HBox(children=(Button(icon='e…

In [292]:
def trial_change(obj):
    """Show the video and ethogram of the trial picked in ``trial_selector``.

    Registered as an observer on ``trial_selector``; ``obj`` (the change
    notification) is not used, the current value is read from the widget.
    """
    session_path = training_data_path.joinpath(selected_row['animal_id'].item()).joinpath(selected_row['session_id'].item())
    selected_video = session_path.joinpath(trial_selector.value).with_suffix('.avi')

    # Swap the video shown by the image widget and rewind to the first frame.
    iw._data = [LazyVideo(selected_video)]
    iw.current_index["t"] = 0
    iw.sliders["t"].value = 0
    iw.plot.graphics[0].data = iw._data[0][0]

    # get_ethogram expects a zero-based trial index; the number in the video
    # name is presumably one-based (get_ethogram_shape subtracts 1 as well).
    trial_index = int(selected_video.stem.split('_v')[-1]) - 1
    ethogram = get_ethogram(trial_index, selected_row['mat_file'].item())

    # Build the full heatmap from zeros so that data of a previously shown,
    # longer trial does not remain visible next to the new one.
    hm_canvas = np.zeros(eth_shape, dtype=eth_dtype)
    if ethogram is not False:
        hm_data = ethogram[0]
        hm_canvas[:hm_data.shape[0], :hm_data.shape[1]] = hm_data
    # else: trial missing from the JAABA data -> heatmap stays blank

    plot.graphics[0].data[:eth_shape[0], :eth_shape[1]] = hm_canvas

In [293]:
# Call trial_change whenever the selected entry ("value") of the widget changes.
trial_selector.observe(trial_change, "value")