### Merge ethograms between Jay's handlabels and jaaba preds

In [1]:
import pandas as pd
from pathlib import Path
from scipy.io import loadmat
import numpy as np
from tqdm import tqdm
from typing import *
import shutil

In [2]:
# Load the hand-label table: one row per session (`mat_path`), with a
# `hand_labels` dict keyed by trial name.
df = pd.read_hdf('/data/caitlin/exactly5_hand_labels.hdf')

In [7]:
# One fresh, independent dict per row; filled in later, keyed by trial name.
df["jaaba_labels"] = [{} for _ in df.index]

In [8]:
df

Unnamed: 0,mat_path,hand_labels,jaaba_labels
0,M232_20170307,"{'M232_20170307_v065': [[0, 0, 0, 0, 0, 0, 0, ...",{}
1,M232_20170310,"{'M232_20170310_v013': [[0, 0, 0, 0, 0, 0, 0, ...",{}
2,M234_20170328,"{'M234_20170328_v007': [[0, 0, 0, 0, 0, 0, 0, ...",{}
3,M234_20170329,"{'M234_20170329_v002': [[0, 0, 0, 0, 0, 0, 0, ...",{}
4,M234_20170403,"{'M234_20170403_v002': [[0, 0, 0, 0, 0, 0, 0, ...",{}
...,...,...,...
65,M324_20201016,"{'M324_20201016_v033': [[0, 0, 0, 0, 0, 0, 0, ...",{}
66,M326_20201110,"{'M326_20201110_v029': [[0, 0, 0, 0, 0, 0, 0, ...",{}
67,M336_20210612,"{'M336_20210612_v007': [[0, 0, 0, 0, 0, 0, 0, ...",{}
68,M336_20210613,"{'M336_20210613_v035': [[0, 0, 0, 0, 0, 0, 0, ...",{}


### Get jaaba ethogram from mat file

In [9]:
# Directory holding the per-session `.mat` files; each file is named after the
# session's `mat_path` value.
mat_loc = Path('/data/caitlin/mat_files/')

In [10]:
# Paths of .mat files that have no `data` variable; appended to by get_ethogram.
errors = list()


def get_ethogram(trial_index: int, mat_path) -> Union[np.ndarray, bool, None]:
    """
    Return the JAABA ethogram for one trial of a session.

    Parameters
    ----------
    trial_index : int
        Trial number to look up. It is matched against the ``trial`` field of
        the ``data`` struct array; it is not used as a positional index.
    mat_path : path-like
        Location of the session's ``.mat`` file (handed to ``scipy.io.loadmat``).

    Returns
    -------
    numpy.ndarray
        ``np.hstack`` of the trial's ``<Behavior>_postprocessed`` entries,
        transposed, in the fixed order Lift, Handopen, Grab, Sup, Atmouth,
        Chew. Behaviors without a ``*scores*`` field are skipped, and entries
        equal to ``-1`` are replaced by ``0``.
    False
        No entry of the ``trial`` field equals ``trial_index``.
    None
        The file has no ``data`` variable (``mat_path`` is then appended to the
        module-level ``errors`` list), ``trial_index`` matches more than one
        entry, or none of the known behaviors is present in the file.
    """
    m = loadmat(mat_path)
    try:
        data = m['data']
    except KeyError:
        errors.append(mat_path)
        return None

    field_names = data.dtype.names
    behaviors = {name.split('_')[0] for name in field_names if 'scores' in name}

    # Canonical row order of the returned ethogram.
    all_behaviors = [
        "Lift",
        "Handopen",
        "Grab",
        "Sup",
        "Atmouth",
        "Chew"
    ]
    sorted_behaviors = [b for b in all_behaviors if b in behaviors]

    mat_trial_index = np.argwhere(data["trial"].ravel() == trial_index)
    # Trial not found in JAABA data
    if mat_trial_index.size == 0:
        return False

    try:
        # .item() raises ValueError when more than one entry matched.
        mat_trial_index = mat_trial_index.item()
    except ValueError:
        return None

    # np.hstack raises on an empty list; report "nothing to return" with the
    # same sentinel as the other failure modes instead of crashing.
    if not sorted_behaviors:
        return None

    ethograms = []
    for b in sorted_behaviors:
        behavior_index = field_names.index(f'{b}_postprocessed')
        row = data[mat_trial_index][0][behavior_index]
        row[row == -1] = 0
        ethograms.append(row)

    return np.hstack(ethograms).T

In [11]:
# Look up the JAABA ethogram of every hand-labelled trial and store it under
# the same trial key in that row's `jaaba_labels` dict.
for _, session in tqdm(df.iterrows()):
    # All trials of a session live in the same .mat file.
    session_mat = mat_loc.joinpath(session["mat_path"]).with_suffix('.mat')
    jaaba_by_trial = session["jaaba_labels"]
    for trial_name in tqdm(session["hand_labels"].keys()):
        # Trial keys end in "_v<number>"; the number is the trial to look up.
        trial_number = int(trial_name.split('_v')[-1])
        jaaba_by_trial[trial_name] = get_ethogram(trial_index=trial_number,
                                                  mat_path=session_mat)

0it [00:00, ?it/s]
  0%|                                                                                                     | 0/1 [00:00<?, ?it/s][A
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.73it/s][A
1it [00:00,  9.59it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.10it/s][A

100%|█████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 14.95it/s][A
3it [00:00, 13.18it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.25it/s][A

  0%|                                                                                                     | 0/3 [00:00<?, ?it/s][A
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 16.16it/s][A
5it [00:0

In [12]:
errors

[]

In [8]:
# errors2 = list()
# def get_ethogram2(trial_index: int, mat_path):
#         """
#         Returns the ethogram for a given trial in a session.
#         """
#         m = loadmat(mat_path)
#         try:
#             behaviors = sorted([b.split('_')[0] for b in m['data1'].dtype.names if 'scores' in b])
#         except KeyError:
#             errors2.append(mat_path)
#             return

#         all_behaviors = [
#             "Lift",
#             "Handopen",
#             "Grab",
#             "Sup",
#             "Atmouth",
#             "Chew"
#         ]

#         sorted_behaviors = [b for b in all_behaviors if b in behaviors]

#         ethograms = []

#         mat_trial_index = np.argwhere(m["data1"]["trial"].ravel() == (trial_index))
#         # Trial not found in JAABA data
#         if mat_trial_index.size == 0:
#             return False

#         try:
#             mat_trial_index = mat_trial_index.item()
#         except ValueError:
#             return

      
#         for b in sorted_behaviors:
#             behavior_index = m['data1'].dtype.names.index(f'{b}_postprocessed')
#             row = m['data1'][mat_trial_index][0][behavior_index]
#             row[row == -1] = 0
#             ethograms.append(row)

#         sorted_behaviors = [b.lower() for b in sorted_behaviors]

        # return np.hstack(ethograms).T

In [9]:
# for mat in tqdm(errors):
#     for key in tqdm(df[df['mat_path'] == mat.stem]['jaaba_labels'].item().keys()):
#         df[df['mat_path'] == mat.stem]['jaaba_labels'].item()[key] = get_ethogram2(trial_index=int(key.split('_v')[-1]),
#                                                                                    mat_path=mat)

In [13]:
# Write the table, now including `jaaba_labels`, back to the same path it was
# loaded from.
df.to_hdf('/data/caitlin/exactly5_hand_labels.hdf', key='df')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['mat_path', 'hand_labels', 'jaaba_labels'], dtype='object')]

  df.to_hdf('/data/caitlin/exactly5_hand_labels.hdf', key='df')


In [15]:
# Reload the table that was just written and display it.
df = pd.read_hdf('/data/caitlin/exactly5_hand_labels.hdf')
df

Unnamed: 0,mat_path,hand_labels,jaaba_labels
0,M232_20170307,"{'M232_20170307_v065': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170307_v065': [[0, 0, 0, 0, 0, 0, 0, ..."
1,M232_20170310,"{'M232_20170310_v013': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170310_v013': [[0, 0, 0, 0, 0, 0, 0, ..."
2,M234_20170328,"{'M234_20170328_v007': [[0, 0, 0, 0, 0, 0, 0, ...","{'M234_20170328_v007': [[0, 0, 0, 0, 0, 0, 0, ..."
3,M234_20170329,"{'M234_20170329_v002': [[0, 0, 0, 0, 0, 0, 0, ...","{'M234_20170329_v002': [[0, 0, 0, 0, 0, 0, 0, ..."
4,M234_20170403,"{'M234_20170403_v002': [[0, 0, 0, 0, 0, 0, 0, ...","{'M234_20170403_v002': [[0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...
65,M324_20201016,"{'M324_20201016_v033': [[0, 0, 0, 0, 0, 0, 0, ...","{'M324_20201016_v033': [[0, 0, 0, 0, 0, 0, 0, ..."
66,M326_20201110,"{'M326_20201110_v029': [[0, 0, 0, 0, 0, 0, 0, ...","{'M326_20201110_v029': [[0, 0, 0, 0, 0, 0, 0, ..."
67,M336_20210612,"{'M336_20210612_v007': [[0, 0, 0, 0, 0, 0, 0, ...","{'M336_20210612_v007': [[0, 0, 0, 0, 0, 0, 0, ..."
68,M336_20210613,"{'M336_20210613_v035': [[0, 0, 0, 0, 0, 0, 0, ...","{'M336_20210613_v035': [[0, 0, 0, 0, 0, 0, 0, ..."


### Merge ethograms so that each hand-labeled behavior replaces the JAABA prediction for that behavior

In [16]:
# One fresh, independent dict per row; filled in later, keyed by trial name.
df["merged_ethogram"] = [{} for _ in df.index]

In [17]:
# (mat_path, trial) pairs that could not be merged: either no JAABA ethogram
# was found for the trial or its shape differs from the hand-label array.
errors = list()
for _, session in tqdm(df.iterrows()):
    for key in tqdm(session["hand_labels"].keys()):
        hand = session["hand_labels"][key]
        jaaba = session["jaaba_labels"][key]

        # get_ethogram returns None or False instead of an array on failure.
        # Checked explicitly: isinstance() with typing.Union only works on
        # Python >= 3.10.
        if jaaba is None or isinstance(jaaba, bool):
            errors.append((session["mat_path"], key))
            continue
        if hand.shape != jaaba.shape:
            errors.append((session["mat_path"], key))
            continue

        # Work on a copy: assigning into the JAABA array itself would also
        # overwrite the `jaaba_labels` entry, since both names would refer to
        # the same array.
        merged_ethogram = jaaba.copy()
        for i, h_row in enumerate(hand):
            # A behavior row with any hand label replaces the JAABA
            # prediction for that behavior; unlabeled rows keep JAABA's.
            if h_row.any():
                merged_ethogram[i] = h_row
        session["merged_ethogram"][key] = merged_ethogram

0it [00:00, ?it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 3066.01it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 5302.53it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1925.76it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1975.65it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 3804.93it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1763.79it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1897.02it/s][A

100%|█████████████████████████████████████████████

In [18]:
# Collect, in row order, every trial whose merged ethogram has at least one
# non-zero entry.
corrected_trials = [
    trial
    for merged_by_trial in df["merged_ethogram"]
    for trial, ethogram in merged_by_trial.items()
    if ethogram.any()
]
count = len(corrected_trials)
count

90

In [32]:
# Write the table, now including `merged_ethogram`, back to the same path it
# was loaded from.
df.to_hdf('/data/caitlin/exactly5_hand_labels.hdf', key='df')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['mat_path', 'hand_labels', 'jaaba_labels', 'merged_ethogram'], dtype='object')]

  df.to_hdf('/data/caitlin/exactly5_hand_labels.hdf', key='df')


In [19]:
corrected_trials[0:5]

['M232_20170307_v065',
 'M232_20170310_v013',
 'M234_20170328_v007',
 'M234_20170329_v002',
 'M234_20170403_v002']

### Copy videos for trials that have been corrected to `/data/caitlin/potential_ground_truth/` for review

In [20]:
# Same .mat directory as `mat_loc`; used for the video-path lookup below.
mat_files = Path('/data/caitlin/mat_files/')

In [21]:
# Directory recorded in the .mat file for every trial that has a merged
# ethogram, keyed by trial name; None when it cannot be resolved.
trial_dirs = dict()
for _, session in tqdm(df.iterrows()):
    m = loadmat(mat_files.joinpath(session['mat_path']).with_suffix('.mat'))
    for k in session["merged_ethogram"].keys():
        # NOTE(review): the trial number is used here as a 1-based position,
        # whereas get_ethogram matches on the `trial` field -- confirm that
        # the two always agree.
        index = int(k.split('_v')[-1]) - 1
        trial_dirs[k] = None
        # Some files store the struct array under `data1` instead of `data`.
        for variable in ('data', 'data1'):
            try:
                trial_id = m[variable]['id'][index][0][0]
            except (KeyError, IndexError):
                continue
            # Drop the last path component of the stored id.
            trial_dirs[k] = Path(*Path(trial_id).parts[:-1])
            break

# Emit the directories in the order of `corrected_trials`, so that the two
# lists stay aligned when zipped even if some merged ethograms are all zero
# and therefore missing from `corrected_trials`.
vid_paths = [trial_dirs.get(trial) for trial in corrected_trials]

70it [00:10,  6.94it/s]


In [22]:
len(corrected_trials)

90

In [23]:
len(vid_paths)

90

In [24]:
# Full path of each corrected trial's `movie_comb.avi` under the local mount,
# or None where the trial's directory could not be resolved.
final_paths = list()
home_path = '/home/clewis7/wasabi/hantmanlab/from_tier2'
for (vp, trial) in zip(vid_paths, corrected_trials):
    if vp is None:
        final_paths.append(None)
        continue
    # The stored directory is presumably rooted at "/"; strip the root so it
    # nests under home_path instead of replacing it (and so a relative
    # directory is not glued onto the last component of home_path).
    relative_dir = str(vp).lstrip('/')
    final_paths.append(Path(home_path, relative_dir, trial, 'movie_comb.avi'))

In [25]:
len(final_paths)

90

In [26]:
final_paths[0:5]

[PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M232Slc17a7_Gtacr2/20170307/Group7/M232_20170307_v065/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M232Slc17a7_Gtacr2/20170310/M232_20170310_v013/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M234Slc17a7_Gtacr2/20170328/M234_20170328_v007/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M234Slc17a7_Gtacr2/20170329/M234_20170329_v002/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M234Slc17a7_Gtacr2/20170403/A/M234_20170403_v002/movie_comb.avi')]

In [27]:
# Destination directory for the copied trial videos.
ground = Path('/data/caitlin/potential_ground_truth/')

In [28]:
# Source paths whose video file could not be found.
wrong_paths = list()
# shutil.copy needs the destination directory to exist; without it every copy
# would raise FileNotFoundError and be misreported as a wrong source path.
ground.mkdir(parents=True, exist_ok=True)
for vp, trial in tqdm(zip(final_paths, corrected_trials)):
    # `final_paths` holds None for trials whose directory could not be
    # resolved; the None is in the path, not in the trial name.
    if vp is None:
        continue
    try:
        shutil.copy(src=vp, dst=ground.joinpath(f"{trial}.avi"))
    except FileNotFoundError:
        wrong_paths.append(vp)

90it [01:52,  1.25s/it]


In [29]:
len(wrong_paths)

15

In [30]:
wrong_paths

[PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M254VGATXChR2_TH/20180323/CTR/M254_20180323_v072/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M254VGATXChR2_TH/20180326/LaserDetector2s/M254_20180326_v151/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M254VGATXChR2_TH/20180327/LaserOnly100ms/M254_20180327_v238/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M254VGATXChR2_TH/20180327/CTR/M254_20180327_v027/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M254VGATXChR2_TH/20180327/CTR/M254_20180327_v111/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M259VGATXChR2TRN_TH/20180405/CTR/M259_20180405_v123/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M259VGATXChR2TRN_TH/20180412/LaserOnly500ms/M259_20180412_v228/movie_comb.avi'),
 PosixPath('/home/clewis7/was