### Merge ethograms between Jay's hand labels and JAABA predictions

In [1]:
import pandas as pd
from pathlib import Path
from scipy.io import loadmat
import numpy as np
from tqdm import tqdm
from typing import *
import shutil

In [2]:
# Load the hand-label table. NOTE(review): later cells add columns to this frame
# and write it back to this same path, so re-running the notebook starts from the
# already-augmented file rather than the original one.
df = pd.read_hdf('/data/caitlin/exactly3_hand_labels.hdf')

In [3]:
# Give every row its own empty dict; each one is filled in place, keyed by trial name.
df["jaaba_labels"] = [{} for _ in range(len(df.index))]

In [4]:
# Show the frame with the new, still empty, `jaaba_labels` column.
df

Unnamed: 0,mat_path,hand_labels,jaaba_labels
0,M232_20170306,"{'M232_20170306_v029': [[0, 0, 0, 0, 0, 0, 0, ...",{}
1,M232_20170307,"{'M232_20170307_v022': [[0, 0, 0, 0, 0, 0, 0, ...",{}
2,M232_20170308,"{'M232_20170308_v020': [[0, 0, 0, 0, 0, 0, 0, ...",{}
3,M232_20170310,"{'M232_20170310_v055': [[0, 0, 0, 0, 0, 0, 0, ...",{}
4,M232_20170314,"{'M232_20170314_v011': [[0, 0, 0, 0, 0, 0, 0, ...",{}
...,...,...,...
260,M238_20170717,"{'M238_20170717_v021': [[0, 0, 0, 0, 0, 0, 0, ...",{}
261,M238_20170725,"{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ...",{}
262,M238_20170726,"{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ...",{}
263,M240_20170725FinalChecked,"{'M240_20170725_v084': [[0, 0, 0, 0, 0, 0, 0, ...",{}


### Get jaaba ethogram from mat file

In [5]:
# Directory holding the per-session mat files; file names are built as `<mat_path>.mat`.
mat_loc = Path('/data/caitlin/mat_files/')

In [6]:
# Paths of mat files that do not contain the requested variable (one entry per failed call).
errors = list()
def get_ethogram(trial_index: int, mat_path, data_key: str = "data"):
    """
    Returns the ethogram for a given trial in a session.

    Parameters
    ----------
    trial_index: int
        Trial number to look up; it is matched against the ``trial`` field of the
        struct array stored in the mat file.
    mat_path:
        Path of the session's mat file, passed straight to ``scipy.io.loadmat``.
    data_key: str, default "data"
        Name of the mat-file variable that holds the per-trial struct array.
        Some sessions store it under another name (e.g. ``"data1"``).

    Returns
    -------
    np.ndarray, False or None
        * the transposed ``np.hstack`` of the trial's ``<behavior>_postprocessed``
          arrays, in the fixed order Lift, Handopen, Grab, Sup, Atmouth, Chew
          (restricted to behaviors that have a ``*scores*`` field), with every
          ``-1`` replaced by ``0``;
        * ``False`` when no entry of the struct array carries ``trial_index``;
        * ``None`` when the file has no ``data_key`` variable (``mat_path`` is then
          appended to ``errors``) or when ``trial_index`` matches more than one entry.
    """
    m = loadmat(mat_path)
    try:
        data = m[data_key]
    except KeyError:
        errors.append(mat_path)
        return

    field_names = data.dtype.names
    # Behaviors present in this file, identified by their "<behavior>_scores..." fields.
    behaviors = {name.split('_')[0] for name in field_names if 'scores' in name}

    all_behaviors = [
        "Lift",
        "Handopen",
        "Grab",
        "Sup",
        "Atmouth",
        "Chew"
    ]

    # Keep the canonical behavior order so rows line up across sessions.
    sorted_behaviors = [b for b in all_behaviors if b in behaviors]

    mat_trial_index = np.argwhere(data["trial"].ravel() == trial_index)
    # Trial not found in JAABA data
    if mat_trial_index.size == 0:
        return False

    try:
        mat_trial_index = mat_trial_index.item()
    except ValueError:
        # More than one entry carries this trial number: ambiguous, so give up.
        return

    ethograms = []
    for b in sorted_behaviors:
        behavior_index = field_names.index(f'{b}_postprocessed')
        row = data[mat_trial_index][0][behavior_index]
        # -1 is mapped to 0 so the ethogram only contains non-negative values.
        row[row == -1] = 0
        ethograms.append(row)

    return np.hstack(ethograms).T

In [7]:
# Fill each session's `jaaba_labels` dict in place. The dicts yielded by iterrows()
# are the same objects stored in `df`, so the frame is updated without reassignment.
for _, session in tqdm(df.iterrows(), total=len(df)):
    # Every trial of a session lives in the same mat file, so build its path once.
    session_mat = mat_loc.joinpath(session["mat_path"]).with_suffix('.mat')
    for key in session["hand_labels"]:
        # Trial keys end in "_v<number>"; that number is the trial to look up.
        trial_number = int(key.split('_v')[-1])
        session["jaaba_labels"][key] = get_ethogram(trial_index=trial_number,
                                                   mat_path=session_mat)

0it [00:00, ?it/s]
  0%|                                                                                                     | 0/5 [00:00<?, ?it/s][A
 20%|██████████████████▌                                                                          | 1/5 [00:00<00:00,  9.38it/s][A
 60%|███████████████████████████████████████████████████████▊                                     | 3/5 [00:00<00:00, 13.20it/s][A
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.61it/s][A
1it [00:00,  2.71it/s]
  0%|                                                                                                     | 0/3 [00:00<?, ?it/s][A
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 14.40it/s][A
2it [00:00,  3.63it/s]
  0%|                                                                                                     | 0/4 [00:00<?, ?it/s][A
 50%|██████

In [8]:
# Mat files in which get_ethogram found no 'data' variable (one entry per failed call).
errors

[PosixPath('/data/caitlin/mat_files/M236_20170817.mat'),
 PosixPath('/data/caitlin/mat_files/M238_20170717.mat'),
 PosixPath('/data/caitlin/mat_files/M238_20170725.mat'),
 PosixPath('/data/caitlin/mat_files/M238_20170725.mat'),
 PosixPath('/data/caitlin/mat_files/M238_20170726.mat'),
 PosixPath('/data/caitlin/mat_files/M238_20170726.mat'),
 PosixPath('/data/caitlin/mat_files/M240_20170725FinalChecked.mat'),
 PosixPath('/data/caitlin/mat_files/M240_20170727FinalChecked.mat')]

In [9]:
# Paths of mat files that contain no 'data1' variable (one entry per failed call).
errors2 = list()
def get_ethogram2(trial_index: int, mat_path):
    """
    Build the ethogram of one trial from a mat file that stores its trials under ``data1``.

    Returns the transposed ``np.hstack`` of the trial's ``<behavior>_postprocessed``
    arrays (every ``-1`` replaced by ``0``), ``False`` when no entry carries
    ``trial_index``, and ``None`` when the file has no ``data1`` variable (the path is
    then appended to ``errors2``) or when the trial number matches more than one entry.
    """
    contents = loadmat(mat_path)
    try:
        field_names = contents['data1'].dtype.names
    except KeyError:
        errors2.append(mat_path)
        return

    # Behaviors are recognised by their "<behavior>_scores..." fields.
    scored = sorted(name.split('_')[0] for name in field_names if 'scores' in name)
    canonical_order = ("Lift", "Handopen", "Grab", "Sup", "Atmouth", "Chew")
    present = [name for name in canonical_order if name in scored]

    hits = np.argwhere(contents["data1"]["trial"].ravel() == (trial_index))
    if hits.size == 0:
        # Trial not found in JAABA data
        return False

    try:
        position = hits.item()
    except ValueError:
        # Several entries share this trial number.
        return

    columns = []
    for name in present:
        field_pos = contents['data1'].dtype.names.index(f'{name}_postprocessed')
        values = contents['data1'][position][0][field_pos]
        values[values == -1] = 0
        columns.append(values)

    return np.hstack(columns).T

In [10]:
# Retry the sessions whose mat file had no 'data' variable, reading 'data1' instead.
# `errors` holds one entry per failed trial, so a session can appear several times;
# dict.fromkeys drops the repeats while keeping the original order.
for mat in tqdm(list(dict.fromkeys(errors))):
    # Look the session's dict up once; it is the object stored in `df`, so assigning
    # into it updates the frame in place. `.item()` requires exactly one matching row.
    session_labels = df.loc[df['mat_path'] == mat.stem, 'jaaba_labels'].item()
    for key in session_labels:
        session_labels[key] = get_ethogram2(trial_index=int(key.split('_v')[-1]),
                                            mat_path=mat)

  0%|                                                                                                     | 0/8 [00:00<?, ?it/s]
  0%|                                                                                                     | 0/1 [00:00<?, ?it/s][A
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  8.25it/s][A
 12%|███████████▋                                                                                 | 1/8 [00:00<00:00,  8.08it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 14.52it/s][A

  0%|                                                                                                     | 0/2 [00:00<?, ?it/s][A
100%|█████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 15.14it/s][A
 38%|██████████████████████████████████▉                                         

In [11]:
# Mat files in which get_ethogram2 found no 'data1' variable; empty means every retry could be read.
errors2

[]

In [12]:
# Inspect the frame now that `jaaba_labels` has been filled in.
df

Unnamed: 0,mat_path,hand_labels,jaaba_labels
0,M232_20170306,"{'M232_20170306_v029': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170306_v029': [[0, 0, 0, 0, 0, 0, 0, ..."
1,M232_20170307,"{'M232_20170307_v022': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170307_v022': [[0, 0, 0, 0, 0, 0, 0, ..."
2,M232_20170308,"{'M232_20170308_v020': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170308_v020': [[0, 0, 0, 0, 0, 0, 0, ..."
3,M232_20170310,"{'M232_20170310_v055': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170310_v055': [[0, 0, 0, 0, 0, 0, 0, ..."
4,M232_20170314,"{'M232_20170314_v011': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170314_v011': [[0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...
260,M238_20170717,"{'M238_20170717_v021': [[0, 0, 0, 0, 0, 0, 0, ...","{'M238_20170717_v021': [[0, 0, 0, 0, 0, 0, 0, ..."
261,M238_20170725,"{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ...","{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ..."
262,M238_20170726,"{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ...","{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ..."
263,M240_20170725FinalChecked,"{'M240_20170725_v084': [[0, 0, 0, 0, 0, 0, 0, ...","{'M240_20170725_v084': [[0, 0, 0, 0, 0, 0, 0, ..."


In [13]:
# Persist the frame with the new `jaaba_labels` column.
# NOTE(review): this overwrites the very file `df` was loaded from. The columns hold
# Python objects, so PyTables pickles them (hence the performance warning).
df.to_hdf('/data/caitlin/exactly3_hand_labels.hdf', key='df')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['mat_path', 'hand_labels', 'jaaba_labels'], dtype='object')]

  df.to_hdf('/data/caitlin/exactly3_hand_labels.hdf', key='df')


In [14]:
# Read the file back to check that the saved frame round-trips, then display it.
df = pd.read_hdf('/data/caitlin/exactly3_hand_labels.hdf')
df

Unnamed: 0,mat_path,hand_labels,jaaba_labels
0,M232_20170306,"{'M232_20170306_v029': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170306_v029': [[0, 0, 0, 0, 0, 0, 0, ..."
1,M232_20170307,"{'M232_20170307_v022': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170307_v022': [[0, 0, 0, 0, 0, 0, 0, ..."
2,M232_20170308,"{'M232_20170308_v020': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170308_v020': [[0, 0, 0, 0, 0, 0, 0, ..."
3,M232_20170310,"{'M232_20170310_v055': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170310_v055': [[0, 0, 0, 0, 0, 0, 0, ..."
4,M232_20170314,"{'M232_20170314_v011': [[0, 0, 0, 0, 0, 0, 0, ...","{'M232_20170314_v011': [[0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...
260,M238_20170717,"{'M238_20170717_v021': [[0, 0, 0, 0, 0, 0, 0, ...","{'M238_20170717_v021': [[0, 0, 0, 0, 0, 0, 0, ..."
261,M238_20170725,"{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ...","{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ..."
262,M238_20170726,"{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ...","{'M238_20170725_v001': [[0, 0, 0, 0, 0, 0, 0, ..."
263,M240_20170725FinalChecked,"{'M240_20170725_v084': [[0, 0, 0, 0, 0, 0, 0, ...","{'M240_20170725_v084': [[0, 0, 0, 0, 0, 0, 0, ..."


### Want to merge ethograms so that a single hand-labeled behavior replaces the JAABA prediction for that behavior

In [15]:
# One independent, initially empty dict per row; filled in place, keyed by trial name.
df["merged_ethogram"] = [{} for _ in range(len(df.index))]

In [16]:
# (mat_path, trial key) pairs that could not be merged.
errors = list()
for _, session in tqdm(df.iterrows(), total=len(df)):
    for key, hand in session["hand_labels"].items():
        jaaba = session["jaaba_labels"][key]
        # The lookup functions return None or False when no usable JAABA ethogram exists.
        if jaaba is None or isinstance(jaaba, bool):
            errors.append((session["mat_path"], key))
            continue
        # Rows can only be swapped when both ethograms have the same shape.
        if hand.shape != jaaba.shape:
            errors.append((session["mat_path"], key))
            continue
        # Work on a copy: writing into the JAABA array itself would overwrite the raw
        # predictions kept in `jaaba_labels` with the hand labels.
        merged_ethogram = jaaba.copy()
        for i, h_row in enumerate(hand):
            # A hand-labeled row with any non-zero entry replaces the JAABA row.
            if h_row.any():
                merged_ethogram[i] = h_row
        session["merged_ethogram"][key] = merged_ethogram

0it [00:00, ?it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 6817.79it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 5584.96it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 6756.83it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 2439.97it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 5798.58it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 5964.60it/s][A

100%|███████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 8084.63it/s][A

100%|█████████████████████████████████████████████

In [17]:
# Keys of all trials whose merged ethogram contains at least one non-zero entry,
# in frame order and, within a row, in dict order.
corrected_trials = [
    key
    for _, session in df.iterrows()
    for key, ethogram in session["merged_ethogram"].items()
    if ethogram.any()
]
count = len(corrected_trials)
count

680

In [18]:
# Persist the frame again, now including `merged_ethogram`.
# NOTE(review): same path as the input file, so the file on disk is replaced.
df.to_hdf('/data/caitlin/exactly3_hand_labels.hdf', key='df')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['mat_path', 'hand_labels', 'jaaba_labels', 'merged_ethogram'], dtype='object')]

  df.to_hdf('/data/caitlin/exactly3_hand_labels.hdf', key='df')


In [19]:
# Peek at the first five corrected trial keys.
corrected_trials[0:5]

['M232_20170306_v029',
 'M232_20170306_v038',
 'M232_20170306_v040',
 'M232_20170306_v060',
 'M232_20170306_v070']

### Want to copy videos for trials that have been corrected to `/data/caitlin/potential_ground_truth/` for review

In [20]:
# Directory of the per-session mat files (the same location `mat_loc` points to).
mat_files = Path('/data/caitlin/mat_files/')

In [21]:
# One entry per corrected trial: the parent directory of the trial's `id` path as
# stored in the session's mat file, or None when it cannot be determined.
vid_paths = list()
for _, session in tqdm(df.iterrows(), total=len(df)):
    m = loadmat(mat_files.joinpath(session['mat_path']).with_suffix('.mat'))
    for k, ethogram in session["merged_ethogram"].items():
        # Apply the same `.any()` filter that builds `corrected_trials`, so the two
        # lists stay index-aligned when they are zipped together later.
        if not ethogram.any():
            continue
        trial_number = int(k.split('_v')[-1])
        video_dir = None
        # Sessions store their struct array under 'data' or, failing that, 'data1'.
        for data_key in ('data', 'data1'):
            if data_key not in m:
                continue
            data = m[data_key]
            # Find the row through the `trial` field rather than assuming that
            # trial N sits at position N - 1 (trials can be missing from the file).
            matches = np.argwhere(data['trial'].ravel() == trial_number)
            if matches.size == 1:
                video_dir = Path(data['id'][matches.item()][0][0]).parent
                break
        vid_paths.append(video_dir)

265it [00:36,  7.18it/s]


In [22]:
# Number of corrected trials; it is zipped with `vid_paths`, so the lengths should agree.
len(corrected_trials)

680

In [23]:
# Number of collected video directories (None placeholders included).
len(vid_paths)

680

In [24]:
# Full path of each corrected trial's movie, or None where no video directory is known.
final_paths = list()
home_path = '/home/clewis7/wasabi/hantmanlab/from_tier2'
for vp, trial in zip(vid_paths, corrected_trials):
    if vp is None:
        # Keep a placeholder so final_paths stays aligned with corrected_trials.
        final_paths.append(None)
        continue
    # Concatenate as text: the stored directory appears to start with '/', and
    # joining an absolute path onto home_path with pathlib would discard home_path.
    trial_dir = Path(home_path + str(vp)) / trial
    final_paths.append(trial_dir / 'movie_comb.avi')

In [25]:
# Number of assembled movie paths (None placeholders included).
len(final_paths)

680

In [26]:
# Spot-check the first five assembled movie paths.
final_paths[0:5]

[PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M232Slc17a7_Gtacr2/20170306/Group3Laser/M232_20170306_v029/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M232Slc17a7_Gtacr2/20170306/Group4/M232_20170306_v038/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M232Slc17a7_Gtacr2/20170306/Group4/M232_20170306_v040/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M232Slc17a7_Gtacr2/20170306/Group6/M232_20170306_v060/movie_comb.avi'),
 PosixPath('/home/clewis7/wasabi/hantmanlab/from_tier2/Jay/videos/M232Slc17a7_Gtacr2/20170306/Group7/M232_20170306_v070/movie_comb.avi')]

In [27]:
# Destination directory for the videos that are copied for review.
ground = Path('/data/caitlin/potential_ground_truth/')

In [28]:
# Source paths that could not be found while copying.
wrong_paths = list()
# shutil.copy also raises FileNotFoundError when the destination directory is missing;
# create it up front so that only missing source videos end up in wrong_paths.
ground.mkdir(parents=True, exist_ok=True)
for vp, trial in tqdm(zip(final_paths, corrected_trials), total=len(final_paths)):
    # Skip trials for which no video path could be assembled.
    if vp is None or trial is None:
        continue
    try:
        # Each movie is stored under the trial's name so the copies do not collide.
        shutil.copy(src=vp, dst=ground.joinpath(f"{trial}.avi"))
    except FileNotFoundError:
        wrong_paths.append(vp)

680it [21:09,  1.87s/it]
