### Imports

In [33]:
import numpy as np
import pandas as pd
import os
import scipy
from scipy import stats, signal
import pickle
import csv
import datetime

import time

import warnings
warnings.filterwarnings("ignore")

In [34]:
import shutil

### File Paths

In [35]:
#every directory used by the notebook is resolved against the directory it is launched from
cwd = os.getcwd()

#in order: body part predictions, object predictions, video annotations, saved outputs
dlc_parent, obj_parent, ant_parent, file_path = (
    os.path.join(cwd, folder)
    for folder in ('dlc', 'behavior_propagate', 'annotations', 'files')
)

### Pickling Files

In [36]:
#pickles file
def pickle_file(file_path, file):
    """Serialize ``file`` with pickle and write it to ``file_path``.

    file_path: destination path; opened in binary write mode, so an existing file is overwritten
    file: the object to pickle
    """
    #the with-block closes the handle even when pickle.dump raises
    with open(file_path, 'wb') as outfile:
        pickle.dump(file, outfile)

In [37]:
#opens pickled file
def open_pickle(file_path):
    """Load and return the object stored in the pickle at ``file_path``.

    file_path: path to the pickle; opened in binary read mode
    """
    #NOTE(review): pickle.load can run arbitrary code, only open pickles written by this notebook
    #the with-block closes the handle even when pickle.load raises
    with open(file_path, 'rb') as infile:
        return pickle.load(infile)

### Loading .txt Files

In [92]:
#mice covered by the notebook; every dictionary below is keyed by these names
mice_name = ["caf99", "kdr48", "caf42", "caf26"]

#presumably the body part labels used in the .h5 prediction files (not referenced in the cells shown here)
body_parts = ['base_tail', 'body_core', 'headstage', 'left_ear', 'right_ear', 'snout']

#file-name stem of each mouse's per-frame object prediction .txt files
obj_names = dict(
    caf99="CAF00099-20210608T040846-050846",
    kdr48="KDR00048_Corner-20220502T201304-211305",
    caf42="e3v81a6-20200915T022243-032244",
    caf26="e3v81a8-20200808T0609-0709",
)

#file name of each mouse's .h5 prediction file; all four end in the same suffix
_dlc_suffix = "DLC_resnet50_caf26_caf42_caf99_kdr48Jul12shuffle1_1030000.h5"
dlc_names = {
    "caf99": "CAF00099-20210608T040846-050846" + _dlc_suffix,
    "kdr48": "KDR00048_Top-20220502T201307-211308" + _dlc_suffix,
    "caf42": "e3v81a6-20200915T022243-032244" + _dlc_suffix,
    "caf26": "e3v81a8-20200808T0609-0709" + _dlc_suffix,
}

#frame count per mouse; None until obj_frames fills the entry in
frame_num = {name: None for name in mice_name}

In [106]:
#combines all txt files (one for each frame) into a single multiindex dataframe
#first index level is the 0-based frame, second level is the row of the object within that frame's file
def obj_frames(parent_dir, mouse_name):
    """Read every per-frame object prediction file of one mouse into one dataframe.

    parent_dir: directory that contains the '<mouse_name>_infer' folder
    mouse_name: key into obj_names / frame_num

    Side effect: stores the number of frame files found in frame_num[mouse_name].

    Returns a dataframe (not a dictionary) whose outer index level is the 0-based
    frame number and whose inner level is the row position inside that frame's file.
    """
    #path to mouse directory
    obj_dir = os.path.join(parent_dir, f'{mouse_name}_infer')

    #column names
    header = ['label','x_center','y_center','x_width','y_height','confidence']

    #every frame file is named '<stem>_<frame number>.txt', numbered from 1
    prefix = f'{obj_names[mouse_name]}_'

    #count only this mouse's frame files: any other entry in the folder (not just .DS_Store)
    #would inflate the count and make the loop below ask for a file that does not exist
    total_frames = sum(
        1 for name in os.listdir(obj_dir)
        if name.startswith(prefix) and name.endswith('.txt')
    )
    frame_num[mouse_name] = total_frames

    #key: 0-based frame, value: dataframe of that frame (first line of each file is skipped)
    frames = {
        i: pd.read_csv(os.path.join(obj_dir, f'{prefix}{i + 1}.txt'), sep = ",", skiprows = 1, names = header, header = None)
        for i in range(total_frames)
    }

    #dictionary keys become the outer index level
    return pd.concat(frames)

In [107]:
#builds the object dataframe of every mouse, pickles each one, then pickles the dictionary holding all of them
dataframes = {}
for mouse in mice_name:
    start_time = time.time()
    print(mouse)

    #multiindex dataframe covering every frame of this mouse
    frames = obj_frames(obj_parent, mouse)
    dataframes[mouse] = frames

    #individual pickle for this mouse
    mouse_pickle_path = os.path.join(file_path, f'{mouse}_obj_df')
    pickle_file(mouse_pickle_path, frames)

    #seconds spent on this mouse
    elapsed = time.time() - start_time
    print(elapsed)

#pickle of the whole dictionary, key: mouse name, value: dataframe
pickle_file(os.path.join(file_path, 'all_obj_dict'), dataframes)

caf99
1746.1812200546265
kdr48
6746.072951078415
caf42
7588.4480838775635
caf26
139.9658658504486


In [123]:
#turns each mouse's multiindex dataframe into a flat one with explicit 'frame' and 'num_objs' columns
flatten_dataframes = {}
for mouse in mice_name:
    start_time = time.time()
    print(mouse)

    #move both index levels into columns, then give those columns their final names
    mouse_obj_df = (
        open_pickle(os.path.join(file_path, f'{mouse}_obj_df'))
        .reset_index([0, 1])
        .rename(columns={'level_0': 'frame', 'level_1': 'num_objs'})
    )

    flatten_dataframes[mouse] = mouse_obj_df

    #individual pickle for this mouse
    pickle_file(os.path.join(file_path, f'flatten_{mouse}_obj_df'), mouse_obj_df)

    print(time.time() - start_time)

#pickle of the whole dictionary of flattened dataframes
pickle_file(os.path.join(file_path, 'flatten_all_obj_dict'), flatten_dataframes)

caf99
0.5934350490570068
kdr48
0.291550874710083
caf42
0.24297690391540527
caf26
0.36738014221191406


### Loading .h5 Files

In [19]:
#reads the h5 file at dlc_path and strips the top column level
def load_dlc(dlc_path):
    """Return the dataframe stored at ``dlc_path`` restricted to its first top-level column entry.

    The first entry of the top column level is selected, so the returned frame
    has one column level fewer than the stored one.
    """
    raw_df = pd.read_hdf(dlc_path)

    #first entry of the outermost column level
    first_scorer = raw_df.columns.levels[0][0]

    return raw_df[first_scorer]

In [20]:
#re-saves each mouse's body part dataframe as its own hdf file and collects all of them in one dictionary
dataframes = {}
for mouse in mice_name:
    start_time = time.time()
    print(mouse)

    #read the predictions for this mouse
    dlc_path = os.path.join(dlc_parent, dlc_names[mouse])
    dlc_df = load_dlc(dlc_path)

    #saves dataframe as an hdf file, replacing any previous file at that path
    path = os.path.join(file_path, f'{mouse}_dlc_df.h5')
    dlc_df.to_hdf(path, key = 'dlc_df', mode = 'w')

    #key: mouse name, value: dataframe
    dataframes[mouse] = dlc_df

    print(time.time() - start_time)

#pickles dictionary with all dataframes
pickle_file(os.path.join(file_path, 'all_parts_dict'), dataframes)

caf99
0.2343120574951172
kdr48
0.0546722412109375
caf42
0.04892611503601074
caf26
0.0546879768371582


### Loading Behavior Annotations

First, parse each mouse's annotation CSV file into a cleaned dataframe.

In [139]:
#behavior label -> numerical code
#codes 1-10 are the "Location" behaviors, 11-18 are the remaining ones
BEHAVIOR_CODES = {
    'Nesting': 1,
    'Playing w/ Ball': 2,
    'Playing w/ Box': 3,
    'Riding the Ball': 4,
    'Riding the Box': 5,
    'Drinking': 6,
    'Chewing': 7,
    'Porthole Interaction': 8,
    'Scaling Porthole': 9,
    'Scaling Door': 10,
    'Grooming': 11,
    #two spellings share one code
    'Hollowing/Digging': 12,
    'Digging': 12,
    #two spellings share one code
    'Locomotion': 13,
    'Locomotive Movement': 13,
    'Minor Postural Movement': 14,
    'Rearing': 15,
    'Stretching': 16,
    'Twitching': 17,
    'Slipping': 18,
}

#encodes all categorical behaviors to numerical
def cat_num(df):
    """Return a copy of ``df`` with 'Behavior' encoded as integers and non-behavior rows dropped.

    df: dataframe with a 'Behavior' column of labels

    Labels found in BEHAVIOR_CODES are replaced by their code; values that are
    already numeric are kept as they are, so calling the function on its own
    output changes nothing. Every other row is dropped. The input dataframe is
    left untouched (the previous chained assignment wrote into it, and assigns
    nothing at all when pandas Copy-on-Write is active).
    """
    #encode, leaving unknown values as they are so the filter below can judge them
    encoded = df['Behavior'].map(lambda label: BEHAVIOR_CODES.get(label, label))

    #drop all nonbehaviors
    is_behavior = encoded.map(lambda value: str(value).isnumeric())

    #explicit copy so later column assignments do not write into a view of df
    out = df.loc[is_behavior].copy()
    out['Behavior'] = encoded[is_behavior].astype(int)

    return out

In [140]:
def get_skip_rows(file_name):
    """Return the 0-based index of the first csv row whose first field is "Time".

    file_name: path of the csv file to scan

    Returns None when no row starts with "Time".
    """
    with open(file_name, newline='') as f:
        for i, row in enumerate(csv.reader(f)):
            #csv.reader yields an empty list for a blank line, so check before indexing
            if row and row[0] == "Time":
                return i
    return None

In [141]:
def add_frames_column(df, fps = 15):
    """Add a 'Frames' column holding each row's 'Time' converted to a frame number.

    df: dataframe with a 'Time' column; it is modified in place and also returned
    fps: frames per time unit used for the conversion (default 15, the value that was hard-coded before)

    NOTE(review): confirm 15 matches every video; pass fps explicitly for recordings with another rate.
    """
    df['Frames'] = df['Time'].apply(lambda seconds: round(seconds * fps))

    #row order is left exactly as in the input: the previous df.sort_values(by = ['Frames', 'Status'])
    #call discarded its result and never reordered anything, and behavior_arr pairs consecutive
    #rows, so a real sort on 'Status' could put two rows of the same frame in the wrong order
    return df

In [142]:
def clean_ants(file_name, mouse_name):
    """Parse one annotation csv into an encoded dataframe with 'Minutes' and 'Frames' columns.

    file_name: path of the annotation csv
    mouse_name: accepted but not used by this function
    """
    #rows above the one starting with "Time" are skipped (None when there is no such row)
    header_row = get_skip_rows(file_name)
    df = pd.read_csv(file_name, skiprows = header_row)

    #add timestamps column: 'Time' rendered as a timedelta string
    def _as_timestamp(row):
        return str(datetime.timedelta(seconds = row['Time']))

    df['Minutes'] = df.apply(_as_timestamp, axis = 1)

    #drop unnecessary columns, skipping any that the file does not have
    unwanted = ["Media file path", "Subject", "Behavioral category", "Comment"]
    df = df.drop(columns = unwanted, errors = 'ignore')

    #encode all categorical behaviors, then add frames column
    return add_frames_column(cat_num(df))

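Next, separate the location behaviors (codes below 11) from the remaining behaviors (codes 11 and above), and build a per-frame label array for each group.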

In [143]:
def sep_beh(df):
    """Split ``df`` by behavior code and return (rows with code < 11, rows with code >= 11)."""
    codes = df['Behavior']

    is_location = codes < 11
    is_regular = codes >= 11

    return df.loc[is_location], df.loc[is_regular]

In [144]:
def behavior_arr(df, total_frames):
    """Build an array of length ``total_frames`` holding the behavior code active at each frame.

    df: dataframe with 'Frames' and 'Behavior' columns
    total_frames: length of the returned array

    Rows are consumed in consecutive pairs: the first row of a pair gives the
    start frame and the behavior code, the second row gives the stop frame
    (exclusive). A trailing unpaired row is ignored. Frames covered by no pair
    stay 0; where pairs overlap, the later pair wins.
    """
    frame_marks = df['Frames'].to_numpy()
    codes = df['Behavior'].to_numpy()

    labels = np.zeros(total_frames)

    #even positions are starts, odd positions are stops; zip stops at the shorter side
    for first, last, code in zip(frame_marks[0::2], frame_marks[1::2], codes[0::2]):
        labels[first:last] = code

    return labels

In [145]:
#builds and pickles the per-frame location / regular behavior arrays of every mouse
for mouse in mice_name:
    print(mouse)
    start_time = time.time()

    #frame_num is only filled in as a side effect of obj_frames; without this check a
    #missing entry surfaces as an unexplained TypeError from np.zeros(None) further down
    frames = frame_num[mouse]
    if frames is None:
        raise RuntimeError(
            f"frame_num['{mouse}'] is not set; run obj_frames for this mouse before building behavior arrays"
        )

    filename = os.path.join(ant_parent, f'{mouse}_dt_test.csv')
    parsed_df = clean_ants(filename, mouse)

    #location behaviors and remaining behaviors get one array each
    loc_df, reg_df = sep_beh(parsed_df)

    loc_arr = behavior_arr(loc_df, frames)
    reg_arr = behavior_arr(reg_df, frames)

    #the arrays are stored next to the annotation files
    pickle_file(os.path.join(ant_parent, f'{mouse}_loc_arr'), loc_arr)
    pickle_file(os.path.join(ant_parent, f'{mouse}_reg_arr'), reg_arr)

    print(time.time() - start_time)

caf99
0.3398890495300293
kdr48
0.21668720245361328
caf42
0.2411510944366455
caf26
0.2804570198059082


In [38]:
#number of entries in the saved location array of caf99
caf99_loc_arr = open_pickle(os.path.join(ant_parent, 'caf99_loc_arr'))
len(caf99_loc_arr)

108000

### Dead Code

In [108]:
#reloads the pickled object dataframe of caf42 for inspection
caf42_obj_path = os.path.join(file_path, 'caf42_obj_df')
test = open_pickle(caf42_obj_path)

In [109]:
#FLATTENING MULTIINDEX DATAFRAME
#both index levels become the columns 'frame' and 'num_objs'
test = (
    test
    .reset_index([0, 1])
    .rename(columns={'level_0': 'frame', 'level_1': 'num_objs'})
)
test

Unnamed: 0,frame,num_objs,label,x_center,y_center,x_width,y_height,confidence
0,0,0,pellet,273.73504,112.93008,40.67008,37.46016,
1,0,1,pellet,463.47520,327.00000,51.64992,63.44016,
2,0,2,pellet,219.95008,110.25504,57.26016,44.95008,
3,0,3,box,382.68480,389.26992,120.49024,124.88016,
4,0,4,ball,325.85472,131.22000,108.28992,98.54016,
...,...,...,...,...,...,...,...,...
1025072,54014,14,spout,450.95488,95.31984,19.71008,18.08016,
1025073,54014,15,porthole,77.25504,426.39984,16.92992,23.16000,
1025074,54014,16,porthole,110.09024,411.10512,15.90016,21.25008,
1025075,54014,17,body,257.00032,48.50016,78.00000,69.00000,0.843156


In [133]:
#keeps only the rows labelled 'spout'
obj = test.loc[test['label'] == 'spout']

In [138]:
#x_center of the first selected row at frame 54000
obj.loc[obj['frame'] == 54000, 'x_center'].values[0]

450.95488

In [89]:
#CONFIRMING DATAFRAMES HAVE THE SAME NUMBER OF FRAMES
# for mouse in mice_name:
#     obj_len = len([name for name in os.listdir(os.path.join(obj_parent, f'{mouse}_infer')) if name != ".DS_Store"])
#     dlc_len = len(pd.read_hdf(os.path.join(dlc_parent, dlc_names[mouse])))
    
#     if obj_len != dlc_len:
#         print(mouse)
#         print(obj_len)
#         print(dlc_len)
    
#     else:
#         print('success')