### Note:

This notebook was created and executed at the very beginning of the summer, when we had completed only a handful of annotations. The ethogram below does not reflect the updated behaviors. However, this notebook is a good foundation for how to complete future processing.

## Imports

In [1]:
import pandas as pd
import numpy as np
import warnings
import sys
warnings.filterwarnings("ignore")

## Import Data

In [3]:
# paths to the annotation CSVs (one per camera angle)
side_path = 'Files/CAF26_side.csv'
top_path = 'Files/CAF26_top.csv'

# read the annotations into pandas dataframes;
# read_csv needs the path passed explicitly - calling it with no argument raises TypeError
df_side = pd.read_csv(side_path)
df_top = pd.read_csv(top_path)

## Ethogram

In [5]:
# Ethogram: numeric code used for each annotated behavior.
# Codes 1-8 are tagged "Location"; codes 9-15 carry no tag.
# 0 = No Behavior
# 1 = Nesting # Location
# 2 = Playing w/ Ball # Location
# 3 = Playing w/ Box # Location
# 4 = Riding the Ball # Location
# 5 = Riding the Box # Location
# 6 = Drinking # Location
# 7 = Chewing # Location
# 8 = Porthole Interaction # Location
# 9 = Grooming
# 10 = Digging
# 11 = Locomotion
# 12 = Minor Postural Movement
# 13 = Rearing
# 14 = Stretching
# 15 = Twitching

## Pre-Processing Data

In [6]:
# Sleep-state and flicker labels that are removed from both views.
SLEEP_AND_FLICKER_LABELS = [
    'SS/Wake', 'SS/NREM', 'SS/REM',
    'Flicker/N_W', 'Flicker/N_R', 'Flicker/W_N',
    'Flicker/W_R', 'Flicker/R_N', 'Flicker/R_W',
]
# Carrying labels are removed from the side view only; the top view keeps them.
CARRYING_LABELS = ['Carrying/Moving', 'Carrying/Pushing']

# side view: drop sleep states, flickers and carrying rows (in place, as before)
side_excluded = df_side.Behavior.isin(SLEEP_AND_FLICKER_LABELS + CARRYING_LABELS)
df_side.drop(df_side[side_excluded].index, inplace=True)

# top view: drop sleep states and flickers only (in place, as before)
top_excluded = df_top.Behavior.isin(SLEEP_AND_FLICKER_LABELS)
df_top.drop(df_top[top_excluded].index, inplace=True)

### Converting to Numerical Labels

In [7]:
#encodes all categorical behaviors to numerical
def cat_num(df):
    """Replace behavior names in ``df['Behavior']`` with their numeric ethogram codes.

    The column is rewritten with a single whole-column assignment instead of
    chained indexing (``df.Behavior[mask] = n``), which pandas does not
    guarantee to write back to ``df`` and which is a no-op under Copy-on-Write.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table with a ``Behavior`` column. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with known behavior names replaced by integer
        codes. Values that are not in the table (including already-encoded
        integers) are left unchanged.
    """
    behavior_codes = {
        # location-tagged behaviors
        'Nesting': 1,
        'Playing w/ Ball': 2,
        'Playing w/ Box': 3,
        'Riding the Ball': 4,
        'Riding the Box': 5,
        'Drinking': 6,
        'Chewing': 7,
        'Porthole Interaction': 8,
        # untagged behaviors
        'Grooming': 9,
        'Digging': 10,
        'Locomotion': 11,
        'Minor Postural Movement': 12,
        'Rearing': 13,
        'Stretching': 14,
        'Twitching': 15,
    }

    # Positional list assignment keeps the existing index untouched.
    df['Behavior'] = [behavior_codes.get(label, label) for label in df['Behavior']]

    return df

In [8]:
# apply the numeric encoding to each camera angle (top first, then side)
df_top, df_side = cat_num(df_top), cat_num(df_side)

In [9]:
df_top

Unnamed: 0,Time,Behavior,Comment,Status
1,0.067,11,,START
2,1.467,11,,STOP
3,5.133,8,,START
4,7.800,8,,STOP
5,8.067,11,,START
...,...,...,...,...
770,3526.733,11,,STOP
771,3550.400,11,,START
772,3553.667,11,,STOP
773,3560.133,8,,START


In [10]:
df_side

Unnamed: 0,Time,Behavior,Comment,Status
0,0.000,11,,START
2,1.333,11,,STOP
3,5.467,8,,START
4,7.800,8,,STOP
5,8.400,11,,START
...,...,...,...,...
687,3551.267,11,,STOP
688,3558.733,8,,START
689,3560.733,8,,STOP
690,3573.533,13,,START


### Adding Frames column to Dataframe

In [11]:
# def add_frames_column(df):
#     #assumes video only has 54000 frames, which is not true for some
#     sparse_arr = np.zeros(54000)
    
#     #assumes video to be 15 fps
#     df['Frames'] = df['Time'].apply(lambda x: round(x * 15))
#     df.sort_values(by = ['Frames', 'Status'])
#     frames_arr_loc = df['Frames'].to_numpy()
#     frames_arr_reg = df['Frames'].to_numpy()
    
#     return df,frames_arr_loc,frames_arr_reg

# df_side,frames_arr_side_loc,frames_arr_side_reg = add_frames_column(df_side)
# df_top,frames_arr_top_loc,frames_arr_top_reg = add_frames_column(df_top)

# frames_arr_side_loc

In [12]:
# df_top.Behavior = df_top.Behavior.astype(int) # to numeric
# df_side.Behavior = df_side.Behavior.astype(int) # to numeric

In [13]:
# type(df_side.Behavior.iloc[0])

### Isolating Location Constrained Behaviors

In [14]:
def add_frames_column(df, fps=15):
    """Add a ``Frames`` column holding each row's time converted to a frame number.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation table with a ``Time`` column. Modified in place.
    fps : int or float, optional
        Multiplier applied to ``Time`` before rounding. Defaults to 15, the
        frame rate the notebook assumes for its videos.

    Returns
    -------
    (pandas.DataFrame, numpy.ndarray)
        The same ``df`` object with the new ``Frames`` column, and that column
        as a NumPy array.
    """
    # Row order is deliberately left untouched: sparse_array pairs consecutive
    # rows positionally. (The previous sort_values call discarded its result,
    # so it never reordered anything either.)
    df['Frames'] = df['Time'].apply(lambda seconds: round(seconds * fps))
    frames_arr = df['Frames'].to_numpy()
    return df, frames_arr

In [15]:
def separate_behaviors(df, first_regular_code=9):
    """Split an encoded annotation table into location and regular behaviors.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose ``Behavior`` column holds numeric codes.
    first_regular_code : int, optional
        Rows with ``Behavior`` below this value go to the location table, rows
        at or above it go to the regular table. Defaults to 9.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(df_loc, df_reg)``. Both are independent copies, so columns can be
        added to them later without writing to a filtered slice of ``df``.
    """
    df_loc = df.loc[df.Behavior < first_regular_code].copy()
    df_reg = df.loc[df.Behavior >= first_regular_code].copy()
    return df_loc, df_reg

# split each view into its location / regular tables (top first, then side)
(df_loc_top, df_reg_top), (df_loc_side, df_reg_side) = (
    separate_behaviors(df_top),
    separate_behaviors(df_side),
)

In [16]:
# top view: full table, then its location / regular subsets
df_top, frames_arr_top = add_frames_column(df_top)
df_loc_top, frames_arr_loc_top = add_frames_column(df_loc_top)
df_reg_top, frames_arr_reg_top = add_frames_column(df_reg_top)

# side view: full table, then its location / regular subsets
df_side, frames_arr_side = add_frames_column(df_side)
df_loc_side, frames_arr_loc_side = add_frames_column(df_loc_side)
df_reg_side, frames_arr_reg_side = add_frames_column(df_reg_side)

df_loc_side

# df_side,frames_arr_side_loc,frames_arr_side_reg = add_frames_column(df_side)
# df_top,frames_arr_top_loc,frames_arr_top_reg = add_frames_column(df_top)

# frames_arr_side_loc

Unnamed: 0,Time,Behavior,Comment,Status,Frames
3,5.467,8,,START,82
4,7.800,8,,STOP,117
7,13.133,8,,START,197
8,13.400,8,,STOP,201
13,17.333,8,,START,260
...,...,...,...,...,...
681,3518.400,8,,STOP,52776
682,3522.800,8,,START,52842
683,3523.933,8,,STOP,52859
688,3558.733,8,,START,53381


In [17]:
frames_arr_loc_side

array([   82,   117,   197,   201,   260,   275,   369,   374,   383,
         393,   436,   455,   458,   485,   491,   503,   510,   552,
         556,   571,   585,   645,   647,   686,   695,   763,   773,
         798,   819,   876,   884,   899,   941,   949,  1084,  1097,
        1213,  1222,  1224,  1234,  3583,  3649,  3976,  4180,  5072,
        5078,  5078,  5117,  5124,  5134,  5443,  5947,  5964,  5992,
        6116,  6190,  6190,  6228,  6228,  6694,  6694,  6722,  6722,
        6766,  6766,  6793,  6793,  6819,  7265,  7279,  7279,  7298,
        7298,  7318,  7409,  7424,  7424,  7426,  7426,  7437,  7562,
        7592,  7625,  7677,  7704,  7729,  7731,  7750,  7753,  7766,
        7766,  7780,  8072,  8075,  8442,  8458,  8602,  8641,  8680,
        8719,  8729,  8738,  8742,  8747,  8749,  8769,  8771,  8804,
        8814,  8833,  9050,  9055,  9690,  9719,  9723,  9734,  9751,
        9830,  9846,  9895,  9917,  9932,  9940,  9945, 10144, 10207,
       10213, 10324,

### Populating Lists

In [18]:
#loads all behaviors into sparse array, with index representing frame #
def sparse_array(df, frames_arr, n_frames=54000):
    """Expand paired annotation rows into a per-frame array of behavior codes.

    Rows are consumed two at a time, in their existing order: row ``2k``
    supplies the start frame and the behavior code, row ``2k + 1`` supplies
    the stop frame. Frames ``start`` up to but not including ``stop`` are set
    to the code. A trailing unpaired row is ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a numeric ``Behavior`` column.
    frames_arr : array-like of int
        Frame number for each row of ``df``, in the same order.
    n_frames : int, optional
        Length of the returned array. Defaults to 54000, the length the
        notebook assumes for a video. Slices that run past the end are
        clipped by NumPy.

    Returns
    -------
    numpy.ndarray
        Float array of length ``n_frames``; 0 where no behavior was assigned.
    """
    sparse_arr = np.zeros(n_frames)
    # Pull the column out once instead of building a row object per iteration.
    behaviors = df['Behavior'].to_numpy()
    # zip stops at the shorter sequence, which drops an unpaired final row.
    for start_frame, stop_frame, behavior_num in zip(
        frames_arr[0::2], frames_arr[1::2], behaviors[0::2]
    ):
        sparse_arr[start_frame:stop_frame] = behavior_num
    return sparse_arr

# side view: location, then non-location behaviors
sparse_arr_side_loc = sparse_array(df_loc_side, frames_arr_loc_side)
sparse_arr_side_reg = sparse_array(df_reg_side, frames_arr_reg_side)

# top view: location, then non-location behaviors
sparse_arr_top_loc = sparse_array(df_loc_top, frames_arr_loc_top)
sparse_arr_top_reg = sparse_array(df_reg_top, frames_arr_reg_top)

In [19]:
# Raise NumPy's summarisation threshold to sys.maxsize so the arrays displayed
# below are printed in full instead of being abbreviated with "...".
np.set_printoptions(threshold=sys.maxsize)

In [20]:
sparse_arr_side_loc

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 8., 8., 8.,
       8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8.,
       8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 8., 8., 8., 8., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [21]:
sparse_arr_top_loc

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 8., 8., 8., 8., 8., 8., 8., 8.,
       8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8.,
       8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 8., 8., 8., 8.,
       8., 8., 8., 8., 8.

## Creating Resolution Arrays

In [22]:
# 1 where a location behavior is present in the view, 0 where there is none
top_binary = (sparse_arr_top_loc != 0).astype(int)
side_binary = (sparse_arr_side_loc != 0).astype(int)

In [23]:
# boolean masks: side view has a behavior / top view has none
mask_side = np.equal(side_binary, 1)
mask_top = np.equal(top_binary, 0)

# True on frames with a behavior in the side array but not in the top array
binary_mask = mask_side & mask_top
binary_mask

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [24]:
# start from the top view and fill its empty frames from the side view
res_arr = sparse_arr_top_loc.copy()
np.copyto(res_arr, sparse_arr_side_loc, where=binary_mask)
res_arr

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 8., 8., 8., 8., 8., 8., 8., 8.,
       8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8.,
       8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 8., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 8., 8., 8., 8., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 8., 8., 8., 8.,
       8., 8., 8., 8., 8.

In [29]:
# checking for improvement: fraction of frames where the resolved array equals the top view
# (divide by the array's own length rather than a hard-coded 54000)
np.count_nonzero(np.equal(res_arr, sparse_arr_top_loc)) / res_arr.size

0.9374259259259259

In [30]:
# fraction of frames where the resolved array equals the side view
np.count_nonzero(np.equal(res_arr, sparse_arr_side_loc)) / res_arr.size

0.9365555555555556

In [34]:
# fraction of frames where the top and side views agree with each other
np.count_nonzero(np.equal(sparse_arr_top_loc, sparse_arr_side_loc)) / sparse_arr_top_loc.size

0.8739814814814815

In [31]:
# sparseness: fraction of frames in the resolved array with no behavior
(res_arr.size - np.count_nonzero(res_arr)) / res_arr.size

0.6587222222222222

In [33]:
# sparseness of the top view: fraction of frames with no behavior
(sparse_arr_top_loc.size - np.count_nonzero(sparse_arr_top_loc)) / sparse_arr_top_loc.size

0.7212962962962963

In [32]:
# sparseness of the side view: fraction of frames with no behavior
(sparse_arr_side_loc.size - np.count_nonzero(sparse_arr_side_loc)) / sparse_arr_side_loc.size

0.7122407407407407

In [27]:
#combined function
#top acting as priority

def resolution(top_arr, side_arr):
    """Merge two per-frame behavior arrays, giving the top view priority.

    Every frame keeps its value from ``top_arr``. Frames where ``top_arr`` is
    0 and ``side_arr`` is non-zero take the ``side_arr`` value instead.

    Parameters
    ----------
    top_arr, side_arr : numpy.ndarray
        Per-frame behavior codes of equal shape; 0 means no behavior.

    Returns
    -------
    numpy.ndarray
        A new array; neither input is modified.
    """
    # frames the side view can fill: it has a behavior and the top view has none
    side_only = np.not_equal(side_arr, 0) & np.equal(top_arr, 0)

    merged = np.copy(top_arr)
    merged[side_only] = side_arr[side_only]
    return merged

In [28]:
#resolution array for location + regular behaviors

# resolve location behaviors first, then regular behaviors (top view has priority in both)
res_arr_loc, res_arr_reg = (
    resolution(top_view, side_view)
    for top_view, side_view in (
        (sparse_arr_top_loc, sparse_arr_side_loc),
        (sparse_arr_top_reg, sparse_arr_side_reg),
    )
)