# For light/dark experiments - Collect all the data that has been tracked and associated


In [None]:
import numpy as np
import os
import json
import sys
import glob
import time
import datetime
from pympler.tracker import SummaryTracker
from scipy import stats, signal
from scipy.signal import argrelextrema
from sklearn import linear_model
import cv2
import h5py
from itertools import groupby
from math import ceil
import pickle
import math
from scipy.linalg import norm
from scipy.spatial import ConvexHull
from scipy.signal import butter, lfilter
from sklearn.decomposition import PCA
import random
import feather
from sklearn.neighbors import KernelDensity
import re


# %qtconsole
%matplotlib qt5
%matplotlib auto
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib import colors
from matplotlib.patches import Rectangle, Circle
from matplotlib.patches import Polygon, Ellipse
from matplotlib import patches
from matplotlib.collections import PatchCollection
import matplotlib.gridspec as gridspec
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.mplot3d import Axes3D

# import statsmodels.api as sm

sys.path.append('/home/gravishlab/Documents/Python/')
sys.path.append('/home/gravishlab/Documents/Python/AntTrackCode')
sys.path.append('/home/gravishlab/Documents/Python/Tracker/')
sys.path.append('/home/gravishlab/Documents/Python/Tracker/Tracker/')
from Tracker.Tracker import Tracker

# import multiprocessing
# import threading
import subprocess
from subprocess import call
import pandas as pd
import seaborn as sns
import copy


# stop telling me i have a nan in an array with a logical comparison
with np.errstate(invalid='ignore'):
    np.less([np.nan, 0], 1)

In [None]:
%qtconsole

## Build file list 

In [None]:
vid_locations = '/media/gravishlab/SeagateExpansionDrive/Light_Dark/'
file_list = []

# searches for files
file_list = glob.glob(os.path.join(vid_locations, 'Tunnel_2019**/**/*0000.mp4'))
# file_list = glob.glob(os.path.join(vid_locations, 'Tunnel_20180329-30/**/*0000.mp4'))
file_list = sorted(file_list)
print('Total Number of Videos: ',len(file_list))
                
file_list = file_list
len(file_list)

### Calibrations 

In [None]:
# TUNNEL EXPERIMENTS
pix2mm = 31.72 # Measured 3cm in 4 cameras
fps = 239.16

v_multiplier=  fps / pix2mm
print('%0.3f pixels per mm'%pix2mm)


hei = 550
wid = 1000

pltcolors = ['#B1740F', '#BA4246', '#087E8B', '#701C6F']

# LOAD IN DATA FROM PICKLE - if have already been saved as done in next section

In [None]:
# LOAD IN DATA FROM PICKLE OR FEATHER (SKIP ALL NEXT DATAFRAME SECTIONS) - SPECIFY WHETHER TO INCLUDE TD DATA
print('loading in all trackways pickle')
start_time = time.time()
colonies = glob.glob(os.path.join(vid_locations, 'Tunnel_2019**/'))
for col in colonies[0:]:
    col_OI = col.split('/')[-2][-5:]
    print('   colony: %s'%col_OI)
    tmp = pd.read_pickle(vid_locations + 'AllTracks_' + col_OI)
#     tmp = pd.read_pickle(vid_locations + 'AllTracks_TDs_' + col_OI)
    if col == colonies[0]:
        df = tmp.copy()
    else:
        df = pd.concat([df,tmp], ignore_index=True)
    del tmp, col_OI
print('read in all trackways pickle -- duration: ', time.time()-start_time)


# remove trials with really weird velocity values
df = df.drop(df[df['v'].apply(lambda x: np.nanmax(x))>2000].index)
df = df.drop(df[df['vfilt'].apply(lambda x: np.nanmax(x))>2000].index)
print('\nRemoved trackways with a weirdly high velocity value')


# load in master dataframe
# print('loading in all trackways pickle')
# start_time = time.time()
# df = pd.read_pickle(vid_locations + 'AllTracks')
# print('read in all trackways pickle -- duration: ', time.time()-start_time)

# make list of trials
# %run LoadInTrackedData.ipynb # loads all functions
# from ipynb.fs.defs.LoadInTrackedData import create_trial_info # loads only one function
# trial_info = create_trial_info(file_list)

# load in dataframe of long trackways
# start_time = time.time()
# longtracks = pd.read_pickle(vid_locations + 'LongTracks')
# print('read in all trackways pickle -- duration: ', time.time()-start_time)

# OPTIONAL: BUILD DATA SET AND SAVE AS PICKLE


In [98]:
# FOR EACH COLONY, BUILD DATA AND SAVE AS PICKLE

colonies = glob.glob(os.path.join(vid_locations, 'Tunnel_2019**/'))
# %run LoadInTrackedData.ipynb # load in function of interest
from ipynb.fs.defs.LoadInTrackedData import load_and_analyze_videos_to_df
buffer = 0 #pix - remove sections when ant this this close to frame edge

for col in colonies[0:6]:
    
    f_list = sorted(glob.glob(os.path.join(col, '**/*0000.mp4')))
    print('\n%s - %i videos'%(col,len(f_list)))
    if len(f_list)>0:
        load_and_analyze_videos_to_df(f_list, buffer)
    

# # LOAD IN DATA
# %run LoadInTrackedData.ipynb
# df, trial_info = load_and_analyze_videos_to_df(file_list)
# # df, trial_info = load_and_analyze_videos_to_df(file_list)


# # if just building trial list
# # trial_info = create_trial_info(file_list)

del f_list, col, colonies

print('ALL DONE!!! YAY!')


/media/gravishlab/SeagateExpansionDrive/Light_Dark/Tunnel_20190814/ - 344 videos

*** BUILDING TRIAL LIST ***
    Done building trial list

*** LOADING TRACKS ***
0... 100... 200... 300... 
    Done loading tracked data

*** LOWPASS FILTERING TRACKS ***
    Done lowpass filtering tracks

*** CALCULATING VELOCITY ***
0... 100... 200... 300... 
    Done calculating whole ant velocities

*** CREATING DATAFRAME ***




Number of tracks:  419  - Number of frames:  127480
    Done creating dataframe

*** SAVING DF AS PICKLE ***
    Done saving pickle

/media/gravishlab/SeagateExpansionDrive/Light_Dark/Tunnel_20190815/ - 457 videos

*** BUILDING TRIAL LIST ***
    Done building trial list

*** LOADING TRACKS ***
0... 100... 200... 300... 400... 
    Done loading tracked data

*** LOWPASS FILTERING TRACKS ***
    Done lowpass filtering tracks

*** CALCULATING VELOCITY ***
0... 100... 200... 300... 400... 
    Done calculating whole ant velocities

*** CREATING DATAFRAME ***
Number of tracks:  524  - Number of frames:  160590
    Done creating dataframe

*** SAVING DF AS PICKLE ***
    Done saving pickle

/media/gravishlab/SeagateExpansionDrive/Light_Dark/Tunnel_20190816/ - 528 videos

*** BUILDING TRIAL LIST ***
    Done building trial list

*** LOADING TRACKS ***
0... 



100... 200... 300... 400... 500... 
    Done loading tracked data

*** LOWPASS FILTERING TRACKS ***
    Done lowpass filtering tracks

*** CALCULATING VELOCITY ***
0... 100... 200... 300... 400... 500... 
    Done calculating whole ant velocities

*** CREATING DATAFRAME ***
Number of tracks:  763  - Number of frames:  244926
    Done creating dataframe

*** SAVING DF AS PICKLE ***
    Done saving pickle

/media/gravishlab/SeagateExpansionDrive/Light_Dark/Tunnel_20190817/ - 413 videos

*** BUILDING TRIAL LIST ***
    Done building trial list

*** LOADING TRACKS ***
0... 100... 200... 300... 400... 
    Done loading tracked data

*** LOWPASS FILTERING TRACKS ***
    Done lowpass filtering tracks

*** CALCULATING VELOCITY ***
0... 100... 200... 300... 400... 
    Done calculating whole ant velocities

*** CREATING DATAFRAME ***
Number of tracks:  458  - Number of frames:  180356
    Done creating dataframe

*** SAVING DF AS PICKLE ***
    Done saving pickle

/media/gravishlab/SeagateExpan

#### Find long trials and bootstrap

In [None]:
# FIND LONG TRIALS 
# import random


temp = df.copy()
# look only at trials longer than 50fr
idcs = [index for index, row in temp.iterrows() if len(row.v)>50]
longtracks = df.loc[idcs,]

# # MEDIAN V
# colony_R = [col.split('20180')[-1][1:] for col in longtracks['colony'].values.tolist()]
# date_days = [col[-2:] for col in longtracks['date'].values.tolist()]
# day_R = [col.split('-').index(day) for day, col in zip(date_days, colony_R)]
# subs_string = longtracks['substrate'].values.tolist()
# substrate_R = np.array([int(s.split('mm')[0]) for s in subs_string])
# v_med_R = np.array(longtracks['median_v'])
# df_med_R = pd.DataFrame( {"colony" : colony_R, "day" : day_R, "substrate" : substrate_R,
#                        "v_med" : v_med_R } )

# del idcs, colony_R, day_R, substrate_R, v_med_R, date_days, subs_string, temp



# # BOOTSTRAP
# n_boot = 50;

# # make a numpy array of each random sample for one entry in dataframe
# sample = []
# for boot in range(0,100):
# #     sample.append( np.array([random.choice(tr) for tr in list(temp.v)]) )
#     sample.append( np.array([random.choice(row.v) if len(row.v)>50 else np.nan for index, row in longtracks.iterrows() ]))
    
# # convert sample list into np array
# allsamples = np.transpose(np.vstack(sample))
# samplesubset = allsamples[:,0:n_boot]

# # make new dataframe to be output to R as feather
# shortcolnames = [col.split('20180')[-1][1:] for col in longtracks['colony'].values.tolist()]
# colony_R = np.repeat(np.array(shortcolnames), n_boot)
# date_days = [col[-2:] for col in longtracks['date'].values.tolist()]
# col_days = [col.split('-').index(day) for day, col in zip(date_days, shortcolnames)]
# day_R = np.repeat(np.array(col_days), n_boot)
# subs_string = longtracks['substrate'].values.tolist()
# subs_int = np.array([int(s.split('mm')[0]) for s in subs_string])
# substrate_R = np.repeat(subs_int, n_boot)
# indiv_R = np.repeat(np.array(longtracks.index), n_boot)
# df_R = pd.DataFrame( {"colony" : colony_R, "day" : day_R, "substrate" : substrate_R,
#                        "indiv" : indiv_R, "v" : np.concatenate(samplesubset)})

# del allsamples, samplesubset, shortcolnames, colony_R, day_R, substrate_R, date_days, subs_string, subs_int, indiv_R, sample
del temp
print('done')

#### Manually save things as pickle or feather

In [None]:
# SAVE DATAFRAME AS FEATHER
# import time
# import feather

# # save master dataframe as pickle
# df_preference = (df>0).astype(np.int8)
# start_time = time.time()
# df.to_pickle(vid_locations + 'AllTracks')#, compression='infer', protocol=4) 
# print('Saved all trackways dataframe as pickle - duration: ', time.time()-start_time)

# save master dataframe with TDs for each colony
# colonies = glob.glob(os.path.join(vid_locations, 'Tunnel**/'))
# for col in colonies:
#     col_OI = col.split('/')[-2][-5:]
#     df.loc[(df['colony']!='Tunnel_201803'+col_OI)].to_pickle(vid_locations + '/AllTracks_TDs_%s'%col_OI, protocol = 4)#, compression='infer', protocol=4) 
#     print(col_OI)
# print('    Done saving pickle')
    
# save master dataframe as feather
# start_time = time.time()
# # newdf = df[['ID','colony','datetime','date','median_v','substrate','x_raw']].copy()
# # feather.write_dataframe(newdf, vid_locations + 'AllTracks.feather')
# print('Saved all trackways dataframe as feather - duration: ', time.time()-start_time)

# save long tracks dataframe as pickle
# start_time = time.time()
# longtracks.to_pickle(vid_locations + 'LongTracks')
# print('Saved long trackways dataframe as pickle - duration: ', time.time()-start_time)

# save long tracks dataframe as feather
# start_time = time.time()
# # feather.write_dataframe(longtracks, vid_locations + 'LongTracks.feather')
# print('Saved long trackways dataframe as feathers - duration: ', time.time()-start_time)

# print('Saved median and bootstrap files as feathers')
# feather.write_dataframe(df_med_R, vid_locations + 'Median.feather')
# # feather.write_dataframe(df_R, vid_locations + 'Bootstrap.feather')
# del df_med_R, df_R

# Functions that are needed in full file

In [None]:
def lowpass_filt_sections(arr, fr):
    yy = np.isnan(arr)
    xx = range(len(yy))
    full_filtered = np.empty(yy.shape)*np.nan
    for k,g in groupby(iter(xx), lambda x: yy[x]):
        if k == False: # if is a group of nan
            g = list(g)
#             print('section to lowpass fitler: ', len(g))
            if len(g)>9:
                b, a = signal.butter(2,fr,btype='low')
                new_arr = arr[np.array(g)]
                mirror_arr = np.hstack([np.flip(new_arr,axis=0), new_arr, np.flip(new_arr,axis=0)])
                filtered = signal.filtfilt(b, a, mirror_arr)
                full_filtered[np.array(g)]=filtered[len(new_arr):-1*len(new_arr)]
    return full_filtered


# Find when lights turned on and off for each recording session, add dataframe row

In [None]:
if os.path.exists(vid_locations + '/LightingInfo.pkl'):
    lighting_info = pd.read_pickle(vid_locations + '/LightingInfo.pkl')
    print('loaded lighting info from pickle')
    
else:
    print('calculating lighting info')
    rsessions = glob.glob(os.path.join(vid_locations, 'Tunnel_2019**/WebcamPhotos/'))
    temp = []
    for rsess in rsessions:
        tmp = {}
        print(rsess)
        tmp['date']=rsess.split('/')[-3].split('_')[1]


        pic_names = sorted(glob.glob(os.path.join(rsess, '**jpg')))
        is_light = np.ones([len(pic_names),2])
        for pp,pic in enumerate(pic_names):
            img = cv2.imread(pic)
            is_light[pp,1]=np.mean(img)>120
            is_light[pp,0]=int(pic.split('/')[-1].split('_')[1])
        is_light = is_light.astype(np.int)


        dark_to_light = np.diff(np.insert(is_light[:,1],0,0))==1
        light_to_dark = np.diff(np.insert(is_light[:,1],-1,0))==-1
        tmp['when_light'] = np.ones([np.sum(dark_to_light),2])
        tmp['when_light'][:,0]=is_light[dark_to_light,0]
        tmp['when_light'][:,1]=is_light[light_to_dark,0]

        light_to_dark2 = np.diff(np.insert(is_light[:,1],0,1))==-1
        dark_to_light2 = np.diff(np.insert(is_light[:,1],-1,1))==1
        tmp['when_dark'] = np.ones([np.sum(dark_to_light2),2])
        tmp['when_dark'][:,1]=is_light[dark_to_light2,0]
        tmp['when_dark'][:,0]=is_light[light_to_dark2,0]

        temp.append(tmp)
        del tmp, dark_to_light, dark_to_light2, light_to_dark, light_to_dark2, is_light, pic_names
    lighting_info = pd.DataFrame(temp)
    del temp
    print('done calculating lighting info')

    print('\nsaving lighting info as pickle')
    lighting_info.to_pickle(vid_locations + '/LightingInfo.pkl', protocol = 4)

In [None]:
# assign each trial from dataframe to light or dark


def find_light_dark_df(x, coltypes, max_s_from_webcam_photo):
    time = int(x['time'])
    col_OI = x['colony']
    col_idx = np.where(np.isin(coltypes,col_OI))[0][0]
    light = lighting_info['when_light'][col_idx]
    dark = lighting_info['when_dark'][col_idx]
    is_light = np.any(np.logical_and(time>=light[:,0], time <= light[:,1]))
    is_dark = np.any(np.logical_and(time>=dark[:,0], time <= dark[:,1]))
    assignment = np.nan
    
    if is_light | is_dark:
        if is_light & is_dark:
            print('Col: %s, Time: %i -- unassigned -- lies within both ranges'%(col_OI, time))
        else:
            assignment = np.array([1,0])[np.array([is_light, is_dark])][0]
            
    else:
        how_close_to_light = np.min(np.abs(time-light))
        how_close_to_dark = np.min(np.abs(time-dark))
        if how_close_to_light < how_close_to_dark:
            if how_close_to_light <=5:
#                 print('Col: %s, Time: %i -- Assigned to light -- light/dark: %i/%i'%(col_OI, time, how_close_to_light, how_close_to_dark))
                assignment = 1
        elif how_close_to_light > how_close_to_dark:
            if how_close_to_dark <=5:
#                 print('Col: %s, Time: %i -- Assigned to dark -- light/dark: %i/%i'%(col_OI, time, how_close_to_light, how_close_to_dark))
                assignment = 0
        else:
            print('Col: %s, Time: %i -- unassigned -- light/dark: %i/%i'%(col_OI, time, how_close_to_light, how_close_to_dark))
            
    return assignment
    
    
columns_to_drop = ['light']
for colmn in columns_to_drop:
    if colmn in df: # remove columns if already exist
        df = df.drop(colmn, axis = 1)

coltypes = sorted(list(set(df['colony'])))
df = df.reindex( columns = df.columns.tolist() + ['light'] )
df['light'] = df.apply(
    find_light_dark_df, args = (coltypes,5), axis=1)
    

In [None]:
# try out assignment method for specific trials
for tr in np.arange(0,10):
    time = int(df['time'][tr])
    col_OI = df['colony'][tr]
    col_idx = np.where(np.isin(coltypes,col_OI))[0][0]
    light = lighting_info['when_light'][col_idx]
    dark = lighting_info['when_dark'][col_idx]
    is_light = np.any(np.logical_and(time>=light[:,0], time <= light[:,1]))
    is_dark = np.any(np.logical_and(time>=dark[:,0], time <= dark[:,1]))

    if is_light & is_dark:
        print('ERROR: lies within both ranges!')
    if (not is_light) & (not is_dark):
        how_close_to_light = np.min(np.abs(time-light))
        how_close_to_dark = np.min(np.abs(time-dark))
        
        if how_close_to_light < how_close_to_dark:
            if how_close_to_light <=5:
                print('Tr: %i, Time: %i -- Assigned to light -- light/dark: %i/%i'%(tr, time, how_close_to_light, how_close_to_dark))
        elif how_close_to_light > how_close_to_dark:
            if how_close_to_dark <=5:
                print('Tr: %i, Time: %i -- Assigned to dark -- light/dark: %i/%i'%(tr, time, how_close_to_light, how_close_to_dark))
        else:
            print('Tr: %i, Time: %i -- unassigned -- light/dark: %i/%i'%(tr, time, how_close_to_light, how_close_to_dark))
            
        
#         if (how_close_to_light<=5)
# #         is_light = np.any(np.logical_and(time>=(light[:,0]-2), time<=(light[:,1]+2)))
# #         is_dark = np.any(np.logical_and(time>=(dark[:,0]-2), time<=(dark[:,1]+2)))
# #     is_dark = np.any(np.logical_and(time>dark[:,0], time < dark[:,1]))
#         print('ERROR: lies in neither range')
#         print('Tr: %i, Time: %i -- light/dark: %i/%i'%(tr, time, how_close_to_light, how_close_to_dark))

# JOINT TRAJECTORIES

#### Make 10x10 compilation figures of one ant-centered frame from each trial to check that ant facing correct way

In [None]:
# check that ant is facing right direction in videos
plt.close('all')

i_idx = 0
f_idx = 4
# file = df['video'][80][:-4]

for file in file_list[200:1000]:#np.random.choice(file_list, 40, replace=False): #file_list[0:10]:
    vids = glob.glob(os.path.join(file[:-4], 'ant*[0-9].h5'))
#     print(vids)

    for vid in vids:
#         print(i_idx)
        if i_idx ==0:
            fig = plt.figure(figsize=(12,12))
            fig.subplots_adjust(top = 1, bottom = 0, left = 0, right = 1)
            f_idx = f_idx +1
            plt.tight_layout(pad = 0.05)#, w_pad = 0.1, h_pad = 0.1)
            gs = gridspec.GridSpec(10,10)
            gs.update(wspace= 0.05, hspace = 0.05)

        hf = h5py.File(vid,'r')
        frames = hf['box'][()].astype(np.float32) #hf.get('positions_pred')
        hf.close
        if np.any( np.sum(np.sum(np.squeeze(frames),axis=1),axis=1)>40000 ):
            fr = np.where(np.sum(np.sum(np.squeeze(frames),axis=1),axis=1)>40000)[0][0]
        else:
            fr = 0
        ax=plt.subplot(gs[i_idx])
        plt.imshow(np.squeeze(frames[fr,:,:,:]).T)
        plt.text(10,25,'v%i - a%s'%(np.where(np.array(file_list)==file)[0][0], vid.split('ant')[-1].split('.')[0]), size = 9, color = 'w')
        plt.axis('off')
        i_idx = i_idx+1
        
        if (i_idx==100):
            plt.savefig(('/').join(file.split('/')[0:5])+'/h5_ant_facing_%i'%f_idx)
            plt.close(fig)
            i_idx = 0
            
        del frames
        

plt.savefig(('/').join(file.split('/')[0:5])+'/h5_ant_facing_%i'%f_idx)

print('done')

    


#### Plot video of tracking on raw footage <br>

In [None]:
# plot images with tracked data on it
tr_num = 21#105 #91 #105
videofile = df.video[tr_num]
print(videofile)
cap = cv2.VideoCapture(videofile)
plt.close('all')

def WRTant_to_WRTframe(val_x, val_y, frame_center_x, frame_center_y, ant_ang_deg):
    ant_ang = ant_ang_deg *np.pi/180
    R = np.array([[np.cos(ant_ang), -1*np.sin(ant_ang)],
                  [np.sin(ant_ang),    np.cos(ant_ang)]])
    rotated_vals = np.dot(R,np.array([val_x-100,val_y-100]))
    translated_vals = rotated_vals*np.array([1,1]) + np.array([frame_center_x, frame_center_y])  
    return translated_vals[0], translated_vals[1];

def plot_ant_pt(ant_part, df, tr_num, fr_num, ant_x, ant_y, ant_ang_deg, buffer):
    x = df['%s_x'%ant_part][tr_num][fr_num]
    y = df['%s_y'%ant_part][tr_num][fr_num]
    conf = df['%s_conf'%ant_part][tr_num][fr_num]
    (newx, newy) = WRTant_to_WRTframe(x, y, ant_x, ant_y, ant_ang_deg)
#     print('old vals: %i, %i  TO %0.1f, %0.1f'%(x,y,newx, newy))

    # define colormap to show confidence
    norm2 = colors.Normalize(vmin=0, vmax=1)
    plt.scatter(newx+buffer, newy+buffer, c = conf, s = 10, cmap = cm.bwr,
               edgecolor = 'none', norm=norm2)# '.g')
    return;

def save_image(vlocation, nfig, name_base):
    pname = os.path.join(vlocation, '%s%d.png'%(name_base,nfig))
    plt.savefig(pname)
    nfig = nfig + 1
    plt.pause(0.2)
#     plt.close('all')
    return nfig

def save_video(vlocation, name_base):
    # save images as movie
    if os.path.isfile((vlocation+'/%s.mp4'%name_base)):
        os.remove(vlocation + "/%s.mp4"%name_base)
        print('** Deleted %s.mp4 file'%name_base)
    print('saving %s.mp4 file'%name_base)
    command_p1 = "ffmpeg -r 20 -i '%s/%s"%(vlocation, name_base)
    command_p2 = " -vcodec libx264 '%s/%s.mp4'"%(vlocation, name_base)
    command = command_p1 + "%01d.png'" + command_p2
#     print(command)
    os.system(command)
    plt.pause(10)

    # delete all trackway vids
    pics2delete = glob.glob(os.path.join(vlocation, '%s*.png'%name_base))
    for pic in pics2delete:
        os.remove(pic)
    return


fig = plt.figure(figsize=(10,6))
for im_n, fr_OI in enumerate( df.frames_final[tr_num][30:70]):
    plt.clf()
    # load frame
    ff=np.where(df.frames[tr_num]==fr_OI)[0][0]
    cap.set(1,int(fr_OI))
    ret, frame = cap.read()
    x_dim = frame.shape[1]
    y_dim = frame.shape[0]
    
    # load ant x, y and angle
    x = df.x_raw[tr_num][ff]
    y = df.y_raw[tr_num][ff]
    ang = df.angle_improved[tr_num][ff]
    (thorax_x, thorax_y) = WRTant_to_WRTframe(df.thorax_x[tr_num][ff], df.thorax_y[tr_num][ff], x, y, ang)
#     print(x,y,ang)
    
    
    # PLOT THINGS
    ax1=fig.add_axes([0.03,0.1, 0.5, 0.3])
    ax1.set_position([0.03,0.1, 0.5, 0.3])
    plt.imshow(frame)
    plt.plot(x, y, '.w')
    plt.axis('off')
    
    ax2=fig.add_axes([0.35,0.1, 0.7, 0.7]) #plt.axes()
#     ax2.set_position([0.35,0.1, 0.7, 0.7], which = 'both')
    # zoom into around ant
    buffer = 150
    blank_frame = np.ones((y_dim+ 2*buffer, x_dim+ 2*buffer,3),dtype=np.uint8)* 1# 1.001# np.max(temp) # gray background  1.0001#
    bframe = blank_frame.copy()
    bframe[buffer:-buffer, buffer:-buffer,:] = frame
    if not np.isnan(ang):
        xrange = range(int(round(thorax_x)), int(round(thorax_x+2*buffer)))
        yrange = range(int(round(thorax_y)), int(round(thorax_y+2*buffer)))
    else:
        xrange = range(int(round(x)), int(round(x+2*buffer)))
        yrange = range(int(round(y)), int(round(y+2*buffer)))
    xrange_actual = np.array(sorted(list( set(xrange) & set(range(0, x_dim+2*buffer) ) )))[[0,-1]]
    yrange_actual = np.array(sorted(list( set(yrange) & set(range(0, y_dim+2*buffer) ) )))[[0,-1]]
    frame_zoom = bframe[np.ix_(yrange_actual, xrange_actual)]
    plt.xlim(xrange_actual)
    plt.ylim(yrange_actual)
    plt.text(xrange_actual[0]+20, yrange_actual[0]+20, 'Fr: %i'%fr_OI, color='w')
    plt.imshow(bframe)
    cmap = cm.bwr
    plt.scatter(x+buffer, y+buffer, s=20, c=np.array(0.5), 
            norm = colors.Normalize(vmin=0, vmax=1), marker= 'o')
    
    
    if not np.isnan(ang):
        plt.scatter(thorax_x+buffer, thorax_y+buffer, c = df.thorax_conf[tr_num][ff], s = 10, 
                cmap = cmap, norm = colors.Normalize(vmin=0, vmax=1))

        for aa in range(0,2):
            for jj in range(0,3):
                plot_ant_pt('antenna%i_%i'%(aa,jj), df, tr_num, ff, x, y, ang, buffer)
                
        for ee in range(0,2):
            plot_ant_pt('eye%i'%(ee), df, tr_num, ff, x, y, ang, buffer)

    plt.gca().invert_yaxis()
    plt.axis('off')
    cax = plt.axes([0.93,0.1,0.02,0.7])
    plt.colorbar(cax=cax)
    plt.clim(0,1)
    plt.set_cmap(cm.bwr)
    
    
    vlocation = '/media/gravishlab/SeagateExpansionDrive/Light_Dark'
    save_image(vlocation, im_n, 'Model_Predictions')
save_video(vlocation, 'Model_Predictions')
    #     plt.pause(0.1)
    
plt.close()

### Define functions to analyze tracked data, removing low confidence points and lowpass filter -- apply to individual trial/limb

In [None]:
# INTERPOLATING AND LOWPASS FILTERING FUNCTIONS

def remove_lowconf_pts(arr, conf, conf_cutoff, jump_limit):
    arr_highconf = arr.copy()
    # get rid of low confidence pts
    arr_highconf[conf<conf_cutoff]=np.nan
    return arr_highconf

def remove_jumps(arr, jump_limit):
    yy = np.isnan(arr)
    xx = range(len(yy))
    arr_nojump = np.empty(yy.shape)*np.nan
    for k,g in groupby(iter(xx), lambda x: yy[x]):
        if k == False: # if is not a group of nan
            g = np.array(list(g))
#             print(len(g))
            if len(g)>3:
                # get rid of drastic changes
                if jump_limit != None:
                    arr_OI = arr[g]
                    d_arr = np.abs(np.diff(arr_OI))
                    d_jump = np.abs(d_arr) > jump_limit
                    d_jump_cumsum = np.cumsum(np.insert(d_jump,0,0))
                    d_jump_opp = (d_jump_cumsum%2).astype(bool)
                    if np.sum(d_jump_opp==True)>np.sum(d_jump_opp == False):
                        d_jump_opp = np.logical_not(d_jump_opp)
                    arr_OI[d_jump_opp]=np.nan
                arr_nojump[g]=arr_OI
                    
    return arr_nojump
    


def middle_half(alist, wanted_parts=4):
    alist= alist[np.logical_not(np.isnan(alist))]
    length = len(alist)
    sections = np.array([ alist[i*length // wanted_parts: (i+1)*length // wanted_parts] 
             for i in range(wanted_parts) ])
    middle_half = np.concatenate(sections[1:3])
    if not len(middle_half)>0:
        return  np.nan, np.nan
    else:
        full_range = (np.max(alist)-np.min(alist))
        middle_range = (np.max(middle_half)-np.min(middle_half))
        med = np.mean(middle_half)
#         print(middle_range/full_range)
        if middle_range/full_range < 0.93: # theoretically for normal distribution mid_range/full_range = 0.16625
#             print('not  normal dist')
            sigma = middle_range/3
#             print('removed outliers')
        else:
            sigma = full_range/4
#         print(med, sigma)
    return med, sigma

def remove_outliers(arr):
    med, sigma = middle_half(arr)
    where_far_away = (np.abs(arr-med)> 2*sigma)
    arr[where_far_away]=np.nan
    return arr

def remove_outliers2d(arr_x, arr_y):
    med_x, sigma_x = middle_half(arr_x)
    med_y, sigma_y = middle_half(arr_y)
#     if sigma_x < 15:
#         sigma_x = 15
    where_far_away = np.logical_or(np.abs(arr_x-med_x)> 2*sigma_x, np.abs(arr_y-med_y)> 2*sigma_y)
#     print('removing %i outliers'%np.sum(where_far_away))
    arr_x[where_far_away]=np.nan
    arr_y[where_far_away]=np.nan
    return arr_x, arr_y

def remove_outliers(arr_x, arr_y):
    med_x, sigma_x = middle_half(arr_x)
    med_y, sigma_y = middle_half(arr_y)
    
    where_far_away = np.logical_or(np.abs(arr_x-med_x)> 2*sigma_x, np.abs(arr_y-med_y)> 2*sigma_y)
    arr_x[where_far_away]=np.nan
    arr_y[where_far_away]=np.nan
    
    return arr_x, arr_y

def find_nan_gaps(arr, limit):  
    yy = np.isnan(arr)
    xx = range(len(yy))
    where_gapOI = np.full(arr.shape, False)
    where_othergaps = np.full(arr.shape, False)
    for k,g in groupby(iter(xx), lambda x: yy[x]):
        if k == True: # if is a group of nan
            g = list(g)
            if any(x in g for x in [0, len(arr)-1]): # if first or last group
                where_othergaps[np.array(g)]=True
#                 print('remove: ', len(g), g)
                continue
                
            if len(g)<= limit: # length is below limit
                where_gapOI[np.array(g)]=True
#                 print('interp: ', len(g), g)
            else:
                where_othergaps[np.array(g)]=True
#                 print('remove: ', len(g), g)
    return where_gapOI, where_othergaps

def find_interp_idcs(where_interpolate):
    interp_idcs = []
    for val in [-1,0,1]:
        interp_idcs = np.concatenate([interp_idcs,np.where(where_interpolate)[0]+val])
    interp_idcs = np.sort(np.array(list(set(interp_idcs)))) # get of repeat elements
    interp_idcs = interp_idcs[np.logical_and(interp_idcs>-1, interp_idcs < len(where_interpolate))].astype(np.uint32) # only elements in range
    return interp_idcs

def interp_vals(arr, interp_idcs): # array includes nan values
    interp = arr.copy()
    if len(interp_idcs)>0:
        temp = arr[interp_idcs]
        interpolated_vals = np.interp(
            interp_idcs, 
            interp_idcs[np.logical_not(np.isnan(temp))], temp[np.logical_not(np.isnan(temp))] )
        interp[interp_idcs] = interpolated_vals
    return interp
    

def interpolate_filter_tracking(df, tr_num, tracked_pt, conf_cutoff, jump_limit, nan_gap_limit, plots = False):
    
    all_frames = df['frames'][tr_num]
    frames_OI = df['frames_final'][tr_num]
    frames_OI_idcs = np.isin(all_frames, frames_OI)
    
    # initialize variables
    joint_x, joint_y, x_offset, y_offset, conf, \
    joint_x_highconf, joint_y_highconf, joint_x_interp, joint_y_interp, joint_x_filt, joint_y_filt = \
    (np.empty(all_frames.shape)*np.nan for i in range(11))
    
    # set up variables
    joint_x = df['%s_x'%tracked_pt][tr_num]
    joint_y = df['%s_y'%tracked_pt][tr_num]
    if tracked_pt != 'thorax': # account for inaccurate initial guess of body center, make relative to LEAP tracked thorax
        if 'thorax_x_filt' in df:
            x_offset = df['thorax_x_filt'][tr_num]
            y_offset = df['thorax_y_filt'][tr_num]
        else:
            x_offset = df['thorax_x'][tr_num]
            y_offset = df['thorax_y'][tr_num]
        joint_x = joint_x - x_offset
        joint_y = joint_y - y_offset
    conf = df['%s_conf'%tracked_pt][tr_num]
    joint_x_highconf = remove_lowconf_pts(joint_x, conf, conf_cutoff, jump_limit)
    joint_y_highconf = remove_lowconf_pts(joint_y, conf, conf_cutoff, jump_limit)
    
    # remove big jumps
    joint_x_highconf = remove_jumps(joint_x_highconf, jump_limit)
    joint_y_highconf = remove_jumps(joint_y_highconf, jump_limit)
    
    # remove outliers
    print('before removing outliers: ', np.sum(np.isnan(joint_x_highconf)), ' nan of ', len(joint_x_highconf) )
#     joint_x_highconf, joint_y_highconf = remove_outliers2d(joint_x_highconf, joint_y_highconf)
    print('after removing outliers: ', np.sum(np.isnan(joint_x_highconf)), ' nan of ', len(joint_x_highconf) )
    
    # interpolate 
    where_interpolate, where_remove = find_nan_gaps(joint_x_highconf, nan_gap_limit)
    joint_x_interp = interp_vals(joint_x_highconf, find_interp_idcs(where_interpolate))
    joint_y_interp = interp_vals(joint_y_highconf, find_interp_idcs(where_interpolate))

    # lowpass filter
    joint_x_filt = lowpass_filt_sections(joint_x_interp, 0.3)
    joint_y_filt = lowpass_filt_sections(joint_y_interp, 0.3)
#     print(joint_x_filt.shape)
    
    if plots:
        # PLOT THINGS
        fig = plt.figure(figsize=(15,5))
        ax1=plt.subplot(2,1,1)
        ax_limits=[]
        ax_limits.append([np.nanmin(joint_x)-10, np.nanmax(joint_x)+10])
        ax2 = plt.subplot(2,1,2)
        ax_limits.append([np.nanmin(joint_y)-10, np.nanmax(joint_y)+10])

        for xx,ax in enumerate([ax1, ax2]):
            for kk, inter in enumerate(all_frames[where_interpolate]):
                if kk == 0:
                    rect = Rectangle((inter-0.5, ax_limits[xx][0]),
                                     1, np.diff(ax_limits[xx]), alpha = 0.2, fc = 'm', ec = None, label = 'interpolated')
                else:
                    rect = Rectangle((inter-0.5, ax_limits[xx][0]),
                                     1, np.diff(ax_limits[xx]), alpha = 0.2, fc = 'm', ec = None)
                ax.add_patch(rect)
            for kk,remov in enumerate(all_frames[where_remove]):
                if kk == 0:
                    rect = Rectangle((remov-0.5, ax_limits[xx][0]),
                                     1, np.diff(ax_limits[xx]), alpha = 0.05, fc = 'k', ec = None, label = 'removed')
                else:
                    rect = Rectangle((remov-0.5, ax_limits[xx][0]),
                                     1, np.diff(ax_limits[xx]), alpha = 0.05, fc = 'k', ec = None)
                ax.add_patch(rect)

        plt.sca(ax1)
        cmap = cm.bwr
        plt.scatter(all_frames, joint_x, c = conf, s = 10, 
                cmap = cmap, norm = colors.Normalize(vmin=0, vmax=1), label = 'raw tracking')
# #         plt.plot(frames_OI, joint_x_highconf, '-k', alpha = 0.2)
        plt.plot(all_frames, joint_x_interp, '.k', alpha = 0.5, MarkerSize = 2)#, label = 'interpolated')
        plt.plot(all_frames, joint_x_filt, '-g', alpha = 0.5, label = 'filtered', )
        plt.ylabel('x (pix)')
        plt.legend(loc = 'upper right', frameon=False, fontsize = 7)
        titleparts = videofile.split('/')
        plt.title('%s -- %s -- %s\n%s -- conf. cutoff: %0.1f -- jump limit: %i -- max nan gap for interp: %i'
                  %(titleparts[-2], titleparts[-1].split('_')[0], titleparts[-1].split('_')[1],
                    tracked_pt, conf_cutoff, jump_limit, nan_gap_limit),
                 loc = 'left')
#         plt.axhline(y=-58.82, xmin = 0, xmax = .8) # for file 589, trackway 833 -- show outlier range
#         plt.axhline(y=-58.82+2*15.99, xmin = 0, xmax = .8)
#         plt.axhline(y=-58.82-2*15.99, xmin = 0, xmax = .8)
        


        plt.sca(ax2)
#         plt.plot(frames_OI, joint_y_highconf, '-k', alpha = 0.5)
        plt.plot(all_frames, joint_y_interp, '.k', alpha = 0.5, label = 'interpolated', MarkerSize = 2)
        plt.plot(all_frames, joint_y_filt, '-g', alpha = 0.5, label = 'filtered', )
#         plt.plot(frames_OI, joint_y_filt, '.g', alpha = 0.5, label = 'filtered', MarkerSize = 3)
        plt.scatter(all_frames, joint_y, c = conf, s = 10, 
                cmap = cmap, norm = colors.Normalize(vmin=0, vmax=1))
        plt.ylabel('y (pix)')
        plt.gca().invert_yaxis()
#         plt.axhline(y=30.01, xmin = 0, xmax = .9) # for file 589, trackway 833 -- show outlier range
#         plt.axhline(y=30.01+2*7.4, xmin = 0, xmax = .9)
#         plt.axhline(y=30.01-2*7.4, xmin = 0, xmax = .9)
        
        
        

        cax = plt.axes([0.93,0.1,0.02,0.8])
        plt.colorbar(cax=cax, label='confidence')
        plt.clim(0,1)
    
    if tracked_pt != 'thorax':
        joint_x_filt = joint_x_filt + x_offset
        joint_y_filt = joint_y_filt + y_offset
    print('final filter len: ', np.sum(np.isnan(joint_x_filt)), ' nan of ', len(joint_x_filt) )
    del rect, 
    return;


def WRTant_to_WRTframe(val_x, val_y, frame_center_x, frame_center_y, ant_ang_deg):
    ant_ang = ant_ang_deg *np.pi/180
    R = np.array([[np.cos(ant_ang), -1*np.sin(ant_ang)],
                  [np.sin(ant_ang),    np.cos(ant_ang)]])
    if R.ndim == 3:
        rotated_vals = np.einsum('ijk, ki->kj', R, (np.array([val_x,val_y])-100).T).T
#         print(R.shape, rotated_vals.shape, rotated_vals[:,0])
        translated_vals = rotated_vals + np.array([frame_center_x, frame_center_y])  
        return translated_vals[0,:], translated_vals[1,:];
    elif R.ndim == 2:
        rotated_vals = np.einsum('ij, i->j', R, (np.array([val_x,val_y])-100).T).T
#         print(R.shape, rotated_vals.shape, rotated_vals[:])
        translated_vals = rotated_vals + np.array([frame_center_x, frame_center_y])  
        return translated_vals[0], translated_vals[1];
    else:
        print('something went wrong with R dimensions')
        return np.nan, np.nan



    
# RUN ON SPECIFIC TRIAL & PLOT
plt.close('all')
for tr_num in [978]:#np.random.choice(df.index, 5, replace=False):#range(111,112):#105 #91 #105
    videofile = df.video[tr_num]
    print(tr_num, videofile)
    
    if not np.any(np.isfinite(df['thorax_x'][tr_num])):
        print('No data for this trial')
        continue
    
    tracked_pt = 'thorax'
    interpolate_filter_tracking(df, tr_num, tracked_pt, 
                                    conf_cutoff = 0.6, jump_limit = 15, nan_gap_limit = 5, plots = True)
    
    for antenna_num in range(1,2):
        for joint_num in range(0,1):
            tracked_pt = 'antenna%i_%i'%(antenna_num, joint_num)
            interpolate_filter_tracking(df, tr_num, tracked_pt, 
                                    conf_cutoff = 0.6, jump_limit = 15, nan_gap_limit = 5, plots = True)
del tracked_pt, videofile
#     tracked_pt = 'thorax'
#     interpolate_filter_tracking(df, tr_num, tracked_pt, 
#                                 conf_cutoff = 0.6, jump_limit = 10, nan_gap_limit = 5, plots = True)
    
    
    



# Analyze LEAP tracked data in WHOLE DATAFRAME, removing low confidence points and lowpass filter -- rotate wrt full frame, rotate wrt tracked neck and thorax - remove points close to frame/substrate edge and when ant is stopped
-- apply to whole dataframe: thorax, neck, and limbs<br>

In [None]:
# DATAFRAME FUNCTIONS -- also need to run above cell for individual trial to get some functions

def find_nan_gaps(arr, limit):  
    yy = np.isnan(arr)
    xx = range(len(yy))
    where_gapOI = np.full(arr.shape, False)
    where_othergaps = np.full(arr.shape, False)
    for k,g in groupby(iter(xx), lambda x: yy[x]):
        if k == True: # if is a group of nan
            g = list(g)
            if any(x in g for x in [0, len(arr)-1]): # if first or last group
                where_othergaps[np.array(g)]=True
#                 print('remove: ', len(g), g)
                continue
                
            if len(g)<= limit: # length is below limit
                where_gapOI[np.array(g)]=True
#                 print('interp: ', len(g), g)
            else:
                where_othergaps[np.array(g)]=True
#                 print('remove: ', len(g), g)
    return where_gapOI, where_othergaps


def interpolate_filter_tracking_df(x, ant_part, conf_cutoff, jump_limit, nan_gap_limit):
#     print('TRIAL: ', x.name, ' -- substrate: ', x.substrate)
    all_frames = x['frames']
    frames_OI = x['frames_final']
    frames_OI_idcs = np.isin(all_frames, frames_OI)
    
    joint={'x': x[ant_part+'_x'] , 'y': x[ant_part+'_y']}
    conf = x[ant_part + '_conf']
    if 'thorax_x_filt' in df:
        thorax = {'x': x['thorax_x_filt'], 'y': x['thorax_y_filt']} 
    else:
        thorax = {'x': x['thorax_x'], 'y': x['thorax_y']} 

    #initialize new dicts
    joint_highconf = {}
    joint_no_outliers = {}
    joint_interp = {}
    joint_filt = {}

    for coord in ['x','y']:
        if 'thorax' not in ant_part: # account for inaccurate initial guess of body center, make relative to LEAP tracked thorax
            joint[coord] = joint[coord] - thorax[coord]

        joint_highconf[coord] = remove_lowconf_pts(joint[coord], conf, conf_cutoff, jump_limit)
        joint_highconf[coord] = remove_jumps(joint_highconf[coord], jump_limit)
        
    # remove outliers?
#     joint_no_outliers['x'], joint_no_outliers['y'] = remove_outliers2d(joint_highconf['x'], joint_highconf['y'])   
    joint_no_outliers['x'], joint_no_outliers['y'] = joint_highconf['x'], joint_highconf['y'] # do not remove outliers
    
    for coord in ['x','y']:
        where_interpolate, where_remove = find_nan_gaps(joint_no_outliers[coord], nan_gap_limit)
        if np.sum(where_interpolate==True)>0: # only interpolate if needed
            joint_interp[coord] = interp_vals(joint_no_outliers[coord], find_interp_idcs(where_interpolate))
        else:
            joint_interp[coord] = joint_no_outliers[coord]
        joint_filt[coord] = lowpass_filt_sections(joint_interp[coord],0.3)
    
        if 'thorax' not in ant_part: # account for inaccurate initial guess of body center, make relative to LEAP tracked thorax
            joint_filt[coord] = joint_filt[coord] + thorax[coord]

    if np.sum(np.logical_not(np.isnan(joint_filt['x']))) < 50: # if fewer than 50 non-nan points in trial, remove
        joint_filt['x'][np.logical_not(np.isnan(joint_filt['x']))]=np.nan
        joint_filt['y'][np.logical_not(np.isnan(joint_filt['y']))]=np.nan
        
    if np.nanmax(np.linalg.norm([np.diff(joint_filt['x']), np.diff(joint_filt['y'])],axis=0))> 30 : # if unreasonably high velocities
        joint_filt['x'][np.logical_not(np.isnan(joint_filt['x']))]=np.nan
        joint_filt['y'][np.logical_not(np.isnan(joint_filt['y']))]=np.nan
        
     
    return joint_filt['x'], joint_filt['y'] #joint_x_filt, joint_y_filt;


def WRTant_to_WRTframe_df(df, ant_part):
#     frame_idcs = np.isin(df['frames'], df['frames_final'])
    ant_x = np.floor( np.array(df['x_kal']))#[frame_idcs] # when crop around video use integer bc it's pixels
    ant_y = np.floor( np.array(df['y_kal']))#[frame_idcs] 
    ang = np.array(df['angle_improved'])*-1#[frame_idcs]*-1
    val_x = df['%s_x_filt'%ant_part]
    val_y = df['%s_y_filt'%ant_part]

    ant_ang = ang *np.pi/180
    R = np.array([[np.cos(ant_ang), -1*np.sin(ant_ang)],
                  [np.sin(ant_ang),    np.cos(ant_ang)]])
    
    if R.ndim == 3:
        rotated_vals = np.einsum('ijk, ki->kj', R, (np.array([val_x,val_y])-100).T).T
        translated_vals = rotated_vals + np.array([ant_x, ant_y])
        xs = translated_vals[0,:]
        ys = translated_vals[1,:]
    elif R.ndim == 2:
        rotated_vals = np.einsum('ij, i->j', R, (np.array([val_x,val_y])-100).T).T
        translated_vals = rotated_vals + np.array([ant_x, ant_y]) 
        xs = translated_vals[0]
        ys = translated_vals[1]    
    else:
        print('something went wrong with R dimensions')
        return np.nan, np.nan
    
    # remove any crazy rotated points that result from slight diff in rotation angle +180/-180
    fast_velocity_pts = np.linalg.norm([np.diff(xs), np.diff(ys)],axis=0)> 30
    xs[1:][fast_velocity_pts] = np.nan
    ys[1:][fast_velocity_pts] = np.nan
    
    # remove any points close to edge of substrate based on ones removed for thorax
    if not 'thorax' in ant_part:
        nans = np.isnan(df['thorax_x_filt_fullfr'])
        xs[nans] = np.nan; ys[nans] = np.nan;
        
    # lowpass filter again
    xs = lowpass_filt_sections(xs, 0.3)
    ys = lowpass_filt_sections(ys, 0.3)
        
    # remove any points within _ dist from edge of frame
#     buffer = 50
#     wid = 1000
#     hei = 500
#     edgeidcs = ((xs < buffer) | (xs > wid-(buffer)) | (ys < buffer) | (ys > hei-buffer))
#     xs[edgeidcs]=np.nan
#     ys[edgeidcs]=np.nan

    return xs,ys
    

def WRTant_to_WRTneck_df(df, ant_part):
    x = df['%s_x_filt'%ant_part]
    y = df['%s_y_filt'%ant_part]
    thorax_x = df['thorax_x_filt']
    thorax_y = df['thorax_y_filt']
    neck_x = df['neck_x_filt']
    neck_y = df['neck_y_filt']
    
    val_coord = np.array([x,y])-np.array([thorax_x,thorax_y])
    neck_coord = np.array([neck_x-thorax_x,neck_y-thorax_y])
    ang = np.arctan( (neck_y-thorax_y)/(neck_x-thorax_x))
    c, s = np.cos(ang), np.sin(ang)
    Rx = np.array([c,s])
    Ry = np.array([-s,c])
    newx = np.einsum('mn,mn->n', val_coord, Rx)
    newy = np.einsum('mn,mn->n', val_coord, Ry)
    # get rid of points that close to edge
    nans = np.isnan(df['thorax_x_filt_fullfr'])
    newx[nans] = np.nan; newy[nans] = np.nan;
    return newx, newy


def WRTant_to_WRThead_df(df, ant_part):
    x = df['%s_x_filt'%ant_part]
    y = df['%s_y_filt'%ant_part]
    neck_x = df['neck_x_filt']
    neck_y = df['neck_y_filt']
    head_x = df['head_x_filt']
    head_y = df['head_y_filt']
    
    val_coord = np.array([x,y])-np.array([neck_x,neck_y])
    neck_coord = np.array([head_x-neck_x,head_y-neck_y])
    ang = np.arctan( (head_y-neck_y)/(head_x-neck_x))
    c, s = np.cos(ang), np.sin(ang)
    Rx = np.array([c,s])
    Ry = np.array([-s,c])
    newx = np.einsum('mn,mn->n', val_coord, Rx)
    newy = np.einsum('mn,mn->n', val_coord, Ry)
    # get rid of points that close to edge
    nans = np.isnan(df['thorax_x_filt_fullfr'])
    newx[nans] = np.nan; newy[nans] = np.nan;
    return newx, newy


def remove_close_to_edge_df(df, buffer_mm, pix2mm, sub_edge_df, wid):
    buffer = buffer_mm*pix2mm
    col = df['colony']
    sub = df['substrate']
    idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
    p = sub_edge_df.iloc[idx]['hlines'].values[0]

    x = np.array(df['thorax_x_filt_fullfr'])
    y = np.array(df['thorax_y_filt_fullfr'])
    v = np.append(np.linalg.norm(np.array([np.diff(x), np.diff(y)]), axis =0)*fps, np.nan)
    
#     # get rid of crazy accelerations from bad tracking
#     tmp = np.diff(v/pix2mm); tmp[-1] = tmp[-2]; tmp = tmp>15
#     idcs = np.logical_or(np.insert(tmp, 0, False), np.append(tmp, False))
#     x[idcs] = np.nan; y[idcs] = np.nan; v[idcs]=np.nan
    
    near_bottom = y > (p[0] + p[1]*x - buffer)
    near_top = np.logical_or(y < (p[0] + p[1]*x - 16*pix2mm + buffer), y < buffer)
    near_left = x < 3*pix2mm
    near_right = x > wid-3*pix2mm
    remove = np.logical_or(np.logical_or(near_bottom, near_top),  np.logical_or(near_left, near_right))
#     print(remove)
    x[remove] = np.nan; y[remove] = np.nan; v[remove] = np.nan
    return x,y,v/pix2mm


def remove_stopped_ant_sections_df(x, v_cutoff, pix2mm):
    xs = x['thorax_x_filt_fullfr']
    ys = x['thorax_y_filt_fullfr']
    v = x['thorax_v']*pix2mm # it's in mm/s but don't wanna figure out a new freq cutoff for those units vs. pix/s
    lowpass = lowpass_filt_sections(v, 0.06)
    v_remove = lowpass/pix2mm<v_cutoff
    xs[v_remove] = np.nan; ys[v_remove] = np.nan; v[v_remove] = np.nan
    v_lowpass = lowpass.copy()
    v_lowpass[v_remove] = np.nan
    return xs, ys, lowpass/pix2mm, v_lowpass/pix2mm


### Load in data to remove when ant close to a horizontal edge of the substrate 

In [None]:
# load in information from pickles saved from Find_Array_Edges notebook
vid_locations = '/media/gravishlab/SeagateExpansionDrive/Light_Dark/'
sub_edge_files = glob.glob(os.path.join(vid_locations, '**/*[!s]/*_Edges.pkl'))
sub_edge_files = sorted(sub_edge_files)
print('Total Number of Videos: ',len(sub_edge_files))

sub_edge_data = []
for file in sub_edge_files:
    colony = file.split('/')[-3]
    substrate = file.split('/')[-2]
#     print('%s -- %s'%(colony, substrate))
    temp = {}
    with open(file, 'rb') as f:
        temp['vlines'], temp['hlines'], temp['top_step_loc'], temp['img_raw'] = pickle.load(f)
    f.close()
    temp['colony']=colony
    temp['substrate']=substrate
    sub_edge_data.append(temp)
sub_edge_df = pd.DataFrame(sub_edge_data).copy()

del temp, sub_edge_data, sub_edge_files, colony, substrate

print('loaded in edge data for all substrates and colonies')

In [None]:
# confirm that sub edge info is correct by plotting it on the substrates
plt.close('all')
coltypes = sorted(list(set(df['colony'].values)))
subtypes = sorted(list(set(df['substrate'].values)))

for cc,col in enumerate(coltypes):
    for ss,sub in enumerate(subtypes):
        
        idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
        p = sub_edge_df.iloc[idx]['hlines'].values[0]
        step_side = sub_edge_df.iloc[idx]['top_step_loc'].values[0]
        img_raw = sub_edge_df.iloc[idx]['img_raw'].values[0]
        
        
        p_idx = cc*4 + ss 
        if p_idx%12 == 0:
            plt.figure(figsize = (12,6))
            
        plt.subplot(3,4,p_idx%12+1)
        plt.imshow(img_raw, cmap='gray')
        plt.axis('off')
        plt.plot(np.arange(0,1000), p[0] + p[1]*np.arange(0,1000), ':b')
        if ss==0:
            plt.ylabel(col[-4:])
            plt.text(10,-10,col[-4:])
        if ss%2 ==1:
            plt.text(100, 150, step_side, size = 8)
        
    


### Apply function to whole dataframe

In [None]:
# APPLY INTERPOLATION AND LOWPASS FILTERING TO DATAFRAME
# removes points with low confidence or large jumps in tracking (likely errors), replaces with nan
# interpolates nan gaps less than specified size
# low pass filters (butterworth) and saves

print('For whole dataframe: filter x,y wrt ant and calc filtered x,y wrt lab')
conf_cutoff = 0.6
jump_cutoff = 15
nan_gap_limit = 5
mm_from_edge = 1
slow_v_cutoff = 3 # mm/s

# # thorax
df['thorax_x_filt'], df['thorax_y_filt'] = zip(*df.apply(
    interpolate_filter_tracking_df, args = ('thorax', conf_cutoff, jump_cutoff, nan_gap_limit), axis=1))
df['thorax_x_filt_fullfr'], df['thorax_y_filt_fullfr'] = zip(*df.apply(
        WRTant_to_WRTframe_df, args = ('thorax',), axis=1))
df['thorax_x_filt_fullfr'], df['thorax_y_filt_fullfr'], df['thorax_v'] = zip(*df.apply(
        remove_close_to_edge_df, args = (mm_from_edge, pix2mm, sub_edge_df, wid), axis=1))
df['thorax_x_filt_fullfr_nostops'], df['thorax_y_filt_fullfr_nostops'],df['thorax_v_filt'], df['thorax_v_filt_nostops'],  = zip(*df.apply(
        remove_stopped_ant_sections_df, args = (slow_v_cutoff, pix2mm,), axis=1))
print('thorax analyzed')

# neck
df['neck_x_filt'], df['neck_y_filt'] = zip(*df.apply(
    interpolate_filter_tracking_df, args = ('neck', conf_cutoff, jump_cutoff, nan_gap_limit), axis=1))
df['neck_x_filt_fullfr'], df['neck_y_filt_fullfr'] = zip(*df.apply(
        WRTant_to_WRTframe_df, args = ('neck',), axis=1))
print('neck analyzed')

# head
df['head_x_filt'], df['head_y_filt'] = zip(*df.apply(
    interpolate_filter_tracking_df, args = ('head', conf_cutoff, jump_cutoff, nan_gap_limit), axis=1))
df['head_x_filt_fullfr'], df['head_y_filt_fullfr'] = zip(*df.apply(
        WRTant_to_WRTframe_df, args = ('head',), axis=1))
print('head analyzed')

# antennae
for antenna_num in range(0,2):
    for joint_num in range(0,3):
        df['antenna%i_%i_x_filt'%(antenna_num, joint_num)], df['antenna%i_%i_y_filt'%(antenna_num, joint_num)] = zip(*df.apply(
            interpolate_filter_tracking_df, args = ('antenna%i_%i'%(antenna_num, joint_num), conf_cutoff, jump_cutoff, nan_gap_limit), axis=1))
        df['antenna%i_%i_x_filt_fullfr'%(antenna_num, joint_num)], df['antenna%i_%i_y_filt_fullfr'%(antenna_num, joint_num)] = zip(*df.apply(
            WRTant_to_WRTframe_df, args = ('antenna%i_%i'%(antenna_num, joint_num),), axis=1))
print('antennae analyzed')

# eyes
for joint_num in range(0,2):
    df['eye%i_x_filt'%joint_num], df['eye%i_y_filt'%joint_num] = zip(*df.apply(
        interpolate_filter_tracking_df, args = ('eye%i'%joint_num, conf_cutoff, jump_cutoff, nan_gap_limit), axis=1))
    df['eye%i_x_filt_fullfr'%joint_num], df['eye%i_y_filt_fullfr'%joint_num] = zip(*df.apply(
        WRTant_to_WRTframe_df, args = ('eye%i'%joint_num,), axis=1))
print('eyes analyzed')


# rotate relative to thorax-to-neck and head-to-neck if exist
print('\nRotate eye and antennae point WRT tracked neck direction:')
if ('thorax_x_filt' in df) and ('neck_x_filt' in df):
    for antenna_num in range(0,2): # antennae
        for joint_num in range(0,3):
            df['antenna%i_%i_x_filt_WRTneck'%(antenna_num, joint_num)], df['antenna%i_%i_y_filt_WRTneck'%(antenna_num, joint_num)] = zip(*df.apply(
                WRTant_to_WRTneck_df, args = ('antenna%i_%i'%(antenna_num, joint_num),), axis=1))
            df['antenna%i_%i_x_filt_WRThead'%(antenna_num, joint_num)], df['antenna%i_%i_y_filt_WRThead'%(antenna_num, joint_num)] = zip(*df.apply(
                WRTant_to_WRThead_df, args = ('antenna%i_%i'%(antenna_num, joint_num),), axis=1))
    print('antennae analyzed')
    
    for joint_num in range(0,2): # eyes
        df['eye%i_x_filt_WRTneck'%joint_num], df['eye%i_y_filt_WRTneck'%joint_num] = zip(*df.apply(
            WRTant_to_WRTneck_df, args = ('eye%i'%joint_num,), axis=1))
    print('eyes analyzed')
    
del conf_cutoff, jump_cutoff, nan_gap_limit


# something there's a weird rotation resulting in a really high velocity 

print('\nall donezo')

## confirming that can correctly back calculate full frame position from LEAP tracking on zoomed/rotated video

In [None]:
fr_OI =130
tr_num = 978

import imutils


videofile = df.video[tr_num]
print(videofile)
cap = cv2.VideoCapture(videofile)
plt.close('all')

ff=np.where(df.frames[tr_num]==fr_OI)[0][0]
cap.set(1,int(fr_OI))
ret, temp = cap.read()
(xdim,ydim,_)=temp.shape

# plot full frame with ant center location from tracker
plt.figure(figsize=(5,12))
plt.subplot(3,1,1)
plt.imshow(temp)
x = int(df.x_kal[tr_num][ff])
y = int(df.y_kal[tr_num][ff])
ang = df.angle_improved[tr_num][ff]
plt.plot(x,y,'.w')

# zoom into around ant
buffer = 150
blank_frame = np.ones((y_dim+ 2*buffer, x_dim+ 2*buffer))* 0.7 #1.001# np.max(temp) # gray background  1.0001#
frame = blank_frame.copy()
frame[buffer:-buffer, buffer:-buffer] = temp[:,:,0]
xrange = range(int(round(x)), int(round(x+2*buffer)))
yrange = range(int(round(y)), int(round(y+2*buffer)))
xrange_actual = sorted(list( set(xrange) & set(range(0, x_dim+2*buffer) ) ))
yrange_actual = sorted(list( set(yrange) & set(range(0, y_dim+2*buffer) ) ))
frame_zoom = frame[np.ix_(yrange_actual, xrange_actual)]
plt.subplot(3,1,2)
plt.imshow(frame_zoom, cmap = 'gray')
plt.plot(150,150,'.w')


# rotate frame as what's used for LEAP tracking
rotation = ang
if not np.isnan(rotation):
    rotated = imutils.rotate_bound(frame_zoom, -1*rotation)
    rot_ranges = [range(int(x/2-100), int(x/2+100)) for x in rotated.shape];
    rotated_zoom = rotated[np.ix_(rot_ranges[0], rot_ranges[1])]
plt.subplot(3,1,3)
plt.imshow(rotated_zoom, cmap = 'gray')
plt.plot(100,100,'.w')
x_to_check, y_to_check = (120,150)
plt.plot(x_to_check, y_to_check,'.r')

# un_rotate a "tracked" point to make sure aligns on the full frame
i_ang = ang*-1#[frame_idcs]*-1
val_x, val_y = x_to_check, y_to_check
ant_x = x
ant_y = y
ant_ang = i_ang *np.pi/180 # in rad
R = np.array([[np.cos(ant_ang), -1*np.sin(ant_ang)],
              [np.sin(ant_ang),    np.cos(ant_ang)]])

if R.ndim == 3:
    rotated_vals = np.einsum('ijk, ki->kj', R, (np.array([val_x,val_y])-100).T).T
    translated_vals = rotated_vals + np.array([ant_x, ant_y])
    xs = translated_vals[0,:]
    ys = translated_vals[1,:]
elif R.ndim == 2:
    rotated_vals = np.einsum('ij, i->j', R, (np.array([val_x,val_y])-100).T).T
    translated_vals = rotated_vals + np.array([ant_x, ant_y]) 
    xs = translated_vals[0]
    ys = translated_vals[1]    
else:
    print('something went wrong with R dimensions')


plt.subplot(3,1,1)
plt.plot(xs,ys,'.r')



In [None]:
tr = 587
conf_cutoff = 0.6
jump_cutoff = 15
nan_gap_limit = 5
mm_from_edge = 1
slow_v_cutoff = 3 # mm/s

df['thorax_x_filt'][tr], df['thorax_y_filt'][tr] = interpolate_filter_tracking_df(df.loc[tr],'thorax', conf_cutoff, jump_cutoff, nan_gap_limit)
# print(tmpx)

df['thorax_x_filt_fullfr'][tr], df['thorax_y_filt_fullfr'][tr] =WRTant_to_WRTframe_df(df.loc[tr],'thorax')
print(df['thorax_x_filt_fullfr'][tr])
tmpx, tmpy, tmpv = remove_close_to_edge_df(df.loc[tr], 2, pix2mm, sub_edge_df, wid);
print(tmpx)

## Make images and video of raw tracked & analyzed data on background subtract and raw footage
-- for user given trackway number

In [None]:
# plot images with tracked data and lowpass filtered data
tr_num = 978 #105 #91 #105 # 1662


def load_video(raw_video_path, frame_range, verbose):
    """
    Independent of the frame range loaded, background has to be computed over total video or else can run into
    tracking problems
    """
    vid = cv2.VideoCapture(raw_video_path)
    Height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    Width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    NumFrames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    if not (NumFrames > 0):
        raise IOError('Codec issue: cannot read number of frames.')

    # restrict to desired range of frames
    if frame_range is None:
        frame_range = (0, int(NumFrames))
    else:
        # check doesn't exceed number of frames
        if frame_range[0] + frame_range[1] > NumFrames:
            frame_range = (int(frame_range[0]), int(NumFrames - frame_range[0]))

    # initialize blank frames
    frames = np.zeros((frame_range[1], Height, Width), np.uint8)

    # set the first frame to read in
    vid.set(cv2.CAP_PROP_POS_FRAMES, 0)
    for kk in range(frame_range[0]):
        tru, ret = self.vid.read(1)
    # vid.set(cv.CAP_PROP_POS_FRAMES, frame) # this way of setting the frame doesn't work on all cv versions

    # read in all frames
    for kk in range(frame_range[1]):
        tru, ret = vid.read(1)

        # check if video frames are being loaded
        if not tru:
            raise IOError('Codec issue: cannot load frames.')
        frames[kk, :, :] = ret[:, :, 0]  # assumes loading color
        if ((kk % 100) == 0) and verbose:
            print(kk)
    return frames, NumFrames, frame_range, vid


def remove_background(raw_video_path, frame_range, bkg_method, bkg_sep, verbose):

    # load in video, get video features
    frames, NumFrames, frame_range, vid = load_video(raw_video_path, frame_range, verbose)

    # if all frames loaded, do as normal
    if frame_range[1] == NumFrames:
        background = np.float32(np.median(frames[0::bkg_sep,:,:], axis = 0))
        if verbose:
            print('all_loaded!')
    else: # still use full video for background, not just desired output range
        background = []
        for kk in range(0, NumFrames, bkg_sep):
            vid.set(cv2.CAP_PROP_POS_FRAMES, kk)
            tru, ret = vid.read(1)

            # check if video frames are being loaded
            if not tru:
                raise IOError('Codec issue: cannot load frames.')

            background.append(ret[:,:,0])  # assumes loading color
        background = np.array(background, dtype='float32')
        background = np.float32(np.median(background, axis=0))

    # add a small number to background to not have divide by zeros for division
    background = background + np.float32(1E-6)
    if verbose:
        print('Background calculated')
    if bkg_method == 'div':
        norm_bkg = np.mean(background[:])  # normalize for mean intensity of image
        # norm_frm = np.mean(frames, axis=(1,2)) # normalize for mean intensity of current frame. For flicker
        frames_normed = (frames / norm_bkg) / (background / norm_bkg)  # broadcasting
    elif bkg_method == 'sub':
        raise IOError('Code does not currently support background subtraction, only division')
    else:
        raise IOError('Background divsion/subtraction method not recognized. Use div.')
    if verbose:
        print('Background removed')
    return frames_normed

def augment_contrast(frames, invert, cutoff, verbose):

    # center around 0 (neg = darker than background, pos = lighter than background)
    frames = frames-1

    # if there is a light backgroud, invert images
    if invert:
        frames = -1*frames
        if verbose:
            print('Inverted frames')

    # find max pixel value for each frame
    max_pixel_vals = frames.max(1).max(1)

    # divide by max so that all darker than background pixels are from -1 to 0 and lighter pixels are from 0 to 1
    frames_maxone = frames / max_pixel_vals[:,None,None]
    if frames_maxone.max(1).max(1).max(0) > 1:
        raise IOError('Error in normalized image. Did you background divide?')

    # shift pixel values to determine how dark the background should be
    frames_contrast = (255-cutoff)*frames_maxone + cutoff
    frames_contrast[frames_contrast<0]=0

    return frames_contrast


def WRTant_to_WRTframe(val_x, val_y, frame_center_x, frame_center_y, ant_ang_deg):
    ant_ang = ant_ang_deg *np.pi/180
    R = np.array([[np.cos(ant_ang), -1*np.sin(ant_ang)],
                  [np.sin(ant_ang),    np.cos(ant_ang)]])
    rotated_vals = np.dot(R,np.array([val_x-100,val_y-100]))
    translated_vals = rotated_vals*np.array([1,1]) + np.array([frame_center_x, frame_center_y])  
    return translated_vals[0], translated_vals[1];

def plot_ant_pt(ant_part, filt, df, tr_num, idx, ant_x, ant_y, ant_ang_deg, buffer): #filt = '' if want raw data
    x = df['%s_x%s'%(ant_part,filt)][tr_num][idx]
    y = df['%s_y%s'%(ant_part,filt)][tr_num][idx]
    conf = df['%s_conf'%(ant_part)][tr_num][idx]
    
    if 'fullfr' in filt:
        newx = x; newy =y;
    else:
        (newx, newy) = WRTant_to_WRTframe(x, y, ant_x, ant_y, ant_ang_deg)

    if ('eye' in ant_part) or ('antenna' in ant_part):
        if 'filt' in filt:
            conf = ['c','m'][int(re.search("\d",ant_part)[0])]
        # define colormap to show confidence
        norm2 = colors.Normalize(vmin=0, vmax=1)
        plt.scatter(newx+buffer, newy+buffer, c = conf, s = 10, cmap = cm.bwr,
                   edgecolor = 'none', norm=norm2)# '.g')
    else:
        plt.scatter(newx+buffer, newy+buffer, c = 'w', s = 10, edgecolor = 'none')
        
    return;


def crop_to_view(variable_to_use, tr_num, fr, x_dim, y_dim, buffer, axisOI):
    x = df[variable_to_use%'x'][tr_num][ff]
    y = df[variable_to_use%'y'][tr_num][ff]
    xrange = range(int(round(x)), int(round(x+2*buffer)))
    yrange = range(int(round(y)), int(round(y+2*buffer)))
    # account for if range goes outside of video frame
    xrange_actual = np.array(sorted(list( set(xrange) & set(range(0, x_dim+2*buffer) ) )))[[0,-1]]
    yrange_actual = np.array(sorted(list( set(yrange) & set(range(0, y_dim+2*buffer) ) )))[[0,-1]]
    plt.sca(axisOI)
    plt.xlim(xrange_actual)
    plt.ylim(yrange_actual)
    
    return xrange_actual, yrange_actual


def save_image(vlocation, nfig, name_base):
    pname = os.path.join(vlocation, '%s%0.3d.png'%(name_base,nfig))
    plt.savefig(pname)
    nfig = nfig + 1
    plt.pause(0.2)
#     plt.close('all')
    return nfig


def save_video(vlocation, name_base):
    # save images as movie
    if os.path.isfile((vlocation+'/%s.mp4'%name_base)):
        os.remove(vlocation + "/%s.mp4"%name_base)
        print('** Deleted %s.mp4 file'%name_base)
    print('saving %s.mp4 file'%name_base)
#     command_p1 = "ffmpeg -r 10 -i '%s/%s"%(vlocation, name_base)
#     command_p2 = " -vcodec libx264 '%s/%s.mp4'"%(vlocation, name_base)
#     command = command_p1 + "*.png'" + command_p2 #%01d
    #     print(command)
#     os.system(command)
    
    os.system("ffmpeg -pattern_type glob -r 10 -i '" + vlocation  + "/" + name_base + "*.png' -vcodec libx264 -pix_fmt yuv420p '" 
      + vlocation + "/" + name_base + ".mp4'")
    
    plt.pause(10)

    # delete all trackway vids
    pics2delete = glob.glob(os.path.join(vlocation, '%s*.png'%name_base))
    for pic in pics2delete:
        os.remove(pic)
    return




# DO THE THINGS
videofile = df.video[tr_num]
print(videofile)
cap = cv2.VideoCapture(videofile)
plt.close('all')
# calc bkgd sub
frames_normed = remove_background(videofile, None, 'div', 50, verbose = False)

fig = plt.figure(figsize=(19.8,10.8))

# plt.gcf().text(0.05, 0.7, 'Rough substrates constrain walking speed in ants but not due to large limb perturbations', fontsize = 24)
# plt.gcf().text(0.05, 0.62, 'G. T. Clifton, D. Holway, N. Gravish', fontsize = 18)
# plt.gcf().text(0.05, 0.54, 'University of California, San Diego', fontsize = 18)
plt.gcf().text(0.05, 0.2, '2019', fontsize = 18)

for im_n, fr_OI in enumerate(df.frames[tr_num][:]):
#     fr_id = np.where(df.frames_final[tr_num] == fr_OI)[0][0]
    plt.clf()
    # load frame
    ff=np.where(df.frames[tr_num]==fr_OI)[0][0]
    cap.set(1,int(fr_OI))
    ret, frame = cap.read()
    x_dim = frame.shape[1]
    y_dim = frame.shape[0]
    bkgdframe = np.stack((frames_normed[int(fr_OI),:,:],)*3,-1)
    
    # load ant x, y and angle
    x = int(df.x_kal[tr_num][ff])
    y = int(df.y_kal[tr_num][ff])
    ang = df.angle_improved[tr_num][ff]
    (thorax_x, thorax_y) = WRTant_to_WRTframe(df.thorax_x[tr_num][ff], df.thorax_y[tr_num][ff], x, y, ang)
    (neck_x, neck_y) = WRTant_to_WRTframe(df.neck_x[tr_num][ff], df.neck_y[tr_num][ff], x, y, ang)
    (head_x, head_y) = WRTant_to_WRTframe(df.head_x[tr_num][ff], df.head_y[tr_num][ff], x, y, ang)
#     print(x,y,ang)

    # PLOT THINGS
    
    # BCKGD SUB IMAGE WITH TRACKED DATA & CONFIDENCE
    ax2=fig.add_axes([0.02,0.1, 0.35, 0.7]) #plt.axes()
    # zoom into around ant
    buffer = 150
    white_frame = np.ones((y_dim+ 2*buffer, x_dim+ 2*buffer,3),dtype=np.float32)
    wframe = white_frame.copy()
    wframe[buffer:-buffer, buffer:-buffer,:] = bkgdframe
    wframe = wframe/np.max(wframe)
    plt.imshow(wframe)
    
    xrange_actual, yrange_actual = crop_to_view('%s_raw', tr_num, ff, x_dim, y_dim, buffer, ax2)
#     plt.text(xrange_actual[0]+20, yrange_actual[0]+20, 'Fr: %i'%fr_OI, color='k')
    plt.imshow(wframe)
    cmap = cm.bwr
    plt.scatter(x+buffer, y+buffer, s=20, c=np.array(0.5), norm = colors.Normalize(vmin=0, vmax=1), marker= 'o')
    plt.scatter(x+10, y+2*buffer-10, s=20, c=np.array(0.5), norm = colors.Normalize(vmin=0, vmax=1), marker= 'o')
    plt.text(x+15, y+2*buffer-10, 'estimated ant location from full body tracking', color = 'k', verticalalignment = "center")
    
    if not np.isnan(ang):
        plt.scatter(thorax_x+buffer, thorax_y+buffer, c = df.thorax_conf[tr_num][ff], s = 10, 
                cmap = cmap, norm = colors.Normalize(vmin=0, vmax=1))
        plt.scatter(neck_x+buffer, neck_y+buffer, c = df.neck_conf[tr_num][ff], s = 10, 
                cmap = cmap, norm = colors.Normalize(vmin=0, vmax=1))
        plt.scatter(head_x+buffer, head_y+buffer, c = df.head_conf[tr_num][ff], s = 10, 
                cmap = cmap, norm = colors.Normalize(vmin=0, vmax=1))
        for aa in range(0,2):
            for jj in range(0,3):
                plot_ant_pt('antenna%i_%i'%(aa,jj),'', df, tr_num, ff, x, y, ang, buffer)
        for aa in range(0,2):
                plot_ant_pt('eye%i'%(aa),'', df, tr_num, ff, x, y, ang, buffer)
    plt.gca().invert_yaxis()
    plt.axis('off')
    plt.gcf().text(0.02, 0.82, 'Raw LEAP output on background-subtracted, cropped view', color='k', FontSize = 12)
    plt.gcf().text(0.02, 0.79, 'frame: %i'%fr_OI, color='k', FontSize = 12)
    
    # COLORBAR
    cax = plt.axes([0.02,0.08,0.35, 0.03])
    plt.colorbar(cax=cax, label = 'confidence', orientation = 'horizontal')
    plt.clim(0,1)
    plt.set_cmap(cm.bwr)
    
    # RAW IMAGE WITH FILTERED DATA
    ax3=fig.add_axes([0.40,0.1, 0.35, 0.7]) #plt.axes()
    black_frame = np.ones((y_dim+ 2*buffer, x_dim+ 2*buffer,3),dtype=np.uint8)* 1# 1.001# np.max(temp) # gray background  1.0001#
    bframe = black_frame.copy()
    bframe[buffer:-buffer, buffer:-buffer,:] = frame
    plt.imshow(bframe)
    for aa in range(0,2):
        for jj in range(0,3):
            plot_ant_pt('antenna%i_%i'%(aa,jj),'_filt_fullfr', df, tr_num, ff, x, y, ang, buffer)
    for aa in range(0,2):
            plot_ant_pt('eye%i'%(aa),'_filt_fullfr', df, tr_num, ff, x, y, ang, buffer)

        
    plot_ant_pt('thorax', '_filt_fullfr', df, tr_num, ff, x, y, ang, buffer)
    plot_ant_pt('neck', '_filt_fullfr', df, tr_num, ff, x, y, ang, buffer)
    plot_ant_pt('head', '_filt_fullfr', df, tr_num, ff, x, y, ang, buffer)
    plt.xlim(xrange_actual)
    plt.ylim(yrange_actual)
    plt.gca().invert_yaxis()
    plt.axis('off')
    
    plt.gcf().text(0.4, 0.82, 'Post-processed body and limb tracking', color='k', FontSize = 12)
    plt.gcf().text(0.4, 0.79, 'low confidence removed, lowpass filtered', color='k', FontSize = 12)
    
    # FULL FRAME
    ax1=fig.add_axes([0.78,0.77, 0.2, 0.23])
    ax1.set_position([0.78,0.77, 0.2, 0.23])
    plt.imshow(frame)
    plt.plot(x, y, '.w')
    plt.axis('off')
    
    # VELOCITY TRACES OF LEGS
    ax4=fig.add_axes([0.78,0.14, 0.2, 0.60])#plt.axes()
    for aa in range(0,2):
        plt.plot(range(1,ff),np.abs(df['antenna%i_0_y_filt_WRThead'%aa][tr_num][1:ff]/pix2mm), '-', color = ['c','m'][aa], alpha = 0.3)

    plt.xlim((-10,len(df['frames'][tr_num])))
    plt.ylim((0,2))
    
    titleparts = videofile.split('/')
    plt.suptitle( '%s -- %s -- %s'
                  %(titleparts[-2], titleparts[-1].split('_')[0], titleparts[-1].split('_')[1]),x=0.02, y=.95, horizontalalignment = 'left')
    plt.pause(0.1)
    
    vlocation = '/media/gravishlab/SeagateExpansionDrive/Light_Dark'
    save_image(vlocation, im_n+10*2, 'LEAPtracking_filter')
save_video(vlocation, 'LEAPtracking_filter')
    
# plt.close()

# For non-leap velocities: Get rid of points where ant is close to a horizontal edge of the substrate 

In [None]:
# load in information from pickles saved from Find_Array_Edges notebook
vid_locations = '/media/gravishlab/SeagateExpansionDrive/Light_Dark/'
sub_edge_files = glob.glob(os.path.join(vid_locations, '**/*[!s]/*_Edges.pkl'))
sub_edge_files = sorted(sub_edge_files)
print('Total Number of Videos: ',len(sub_edge_files))

sub_edge_data = []
for file in sub_edge_files:
    colony = file.split('/')[-3]
    substrate = file.split('/')[-2]
#     print('%s -- %s'%(colony, substrate))
    temp = {}
    with open(file, 'rb') as f:
        temp['vlines'], temp['hlines'], temp['top_step_loc'], temp['img_raw'] = pickle.load(f)
    f.close()
    temp['colony']=colony
    temp['substrate']=substrate
    sub_edge_data.append(temp)
sub_edge_df = pd.DataFrame(sub_edge_data).copy()

del temp, sub_edge_data, sub_edge_files, colony, substrate

print('loaded in edge data for all substrates and colonies')

In [None]:
# confirm that sub edge info is correct by plotting it on the substrates
plt.close('all')
coltypes = sorted(list(set(df['colony'].values)))
subtypes = sorted(list(set(df['substrate'].values)))

for cc,col in enumerate(coltypes):
    for ss,sub in enumerate(subtypes):
        
        idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
        p = sub_edge_df.iloc[idx]['hlines'].values[0]
        step_side = sub_edge_df.iloc[idx]['top_step_loc'].values[0]
        img_raw = sub_edge_df.iloc[idx]['img_raw'].values[0]
        
        
        p_idx = cc*4 + ss 
        if p_idx%12 == 0:
            plt.figure(figsize = (12,6))
            
        plt.subplot(3,4,p_idx%12+1)
        plt.imshow(img_raw, cmap='gray')
        plt.axis('off')
        plt.plot(np.arange(0,1000), p[0] + p[1]*np.arange(0,1000), ':b')
        if ss==0:
            plt.ylabel(col[-4:])
            plt.text(10,-10,col[-4:])
        if ss%2 ==1:
            plt.text(100, 150, step_side, size = 8)
        
    


In [None]:
# apply to whole dataset

def remove_close_to_edge_df(df, buffer_mm, pix2mm, sub_edge_df, wid):
    buffer = buffer_mm*pix2mm
    col = df['colony']
    sub = df['substrate']
    idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
    p = sub_edge_df.iloc[idx]['hlines'].values[0]

    x = np.array(df['x_raw'])
    y = np.array(df['y_raw'])
    v = np.append(np.array(df['vfilt']), np.nan)
    
    # get rid of crazy accelerations from bad tracking
    tmp = np.diff(v/pix2mm); tmp[-1] = tmp[-2]; tmp = tmp>15
    idcs = np.logical_or(np.insert(tmp, 0, False), np.append(tmp, False))
    x[idcs] = np.nan; y[idcs] = np.nan; v[idcs]=np.nan
    
    near_bottom = y > (p[0] + p[1]*x - buffer)
    near_top = np.logical_or(y < (p[0] + p[1]*x - 16*pix2mm + buffer), y < buffer)
    near_left = x < 3*pix2mm
    near_right = x > wid-3*pix2mm
    remove = np.logical_or(np.logical_or(near_bottom, near_top),  np.logical_or(near_left, near_right))
    x[remove] = np.nan; y[remove] = np.nan; v[remove] = np.nan
    return x,y,v


def remove_slow_df(df, v_cutoff, pix2mm):

    x = np.array(df['x_final'])
    y = np.array(df['y_final'])
    v = np.array(df['v_final'])
    
    lowpass = np.append(lowpass_filt_sections(df['vfilt'], 0.06),np.nan)
    v_remove = lowpass/pix2mm<v_cutoff
    x[v_remove] = np.nan; y[v_remove] = np.nan; v[v_remove] = np.nan
    v_lowpass = lowpass.copy()
    v_lowpass[v_remove] = np.nan
    return x,y,v, lowpass, v_lowpass


df['x_final'], df['y_final'], df['v_final'] = zip(*df.apply(
        remove_close_to_edge_df, args = (2, pix2mm, sub_edge_df, wid), axis=1))
print('done removing when ant close to edge of substrate or frame')

df['x_final_noslow'], df['y_final_noslow'], df['v_final_noslow'], df['v_final_lowpass'], df['v_final_lowpass_noslow'] = zip(*df.apply(
        remove_slow_df, args = (3, pix2mm), axis=1))
print('done removing when ant is stopped')

In [None]:
# CHECK THAT GETTING GOOD FINAL VALUES FOR SPECIFIC TRIAL
tr = 1078

plt.close('all')
plt.figure()
plt.subplot(2,1,1)
plt.plot(df['x_raw'][tr],df['y_raw'][tr],'-k')
plt.plot(df['x_final'][tr],df['y_final'][tr],'.b')
plt.xlim([0,1000])
plt.ylim([0,550])

plt.subplot(2,1,2)
plt.plot(df['frames'][tr][:-1],df['vfilt'][tr]/pix2mm,'-k')
plt.plot(df['frames'][tr][:-1],df['vfilt'][tr]/pix2mm,'.k')
plt.plot(df['frames'][tr],df['v_final'][tr]/pix2mm,'-b')
plt.plot(df['frames'][tr],df['v_final_movavg_noslow'][tr]/pix2mm,'-r')
plt.ylim([0,100])


#TRIALS WITH CRAZY ACCELERATIONS: [673, 872, 1078, 1346, 1364, 3669, 4248, 4541, 4764]

In [None]:
# FOR SPECIFIC TRIAL - remove when ant close to substrate edge or slow
# df['x_kal'] <-- straight from kalman filter, no post-processing
# df['x_raw'] <-- lowpass filtered
# df['x'] <-- removed when ant slow or near wall

buffer = 1*pix2mm
# tr = 3552
for tr in np.arange(233,234):


    col = df['colony'][tr]
    sub = df['substrate'][tr]
    print('%s, %s'%(col, sub))
    idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
    img_raw = sub_edge_df.iloc[idx]['img_raw'].values[0]
    p = sub_edge_df.iloc[idx]['hlines'].values[0]

    x = np.array(df['x_raw'][tr])
    y = np.array(df['y_raw'][tr])
    near_bottom = y > (p[0] + p[1]*x - buffer)
    near_top = np.logical_or(y < (p[0] + p[1]*x - 16*pix2mm + buffer), y < buffer)
    near_left = x < 3*pix2mm
    near_right = x > wid-3*pix2mm
    remove = np.logical_or(np.logical_or(near_bottom, near_top),  np.logical_or(near_left, near_right))





    # remove when ant is slow
    win_wid = 12
    nfr_cutoff = 20
    v_cutoff = 3 # in mm/s
    movavg = take_moving_ave(df['vfilt'][tr], win_wid, nfr_cutoff)
    movavg = np.append(movavg, np.nan)
#     v_remove = movavg/pix2mm<v_cutoff
    lowpass = np.append(lowpass_filt_sections(df['vfilt'][tr], 0.06),np.nan)
    v_remove = lowpass/pix2mm<v_cutoff



    plt.close('all')
    plt.figure(figsize=(6,7))
    ax1=plt.subplot(2,1,1)
    plt.imshow(img_raw, cmap = 'gray')
    xd = np.arange(0,wid)
    sub_edge_df.iloc[idx]['img_raw'].values
    yd = p[0]+p[1]*xd
    plt.plot(xd,yd,'-g')
    plt.plot(xd,yd-buffer,':g')
    plt.plot(xd,yd-16*pix2mm+buffer,':g')
    plt.plot(x,y,'-k')
    plt.plot(x[remove],y[remove],'.b')


    plt.subplot(2,1,2)
    plt.plot(df['frames'][tr][:-1], df['vfilt'][tr]/pix2mm, '-k')
    plt.plot(df['frames'][tr], movavg/pix2mm, '-r')
    plt.plot(df['frames'][tr], lowpass/pix2mm, '-g')
    plt.axhline(y=v_cutoff, xmin=0, xmax=1, color ='r', alpha = 0.3)
    plt.axhline(y=5, xmin=0, xmax=1, color ='g', alpha = 0.3)
    plt.sca(ax1)
    plt.plot(x[v_remove],y[v_remove],'.r')

    plt.pause(3)


## Compare non-LEAP(contour) and LEAP (thorax) velocities

In [None]:
tr = 0
plt.close('all')
plt.figure()
plt.plot(df['frames'][tr], df['v_final'][tr]/pix2mm, ':k')
plt.plot(df['frames'][tr], df['thorax_v'][tr], '-g')
plt.plot(df['frames'][tr], df['thorax_v_filt'][tr], '-r')

## Improve Angle approximation from tracking

In [None]:
# FILP ANGLE FUNCTIONS

def calc_mov_avg(x, N, cutoff_nan):
    padded_x = np.insert(np.insert( np.insert(x, len(x), np.empty(int(N/2))*np.nan), 0, np.empty(int(N/2))*np.nan ),0,0)
    n_nan = np.cumsum(np.isnan(padded_x)) 
    cumsum = np.nancumsum(padded_x) 
    window_sum = cumsum[N+1:] - cumsum[:-(N+1)] - x
    window_n_nan = n_nan[N+1:] - n_nan[:-(N+1)] - np.isnan(x)
    window_n_values = (N - window_n_nan).astype(float)
    window_n_values[window_n_values<cutoff_nan] = np.nan # if fewer than cutoff values in window, ignore
    movavg = (window_sum) / (window_n_values)
    return movavg

def remove_90_turns(arr, all_frs):
        idcs = np.where(np.isfinite(arr))[0]
        d_arr = np.diff(arr[idcs])
        d_n_fr = np.diff(idcs)
        d_sign = np.sign(d_arr)
        d_med = np.logical_and(np.logical_and( np.abs(d_arr) > 0.5, np.abs(d_arr) < 1.5), d_n_fr<10)
        d_med_cumsum = np.cumsum(np.insert(d_med,0,0))
        d_med_opp = np.cumsum(np.insert(d_sign*d_med,0,0))#(d_med_cumsum%2).astype(bool)
        if np.sum(d_med_opp==0)<np.sum(d_med_opp != 0):
            d_med_opp = d_med_opp - np.median(d_med_opp)
        arr[idcs]=np.clip(arr[idcs]-d_med_opp, -1, 1)  
        return arr

def find_flipped_angles_all(obj_OI, plots_on = False):
    
    all_angs = np.array(obj_OI['angle_improved'])
    all_angs[np.isnan(all_angs)]= np.array(obj_OI['angle'])[np.isnan(all_angs)]-90
#     all_angs = obj_OI['angle']-90
    
    
    all_frs = obj_OI['frames']
    all_sin = np.sin(np.deg2rad(all_angs))
    all_cos = np.cos(np.deg2rad(all_angs))
    all_sin_sign = np.sign(np.nanmedian(all_sin))
    all_cos_sign = np.sign(np.nanmedian(all_cos))
    
    
    if np.sum(np.isfinite(all_angs))==0:
        return np.ones(np.array(all_angs).shape)*np.nan
    
    if plots_on:
        plt.figure()
        plt.subplot(3,1,1)
        plt.plot(all_frs, all_sin, '-k', alpha = 0.3)
        plt.plot(all_frs, all_sin, '.g', alpha = 0.3)
        plt.plot(all_frs, all_cos, '-k', alpha = 0.3)
        plt.plot(all_frs, all_cos, '.b', alpha = 0.3)

    # FLIP EVERY OTHER SECTION DEFINED WHERE COS OR SIN CHANGES BY MORE THAN 1
    idcs = np.where(np.isfinite(all_sin))[0]
    d_sin = np.abs(np.diff(all_sin[idcs]))
    d_cos = np.abs(np.diff(all_cos[idcs]))
    d_big = np.logical_or(d_sin> 1.5, d_cos > 1.5)
    d_big_cumsum = np.cumsum(np.insert(d_big,0,0))
    d_big_opp = (d_big_cumsum%2).astype(bool)
    
    if plots_on:
        plt.plot(all_frs[idcs[d_big_opp]], all_sin[idcs[d_big_opp]], '.r', MarkerSize = 2)
        plt.plot(all_frs[idcs[d_big_opp]], all_cos[idcs[d_big_opp]], '.r', MarkerSize = 2)
        plt.ylim([-1.1,1.1])
    
    all_sin[idcs[d_big_opp]] = -1* all_sin[idcs[d_big_opp]]
    all_cos[idcs[d_big_opp]] = -1* all_cos[idcs[d_big_opp]]
    
#     # if is now wrong sign, flip whole array
#     if (np.sign(np.nanmedian(all_sin)) != all_sin_sign) & (np.sign(np.nanmedian(all_cos)) != all_cos_sign):
# #         print('switching', all_sin_sign, np.sign(np.nanmedian(all_sin)))
#         all_sin = -1* all_sin
#         all_cos = -1* all_cos
        
    all_sin = remove_90_turns(all_sin, all_frs)
    all_cos = remove_90_turns(all_cos, all_frs)
    
    
    # interpolate nan gaps
    nan_gap_fill = 3
    where_interp =find_nan_gaps(np.array(all_angs), nan_gap_fill)
    if np.sum(where_interp)>0:
        all_sin = interp_vals(all_sin, find_interp_idcs(where_interp))
        all_cos = interp_vals(all_cos, find_interp_idcs(where_interp))
    
        
    all_sin_good = all_sin.copy()
    all_cos_good = all_cos.copy()
#     all_sin_smooth = calc_mov_avg(all_sin_good, 6, 2)
#     all_cos_smooth = calc_mov_avg(all_cos_good, 6, 2)
    fr_cutoff = 0.05
    all_sin_smooth = lowpass_filt_sections(all_sin_good, fr_cutoff)
    all_cos_smooth = lowpass_filt_sections(all_cos_good, fr_cutoff)
    all_angs_good = np.rad2deg(np.arctan2(all_sin_smooth, all_cos_smooth))

    if plots_on:
        plt.subplot(3,1,2)
        plt.plot(all_frs, all_sin_good, '-k', alpha = 0.3)
        plt.plot(all_frs, all_sin_good, '.k', alpha = 0.3)
        plt.plot(all_frs, all_cos_good, '-k', alpha = 0.3)
        plt.plot(all_frs, all_cos_good, '.k', alpha = 0.3)
        plt.plot(all_frs, all_sin_smooth, '.g', alpha = 0.3)
        plt.plot(all_frs, all_cos_smooth, '.b', alpha = 0.3)
        plt.ylim([-1.1,1.1])
        plt.subplot(3,1,3)
        plt.plot(all_frs, all_angs, 'k', alpha = 0.3)
        plt.plot(all_frs, all_angs_good, 'r')
        plt.ylim([-180, 180])
    return all_angs_good


# average x,y,angle if isolated frame without head found (angle_improve = nan)
def find_nan_gaps(arr, limit):  
    from itertools import groupby
    yy = np.isnan(arr)
    xx = range(len(yy))
    where_gapOI = np.full(arr.shape, False)
    where_othergaps = np.full(arr.shape, False)
    for k,g in groupby(iter(xx), lambda x: yy[x]):
        if k == True: # if is a group of nan
            g = list(g)
            if any(x in g for x in [0, len(arr)-1]): # if first or last group
                where_othergaps[np.array(g)]=True
                continue       
            if len(g)<= limit: # length is below limit
                where_gapOI[np.array(g)]=True
    return where_gapOI
def find_interp_idcs(where_interpolate):
    interp_idcs = []
    for val in [-1,0,1]:
        interp_idcs = np.concatenate([interp_idcs,np.where(where_interpolate)[0]+val])
    interp_idcs = np.sort(np.array(list(set(interp_idcs)))) # get of repeat elements
    interp_idcs = interp_idcs[np.logical_and(interp_idcs>-1, interp_idcs < len(where_interpolate))].astype(np.uint32) # only elements in range
    return interp_idcs
def interp_vals(arr, interp_idcs): # array includes nan values
    temp = arr[interp_idcs]
    interpolated_vals = np.interp(
        interp_idcs, 
        interp_idcs[np.logical_not(np.isnan(temp))], temp[np.logical_not(np.isnan(temp))] )
    interp = arr.copy()
    interp[interp_idcs] = interpolated_vals
    return interp


In [None]:
# apply to whole dataframe

def fix_angles_df(obj_OI, plot_things, nan_gap_fill):
    tmp = find_flipped_angles_all(obj_OI, plots_on = plot_things)
    where_interp =find_nan_gaps(tmp, nan_gap_fill)
    if np.sum(where_interp)>0:
        tmp = interp_vals(tmp, find_interp_idcs(where_interp))
    return tmp

df['angle_smooth'] =  df.apply(fix_angles_df, args =(False, 4), axis=1)
print('done fixing and smoothing orientation angles in dataframe')

# # one trial
# _=fix_angles_df(df.loc[288], True, 4)

## calculate forward and lateral velocity

In [None]:
def find_orientation_df(x): #'rotation', 'Len', 'Dur', 'travel_dir'
    orie = np.arctan2(x['neck_y_filt_fullfr']-x['thorax_y_filt_fullfr'], x['neck_x_filt_fullfr']-x['thorax_x_filt_fullfr'])
    return orie

def calc_forward_vel_df(df, v_multiplier, var_string):
    frames = df['frames']
#     t_x = df['x_%s'%var_string] # for using contour tracking
#     t_y = df['y_%s'%var_string]
    t_x = df['thorax_x_filt_fullfr'] # using LEAP thorax tracking
    t_y = df['thorax_y_filt_fullfr']
    d_t_x, d_t_y = np.diff(t_x), np.diff(t_y)
    travel_dist = np.linalg.norm([d_t_x, d_t_y], axis =0)

    or_start = (df['orientation'][:-1]+2*np.pi)%(2*np.pi)
    or_stop = (df['orientation'][1:]+2*np.pi)%(2*np.pi)
    or_ave = (np.arctan2(np.nanmean(np.array([np.sin(or_start), np.sin(or_stop)]), axis=0), 
                            np.nanmean(np.array([np.cos(or_start), np.cos(or_stop)]),axis = 0))+2*np.pi)%(2*np.pi)%(2*np.pi)
    tr_dir = (np.arctan2(d_t_y, d_t_x)+(2*np.pi))%(2*np.pi)
    
    v_forward = np.linalg.norm([d_t_x, d_t_y], axis =0)*np.cos(tr_dir - or_ave)*v_multiplier
    v_lateral = np.sin(tr_dir - or_ave)*travel_dist*v_multiplier
    return v_forward, v_lateral



# calculate orientation based on LEAP tracking
df['orientation']= df.apply(find_orientation_df, args = (), axis=1)
print('Done calculating time varying orientation for each trial')

# calculate forward and lateral from final x and y
df['v_forward'], df['v_lateral'] = zip(*df.apply(
        calc_forward_vel_df, args = (v_multiplier,'final'), axis=1))
print('calculated first round of forward and lateral velocities')

# some angles might be totally switched (pointing to back of ant) -- find and fix
df['test']=df.apply(lambda x: x.angle_smooth + 180 if np.nanmedian(x.v_forward) <= -1 else x.angle_smooth, axis =1)
df.drop("angle_smooth", axis=1, inplace=True)
df.rename(columns = {'test':'angle_smooth'}, inplace=True)
print('fixed any trials where ant is mostly moving backwards')

# recalculate forward and lateral velocities
df['v_forward'], df['v_lateral'] = zip(*df.apply(
        calc_forward_vel_df, args = (v_multiplier,'final'), axis=1))
print('recalculated forward and lateral velocities')

# calculate forward and lateral without slow points
df['v_forward_noslow'], df['v_lateral_noslow'] = zip(*df.apply(
        calc_forward_vel_df, args = (v_multiplier,'final_noslow'), axis=1))
print('recalculated forward and lateral velocities without slow points')

# lowpass filter forward and lateral velocity
df['v_forward_lowpass'] = df['v_forward'].apply(lowpass_filt_sections, args = (0.06,) )
df['v_lateral_lowpass'] = df['v_lateral'].apply(lowpass_filt_sections, args = (0.06,) )
print('lowpass filtered forward and lateral velocities')

# lowpass filter forward and lateral velocity without slow points
df['v_forward_lowpass_noslow'] = df['v_forward_noslow'].apply(lowpass_filt_sections, args = (0.06,) )
df['v_lateral_lowpass_noslow'] = df['v_lateral_noslow'].apply(lowpass_filt_sections, args = (0.06,) )
print('lowpass filtered forward and lateral velocities without slow points')


df['a_forward']= df['v_forward'].apply(lambda x: np.append(np.diff(x),np.nan)*fps)
df['a_lateral']= df['v_lateral'].apply(lambda x: np.append(np.diff(x),np.nan)*fps)
print('calculated accelerations')

print('\n\nDone!')

In [None]:
tr=24
plt.close('all')
plt.figure()
plt.plot(np.deg2rad(df['angle_smooth'][tr]), '-k')
plt.plot(np.deg2rad(df['angle_improved'][tr]), ':k')


In [None]:
tr = 287
var_string = 'final'
frames = df['frames'][tr]
t_x = df['x_%s'%var_string][tr]
t_y = df['y_%s'%var_string][tr]
d_t_x, d_t_y = np.diff(t_x), np.diff(t_y)
travel_dist = np.linalg.norm([d_t_x, d_t_y], axis =0)

or_start = (np.deg2rad(df['angle_smooth'][tr])[:-1]+2*np.pi)%(2*np.pi)%(2*np.pi)
or_stop = (np.deg2rad(df['angle_smooth'][tr])[1:]+2*np.pi)%(2*np.pi)%(2*np.pi)
or_ave = (np.arctan2(np.nanmean(np.array([np.sin(or_start), np.sin(or_stop)]), axis=0), 
                        np.nanmean(np.array([np.cos(or_start), np.cos(or_stop)]),axis = 0))+2*np.pi)%(2*np.pi)
tr_dir = (np.arctan2(d_t_y, d_t_x)+(2*np.pi))%(2*np.pi)

plt.figure()
plt.plot(or_start, 'k')
plt.plot(tr_dir)

v_forward = np.linalg.norm([d_t_x, d_t_y], axis =0)*np.cos(tr_dir - or_ave)*v_multiplier
v_lateral = np.sin(tr_dir - or_ave)*travel_dist*v_multiplier

# Find Step direction and travel direction

In [None]:
# find step direction
step_df = df.loc[df['substrate'].isin(['Step','C_Step'])]

def find_step_direction(df, sub_edge_df):
    step_dir = np.nan; c_idx = np.nan; c_x = np.nan; c_y = np.nan; t_dir = np.nan
    
    sub= df['substrate']
    col = df['colony']
    idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
    img_raw = sub_edge_df.iloc[idx]['img_raw'].values[0]
    step_side = sub_edge_df.iloc[idx]['top_step_loc'].values[0]
    step_side_val = np.array([1,-1])[np.isin(['right','left'],step_side)]
    p = sub_edge_df.iloc[idx]['vlines'].values[0][1]

    # get pathway info
    x = df['thorax_x_filt_fullfr']; y = df['thorax_y_filt_fullfr']; v = df['thorax_v_filt'];
#     x = df['x_final']; y = df['y_final']; v = df['v_final'];

    # does get within __ mm of center?
    center_cutoff = 0.5
    dist_from_center_cutoff = 3
    cross_center = np.any(np.abs(x-(y-p[0])/p[1])<center_cutoff*pix2mm)
    travel_far = np.any(np.abs(x-(y-p[0])/p[1])>dist_from_center_cutoff*pix2mm)


    if cross_center and travel_far:
        # overall direction moving
        tmp = np.diff(x)
        bin_counts,_ = np.histogram(tmp[np.isfinite(tmp)],bins = [-5,-1,1,5])
        if (np.abs( (bin_counts[2]+1)/(bin_counts[0]+1))<10) and (np.abs((bin_counts[2]+1)/(bin_counts[0]+1))>0.1): # if not a clear travel direction don't include in analysis
            return t_dir, step_dir, c_idx, c_x, c_y
        bin_counts[1]=0
        t_dir = np.argmax(bin_counts)-1 #np.sign(np.nanmedian(np.diff(x))) # -1 = to left, 1 = to right
        step_dir = t_dir*step_side_val # -1 = down, 1 = up
        step_dir = step_dir[0]
        
        # when cross center
        dist_from_step = x-(y-p[0])/p[1]
        c_idcs = np.argsort(np.abs(dist_from_step))[0:2]
        c_idx = c_idcs[0]
        c_x = x[c_idcs][0]-dist_from_step[c_idcs][0]
        c_y = p[0]+c_x*p[1]
    return t_dir, step_dir, c_idx, c_x, c_y
    
    
step_df['travel_dir'], step_df['step_dir'], step_df['cross_step_idx'], step_df['cross_step_x'], step_df['cross_step_y'] = zip(*step_df.apply(
    find_step_direction, args = (sub_edge_df,), axis=1))

print('done saving step direction and crossing info')

In [None]:
# for one trial --> find if step trial crosses center

step_df = df.loc[df['substrate'].isin(['Step','C_Step'])]

tr = 2962

sub= step_df['substrate'].loc[tr]
col = step_df['colony'].loc[tr]
print('%s -- %s'%(col, sub))

# get substrate edge info
idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
img_raw = sub_edge_df.loc[idx]['img_raw'].values[0]
step_side = sub_edge_df.loc[idx]['top_step_loc'].values[0]
step_side_val = np.array([1,-1])[np.isin(['right','left'],step_side)]
p = sub_edge_df.loc[idx]['vlines'].values[0][1]

# get pathway info
# x = step_df['x_final'].loc[tr]; y = step_df['y_final'].loc[tr]; v = step_df['v_final'].loc[tr]; 
x = df['thorax_x_filt_fullfr'].loc[tr]; y = df['thorax_y_filt_fullfr'].loc[tr]; v = df['thorax_v_filt'].loc[tr];
v_movavg = step_df['v_final_lowpass'].loc[tr]; frames = step_df['frames'].loc[tr];

# does get within __ mm of center?
center_cutoff = 0.5
dist_from_center_cutoff = 3
cross_center = np.any(np.abs(x-(y-p[0])/p[1])<center_cutoff*pix2mm)
travel_far = np.any(np.abs(x-(y-p[0])/p[1])>dist_from_center_cutoff*pix2mm)
print('crosses center? %i -- travels at least %i mm? %i'%(cross_center, dist_from_center_cutoff, travel_far))



if cross_center and travel_far:
    
    # overall direction moving
    tmp = np.diff(x)
    bin_counts,_ = np.histogram(tmp[np.isfinite(tmp)],bins = [-5,-1,1,5])
    print(bin_counts)

    if (np.abs( (bin_counts[2]+1)/(bin_counts[0]+1))<10) and (np.abs((bin_counts[2]+1)/(bin_counts[0]+1))>0.1): # if not a clear travel direction don't include in analysis
        print('too close to call which direction traveling')
#         return t_dir, step_dir, c_idx, c_x, c_y
    bin_counts[1]=0
    t_dir = np.argmax(bin_counts)-1 #np.sign(np.nanmedian(np.diff(x))) # -1 = to left, 1 = to right
    step_dir = t_dir*step_side_val # -1 = down, 1 = up
    step_dir = step_dir[0]
    
    print('traveling: %i'%t_dir)
    print('stepping up(+) or down(-): %i'%step_dir)
    
    # when cross center
    dist_from_step = x-(y-p[0])/p[1]
    c_idcs = np.argsort(np.abs(dist_from_step))[0:2]
    c_idx = c_idcs[0]
    c_x = x[c_idcs][0]-dist_from_step[c_idcs][0]
    c_y = p[0]+c_x*p[1]
    
    

plt.close('all')
plt.figure(figsize=(6,7))
ax1=plt.subplot(2,1,1)
plt.imshow(img_raw, cmap = 'gray')


yd = np.arange(0,hei)
xd = (yd-p[0])/p[1]

plt.plot(xd,yd,'-g')
plt.plot(xd+center_cutoff*pix2mm,yd,':g')
plt.plot(xd-center_cutoff*pix2mm,yd,':g')
plt.plot(xd+dist_from_center_cutoff*pix2mm,yd,':g')
plt.plot(xd-dist_from_center_cutoff*pix2mm,yd,':g')

plt.plot(x,y,'-k')
plt.plot(x[np.isfinite(x)][0], y[np.isfinite(x)][0], '.k')
plt.xlim([0,1000])
plt.ylim([550,0])


plt.subplot(2,1,2)
# plt.plot(step_df['frames'].iloc[tr],v,'-k')
plt.plot(x/pix2mm,v,'-k')
plt.plot(x/pix2mm,v_movavg,'--r')
plt.axvline(x=x[c_idx]/pix2mm,color='g')
plt.xlim([0,32])



# PLOT THINGS

## Plot distance traveled at each mov ave speed for light dark on flat and array

In [None]:
def get_dist_traveled_df(x, var_OI, bins):
    glenna=x[var_OI]#/pix2mm
    glenna= glenna[np.isfinite(glenna)]
    ordering = np.argsort(np.digitize(glenna,bins))
    ordered = glenna[ordering]
    split = np.split(ordered,np.where(np.diff(np.digitize(glenna,bins)[ordering])>0)[0]+1)
    split_sum = np.array([np.sum(x) for x in split])
    bin_idcs = np.unique(np.digitize(glenna,bins))
    bin_sums = np.zeros(bins.shape)
#     print(bin_idcs, [x[0] for x in split])
    bin_sums[bin_idcs.astype(int)] = split_sum/fps
    
    return np.abs(bin_sums)

longtracks = df[df['v_forward_lowpass_noslow'].map(lambda x: np.sum(np.isfinite(x))>50).values]

var_OI = 'v_forward_lowpass_noslow'#'movave_v'
sp_max = 65
precision = 10 # how many bins per 1 unit
bins = np.linspace(-20,sp_max, sp_max*precision+1)
longtracks['hist_dist'] =df.apply( get_dist_traveled_df, args = (var_OI, bins), axis=1)
subtypes = sorted(list(set(df['substrate'])))

plt.close('all')
pltcolors = ['#B1740F', '#BA4246', '#087E8B', '#701C6F']
percent_cutoff = 0.02
plt.figure(figsize = (12,6))

all_hists = np.ones([4,len(bins)])*np.nan


for ss,subtype in enumerate(['Flat', 'Array']):
    ax = plt.subplot(2,1,ss+1)
    print(' -- ', subtype)
#     for ltype in [1]:
#     sub_df = longtracks.loc[(longtracks['substrate']==subtype) & (longtracks['light']==ltype)]
    sub_df = longtracks.loc[(longtracks['substrate']==subtype)]

    # plot dark trials
    vals_OI = sub_df.loc[(sub_df['light'].map(lambda x: x==0)).values]['hist_dist']
    total_dist_traveled = np.sum(vals_OI.sum())
    hist_OI = vals_OI.sum()/total_dist_traveled
    
    print(' -- -- total dist: %0.2f mm'%total_dist_traveled)
    kde_data = np.repeat(bins+1/(2*precision),np.round((hist_OI*10000)).astype(int))
    kde = stats.gaussian_kde(kde_data)
    kde_fit = kde.evaluate(bins)/precision
    all_hists[ss*2+0,:]=hist_OI*total_dist_traveled
#     if ss == 0:
#         ref_speed = bins[np.argmax(kde_fit)]+1/(2*precision)
#         plt.xlabel('speed (mm/s)')
#         plt.ylabel('fraction of dist traveled')
#     plt.axvline(x=ref_speed, ymin = 0, ymax = 1, color = 'k', linestyle = ':', alpha = 0.4)
#     top_percent = np.flip(bins,0)[np.cumsum(np.flip(hist_OI,0))>percent_cutoff][0]
#     plt.axvline(x=top_percent, ymin = 0, ymax = 0.6, color = 'k', linestyle = '-', alpha = 0.4)
    plt.plot(bins[hist_OI != 0]+1/(2*precision), kde_fit[hist_OI != 0], '-', color = 'k', alpha = 0.4)
    plt.text(40, 0.11/precision, '%i'%len(vals_OI), color = 'k', alpha = 0.4)
    plt.text(50, 0.11/precision, '%0.2f cm'%(total_dist_traveled/10), color = 'k', alpha = 0.4)
    plt.xlim([-10,sp_max])
    plt.ylim([0, 0.12/precision])

    # plot light trials
    vals_OI = sub_df.loc[(sub_df['light'].map(lambda x: x==1)).values]['hist_dist']
    total_dist_traveled = np.sum(vals_OI.sum())
    hist_OI = vals_OI.sum()/total_dist_traveled
    
    print(' -- -- total dist: %0.2f mm'%total_dist_traveled)
    kde_data = np.repeat(bins+1/(2*precision),np.round((hist_OI*10000)).astype(int))
    kde = stats.gaussian_kde(kde_data)
    kde_fit = kde.evaluate(bins)/precision
    all_hists[ss*2+1,:]=hist_OI*total_dist_traveled
    plt.plot(bins[hist_OI != 0]+1/(2*precision), kde_fit[hist_OI != 0], '--', color = pltcolors[ss], alpha = 0.5)
#     top_percent = np.flip(bins,0)[np.cumsum(np.flip(hist_OI,0))>percent_cutoff][0]
#     plt.axvline(x=top_percent, ymin = 0, ymax = 0.6, color = pltcolors[ss], linestyle = '--', alpha = 0.5)
    plt.text(40, 0.10/precision, '%i'%len(vals_OI), color = pltcolors[ss], alpha = 0.5)
    plt.text(50, 0.10/precision, '%0.2f'%(total_dist_traveled/10), color = pltcolors[ss], alpha = 0.5)
    
    plt.ylim([0, 0.12/precision])
    plt.ylabel('fraction of dist traveled')
    plt.xlabel('speed (mm/s)')
    plt.xlim([-10,sp_max])
    plt.text( 2, 0.11/precision, 'Substrate: %s'%subtype, color = 'k', size = 12)
    if ss == 0:
        plt.gca().get_xaxis().set_visible(False)


# plt.savefig('cinnamon_distance.svg', transparency = True)

In [None]:
# Calculate if speed distributions are different in light and dark

from scipy.stats import mannwhitneyu
resolution_factor = 1 # how many mm should equal one datapoint
dists = np.array(['F-D', 'F-L', 'A-D', 'A-L'])

for pp,pairs in enumerate([[0,1],[2,3],[0,2],[1,3]]):
    tmp = np.repeat(bins, np.round(all_hists[pairs[0],:]/resolution_factor).astype(int))
    tmp2 = np.repeat(bins, np.round(all_hists[pairs[1],:]/resolution_factor).astype(int))
    F,p = mannwhitneyu(tmp, tmp2, alternative = 'two-sided')
    print('Compare %s with %s: F= %0.1f, p= %0.7e'%(dists[pairs[0]], dists[pairs[1]], F,p))


#### USE CONTINGENCY TABLES
# from scipy.stats import chi2_contingency
# # test if light diff on flat
# non_zero_cols = np.max(all_hists[:2,:]>0,axis=0)
# c2,p,dof,_ = chi2_contingency(all_hists[:2,non_zero_cols])
# print(c2,p)

# # test if light diff on array
# non_zero_cols = np.max(all_hists[2:,:]>0,axis=0)
# c2,p,dof,_ = chi2_contingency(all_hists[2:,non_zero_cols])
# print(c2,p)


# non_zero_cols = np.max(all_hists[1::2,:]>0,axis=0)
# c2,p,dof,_ = chi2_contingency(all_hists[1::2,non_zero_cols])
# print(c2,p)

# non_zero_cols = np.max(all_hists[0::2,:]>0,axis=0)
# c2,p,dof,_ = chi2_contingency(all_hists[0::2,non_zero_cols])
# print(c2,p)

## CALCULATE FRACTAL DIMENSIONS

In [None]:
# apply to full dataframe
def get_fractal_sinuosity_df(df, n_frs_cutoff, n_mm_total_travel, pix2mm, n_pix_cutoff): # how many pts does section need to be analyzed, how many pix to be identified frame
    x = np.array(df['x_final']) # 'x' has jumps from removing frames close to edge and when ant is stopped for long time
    y = np.array(df['y_final'])
    frames = df['frames']
    idcs = np.logical_and(np.isfinite(x), np.isfinite(y))
    x=x[idcs]; y = y[idcs]; frames= frames[idcs]

    # split into contiguous sections
    sections = np.split(np.arange(0,len(x)), np.where(np.diff(frames)>1)[0]+1)

    # make a richardson plot for each section to calc fractal dimension and sinuosity
    D = np.ones(len(sections))*np.nan
    P = np.ones(len(sections))*np.nan
    for ss,section in enumerate( sections):

        # dont' calculate things if fewer than 20 frames of data in section
        if len(section)<n_frs_cutoff:
#             print('section is too short')
            continue
            
        # total dist
        total_dist = np.cumsum(np.insert(np.linalg.norm([np.diff(x[section]),np.diff(y[section])], axis = 0),0,0))
        
        # don't calculate if total distance traveled in less than __ mm
        if total_dist[-1]<n_mm_total_travel*pix2mm:
#             print('ant travel distance is too short')
            continue
        
        # for each step distance, find total length
        dimensions = np.append(np.arange(5, total_dist[-1]/2, np.ceil(total_dist[-1]/15)), total_dist[-1]-0.01)
        total_lengths = np.ones(dimensions.shape)*np.nan
        for dd, dimen in enumerate(dimensions):
            errors = np.repeat(total_dist[:,np.newaxis], total_dist[-1]/dimen+1,axis=1
                              ) - np.repeat(np.arange(0,total_dist[-1], dimen)[np.newaxis,:], len(total_dist), axis =0)
            min_errors = np.min(np.abs(errors), axis =0)
            if np.any(min_errors > n_pix_cutoff):
#                 print('step length does not have data within cutoff for all steps')
                continue
            step_locations = np.argmin(np.abs(errors), axis =0)
            total_lengths[dd] = np.sum(np.linalg.norm([np.diff(x[section][step_locations]),np.diff(y[section][step_locations])], axis = 0))

        
        # find slope of all points not bookends --> calc fractal dist
        if np.sum(np.isfinite(total_lengths)) > 5: # only calc if 4 points go into defining line
            slope, intercept, _,_,_ = stats.linregress(np.log(dimensions[:-1]),np.log(total_lengths[:-1]))
            D[ss] = 1-slope

        # calculate sinuosity
        P[ss] = total_dist[-1]/total_lengths[-1]

    return D, P

df['fractal_dim'], df['sinuosity'] = zip(*df.apply(get_fractal_sinuosity_df, args = (20, 3, pix2mm, 2), axis=1))
print('Done calculating fractal dimension and sinuosity')
    
    


In [None]:
# save as feather for lme models in R

lens = [len(item) for item in df['fractal_dim']]
all_sections = pd.DataFrame( {"substrate" : np.repeat(df['substrate'].values, lens), "trackway" : np.repeat(df.index.values, lens),
                        "colony" : np.repeat(df['colony'].values, lens), "light": np.repeat(df['light'].values, lens),
                        "fractal_dim" : np.concatenate(df['fractal_dim'].values), "sinuosity" : np.concatenate(df['sinuosity'].values)    })

colony_R = [col.split('20190')[-1][1:] for col in all_sections['colony'].values.tolist()]
# subs_string = all_sections['substrate'].values.tolist()
# substrate_R = np.array([int(s.split('mm')[0]) for s in subs_string])
df_med_R = pd.DataFrame( { "colony" : colony_R, "substrate" : all_sections['substrate'], "trackway": all_sections['trackway'],
                       "light" : all_sections['light'], "fractal_dim": all_sections['fractal_dim'], "sinuosity": all_sections['sinuosity']} )
print('%i trackways in dataframe for analysis in R'%len(df_med_R))

# SAVE AS FEATHER FOR USE WITH R
print('\nSaving fractal dimension and sinuosity data as feather for use in R')
feather.write_dataframe(df_med_R, vid_locations + 'Fractal_Sinuosity_Data.feather')
print('Done saving')

In [None]:
# plot fractal and sinuosity - subplots are substrates


lens = [len(item) for item in df['fractal_dim']]
all_sections = pd.DataFrame( {"substrate" : np.repeat(df['substrate'].values, lens), "trackway" : np.repeat(df.index.values, lens),
                        "colony" : np.repeat(df['colony'].values, lens), "light": np.repeat(df['light'].values, lens),
                        "fractal_dim" : np.concatenate(df['fractal_dim'].values), "sinuosity" : np.concatenate(df['sinuosity'].values)    
                            })


plt.close('all')
plt.figure()
# my_pal = {"Flat": '#464F56', "Array": '#BA4246', "Step": '#087E8B', "C_Step": '#701C6F'}
my_pal = {0: '#464F56', 2: '#BA4246', 3: '#087E8B', 1: '#701C6F'}
for kk, subtype in enumerate(np.array(subtypes)[[-2,0]]):
    print(subtype)
    plt.subplot(1,2,kk+1)
    ax = sns.violinplot(x = 'light', y = 'fractal_dim',  
                data = all_sections[(all_sections['substrate']==subtype)], 
                        cut = 0 , palette=my_pal)
    plt.xlabel(subtype)
    plt.ylim([1, 1.4])

plt.figure()    
for kk, subtype in enumerate(np.array(subtypes)[[-2,0]]):
    print(subtype)
    plt.subplot(1,2,kk+1)
    ax = sns.violinplot(x = 'light', y = 'sinuosity',  
                data = all_sections[(all_sections['substrate']==subtype)], 
                        cut = 0 , palette=my_pal)
    plt.xlabel(subtype)
    plt.ylim([0.8, 13])

    

In [None]:
# plot fractal and sinuosity - subplots are light/dark


lens = [len(item) for item in df['fractal_dim']]
all_sections = pd.DataFrame( {"substrate" : np.repeat(df['substrate'].values, lens), "trackway" : np.repeat(df.index.values, lens),
                        "colony" : np.repeat(df['colony'].values, lens), "light": np.repeat(df['light'].values, lens),
                        "fractal_dim" : np.concatenate(df['fractal_dim'].values), "sinuosity" : np.concatenate(df['sinuosity'].values)    
                            })


plt.close('all')
plt.figure()
my_pal = {"Flat": '#464F56', "Array": '#BA4246', "Step": '#087E8B', "C_Step": '#701C6F'}
for kk, ltype in enumerate([1,0]):
    print('Lighting: %i'%ltype)
    plt.subplot(1,2,kk+1)
#     ax = sns.violinplot(x = 'substrate', y = 'fractal_dim',  
#                 data = all_sections[ (all_sections['light']==ltype) & np.isin(all_sections['substrate'].values, np.array(subtypes)[[0,2]]) ], 
#                         cut = 0 , palette=my_pal)
    ax = sns.boxplot(x = 'substrate', y = 'fractal_dim',  
                data = all_sections[ (all_sections['light']==ltype) & np.isin(all_sections['substrate'].values, np.array(subtypes)[[0,2]]) ], palette=my_pal)
    plt.xlabel(ltype)
    plt.ylim([1, 1.3])

plt.figure()    
for kk, ltype in enumerate([1,0]):
    print('Lighting: %i'%ltype)
    plt.subplot(1,2,kk+1)
#     ax = sns.violinplot(x = 'substrate', y = 'sinuosity',  
#                 data = all_sections[ (all_sections['light']==ltype) & np.isin(all_sections['substrate'].values, np.array(subtypes)[[0,2]]) ], 
#                         cut = 0 , palette=my_pal)
    ax = sns.boxplot(x = 'substrate', y = 'sinuosity',  
                data = all_sections[ (all_sections['light']==ltype) & np.isin(all_sections['substrate'].values, np.array(subtypes)[[0,2]]) ], palette=my_pal)
    plt.xlabel(ltype)
    plt.ylim([0.99, 2])

In [None]:
# plot weird sinuosity values
plt.close('all')
for tr in [1367,1839,2488,2537,3094]:
    plt.figure()
    plt.plot(df.loc[tr]['x'], df.loc[tr]['y'], 'k-', alpha = 0.5)
    plt.plot(df.loc[tr]['x'], df.loc[tr]['y'], 'k.')
    plt.title(df.loc[tr]['sinuosity'])
#     plt.axes('equal')
    plt.xlim([0,1000])
    plt.ylim([0,550])

In [None]:
# test out on specific trials
for tr in np.arange(1431,1438):
    plt.close('all')

    x = np.array(df['x_final'][tr]) # 'x' has jumps from removing frames close to edge and when ant is stopped for long time
    y = np.array(df['y_final'][tr])
    frames = df['frames'][tr]
    idcs = np.logical_and(np.isfinite(x), np.isfinite(y))
    x=x[idcs]; y = y[idcs]; frames= frames[idcs]

    # plot trajectory
    ax1=plt.subplot(2,1,1)
    ax2 =plt.subplot(2,1,2)

    # split into contiguous sections
    sections = np.split(np.arange(0,len(x)), np.where(np.diff(frames)>1)[0]+1)

    # make a richardson plot for each section to calc fractal dimension and sinuosity
    cutoff = 2
    D = np.ones(len(sections))*np.nan
    P = np.ones(len(sections))*np.nan
    for ss,section in enumerate( sections):

        plt.sca(ax1)
        # plt.plot(df['x_kal'][tr],df['y_kal'][tr], '.b', alpha = 0.4)
        plt.plot(x[section],y[section], '-k', alpha = 0.3) # removes sections with ants stopped --> don't want
        plt.plot(x[section],y[section], '.k', alpha = 0.8, markersize= 3)
        plt.xlim([0,1000])
        plt.ylim([0,550])

        # dont' calculate things if fewer than 20 frames of data in section
        if len(section)<20:
            print('section is too short')
            continue
            
        # total dist
        total_dist = np.cumsum(np.insert(np.linalg.norm([np.diff(x[section]),np.diff(y[section])], axis = 0),0,0))  
        
        # don't calculate if total distance traveled in less than __ mm
        if total_dist[-1]<3*pix2mm:
            print('ant travel distance is too short')
            continue
        
        # for each step distance, find total length
        dimensions = np.append(np.arange(5, total_dist[-1]/2, np.ceil(total_dist[-1]/15)), total_dist[-1]-0.01)
#         dimensions = np.append(np.arange(5, total_dist[-1]/2, pix2mm), total_dist[-1]-0.01)
        total_lengths = np.ones(dimensions.shape)*np.nan
        for dd, dimen in enumerate(dimensions):
            errors = np.repeat(total_dist[:,np.newaxis], total_dist[-1]/dimen+1,axis=1
                              ) - np.repeat(np.arange(0,total_dist[-1], dimen)[np.newaxis,:], len(total_dist), axis =0)
            min_errors = np.min(np.abs(errors), axis =0)
            if np.any(min_errors > cutoff):
                print('step length does not have data within cutoff for all steps')
            step_locations = np.argmin(np.abs(errors), axis =0)
            total_lengths[dd] = np.sum(np.linalg.norm([np.diff(x[section][step_locations]),np.diff(y[section][step_locations])], axis = 0))

        plt.sca(ax2)
        plt.plot(np.log(dimensions[1:-1]), np.log(total_lengths[1:-1]), '.', color = ['r','b','g','c'][ss])
        plt.plot(np.log(dimensions[np.array([0,-1])]), np.log(total_lengths[np.array([0,-1])]), '*', color = ['r','b','g','c'][ss])
        
        # find slope of all points not bookends, only if at least 4 points --> calculate fractal dist 
        if np.sum(np.isfinite(total_lengths)) > 5:
            slope, intercept, _,_,_ = stats.linregress(np.log(dimensions[:-1]),np.log(total_lengths[:-1]))
            plt.plot(np.log(dimensions[np.array([0,-2])]), intercept + slope*np.log(dimensions[np.array([0,-2])]), '--k', alpha = 0.5)
            D[ss] = 1-slope
        else:
            print('not enough dimension points to calculate fractal dimension')
        
        # calc sinuosity
        P[ss] = total_dist[-1]/total_lengths[-1]
        print(total_lengths[-1])
        
        
        plt.text(np.log(total_dist[-1]/3*2), np.log(total_dist[-1]/3*2), 'D: %0.2f'%(D[ss]))
        plt.text(np.log(total_dist[-1]/3*2), np.log(total_dist[-1]/3*1), 'P: %0.2f'%(P[ss]))
        plt.ylim([0,8])
        plt.xlabel('log step length')
        plt.ylabel('log trajectory length (pix)')
            

    plt.pause(3)
    

In [None]:
# try to normalize distribution
from scipy import stats

plt.close('all')

for subtype in np.array(subtypes)[[0,2]]:
    for ltype in [1,0]:

        ys = all_sections[ (all_sections['light']==ltype) & (all_sections['substrate'].values==subtype) ]['fractal_dim'].values
        ys = ys[np.isfinite(ys)]
        ys_bc, _ = stats.boxcox(ys)

#         plt.boxplot(ys, positions = [0])
#         plt.boxplot(ys**(1/2), positions = [1])
#         print(stats.shapiro(ys))
#         print(stats.shapiro(ys**(1/3)))
#         print(stats.shapiro(np.log(ys)))
#         plt.boxplot(ys_bc, positions = [1.5])
#         plt.xlim(-0.5,2)


        plt.figure()
        ax1 = plt.subplot(2,1,1)
        stats.probplot(ys, dist = stats.norm, plot= ax1)
        plt.title('sub: %s, light %i'%(subtype, ltype))
        ax2 = plt.subplot(2,1,2)
        stats.probplot(ys_bc, dist = stats.norm, plot= ax2)
        plt.title('')
        

ys = all_sections['fractal_dim'].values
ys = ys[np.isfinite(ys)]
ys_bc, _ = stats.boxcox(ys)
plt.figure()
ax1 = plt.subplot(2,1,1)
stats.probplot(ys, dist = stats.norm, plot= ax1)
plt.title('sub: %s, light %i'%(subtype, ltype))
ax2 = plt.subplot(2,1,2)
stats.probplot(ys_bc, dist = stats.norm, plot= ax2)
plt.title('')

# Normalize velocity by distance to step to average over all trials, Plot all velocity vs. dist from obstacle

In [None]:
# for each trackways, find ave velocity for each distance bin away from crossing obstacle

def ave_speed_by_bins_df(df, var_OI, resolution):
    x_raw = ((df['x_final']-df['cross_step_x'])*df['travel_dir'])/pix2mm
    v_raw = np.append(df[var_OI], np.nan)
    keep = np.logical_and(np.isfinite(v_raw) ,np.isfinite(x_raw))
    vs = v_raw[keep];  xs = x_raw[keep]; 
    
    bins = np.append(np.arange(-11,11+resolution,resolution),18)
#     bins = np.append(np.insert(np.arange(-5,5+resolution,resolution),0,-12),[12,18])
    bin_idcs = np.digitize(xs,bins)
    vs_norm = np.ones(bins.shape)*np.nan

    bvals, idx, counts = np.unique(bin_idcs, return_inverse=True, return_counts = True)
    sum_vs = np.bincount(idx, weights = vs)
    mean_vs = sum_vs / counts
    vs_norm[bvals] = mean_vs
    return vs_norm[1:-1]


resolution = 1
step_df['v_forward_norm'] = step_df.apply(ave_speed_by_bins_df, args = ('v_forward', resolution,), axis=1)
step_df['v_lateral_norm'] = step_df.apply(ave_speed_by_bins_df, args = ('v_lateral', resolution,), axis=1)
step_df['v_forward_lowpass_norm'] = step_df.apply(ave_speed_by_bins_df, args = ('v_forward_lowpass', resolution,), axis=1)
step_df['v_lateral_lowpass_norm'] = step_df.apply(ave_speed_by_bins_df, args = ('v_lateral_lowpass', resolution,), axis=1)

print('done finding ave velocity for each x distance from obstacle bin')


In [None]:
# try on one trial only
tr = 2960
var_OI = 'v_forward'

x_raw = ((step_df['x_final'][tr]-step_df['cross_step_x'][tr])*step_df['travel_dir'][tr])/pix2mm
v_raw = np.append(step_df[var_OI][tr], np.nan)
keep = np.logical_and(np.isfinite(v_raw) ,np.isfinite(x_raw))
vs = v_raw[keep];  xs = x_raw[keep]; 

#     bins = np.arange(-15,15+resolution,resolution)
bins = np.append(np.insert(np.arange(-5,5+resolution,resolution),0,-12),[12,18])
bin_idcs = np.digitize(xs,bins)
vs_norm = np.ones(bins.shape)*np.nan

bvals, idx, counts = np.unique(bin_idcs, return_inverse=True, return_counts = True)
sum_vs = np.bincount(idx, weights = vs)
mean_vs = sum_vs / counts
vs_norm[bvals] = mean_vs

In [None]:
# plot speed vs. distance from obstacle

plt.close('all')
plt.figure()

stOI = 1 # all step ups or downs
vel_OI ='' # '_lowpass'
all_vs={}
ax={}
    
for ss, subOI in enumerate(['C_Step','Step']):
    for li, liOI in enumerate([1,0]):
        

        dataOI = step_df.loc[(step_df['substrate']==subOI) & (step_df['light']==liOI) & (step_df['step_dir']==stOI)]

        ax[2*ss+li]=plt.subplot(2,2,2*li+ss+1)
        plt.axvline(x=0,color='k',alpha = 0.4)
        for index, row in dataOI.iterrows():
                plt.plot(((row['x_final']-row['cross_step_x'])*row['travel_dir'])/pix2mm,
                         np.append(row['v_forward%s'%vel_OI], np.nan),'-k', alpha = 0.05)
                
        
        all_vs[2*ss+li] = np.vstack(dataOI['v_forward%s_norm'%vel_OI].values)
        all_vys = np.vstack(np.abs(dataOI['v_lateral%s_norm'%vel_OI].values))
        ave = np.nanmean(all_vs[2*ss+li],axis=0)
        bins = np.arange(-11,11+resolution,resolution)
#         bins = np.append(np.insert(np.arange(-5,5+resolution,resolution),0,-12),12)
        xs = np.diff(bins)/2+bins[:-1]
        plt.plot(xs, ave, '.', color = pltcolors[(2*ss+li)%2])
        plt.plot(xs, ave, color = pltcolors[(2*ss+li)%2], linestyle =['-',':'][(2*ss+li)//2])
        plt.plot(xs, np.nanmedian(all_vs[2*ss+li],axis=0), '+',color = pltcolors[(2*ss+li)%2])
        plt.axvline(x=bins[0], color= 'k', linestyle = ':')
        plt.axvline(x=bins[-1], color= 'k', linestyle = ':')
        
        plt.plot(xs, np.nanmean(all_vys,axis=0), '.',color = pltcolors[3])
        plt.plot(xs, np.nanmedian(all_vys,axis=0), '+',color = pltcolors[3])
        plt.plot(xs, np.nanmean(all_vys,axis=0), '-',color = pltcolors[3])

        plt.plot([xs,xs], [ave-np.nanstd(all_vs[2*ss+li],axis=0), 
                           ave+np.nanstd(all_vs[2*ss+li],axis=0)], '-', alpha = 0.5, color = pltcolors[(2*ss+li)%2])
        plt.plot([xs,xs], [np.nanmean(all_vys,axis=0)-np.nanstd(all_vys,axis=0), 
                               np.nanmean(all_vys,axis=0)+np.nanstd(all_vys,axis=0)], '-', alpha = 0.5,color = pltcolors[3])
        
        plt.xlim([-15,15])
        plt.ylim([0,60])
        plt.ylabel('speed (mm/s)')
        plt.xlabel('distance to step')
        plt.text(8,53,'n: %i'%all_vs[2*ss+li].shape[0])
        if li==0:
            plt.gca().get_xaxis().set_visible(False)
            plt.title('Sub: %s'%subOI,loc='left')
        if ss==1:
            plt.gca().get_yaxis().set_visible(False)

            
# calculate sig differences among each velocity bin
step_vals = np.ones(len(bins)-1)*np.nan
cstep_vals = np.ones(len(bins)-1)*np.nan
light_vals = np.ones(len(bins)-1)*np.nan
dark_vals = np.ones(len(bins)-1)*np.nan

for b_OI in np.arange(len(bins)-1):
    _,cstep_vals[b_OI] = stats.ttest_ind(all_vs[0][:,b_OI], all_vs[1][:,b_OI], nan_policy = 'omit')
    _,step_vals[b_OI] = stats.ttest_ind(all_vs[2][:,b_OI], all_vs[3][:,b_OI], nan_policy = 'omit')
    _,light_vals[b_OI] = stats.ttest_ind(all_vs[0][:,b_OI], all_vs[2][:,b_OI], nan_policy = 'omit')
    _,dark_vals[b_OI] = stats.ttest_ind(all_vs[1][:,b_OI], all_vs[3][:,b_OI], nan_policy = 'omit')
# print(cstep_vals)
# print(step_vals)
# print(light_vals)
# print(dark_vals)

    
#     plt.close('all')
plt.figure()
dodge = 0.05
for pp in np.arange(4):
    ave = np.nanmean(all_vs[pp],axis=0)
    plt.plot(xs-2*dodge+pp*dodge, ave, '.', color = pltcolors[pp%2])
    plt.plot(xs-2*dodge+pp*dodge, ave, color = pltcolors[pp%2], linestyle = ['-','--'][pp//2])
#     plt.plot([xs-2*dodge+pp*dodge,xs-2*dodge+pp*dodge], [ave-np.nanstd(all_vs[pp],axis=0), 
#                            ave+np.nanstd(all_vs[pp],axis=0)],  alpha = 0.3, color = pltcolors[pp%2], linestyle = ['-',':'][pp//2])

plt.plot(xs[light_vals<0.05], 30*np.ones(np.sum(light_vals<0.05)), '*', alpha = 0.5, color = pltcolors[0])
plt.plot(xs[dark_vals<0.05], 31*np.ones(np.sum(dark_vals<0.05)), '*', alpha = 0.5, color = pltcolors[1])
plt.plot(xs[cstep_vals<0.05], 32*np.ones(np.sum(cstep_vals<0.05)), 'vk', alpha = 0.5)
plt.plot(xs[step_vals<0.05], 33*np.ones(np.sum(step_vals<0.05)), 'vk', alpha = 0.5, markerfacecolor='white')

In [None]:
# plot out step down velocity traces by colony to see what's going on

plt.close('all')
plt.figure(figsize=(7,12))
coltypes = sorted(list(set(df['colony'].values)))

stOI = -1 # all step ups or downs
liOI = 1 
# all_vs={}
# ax={}
    

for cc,colOI in enumerate(coltypes):
    for ss, subOI in enumerate(['C_Step','Step']):
        

        dataOI = step_df.loc[(step_df['substrate']==subOI) & (step_df['light']==liOI) 
                             & (step_df['step_dir']==stOI) & (step_df['colony']==colOI)]

        plt.subplot(len(coltypes),2,2*cc+ss+1)
        plt.axvline(x=0,color='k',alpha = 0.4)
        plt.axhline(y=0,color='k',linestyle = ':', alpha = 0.4)
        for index, row in dataOI.iterrows():
                plt.plot(((row['x_final']-row['cross_step_x'])*row['travel_dir'])/pix2mm,
                         np.append(row['v_forward'], np.nan),'-k', alpha = 0.2)
                if np.median(row['v_forward'][(((row['x_final']-row['cross_step_x'])*row['travel_dir'])<0)[:-1]]) <1 :
                    print(index)
                
        
#         all_vs[2*ss+li] = np.vstack(dataOI['v_final_norm'].values)[:,1:]
#         all_vys = np.vstack(np.abs(dataOI['vy_final_norm'].values))[:,1:]
#         ave = np.nanmedian(all_vs[2*ss+li],axis=0)
#         bins = np.append(np.insert(np.arange(-5,5+resolution,resolution),0,-15),15)
#         xs = np.diff(bins)/2+bins[:-1]
#         plt.plot(xs, ave, '.', color = pltcolors[(2*ss+li)%2])
#         plt.plot(xs, ave, color = pltcolors[(2*ss+li)%2], linestyle =['-',':'][(2*ss+li)//2])
#         plt.plot(xs+0.2, np.nanmean(all_vys,axis=0), '.',color = pltcolors[3])
#         plt.plot(xs+0.2, np.nanmean(all_vys,axis=0), '-',color = pltcolors[3])

#         plt.plot([xs,xs], [ave-np.nanstd(all_vs[2*ss+li],axis=0), 
#                            ave+np.nanstd(all_vs[2*ss+li],axis=0)], '-', alpha = 0.5, color = pltcolors[(2*ss+li)%2])
#         plt.plot([xs+0.2,xs+0.2], [np.nanmean(all_vys,axis=0)-np.nanstd(all_vys,axis=0), 
#                                np.nanmean(all_vys,axis=0)+np.nanstd(all_vys,axis=0)], '-', alpha = 0.5,color = pltcolors[3])
        
        plt.xlim([-15,15])
        plt.ylim([-5,60])
        plt.xlabel('distance to step')
        plt.ylabel(colOI[-4:])
        plt.text(8,53,'n: %i'%len(dataOI))
        if cc==0:
            plt.title('Sub: %s'%subOI,loc='left')
        if cc < len(coltypes)-1:
            plt.gca().get_xaxis().set_visible(False)
        if ss==1:
            plt.gca().get_yaxis().set_visible(False)


# X and Y velocities, forward and lateral velocities

In [None]:
# FORWARD AND LATERAL
x_min = -6
plt.close('all')

for tr in [287]:#np.random.choice(step_df.loc[step_df['step_dir']==1].index, 5, replace=False):
    x_raw = ((step_df['x_final'][tr]-step_df['cross_step_x'][tr])*step_df['travel_dir'][tr])/pix2mm
    y_raw = step_df['y_final'][tr]/pix2mm
    
    if np.any(np.isfinite(x_raw)) & (np.nanmin(x_raw)<x_min) & (np.isfinite(step_df['light'][tr])) : # is there any data?
        
        print('Tr: %i -- Sub: %s -- Light: %i -- Step Direction: %0.0f'%
              (tr, step_df['substrate'][tr], step_df['light'][tr], step_df['step_dir'][tr]))
        v_raw = step_df['v_final'][tr]/pix2mm
        vx_raw = np.append(step_df['v_forward'][tr], np.nan)
        vy_raw = np.append(step_df['v_lateral'][tr], np.nan)
        c_idx = int(step_df['cross_step_idx'][tr])
        c_x = step_df['cross_step_x'][tr]
        c_y = step_df['cross_step_y'][tr]
        ti = (np.arange(0,len(x_raw))-c_idx)/fps
        
        
        # is there a big block of nan preventing analysis?
        x_min_idx = np.nanargmin(1/(x_raw-x_min)) #np.where(np.abs(x_raw-x_min)<.2)[0][0] #np.nanargmin(np.abs(x_raw-x_min))
        if np.sum(np.isnan(x_raw[x_min_idx:c_idx])) > 10:
            print('     too many nan before gets to obstacle')
            continue
        
        
        fig = plt.figure(figsize = (9,12))
        ax1 = plt.subplot(4,1,1)
#         plt.plot(step_df['x_final'][tr]/pix2mm, step_df['y_final'][tr]/pix2mm, '.k', alpha = 0.2)
        plt.plot(step_df['x_final'][tr]/pix2mm, step_df['y_final'][tr]/pix2mm, '-k', alpha = 0.3)
        plt.plot(step_df['cross_step_x'][tr]/pix2mm+step_df['travel_dir'][tr]*7, 7.5, 'ok')
        plt.axvline(x=step_df['cross_step_x'][tr]/pix2mm,color = 'k', alpha = 1)
        plt.ylim([0,550/pix2mm])
        plt.xlim([0,1000/pix2mm])
        plt.gca().invert_yaxis()
        plt.title('Tr: %i -- Sub: %s -- Light: %i -- Step Direction: %0.0f'%
              (tr, step_df['substrate'][tr], step_df['light'][tr], step_df['step_dir'][tr]), loc = 'left')
        # add near edge cut-off
        col = df['colony'][tr]
        sub = df['substrate'][tr]
        idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
        p = sub_edge_df.iloc[idx]['hlines'].values[0]   
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm, 'k')
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm-2, ':r')
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm-16, 'k')
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm-14, ':r')
        
        plt.subplot(4,1,2)
        plt.plot(x_raw, step_df['angle_smooth'][tr], '-k', alpha = 0.3)
        plt.plot(x_raw, step_df['angle_improved'][tr], '--k', alpha = 0.3)
        plt.plot(x_raw, step_df['angle'][tr]-90, ':k', alpha = 0.3)
        plt.axvline(x=0,color = 'k', alpha = 0.3)
        plt.ylim([-180,180])
        plt.ylabel('smoothed angle')
        
        plt.subplot(4,1,3)
        plt.plot(x_raw,vy_raw, '-r', alpha = 0.3)
        plt.plot(x_raw,vx_raw, '-b', alpha = 0.3)
        plt.plot(x_raw, lowpass_filt_sections(vy_raw, 0.06) , 'r', alpha = 0.5, linewidth = 2)
        plt.plot(x_raw, lowpass_filt_sections(vx_raw, 0.06), 'b', alpha = 0.5, linewidth = 2)
        plt.axvline(x=0,color = 'k', alpha = 0.3)
        plt.axhline(y=0,color = 'k', alpha = 0.3)     
        plt.ylim([-30,50])
        plt.xlabel('x dist from step')
        plt.ylabel('velocity (mm/s)')
        
        ax4=plt.subplot(4,1,4)
        plt.plot(step_df['frames'][tr],vy_raw, '-r', alpha = 0.3)
        plt.plot(step_df['frames'][tr],vx_raw, '-b', alpha = 0.3)
        plt.plot(step_df['frames'][tr], lowpass_filt_sections(vy_raw, 0.06) , 'r', alpha = 0.5, linewidth = 2)
        plt.plot(step_df['frames'][tr], lowpass_filt_sections(vx_raw, 0.06), 'b', alpha = 0.5, linewidth = 2)
        plt.axvline(x=step_df['frames'][tr][c_idx],color = 'k', alpha = 0.3)
        plt.axhline(y=0,color = 'k', alpha = 0.3)     
        plt.ylim([-30,50])
        plt.xlabel('frame')
        plt.ylabel('velocity (mm/s)')
        
        ax2 = ax4.twinx()
        ax2.plot(step_df['frames'][tr][:-1], np.diff(lowpass_filt_sections(vy_raw, 0.06))*fps,':r', alpha = 0.2)
        ax2.plot(step_df['frames'][tr][:-1], np.diff(lowpass_filt_sections(vx_raw, 0.06))*fps,':b', alpha = 0.2)
        plt.ylabel('accel (mm/s^2)')
        plt.axhline(y=0,color = 'k', alpha = 0.3, linestyle=':')
        plt.ylim([-300,300])



In [None]:
# X AND Y
x_min = -6
plt.close('all')

for tr in np.random.choice(step_df.loc[step_df['step_dir']==1].index, 5, replace=False):
    x_raw = ((step_df['x_final'][tr]-step_df['cross_step_x'][tr])*step_df['travel_dir'][tr])/pix2mm
    y_raw = step_df['y_final'][tr]/pix2mm
    
    if np.any(np.isfinite(x_raw)) & (np.nanmin(x_raw)<x_min) & (np.isfinite(step_df['light'][tr])) : # is there any data?
        
        print('Tr: %i -- Sub: %s -- Light: %i -- Step Direction: %0.0f'%
              (tr, step_df['substrate'][tr], step_df['light'][tr], step_df['step_dir'][tr]))
        v_raw = step_df['v_final'][tr]/pix2mm
        vx_raw = np.append(np.diff(x_raw)*fps, np.nan)
        vy_raw = np.append(np.diff(y_raw)*fps, np.nan)
        c_idx = int(step_df['cross_step_idx'][tr])
        c_x = step_df['cross_step_x'][tr]
        c_y = step_df['cross_step_y'][tr]
        ti = (np.arange(0,len(x_raw))-c_idx)/fps
        
        
        # is there a big block of nan preventing analysis?
        x_min_idx = np.nanargmin(1/(x_raw-x_min)) #np.where(np.abs(x_raw-x_min)<.2)[0][0] #np.nanargmin(np.abs(x_raw-x_min))
        if np.sum(np.isnan(x_raw[x_min_idx:c_idx])) > 10:
            print('     too many nan before gets to obstacle')
            continue
        
        
        fig,ax1 = plt.subplots(figsize = (12,5))
        plt.plot(x_raw,vy_raw, '-r', alpha = 0.3)
        plt.plot(x_raw,vx_raw, '-b', alpha = 0.3)
        plt.plot(x_raw, lowpass_filt_sections(vy_raw, 0.06) , 'r', alpha = 0.5, linewidth = 2)
        plt.plot(x_raw, lowpass_filt_sections(vx_raw, 0.06), 'b', alpha = 0.5, linewidth = 2)

        
        plt.axvline(x=0,color = 'k', alpha = 0.3)
        plt.axhline(y=0,color = 'k', alpha = 0.3)
        
        plt.ylim([-30,50])

        
        
        ax2 = ax1.twinx()
        ax2.plot(x_raw[:-1], np.diff(lowpass_filt_sections(vy_raw, 0.06))*fps,':r', alpha = 0.2)
        ax2.plot(x_raw[:-1], np.diff(lowpass_filt_sections(vx_raw, 0.06))*fps,':b', alpha = 0.2)
        plt.ylabel('accel (mm/s^2)')
        plt.axhline(y=0,color = 'k', alpha = 0.3, linestyle=':')
        plt.ylim([-300,300])


#         v_movavg = take_moving_ave(v_raw, 24, 5)
#         v_lowpass = lowpass_filt_sections(v_raw, 0.06)
# #         plt.plot(x_raw, v_movavg, '-r', alpha = 0.3)
#         plt.plot(x_raw, v_lowpass, '-g', alpha = 0.3)
#         plt.title('Tr: %i -- Sub: %s -- Light: %i -- Step Direction: %0.0f'%
#               (tr, step_df['substrate'][tr], step_df['light'][tr], step_df['step_dir'][tr]))

# Find distance and time from step up that ant decelerates

In [None]:
# try out on single trial -- average speed for distance bins around when cross obstacle
from scipy.signal import argrelextrema


# tr = 4863 #114#4813
# [521, 2376,688, 461, 2229, 2211, 3294]
# [ 231, 232, 234, 243]

step_dir_OI = 1

x_min = -7
plt.close('all')
for tr in np.random.choice(step_df.loc[step_df['step_dir']==step_dir_OI].index, 5, replace=False):
    x_raw = ((step_df['x_final'][tr]-step_df['cross_step_x'][tr])*step_df['travel_dir'][tr])/pix2mm
    y_raw = step_df['y_final'][tr]/pix2mm
    
    if np.any(np.isfinite(x_raw)) & (np.nanmin(x_raw)<x_min): # is there any data?
        
        print('Tr: %i -- Sub: %s -- Light: %i -- Step Direction: %0.0f'%
              (tr, step_df['substrate'][tr], step_df['light'][tr], step_df['step_dir'][tr]))
        v_raw = step_df['v_final'][tr]/pix2mm
#         vx_raw = np.diff(x_raw)*fps
#         vy_raw = np.diff(y_raw)*fps
        vx_raw = np.append(step_df['v_forward'][tr], np.nan)
        vy_raw = np.append(step_df['v_lateral'][tr], np.nan)
        
        
        # does ant go both directions across substrate?
        tmp = np.diff(x_raw*pix2mm)
        bin_counts,_ = np.histogram(tmp[np.isfinite(tmp)],bins = [-5,-1,1,5])
#         print( np.abs( (bin_counts[2]+1)/(bin_counts[0]+1)) )
        if np.abs( (bin_counts[2]+1)/(bin_counts[0]+1))<10:
            print('ant walks in both forward and backward directions')
            continue
        
        
        
        # PART 1 ---- normalize x position so can average across all trials
        # only keep non-nan values to average across normalized x positions
        keep = np.logical_and(np.isfinite(v_raw) ,np.isfinite(x_raw))
        vs = v_raw[keep];  xs = x_raw[keep]; 

        resolution = 0.05
        bins = np.arange(-15,15+resolution,resolution)
        bin_idcs = np.digitize(xs,bins)
        vs_norm = np.ones(bins.shape)*np.nan
        bvals, idx, counts = np.unique(bin_idcs, return_inverse=True, return_counts = True)
        sum_vs = np.bincount(idx, weights = vs)
        mean_vs = sum_vs / counts
        vs_norm[bvals] = mean_vs


        # PART 2 ----  find time and distance of deceleration
        c_idx = int(step_df['cross_step_idx'][tr])
        c_x = step_df['cross_step_x'][tr]
        c_y = step_df['cross_step_y'][tr]
        v_lowpass = lowpass_filt_sections(vx_raw, 0.06)
        
        #         plt.figure(figsize = (12,5))
        fig = plt.figure(figsize = (9,12))
        ax1 = plt.subplot(4,1,1)
        plt.plot(step_df['x_final'][tr]/pix2mm, step_df['y_final'][tr]/pix2mm, '-k', alpha = 0.3)
        plt.plot(step_df['cross_step_x'][tr]/pix2mm+step_df['travel_dir'][tr]*step_df['step_dir'][tr]*7, 7.5, 'ok')
        print(step_df['travel_dir'][tr], step_df['step_dir'][tr], np.nanmean(np.diff(step_df['x_final'][tr])) )
        plt.axvline(x=step_df['cross_step_x'][tr]/pix2mm,color = 'k', alpha = 1)
        plt.ylim([0,550/pix2mm])
        plt.xlim([0,1000/pix2mm])
        plt.gca().invert_yaxis()
        plt.title('Tr: %i -- Sub: %s -- Light: %i -- Step Direction: %0.0f'%
              (tr, step_df['substrate'][tr], step_df['light'][tr], step_df['step_dir'][tr]), loc = 'left')
        # add near edge cut-off
        col = df['colony'][tr]
        sub = df['substrate'][tr]
        idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
        p = sub_edge_df.iloc[idx]['hlines'].values[0]   
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm, 'k')
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm-2, ':r')
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm-16, 'k')
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm-14, ':r')
        
        plt.subplot(4,1,2)
        plt.plot(x_raw, step_df['angle_smooth'][tr], '-k', alpha = 0.3)
        plt.plot(x_raw, step_df['angle_improved'][tr], '--k', alpha = 0.3)
        plt.plot(x_raw, step_df['angle'][tr]-90, ':k', alpha = 0.3)
        plt.axvline(x=0,color = 'k', alpha = 0.3)
        plt.ylim([-180,180])
        plt.xlim([-15,15])
        plt.ylabel('smoothed angle')
        
        ax3=plt.subplot(4,1,3)
        plt.plot(x_raw,vy_raw, '-r', alpha = 0.3)
        plt.plot(x_raw,vx_raw, '-b', alpha = 0.3)
        plt.plot(x_raw, lowpass_filt_sections(vy_raw, 0.06) , 'r', alpha = 0.5, linewidth = 2)
        plt.plot(x_raw, lowpass_filt_sections(vx_raw, 0.06), 'b', alpha = 0.5, linewidth = 2)
        plt.axvline(x=0,color = 'k', alpha = 0.3)
        plt.axhline(y=0,color = 'k', alpha = 0.3)     
        plt.ylim([-20,50])
        plt.xlim([-15,15])
        plt.xlabel('x dist from step')
        plt.ylabel('velocity (mm/s)')
        
        ax4=plt.subplot(4,1,4)
        t_cross = step_df['frames'][tr][c_idx]/fps
        plt.plot(step_df['frames'][tr]/fps-t_cross ,vy_raw, '-r', alpha = 0.3)
        plt.plot(step_df['frames'][tr]/fps-t_cross ,vx_raw, '-b', alpha = 0.3)
        plt.plot(step_df['frames'][tr]/fps-t_cross , lowpass_filt_sections(vy_raw, 0.06) , 'r', alpha = 0.5, linewidth = 2)
        plt.plot(step_df['frames'][tr]/fps-t_cross , lowpass_filt_sections(vx_raw, 0.06), 'b', alpha = 0.5, linewidth = 2)
        plt.axvline(x=0,color = 'k', alpha = 0.3)
        plt.axhline(y=0,color = 'k', alpha = 0.3)     
        plt.ylim([-20,50])
        plt.xlabel('time (s)')
        plt.ylabel('velocity (mm/s)')
        
        ax2 = ax4.twinx()
        ax2.plot(step_df['frames'][tr][:-1]/fps-t_cross, np.diff(lowpass_filt_sections(vy_raw, 0.06))*fps,':r', alpha = 0.2)
        ax2.plot(step_df['frames'][tr][:-1]/fps-t_cross, np.diff(lowpass_filt_sections(vx_raw, 0.06))*fps,':b', alpha = 0.2)
        plt.ylabel('accel (mm/s^2)')
        plt.axhline(y=0,color = 'k', alpha = 0.3, linestyle=':')
        plt.ylim([-300,300])
        
        
        
        # CALCULATE THINGS
        
    
        # if it's a step down, look for decel after crossing step too
        if step_df['step_dir'][tr] == -1:
            c_idx = c_idx + 20
    
        # is there a big block of nan preventing analysis?
        x_min_idx = np.nanargmin(1/(x_raw-x_min)) #np.where(np.abs(x_raw-x_min)<.2)[0][0] #np.nanargmin(np.abs(x_raw-x_min))
        if np.sum(np.isnan(x_raw[x_min_idx:c_idx])) >5:
            print('too many nan before gets to obstacle')
            plt.pause(2)
            continue

        # use acceleration to find when decelerates
        a_cutoff = -150
        a = np.diff(v_lowpass)*fps
        
        # does ant slow down at all
        if np.nanmin(a[x_min_idx:c_idx]) > -100:
            print('does not slow down')
            plt.pause(3)
            continue
        
        biggest_valley = np.nanargmin(a[x_min_idx:c_idx]) + x_min_idx
        valleys = argrelextrema(a[x_min_idx:(biggest_valley+4)], np.less, order = 2) + x_min_idx
        valleys = valleys[ np.logical_and( x_raw[valleys]>-5, a[valleys]<-100) ]
        if len(valleys)>1:
            peaks_in_between = [np.any(a[valleys[i]:valleys[i+1]]>-50) for i in np.arange(0,len(valleys)-1)]
            peak_OI = np.where(np.logical_not(np.append(peaks_in_between,False))) [0][0]
            d_i2 = valleys[peak_OI]
            
        elif len(valleys)==1:
            d_i2 = valleys[0]
        else:
            print('no single decel identified')
            continue
        
        print(c_idx, len(a))
    
        plt.sca(ax3)
        plt.plot(x_raw[d_i2-3], v_lowpass[d_i2-3], '+g')
        plt.axvline(x=x_raw[d_i2], color='g', alpha = 0.1, linestyle = ':')
        plt.axvline(x=x_raw[d_i2-3], color='g', alpha = 0.3, linestyle = '-')
        
        plt.sca(ax4)
        plt.plot(step_df['frames'][tr][d_i2-3]/fps-t_cross, v_lowpass[d_i2-3], '+g')
        plt.axvline(x=step_df['frames'][tr][d_i2]/fps-t_cross, color='g', alpha = 0.1, linestyle = ':')
        plt.axvline(x=step_df['frames'][tr][d_i2-3]/fps-t_cross, color='g', alpha = 0.3, linestyle = '-')

        
        decel_x = x_raw[d_i2-3]
        decel_t = (int(step_df['cross_step_idx'][tr]) - d_i2 +3)/fps
        print('decel dist: %0.2f mm -- decel time: %0.2f s'%(decel_x, decel_t))

        
        plt.pause(1)


In [None]:
# APPLY TO WHOLE DATAFRAME
from scipy.signal import argrelextrema

def find_decel_df(x, pix2mm, x_min):
    
    decel_x = np.nan; decel_t = np.nan;

    x_raw = ((x['x_final']-x['cross_step_x'])*x['travel_dir'])/pix2mm
    if np.any(np.isfinite(x_raw)) & (np.nanmin(x_raw)<x_min): # is there any data?
        v_raw = x['v_final']/pix2mm
        vx_raw = np.append(x['v_forward'], np.nan)
        vy_raw = np.append(x['v_lateral'], np.nan)
        c_idx = int(x['cross_step_idx'])
        c_x = x['cross_step_x']
        c_y = x['cross_step_y']
        v_lowpass = lowpass_filt_sections(vx_raw, 0.06)
        
        # if it's a step down, look for decel after crossing step too
        if step_df['step_dir'][tr] == -1:
            c_idx = c_idx + 20
        
        # does ant go both directions across substrate?
        tmp = np.diff(x_raw*pix2mm)
        bin_counts,_ = np.histogram(tmp[np.isfinite(tmp)],bins = [-5,-1,1,5])
        if np.abs( (bin_counts[2]+1)/(bin_counts[0]+1))<10:
            return decel_x, decel_t
        
        # if it's a step down, look for decel after crossing step too
        if x['step_dir'] == -1:
            c_idx = c_idx + 20
    
        # is there a big block of nan preventing analysis?
        x_min_idx = np.nanargmin(1/(x_raw-x_min)) 
        if np.sum(np.isnan(x_raw[x_min_idx:c_idx])) >5:
            return decel_x, decel_t
        
        # use acceleration to find when decelerates
        a_cutoff = -150
        a = np.diff(v_lowpass)*fps
   
        # does ant slow down at all
        if np.nanmin(a[x_min_idx:c_idx]) > -100:
            return decel_x, decel_t
        
        biggest_valley = np.nanargmin(a[x_min_idx:c_idx]) + x_min_idx
        valleys = argrelextrema(a[x_min_idx:(biggest_valley+4)], np.less, order = 2) + x_min_idx
        valleys = valleys[ np.logical_and( x_raw[valleys]>-5, a[valleys]<-100) ]
        if len(valleys)>1:
            peaks_in_between = [np.any(a[valleys[i]:valleys[i+1]]>-50) for i in np.arange(0,len(valleys)-1)]
            peak_OI = np.where(np.logical_not(np.append(peaks_in_between,False))) [0][0]
            d_i2 = valleys[peak_OI]
            
        elif len(valleys)==1:
            d_i2 = valleys[0]
        else:
            return decel_x, decel_t
        
        decel_x = x_raw[d_i2-3]
        decel_t = (int(x['cross_step_idx']) - d_i2 +3)/fps
        
    return decel_x, decel_t


x_min = -7 #only use trials that has speed data for at least this distance from the obstacle
step_df['decel_x'], step_df['decel_t'] = zip(*step_df.apply(
    find_decel_df, args = (pix2mm, x_min), axis=1))

print('done saving deceleration distance and time info')

In [None]:
# BOXPLOT OF DECEL DATA

step_dir = 1

plt.close('all')
plt.figure()
plt.subplot(1,2,1)
bp=sns.boxplot(x="light", y = "decel_x", hue = "substrate", data = step_df.loc[step_df['step_dir']==step_dir], order = [1,0])
plt.title('stepping up', loc = 'left')
# plt.title('stepping down', loc = 'left')
plt.ylabel('deceleration dist from step (mm)')
plt.ylim([0,-6])
print(step_df.groupby(['step_dir','light','substrate']).apply(lambda x: len(x)))
# print(step_df.groupby(['step_dir','light','substrate']).apply(lambda x: x.notnull().sum())) # how many non-nan values in boxplot?

plt.subplot(1,2,2)
sns.boxplot(x="light", y = "decel_t", hue = "substrate", data = step_df.loc[step_df['step_dir']==step_dir], order = [1,0])
plt.ylabel('deceleration time from crossing step (s)')
plt.ylim([0,1.5])



# VIOLIN PLOT?

# plt.figure()
# plt.subplot(1,2,1)
# sns.violinplot(x="light", y = "decel_x", hue = "substrate", data = step_df.loc[step_df['step_dir']==step_dir], order = [1,0])
# plt.title('stepping down', loc = 'left')
# plt.ylabel('deceleration dist from step (mm)')


# Use PCA to identify deceleration? 

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler



df_OI = df.loc[df['substrate'].isin(['Flat', 'Array'])]
lens = [len(item) for item in df_OI['x_final']]
pca_data = pd.DataFrame( {"substrate" : np.repeat(df_OI['substrate'].values, lens), "trackway" : np.repeat(df_OI.index.values, lens),
                            "colony" : np.repeat(df_OI['colony'].values, lens), 
                            "x_final" : np.concatenate(df_OI['x_final'].values), "y_final" : np.concatenate(df_OI['y_final'].values),
                            "v_forward" : np.concatenate(df_OI['v_forward'].apply(lambda x: np.append(x, np.nan)).values), 
                            "v_lateral" : np.concatenate(df_OI['v_lateral'].apply(lambda x: np.append(x, np.nan)).values), 
                            "a_forward" : np.concatenate(df_OI['a_forward'].apply(lambda x: np.append(x, np.nan)).values), 
                            "a_lateral" : np.concatenate(df_OI['a_lateral'].apply(lambda x: np.append(x, np.nan)).values), 
                            })
pca_data = pca_data.dropna(axis=0)

features = ['v_forward','v_lateral','a_forward','a_lateral']
x = pca_data.loc[:,features].values
scaled = StandardScaler().fit_transform(x)

pca = PCA(n_components = 2)
pca.fit_transform(scaled)
projected = pca.transform(scaled)





# loadings on first PC (in %)
print('first PC loadings: ', (pca.components_.T**2)[:,0][0:4]*100)
print('second PC loadings: ', (pca.components_.T**2)[:,1][0:4]*100)


plt.close('all')
colors = {'Array': 'red', 'Flat': 'black'}

# plot PCA 1 and 2
plt.figure()
plt.scatter(projected[:,0], projected[:,1],s=2, alpha = 0.01, color = pca_data['substrate'].apply(lambda x: colors[x]))
plt.axis('equal')

# plot scaled data
plt.figure()
plt.subplot(1,2,1)
plt.scatter(scaled[:,0], scaled[:,1],s=2, alpha = 0.1, color = pca_data['substrate'].apply(lambda x: colors[x]))
plt.plot(scaled[:,0], scaled[:,1],'.k', alpha = 0.01)
plt.xlabel('v_for'); plt.ylabel('v_lat');
plt.subplot(1,2,2)
plt.scatter(scaled[:,2], scaled[:,3],s=2, alpha = 0.01, color = pca_data['substrate'].apply(lambda x: colors[x]))
plt.xlabel('a_for'); plt.ylabel('a_lat');



# ANTENNAE ANALYSIS

## point cloud of antennal tips

In [None]:
wrt_bodypart = 'neck' #head

plt.close('all')
plt.figure(figsize = (8,13))
for ss,subtype in enumerate(['Flat', 'Array']):
    print(' -- ', subtype)
    sub_df = df.loc[(df['substrate']==subtype)]
    
    
    lens = [len(item) for item in sub_df['antenna0_0_x_filt_WRThead']]
    all_sections = pd.DataFrame( {"substrate" : np.repeat(sub_df['substrate'].values, lens), "trackway" : np.repeat(sub_df.index.values, lens),
                            "colony" : np.repeat(sub_df['colony'].values, lens), "light": np.repeat(sub_df['light'].values, lens),
                            "antenna_L_x" : np.concatenate(sub_df['antenna0_0_x_filt_WRT%s'%wrt_bodypart].values), 
                            "antenna_L_y" : np.concatenate(sub_df['antenna0_0_y_filt_WRT%s'%wrt_bodypart].values),    
                            "antenna_R_x" : np.concatenate(sub_df['antenna1_0_x_filt_WRT%s'%wrt_bodypart].values), 
                            "antenna_R_y" : np.concatenate(sub_df['antenna1_0_y_filt_WRT%s'%wrt_bodypart].values)  
                                 })
    
    for ll,li in enumerate([1,0]):
        plt.subplot(2,2,2*ss+ll+1)
    
        for si,side in enumerate(['L','R']):
            x = all_sections.loc[(all_sections['light']==li)]['antenna_%s_x'%side].dropna()
            y = all_sections.loc[(all_sections['light']==li)]['antenna_%s_y'%side].dropna() 

    #         sns.kdeplot(x, y, shade=True, color = 'b')
            xy_train = np.vstack([y,x]).T
            d = np.min(xy_train.shape) # number of dimensions
            n = np.max(xy_train.shape) # number of points
            bw_sil = (n*(d+2) / 4.) ** (-1./ (d +4)) # silverman - "Density estimation for statistics and data analysis" 1986
            bw_sco = n**(-1./(d+4)) # scott's - "Multivariate density estimateion..." 1992
            #     print('bandwidths (scott vs. silverman): %0.9f, %0.9f'%(bw_sco, bw_sil))
            kde = KernelDensity(bandwidth = bw_sco, kernel = 'gaussian')
            kde.fit(xy_train)
            resolution = 1
            xx,yy = np.meshgrid(np.arange(0,100,resolution),np.arange(-80,80,resolution)) # predict density on a mesh of datapoints
            xx= np.concatenate(xx); yy = np.concatenate(yy)
            density = np.exp(kde.score_samples(np.vstack([yy,xx]).T))
    
            # plot using 2d hist
            c_array = colors.ListedColormap(pltcolors[li])(range(0,1000))
            c_array[:, -1]=np.arange(0,1,0.001)
            new_cmap = LinearSegmentedColormap.from_list(name = '%s_alpha'%pltcolors[li], colors = c_array)
            hb=plt.hist2d(x, y, 50, weights =np.ones_like(x)/float(len(x)), vmin = 0, vmax =0.008, cmap = new_cmap)


            for pp,p_cutoff in enumerate([0.05, 1-2*(.1915+.1498+.0919), 1-2*(.1915+.1498), 1-2*.1915]):
                sorted_den = np.sort(density)
                sorted_den_idcs = np.argsort(density)
                idx_cutoff = np.argmin(np.abs(np.cumsum(sorted_den)/np.sum(density)-p_cutoff)) 
                inlier_mask = sorted_den_idcs[idx_cutoff:]

                # convex hull of all inlier points
                hull = ConvexHull(np.vstack([yy[inlier_mask],xx[inlier_mask]]).T)
                plt.text(75, -65+120*si+pp*5, 'A: %0.2f'%hull.area)
                for simplex in hull.simplices:
                    plt.plot(xx[inlier_mask][simplex], yy[inlier_mask][simplex], 'k-', alpha = 0.5)

        plt.gca().invert_yaxis()
        plt.ylim([70,-70])
        plt.xlim([0,80])
        if wrt_bodypart == 'neck':
            plt.xlim([0,100])
        plt.title('%s - Light on? %i'%(subtype,li), loc = 'left')
        plt.text(40,-65, 'n: %i'%np.sum(np.isfinite(all_sections.loc[(all_sections['light']==li)]['antenna_L_x'])), color ='k')
        plt.text(40,65, 'n: %i'%np.sum(np.isfinite(all_sections.loc[(all_sections['light']==li)]['antenna_R_x'])), color ='k')
        
del all_sections

## How long do antennae explore the step?

#### for randomly-chosen trials, plot figure showing antenna position and velocities

In [93]:
step_dir_OI = 1

x_min = -7
plt.close('all')
for tr in [1239, 2426]:#np.random.choice(step_df.loc[step_df['step_dir']==step_dir_OI].index, 5, replace=False):
    x_raw = ((step_df['thorax_x_filt_fullfr'][tr]-step_df['cross_step_x'][tr])*step_df['travel_dir'][tr])/pix2mm
    y_raw = step_df['thorax_y_filt_fullfr'][tr]/pix2mm
    
    if np.any(np.isfinite(x_raw)): # is there any data?
        
        print('Tr: %i -- Sub: %s -- Light: %i -- Step Direction: %0.0f'%
              (tr, step_df['substrate'][tr], step_df['light'][tr], step_df['step_dir'][tr]))
        v_raw = step_df['thorax_v_filt'][tr]/pix2mm
#         vx_raw = np.diff(x_raw)*fps
#         vy_raw = np.diff(y_raw)*fps
        vx_raw = np.append(step_df['v_forward'][tr], np.nan)
        vy_raw = np.append(step_df['v_lateral'][tr], np.nan)
        
        
        # does ant go both directions across substrate?
        tmp = np.diff(x_raw*pix2mm)
        bin_counts,_ = np.histogram(tmp[np.isfinite(tmp)],bins = [-5,-1,1,5])
#         print( np.abs( (bin_counts[2]+1)/(bin_counts[0]+1)) )
        if np.abs( (bin_counts[2]+1)/(bin_counts[0]+1))<10:
            print('ant walks in both forward and backward directions')
            continue
        
        
        
        # PART 1 ---- normalize x position so can average across all trials
        # only keep non-nan values to average across normalized x positions
        keep = np.logical_and(np.isfinite(v_raw) ,np.isfinite(x_raw))
        vs = v_raw[keep];  xs = x_raw[keep]; 

        resolution = 0.05
        bins = np.arange(-15,15+resolution,resolution)
        bin_idcs = np.digitize(xs,bins)
        vs_norm = np.ones(bins.shape)*np.nan
        bvals, idx, counts = np.unique(bin_idcs, return_inverse=True, return_counts = True)
        sum_vs = np.bincount(idx, weights = vs)
        mean_vs = sum_vs / counts
        vs_norm[bvals] = mean_vs


        # PART 2 ----  find time and distance of deceleration
        c_idx = int(step_df['cross_step_idx'][tr])
        c_x = step_df['cross_step_x'][tr]
        c_y = step_df['cross_step_y'][tr]
        v_lowpass = lowpass_filt_sections(vx_raw, 0.06)
        
        fig = plt.figure(figsize = (9,12))
        ax1 = plt.subplot(4,1,1)
        plt.plot(step_df['thorax_x_filt_fullfr'][tr]/pix2mm, step_df['thorax_y_filt_fullfr'][tr]/pix2mm, '-k', alpha = 0.3)
        plt.plot(step_df['cross_step_x'][tr]/pix2mm+step_df['travel_dir'][tr]*step_df['step_dir'][tr]*7, 7.5, 'ok')
        print(step_df['travel_dir'][tr], step_df['step_dir'][tr], np.nanmean(np.diff(step_df['thorax_x_filt_fullfr'][tr])) )
        plt.axvline(x=step_df['cross_step_x'][tr]/pix2mm,color = 'k', alpha = 1)
        plt.ylim([0,550/pix2mm])
        plt.xlim([0,1000/pix2mm])
        plt.gca().invert_yaxis()
        plt.title('Tr: %i -- Sub: %s -- Light: %i -- Step Direction: %0.0f'%
              (tr, step_df['substrate'][tr], step_df['light'][tr], step_df['step_dir'][tr]), loc = 'left')
        # add near edge cut-off
        col = df['colony'][tr]
        sub = df['substrate'][tr]
        idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
        p = sub_edge_df.iloc[idx]['hlines'].values[0]   
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm, 'k')
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm-1, ':r')
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm-16, 'k')
        plt.plot(np.arange(0,1000)/pix2mm, (p[0] + p[1]*np.arange(0,1000))/pix2mm-15, ':r')
        
        ax2=plt.subplot(4,1,2)
        plt.plot(x_raw,vy_raw, '-r', alpha = 0.3)
        plt.plot(x_raw,vx_raw, '-b', alpha = 0.3)
        plt.plot(x_raw, lowpass_filt_sections(vy_raw, 0.06) , 'r', alpha = 0.5, linewidth = 2)
        plt.plot(x_raw, lowpass_filt_sections(vx_raw, 0.06), 'b', alpha = 0.5, linewidth = 2)
        plt.axvline(x=0,color = 'k', alpha = 0.3)
        plt.axhline(y=0,color = 'k', alpha = 0.3)     
        plt.ylim([-20,50])
        plt.xlim([-15,15])
        plt.xlabel('x dist from step')
        plt.ylabel('velocity (mm/s)')

        
        
        
        ax3=plt.subplot(4,1,3)
        # OPTION 1: antenna dist vs. time
        t_cross = step_df['frames'][tr][c_idx]/fps
        p = sub_edge_df.iloc[idx]['vlines'].values[0][1,:]   
        antenna_contact_time = np.ones(2)*np.nan
        for side in [0,1]:
            step_x_for_antenna = (-1*p[0]/p[1] + (1/p[0])*step_df['antenna%i_0_y_filt_fullfr'%side][tr])/pix2mm
            antenna_dist_to_step = step_df['travel_dir'][tr]*(step_df['antenna%i_0_x_filt_fullfr'%side][tr]/pix2mm - step_x_for_antenna)
            if np.any(np.logical_and(antenna_dist_to_step-(-0.5)>0, antenna_dist_to_step-(-0.5)<0.5)):
                ant_sta_idx = np.where(antenna_dist_to_step-(-0.5)>0)[0][0]
                ant_sta = step_df['frames'][tr][ant_sta_idx]/fps-t_cross
                if np.any(np.logical_and(antenna_dist_to_step>0.5, antenna_dist_to_step<1)):
                    ant_sto_idx = np.where(antenna_dist_to_step<0.5)[0][-1]
                    ant_sto = step_df['frames'][tr][ant_sto_idx]/fps-t_cross
                    if np.any([np.isnan(step_x_for_antenna[ant_sto_idx+1]), np.isnan(step_x_for_antenna[ant_sta_idx-1])]):
                        rect = Rectangle((ant_sta, -0.5), ant_sto-ant_sta, 1, color = ['c','m'][side], alpha = 0.2, fill = False, linestyle = ':')
                        ax3.add_patch(rect)
                        antenna_contact_time[side] = np.nan
                    else:
                        antenna_contact_time[side] = ant_sto-ant_sta
                        rect = Rectangle((ant_sta, -0.5), ant_sto-ant_sta, 1, color = ['c','m'][side], alpha = 0.4, fill = False, linestyle = '--')
                        ax3.add_patch(rect)
                        plt.text(0.4, 4-side*0.8, '%0.2f s'%antenna_contact_time[side], color = ['c','m'][side])
            plt.plot(step_df['frames'][tr]/fps-t_cross , antenna_dist_to_step, '-', color = ['c','m'][side], alpha = 0.8)
            
        plt.axhline(y=0, color = 'k', alpha = 0.2)
        plt.axvline(x=0,color = 'k', alpha = 0.3)
        plt.ylim([-5,5])
        plt.xlim([np.min(step_df['frames'][tr]-5)/fps, np.max(step_df['frames'][tr]+5)/fps]-t_cross)
        
        
        # OPTION 2: forward velocity vs. antenna dist
#         antenna_dist={}
#         p = sub_edge_df.iloc[idx]['vlines'].values[0][1,:]   
#         for side in [0,1]:
#             step_x_for_antenna = (-1*p[0]/p[1] + (1/p[0])*step_df['antenna%i_0_y_filt_fullfr'%side][tr])/pix2mm
#             antenna_dist[side] = step_df['travel_dir'][tr]*(step_df['antenna%i_0_x_filt_fullfr'%side][tr]/pix2mm - step_x_for_antenna)
#         plt.plot(np.nanmax([antenna_dist[0],antenna_dist[1]],axis=0), vx_raw, '-b', alpha = 0.3)
#         plt.axvline(x=0,color='k', alpha = 0.3)




        
        ax4=plt.subplot(4,1,4)
        t_cross = step_df['frames'][tr][c_idx]/fps
        plt.plot(step_df['frames'][tr]/fps-t_cross ,vy_raw, '-r', alpha = 0.3)
        plt.plot(step_df['frames'][tr]/fps-t_cross ,vx_raw, '-b', alpha = 0.3)
        plt.plot(step_df['frames'][tr]/fps-t_cross , lowpass_filt_sections(vy_raw, 0.06) , 'r', alpha = 0.5, linewidth = 2)
        plt.plot(step_df['frames'][tr]/fps-t_cross , lowpass_filt_sections(vx_raw, 0.06), 'b', alpha = 0.5, linewidth = 2)
        plt.axvline(x=0,color = 'k', alpha = 0.3)
        plt.axhline(y=0,color = 'k', alpha = 0.3)     
        plt.ylim([-20,50])
        plt.xlim([np.min(step_df['frames'][tr]-5)/fps, np.max(step_df['frames'][tr]+5)/fps]-t_cross)
        plt.xlabel('time (s)')
        plt.ylabel('velocity (mm/s)')
        
        ax5 = ax4.twinx()
        ax5.plot(step_df['frames'][tr][:-1]/fps-t_cross, np.diff(lowpass_filt_sections(vy_raw, 0.06))*fps,':r', alpha = 0.2)
        ax5.plot(step_df['frames'][tr][:-1]/fps-t_cross, np.diff(lowpass_filt_sections(vx_raw, 0.06))*fps,':b', alpha = 0.2)
        plt.ylabel('accel (mm/s^2)')
        plt.axhline(y=0,color = 'k', alpha = 0.3, linestyle=':')
        plt.ylim([-300,300])
        
        
        
        # CALCULATE THINGS
        
    
        # if it's a step down, look for decel after crossing step too
        if step_df['step_dir'][tr] == -1:
            c_idx = c_idx + 20
    
#         # is there a big block of nan preventing analysis?
#         x_min_idx = np.nanargmin(1/(x_raw-x_min)) #np.where(np.abs(x_raw-x_min)<.2)[0][0] #np.nanargmin(np.abs(x_raw-x_min))
#         if np.sum(np.isnan(x_raw[x_min_idx:c_idx])) >5:
#             print('too many nan before gets to obstacle')
#             plt.pause(2)
#             continue

        # use acceleration to find when decelerates
        a_cutoff = -150
        a = np.diff(v_lowpass)*fps
        
        # does ant slow down at all
        if np.nanmin(a[x_min_idx:c_idx]) > -100:
            print('does not slow down')
            plt.pause(3)
            continue
        
        biggest_valley = np.nanargmin(a[x_min_idx:c_idx]) + x_min_idx
        valleys = argrelextrema(a[x_min_idx:(biggest_valley+4)], np.less, order = 2) + x_min_idx
        valleys = valleys[ np.logical_and( x_raw[valleys]>-5, a[valleys]<-100) ]
        if len(valleys)>1:
            peaks_in_between = [np.any(a[valleys[i]:valleys[i+1]]>-50) for i in np.arange(0,len(valleys)-1)]
            peak_OI = np.where(np.logical_not(np.append(peaks_in_between,False))) [0][0]
            d_i2 = valleys[peak_OI]
            
        elif len(valleys)==1:
            d_i2 = valleys[0]
        else:
            print('no single decel identified')
            continue
        
#         print(c_idx, len(a))
    
        plt.sca(ax2)
        plt.plot(x_raw[d_i2-3], v_lowpass[d_i2-3], '+g')
        plt.axvline(x=x_raw[d_i2], color='g', alpha = 0.1, linestyle = ':')
        plt.axvline(x=x_raw[d_i2-3], color='g', alpha = 0.3, linestyle = '-')
        
        plt.sca(ax4)
        plt.plot(step_df['frames'][tr][d_i2-3]/fps-t_cross, v_lowpass[d_i2-3], '+g')
        plt.axvline(x=step_df['frames'][tr][d_i2]/fps-t_cross, color='g', alpha = 0.1, linestyle = ':')
        plt.axvline(x=step_df['frames'][tr][d_i2-3]/fps-t_cross, color='g', alpha = 0.3, linestyle = '-')
        
        plt.sca(ax3)
        plt.axvline(x=step_df['frames'][tr][d_i2]/fps-t_cross, color='g', alpha = 0.1, linestyle = ':')
        plt.axvline(x=step_df['frames'][tr][d_i2-3]/fps-t_cross, color='g', alpha = 0.3, linestyle = '-')

        
        decel_x = x_raw[d_i2-3]
        decel_t = (int(step_df['cross_step_idx'][tr]) - d_i2 +3)/fps
        print('decel dist: %0.2f mm -- decel time: %0.2f s'%(decel_x, decel_t))

        
        plt.pause(1)

Tr: 1239 -- Sub: C_Step -- Light: 1 -- Step Direction: 1
-1.0 1.0 -1.0971184052076006
decel dist: -0.46 mm -- decel time: 0.26 s




Tr: 2426 -- Sub: C_Step -- Light: 1 -- Step Direction: 1
1.0 1.0 1.3924489646703786
decel dist: -3.55 mm -- decel time: 1.00 s


#### calculate exploring time for whole dataframe

In [None]:
# APPLY TO WHOLE DATAFRAME

def antenna_step_time_df(step_df, sub_edge_df, pix2mm, fps, d_cutoff):
    
    antenna_contact_time = np.ones(2)*np.nan
    
    col = step_df['colony']
    sub = step_df['substrate']
    idx = sub_edge_df[(sub_edge_df['colony']==col) & (sub_edge_df['substrate']==sub)].index
    p = sub_edge_df.iloc[idx]['vlines'].values[0][1,:]   
        
    for side in [0,1]:
        step_x_for_antenna = (-1*p[0]/p[1] + (1/p[0])*step_df['antenna%i_0_y_filt_fullfr'%side])/pix2mm
        antenna_dist_to_step = step_df['travel_dir']*(step_df['antenna%i_0_x_filt_fullfr'%side]/pix2mm - step_x_for_antenna)

        if np.any(np.logical_and(antenna_dist_to_step-(-1*d_cutoff)>0, antenna_dist_to_step-(-1*d_cutoff)<d_cutoff)):
            ant_sta_idx = np.where(antenna_dist_to_step-(-1*d_cutoff)>0)[0][0]
            ant_sta = step_df['frames'][ant_sta_idx]/fps

            if np.any(np.logical_and(antenna_dist_to_step>d_cutoff, antenna_dist_to_step<2*d_cutoff)):
                ant_sto_idx = np.where(antenna_dist_to_step<d_cutoff)[0][-1]
                ant_sto = step_df['frames'][ant_sto_idx]/fps

                if np.any([np.isnan(step_x_for_antenna[ant_sto_idx+1]), np.isnan(step_x_for_antenna[ant_sta_idx-1])]):
                    antenna_contact_time[side] = np.nan
                else:
                    antenna_contact_time[side] = ant_sto-ant_sta # in seconds

                        
    return antenna_contact_time



d_cutoff = 0.5 # how many mm from step = exploring by antennae
step_df['antennae_contact_time'] = step_df.apply(antenna_step_time_df, args = (sub_edge_df, pix2mm, fps, d_cutoff), axis=1)
step_df['antennae_contact_time_total']=step_df['antennae_contact_time'].apply(sum)

print('done saving time that antennae are exploring the step')

#### violin plots and stats comparing distributions

In [89]:
# compare exploration time across lighting and substrates
plt.close('all')

subtypes = sorted(list(set(step_df['substrate'])))
my_pal = {"Flat": '#464F56', "Array": '#BA4246', "Step": '#087E8B', "C_Step": '#701C6F'}

# box plots
# plt.figure()
# for kk, ltype in enumerate([1,0]):
#     print('Lighting: %i'%ltype)
#     plt.subplot(1,2,kk+1)
#     ax = sns.boxplot(x = 'substrate', y = 'antennae_contact_time_total',  
#                 data =step_df[ (step_df['light']==ltype)], palette=my_pal)
#     plt.xlabel(ltype)
#     plt.ylim([0,1.5])


# violin plots
plt.figure()  
for step_dir in [1]:
    for kk, ltype in enumerate([1,0]):
        print('\nLighting: %i'%ltype)
        plt.subplot(1,2,kk+1)
        ax = sns.violinplot(x = 'substrate', y = 'antennae_contact_time_total',  
                    data =step_df[ (step_df['light']==ltype) & (step_df['step_dir']==step_dir)], palette=my_pal, cut = 0)
        plt.xlabel(ltype)
        plt.ylim([0,1.5])
        for ss in subtypes:
            print('    %s -- n: %i'%(ss, len(step_df[ (step_df['light']==ltype) & (step_df['step_dir']==step_dir)  & (step_df['substrate']==ss) ])))

            
from scipy.stats import mannwhitneyu
step_dir = 1
print('\n')
for ss in subtypes:
    stat, p = mannwhitneyu(step_df[ (step_df['light']==1) & (step_df['substrate']==ss) & (step_df['step_dir']==step_dir)]['antennae_contact_time_total'].values, 
                           step_df[ (step_df['light']==0) & (step_df['substrate']==ss) & (step_df['step_dir']==step_dir)]['antennae_contact_time_total'].values )
    print('Step up - Substrate: %s - Light v Dark: p = %0.3f'%(ss,p))
print('\n')
for ltype in [1,0]:
    stat, p = mannwhitneyu(step_df[ (step_df['light']==ltype) & (step_df['substrate']=='Step') & (step_df['step_dir']==step_dir)]['antennae_contact_time_total'].values, 
                           step_df[ (step_df['light']==ltype) & (step_df['substrate']=='C_Step') & (step_df['step_dir']==step_dir)]['antennae_contact_time_total'].values )
    print('Step up - Light: %i - Step v C_Step: p = %0.3f'%(ltype,p))


Lighting: 1
    C_Step -- n: 181
    Step -- n: 240

Lighting: 0
    C_Step -- n: 113
    Step -- n: 155


Step up - Substrate: C_Step - Light v Dark: p = 0.018
Step up - Substrate: Step - Light v Dark: p = 0.413


Step up - Light: 1 - Step v C_Step: p = 0.000
Step up - Light: 0 - Step v C_Step: p = 0.135


In [90]:
# where are long values?
step_df[step_df['antennae_contact_time_total']>1].index


Int64Index([1239, 2426], dtype='int64')

## trying to plot traces of whole antennae relative to head, TOO MUCH DATA = CRASHES KERNEL

In [None]:
# trying to plot point cloud of all antennal joints
wrt_bodypart = 'head' #head

plt.close('all')
plt.figure(figsize = (8,13))
for ss,subtype in enumerate(['Flat']):#, 'Array']):
    print(' -- ', subtype)
    sub_df = df.loc[(df['substrate']==subtype)]
    
    for si, side in enumerate(['L']):#,'R']:
    

        lens = [len(item) for item in sub_df['antenna0_0_x_filt_WRThead']]
        all_sections = pd.DataFrame( {"substrate" : np.repeat(sub_df['substrate'].values, lens), "trackway" : np.repeat(sub_df.index.values, lens),
                                "colony" : np.repeat(sub_df['colony'].values, lens), "light": np.repeat(sub_df['light'].values, lens),
                                "antenna_0_x" : np.concatenate(sub_df['antenna%i_0_x_filt_WRT%s'%(si, wrt_bodypart)].values), 
                                "antenna_0_y" : np.concatenate(sub_df['antenna%i_0_y_filt_WRT%s'%(si, wrt_bodypart)].values),    
                                "antenna_1_x" : np.concatenate(sub_df['antenna%i_1_x_filt_WRT%s'%(si, wrt_bodypart)].values), 
                                "antenna_1_y" : np.concatenate(sub_df['antenna%i_1_y_filt_WRT%s'%(si, wrt_bodypart)].values),          
                                "antenna_2_x" : np.concatenate(sub_df['antenna%i_2_x_filt_WRT%s'%(si, wrt_bodypart)].values), 
                                "antenna_2_y" : np.concatenate(sub_df['antenna%i_2_y_filt_WRT%s'%(si, wrt_bodypart)].values)
                                     })

        for ll,li in enumerate([1,0]):
            plt.subplot(2,2,2*ss+ll+1)

            for si,side in enumerate(['L','R']):
                x0 = all_sections.loc[(all_sections['light']==li)]['antenna_0_x'].values
                y0 = all_sections.loc[(all_sections['light']==li)]['antenna_0_y'].values
                x1 = all_sections.loc[(all_sections['light']==li)]['antenna_1_x'].values
                y1 = all_sections.loc[(all_sections['light']==li)]['antenna_1_y'].values
                x2 = all_sections.loc[(all_sections['light']==li)]['antenna_2_x'].values
                y2 = all_sections.loc[(all_sections['light']==li)]['antenna_2_y'].values 

                plt.plot([x0,x1,x2],[y0,y1,y2],'-k', alpha = 0.01)


        plt.gca().invert_yaxis()
        plt.ylim([70,-70])
        plt.xlim([0,80])
        if wrt_bodypart == 'neck':
            plt.xlim([0,100])
        plt.title('%s - Light on? %i'%(subtype,li), loc = 'left')
#         plt.text(40,-65, 'n: %i'%np.sum(np.isfinite(all_sections.loc[(all_sections['light']==li)]['antenna_L_x'])), color ='k')
#         plt.text(40,65, 'n: %i'%np.sum(np.isfinite(all_sections.loc[(all_sections['light']==li)]['antenna_R_x'])), color ='k')

## traces of lateral antennal motion

In [None]:
tr=25

wrt_bodypart = 'neck'# 'head' #head

plt.close('all')
plt.figure(figsize = (8,5))

plt.plot(df['frames'][tr], -1*df['antenna0_0_y_filt_WRT%s'%wrt_bodypart][tr]/pix2mm, '-b' )
plt.plot(df['frames'][tr], df['antenna1_0_y_filt_WRT%s'%wrt_bodypart][tr]/pix2mm, '-r' )

plt.ylabel('lateral position (mm)')
plt.xlabel('frame')
plt.title('%s - lateral antennal position wrt head'%df['substrate'][tr], loc ='left')
    


## find antennal contacts

In [None]:
tr=24
plt.close('all')

for side in [0,1]:
    plt.subplot(2,1,side+1)

    x = df['antenna%i_0_x_filt_fullfr'%side][tr]
    y = df['antenna%i_0_y_filt_fullfr'%side][tr]
    v= np.linalg.norm([np.diff(x),np.diff(y)],axis=0) / pix2mm *fps

    plt.plot(df['frames'][tr][:-1],v, '-', color = ['b','r'][side], alpha = 0.4)
    
    angle0 = np.rad2deg( np.arctan2(-1*(side*-2+1)*(df['antenna%i_0_y_filt_WRThead'%side][tr]-df['antenna%i_1_y_filt_WRThead'%side][tr]), 
                                    [df['antenna%i_0_x_filt_WRThead'%side][tr]-df['antenna%i_1_x_filt_WRThead'%side][tr]]).flatten())
    
    angle1 = np.rad2deg( np.arctan2(-1*(side*-2+1)*(df['antenna%i_1_y_filt_WRThead'%side][tr]-df['antenna%i_2_y_filt_WRThead'%side][tr]), 
                                    [df['antenna%i_1_x_filt_WRThead'%side][tr]-df['antenna%i_2_x_filt_WRThead'%side][tr]]).flatten())
    
    plt.plot(df['frames'][tr], angle0, ':', color = ['b','r'][side], alpha = 0.3)
    plt.plot(df['frames'][tr], angle1, '--', color = ['b','r'][side], alpha = 0.3)

