# Run Duration 

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

# Load the spider activity recording; the first CSV column is used as the row index.
filename = 'Metazygia wittfeldae Monitor 1 activity_DD_binary'
df = pd.read_csv(filename + '.csv', index_col=0)
# Join the Date and Time text columns into a single datetime column and make it the index.
df = df.assign(Date_Time=pd.to_datetime(df['Date'] + ' ' + df['Time'])).set_index('Date_Time')

# Identifies where the spider started and stopped moving and handles the corner cases (first and last rows of the
# dataframe). This function finds each switch in the spider column from 0 to 1 or from 1 to 0 and labels it as 1
# or -1, respectively, in the switch column. It also checks the corner cases; that is, whether or not the
# spider is moving at the very beginning or very end. If a run begins immediately, the first row of the switch
# column is labeled as 1. If a run is still in progress at the end, the last row of the switch column is labeled
# as -1. If a run only begins on the very last row, its switch is reset to 0 so that the unfinished run is ignored.

def adjust_start_and_end(df_switch, x):
    """Write run start/end markers for column ``x`` into ``df_switch['switch']``.

    The ``switch`` column is first filled with the row-to-row difference of
    column ``x``, so a 0 -> 1 transition becomes 1 (run start) and a 1 -> 0
    transition becomes -1 (run end). The first and last rows are then
    corrected, because ``diff`` cannot see beyond the ends of the frame:

    * first row is 1: the run starts immediately, so ``switch`` is set to 1;
    * last two rows are both 1: the run is still in progress at the end, so
      the last ``switch`` is set to -1 to close it;
    * last row is 1 but the previous row is 0: a run starts on the final row
      and never ends, so the last ``switch`` is reset to 0 and the run is
      ignored.

    A single-row frame whose only value is 1 is treated like the last case
    (there is no previous row), and an empty frame is left with an empty
    ``switch`` column.

    Parameters
    ----------
    df_switch : pandas.DataFrame
        Frame holding column ``x``; modified in place. Index labels are
        assumed to be unique, since rows are addressed by label.
    x : hashable
        Name of the column to analyse.

    Returns
    -------
    None
    """
    df_switch['switch'] = df_switch[x].diff()

    if df_switch.empty:
        return

    # Address the boundary rows by their actual labels instead of assuming
    # that the first row is labelled 0.
    first_label = df_switch.index[0]
    last_label = df_switch.index[-1]

    if df_switch.at[first_label, x] == 1:
        df_switch.at[first_label, 'switch'] = 1

    if df_switch.at[last_label, x] != 1:
        return

    if len(df_switch) < 2:
        # No previous row exists: the run starts on the final row, so drop it.
        df_switch.at[last_label, 'switch'] = 0
        return

    previous_value = df_switch.at[df_switch.index[-2], x]
    if previous_value == 1:
        df_switch.at[last_label, 'switch'] = -1
    elif previous_value == 0:
        df_switch.at[last_label, 'switch'] = 0

    return

# Creates two lists; runStart is a list with the indices of when the spider started moving, runEnd is a list with
# the indices of when the spider stopped moving. Run duration is found by subtracting these two lists element by
# element: the index at which each run started is subtracted from the index at which that run ended. The indices
# are found by looking at the switch column: 1 indicates that the run has begun and -1 indicates that
# the run has ended.

def find_duration(df_switch, x):
    """Return the length of every run marked in ``df_switch['switch']``.

    Rows whose ``switch`` value is 1 are run starts and rows whose value is
    -1 are run ends. The i-th duration is the index label of the i-th end
    minus the index label of the i-th start, so the result is expressed in
    whatever units the frame's index uses.

    Parameters
    ----------
    df_switch : pandas.DataFrame
        Frame containing a ``switch`` column.
    x : hashable
        Name of the column the ``switch`` values were derived from. It is
        only used in the error message.

    Returns
    -------
    numpy.ndarray
        One duration per run, in order of occurrence.

    Raises
    ------
    ValueError
        If the number of run starts differs from the number of run ends.
    """
    switch = df_switch['switch']
    runStart = df_switch.index[switch == 1].tolist()
    runEnd = df_switch.index[switch == -1].tolist()

    # Without this check NumPy would broadcast a length-1 (or empty) list
    # against the other one and silently return meaningless durations.
    if len(runStart) != len(runEnd):
        raise ValueError(
            'Column {!r}: found {} run start(s) but {} run end(s); '
            'starts and ends cannot be paired.'.format(x, len(runStart), len(runEnd))
        )

    return np.subtract(runEnd, runStart)

# This function displays the new dataframe and saves it to a .csv file. This function loops through all spider 
# columns, uses the previously defined functions, and appends the run duration columns to a new dataframe. If 
# a spider has a shorter run duration count than another spider, then empty rows are filled with NaN. Run 
# durations are converted to float because NaN values cannot be converted to integers. 

def allSpiderRuns(df):
    """Compute, display and save the run durations of every spider column.

    Every column of ``df`` whose name starts with ``'s'`` is processed with
    ``adjust_start_and_end`` and ``find_duration``. The resulting durations
    become one column per spider in a new frame; spiders with fewer runs
    than others are padded with NaN. All values are stored as float because
    NaN cannot be held in an integer column.

    The frame is shown with ``display`` (expected to be supplied by the
    notebook environment) and written to ``filename + '_run_duration.csv'``,
    where ``filename`` is the module-level name of the input file.

    Parameters
    ----------
    df : pandas.DataFrame
        Activity data with one column per spider.

    Returns
    -------
    None
    """
    # Work on a copy with a fresh positional index so that run durations are
    # measured in rows and the caller's frame is left untouched.
    df_switch = df.reset_index().copy()

    # Select the spider columns from the frame that was passed in (the
    # previous code read them from an undefined name, ``df1``).
    spider_columns = df.columns[df.columns.str.startswith('s')]

    duration_columns = []
    for x in spider_columns:
        adjust_start_and_end(df_switch, x)
        runDuration = find_duration(df_switch, x)
        duration_columns.append(pd.Series(runDuration, name=x).astype(float))

    # A single concat aligns all columns on the union of their indices and
    # fills the gaps with NaN; pd.concat rejects an empty list, hence the guard.
    if duration_columns:
        newdf = pd.concat(duration_columns, axis=1)
    else:
        newdf = pd.DataFrame()

    display(newdf)
    newdf.to_csv(filename + '_run_duration.csv')

In [7]:
# Compute the run durations for every spider column of df, display the resulting table,
# and save it to filename + '_run_duration.csv'.
allSpiderRuns(df)

Unnamed: 0,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,...,s23,s24,s25,s26,s27,s28,s29,s30,s31,s32
0,4.0,4.0,5.0,8.0,3.0,3.0,32.0,1.0,1.0,14.0,...,15.0,5.0,2.0,,4.0,,2.0,1.0,2.0,1.0
1,1.0,10.0,2.0,2.0,1.0,8.0,27.0,1.0,3.0,3.0,...,4.0,5.0,1.0,,2.0,,2.0,3.0,1.0,1.0
2,2.0,2.0,1.0,2.0,3.0,1.0,2.0,2.0,1.0,1.0,...,31.0,1.0,1.0,,8.0,,4.0,2.0,2.0,3.0
3,2.0,12.0,10.0,2.0,2.0,6.0,8.0,1.0,1.0,5.0,...,1.0,2.0,1.0,,19.0,,4.0,1.0,2.0,2.0
4,1.0,4.0,6.0,4.0,1.0,5.0,1.0,2.0,1.0,1.0,...,10.0,2.0,3.0,,2.0,,6.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1226,,,,,,,,,,,...,,,,,,,,,,
1227,,,,,,,,,,,...,,,,,,,,,,
1228,,,,,,,,,,,...,,,,,,,,,,
1229,,,,,,,,,,,...,,,,,,,,,,
