# Create Run Duration Dataframe

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

# Load the spider activity CSV and index it by the combined date/time stamp
filename = 'Metazygia wittfeldae Monitor 1 activity_DD_binary'
df = pd.read_csv(filename + '.csv', index_col=0)
df = df.assign(
    Date_Time=pd.to_datetime(df['Date'] + ' ' + df['Time'])
).set_index('Date_Time')

# This function takes parameters df_switch and col and marks, in a 'switch' column, where the spider
# switches between running and not running. df_switch is a copy of the given dataframe with reset
# indices. col is the name of the spider column.

def adjust_start_and_end(df_switch, col):
    """Write a 'switch' column into df_switch marking run starts and ends.

    The 'switch' column is the row-to-row difference of ``df_switch[col]``:
    a 0 -> 1 change is labeled 1 (run start), a 1 -> 0 change is labeled -1
    (run end) and no change is labeled 0. The first and last rows are then
    patched so that every start has a matching end:

    * a run already in progress on the first row is labeled 1 there;
    * a run still in progress on the last row is labeled -1 there (the
      run is closed on its own last row, so it is counted one row shorter
      than a run that is followed by a 0);
    * a run that only starts on the last row is labeled 0, i.e. dropped.

    Parameters
    ----------
    df_switch : pandas.DataFrame
        Frame containing ``col``. It is modified in place; any existing
        'switch' column is overwritten.
    col : str
        Name of the binary (0/1) activity column to analyse.

    Returns
    -------
    None
    """
    # diff() leaves NaN in the first row; it is only overwritten below
    # when the first row is already part of a run.
    df_switch['switch'] = df_switch[col].diff()

    n_rows = len(df_switch)
    if n_rows == 0:
        # nothing to patch on an empty frame
        return

    # Address rows through the index so the function does not depend on the
    # first label being exactly 0.
    first = df_switch.index[0]
    last = df_switch.index[-1]

    # run begins immediately: mark a start on the first row
    if df_switch.at[first, col] == 1:
        df_switch.at[first, 'switch'] = 1

    if df_switch.at[last, col] == 1:
        if n_rows == 1:
            # a single-row run has a start but no end: drop it
            df_switch.at[last, 'switch'] = 0
        else:
            previous = df_switch.at[df_switch.index[-2], col]
            if previous == 1:
                # run still in progress at the end: close it on the last row
                df_switch.at[last, 'switch'] = -1
            elif previous == 0:
                # run starts on the last row and can never end: drop it
                df_switch.at[last, 'switch'] = 0

# This function takes parameters df_switch and col and finds the duration of each run of the spider; df_switch
# is a copy of the given dataframe with reset indices. col is the name of the spider column.

def find_duration(df_switch, col):
    """Return the length of every run recorded in the 'switch' column.

    Rows where 'switch' equals 1 are run starts and rows where it equals -1
    are run ends; each duration is the end index label minus the matching
    start index label. ``col`` is accepted for symmetry with the other
    helpers but is not read here.

    Returns a NumPy array with one entry per run.
    """
    switch = df_switch['switch']

    # index labels of the rows that open and close a run
    starts = switch[switch == 1].index.tolist()
    ends = switch[switch == -1].index.tolist()

    # element-wise end - start gives the duration of each run
    return np.subtract(ends, starts)

# This function builds the run duration dataframe and displays it; saving it to a csv file is currently commented out.

def create_spider_run_duration_dataframe(df):
    """Build and display a table of run durations, one column per spider.

    Every column of ``df`` whose name starts with 's' is treated as a
    spider activity column. For each one the run starts/ends are marked with
    ``adjust_start_and_end`` and the run lengths are computed with
    ``find_duration``. The resulting columns have different lengths, so
    shorter ones are padded with NaN; all values are stored as float because
    NaN cannot be held in an integer column.

    Parameters
    ----------
    df : pandas.DataFrame
        Activity data. It is not modified; the work is done on a copy with
        a reset index.

    Returns
    -------
    None
        The table is shown with ``display`` when that name is available
        (IPython/Jupyter) and with ``print`` otherwise.
    """
    # work on a copy so the 'switch' helper column never touches the input
    df_switch = df.reset_index().copy()

    # one float Series of run durations per spider column
    duration_columns = []
    for col in df.columns[df.columns.str.startswith('s')]:
        adjust_start_and_end(df_switch, col)
        runDuration = find_duration(df_switch, col)
        duration_columns.append(pd.Series(runDuration, dtype=float, name=col))

    # Concatenate once instead of once per column: a single outer join pads
    # the shorter columns with NaN without re-copying the growing frame.
    if duration_columns:
        newdf = pd.concat(duration_columns, axis=1)
    else:
        newdf = pd.DataFrame()

    # `display` is injected by IPython/Jupyter; fall back to print elsewhere
    try:
        show = display
    except NameError:
        show = print
    show(newdf)
    #newdf.to_csv(filename + "_run_duration.csv")

In [5]:
# Build and display the run duration table for the activity data loaded above
create_spider_run_duration_dataframe(df)

Unnamed: 0,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,...,s23,s24,s25,s26,s27,s28,s29,s30,s31,s32
0,4.0,4.0,5.0,8.0,3.0,3.0,32.0,1.0,1.0,14.0,...,15.0,5.0,2.0,,4.0,,2.0,1.0,2.0,1.0
1,1.0,10.0,2.0,2.0,1.0,8.0,27.0,1.0,3.0,3.0,...,4.0,5.0,1.0,,2.0,,2.0,3.0,1.0,1.0
2,2.0,2.0,1.0,2.0,3.0,1.0,2.0,2.0,1.0,1.0,...,31.0,1.0,1.0,,8.0,,4.0,2.0,2.0,3.0
3,2.0,12.0,10.0,2.0,2.0,6.0,8.0,1.0,1.0,5.0,...,1.0,2.0,1.0,,19.0,,4.0,1.0,2.0,2.0
4,1.0,4.0,6.0,4.0,1.0,5.0,1.0,2.0,1.0,1.0,...,10.0,2.0,3.0,,2.0,,6.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1226,,,,,,,,,,,...,,,,,,,,,,
1227,,,,,,,,,,,...,,,,,,,,,,
1228,,,,,,,,,,,...,,,,,,,,,,
1229,,,,,,,,,,,...,,,,,,,,,,
