# Importing Libraries and CSV files

Import the required libraries and load the spider activity CSV file into a dataframe indexed by timestamp.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

# Load the spider activity recording; the first CSV column is used as the index
filename = 'Metazygia wittfeldae Monitor 1 activity_LD_binary'
df = pd.read_csv(filename + '.csv', index_col=0)

# Join the separate Date and Time text columns into one timestamp and index by it
df["Date_Time"] = pd.to_datetime(df["Date"] + ' ' + df["Time"])
df = df.set_index("Date_Time")

# Independent working copy restricted to the s2 column
df1 = df[['s2']].copy()

display(df1)

Unnamed: 0_level_0,s2
Date_Time,Unnamed: 1_level_1
2017-06-11 00:00:00,1
2017-06-11 00:01:00,1
2017-06-11 00:02:00,1
2017-06-11 00:03:00,1
2017-06-11 00:04:00,1
...,...
2017-06-18 23:55:00,1
2017-06-18 23:56:00,1
2017-06-18 23:57:00,1
2017-06-18 23:58:00,1


# Switch Function

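This function takes the parameters `df_switch` and `col` and adds a `switch` column to `df_switch` that marks where the spider changes state: `1` where a run starts (0 to 1), `-1` where a run ends (1 to 0), and `0` where nothing changes. `df_switch` is a copy of the given dataframe with its index reset; `col` is the name of the spider column.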

In [2]:
def adjust_start_and_end(df_switch, col):
    """Add a 'switch' column to ``df_switch`` marking run starts and run ends.

    The column is the row-to-row difference of ``df_switch[col]``: 1 where the
    value goes from 0 to 1 (run starts), -1 where it goes from 1 to 0 (run
    ends) and 0 where it does not change. The first and last rows are then
    patched so that every start marker has a matching end marker.

    Rows are addressed by position, so the frame does not need a 0-based
    integer index, and frames with fewer than two rows are handled.

    Parameters
    ----------
    df_switch : pandas.DataFrame
        Frame holding the activity column. Modified in place.
    col : str
        Name of the activity column (1 = running, 0 = not running).

    Returns
    -------
    None
        The result is written to ``df_switch['switch']``.

    Notes
    -----
    When the recording ends mid-run, the end marker is placed on the last row
    itself because there is no following row. A run that only begins on the
    last row is dropped, since it has no row left to carry an end marker.
    """
    activity = df_switch[col]

    # 1 on a 0 -> 1 change, -1 on a 1 -> 0 change, 0 otherwise (NaN on first row)
    df_switch['switch'] = activity.diff()

    n_rows = len(df_switch)
    if n_rows == 0:
        # nothing to patch in an empty recording
        return

    switch_pos = df_switch.columns.get_loc('switch')

    # a run that is already in progress on the first row counts as starting there
    if activity.iloc[0] == 1:
        df_switch.iloc[0, switch_pos] = 1

    if activity.iloc[-1] == 1:
        if n_rows == 1:
            # a single active row cannot hold both a start and an end marker
            df_switch.iloc[-1, switch_pos] = 0
        elif activity.iloc[-2] == 1:
            # run still in progress at the end: close it on the last row
            df_switch.iloc[-1, switch_pos] = -1
        elif activity.iloc[-2] == 0:
            # run begins on the very last row: discard its unmatched start marker
            df_switch.iloc[-1, switch_pos] = 0

    return

# Demo input for the switch function, built at notebook level for testing.
# The DatetimeIndex is replaced by a 0..n-1 integer index because run durations
# are computed as differences between row indices; reset_index() already
# returns a new frame, so df1 is left untouched.
df_switch = df1.reset_index()

adjust_start_and_end(df_switch, col='s2')

display(df_switch)

Unnamed: 0,Date_Time,s2,switch
0,2017-06-11 00:00:00,1,1.0
1,2017-06-11 00:01:00,1,0.0
2,2017-06-11 00:02:00,1,0.0
3,2017-06-11 00:03:00,1,0.0
4,2017-06-11 00:04:00,1,0.0
...,...,...,...
11515,2017-06-18 23:55:00,1,1.0
11516,2017-06-18 23:56:00,1,0.0
11517,2017-06-18 23:57:00,1,0.0
11518,2017-06-18 23:58:00,1,0.0


# Duration Function

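This function takes the parameters `df_switch` and `col` and returns the duration of every run of the spider, computed as the difference between each run-end index and the matching run-start index in the `switch` column. `df_switch` is a copy of the given dataframe with its index reset; `col` is the name of the spider column.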

In [3]:
def find_duration(df_switch, col):
    """Return the length of every run recorded in ``df_switch['switch']``.

    Parameters
    ----------
    df_switch : pandas.DataFrame
        Frame with a 'switch' column holding 1 at run starts and -1 at run ends.
    col : str
        Name of the spider column. Not used by the computation.

    Returns
    -------
    numpy.ndarray
        Element-wise difference between run-end and run-start index labels.
    """
    markers = df_switch['switch']

    # index labels of the rows flagged as run starts / run ends
    start_labels = markers.index[markers.eq(1)].tolist()
    end_labels = markers.index[markers.eq(-1)].tolist()

    # pair each end with its start and take the distance between them
    return np.subtract(end_labels, start_labels)

# Run lengths for the s2 column, taken from the switch markers in df_switch
runDuration1 = find_duration(df_switch, 's2')

display(runDuration1)

array([23,  7,  4,  5,  5,  9,  3,  6,  2,  1,  1, 50, 18,  5, 23,  5,  7,
        4,  1, 31, 19,  1,  4, 13,  2, 12,  1,  2,  8,  4,  5, 23,  5,  5,
        3,  3,  1,  3,  4,  1, 10,  4,  1,  3,  1,  1, 27, 34,  2,  8, 14,
       24,  7,  9, 17, 11, 17,  9, 10,  4,  9,  7, 28,  1, 16,  5, 21,  1,
       14,  3,  2,  5,  2,  1,  2,  4,  2,  1, 46, 26,  7, 16, 19, 31,  3,
        9, 20,  1,  5, 11,  4,  6,  1,  6, 13, 12,  5,  5,  1,  4, 12,  3,
       10, 11, 10, 14,  4,  4,  2,  6, 15,  9,  1,  1,  1,  1,  1, 66,  9,
       18, 13, 10,  5,  2, 19,  5,  2,  1,  1,  8,  9,  1,  6,  5,  1,  6,
       13,  1,  4,  2,  1, 13,  1,  1,  6,  1,  2,  2,  2,  5,  1,  9, 13,
        3, 10,  2,  9, 20,  6,  1,  1,  1,  2,  8,  1,  7,  4,  6,  1,  2,
        5,  1,  1,  1,  1,  2,  1,  1, 80, 10,  4,  6,  1, 15,  1,  5,  3,
        1,  1,  3,  1,  6,  5,  5, 20,  1,  1, 22,  3,  1,  7, 11,  2,  4,
        3,  2,  9,  2,  2,  7,  1,  2,  6,  6,  1,  4,  6,  1,  2,  1,  4,
        5,  1,  1,  2,  2

# Run Duration Dataframe

This function builds a dataframe with one column of run durations per spider column and displays it. Saving the result to a CSV file is currently commented out.

In [4]:
def create_spider_run_duration_dataframe(df, col):
    """Build, display and return the run-duration table for every spider column.

    Each column of ``df`` whose name starts with 's' is processed on its own:
    the column is copied into a fresh frame with a 0..n-1 index, run starts
    and ends are marked with ``adjust_start_and_end`` and the run lengths are
    computed with ``find_duration``. The function works only from ``df`` and
    does not rely on any notebook-level frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Activity data with one binary column per spider.
    col : str
        Not used; every column starting with 's' is processed. Kept so that
        existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One float column per spider column. Columns with fewer runs are
        padded with NaN.
    """
    duration_columns = []

    # loop through all spider columns and call functions
    for spider_col in df.columns[df.columns.str.startswith('s')]:
        # per-column working frame; the index is dropped because durations are
        # differences between row positions
        spider_switch = df[[spider_col]].reset_index(drop=True)

        adjust_start_and_end(spider_switch, spider_col)
        runDuration = find_duration(spider_switch, spider_col)

        # float dtype because shorter columns are padded with NaN
        duration_columns.append(pd.Series(runDuration, name=spider_col, dtype=float))

    # combine once instead of growing the frame inside the loop
    if duration_columns:
        newdf = pd.concat(duration_columns, axis=1)
    else:
        newdf = pd.DataFrame()

    display(newdf)
    #newdf.to_csv(filename + "_run_duration.csv")

    return newdf

# Build and display the run-duration table for df1
newdf1 = create_spider_run_duration_dataframe(df1, 's2')

Unnamed: 0,s2
0,23.0
1,7.0
2,4.0
3,5.0
4,5.0
...,...
343,3.0
344,4.0
345,3.0
346,4.0
