# Testing Functions for Calculating Activity Onset

### In this notebook:
- I will be testing the functions needed to calculate activity onset

In [3]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

#Load the spider activity recording from its CSV file
df = pd.read_csv('Metazygia wittfeldae Monitor 1 Updated_LD.csv')

#Parse the timestamp column so that the .dt accessor can be used on it
df['Date-Time'] = pd.to_datetime(df['Date-Time'])

#Derive a calendar-date column and store it as strings, which are easier to compare against
df['Date'] = df['Date-Time'].dt.date.astype(str)

#Trim the data to keep testing manageable:
#keep the first three columns plus the trailing Date column, and only the first 2880 rows
df = df.drop(columns=df.columns[3:-1])
df = df.drop(index=df.index[2880:])

df

Unnamed: 0,Date-Time,Light,Monitor 1 Spider 1,Date
0,2017-04-21 00:00:00,0,0,2017-04-21
1,2017-04-21 00:01:00,0,0,2017-04-21
2,2017-04-21 00:02:00,0,0,2017-04-21
3,2017-04-21 00:03:00,0,0,2017-04-21
4,2017-04-21 00:04:00,0,0,2017-04-21
...,...,...,...,...
2875,2017-04-22 23:55:00,0,0,2017-04-22
2876,2017-04-22 23:56:00,0,0,2017-04-22
2877,2017-04-22 23:57:00,0,10,2017-04-22
2878,2017-04-22 23:58:00,0,22,2017-04-22


###### I tried so hard to get the following function to work with arrays, but ended up not being able to so I found much more simple code for flattening lists

In [4]:
#This function adds a 'Total Minutes' column that holds the 1-based minute of the day for every row.
#For example, 12:00 AM gets a value of 1, 12:25 AM gets 26 and 11:59 PM gets 1440.
def add_minutes_column(df):
    """Add a 1-based minute-of-day column named 'Total Minutes' to ``df``.

    The previous implementation tiled the list 1..1440 once per unique date,
    which raised a length-mismatch error as soon as any day was incomplete.
    The minute is now computed per row, so partial days are handled.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'Date-Time' column (datetime-like) or, failing that, a 'Date'
        column that identifies the day each row belongs to.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object, modified in place, with 'Total Minutes' added.
    """
    if 'Date-Time' in df.columns:
        #Read the minute straight from the timestamp so gaps or partial days cannot shift the numbering
        stamps = pd.to_datetime(df['Date-Time'])
        df['Total Minutes'] = stamps.dt.hour * 60 + stamps.dt.minute + 1
    else:
        #No timestamps available: number the rows within each date, starting at 1.
        #This assumes one row per minute, in chronological order, starting at midnight.
        df['Total Minutes'] = df.groupby('Date').cumcount() + 1
    return df

In [5]:
#Test add_minutes_column: it adds a 'Total Minutes' column to df in place and returns df, which is displayed below
add_minutes_column(df)

Unnamed: 0,Date-Time,Light,Monitor 1 Spider 1,Date,Total Minutes
0,2017-04-21 00:00:00,0,0,2017-04-21,1
1,2017-04-21 00:01:00,0,0,2017-04-21,2
2,2017-04-21 00:02:00,0,0,2017-04-21,3
3,2017-04-21 00:03:00,0,0,2017-04-21,4
4,2017-04-21 00:04:00,0,0,2017-04-21,5
...,...,...,...,...,...
2875,2017-04-22 23:55:00,0,0,2017-04-22,1436
2876,2017-04-22 23:56:00,0,0,2017-04-22,1437
2877,2017-04-22 23:57:00,0,10,2017-04-22,1438
2878,2017-04-22 23:58:00,0,22,2017-04-22,1439


### This function is used throughout my code to separate the dataframe into multiple days

In [6]:
#This function is used in my bigger functions to separate days. Creates a list of dataframes, with each dataframe containing a unique date
def create_list_of_df(list_of_days, data=None):
    """Split a dataframe into one dataframe per requested day.

    Parameters
    ----------
    list_of_days : iterable
        Dates to extract; each entry is converted with ``str`` and compared
        against the 'Date' column.
    data : pandas.DataFrame, optional
        Dataframe to split. When omitted, the notebook-level ``df`` is used,
        which is what this function always did before the parameter existed.

    Returns
    -------
    list of pandas.DataFrame
        One dataframe per entry of ``list_of_days``, in the same order. A day
        that does not occur in the data yields an empty dataframe.
    """
    if data is None:
        #Fall back to the global dataframe so existing one-argument calls keep working
        data = df
    dates = data['Date']
    return [data[dates == str(day)] for day in list_of_days]

In [7]:
#Test create_list_of_df: collect the unique dates found in df, then build one dataframe per date
list_of_days = df['Date'].unique()
create_list_of_df(list_of_days)

[               Date-Time  Light  Monitor 1 Spider 1        Date  Total Minutes
 0    2017-04-21 00:00:00      0                   0  2017-04-21              1
 1    2017-04-21 00:01:00      0                   0  2017-04-21              2
 2    2017-04-21 00:02:00      0                   0  2017-04-21              3
 3    2017-04-21 00:03:00      0                   0  2017-04-21              4
 4    2017-04-21 00:04:00      0                   0  2017-04-21              5
 ...                  ...    ...                 ...         ...            ...
 1435 2017-04-21 23:55:00      0                   0  2017-04-21           1436
 1436 2017-04-21 23:56:00      0                   0  2017-04-21           1437
 1437 2017-04-21 23:57:00      0                   0  2017-04-21           1438
 1438 2017-04-21 23:58:00      0                   0  2017-04-21           1439
 1439 2017-04-21 23:59:00      0                   0  2017-04-21           1440
 
 [1440 rows x 5 columns],
            

### The following function is used to add a rolling average column over 30 bins to the original dataframe

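##### *** The function below averages whichever column sits in the first position (index 0), which is why the spider activity must be in the first column! When more spiders are introduced to the dataframe, the for loop used to calculate activity onset places the spider's column in that first position. In this test notebook, `move_activity_column_to_0_index` (defined in the next section) has to be run before the cells below; the execution counts show that this is the order the cells were actually run in.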

In [17]:
#Create add rolling average to dataframe function
def add_rolling_average_column(df, window=30):
    """Add a 'Rolling' column holding the rolling mean of the first column.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe whose column at position 0 holds the activity counts.
    window : int, default 30
        Number of consecutive rows (bins) averaged for each value.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object, modified in place. The first
        ``window - 1`` rows of 'Rolling' are NaN because a full window is not
        yet available there.
    """
    #The activity is read by position, not by name, so it must be in the first column
    activity = df.iloc[:, 0]

    #No dropna() here: assigning the result as a column realigns it on the index,
    #so dropped rows would come straight back as NaN anyway
    df['Rolling'] = activity.rolling(window).mean()
    return df

In [18]:
#Test add_rolling_average_column. It averages whatever column is at position 0, so the activity
#column must already have been moved there with move_activity_column_to_0_index before this runs
add_rolling_average_column(df)

Unnamed: 0,Monitor 1 Spider 1,Date-Time,Light,Date,Total Minutes,Rolling
0,0,2017-04-21 00:00:00,0,2017-04-21,1,
1,0,2017-04-21 00:01:00,0,2017-04-21,2,
2,0,2017-04-21 00:02:00,0,2017-04-21,3,
3,0,2017-04-21 00:03:00,0,2017-04-21,4,
4,0,2017-04-21 00:04:00,0,2017-04-21,5,
...,...,...,...,...,...,...
2875,0,2017-04-22 23:55:00,0,2017-04-22,1436,4.266667
2876,0,2017-04-22 23:56:00,0,2017-04-22,1437,4.033333
2877,10,2017-04-22 23:57:00,0,2017-04-22,1438,4.166667
2878,22,2017-04-22 23:58:00,0,2017-04-22,1439,4.733333


### The following function will not be necessary when using the multiple spider dataframe, only for the test data

In [8]:
#Create function that places spider activity in the first column, as this is one of the parameters for the calculate activity onset code
def move_activity_column_to_0_index(df, column='Monitor 1 Spider 1'):
    """Move ``column`` to the first position of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe containing ``column``.
    column : str, default 'Monitor 1 Spider 1'
        Name of the activity column to move to position 0.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object, modified in place.

    Raises
    ------
    KeyError
        If ``column`` is not present in ``df``.
    """
    #pop removes the column and hands back its values; insert puts it back at position 0
    df.insert(0, column, df.pop(column))
    return df


In [9]:
# Test move_activity_column_to_0_index: 'Monitor 1 Spider 1' should now be the first column of df
move_activity_column_to_0_index(df)

Unnamed: 0,Monitor 1 Spider 1,Date-Time,Light,Date,Total Minutes
0,0,2017-04-21 00:00:00,0,2017-04-21,1
1,0,2017-04-21 00:01:00,0,2017-04-21,2
2,0,2017-04-21 00:02:00,0,2017-04-21,3
3,0,2017-04-21 00:03:00,0,2017-04-21,4
4,0,2017-04-21 00:04:00,0,2017-04-21,5
...,...,...,...,...,...
2875,0,2017-04-22 23:55:00,0,2017-04-22,1436
2876,0,2017-04-22 23:56:00,0,2017-04-22,1437
2877,10,2017-04-22 23:57:00,0,2017-04-22,1438
2878,22,2017-04-22 23:58:00,0,2017-04-22,1439


In [27]:
#This function calculates the threshold value for each day and records the first time the threshold is crossed after lights-off
def calculate_threshold(list_of_df, lights_off=1142):
    """Find the activity onset of every day in ``list_of_df``.

    For each day the threshold is the mean plus one standard deviation of that
    day's 'Rolling' column. The onset is the 0-based row position, within the
    day, of the first upward crossing of the threshold located after
    ``lights_off``.

    Parameters
    ----------
    list_of_df : list of pandas.DataFrame
        One dataframe per day; each needs 'Total Minutes' and 'Rolling' columns.
    lights_off : int, default 1142
        Row position treated as the start of the night. Crossings at or before
        this position are ignored.

    Returns
    -------
    list
        One entry per dataframe, in the same order: the onset position, or
        ``np.nan`` when the day has no upward crossing after ``lights_off``.
        Days without an onset used to be skipped, which left the list shorter
        than the list of days and broke the pairing of onsets with dates.
    """
    activity_onset_values = []
    for day in list_of_df:
        #calculate threshold
        calculate = day.loc[day['Total Minutes'] > 0]
        rolling = calculate['Rolling']
        threshold = rolling.mean() + rolling.std()

        #determine all instances where threshold is crossed;
        #np.diff on booleans flags every change of state, and prepend=False
        #guarantees the first flagged change is an upward crossing
        above_threshold = (rolling > threshold).to_numpy()
        threshold_crossings = np.diff(above_threshold, prepend=False)

        #upward and downward crossings alternate, so every other one is an upward crossing
        upward_crossings = np.argwhere(threshold_crossings)[::2, 0]

        #keep only the crossings after lights-off, as this is considered the start of the night
        night_crossings = upward_crossings[upward_crossings > lights_off]

        #record the first crossing of the night, or NaN so the list stays aligned with the days
        if len(night_crossings) > 0:
            activity_onset_values.append(night_crossings[0])
        else:
            activity_onset_values.append(np.nan)

    return activity_onset_values
    

In [30]:
#Test calculate_threshold end to end by chaining it with create_list_of_df

#step 1: split df into one dataframe per unique date
step1 = create_list_of_df(list_of_days)

#step 2: find the first threshold crossing of the night for each of those days
step2 = calculate_threshold(step1)

print(step2)

[1177, 1192]


### The following function is the final function for calculating activity onset; it takes the list of activity onsets and creates a dataframe from it

In [40]:
#Create activity onset dataframe function
def create_activity_onset_df(activity_onset_values, list_of_days=None, lights_off=1142):
    """Build a dataframe of activity onsets relative to lights-off, indexed by date.

    Parameters
    ----------
    activity_onset_values : list
        One onset position per day, as returned by ``calculate_threshold``.
    list_of_days : iterable, optional
        Dates matching ``activity_onset_values`` one to one. When omitted, the
        unique dates of the notebook-level ``df`` are used, which is what this
        function always did before the parameter existed.
    lights_off : int, default 1142
        Position at which the lights turn off; it is subtracted from every onset.

    Returns
    -------
    pandas.DataFrame
        A single 'Activity Onset' column indexed by 'Date'.

    Raises
    ------
    ValueError
        If the number of onsets differs from the number of days.
    """
    if list_of_days is None:
        #Fall back to the global dataframe so existing one-argument calls keep working
        list_of_days = df.Date.unique()
    list_of_days = list(list_of_days)

    #Fail with an explicit message instead of the generic pandas length-mismatch error
    if len(activity_onset_values) != len(list_of_days):
        raise ValueError(
            'Got %d activity onset values for %d days; there must be exactly one value per day'
            % (len(activity_onset_values), len(list_of_days))
        )

    #subtract the lights-off position from each calculated value to get activity onset relative to lights-off
    activity_onset_values_subtracted = [x - lights_off for x in activity_onset_values]

    #build the dataframe in one step, with the dates as its index
    activity_onset_df = pd.DataFrame(
        {'Activity Onset': activity_onset_values_subtracted},
        index=pd.Index(list_of_days, name='Date'),
    )

    return activity_onset_df

In [41]:
#Test create_activity_onset_df as the third step of the process, reusing step2 from the calculate_threshold test

step3 = create_activity_onset_df(step2)

display(step3)

Unnamed: 0_level_0,Activity Onset
Date,Unnamed: 1_level_1
2017-04-21,35
2017-04-22,50
