# Testing Functions for Calculating Activity Onset

### In this notebook:
- I will be testing necessary functions for calculating activity onset

In [1]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
import time

#Load the spider activity CSV and index it by its 'Date-Time' column
df = pd.read_csv('Metazygia wittfeldae Monitor 1 Updated_LD.csv')
df = df.set_index('Date-Time')

#Parse the index labels into datetime values
df.index = pd.to_datetime(df.index)

#Keep only the first two columns so the data is more manageable to test on
df = df.drop(columns=df.columns[2:])

df

Unnamed: 0_level_0,Light,Monitor 1 Spider 1
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-04-21 00:00:00,0,0
2017-04-21 00:01:00,0,0
2017-04-21 00:02:00,0,0
2017-04-21 00:03:00,0,0
2017-04-21 00:04:00,0,0
...,...,...
2017-04-25 23:55:00,0,15
2017-04-25 23:56:00,0,6
2017-04-25 23:57:00,0,12
2017-04-25 23:58:00,0,5


In [2]:
#Adds a 'Total Minutes' column holding each row's minute of the day
def add_minutes_column(df):
    """Add the minute-of-day of every index entry as a 'Total Minutes' column.

    The frame is modified in place (its index must expose `.hour` and
    `.minute`, as a DatetimeIndex does) and the same object is returned.
    """
    timestamps = df.index
    minute_of_day = timestamps.hour * 60 + timestamps.minute
    df['Total Minutes'] = minute_of_day
    return df

In [3]:
#Testing add minutes to dataframe function
#The function writes 'Total Minutes' into df itself and returns that same frame,
#so the table shown below is df after the change
add_minutes_column(df)

Unnamed: 0_level_0,Light,Monitor 1 Spider 1,Total Minutes
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-04-21 00:00:00,0,0,0
2017-04-21 00:01:00,0,0,1
2017-04-21 00:02:00,0,0,2
2017-04-21 00:03:00,0,0,3
2017-04-21 00:04:00,0,0,4
...,...,...,...
2017-04-25 23:55:00,0,15,1435
2017-04-25 23:56:00,0,6,1436
2017-04-25 23:57:00,0,12,1437
2017-04-25 23:58:00,0,5,1438


### The following function will not be necessary when using the multiple spider dataframe, only for the test data

In [4]:
#Create function that places spider activity in the first column, as this is one of the parameters for the calculate activity onset code
def move_activity_column_to_0_index(df, column='Monitor 1 Spider 1'):
    """Move one column of `df` to position 0, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to reorder. It is modified in place.
    column : str, optional
        Label of the column to move to the front. Defaults to
        'Monitor 1 Spider 1', the activity column of the test data.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, with `column` now first.

    Raises
    ------
    KeyError
        If `column` is not a column of `df`.
    """
    #pop removes the column from df and hands back its values
    activity = df.pop(column)
    df.insert(0, column, activity)
    return df

In [5]:
# Test column switch function
# The column is popped from df and re-inserted at position 0, so df itself is reordered
move_activity_column_to_0_index(df)

Unnamed: 0_level_0,Monitor 1 Spider 1,Light,Total Minutes
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-04-21 00:00:00,0,0,0
2017-04-21 00:01:00,0,0,1
2017-04-21 00:02:00,0,0,2
2017-04-21 00:03:00,0,0,3
2017-04-21 00:04:00,0,0,4
...,...,...,...
2017-04-25 23:55:00,15,0,1435
2017-04-25 23:56:00,6,0,1436
2017-04-25 23:57:00,12,0,1437
2017-04-25 23:58:00,5,0,1438


### The following function is used to add a rolling average column over 30 bins to the original dataframe

##### *** This is why the spider activity must be in the first column! When more spiders are introduced to the dataframe, the for loop used to calculate activity onset places the spider name in column 1

In [6]:
#Create add rolling average to dataframe function
def add_rolling_average_column(df, window=30):
    """Add a 'Rolling' column with the rolling mean of the first column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column at position 0 holds the activity counts.
        It is modified in place.
    window : int, optional
        Number of consecutive rows averaged for each value. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        The same `df` object with the 'Rolling' column added. The first
        `window - 1` rows are NaN because their window is incomplete.
    """
    #the activity must sit in the first column
    activity = df.iloc[:, 0]

    #The rolling mean keeps df's index, so it is assigned directly. Dropping the
    #leading NaN rows first would only force a re-alignment that puts them back
    #(and that fails when the index contains duplicate timestamps).
    df['Rolling'] = activity.rolling(window).mean()
    return df

In [7]:
#Test add rolling function
#Rows that do not yet have 30 observations behind them show NaN in the new 'Rolling' column
add_rolling_average_column(df)

Unnamed: 0_level_0,Monitor 1 Spider 1,Light,Total Minutes,Rolling
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-04-21 00:00:00,0,0,0,
2017-04-21 00:01:00,0,0,1,
2017-04-21 00:02:00,0,0,2,
2017-04-21 00:03:00,0,0,3,
2017-04-21 00:04:00,0,0,4,
...,...,...,...,...
2017-04-25 23:55:00,15,0,1435,2.700000
2017-04-25 23:56:00,6,0,1436,2.900000
2017-04-25 23:57:00,12,0,1437,3.300000
2017-04-25 23:58:00,5,0,1438,3.466667


## The following 2 functions prepare the input for the calculate activity onset function

In [8]:
#Finds every time the lights switch on or off and returns the index labels of those rows
def find_lights_turn_on_off(df):
    """Return the index values of the rows where the 'Light' column changes.

    A row counts as a change when its 'Light' value differs from the row
    before it. The very first row always counts, because it has no
    predecessor to compare with. The first two and the last of these rows
    are discarded (they belong to incomplete days) and the remaining index
    values are returned as a plain list.
    """
    light_changed = df['Light'].diff().ne(0)
    transitions = df.loc[light_changed]

    #skip the first 2 rows and the last row: incomplete days
    complete_days = transitions.iloc[2:-1]

    #a list is easier to loop through than an index
    return list(complete_days.index.values)

In [9]:
#Test find_lights_turn_on_off: show the transition timestamps left after the first two and the last are dropped
find_lights_turn_on_off(df)

[numpy.datetime64('2017-04-21T19:02:00.000000000'),
 numpy.datetime64('2017-04-22T07:01:00.000000000'),
 numpy.datetime64('2017-04-22T19:02:00.000000000'),
 numpy.datetime64('2017-04-23T07:01:00.000000000'),
 numpy.datetime64('2017-04-23T19:02:00.000000000'),
 numpy.datetime64('2017-04-24T07:01:00.000000000'),
 numpy.datetime64('2017-04-24T19:02:00.000000000'),
 numpy.datetime64('2017-04-25T07:01:00.000000000')]

In [10]:
#Groups the transition list into consecutive pairs, one sublist per pair
def create_on_off_sublist(on_off_list):
    """Split `on_off_list` into consecutive sublists of two elements.

    Elements 0-1 form the first sublist, 2-3 the second, and so on. When the
    input has an odd number of elements the final sublist holds only one.
    An empty input gives an empty list.
    """
    paired = []
    start = 0
    while start < len(on_off_list):
        paired.append(on_off_list[start:start + 2])
        start += 2
    return paired

In [11]:
#This function finds the activity onset for every night and appends the value to a list
def calculate_threshold(df, on_off_sublist):
    """Return the first above-threshold time inside each [start, end] window.

    For every pair in `on_off_sublist` the rows of `df` from `start` to `end`
    (label slice, both ends included) are selected. The threshold for that
    window is the mean of its 'Rolling' column, and the onset is the index
    label of the first row whose 'Rolling' value is strictly greater than
    that threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Rolling' column and an index that supports label
        slicing with the values in `on_off_sublist`.
    on_off_sublist : iterable of two-element sequences
        The [start, end] labels of each window; in this notebook, the times
        the lights turn off and back on.

    Returns
    -------
    list
        One entry per window: the onset index label, or `pd.NaT` when no row
        in the window exceeds the threshold.
    """
    activity_onset_values = []
    for x in on_off_sublist:

        #rows between the two transition times of this night
        night = df.loc[x[0]:x[1], :]
        rolling = night['Rolling']

        #threshold for this night is the mean of its rolling average
        threshold = rolling.mean()

        #all points where the rolling average of activity is above the threshold
        threshold_crossings = night[rolling > threshold].index

        #Keep the first crossing. With none, store NaT rather than the string 'NaN'
        #so the list stays usable in datetime arithmetic.
        if len(threshold_crossings) > 0:
            activity_onset_values.append(threshold_crossings[0])
        else:
            activity_onset_values.append(pd.NaT)

    return activity_onset_values

In [29]:
#Test calculate_threshold function using the find_lights_turn_on_off and create_on_off_sublist functions
#Step 1: timestamps where the 'Light' column changes
transition_indexes_list = find_lights_turn_on_off(df)

#Step 2: group those timestamps into consecutive pairs
on_off_sublist = create_on_off_sublist(transition_indexes_list)

#Step 3: first time in each pair's window that the 'Rolling' column exceeds the window mean
activity_onset_values = calculate_threshold(df, on_off_sublist)

print(activity_onset_values)

[Timestamp('2017-04-21 19:32:00'), Timestamp('2017-04-22 19:45:00'), Timestamp('2017-04-23 19:30:00'), Timestamp('2017-04-24 19:26:00')]


### This function defines the necessary variables for creating the activity onset dataframe

In [53]:
#define activity onset variables for dataframe
def activity_onset_df_variables():
    """Build the pieces needed to assemble the activity onset dataframe.

    Takes no arguments: the dates are read from the notebook-level `df`,
    so that frame must already exist with a datetime index.

    Returns
    -------
    tuple
        (lights_turn_off, list_of_dates, activity_onset_values_subtracted,
        activity_onset_df), in that order:
        - lights_turn_off: result of parsing the time-only string '19:00:00'
        - list_of_dates: unique calendar dates of `df.index`, without the first
        - activity_onset_values_subtracted: a new empty list for the caller to fill
        - activity_onset_df: frame indexed by 'Date' (the dates above) with an
          unfilled 'Activity Onset' column
    """
    #the time the lights turn off at night
    #NOTE(review): only a time of day is given, so pandas supplies the date part
    #(presumably the current date) - TODO confirm callers use only the time of day
    lights_turn_off = pd.to_datetime('19:00:00')
    
    #list of unique dates to make the index
    #NOTE(review): [1:] skips the first date present in df, so the labels start on the
    #second day - confirm whether each night should be labelled by the date it starts or ends
    list_of_dates = list(pd.unique(df.index.date))[1:]
    
    #empty list to store subtracted activity onset values
    activity_onset_values_subtracted = []
    
    #set up dataframe
    #assigning 'Date' to the empty frame creates one row per date; 'Activity Onset' stays unfilled
    column_names = ['Date', 'Activity Onset']
    activity_onset_df = pd.DataFrame(columns = column_names)
    activity_onset_df['Date'] = list_of_dates
    activity_onset_df = activity_onset_df.set_index('Date')
    
    return lights_turn_off, list_of_dates, activity_onset_values_subtracted, activity_onset_df

### The following function is the final function for calculating activity onset; It takes the list of activity onsets and creates a dataframe for it

In [57]:
#Create activity onset dataframe function
def create_activity_onset_df(activity_onset_values):
    """Turn a list of activity onset times into a dataframe of minutes after lights-off.

    Parameters
    ----------
    activity_onset_values : list
        One onset timestamp per date supplied by `activity_onset_df_variables`.
        An entry with no onset (the string 'NaN', or a null such as NaT or
        None) is stored as NaN.

    Returns
    -------
    pandas.DataFrame
        The frame built by `activity_onset_df_variables`, with its
        'Activity Onset' column set to the whole minutes between the
        lights-off time and each onset.

    Raises
    ------
    ValueError
        If the number of onset values does not match the number of dates
        in the frame.
    """
    #set up dataframe using activity_onset_df_variables function
    lights_turn_off, _dates, minutes_after_dark, activity_onset_df = activity_onset_df_variables()

    #loop through the activity onset times and subtract the time the lights turn off to get minutes after dark that activity starts
    for x in activity_onset_values:
        #nights without an onset cannot be subtracted; keep them as missing values
        if isinstance(x, str) or pd.isna(x):
            minutes_after_dark.append(float('nan'))
            continue

        time_difference = x - lights_turn_off

        #`.seconds` is only the within-day part of the difference, so the calendar dates
        #cancel out and an onset earlier in the day than lights-off wraps around midnight
        minutes_after_dark.append(time_difference.seconds // 60)

    #set this list as the activity onset column of the empty dataframe
    activity_onset_df['Activity Onset'] = minutes_after_dark

    #return activity onset dataframe
    return activity_onset_df
  

In [56]:
#Test this function on the activity_onset_values list produced by the calculate_threshold test cell above
#(create_activity_onset_df also reads the notebook-level df through activity_onset_df_variables)

activity_onset_df = create_activity_onset_df(activity_onset_values)

display(activity_onset_df)

Unnamed: 0_level_0,Activity Onset
Date,Unnamed: 1_level_1
2017-04-22,32
2017-04-23,45
2017-04-24,30
2017-04-25,26


# Creating Raster Plots