# Calculating Activity Onset

## In this notebook, I will:
- import a dataset containing activity recordings from multiple spiders
- define an activity-onset threshold for each spider for every day
- find the first point at which that threshold is crossed
- create a dataframe containing all activity onsets

In [5]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Read Spider activity CSV file, using the 'Date-Time' column as the index
DATA_FILE = 'Metazygia wittfeldae Monitor 1 Updated_LD.csv'
raw_df = pd.read_csv(DATA_FILE, index_col='Date-Time')

#Turn date-time index into datetime format
raw_df.index = pd.to_datetime(raw_df.index)

#Keep only the first two columns to make data more manageable to test on.
#Selecting by position into a fresh copy (instead of dropping in place) keeps
#the raw frame intact, so this cell gives the same result every time it is re-run.
df = raw_df.iloc[:, :2].copy()

df

Unnamed: 0_level_0,Light,Monitor 1 Spider 1
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-04-21 00:00:00,0,0
2017-04-21 00:01:00,0,0
2017-04-21 00:02:00,0,0
2017-04-21 00:03:00,0,0
2017-04-21 00:04:00,0,0
...,...,...
2017-04-25 23:55:00,0,15
2017-04-25 23:56:00,0,6
2017-04-25 23:57:00,0,12
2017-04-25 23:58:00,0,5


### Define necessary functions for calculating activity onset

In [6]:
#This function calculates the rolling average of activity over 30 bins and adds these values as a column of a dataframe
def add_rolling_average_column(df, window=30, column=0):
    """Add a 'Rolling' column holding the rolling mean of one column of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to annotate. It is modified in place and also returned.
    window : int, default 30
        Number of consecutive rows (bins) averaged for each rolling value.
    column : int or str, default 0
        Column to average. An int is treated as a column position, anything
        else as a column label.

    Returns
    -------
    pandas.DataFrame
        The same `df` object with the 'Rolling' column added. The first
        `window - 1` rows of 'Rolling' are NaN because their window is not
        yet full.
    """
    # NOTE(review): the default is position 0. For the frame built in the
    # loading cell of this notebook (columns 'Light', then the spider column)
    # position 0 is 'Light', not the activity counts - confirm which frame
    # callers pass in, or pass `column` explicitly.
    if isinstance(column, int):
        activity = df.iloc[:, column]
    else:
        activity = df[column]

    # No dropna() here: assigning the series back aligns on the index, so any
    # dropped rows would simply reappear as NaN.
    df['Rolling'] = activity.rolling(window).mean()
    return df


#This function finds all times when lights turn on and off and returns the index of these occurrences
def find_lights_turn_on_off(df):
    """Return the index labels of the rows where the 'Light' column changes value.

    The first two transitions and the last one are discarded; the rest are
    returned as a plain list of the underlying index values.
    """
    # The very first row always counts as a change: its diff is NaN, and
    # NaN != 0 evaluates to True.
    light_changed = df['Light'].diff().ne(0)
    transitions = df.loc[light_changed]

    # Trim the first 2 and the last transition because these belong to incomplete days
    complete_days = transitions.iloc[2:-1]

    # Plain list of index values so callers can loop through it
    return [stamp for stamp in complete_days.index.values]


#This function groups the created list so that the times where the lights turn on and off each day are subelements
def create_on_off_sublist(on_off_list):
    """Split `on_off_list` into consecutive two-element sublists.

    Elements are paired in order: [a, b, c, d] -> [[a, b], [c, d]]. If the
    input has an odd number of elements, the final sublist holds the single
    leftover element.
    """
    paired = []
    for start in range(0, len(on_off_list), 2):
        paired.append(on_off_list[start:start + 2])
    return paired


#This function calculates the threshold for every night and appends the first time the rolling average crosses it (the activity onset) to a list
def calculate_threshold(df, on_off_sublist):
    """Find, for each time window, when 'Rolling' first rises above the window mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Rolling' column, indexed by the labels used in
        `on_off_sublist`.
    on_off_sublist : list of [start, end]
        One pair of index labels per window (per the notebook, the span it is
        dark each night). Both endpoints are included in the window.

    Returns
    -------
    list
        One entry per window: the index label of the first row whose 'Rolling'
        value is strictly greater than the mean of 'Rolling' over that window,
        or `pd.NaT` when no row exceeds it.
    """
    activity_onset_values = []
    for window in on_off_sublist:

        #rows between the two labels of this window (label slicing is inclusive)
        night = df.loc[window[0]:window[1], :]

        #the threshold is the mean rolling activity over the window
        threshold = night['Rolling'].mean()

        #all points where rolling average of activity is above the threshold
        threshold_crossings = night[night['Rolling'] > threshold].index

        #Record the first crossing. A missing onset is stored as pd.NaT rather
        #than the string 'NaN', so the list stays datetime-like and later
        #date arithmetic on it does not fail on a str.
        if len(threshold_crossings) > 0:
            activity_onset_values.append(threshold_crossings[0])
        else:
            activity_onset_values.append(pd.NaT)

    return activity_onset_values


#define activity onset variables for dataframe
def activity_onset_df_variables(data=None, lights_off_time='19:00:00'):
    """Build the pieces needed to assemble the activity-onset dataframe.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame with a datetime index from which the dates are taken. When
        omitted, the notebook-level `df` is used (the original behaviour).
    lights_off_time : str, default '19:00:00'
        Time the lights turn off at night, parsed with `pd.to_datetime`.
        No date is given, so only the time-of-day part is meaningful.

    Returns
    -------
    tuple
        `(lights_turn_off, list_of_dates, activity_onset_values_subtracted,
        activity_onset_df)`: the parsed lights-off timestamp, the unique dates
        of the index with the first one skipped, an empty list to collect
        onset values, and a frame indexed by those dates ('Date') with an
        unfilled 'Activity Onset' column.
    """
    if data is None:
        #fall back to the notebook-level frame so existing no-argument calls still work
        data = df

    #the time the lights turn off at night
    lights_turn_off = pd.to_datetime(lights_off_time)

    #list of unique dates to make the index (the first date is skipped)
    list_of_dates = list(pd.unique(data.index.date))[1:]

    #empty list to store subtracted activity onset values
    activity_onset_values_subtracted = []

    #set up dataframe: one row per date, 'Activity Onset' left unfilled
    activity_onset_df = pd.DataFrame(
        index=pd.Index(list_of_dates, name='Date'),
        columns=['Activity Onset'],
    )

    return lights_turn_off, list_of_dates, activity_onset_values_subtracted, activity_onset_df


#Create activity onset dataframe function
def create_activity_onset_df(activity_onset_values):
    """Convert activity-onset timestamps into minutes after lights-off, by date.

    Parameters
    ----------
    activity_onset_values : list
        One onset per date row of the frame built by
        `activity_onset_df_variables()`. Entries may be timestamps or missing
        values (`pd.NaT`, or anything `pd.to_datetime` cannot parse).

    Returns
    -------
    pandas.DataFrame
        Frame indexed by 'Date' whose 'Activity Onset' column holds the whole
        minutes between the lights-off time and each onset, with NaN for
        missing onsets.

    Notes
    -----
    The number of onset values has to match the number of date rows;
    otherwise the column assignment fails.
    """
    #set up dataframe using activity_onset_df_variables function
    lights_turn_off, _dates, minutes_after_dark, activity_onset_df = activity_onset_df_variables()

    #subtract the time the lights turn off from each onset to get minutes after dark that activity starts
    for x in activity_onset_values:
        onset = pd.to_datetime(x, errors='coerce')

        #a missing onset stays missing instead of raising on the subtraction
        if pd.isna(onset):
            minutes_after_dark.append(float('nan'))
            continue

        time_difference = onset - lights_turn_off

        #`.seconds` is only the within-day part of the difference (days are
        #excluded), so the calendar dates of the two timestamps do not matter:
        #this is the time-of-day gap, wrapping past midnight.
        minutes_after_dark.append(time_difference.seconds // 60)

    #set this list as the activity onset column of the empty dataframe
    activity_onset_df['Activity Onset'] = minutes_after_dark

    #return activity onset dataframe
    return activity_onset_df
  