# Calculating Activity Onset

## In this notebook, I will:
- import a dataset containing activity data for multiple spiders
- define an activity-onset threshold for each spider for each day
- find the first point at which that threshold is crossed each day
- create a dataframe containing all activity onsets for every spider
- visualize activity and activity onset through raster plots

In [15]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as matdates
from datetime import timedelta

#Base name of the input CSV; it is reused as the prefix of any file saved later
filename = 'Metazygia wittfeldae Monitor 1 Updated_LD'

#Load the spider activity CSV and index it by its 'Date-Time' column
df = pd.read_csv(filename + '.csv').set_index('Date-Time')

#Convert the index from text to datetime values
df.index = pd.to_datetime(df.index)

#First timestamp where 'Light' drops by 1 (lights off) and first where it rises by 1 (lights on)
lights_turn_off = df.loc[df['Light'].diff().eq(-1)].index[0]
lights_turn_on = df.loc[df['Light'].diff().eq(1)].index[0]

df

Unnamed: 0_level_0,Light,Monitor 1 Spider 1,Monitor 1 Spider 2,Monitor 1 Spider 3,Monitor 1 Spider 4,Monitor 1 Spider 5,Monitor 1 Spider 6,Monitor 1 Spider 7,Monitor 1 Spider 8,Monitor 1 Spider 9,...,Monitor 1 Spider 21,Monitor 1 Spider 23,Monitor 1 Spider 25,Monitor 1 Spider 26,Monitor 1 Spider 27,Monitor 1 Spider 28,Monitor 1 Spider 29,Monitor 1 Spider 30,Monitor 1 Spider 31,Monitor 1 Spider 32
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-04-21 00:00:00,0,0,0,0,0,1,3,0,0,3,...,0,0,0,0,0,2,5,0,0,0
2017-04-21 00:01:00,0,0,0,0,0,0,9,0,0,16,...,0,0,0,0,0,5,10,0,0,0
2017-04-21 00:02:00,0,0,0,0,0,7,2,0,0,10,...,0,0,0,0,4,4,4,0,0,0
2017-04-21 00:03:00,0,0,0,0,0,6,0,0,0,0,...,0,0,0,0,0,5,4,0,0,0
2017-04-21 00:04:00,0,0,0,0,2,2,0,0,0,2,...,0,0,0,0,0,3,2,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-04-25 23:55:00,0,15,0,5,0,0,0,0,0,0,...,0,0,0,0,0,0,0,11,0,2
2017-04-25 23:56:00,0,6,0,6,0,0,0,0,0,0,...,0,0,0,0,0,0,0,5,10,0
2017-04-25 23:57:00,0,12,0,5,0,0,0,0,0,0,...,0,0,0,0,0,0,0,5,7,7
2017-04-25 23:58:00,0,5,0,17,0,0,0,0,0,0,...,0,0,0,0,0,0,0,7,3,4


## Define necessary functions for calculating activity onset

In [18]:
#This function calculates the rolling average of activity for every spider in a new dataframe
def create_rolling_df(df, window=30):
    """Return the rolling mean of every spider column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Activity data that contains a 'Light' column in addition to the
        spider columns.
    window : int, optional
        Number of consecutive rows averaged at each position. Defaults to
        30, the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        A new dataframe without the 'Light' column. The first
        ``window - 1`` rows are NaN because the window is not yet full.
        ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If ``df`` has no 'Light' column.
    """
    #drop() returns a new dataframe, so the original stays untouched without an extra copy()
    spider_activity = df.drop('Light', axis = 1)

    #average each column over the trailing window of rows
    return spider_activity.rolling(window).mean()

#This function returns the index labels of the light transitions, trimmed at both ends
def find_lights_turn_on_off(df):
    """List the index labels at which the 'Light' column changes value.

    The first two flagged rows and the last flagged row are left out; the
    notebook treats them as belonging to incomplete days.
    """
    #a row is flagged when 'Light' differs from the previous row; the very first
    #row has no predecessor (its diff is NaN), so it is flagged as well
    light_changed = df['Light'].diff().ne(0)

    #index labels of the flagged rows as a numpy array
    transition_labels = df.index[light_changed.to_numpy()].values

    #trim the first 2 and the last flagged rows, then hand back a plain list to loop through
    return list(transition_labels[2:-1])


#This function groups the transition list into consecutive [first, second] pairs
def create_on_off_sublist(on_off_list):
    """Split ``on_off_list`` into consecutive two-element sublists.

    Only complete pairs are returned. If the list has an odd number of
    entries, the unpaired last entry is dropped, because a one-element
    sublist cannot describe a start/end window and would fail when its
    second element is accessed.

    Parameters
    ----------
    on_off_list : sequence
        Transition labels in chronological order.

    Returns
    -------
    list
        Slices of ``on_off_list``, each of length 2. Empty when fewer than
        two entries are given.
    """
    #stop one short of the end so a slice is only started where a second element exists
    last_pair_start = len(on_off_list) - 1
    on_off_subList = [on_off_list[n:n+2] for n in range(0, last_pair_start, 2)]

    return on_off_subList


#This function finds the activity onset in every window and appends it to a list
def calculate_activity_onset(df, on_off_sublist, column_name):
    """Find the first threshold crossing of one column in every window.

    For each ``[start, end]`` pair the rows ``df.loc[start:end]`` are
    selected (label slicing, both ends included). The threshold is the mean
    of ``column_name`` within that window, and the onset is the index label
    of the first row whose value is strictly greater than the threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to search; its index labels are what gets returned.
    on_off_sublist : list
        Sequence of two-element ``[start, end]`` index labels.
    column_name : hashable
        Column of ``df`` to analyse.

    Returns
    -------
    list
        One entry per window: the onset label, or ``pandas.NaT`` when no
        value in the window exceeds the threshold.
    """
    activity_onset_values = []
    for window_bounds in on_off_sublist:
        start, end = window_bounds[0], window_bounds[1]

        #activity of this column restricted to the current window
        window_activity = df.loc[start:end, column_name]

        #threshold is recomputed for every window
        threshold = window_activity.mean()

        #keep only the points where activity is above the threshold
        above_threshold = window_activity[window_activity > threshold]

        if len(above_threshold) > 0:
            #the first crossing is the activity onset
            activity_onset_values.append(above_threshold.index[0])
        else:
            #use a real missing-value marker instead of the string 'NaN' so the
            #resulting column keeps a datetime dtype and supports datetime arithmetic
            activity_onset_values.append(pd.NaT)

    return activity_onset_values


#this function calculates the activity onsets of every spider column and collects them in a dictionary
def calculate_activity_onset_multiple_spiders(df, rolling_df):
    """Map each column of ``rolling_df`` to its list of activity onsets.

    The light transitions are read from ``df`` and grouped into pairs once;
    the same pairs are then used for every column of ``rolling_df``.
    """
    #light transition labels from the original dataframe, grouped into pairs
    nightly_windows = create_on_off_sublist(find_lights_turn_on_off(df))

    #one dictionary entry per spider: column name -> list of activity onsets
    return {
        spider: calculate_activity_onset(rolling_df, nightly_windows, spider)
        for spider in rolling_df
    }


#Create activity onset dataframe function
def create_activity_onset_df(mult_spiders_activity_onset_dict, lights_turn_off, output_path=None):
    """Build and save a table of activity onsets in minutes after lights-off.

    Parameters
    ----------
    mult_spiders_activity_onset_dict : dict
        Maps each spider name to a list of onset timestamps, one per row of
        the result. Entries that cannot be read as a timestamp (such as the
        placeholder string 'NaN') are treated as missing.
    lights_turn_off : timestamp
        Reference time subtracted from every onset.
    output_path : str or path-like, optional
        Where the CSV is written. When omitted, the file is named after the
        notebook-level ``filename`` variable followed by
        ' Activity Onset Values.csv'.

    Returns
    -------
    pandas.DataFrame
        One column per spider and one row per onset, holding minutes after
        lights-off (NaN where the onset is missing). The index holds the
        calendar date of the earliest onset in each row.
    """
    #create dataframe from the dictionary and make every column a datetime column
    onset_times = pd.DataFrame.from_dict(mult_spiders_activity_onset_dict)
    onset_times = onset_times.apply(pd.to_datetime, errors='coerce')

    #subtract the time the lights turn off from every value in df
    elapsed = onset_times - lights_turn_off

    #.dt.seconds keeps only the part of the difference below one day, so onsets on
    #later days are still measured from the lights-off clock time
    #NOTE(review): this assumes the lights turn off at the same time every day - confirm
    minutes_after_lights_off = elapsed.apply(lambda column: column.dt.seconds / 60)

    #label every row with the date of its earliest onset across all spiders, so the
    #label does not depend on one particular column existing or being non-missing
    earliest_onset = onset_times.apply(lambda row: row.min(), axis=1)
    minutes_after_lights_off.index = list(earliest_onset.dt.date)

    #save dataframe as csv file
    if output_path is None:
        output_path = filename + ' Activity Onset Values.csv'
    minutes_after_lights_off.to_csv(output_path)

    return minutes_after_lights_off
  

## Calculate Activity Onset for Every Spider and Create Dataframe

In [19]:
#Create the rolling-average dataframe (spider columns only) from the original df
rolling_df = create_rolling_df(df)

#Create dictionary mapping every spider to its list of activity onsets, one per light-transition pair
mult_spiders_activity_onset_dict = calculate_activity_onset_multiple_spiders(df, rolling_df)

#Create a dataframe from the dictionary, subtract the time the lights turn off from every activity onset,
#and express the result in minutes; the function also saves the dataframe as a CSV file
activity_onset_df = create_activity_onset_df(mult_spiders_activity_onset_dict, lights_turn_off)

#display activity onset dataframe
display(activity_onset_df)

Unnamed: 0,Monitor 1 Spider 1,Monitor 1 Spider 2,Monitor 1 Spider 3,Monitor 1 Spider 4,Monitor 1 Spider 5,Monitor 1 Spider 6,Monitor 1 Spider 7,Monitor 1 Spider 8,Monitor 1 Spider 9,Monitor 1 Spider 10,...,Monitor 1 Spider 21,Monitor 1 Spider 23,Monitor 1 Spider 25,Monitor 1 Spider 26,Monitor 1 Spider 27,Monitor 1 Spider 28,Monitor 1 Spider 29,Monitor 1 Spider 30,Monitor 1 Spider 31,Monitor 1 Spider 32
2017-04-21,30.0,68.0,38.0,61.0,36.0,33.0,54.0,33.0,43.0,63.0,...,46.0,128.0,51.0,36.0,32.0,73.0,35.0,182.0,25.0,33.0
2017-04-22,43.0,37.0,41.0,59.0,39.0,25.0,27.0,42.0,34.0,340.0,...,26.0,91.0,33.0,37.0,34.0,54.0,31.0,54.0,34.0,37.0
2017-04-23,28.0,35.0,26.0,63.0,31.0,38.0,36.0,45.0,38.0,36.0,...,21.0,88.0,47.0,23.0,28.0,49.0,26.0,45.0,24.0,33.0
2017-04-24,24.0,27.0,29.0,35.0,34.0,28.0,34.0,21.0,28.0,47.0,...,32.0,70.0,47.0,36.0,28.0,51.0,30.0,29.0,24.0,27.0
