# Testing Functions for Calculating Activity Onset on Multiple Spiders

### In this notebook:
- I will be testing necessary functions for calculating activity onset

In [115]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import timedelta

# Load the spider activity recording and use its 'Date-Time' column as the index
df = pd.read_csv('Metazygia wittfeldae Monitor 1 Updated_LD.csv').set_index('Date-Time')

# Parse the index labels into datetimes so rows can be sliced by time
df.index = pd.to_datetime(df.index)

# Clock time of the first row whose 'Light' value is exactly 1 lower than the
# previous row's, i.e. the first recorded lights-off transition
lights_turn_off = df.loc[df['Light'].diff().eq(-1)].index[0].time()

df

Unnamed: 0_level_0,Light,Monitor 1 Spider 1,Monitor 1 Spider 2,Monitor 1 Spider 3,Monitor 1 Spider 4,Monitor 1 Spider 5,Monitor 1 Spider 6,Monitor 1 Spider 7,Monitor 1 Spider 8,Monitor 1 Spider 9,...,Monitor 1 Spider 21,Monitor 1 Spider 23,Monitor 1 Spider 25,Monitor 1 Spider 26,Monitor 1 Spider 27,Monitor 1 Spider 28,Monitor 1 Spider 29,Monitor 1 Spider 30,Monitor 1 Spider 31,Monitor 1 Spider 32
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-04-21 00:00:00,0,0,0,0,0,1,3,0,0,3,...,0,0,0,0,0,2,5,0,0,0
2017-04-21 00:01:00,0,0,0,0,0,0,9,0,0,16,...,0,0,0,0,0,5,10,0,0,0
2017-04-21 00:02:00,0,0,0,0,0,7,2,0,0,10,...,0,0,0,0,4,4,4,0,0,0
2017-04-21 00:03:00,0,0,0,0,0,6,0,0,0,0,...,0,0,0,0,0,5,4,0,0,0
2017-04-21 00:04:00,0,0,0,0,2,2,0,0,0,2,...,0,0,0,0,0,3,2,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-04-25 23:55:00,0,15,0,5,0,0,0,0,0,0,...,0,0,0,0,0,0,0,11,0,2
2017-04-25 23:56:00,0,6,0,6,0,0,0,0,0,0,...,0,0,0,0,0,0,0,5,10,0
2017-04-25 23:57:00,0,12,0,5,0,0,0,0,0,0,...,0,0,0,0,0,0,0,5,7,7
2017-04-25 23:58:00,0,5,0,17,0,0,0,0,0,0,...,0,0,0,0,0,0,0,7,3,4


## Creating a Rolling average dataframe

In [2]:
## This function builds a rolling-average dataframe of the spider activity columns
def create_rolling_df(df, window=30):
    """Return the rolling mean of every column of ``df`` except 'Light'.

    Parameters
    ----------
    df : pandas.DataFrame
        Activity dataframe. It must contain a 'Light' column, which is left
        out of the result; every other column is averaged.
    window : int, default 30
        Number of consecutive rows averaged for each output value. The
        default reproduces the original hard-coded window of 30 rows.

    Returns
    -------
    pandas.DataFrame
        New dataframe with the same index as ``df`` and one column per
        non-'Light' column. The first ``window - 1`` rows are NaN because a
        full window is not yet available. ``df`` itself is not modified.
    """
    # drop() already returns a new dataframe, so no explicit copy is needed
    return df.drop(columns='Light').rolling(window).mean()

In [3]:
# Test create_rolling_df on the loaded dataframe. The result is stored in
# rolling_df, which a later cell passes to calculate_activity_onset_multiple_spiders.
rolling_df = create_rolling_df(df)

# Show the rolling-average dataframe as the cell output
display(rolling_df)

Unnamed: 0_level_0,Monitor 1 Spider 1,Monitor 1 Spider 2,Monitor 1 Spider 3,Monitor 1 Spider 4,Monitor 1 Spider 5,Monitor 1 Spider 6,Monitor 1 Spider 7,Monitor 1 Spider 8,Monitor 1 Spider 9,Monitor 1 Spider 10,...,Monitor 1 Spider 21,Monitor 1 Spider 23,Monitor 1 Spider 25,Monitor 1 Spider 26,Monitor 1 Spider 27,Monitor 1 Spider 28,Monitor 1 Spider 29,Monitor 1 Spider 30,Monitor 1 Spider 31,Monitor 1 Spider 32
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-04-21 00:00:00,,,,,,,,,,,...,,,,,,,,,,
2017-04-21 00:01:00,,,,,,,,,,,...,,,,,,,,,,
2017-04-21 00:02:00,,,,,,,,,,,...,,,,,,,,,,
2017-04-21 00:03:00,,,,,,,,,,,...,,,,,,,,,,
2017-04-21 00:04:00,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-04-25 23:55:00,2.700000,1.866667,5.200000,0.0,0.0,4.033333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.633333,5.566667,9.266667
2017-04-25 23:56:00,2.900000,1.866667,5.400000,0.0,0.0,3.966667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.800000,5.900000,9.066667
2017-04-25 23:57:00,3.300000,1.866667,5.566667,0.0,0.0,3.766667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.966667,5.900000,8.666667
2017-04-25 23:58:00,3.466667,1.866667,6.133333,0.0,0.0,3.666667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.200000,5.800000,6.033333


## The following 2 functions find when the lights turn on and off, then group those times into pairs for use when calculating the threshold

In [4]:
# Collect the index labels at which the 'Light' column changes value
def find_lights_turn_on_off(df):
    """Return the index labels of the light transitions, trimmed at both ends.

    A row counts as a transition when its 'Light' value differs from the
    previous row's. The very first row always counts, because it has no
    previous row and its difference is therefore NaN, which is not equal to 0.

    The first two and the last detected rows are discarded; the original
    author treats them as belonging to incomplete days.

    Returns
    -------
    list
        The remaining index values (taken from ``Index.values``), in order.
    """
    # True wherever the light level differs from the previous row (or is the first row)
    light_changed = df['Light'].diff().ne(0)

    # Index labels of those rows only
    transition_times = df.index[light_changed.to_numpy()]

    # Trim the two leading entries and the trailing entry
    complete_days = transition_times[2:-1]

    return list(complete_days.values)

In [5]:
# Test find_lights_turn_on_off on the loaded dataframe; the returned list of
# transition times is shown as the cell output
find_lights_turn_on_off(df)

[numpy.datetime64('2017-04-21T19:02:00.000000000'),
 numpy.datetime64('2017-04-22T07:01:00.000000000'),
 numpy.datetime64('2017-04-22T19:02:00.000000000'),
 numpy.datetime64('2017-04-23T07:01:00.000000000'),
 numpy.datetime64('2017-04-23T19:02:00.000000000'),
 numpy.datetime64('2017-04-24T07:01:00.000000000'),
 numpy.datetime64('2017-04-24T19:02:00.000000000'),
 numpy.datetime64('2017-04-25T07:01:00.000000000')]

In [6]:
# Split the flat list of transition times into consecutive chunks of two
def create_on_off_sublist(on_off_list):
    """Group ``on_off_list`` into consecutive slices of length two.

    Elements 0-1 form the first slice, 2-3 the second, and so on. If the
    input has an odd number of elements the final slice holds one element.
    An empty input gives an empty list.
    """
    pairs = []
    start = 0
    while start < len(on_off_list):
        pairs.append(on_off_list[start:start + 2])
        start += 2

    return pairs

## The following two functions are used to calculate activity onset values
##### The first calculates activity onset for a single column; the second loops over all spider columns

In [7]:
#This function finds the activity onset of one column for every [start, end] window
def calculate_activity_onset(df, on_off_sublist, column_name):
    """Find, for each window, the first time ``column_name`` exceeds its window mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe whose index can be label-sliced with the window bounds
        (in this notebook, the rolling-average dataframe).
    on_off_sublist : list
        Sequence of two-element ``[start, end]`` windows. Both bounds are
        included in the slice, as with any ``.loc`` label slice.
    column_name : str
        Column of ``df`` to analyse.

    Returns
    -------
    list
        One entry per window: the index label of the first row whose value
        is strictly greater than the mean of that column within the window,
        or ``pandas.NaT`` when no row exceeds it (for example when every
        value in the window is identical or NaN).
    """
    activity_onset_values = []
    for window in on_off_sublist:
        # values of this column between the two transition times
        window_values = df.loc[window[0]:window[1], column_name]

        # the threshold is the mean of the column within this window
        threshold = window_values.mean()

        # index labels of every row above the threshold, in time order
        threshold_crossings = window_values[window_values > threshold].index

        # Use NaT rather than the string 'NaN' for "no onset found", so a
        # column built from these values keeps a datetime dtype.
        if len(threshold_crossings) > 0:
            activity_onset_values.append(threshold_crossings[0])
        else:
            activity_onset_values.append(pd.NaT)

    return activity_onset_values

In [8]:
#Run the single-column activity onset calculation for every column of rolling_df
def calculate_activity_onset_multiple_spiders(df, rolling_df):
    """Compute the activity onset list of every column in ``rolling_df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe passed to ``find_lights_turn_on_off`` to obtain the light
        transition times.
    rolling_df : pandas.DataFrame
        Dataframe whose columns are analysed one by one.

    Returns
    -------
    dict
        Maps each column name of ``rolling_df`` to the list returned by
        ``calculate_activity_onset`` for that column.
    """
    # transition times, grouped into two-element windows
    transition_windows = create_on_off_sublist(find_lights_turn_on_off(df))

    # one entry per column, in column order
    return {
        spider: calculate_activity_onset(rolling_df, transition_windows, spider)
        for spider in list(rolling_df)
    }
    

In [9]:
# Test the multiple-spider activity onset function on the loaded dataframe and
# its rolling average; the result is kept for the dataframe-building step below
mult_spiders_activity_onset_dict = calculate_activity_onset_multiple_spiders(df, rolling_df)

# Show the dictionary (column name -> list of onset times) as the cell output
mult_spiders_activity_onset_dict

{'Monitor 1 Spider 1': [Timestamp('2017-04-21 19:32:00'),
  Timestamp('2017-04-22 19:45:00'),
  Timestamp('2017-04-23 19:30:00'),
  Timestamp('2017-04-24 19:26:00')],
 'Monitor 1 Spider 2': [Timestamp('2017-04-21 20:10:00'),
  Timestamp('2017-04-22 19:39:00'),
  Timestamp('2017-04-23 19:37:00'),
  Timestamp('2017-04-24 19:29:00')],
 'Monitor 1 Spider 3': [Timestamp('2017-04-21 19:40:00'),
  Timestamp('2017-04-22 19:43:00'),
  Timestamp('2017-04-23 19:28:00'),
  Timestamp('2017-04-24 19:31:00')],
 'Monitor 1 Spider 4': [Timestamp('2017-04-21 20:03:00'),
  Timestamp('2017-04-22 20:01:00'),
  Timestamp('2017-04-23 20:05:00'),
  Timestamp('2017-04-24 19:37:00')],
 'Monitor 1 Spider 5': [Timestamp('2017-04-21 19:38:00'),
  Timestamp('2017-04-22 19:41:00'),
  Timestamp('2017-04-23 19:33:00'),
  Timestamp('2017-04-24 19:36:00')],
 'Monitor 1 Spider 6': [Timestamp('2017-04-21 19:35:00'),
  Timestamp('2017-04-22 19:27:00'),
  Timestamp('2017-04-23 19:40:00'),
  Timestamp('2017-04-24 19:30:00')]

### The following function is the final function for calculating activity onset; It takes the dictionary of activity onsets and creates a dataframe for it

In [114]:
#Create activity onset dataframe function
def create_activity_onset_df(mult_spiders_activity_onset_dict, lights_turn_off):
    """Turn a dictionary of onset timestamps into minutes after lights-off.

    Parameters
    ----------
    mult_spiders_activity_onset_dict : dict
        Maps a column name to a list of onset timestamps, one per night.
        All lists must have the same length. Entries that cannot be read as
        a datetime (for example the string 'NaN') are treated as missing.
    lights_turn_off : datetime.time or timedelta-like
        When the lights turn off, either as a clock time or as an offset
        from midnight (``datetime.timedelta``, ``pandas.Timedelta``, ...).

    Returns
    -------
    pandas.DataFrame
        One column per dictionary key and one row per night. Each value is
        the whole number of minutes between lights-off and the onset,
        counted within a 24 hour cycle, so an onset after midnight still
        counts from the previous evening's lights-off. Missing onsets are
        NaN. The index holds the calendar date on which each night's
        lights-off falls, taken from the first column with an onset for
        that row.
    """
    # local import: the notebook only imports `timedelta` from datetime
    import datetime

    # express lights-off as an offset from midnight
    if isinstance(lights_turn_off, datetime.time):
        lights_off_offset = pd.Timedelta(
            hours=lights_turn_off.hour,
            minutes=lights_turn_off.minute,
            seconds=lights_turn_off.second,
            microseconds=lights_turn_off.microsecond,
        )
    else:
        lights_off_offset = pd.Timedelta(lights_turn_off)

    #create dataframe from dictionary previously created, forcing datetime columns
    onset_df = pd.DataFrame(mult_spiders_activity_onset_dict).apply(
        pd.to_datetime, errors='coerce'
    )

    # Shift every onset back by the lights-off offset: the clock time of the
    # shifted value is then the time elapsed since lights-off.
    shifted_df = onset_df - lights_off_offset

    minutes_after_lights_off = {}
    night_start = None

    # loop over the dataframe's own columns rather than an outside name
    for name in shifted_df.columns:
        shifted = shifted_df[name]

        #extract minutes and hours to get activity onset in relation to lights turn off
        minutes_after_lights_off[name] = shifted.dt.hour * 60 + shifted.dt.minute

        # the date of the shifted value is the date of that night's lights-off;
        # keep the first available one per row
        night_start = shifted if night_start is None else night_start.fillna(shifted)

    result_df = pd.DataFrame(minutes_after_lights_off)

    #set the night dates as the index
    if night_start is not None:
        result_df.index = list(night_start.dt.date)

    return result_df

In [113]:
# Test create_activity_onset_df using the onset dictionary and the
# lights_turn_off value computed in earlier cells
activity_onset_df = create_activity_onset_df(mult_spiders_activity_onset_dict, lights_turn_off)

# Show the resulting dataframe as the cell output
display(activity_onset_df)

Unnamed: 0,Monitor 1 Spider 1,Monitor 1 Spider 2,Monitor 1 Spider 3,Monitor 1 Spider 4,Monitor 1 Spider 5,Monitor 1 Spider 6,Monitor 1 Spider 7,Monitor 1 Spider 8,Monitor 1 Spider 9,Monitor 1 Spider 10,...,Monitor 1 Spider 21,Monitor 1 Spider 23,Monitor 1 Spider 25,Monitor 1 Spider 26,Monitor 1 Spider 27,Monitor 1 Spider 28,Monitor 1 Spider 29,Monitor 1 Spider 30,Monitor 1 Spider 31,Monitor 1 Spider 32
2017-04-21,30,68,38,61,36,33,54,33,43,63,...,46,128,51,36,32,73,35,182,25,33
2017-04-22,43,37,41,59,39,25,27,42,34,340,...,26,91,33,37,34,54,31,54,34,37
2017-04-23,28,35,26,63,31,38,36,45,38,36,...,21,88,47,23,28,49,26,45,24,33
2017-04-24,24,27,29,35,34,28,34,21,28,47,...,32,70,47,36,28,51,30,29,24,27


# Creating Raster Plots

In [None]:
## WORK IN PROGRESS: the raster plot code below is not even close to done

In [None]:
# Unique values of the 'Date' column of the module-level df.
# NOTE(review): the df displayed at the top of this notebook is indexed by
# 'Date-Time' and no 'Date' column is visible there; confirm which dataframe
# this cell expects before running it.
all_days = df.Date.unique()
def raster_plot(df):
    """Draw one stacked activity subplot per day and save the figure.

    Work in progress. ``df`` must provide the columns 'Date',
    'Total Minutes' and 'Rolling'. For every unique 'Date' value a subplot
    of 'Rolling' against 'Total Minutes' is drawn, two fixed x-ranges
    (0-421 and 1142-1440) are shaded grey, and a dashed red line marks the
    first upward threshold crossing found beyond position 1000. The figure
    is written to 'Raster Plot.png' in the working directory.

    Nothing is returned.
    """
    # take the days from the dataframe that was passed in
    all_days = df['Date'].unique()
    n_days = len(all_days)

    # one sub-dataframe per day, in order of first appearance
    daily_frames = [df[df.Date == str(day)] for day in all_days]

    plt.figure(figsize=(10, 13))
    plt.subplots_adjust(hspace=.2, wspace=0)

    for position, (day, day_df) in enumerate(zip(all_days, daily_frames), start=1):
        plt.subplot(n_days, 1, position)
        plt.plot(day_df['Total Minutes'], day_df['Rolling'])
        plt.ylabel(day, rotation=0, labelpad=25)

        # only the top subplot carries the figure title
        if position == 1:
            plt.title('Raster Plot of Spider Activity Over Multiple Days')
        else:
            plt.title('')

        plt.yticks([0, 15])

        # only the bottom subplot carries the x label and tick labels
        if position == n_days:
            plt.xlabel('Minutes Since Start of Day')
            plt.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
        else:
            plt.xlabel('')
            plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

        # shaded x-ranges; presumably the dark hours of the day (TODO confirm)
        plt.axvspan(0, 421, alpha=0.5, color='grey')
        plt.axvspan(1142, 1440, alpha=0.5, color='grey')

        #calculate the threshold for the specific day: mean + one standard
        #deviation of 'Rolling' over the rows after minute 421
        calculate = day_df.loc[day_df['Total Minutes'] > 421]
        threshold = calculate.Rolling.mean() + calculate.Rolling.std()

        #determine where threshold is crossed; every other change of the
        #above/below state, starting with the first, is an upward crossing
        threshold_crossings = np.diff(day_df.Rolling > threshold, prepend=False)
        upward_crossings = np.argwhere(threshold_crossings)[::2, 0]

        # These are row positions within the day, used as x values; this
        # assumes one row per minute starting at minute 0 (TODO confirm).
        # NOTE(review): an earlier comment named minute 1142 as the cutoff,
        # but the code has always used 1000; confirm which is intended.
        late_crossings = upward_crossings[upward_crossings > 1000]

        # mark the onset only when a crossing exists, instead of raising IndexError
        if late_crossings.size > 0:
            plt.axvline(x=late_crossings[0], color="red", linestyle='--', label="mean activity onset")

    # save once, after every subplot is drawn
    if n_days > 0:
        plt.savefig("Raster Plot.png", format='PNG')