# Replacing Activity Onset Means with Medians

## In this notebook, I will replace activity onset means with medians and replace the values in the summary file 

In [1]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as matdates
from datetime import timedelta
import os

### Import activity means dataframe

In [2]:
#change directory to the location of activity onset file
os.chdir("/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie/Schizocosa ocreata/Activity Onset")

#define file name
filename = "Schizocosa ocreata Monitor 2 Updated_LD"

#Import mean activity onset df
activity_onset_df = pd.read_csv(filename + " Activity Onset Values.csv")

#drop date column
activity_onset_df = activity_onset_df.iloc[: , 1:]

activity_onset_df

Unnamed: 0,Monitor 2 Spider 1,Monitor 2 Spider 2,Monitor 2 Spider 3,Monitor 2 Spider 4,Monitor 2 Spider 5,Monitor 2 Spider 6,Monitor 2 Spider 7,Monitor 2 Spider 8,Monitor 2 Spider 9,Monitor 2 Spider 10,Monitor 2 Spider 11,Monitor 2 Spider 12,Monitor 2 Spider 13,Monitor 2 Spider 14,Monitor 2 Spider 15,Monitor 2 Spider 16,Monitor 2 Spider 31
0,0.0,0.0,5.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,18.0,4.0,15.0,0.0,10.0,15.0,10.0
1,0.0,0.0,0.0,49.0,0.0,9.0,0.0,5.0,0.0,8.0,4.0,4.0,0.0,9.0,0.0,9.0,5.0
2,18.0,4.0,18.0,1.0,19.0,7.0,18.0,1.0,1.0,10.0,25.0,1.0,1.0,13.0,1.0,6.0,57.0
3,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,17.0,4.0,1.0,18.0,1.0,16.0,19.0
4,25.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,18.0,3.0,39.0,27.0,17.0,24.0,1.0,1.0,4.0
5,23.0,6.0,25.0,1.0,1.0,1.0,20.0,1.0,1.0,4.0,1.0,1.0,1.0,22.0,1.0,1.0,1.0
6,1.0,31.0,20.0,49.0,4.0,1.0,30.0,1.0,28.0,25.0,1.0,29.0,11.0,1.0,1.0,33.0,31.0


### Define Necessary Functions for Creating Median Activity Onset Dataframe

In [5]:
#this function will take activity onset values and calculate the median activity onset for each individual
def calculate_activity_onset_medians(activity_onset_df):
    
    #create empty list to append means to
    activity_onset_medians_list = []

    #create activity onset medians df
    activity_onset_medians_df = activity_onset_df
    
    #take median activity onset for each spider and append to list
    column_names = list(activity_onset_medians_df)
    
    #calculate median of every column
    for x in column_names:
        
        median_activity_onset = np.nanmedian(activity_onset_medians_df[x])
        
        activity_onset_medians_list.append(median_activity_onset)
    
    #append averages as the last row of the dataframe
    activity_onset_medians_df.loc[len(activity_onset_medians_df)] = activity_onset_medians_list
    
    return activity_onset_medians_df

#this function will calculate the sem of every spider
def calculate_sem(activity_onset_df, activity_onset_medians_df):
    
    #create new df
    activity_onset_medians_sem = activity_onset_df
    
    #count number of days activity onset occurs
    activity_onset_days_df = activity_onset_df.count()
    
    #create empty list to append sem to
    activity_onset_sem_list = []
    
    #get col names
    column_names = list(activity_onset_medians_df)
    
    #calculate sem
    for x in column_names:
        
        sem_activity_onset = np.std(activity_onset_medians_df[x][:-1], ddof=1) / np.sqrt(np.size(activity_onset_days_df[x]))
        
        activity_onset_sem_list.append(sem_activity_onset)
    
    #append sems as the last row of the dataframe
    activity_onset_medians_sem.loc[len(activity_onset_medians_df)] = activity_onset_sem_list
    
    return activity_onset_medians_sem

#this function will create the final df, with one column containing sem and one containing median activity onset
def create_median_sem_df(activity_onset_medians_sem):
    
    #flip columns and rows of df
    activity_onset_medians_sem_T = activity_onset_medians_sem.transpose()
    
    #select for last two columns, as those contain sem and median
    activity_onset_medians_sem_T = activity_onset_medians_sem_T.iloc[: , -2:]
    
    #rename columns
    activity_onset_medians_sem_T = activity_onset_medians_sem_T.rename(columns = {activity_onset_medians_sem_T.columns[-2] : 'Median Activity Onset',
                                                                                  activity_onset_medians_sem_T.columns[-1] : 'SEM'}) 
    #save df as csv file
    activity_onset_medians_sem_T.to_csv(filename + " Median Activity Onsets.csv")
    
    return activity_onset_medians_sem_T

#sthis function will split the monitor and spider name into two separate columns as this is needed for further functions
def split_monitor_and_spider(final_median_activity_onset_df):
    
    #create two empty lists to hold monitor and spider names
    monitor_list = []
    spider_list = []
    
    #loop through length of activity onset dataframe
    for i in range(len(final_median_activity_onset_df)):
        
        #split index names into four elements 
        #may have to alter this based on the number of elements in the monitor/spider name
        monitor1, number1, spider1, number2 = final_median_activity_onset_df.index[i].split(" ")
        
        #combnine monitor name and number and spider name and number
        monitor = monitor1 + " " + number1
        spider = spider1 + " " + number2
        
        #append to list
        monitor_list.append(monitor)
        spider_list.append(spider)
        
    #fill columns with spider and monitor names
    final_median_activity_onset_df["Monitor"] = monitor_list
    final_median_activity_onset_df["Spider"] = spider_list
    
    return final_median_activity_onset_df

### Create Median Activity Onset dataframe

In [7]:
#change directory to location to save median activity onsets files
directory = "/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie"

#calculate medians
activity_onset_medians_df = calculate_activity_onset_medians(activity_onset_df)

#calculate SEMs
activity_onset_medians_sem = calculate_sem(activity_onset_df, activity_onset_medians_df)

#combine into one dataframe
final_median_activity_onset_df = create_median_sem_df(activity_onset_medians_sem)

#split monitors and spider numbers
median_activity_df = split_monitor_and_spider(final_median_activity_onset_df)

median_activity_df

Unnamed: 0,Median Activity Onset,SEM,Monitor,Spider
Monitor 2 Spider 1,1.0,10.265388,Monitor 2,Spider 1
Monitor 2 Spider 2,4.0,9.153949,Monitor 2,Spider 2
Monitor 2 Spider 3,5.0,8.919204,Monitor 2,Spider 3
Monitor 2 Spider 4,1.0,20.303561,Monitor 2,Spider 4
Monitor 2 Spider 5,1.0,6.140708,Monitor 2,Spider 5
Monitor 2 Spider 6,1.0,3.075407,Monitor 2,Spider 6
Monitor 2 Spider 7,1.0,10.88284,Monitor 2,Spider 7
Monitor 2 Spider 8,1.0,1.33445,Monitor 2,Spider 8
Monitor 2 Spider 9,1.0,9.69928,Monitor 2,Spider 9
Monitor 2 Spider 10,4.0,7.166392,Monitor 2,Spider 10


### Define Necessary Functions for Replacing Activity Onset Means with Medians in Summary File

In [107]:
#Define path to summary file and spider name
current_path = "/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie/"
spider_name = "Schizocosa ocreata"

#define function to retrieve summary file from box
def get_summary_file(spider_name):
    
    #change directory to spider of interest
    os.chdir(current_path + spider_name)
    
    #read summary file
    current_summary = pd.read_csv(spider_name + " summary updated.csv")
    
    return current_summary

#this function will replace the activity onsets in the summary file with the median activity onsets calculated
def replace_activity_onsets_with_medians(summary_file, median_activity_onsets_df):
    
    #create copy of summary file so that the original is not modified
    summary_file_updated = summary_file.copy().drop("Unnamed: 0", axis = 1)#.drop("Mean actiivty onset", axis = 1)
    
    #loop through length of activity onset df
    for x in range(len(median_activity_onsets_df)):
        
        #loop through spider names in summary file
        for i in summary_file_updated["Spider ID"]:
            
            #if statement to make sure the spider is in the summary file
            if median_activity_onsets_df.index[x] in i: 
                
                #replace activity onsets when spider IDs match, this way we dont have to worry about missing spiders
                summary_file_updated.loc[summary_file_updated["Spider ID"] == median_activity_onsets_df.index[x], "Median activity onset"] = median_activity_onsets_df["Median Activity Onset"][x] 
                summary_file_updated.loc[summary_file_updated["Spider ID"] == median_activity_onsets_df.index[x], "Activity onset SEM"] = median_activity_onsets_df["SEM"][x]
                
    #save as csv
    summary_file_updated.to_csv(spider_name + " summary updated.csv")
    
    return summary_file_updated

### Create new summary file containing updated activity onset medians and SEM

In [108]:
#retrieve summary file
spider_summary = get_summary_file(spider_name)

#replace values
summary_file_updated = replace_activity_onsets_with_medians(spider_summary, median_activity_df)

summary_file_updated

Unnamed: 0.1,Unnamed: 0,Specie Name,Spider ID,Conditions,Is stationary,LombSc period,LombSc amplitude,LombSc p value,Masking,DiNoc ratio,Activity Rest ratio,Mean vector lengh,Mean vector angle,Median activity onset,Activity onset SEM
0,0,Schizocosa ocreata,Monitor 1 Spider 1,LD,1,22.218001,27.831343,1.113597e-07,,0.349398,0.078883,0.123576,246.971286,1.0,7.637626
1,1,Schizocosa ocreata,Monitor 1 Spider 2,LD,1,13.997811,4.334313,1.000000e+00,,0.007341,0.054835,0.124088,-1.996048,1.0,6.094494
2,2,Schizocosa ocreata,Monitor 1 Spider 3,LD,1,27.445179,19.145305,5.466030e-04,,-0.337149,0.083638,0.118771,-14.192977,9.0,5.715476
3,3,Schizocosa ocreata,Monitor 1 Spider 4,LD,1,22.883873,112.753824,2.945507e-44,,-1.370995,0.065652,0.473664,-4.621213,10.0,6.294366
4,4,Schizocosa ocreata,Monitor 1 Spider 5,LD,1,25.840861,7.029854,1.000000e+00,,0.187274,0.053952,0.103268,246.609055,1.0,5.639993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,61,Schizocosa ocreata,Monitor 2 Spider 13,DD,1,22.306739,13.975210,1.107720e-01,,,0.055254,0.231529,9.236384,,
62,62,Schizocosa ocreata,Monitor 2 Spider 14,DD,1,25.106932,1.441634,1.000000e+00,,,0.736195,0.164232,1.382732,,
63,63,Schizocosa ocreata,Monitor 2 Spider 15,DD,1,23.168142,0.468561,1.000000e+00,,,0.003904,0.343003,87.828164,,
64,64,Schizocosa ocreata,Monitor 2 Spider 16,DD,1,22.183023,32.712805,1.308333e-09,,,0.043856,0.395671,205.112994,,
