# Replacing Activity Onset Means with Medians

## In this notebook, I will replace activity onset means with medians and replace the values in the data zoo dataframe

In [1]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as matdates
from datetime import timedelta
import os

### Import df

In [109]:
os.chdir("/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie/Frontinella pyramitela/Activity Onset")

#define monitors and species, THIS WILL BE CHANGED LATER
filename = "Frontinella pyramitela Monitor 13_LD"

#Import mean activity onset df
activity_onset_df = pd.read_csv(filename + " Activity Onset Values.csv")

#drop date column
activity_onset_df = activity_onset_df.iloc[: , 1:]

activity_onset_df

Unnamed: 0,Monitor 13 Spider 5,Monitor 13 Spider 10,Monitor 13 Spider 11,Monitor 13 Spider 16,Monitor 13 Spider 17,Monitor 13 Spider 19,Monitor 13 Spider 20,Monitor 13 Spider 22,Monitor 13 Spider 25
0,86.0,67.0,58.0,111.0,15.0,112.0,90.0,121.0,
1,108.0,122.0,15.0,96.0,9.0,115.0,85.0,121.0,179.0
2,96.0,116.0,75.0,56.0,7.0,112.0,85.0,123.0,140.0
3,167.0,103.0,47.0,103.0,5.0,101.0,90.0,118.0,209.0
4,,121.0,77.0,64.0,76.0,,89.0,98.0,


### Define Necessary Functions

In [27]:
#this function will loop through the folders in the spider data zoo to gather spider names
directory = "/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie"

### This function creates list that counts the number of days that each spider has an activity onset for
def activity_onset_days(activity_onset_df):
    
    activity_onset_days = activity_onset_df.count()
    
    return activity_onset_days

def get_species_names(directory):
    
    #create empty list to append names
    species_names = []
    
    #loop through folders in given directory
    for folder in os.listdir(directory):
        
        #only select folders starting with capital letter, as these are the species names
        if folder == folder.capitalize():
        
            #append names of files to empty list
            species_names.append(folder)
        
    
    return species_names

#this function will take activity onset values and calculate the median activity onset for each individual
def calculate_activity_onset_medians(activity_onset_df):
    
    #create empty list to append means to
    activity_onset_medians_list = []

    #remove date column
    activity_onset_medians_df = activity_onset_df
    
    #take median activity onset for each spider and append to list
    column_names = list(activity_onset_medians_df)
    
    #calculate median of every column
    for x in column_names:
        
        median_activity_onset = np.nanmedian(activity_onset_medians_df[x])
        
        activity_onset_medians_list.append(median_activity_onset)
    
    #append averages as the last row of the dataframe
    activity_onset_medians_df.loc[len(activity_onset_medians_df)] = activity_onset_medians_list
    
    return activity_onset_medians_df

#this function will take the sem of every spider
def calculate_sem(activity_onset_df, activity_onset_medians_df):
    
    #create new df
    activity_onset_medians_sem = activity_onset_df
    
    #count number of days activity onset occurs
    activity_onset_days_df = activity_onset_days(activity_onset_medians_df)
    
    #create empty list to append sem to
    activity_onset_sem_list = []
    
    #get col names
    column_names = list(activity_onset_medians_df)
    
    #calculate sem
    for x in column_names:
        
        sem_activity_onset = np.std(activity_onset_medians_df[x][:-1], ddof=1) / np.sqrt(np.size(activity_onset_days_df[x]))
        
        activity_onset_sem_list.append(sem_activity_onset)
    
    #append sems as the last row of the dataframe
    activity_onset_medians_sem.loc[len(activity_onset_medians_df)] = activity_onset_sem_list
    
    return activity_onset_medians_sem

#this function will create the final df, with one column containing sem and one containing median activity onset
def create_median_sem_df(activity_onset_medians_sem):
    
    #flip columns and rows of df
    activity_onset_medians_sem_T = activity_onset_medians_sem.transpose()
    
    #select for last two columns, as those contain sem and median
    activity_onset_medians_sem_T = activity_onset_medians_sem_T.iloc[: , -2:]
    
    #rename columns
    activity_onset_medians_sem_T = activity_onset_medians_sem_T.rename(columns = {activity_onset_medians_sem_T.columns[-2] : 'Median Activity Onset',
                                                                                  activity_onset_medians_sem_T.columns[-1] : 'SEM'}) 
    #save df as csv file
    activity_onset_medians_sem_T.to_csv(filename + " Median Activity Onsets.csv")
    
    return activity_onset_medians_sem_T
    

### Test Functions

In [110]:
activity_onset_medians_df = calculate_activity_onset_medians(activity_onset_df)

activity_onset_medians_sem = calculate_sem(activity_onset_df, activity_onset_medians_df)

final_median_activity_onset_df = create_median_sem_df(activity_onset_medians_sem)

final_median_activity_onset_df

Unnamed: 0,Median Activity Onset,SEM
Monitor 13 Spider 5,102.0,36.298531
Monitor 13 Spider 10,116.0,22.971722
Monitor 13 Spider 11,58.0,25.274493
Monitor 13 Spider 16,96.0,24.484689
Monitor 13 Spider 17,9.0,30.196026
Monitor 13 Spider 19,112.0,6.164414
Monitor 13 Spider 20,89.0,2.588436
Monitor 13 Spider 22,121.0,10.329569
Monitor 13 Spider 25,179.0,34.597688


In [111]:
#splitting index into species and spider number
def split_monitor_and_spider(final_median_activity_onset_df):
    
    #create two empty lists to hold monitor and spider names
    monitor_list = []
    spider_list = []
    
    for i in range(len(final_median_activity_onset_df)):
        
        #split index names into four elements
        monitor1, number1, spider1, number2 = final_median_activity_onset_df.index[i].split(" ")
        
        #combnine monitor name and number and spider name and number
        monitor = monitor1 + " " + number1 
        spider = spider1 + " " + number2
        
        monitor_list.append(monitor)
        spider_list.append(spider)
        
    #fill columns with spider and monitor names
    final_median_activity_onset_df["Monitor"] = monitor_list
    final_median_activity_onset_df["Spider"] = spider_list
    
    return final_median_activity_onset_df

median_activity_df = split_monitor_and_spider(final_median_activity_onset_df)

median_activity_df

Unnamed: 0,Median Activity Onset,SEM,Monitor,Spider
Monitor 13 Spider 5,102.0,36.298531,Monitor 13,Spider 5
Monitor 13 Spider 10,116.0,22.971722,Monitor 13,Spider 10
Monitor 13 Spider 11,58.0,25.274493,Monitor 13,Spider 11
Monitor 13 Spider 16,96.0,24.484689,Monitor 13,Spider 16
Monitor 13 Spider 17,9.0,30.196026,Monitor 13,Spider 17
Monitor 13 Spider 19,112.0,6.164414,Monitor 13,Spider 19
Monitor 13 Spider 20,89.0,2.588436,Monitor 13,Spider 20
Monitor 13 Spider 22,121.0,10.329569,Monitor 13,Spider 22
Monitor 13 Spider 25,179.0,34.597688,Monitor 13,Spider 25


In [112]:
# this function will take the current path and spider name and retrieve the summary file for that spider
current_path = "/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie/"
spider_name = "Frontinella pyramitela"


#define function
def get_summary_file(spider_name):
    
    #change directory to spider of interest
    os.chdir(current_path + spider_name)
    
    #read summary file
    current_summary = pd.read_csv(spider_name + " summary updated.csv")
    
    return current_summary

In [113]:
#test function
spider_summary = get_summary_file(spider_name)

spider_summary

Unnamed: 0.1,Unnamed: 0,Specie Name,Spider ID,Conditions,Is stationary,LombSc period,LombSc amplitude,LombSc p value,Masking,DiNoc ratio,Activity/Rest Ratio,Mean vector lengh,Mean vector angle,Median activity onset,Activity onset SEM
0,0,Frontinella pyramitela,Monitor 11 Spider 1,LD,1,23.953925,32.644161,7e-10,,-1.97388,0.031962,0.699803,-13.453604,40.0,14.822281
1,1,Frontinella pyramitela,Monitor 11 Spider 2,LD,1,23.953925,12.050587,0.3117154,,-1.945475,0.007275,0.841823,53.835203,199.0,7.637626
2,2,Frontinella pyramitela,Monitor 11 Spider 3,LD,1,23.393482,143.065286,1.62e-57,,-1.589308,0.102266,0.58128,-12.485353,12.0,7.314369
3,3,Frontinella pyramitela,Monitor 11 Spider 4,LD,1,24.541881,139.050524,8.87e-56,,-1.997207,0.056958,0.687169,-4.883445,32.0,18.119051
4,4,Frontinella pyramitela,Monitor 11 Spider 5,LD,1,23.953925,19.914004,0.000184649,,-1.892627,0.022292,0.646584,2.974533,43.0,14.272351
5,5,Frontinella pyramitela,Monitor 11 Spider 7,LD,1,24.541881,18.603134,0.000661852,,-1.913588,0.038511,0.618078,8.434716,30.0,12.091319
6,6,Frontinella pyramitela,Monitor 11 Spider 8,LD,1,22.019648,11.883533,0.3549329,,-1.158599,0.024911,0.416712,-5.548215,41.0,107.949062
7,7,Frontinella pyramitela,Monitor 11 Spider 11,LD,1,18.377205,0.001354,1.0,,-0.669446,0.000417,0.745356,-16.565051,,
8,8,Frontinella pyramitela,Monitor 11 Spider 12,LD,1,20.168922,0.681022,1.0,,-1.848935,0.003904,0.752485,0.17771,99.0,
9,9,Frontinella pyramitela,Monitor 11 Spider 13,LD,1,22.019648,16.51318,0.005030236,,-1.595373,0.020264,0.486991,34.262203,22.0,13.400871


In [114]:
#this function will replace the activity onsets in the summary file with the median activity onsets calculated

def replace_activity_onsets_with_medians(summary_file, median_activity_onsets_df):
    
    #create copy of summary file so that the original is not modified
    summary_file_updated = summary_file.copy().drop("Unnamed: 0", axis = 1)#.drop("Mean actiivty onset", axis = 1)
    
    #loop through length of activity onset df
    for x in range(len(median_activity_onsets_df)):
        
        #loop through spider names in summary file
        for i in summary_file_updated["Spider ID"]:
            
            if median_activity_onsets_df.index[x] in i: 
                
                #replace activity onsets when spider IDs match, this way we dont have to worry about missing spiders
                summary_file_updated.loc[summary_file_updated["Spider ID"] == median_activity_onsets_df.index[x], "Median activity onset"] = median_activity_onsets_df["Median Activity Onset"][x] 
                summary_file_updated.loc[summary_file_updated["Spider ID"] == median_activity_onsets_df.index[x], "Activity onset SEM"] = median_activity_onsets_df["SEM"][x]
                
    #save as csv
    summary_file_updated.to_csv(spider_name + " summary updated.csv")
    
    return summary_file_updated

summary_file_updated = replace_activity_onsets_with_medians(spider_summary, median_activity_df)

summary_file_updated
        

Unnamed: 0,Specie Name,Spider ID,Conditions,Is stationary,LombSc period,LombSc amplitude,LombSc p value,Masking,DiNoc ratio,Activity/Rest Ratio,Mean vector lengh,Mean vector angle,Median activity onset,Activity onset SEM
0,Frontinella pyramitela,Monitor 11 Spider 1,LD,1,23.953925,32.644161,7e-10,,-1.97388,0.031962,0.699803,-13.453604,40.0,14.822281
1,Frontinella pyramitela,Monitor 11 Spider 2,LD,1,23.953925,12.050587,0.3117154,,-1.945475,0.007275,0.841823,53.835203,199.0,7.637626
2,Frontinella pyramitela,Monitor 11 Spider 3,LD,1,23.393482,143.065286,1.62e-57,,-1.589308,0.102266,0.58128,-12.485353,12.0,7.314369
3,Frontinella pyramitela,Monitor 11 Spider 4,LD,1,24.541881,139.050524,8.87e-56,,-1.997207,0.056958,0.687169,-4.883445,32.0,18.119051
4,Frontinella pyramitela,Monitor 11 Spider 5,LD,1,23.953925,19.914004,0.000184649,,-1.892627,0.022292,0.646584,2.974533,43.0,14.272351
5,Frontinella pyramitela,Monitor 11 Spider 7,LD,1,24.541881,18.603134,0.000661852,,-1.913588,0.038511,0.618078,8.434716,30.0,12.091319
6,Frontinella pyramitela,Monitor 11 Spider 8,LD,1,22.019648,11.883533,0.3549329,,-1.158599,0.024911,0.416712,-5.548215,41.0,107.949062
7,Frontinella pyramitela,Monitor 11 Spider 11,LD,1,18.377205,0.001354,1.0,,-0.669446,0.000417,0.745356,-16.565051,,
8,Frontinella pyramitela,Monitor 11 Spider 12,LD,1,20.168922,0.681022,1.0,,-1.848935,0.003904,0.752485,0.17771,99.0,
9,Frontinella pyramitela,Monitor 11 Spider 13,LD,1,22.019648,16.51318,0.005030236,,-1.595373,0.020264,0.486991,34.262203,22.0,13.400871


In [None]:
species_names = get_species_names(directory)

def get_monitors(species_names):
    
    monitors = []
    
    for x in species_names:
        for folder in os.listdir(directory + "/" + x + "/Activity Onset"):
            #only select folders starting with capital letter, as these are the species names
            if folder == folder.capitalize():
        
                #append names of files to empty list
                monitors.append(folder)
                
    return monitors
                
aaa = get_monitors(species_names)

print(aaa)