# Replacing Activity Onset Means with Medians

## In this notebook, I will replace activity onset means with medians and replace the values in the data zoo dataframe

In [3]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as matdates
from datetime import timedelta
import os

### Import df

In [4]:
#define monitors and species, THIS WILL BE CHANGED LATER
monitor = 'Monitor 1'
master_df_name = 'all_spider_and_fly_summary.csv'
species = 'Metazygia wittfeldae'

directory = "/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie/" + species + "/Activity Onset"

#change directories 
os.chdir(directory)

#Import mean activity onset df
activity_onset_df = pd.read_csv(species + ' ' + monitor + ' Updated_LD Activity Onset Values.csv')

#drop date column
activity_onset_df = activity_onset_df.iloc[: , 1:]

activity_onset_df

Unnamed: 0,Monitor 1 Spider 1,Monitor 1 Spider 2,Monitor 1 Spider 3,Monitor 1 Spider 4,Monitor 1 Spider 5,Monitor 1 Spider 6,Monitor 1 Spider 7,Monitor 1 Spider 8,Monitor 1 Spider 9,Monitor 1 Spider 10,...,Monitor 1 Spider 21,Monitor 1 Spider 23,Monitor 1 Spider 25,Monitor 1 Spider 26,Monitor 1 Spider 27,Monitor 1 Spider 28,Monitor 1 Spider 29,Monitor 1 Spider 30,Monitor 1 Spider 31,Monitor 1 Spider 32
0,30.0,68.0,38.0,61.0,36.0,33.0,54.0,33.0,43.0,63.0,...,46.0,128.0,51.0,36.0,32.0,73.0,35.0,182.0,25.0,33.0
1,43.0,37.0,41.0,59.0,39.0,25.0,27.0,42.0,34.0,,...,26.0,91.0,33.0,37.0,34.0,54.0,31.0,54.0,34.0,37.0
2,28.0,35.0,26.0,63.0,31.0,38.0,36.0,45.0,38.0,36.0,...,21.0,88.0,47.0,23.0,28.0,49.0,26.0,45.0,24.0,33.0
3,24.0,27.0,29.0,35.0,34.0,28.0,34.0,21.0,28.0,47.0,...,32.0,70.0,47.0,36.0,28.0,51.0,30.0,29.0,24.0,27.0
4,37.0,36.0,34.0,42.0,36.0,26.0,33.0,25.0,34.0,54.0,...,32.0,77.0,67.0,27.0,24.0,52.0,18.0,44.0,37.0,43.0


### Define Necessary Functions

In [7]:
#this function will loop through the folders in the spider data zoo to gather spider names
directory = "/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie"

### This function creates list that counts the number of days that each spider has an activity onset for
def activity_onset_days(activity_onset_df):
    
    activity_onset_days = activity_onset_df.count()
    
    return activity_onset_days

def get_species_names(directory):
    
    #create empty list to append names
    species_names = []
    
    #loop through folders in given directory
    for folder in os.listdir(directory):
        
        #only select folders starting with capital letter, as these are the species names
        if folder == folder.capitalize():
        
            #append names of files to empty list
            species_names.append(folder)
        
    
    return species_names

#this function will take activity onset values and calculate the median activity onset for each individual
def calculate_activity_onset_medians(activity_onset_df):
    
    #create empty list to append means to
    activity_onset_medians_list = []

    #remove date column
    activity_onset_medians_df = activity_onset_df
    
    #take median activity onset for each spider and append to list
    column_names = list(activity_onset_medians_df)
    
    #calculate median of every column
    for x in column_names:
        
        median_activity_onset = np.nanmedian(activity_onset_medians_df[x])
        
        activity_onset_medians_list.append(median_activity_onset)
    
    #append averages as the last row of the dataframe
    activity_onset_medians_df.loc[len(activity_onset_medians_df)] = activity_onset_medians_list
    
    return activity_onset_medians_df

#this function will take the sem of every spider
def calculate_sem(activity_onset_df, activity_onset_medians_df):
    
    #create new df
    activity_onset_medians_sem = activity_onset_df
    
    #count number of days activity onset occurs
    activity_onset_days_df = activity_onset_days(activity_onset_medians_df)
    
    #create empty list to append sem to
    activity_onset_sem_list = []
    
    #get col names
    column_names = list(activity_onset_medians_df)
    
    #calculate sem
    for x in column_names:
        
        sem_activity_onset = np.std(activity_onset_medians_df[x][:-1], ddof=1) / np.sqrt(np.size(activity_onset_days_df[x]))
        
        activity_onset_sem_list.append(sem_activity_onset)
    
    #append sems as the last row of the dataframe
    activity_onset_medians_sem.loc[len(activity_onset_medians_df)] = activity_onset_sem_list
    
    return activity_onset_medians_sem

#this function will create the final df, with one column containing sem and one containing median activity onset
def create_median_sem_df(activity_onset_medians_sem):
    
    #flip columns and rows of df
    activity_onset_medians_sem_T = activity_onset_medians_sem.transpose()
    
    #select for last two columns, as those contain sem and median
    activity_onset_medians_sem_T = activity_onset_medians_sem_T.iloc[: , -2:]
    
    #rename columns
    activity_onset_medians_sem_T = activity_onset_medians_sem_T.rename(columns = {activity_onset_medians_sem_T.columns[-2] : 'Median Activity Onset',
                                                                                  activity_onset_medians_sem_T.columns[-1] : 'SEM'}) 
    
    return activity_onset_medians_sem_T
    

### Test Functions

In [4]:
activity_onset_medians_df = calculate_activity_onset_medians(activity_onset_df)

activity_onset_medians_sem = calculate_sem(activity_onset_df, activity_onset_medians_df)

final_median_activity_onset_df = create_median_sem_df(activity_onset_medians_sem)

In [5]:
final_median_activity_onset_df

Unnamed: 0,Median Activity Onset,SEM
Monitor 1 Spider 1,30.0,7.569676
Monitor 1 Spider 2,36.0,15.820872
Monitor 1 Spider 3,34.0,6.188699
Monitor 1 Spider 4,59.0,12.649111
Monitor 1 Spider 5,36.0,2.949576
Monitor 1 Spider 6,28.0,5.43139
Monitor 1 Spider 7,34.0,10.18332
Monitor 1 Spider 8,33.0,10.401923
Monitor 1 Spider 9,34.0,5.549775
Monitor 1 Spider 10,50.5,11.401754


In [9]:
directory

'/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie'

In [16]:
species_names = get_species_names(directory)

def get_monitors(species_names):
    
    monitors = []
    
    for x in species_names:
        for folder in os.listdir(directory + "/" + x + "/Activity Onset"):
            #only select folders starting with capital letter, as these are the species names
            if folder == folder.capitalize():
        
                #append names of files to empty list
                monitors.append(folder)
                
    return monitors
                
aaa = get_monitors(species_names)

print(aaa)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/willcatalano/Library/CloudStorage/Box-Box/Spider Data Zoo/Zoo by specie/Verrucosa/Activity Onset'