# Preprocessing

In [2]:
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
from datetime import date, time

In [3]:
# Raw tab-separated monitor export to load (change filename as appropriate)
filename = "Metazygia monitor 1.txt"
# The file has no header row, so columns get integer labels; the first field becomes the row index
df = pd.read_csv(filename, sep="\t", header=None, index_col=0)
display(df)

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,32,33,34,35,36,37,38,39,40,41
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
145,20 Apr 17,16:46:00,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
146,20 Apr 17,16:47:00,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
147,20 Apr 17,16:48:00,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
148,20 Apr 17,16:49:00,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
149,20 Apr 17,16:50:00,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21185,5 May 17,07:26:00,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21186,5 May 17,07:27:00,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21187,5 May 17,07:28:00,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21188,5 May 17,07:29:00,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Creating Labels

In [4]:
# This function builds the list of column labels: date-time, light, then one label per spider

def create_column_labels(date_time_label, light_label, spider_count):
    """Return the ordered list of column labels for the cleaned table.

    The list starts with ``date_time_label`` and ``light_label`` and is
    followed by 'Spider 1' through 'Spider <spider_count>'.
    """
    spider_labels = ['Spider ' + str(number) for number in range(1, spider_count + 1)]
    return [date_time_label, light_label] + spider_labels

# Processing File

In [5]:
# This function creates a new .csv file with columns labeled Light and Spider name and Date-Time as
# the index

def txt_cleaning_to_csv(df, number_of_spiders=32, light_column=9,
                        date_time_label='Date-Time',
                        output_path="Metazygia Monitor 1.csv"):
    """Reduce a raw monitor table to Light + spider columns and save it as CSV.

    The first two columns of ``df`` are joined with a space into a single
    date-time value, the column at 1-based position ``light_column`` becomes
    'Light', and the last ``number_of_spiders`` columns become 'Spider 1' ...
    'Spider N'. Every other column is discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table whose first two columns hold the date and the time.
    number_of_spiders : int, default 32
        How many trailing columns to keep as spider columns.
    light_column : int, default 9
        1-based position (among the columns of ``df``) of the light column.
    date_time_label : str, default 'Date-Time'
        Name given to the combined date-time column / resulting index.
    output_path : str, default "Metazygia Monitor 1.csv"
        Where the cleaned table is written. Pass a different path for each
        input file so earlier results are not overwritten.

    Returns
    -------
    pandas.DataFrame
        The cleaned table indexed by the parsed date-time (format inferred by
        ``pd.to_datetime``), with columns 'Light', 'Spider 1', ...

    Raises
    ------
    ValueError
        If ``number_of_spiders`` or ``light_column`` does not fit the number
        of columns in ``df``.
    """
    total_columns = df.shape[1]
    # Guard first: iloc[:, -0:] would silently select *every* column
    if not 1 <= number_of_spiders <= total_columns - 2:
        raise ValueError(
            "number_of_spiders must be between 1 and %d, got %r"
            % (total_columns - 2, number_of_spiders)
        )
    if not 1 <= light_column <= total_columns:
        raise ValueError(
            "light_column must be between 1 and %d, got %r"
            % (total_columns, light_column)
        )

    column_names = create_column_labels(date_time_label, 'Light', number_of_spiders)

    # Select everything by position so the result does not depend on how the raw columns are labeled
    date_time_text = df.iloc[:, 0].astype(str) + ' ' + df.iloc[:, 1].astype(str)
    light = df.iloc[:, light_column - 1]
    spiders = df.iloc[:, -number_of_spiders:]

    cleaned = pd.concat([date_time_text, light, spiders], axis=1)
    cleaned.columns = column_names

    # The CSV keeps the date-time as the original text, one row per reading, no index column
    cleaned.to_csv(output_path, index=False)

    # Parse the date-time in memory rather than re-reading the file that was just written
    cleaned[date_time_label] = pd.to_datetime(cleaned[date_time_label])

    return cleaned.set_index(date_time_label)

# Clean the raw table, write the CSV, and rebind df to the cleaned, date-time-indexed frame.
# NOTE: after this line df no longer holds the raw table, so re-run the loading cell
# before re-running this one.
df = txt_cleaning_to_csv(df)
display(df)

Unnamed: 0_level_0,Light,Spider 1,Spider 2,Spider 3,Spider 4,Spider 5,Spider 6,Spider 7,Spider 8,Spider 9,...,Spider 23,Spider 24,Spider 25,Spider 26,Spider 27,Spider 28,Spider 29,Spider 30,Spider 31,Spider 32
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-04-20 16:46:00,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-04-20 16:47:00,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-04-20 16:48:00,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-04-20 16:49:00,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-04-20 16:50:00,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-05 07:26:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-05-05 07:27:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-05-05 07:28:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-05-05 07:29:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Convert Cell Type Function

In [6]:
# Changes all count cell values to float

def convert_cell_type_to_float(df):
    """Cast every column whose label starts with 'S' to float.

    The conversion is applied to ``df`` itself, and that same frame is
    returned.
    """
    starts_with_s = df.columns.str.startswith('S')
    for label in df.columns[starts_with_s]:
        df[label] = df[label].astype(float)

    return df

# Cast the columns whose label starts with 'S' (the spider counts) to float;
# the function modifies df itself and returns it
df = convert_cell_type_to_float(df)
display(df)

Unnamed: 0_level_0,Light,Spider 1,Spider 2,Spider 3,Spider 4,Spider 5,Spider 6,Spider 7,Spider 8,Spider 9,...,Spider 23,Spider 24,Spider 25,Spider 26,Spider 27,Spider 28,Spider 29,Spider 30,Spider 31,Spider 32
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-04-20 16:46:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-04-20 16:47:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-04-20 16:48:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-04-20 16:49:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-04-20 16:50:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-05 07:26:00,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-05-05 07:27:00,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-05-05 07:28:00,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-05-05 07:29:00,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Remove Spider Function

In [7]:
# Names of the spider columns to remove, written exactly as they appear in the column headers

spider_list = ['Spider 1']

# This function removes desired Spider column and all of the data associated with it

def remove_nth_spider(df, spider_list):
    """Return a new frame equal to ``df`` minus the columns named in ``spider_list``.

    ``df`` itself is left unchanged.
    """
    return df.drop(columns=spider_list)

# new_df is df without the columns listed in spider_list; df itself keeps all of its columns
new_df = remove_nth_spider(df, spider_list)
display(new_df)

Unnamed: 0_level_0,Light,Spider 2,Spider 3,Spider 4,Spider 5,Spider 6,Spider 7,Spider 8,Spider 9,Spider 10,...,Spider 23,Spider 24,Spider 25,Spider 26,Spider 27,Spider 28,Spider 29,Spider 30,Spider 31,Spider 32
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-04-20 16:46:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-04-20 16:47:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-04-20 16:48:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-04-20 16:49:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-04-20 16:50:00,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-05-05 07:26:00,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-05-05 07:27:00,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-05-05 07:28:00,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-05-05 07:29:00,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Filter Spider Movement Data Function

In [10]:
# Replace a column's values inside a date-time range with NaN; values outside the range are kept

def keep_specific_spider_values(df, column_name, death_date_time_start, death_date_time_end):
    """Blank out one column's readings over a date-time range.

    Every value of ``column_name`` whose index label lies between
    ``death_date_time_start`` and ``death_date_time_end`` is replaced with
    NaN; values outside that range, and all other columns, are untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to edit. It is modified in place.
        NOTE(review): string bounds assume a sorted date-time index - confirm for other inputs.
    column_name : str
        Column whose values are blanked.
    death_date_time_start, death_date_time_end
        Bounds of the range to blank, used as index labels.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after modification.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df``.
    """
    # NaN cannot be stored in an integer column, so promote it to float first
    if pd.api.types.is_integer_dtype(df[column_name]):
        df[column_name] = df[column_name].astype(float)

    # Label-based slice on the index: both endpoints are included.
    # Assign through df.loc directly so the requested column (not a fixed one) is updated.
    df.loc[death_date_time_start:death_date_time_end, column_name] = np.nan

    return df

# Blank the 'Spider 1' readings in df from 2017-04-20 16:49:00 through 2017-05-05 07:30:00.
# This works on df (which still has 'Spider 1'), not on new_df from the previous section.
df = keep_specific_spider_values(df, 'Spider 1', '2017-04-20 16:49:00', '2017-05-05 07:30:00')
display(df[['Spider 1']])

Unnamed: 0_level_0,Spider 1
Date-Time,Unnamed: 1_level_1
2017-04-20 16:46:00,0.0
2017-04-20 16:47:00,0.0
2017-04-20 16:48:00,0.0
2017-04-20 16:49:00,
2017-04-20 16:50:00,
...,...
2017-05-05 07:26:00,
2017-05-05 07:27:00,
2017-05-05 07:28:00,
2017-05-05 07:29:00,
