In [8]:
import pandas as pd
import numpy as np
import os

import matplotlib.pyplot as plt

In [9]:
def data_organizer(file_path):
    """Load a whitespace-delimited monitor recording and return a cleaned table.

    Every row of the file is read as 12 metadata fields (row index, day,
    month name, year, clock time, monitor status, ..., light flag) followed
    by 32 activity-count fields, which are named ``Sp1`` .. ``Sp32``.

    Processing steps:

    1. Rows whose ``MonStatus`` is not 1 are discarded (the number of
       discarded rows is printed).
    2. The separate day / month / year / clock-time fields are merged into a
       single ``Time`` timestamp column.
    3. A ``Day`` column is inserted that numbers the distinct calendar dates
       1, 2, 3, ... in order of first appearance.
    4. Every ``Sp*`` column whose total count is 10 or less is dropped.

    Parameters
    ----------
    file_path : str or path-like
        Location of the recording; passed straight to ``pandas.read_csv``.

    Returns
    -------
    df : pandas.DataFrame
        Indexed by ``Index`` with columns ``Day``, ``Time``, ``Light`` and
        the retained ``Sp*`` columns.
    spiders : list of int
        Channel numbers (1-32) of the retained ``Sp*`` columns.

    Notes
    -----
    Rows whose date or time cannot be parsed get ``Time = NaT`` and
    ``Day = 0``.
    """
    n_channels = 32
    meta_cols = ["Index", "DateD", "DateM", "DateY", "Time", "MonStatus",
                 "Extras", "MonN", "TubeN", "DataType", "Unused", "Light"]
    sp_cols = [f"Sp{i}" for i in range(1, n_channels + 1)]

    # Raw string: '\s' inside a normal string literal is an invalid escape.
    df = pd.read_csv(file_path, names=meta_cols + sp_cols, sep=r'\s+', header=None)
    df = df.set_index('Index')

    keep = df["MonStatus"] == 1
    print(f"Removed rows of data where Monitor Status is not 1: {int((~keep).sum())}\n")
    # Copy so the column assignments below act on an independent frame
    # instead of a slice of the unfiltered one (avoids SettingWithCopy).
    df = df[keep].copy()

    month_map = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
                 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}
    month = df['DateM'].str[:3].map(month_map)

    # Two-digit years are offsets from 2000. Adding numerically (rather than
    # gluing "20" in front of the text) keeps single-digit years such as 5
    # correct; values that already look like full years are left alone.
    year = pd.to_numeric(df['DateY'], errors='coerce')
    year = year.where(year >= 100, year + 2000)

    date = pd.to_datetime(dict(year=year, month=month, day=df['DateD']), errors='coerce')

    # Parsing "HH:MM:SS" alone yields a timestamp on a placeholder date;
    # subtracting its midnight leaves just the time of day as a timedelta.
    clock = pd.to_datetime(df['Time'], format='%H:%M:%S', errors='coerce')
    df['Time'] = date + (clock - clock.dt.normalize())

    df = df.drop(columns=["DateD", "DateM", "DateY", "MonStatus", "Extras",
                          "MonN", "TubeN", "DataType", "Unused"])

    # Number days by full calendar date, not by day-of-month, so recordings
    # that cross a month boundary do not reuse a day number.
    # factorize() codes are 0-based in order of first appearance (NaT -> -1).
    day_codes, _ = pd.factorize(df['Time'].dt.normalize())
    df.insert(0, 'Day', day_codes + 1)

    # Keep only channels with more than 10 counts in total.
    spiders = [i for i in range(1, n_channels + 1) if df[f"Sp{i}"].sum() > 10]
    inactive = [f"Sp{i}" for i in range(1, n_channels + 1) if i not in spiders]
    df = df.drop(columns=inactive)

    return df, spiders

In [10]:
def rasterplot_binary(dataframe, spider_name, title, rolling_window=1):
    """Draw a binary raster plot of one activity column, one row per day.

    A subplot row is created for every distinct value of ``Day``. In each
    row, the x-axis is the time of day in hours; samples where ``Light`` is
    truthy are shaded yellow, and samples where the rolling mean of the
    activity column is greater than zero are filled solid.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``Day``, ``Time`` (datetime), ``Light`` and
        ``spider_name``.
    spider_name : str
        Name of the activity column to plot.
    title : str
        Figure title.
    rolling_window : int, default 1
        Window (in rows) of the rolling mean applied to the activity before
        thresholding at zero. The rolling mean is computed within each day.

    Returns
    -------
    fig : matplotlib.figure.Figure
    axs : numpy.ndarray of matplotlib.axes.Axes
        One Axes per day, top to bottom (a 1-D array even for a single day).

    Raises
    ------
    ValueError
        If ``dataframe`` has no rows (there would be nothing to plot).
    """
    sp_df = dataframe[['Day', 'Time', 'Light', spider_name]]

    days = sp_df['Day'].unique()
    if len(days) == 0:
        raise ValueError("dataframe contains no days to plot")

    # squeeze=False always returns a 2-D array of Axes. With the default,
    # a single-day recording yields a bare Axes object, which can neither be
    # zipped over nor indexed with [-1].
    fig, axs = plt.subplots(len(days), 1, squeeze=False)
    axs = axs[:, 0]

    fig.suptitle(title)
    fig.supxlabel('Time (hrs)')
    fig.supylabel('Day')

    x_limits = (-0.5, 23.5)

    # Separate activity by day
    for day, ax in zip(days, axs):
        sp_day = sp_df[sp_df['Day'] == day]
        time = sp_day['Time'].dt.hour + sp_day['Time'].dt.minute / 60
        light = sp_day['Light']
        activity = sp_day[spider_name]

        ax.tick_params(left=False, bottom=False)
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        ax.set_xlim(*x_limits)
        ax.set_ylim(0, 1)

        ax.set_ylabel(day, rotation='horizontal')

        ax.fill_between(time, 0, 1, where=light, alpha=.5, color='yellow')
        ax.fill_between(time, 0, 1, where=(activity.rolling(rolling_window).mean() > 0), alpha=1)

    # Only the bottom row carries hour labels.
    hour_ticks = np.arange(0, 25, 2)
    axs[-1].set_xticks(hour_ticks)
    axs[-1].set_xticklabels(hour_ticks, rotation='horizontal')
    # set_xticks() widens the view limits to include the tick at 24, which
    # would leave the bottom row misaligned with the rows above; restore the
    # common limits explicitly.
    axs[-1].set_xlim(*x_limits)

    return fig, axs


In [11]:
# Recording to analyse (relative path).
file_name = "Steatoda A masking 02 pm.txt"

# Parse the recording: `df` is the cleaned table, `spiders` the channel
# numbers whose activity columns were kept.
df, spiders = data_organizer(file_name)

# Show the cleaned table, then the retained channel numbers.
display(df, spiders)

Removed rows of data where Monitor Status is not 1: 0



Unnamed: 0_level_0,Day,Time,Light,Sp3,Sp4,Sp5,Sp7,Sp9,Sp10,Sp11,...,Sp14,Sp15,Sp16,Sp17,Sp18,Sp19,Sp21,Sp22,Sp23,Sp25
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,2024-03-20 11:04:00,1,2,2,15,8,5,0,0,...,0,0,1,0,4,3,1,3,0,2
2,1,2024-03-20 11:05:00,1,5,1,1,0,8,2,0,...,3,1,4,0,11,4,1,2,0,0
3,1,2024-03-20 11:06:00,1,3,1,3,0,8,1,1,...,5,0,1,0,1,3,1,5,0,0
4,1,2024-03-20 11:07:00,1,0,1,3,0,12,2,0,...,0,1,6,0,2,3,3,0,1,1
5,1,2024-03-20 11:08:00,1,0,1,4,0,10,0,0,...,4,0,3,0,5,1,2,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10040,8,2024-03-27 10:23:00,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10041,8,2024-03-27 10:24:00,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10042,8,2024-03-27 10:25:00,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,14,0,0
10043,8,2024-03-27 10:26:00,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


[3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 25]

In [12]:
#average masking 
def get_masking_start(df, window_length_mins = 120, verbose=True):
    """Return the time of the first lights-off transition followed by light again.

    A lights-off transition is a row with ``Light == 0`` whose preceding row
    has ``Light == 1``. Transitions are visited in order; for each one, the
    row whose index label is ``window_length_mins + 10`` larger is looked up,
    and if its ``Light`` is 1 the ``Time`` of the transition row is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Time`` and ``Light`` columns and have an integer
        index. NOTE(review): the label arithmetic assumes the index advances
        by one per minute of recording - confirm against the data source.
    window_length_mins : int, default 120
        Offset, in index steps, used (plus 10) to pick the later row whose
        light state is checked. NOTE(review): the extra 10 looks like a
        safety margin - confirm.
    verbose : bool, default True
        Print every candidate transition (index label and time) as it is
        examined.

    Returns
    -------
    pandas.Timestamp or None
        Time of the first qualifying transition, or ``None`` if there is
        none.
    """
    times = df['Time']
    lights = df['Light']

    went_dark = (lights == 0) & (lights.shift(1) == 1)
    check_offset = window_length_mins + 10

    for index in df[went_dark].index:
        if verbose:
            print(index, times.loc[index])

        # .get() yields None when the later label does not exist (transition
        # too close to the end of the recording, or that row was filtered
        # out) instead of raising KeyError; such a candidate is skipped.
        later_light = lights.get(index + check_offset)
        if later_light is not None and later_light == 1:
            return times.loc[index]

    return None

# Find the masking start with the default window and echo it.
# `time` is read again further down as the start of the averaging window.
time = get_masking_start(df)
print(time)

536 2024-03-20 19:59:00
1976 2024-03-21 19:59:00
3416 2024-03-22 19:59:00
4856 2024-03-23 19:59:00
5960 2024-03-24 14:23:00
2024-03-24 14:23:00


In [20]:
import pandas as pd

def calculate_spider_activity(df, start_time, duration_hours=1):
    """Mean value of every ``Sp*`` column inside a time window.

    The window is half-open: rows with
    ``start_time <= Time < start_time + duration_hours`` are included.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime ``Time`` column; every column whose name
        starts with ``'Sp'`` is averaged.
    start_time : datetime-like or str
        Start of the window; converted with ``pandas.to_datetime``.
    duration_hours : int or float, default 1
        Length of the window in hours.

    Returns
    -------
    pandas.Series
        Per-column mean over the rows in the window, indexed by column name.
    """
    window_open = pd.to_datetime(start_time)
    window_close = window_open + pd.Timedelta(hours=duration_hours)

    activity_cols = list(filter(lambda name: name.startswith('Sp'), df.columns))

    stamps = df['Time']
    in_window = stamps.ge(window_open) & stamps.lt(window_close)

    return df.loc[in_window, activity_cols].mean()

# `time` comes from get_masking_start(df): the first row where Light is 0
# and the previous row's Light is 1, i.e. a lights-OFF transition.
start_time = time
average_spider_activity = calculate_spider_activity(df, start_time)

# The averaging window opens at that lights-off transition, so label it as
# such (the previous label said "lights on").
print("Average spider activity for the first hour after lights off:")
print(average_spider_activity)

Average spider activity for the first hour after lights on:
Sp3     0.000000
Sp4     0.000000
Sp5     0.000000
Sp7     0.000000
Sp9     0.000000
Sp10    0.100000
Sp11    0.000000
Sp12    0.000000
Sp13    0.050000
Sp14    0.000000
Sp15    0.016667
Sp16    0.000000
Sp17    0.000000
Sp18    0.066667
Sp19    0.000000
Sp21    0.100000
Sp22    0.000000
Sp23    0.000000
Sp25    3.983333
dtype: float64
