In [37]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import datetime
import os

col_names = ["Index", "DateD", "DateM", "DateY", "Time", "MonStatus", "Extras", "MonN", "TubeN", "DataType", "Unused", "Light"]

for i in range(1, 33):
    col_names.append(f"Sp{i}")

file = 'Steatoda B masking 04 pm.txt'

df = pd.read_csv(file, names=col_names, sep='\s+', header=None)
df = df.set_index('Index')
df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S', errors='coerce')
df = df[df["MonStatus"] == 1]


month_map = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6}
df['DateM'] = df['DateM'].str[:3].map(month_map)
df['DateY'] = df['DateY'].apply(lambda x: int(str(20) + str(x)))
df['Date'] = pd.to_datetime(dict(year=df['DateY'], month=df['DateM'], day=df['DateD']), errors='coerce')

df['Time'] = pd.to_datetime(dict(year=df['Date'].dt.year,
                                     month=df['Date'].dt.month,
                                     day=df['Date'].dt.day,
                                     hour=df['Time'].dt.hour,
                                     minute=df['Time'].dt.minute,
                                     second=df['Time'].dt.second))

df = df.drop(["DateD", "DateM", "DateY", "Date", "MonStatus", "Extras", "MonN", "TubeN", "DataType", "Unused"], axis=1)

day_map = {day: idx+1 for idx, day in enumerate(df['Time'].dt.day.unique())}

df.insert(0, 'Day', df['Time'].dt.day.map(day_map))

def filter_and_merge(df, threshold=0):
    additional_columns = df.columns[:3]
    filtered_dfs = []

    for day in range(1, 9):  
        day_df = df[df['Day'] == day] 
        count_mov = day_df.filter(like="Sp")  
        x = count_mov.sum(axis=0)  
        z = x > threshold  
        columns_to_keep = z[z].index 
        
        all_columns_to_keep = list(additional_columns) + list(columns_to_keep)
        filtered_df = day_df[all_columns_to_keep]
        filtered_dfs.append(filtered_df)

    merged_df = pd.concat(filtered_dfs)
    merged_df1 = merged_df.dropna(axis=1)
    
    return merged_df1
    
merged_df = filter_and_merge(df)

def entrainment(data, column):
    if column not in data.columns:
        return False
    
    dflight = data[data['Light'] == 1][column]
    dfdark = data[data['Light'] == 0][column]
    
    lightmean = np.mean(dflight)
    darkmean = np.mean(dfdark)
    
    if darkmean == 0:
        return False
    
    diff = lightmean / darkmean
    return diff > 0.25

spiders = ["Sp"+str(i) for i in range(1, 33)]

entrainment_results = []

for spider_column in spiders:
    if spider_column in merged_df.columns:  
        entrainment_result = entrainment(merged_df, spider_column)
        entrainment_results.append((spider_column, entrainment_result))

results_df = pd.DataFrame(entrainment_results, columns=['Spider', 'Entrained'])

entrained_spiders = results_df[results_df['Entrained'] == True]['Spider'].tolist()

finaldf = []

additional_columns = merged_df.columns[:3]
columns_to_keep = entrained_spiders
        
all_columns_to_keep = list(additional_columns) + list(columns_to_keep)
filtered_df = merged_df[all_columns_to_keep]  # Use merged_df here, not entrained_spiders

finaldf.append(filtered_df)

merged_dfx = pd.concat(finaldf)
merged_dfx1 = merged_dfx.dropna(axis=1)  # Use merged_dfx here, not merged_df

display(merged_dfx1)

Unnamed: 0_level_0,Day,Time,Light,Sp1,Sp3,Sp4,Sp5,Sp7,Sp8,Sp11,Sp12,Sp13,Sp18,Sp19,Sp21,Sp22,Sp23
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,1,2024-03-27 09:45:00,1,0,1,3,5,0,4,0,0,11,0,10,0,4,0
2,1,2024-03-27 09:46:00,1,0,0,0,4,0,7,0,0,2,0,0,0,2,0
3,1,2024-03-27 09:47:00,1,0,1,0,3,0,1,0,0,0,0,3,0,3,0
4,1,2024-03-27 09:48:00,1,0,1,0,2,0,1,0,0,4,0,6,0,3,0
5,1,2024-03-27 09:49:00,1,0,1,0,2,0,4,0,0,3,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10090,8,2024-04-03 09:54:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10091,8,2024-04-03 09:55:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10092,8,2024-04-03 09:56:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10093,8,2024-04-03 09:57:00,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [39]:
spidersA = ['Sp4', 'Sp9', 'Sp10', 'Sp12', 'Sp14', 'Sp15', 'Sp19', 'Sp21']
spidersB = ['Sp1', 'Sp3', 'Sp5', 'Sp7', 'Sp12', 'Sp14', 'Sp17', 'Sp18', 'Sp19', 'Sp21', 'Sp22', 'Sp23']

import pandas as pd
import matplotlib.pyplot as plt
import os

def filter_and_compare_activity(df, spider_cols, day_pulse, day_pre_pulse, start_hour, end_hour, save_folder):
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    pre_pulse_days = [2, 3, 4]
    after_pulse_days = [6, 7, 8]

    pre_pulse_dates = df.loc[df['Day'].isin(pre_pulse_days), 'Time'].dt.date.unique()
    pulse_date = df.loc[df['Day'] == day_pulse, 'Time'].dt.date.unique()[0]
    after_pulse_dates = df.loc[df['Day'].isin(after_pulse_days), 'Time'].dt.date.unique()
    
    start_time_pre_pulse = pd.to_datetime(f"{pre_pulse_dates[0]} {start_hour}")
    end_time_pre_pulse = pd.to_datetime(f"{pre_pulse_dates[-1]} {end_hour}")
    start_time_pulse = pd.to_datetime(f"{pulse_date} {start_hour}")
    end_time_pulse = pd.to_datetime(f"{pulse_date} {end_hour}")
    start_time_after_pulse = pd.to_datetime(f"{after_pulse_dates[0]} {start_hour}")
    end_time_after_pulse = pd.to_datetime(f"{after_pulse_dates[-1]} {end_hour}")

    days_pre_pulse = df.loc[df['Day'].isin(pre_pulse_days)]
    day_pulse = df.loc[df['Day'] == day_pulse]
    days_after_pulse = df.loc[df['Day'].isin(after_pulse_days)]
    
    day_pre_pulse_filtered = days_pre_pulse[(days_pre_pulse['Time'] >= start_time_pre_pulse) & (days_pre_pulse['Time'] <= end_time_pre_pulse)]
    day_pulse_filtered = day_pulse[(day_pulse['Time'] >= start_time_pulse) & (day_pulse['Time'] <= end_time_pulse)]
    day_after_pulse_filtered = days_after_pulse[(days_after_pulse['Time'] >= start_time_after_pulse) & (days_after_pulse['Time'] <= end_time_after_pulse)]
    
    for spider_col in spider_cols:
        mean_day_pre_pulse = day_pre_pulse_filtered[spider_col].mean()
        mean_day_pulse = day_pulse_filtered[spider_col].mean()
        mean_day_after_pulse = day_after_pulse_filtered[spider_col].mean()
        sem_day_pre_pulse = day_pre_pulse_filtered[spider_col].sem()
        sem_day_pulse = day_pulse_filtered[spider_col].sem()
        sem_day_after_pulse = day_after_pulse_filtered[spider_col].sem()
        
        plt.figure(figsize=(7, 5)) 
        plt.bar(['Pre-pulse days 2-4', '2-hour pulse', 'After-pulse days 6-8'],
                [mean_day_pre_pulse, mean_day_pulse, mean_day_after_pulse],
                yerr=[sem_day_pre_pulse, sem_day_pulse, sem_day_after_pulse], capsize=5)
        plt.suptitle(f'Comparison in activity for {spider_col} with and without the 2-hour pulse', fontsize=14)
        plt.ylabel('Average activity', fontsize=12)
        plt.xlabel('Time period', fontsize=12)
        plt.xticks(fontsize=10)
        plt.yticks(fontsize=10)
        plt.tight_layout() 
        
        file_path = os.path.join(save_folder, f"{spider_col}_comparison.png")
        plt.savefig(file_path)
        plt.close()

spider_cols = spidersB
day_pulse = 5 
day_pre_pulse = 4  
start_hour = '16:00:00' 
end_hour = '18:00:00'  
save_folder = 'TESTSTB04pm_three_bars'

merged_df['Time'] = pd.to_datetime(merged_df['Time'])

filter_and_compare_activity(merged_dfx1, spider_cols, day_pulse, day_pre_pulse, start_hour, end_hour, save_folder)

KeyError: 'Sp14'