In [2]:
import pandas as pd
from pathlib import Path

# === CONFIG ===
# Folder that is scanned for .xlsx workbooks containing a "cep.fund.frequency" column.
input_dir = Path(r"C:\Users\labadmin\Documents\Uppsala analyses\Floral buzzes\R shiny app output files")
frequency_range = (250, 450)  # Hz, both bounds inclusive

# Load and filter all Excel files
# Maps workbook file name -> DataFrame holding only the rows inside frequency_range.
filtered_data = {}

# sorted() keeps the processing order reproducible regardless of the filesystem.
for file_path in sorted(input_dir.glob("*.xlsx")):
    # Excel drops "~$<name>.xlsx" owner files next to open workbooks; they match
    # the glob but are not readable workbooks, so skip them instead of crashing.
    if file_path.name.startswith("~$"):
        continue

    df = pd.read_excel(file_path)

    # Filter by cepstral fundamental frequency.
    # Series.between is inclusive on both ends by default and is False for NaN,
    # which matches the explicit ">= low & <= high" comparison.
    low_hz, high_hz = frequency_range
    df_filtered = df[df["cep.fund.frequency"].between(low_hz, high_hz)].copy()

    filtered_data[file_path.name] = df_filtered
    print(f"{file_path.name}: {len(df_filtered)} floral buzzes found")

# Example preview from one file (first one processed). Guarded so that an empty
# folder shows nothing instead of raising IndexError.
next(iter(filtered_data.values())).head() if filtered_data else None

respi_20250310_111645.xlsx: 33 floral buzzes found
respi_20250310_133455.xlsx: 70 floral buzzes found
respi_20250310_145836.xlsx: 62 floral buzzes found
respi_20250310_153402.xlsx: 37 floral buzzes found
respi_20250311_125224.xlsx: 22 floral buzzes found
respi_20250311_132313.xlsx: 41 floral buzzes found
respi_20250312_082147.xlsx: 62 floral buzzes found
respi_20250312_090451.xlsx: 74 floral buzzes found
respi_20250320_090201.xlsx: 75 floral buzzes found
respi_20250320_093847.xlsx: 74 floral buzzes found
respi_20250320_102350.xlsx: 47 floral buzzes found
respi_20250320_104858.xlsx: 32 floral buzzes found
respi_20250320_111333.xlsx: 81 floral buzzes found
respi_20250320_114134.xlsx: 224 floral buzzes found
respi_20250320_124101.xlsx: 40 floral buzzes found
respi_20250320_134513.xlsx: 58 floral buzzes found
respi_20250320_141545.xlsx: 129 floral buzzes found
respi_20250320_150319.xlsx: 42 floral buzzes found
respi_20250323_091109.xlsx: 106 floral buzzes found
respi_20250323_093517.xlsx: 

Unnamed: 0,buzz.number,spec.dom.frequency,cep.fund.frequency,cep.fund.frequency.median,start,stop,buzz.duration,rms,peak.amp,peak.amp.nosmooth,spec.wl,fund.wl,file
0,1,0.0,282.84,170.67,31.15,31.31,0.16,0.0,0.01,0.02,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...
1,2,2.67,353.98,330.32,31.35,31.73,0.38,0.0,0.01,0.02,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...
6,7,0.0,358.08,182.86,44.58,44.74,0.16,0.0,0.01,0.02,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...
9,10,2.35,401.74,286.23,45.78,46.2,0.43,0.0,0.01,0.02,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...
10,11,4097.6,375.97,335.83,46.4,46.78,0.38,0.0,0.01,0.02,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...


In [5]:
import pandas as pd
from pathlib import Path

# === CONFIG ===
input_dir = Path(r"C:\Users\labadmin\Documents\Uppsala analyses\Floral buzzes\R shiny app output files")
frequency_range = (250, 450)  # inclusive bounds applied to "cep.fund.frequency"

# === Load metadata ===
laser_sync = pd.read_excel(r"D:\Laser\data\txt_files\flying_bees\peak_counts_in_windows.xlsx")
video_sync = pd.read_csv(r"C:\Users\labadmin\Documents\Uppsala analyses\BORIS manual labelling\respirometry syncing\respirometry_syncing.csv")
bee_meta = pd.read_excel(r"C:\Users\labadmin\Documents\Uppsala analyses\beeID laser file name + weight.xlsx", sheet_name="flying_bees")

# Extract laser_key from laser_sync file name: "_111645"
# (the six digits directly before ".txt", re-prefixed with "_"; non-matching names stay NaN)
laser_sync["laser_key"] = "_" + laser_sync["file"].str.extract(r"_(\d{6})\.txt$", expand=False)

# Prepare lookup table
bee_meta["BeeID"] = bee_meta["BeeID"].str.lower()
bee_meta["laser_key"] = bee_meta["Laser file name"]  # already in "_NNNNNN" format
# Third "-"-separated token of the observation id, lower-cased to match BeeID.
video_sync["bee_id"] = video_sync["Observation id"].str.split("-").str[2].str.lower()

# video_sync row -> bee metadata (via bee id) -> laser sync row (via laser_key).
# Rows missing any value needed for the offset are dropped.
lookup = (
    video_sync[["bee_id", "Start (s)"]].rename(columns={"Start (s)": "video_poke_time"})
    .merge(
        bee_meta[["BeeID", "laser_key", "open_door_start_video_s", "open_door_stop_video_s"]],
        left_on="bee_id", right_on="BeeID", how="left"
    )
    .merge(
        laser_sync[["laser_key", "first_peak_time"]],
        on="laser_key", how="left"
    )
    .dropna(subset=["laser_key", "first_peak_time", "video_poke_time"])
)
# Offset that is added to buzz times below to obtain start_video / stop_video.
lookup["time_offset"] = lookup["video_poke_time"] - lookup["first_peak_time"]

# Keep exactly one row per laser_key. With a repeated key, lookup.loc[key] would
# return a DataFrame instead of a Series and the scalar broadcasts in the loop
# below would misalign or fail.
duplicate_keys = lookup["laser_key"].duplicated(keep="first")
if duplicate_keys.any():
    print(f"⚠️  Dropping {int(duplicate_keys.sum())} duplicate lookup rows (keeping the first per laser_key)")
lookup = lookup[~duplicate_keys].set_index("laser_key")

print(f"✅ Lookup table constructed with {len(lookup)} entries.")

# === Process Buzz Files ===
# One DataFrame per workbook, holding only buzzes inside the door-open window.
adjusted_buzzes = []

# sorted() keeps the processing order reproducible regardless of the filesystem.
for file_path in sorted(input_dir.glob("*.xlsx")):
    # Skip Excel "~$<name>.xlsx" owner files created while a workbook is open.
    if file_path.name.startswith("~$"):
        continue

    print(f"\n📄 Processing file: {file_path.name}")
    df = pd.read_excel(file_path)

    # Filter floral buzzes
    floral_df = df[(df["cep.fund.frequency"] >= frequency_range[0]) &
                   (df["cep.fund.frequency"] <= frequency_range[1])].copy()
    print(f"→ Floral buzzes found: {len(floral_df)}")

    if floral_df.empty:
        continue

    # Extract laser_key directly from buzz file name
    # (last "_"-separated token of the stem, e.g. "respi_20250310_111645" -> "_111645")
    laser_key = "_" + file_path.stem.split("_")[-1]
    floral_df["laser_key"] = laser_key

    # Check laser_key match
    if laser_key not in lookup.index:
        print(f"⚠️  No lookup match for {laser_key}")
        continue

    # Assign synced fields from lookup (row is a Series because the index is unique)
    row = lookup.loc[laser_key]
    floral_df["time_offset"] = row["time_offset"]
    floral_df["door_open"] = row["open_door_start_video_s"]
    floral_df["door_close"] = row["open_door_stop_video_s"]

    # Sync time: shift the buzz start/stop by this recording's offset
    floral_df["start_video"] = floral_df["start"] + floral_df["time_offset"]
    floral_df["stop_video"] = floral_df["stop"] + floral_df["time_offset"]

    # Filter for buzzes within flower interaction window
    # (a buzz must lie entirely inside [door_open, door_close])
    within = floral_df[
        (floral_df["start_video"] >= floral_df["door_open"]) &
        (floral_df["stop_video"] <= floral_df["door_close"])
    ]
    print(f"✅ Buzzes within flower access window: {len(within)}")

    if not within.empty:
        adjusted_buzzes.append(within)

# === Final Output ===
# `combined` is only defined when at least one workbook contributed rows.
if adjusted_buzzes:
    combined = pd.concat(adjusted_buzzes, ignore_index=True)
    print(f"\n🎉 TOTAL floral buzzes during flower access: {len(combined)}")
    display(combined[["buzz.number", "cep.fund.frequency", "start_video", "stop_video"]].head())
else:
    print("\n⚠️ No valid buzzes found during flower access windows.")

✅ Lookup table constructed with 31 entries.

📄 Processing file: respi_20250310_111645.xlsx
→ Floral buzzes found: 33
✅ Buzzes within flower access window: 25

📄 Processing file: respi_20250310_133455.xlsx
→ Floral buzzes found: 70
✅ Buzzes within flower access window: 49

📄 Processing file: respi_20250310_145836.xlsx
→ Floral buzzes found: 62
✅ Buzzes within flower access window: 28

📄 Processing file: respi_20250310_153402.xlsx
→ Floral buzzes found: 37
✅ Buzzes within flower access window: 33

📄 Processing file: respi_20250311_125224.xlsx
→ Floral buzzes found: 22
✅ Buzzes within flower access window: 18

📄 Processing file: respi_20250311_132313.xlsx
→ Floral buzzes found: 41
✅ Buzzes within flower access window: 35

📄 Processing file: respi_20250312_082147.xlsx
→ Floral buzzes found: 62
✅ Buzzes within flower access window: 48

📄 Processing file: respi_20250312_090451.xlsx
→ Floral buzzes found: 74
✅ Buzzes within flower access window: 68

📄 Processing file: respi_20250320_090201.xl

Unnamed: 0,buzz.number,cep.fund.frequency,start_video,stop_video
0,27,335.83,604.24751,604.32751
1,28,333.08,604.40751,604.54751
2,29,329.63,604.80751,605.10751
3,30,291.67,605.15751,605.82751
4,31,276.24,608.26751,608.40751


In [6]:
# List the column labels of `combined`, the concatenated table of window-filtered
# buzzes; it must already exist in the kernel (it is assigned via pd.concat).
combined.columns

Index(['buzz.number', 'spec.dom.frequency', 'cep.fund.frequency',
       'cep.fund.frequency.median', 'start', 'stop', 'buzz.duration', 'rms',
       'peak.amp', 'peak.amp.nosmooth', 'spec.wl', 'fund.wl', 'file',
       'laser_key', 'time_offset', 'door_open', 'door_close', 'start_video',
       'stop_video'],
      dtype='object')

In [7]:
import pandas as pd
import numpy as np
import soundfile as sf
from pathlib import Path

# === CONFIG ===
sampling_rate = 10240
padding = 0.75  # seconds
laser_data_dir = Path(r"D:\Laser\data\txt_files\flying_bees")
output_dir = Path("floral_buzz_clips_from_txt")
output_dir.mkdir(exist_ok=True)

# === REBUILD LOOKUP with 'file' column ===
laser_sync = pd.read_excel(r"D:\Laser\data\txt_files\flying_bees\peak_counts_in_windows.xlsx")
video_sync = pd.read_csv(r"C:\Users\labadmin\Documents\Uppsala analyses\BORIS manual labelling\respirometry syncing\respirometry_syncing.csv")
bee_meta = pd.read_excel(r"C:\Users\labadmin\Documents\Uppsala analyses\beeID laser file name + weight.xlsx", sheet_name="flying_bees")

# laser_key = "_" + the six digits that directly precede ".txt" in the file name
laser_sync["laser_key"] = "_" + laser_sync["file"].str.extract(r"_(\d{6})\.txt$", expand=False)

# Normalise the join keys on the metadata and video tables
bee_meta["laser_key"] = bee_meta["Laser file name"]
bee_meta["BeeID"] = bee_meta["BeeID"].str.lower()
video_sync["bee_id"] = video_sync["Observation id"].str.split("-").str[2].str.lower()

# Column subsets taking part in the two left joins
video_cols = video_sync[["bee_id", "Start (s)"]].rename(columns={"Start (s)": "video_poke_time"})
meta_cols = bee_meta[["BeeID", "laser_key", "open_door_start_video_s", "open_door_stop_video_s"]]
laser_cols = laser_sync[["laser_key", "file", "first_peak_time"]]  # 'file' is carried along

lookup = video_cols.merge(meta_cols, left_on="bee_id", right_on="BeeID", how="left")
lookup = lookup.merge(laser_cols, on="laser_key", how="left")
lookup = lookup.dropna(subset=["laser_key", "first_peak_time", "video_poke_time"])

# Offset between the two time columns, then one row per laser_key (first one wins)
lookup = (
    lookup
    .assign(time_offset=lambda frame: frame["video_poke_time"] - frame["first_peak_time"])
    .drop_duplicates(subset="laser_key", keep="first")
    .set_index("laser_key")
)

print(f"✅ Lookup rebuilt with {len(lookup)} entries.")

✅ Lookup rebuilt with 31 entries.


In [8]:
# Preview the first rows of `combined` with all of its columns; relies on
# `combined` already being defined in the kernel.
combined.head()

Unnamed: 0,buzz.number,spec.dom.frequency,cep.fund.frequency,cep.fund.frequency.median,start,stop,buzz.duration,rms,peak.amp,peak.amp.nosmooth,spec.wl,fund.wl,file,laser_key,time_offset,door_open,door_close,start_video,stop_video
0,27,342.47,335.83,335.83,480.49,480.57,0.09,0.0,0.01,0.01,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...,_111645,123.75751,593.63,882.23,604.24751,604.32751
1,28,327.04,333.08,330.32,480.65,480.79,0.14,0.0,0.01,0.01,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...,_111645,123.75751,593.63,882.23,604.40751,604.54751
2,29,333.22,329.63,330.32,481.05,481.35,0.3,0.0,0.01,0.01,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...,_111645,123.75751,593.63,882.23,604.80751,605.10751
3,30,327.36,291.67,325.16,481.4,482.07,0.68,0.0,0.01,0.01,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...,_111645,123.75751,593.63,882.23,605.15751,605.82751
4,31,312.51,276.24,315.15,484.51,484.65,0.14,0.0,0.01,0.01,512,512,C:\Users\labadmin\AppData\Local\Temp\RtmpKKGkg...,_111645,123.75751,593.63,882.23,608.26751,608.40751


In [20]:
# === EXPORT BUZZ CLIPS ===
# For every row of `combined`, cut a padded clip out of the matching laser .txt
# recording and write it as a WAV file under output_dir/<laser_key>/.
# Relies on notebook state: combined, lookup (with a 'file' column), laser_data_dir,
# output_dir, sampling_rate, padding and soundfile imported as sf.
saved, skipped = 0, 0
print(f"💾 Exporting buzz clips to: {output_dir.resolve()}")

for key, buzzes in combined.groupby("laser_key"):
    if key not in lookup.index:
        print(f"⚠️ Skipping unknown key: {key}")
        skipped += len(buzzes)
        continue

    # Single .loc lookup instead of chained indexing.
    laser_filename = lookup.loc[key, "file"]
    laser_txt = laser_data_dir / laser_filename

    if not laser_txt.exists():
        print(f"❌ File not found: {laser_txt}")
        skipped += len(buzzes)
        continue

    try:
        # sep=r"\s+" is the supported spelling of the deprecated delim_whitespace=True.
        df_laser = pd.read_csv(laser_txt, sep=r"\s+")
        signal = df_laser["channel_1"].values
    except Exception as e:
        # Deliberate best-effort: report the unreadable recording and move on.
        print(f"❌ Error reading {laser_txt}: {e}")
        skipped += len(buzzes)
        continue

    total_samples = len(signal)
    duration = total_samples / sampling_rate  # recording length in seconds

    # Subfolder for this laser_key
    subfolder = output_dir / key
    subfolder.mkdir(parents=True, exist_ok=True)

    for _, row in buzzes.iterrows():
        # `signal` comes from the laser recording, so it must be indexed with the
        # un-shifted "start"/"stop" times. "start_video"/"stop_video" have
        # time_offset added and would cut the clip from the wrong position.
        start_t = max(row["start"] - padding, 0)
        stop_t = min(row["stop"] + padding, duration)
        start_sample = int(start_t * sampling_rate)
        stop_sample = int(stop_t * sampling_rate)
        clip = signal[start_sample:stop_sample]

        out_name = f"{key}_buzz{int(row['buzz.number'])}.wav"
        out_path = subfolder / out_name

        # A buzz located past the end of the recording yields an empty slice;
        # count it as skipped rather than writing a zero-length WAV.
        if len(clip) == 0:
            print(f"⚠️ Empty clip for {out_name} (outside the recording); not written")
            skipped += 1
            continue

        try:
            # NOTE(review): sample values are written as-is; confirm the signal's
            # amplitude range suits the WAV subtype soundfile selects by default.
            sf.write(out_path, clip, sampling_rate)
            saved += 1
        except Exception as e:
            print(f"❌ Failed to save {out_name}: {e}")
            skipped += 1

print(f"\n✅ Saved {saved} buzz clips.")
if skipped:
    print(f"⚠️ Skipped {skipped} due to errors or missing files.")

✅ Lookup rebuilt with 31 entries.
💾 Exporting buzz clips to: C:\Users\labadmin\Documents\Uppsala analyses\floral_buzz_clips_from_txt

✅ Saved 1424 buzz clips.


In [11]:
from pathlib import Path
import pandas as pd

# === CONFIG ===
label_file = Path("buzz_label_sheet.csv")
output_dir = Path("floral_buzz_clips_from_txt")

# === Prepare label table ===
# One record per row of `combined`; the WAV path follows the
# <output_dir>/<laser_key>/<laser_key>_buzz<N>.wav naming scheme and is stored resolved.
label_data = [
    {
        "laser_key": key,
        "buzz.number": int(number),
        "wav_file": str((output_dir / key / f"{key}_buzz{int(number)}.wav").resolve()),
        "cep.fund.frequency": frequency,
        "start_laser": begin,
        "stop_laser": end,
        "is_floral_buzz": "",  # to be filled manually
    }
    for key, number, frequency, begin, end in zip(
        combined["laser_key"],
        combined["buzz.number"],
        combined["cep.fund.frequency"],
        combined["start"],
        combined["stop"],
    )
]

# === Export ===
label_df = pd.DataFrame(label_data)
label_df.to_csv(label_file, index=False)
print(f"✅ Label sheet saved to: {label_file.resolve()}")

✅ Label sheet saved to: C:\Users\labadmin\Documents\Uppsala analyses\buzz_label_sheet.csv
