In [2]:
import librosa
import numpy as np
import glob
import os
import math
import pandas as pd
import re
import datetime
from datetime import datetime, timedelta

In [3]:
# ✅ Define folder containing audio files
AUDIO_FOLDER = os.getcwd().replace("/_scripts", "/_data/Audio/Full/Starlink_Group_7-13")
audio_files = glob.glob(os.path.join(AUDIO_FOLDER, "**/*.wav"), recursive=True)
RESULTS_DIR = os.getcwd().replace("/_scripts", "/_results/Model/output")
OUTPUT_CSV = os.path.join(RESULTS_DIR,"audiofiles_filtered.csv")

In [5]:
# ✅ Create launch dataframe
launch_data = {
    "Rocket": ["G7-13", "G7-14", "G7-15", "Transporter 10", "G7-17", "G7-16", "G7-18", "G8-1", "USSF-62",
               "Digital Globe", "G8-2", "G8-7", "NROL-146", "Earthcare", "G8-8"],
    "Day": ["2/9/24", "2/15/24", "2/22/24", "3/4/24", "3/10/24", "3/18/24", "4/1/24", "4/6/24", "4/11/24",
            "5/2/24", "5/9/24", "5/14/24", "5/22/24", "5/28/24", "6/8/24"],
    "Time (PT)": ["16:34", "13:34", "20:11", "14:05", "21:09", "19:28", "19:30", "19:25", "19:25",
                  "11:36", "21:30", "11:39", "1:00", "15:20", "5:58"]
}
df_launches = pd.DataFrame(launch_data)

# ✅ Convert launch date & time into a single datetime column
df_launches["launch_datetime"] = pd.to_datetime(df_launches["Day"] + " " + df_launches["Time (PT)"], format="%m/%d/%y %H:%M")

# ✅ Define 24-hour window before & after launch
df_launches["window_start"] = df_launches["launch_datetime"] - timedelta(hours=24)
df_launches["window_end"] = df_launches["launch_datetime"] + timedelta(hours=24)

df_launches.head()


Unnamed: 0,Rocket,Day,Time (PT),launch_datetime,window_start,window_end
0,G7-13,2/9/24,16:34,2024-02-09 16:34:00,2024-02-08 16:34:00,2024-02-10 16:34:00
1,G7-14,2/15/24,13:34,2024-02-15 13:34:00,2024-02-14 13:34:00,2024-02-16 13:34:00
2,G7-15,2/22/24,20:11,2024-02-22 20:11:00,2024-02-21 20:11:00,2024-02-23 20:11:00
3,Transporter 10,3/4/24,14:05,2024-03-04 14:05:00,2024-03-03 14:05:00,2024-03-05 14:05:00
4,G7-17,3/10/24,21:09,2024-03-10 21:09:00,2024-03-09 21:09:00,2024-03-11 21:09:00


In [6]:
# ✅ Function to extract date, time, recorder, and folder path
def extract_file_info(filepath):
    """
    Extracts datetime, recorder number, and folder path from an audio filename.
    Assumes format: /.../RecorderName/20240208_233504.wav
    """
    filename = os.path.basename(filepath)  # Get file name
    folder_path = os.path.basename(os.path.dirname(filepath))

    # Extract datetime
    match = re.search(r"_(\d{8})_(\d{6})", filename)  # Find YYYYMMDD_HHMMSS
    file_datetime = datetime.strptime(f"{match.group(1)} {match.group(2)}", "%Y%m%d %H%M%S") if match else None

    # Extract recorder number (Assuming it's the first part of the filename before "_")
    recorder_match = re.match(r"(\w+)_\d{8}_\d{6}", filename)
    recorder = recorder_match.group(1) if recorder_match else "Unknown"

    return file_datetime, recorder, folder_path

In [7]:
# ✅ Create an empty list for filtered audio files
filtered_files = []

# ✅ Iterate through launches and filter audio files
for _, launch in df_launches.iterrows():
    launch_start = launch["launch_datetime"]
    window_start = launch["window_start"]
    window_end = launch["window_end"]

    for file in audio_files:
        file_datetime, recorder, folder_path = extract_file_info(file)

        if file_datetime and window_start <= file_datetime <= window_end:
            filtered_files.append({
                "audiofile": file,
                "recorder": recorder,
                "folder_path": folder_path,
                "datetime": file_datetime,
                "launch_name": launch["Rocket"],
                "launch_start": launch_start
            })

# ✅ Convert to DataFrame
df_filtered_files = pd.DataFrame(filtered_files)

# ✅ Show first few results
print(df_filtered_files.head())

                                           audiofile  recorder    folder_path  \
0  /Users/ec/Documents/Data/Frog-Call-Classifier/...  S4A14476  Before_Launch   
1  /Users/ec/Documents/Data/Frog-Call-Classifier/...  S4A14476  Before_Launch   
2  /Users/ec/Documents/Data/Frog-Call-Classifier/...  S4A14476  Before_Launch   
3  /Users/ec/Documents/Data/Frog-Call-Classifier/...  S4A14476  Before_Launch   
4  /Users/ec/Documents/Data/Frog-Call-Classifier/...  S4A14476  Before_Launch   

             datetime launch_name        launch_start  
0 2024-02-08 19:35:04       G7-13 2024-02-09 16:34:00  
1 2024-02-08 20:05:06       G7-13 2024-02-09 16:34:00  
2 2024-02-09 09:05:06       G7-13 2024-02-09 16:34:00  
3 2024-02-09 00:05:04       G7-13 2024-02-09 16:34:00  
4 2024-02-09 08:05:06       G7-13 2024-02-09 16:34:00  


In [None]:
# display(df_filtered_files.head(5000).style.set_sticky())

Unnamed: 0,audiofile,recorder,folder_path,datetime,launch_name,launch_start
0,/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Before_Launch/S4A14476_20240209_153505.wav,S4A14476,Before_Launch,2024-02-09 15:35:05,G7-13,2024-02-09 16:34:00
1,/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Before_Launch/S4A14476_20240209_160504.wav,S4A14476,Before_Launch,2024-02-09 16:05:04,G7-13,2024-02-09 16:34:00
2,/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Includes_Launch/S4A14476_20240209_163504.wav,S4A14476,Includes_Launch,2024-02-09 16:35:04,G7-13,2024-02-09 16:34:00
3,/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/After_Launch/S4A14476_20240209_170504.wav,S4A14476,After_Launch,2024-02-09 17:05:04,G7-13,2024-02-09 16:34:00


In [8]:
# ✅ Save to CSV
df_filtered_files.to_csv(OUTPUT_CSV, index=False)