In [95]:
# Import Packages
import os
import librosa
import librosa.display
import numpy as np
import pandas as pd
import glob

os.environ["TF_ENABLE_MLIR"] = "1"
os.environ["TF_GPU_THREAD_MODE"] = "gpu_private"
os.environ["TF_USE_LEGACY_GPU_KERNELS"] = "1"

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt

In [96]:
# model = os.getcwd().replace("/_scripts","/_results/Model/my_model_20_col_waa.keras")
model = os.getcwd().replace("/_scripts","/_results/Model/my_model_50_all.keras")
model = load_model(model)

In [None]:
# Define the audio folder path
AUDIO_FOLDER = os.getcwd().replace("/_scripts", "/_data/Audio/Full/Starlink_Group_7-13")




# Print results
if audio_files:
    print(f"Found {len(audio_files)} audio files.")
    for file in audio_files[:10]:  # Show only the first 10 files
        print(file)
else:
    print("No audio files found in the specified directory and subdirectories.")

# Directory to save spectrogram images
SPECTROGRAM_SAVE_DIR = os.getcwd().replace("/_scripts","/_results/Model/spectrograms")
# POS_FOLDER = os.path.join(SPECTROGRAM_SAVE_DIR, "positives")
# NEG_FOLDER = os.path.join(SPECTROGRAM_SAVE_DIR, "negatives")

# Ensure output directories exist
os.makedirs(SPECTROGRAM_SAVE_DIR, exist_ok=True)  # Create directory if not exists
# os.makedirs(POS_FOLDER, exist_ok=True)
# os.makedirs(NEG_FOLDER, exist_ok=True)

Found 86 audio files.
/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Before_Launch/S4A14476_20240208_193504.wav
/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Before_Launch/S4A14476_20240208_200506.wav
/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Before_Launch/S4A14476_20240209_090506.wav
/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Before_Launch/S4A14476_20240209_000504.wav
/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Before_Launch/S4A14476_20240209_080506.wav
/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Before_Launch/S4A14476_20240209_010504.wav
/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Audio/Full/Starlink_Group_7-13/Beer_Creek_E/Before_Launch/S4A14476_20240208_183504.wav
/User

In [None]:
# Constants
SAMPLE_RATE = 40000
CLIP_LENGTH = 1  # seconds
MAX_FILES = 1800
F_MIN, F_MAX = 0, 1500
SPEC_HEIGHT, SPEC_WIDTH = 120, 80  # Height (mel bands) and fixed width for x seconds
WINDOW_SIZE = SAMPLE_RATE * CLIP_LENGTH


In [None]:
# Initialize a counter to assign sequential numbers to spectrograms
spectrogram_counter = 1

def plot_spectrogram(spectrogram, sr, filename):
    plt.figure(figsize=(12, 6))
    librosa.display.specshow(spectrogram.numpy().squeeze(), sr=sr, hop_length=512, x_axis='time', y_axis='mel')
    plt.tight_layout()
    plt.savefig(filename, bbox_inches='tight')
    plt.close()  # Free memory

# Function to convert waveform to spectrogram
def waveform_to_spectrogram(clip, sr, filename, total_duration):
    """
    Load an audio file, preprocess it (trim, pad), and convert it to a Mel spectrogram.

    Args:
        file_path (str or tf.Tensor): Path to the audio file.
        sr (int): Sampling rate.

    Returns:
        tf.Tensor: Preprocessed Mel spectrogram with shape (SPEC_HEIGHT, SPEC_WIDTH, 1).
    """

    # Load the audio file (ensure 16 kHz mono)
    wav, sr = clip, sr

    # Ensure fixed length (trim or zero-pad) 
    target_length = sr * CLIP_LENGTH
    wav = np.pad(wav[:target_length], (0, max(0, target_length - len(wav))), mode="constant")

    # Compute Mel spectrogram (set frequency limits)
    mel_spec = librosa.feature.melspectrogram(
        y=wav, sr=sr, n_mels=128, fmin=F_MIN, fmax=F_MAX, hop_length=256, n_fft=4096

    )

    # Convert power to decibels
    mel_spec_db = librosa.power_to_db(mel_spec, ref=1)

    # Convert to TensorFlow tensor and normalize
    mel_spec_db = np.expand_dims(mel_spec_db, axis=-1)  # Add channel dimension
    mel_spec_db = tf.image.resize(mel_spec_db, (SPEC_HEIGHT, SPEC_WIDTH))  # Resize    
    # mel_spec_db.set_shape((SPEC_HEIGHT, SPEC_WIDTH, 1))
    mel_spec_db = (mel_spec_db - tf.reduce_mean(mel_spec_db)) / tf.math.reduce_std(mel_spec_db)


    # # Normalize non-padded values
    # non_silent_mask = tf.not_equal(mel_tensor, -80.0)
    # valid_values = tf.boolean_mask(mel_tensor, non_silent_mask)
    # mean, std = tf.reduce_mean(valid_values), tf.math.reduce_std(valid_values)
    # mel_tensor = (mel_tensor - mean) / (std + 1e-6)

    global spectrogram_counter

     # Reset counter if it reaches MAX_FILES
    if spectrogram_counter > total_duration:
        spectrogram_counter = 1

    # Generate filename: <audiofile>_s<counter>.jpeg
    base_name = os.path.basename(filename)
    save_path = os.path.join(SPECTROGRAM_SAVE_DIR, f"{base_name}_s_{spectrogram_counter:04d}.jpeg")
    plot_spectrogram(mel_spec_db, sr=sr, filename=save_path)

    print(f"Saved spectrogram: {base_name}_s_{spectrogram_counter:04d}")

    spectrogram_counter += 1  # Increment counter for the next file

    # return mel_tensor
    return mel_spec_db

def get_last_three_folders(filepath):
    parts = filepath.split(os.sep)  # Split path by OS separator
    return os.sep.join(parts[-4:-1])  # Keep last 3 folders + filename

# Function to process an audio file into 5-second clips and predict labels
def process_audio_file(audio_path):
    # Load audio without resampling (retain original sample rate)
    wav, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    total_duration = librosa.get_duration(y=wav, sr=sr)
    print(f"Processing: {os.path.basename(audio_path)}, Duration: {total_duration:.2f} seconds, SR: {sr} Hz")

    # Dynamic window size based on sample rate
    window_size = sr * CLIP_LENGTH

    # Split into 5-second clips (pad last clip if shorter)
    clips = [wav[i: i + window_size] for i in range(0, len(wav), window_size)]
    if len(clips[-1]) < window_size:
        clips[-1] = np.pad(clips[-1], (0, window_size - len(clips[-1])), mode="constant")

    # Convert to spectrograms and predict
    spectrograms = np.array([waveform_to_spectrogram(clip, sr, audio_path, total_duration) for clip in clips])
    predictions = model.predict(spectrograms)
    predicted_labels = (predictions > 0.5).astype(int)
    confidence_scores = predictions.flatten()  # Get raw probabilities

    # Generate results
    results = []
    for i, (label, confidence) in enumerate(zip(predicted_labels, confidence_scores)):
        start_time = i * CLIP_LENGTH
        end_time = min((i + 1) * CLIP_LENGTH, total_duration)
        results.append({
            "audiofile": os.path.basename(audio_path),
            "clip_no": i + 1,
            "start_time": start_time,
            "end_time": end_time,
            "prediction": int(label),
            "confidence": round(confidence, 4),
            "filepath": audio_path  # Keep last 3 folders
        })

    return results

# Process all audio files in the folder
def process_all_audio_files():
    all_results = []
    # Search for all .wav and .mp3 files recursively
    audio_files = glob.glob(os.path.join(AUDIO_FOLDER, "**/*.wav"), recursive=True) + \
              glob.glob(os.path.join(AUDIO_FOLDER, "**/*.mp3"), recursive=True)
    if not audio_files:
        print("No audio files found!")
        return pd.DataFrame()  # Return empty DataFrame if no files are found
    
    for audio_path in audio_files:  # Iterate directly over the file paths
        all_results.extend(process_audio_file(audio_path))
        
    df = pd.DataFrame(all_results)
    return df

In [79]:
# Initialize a counter to assign sequential numbers to spectrograms
spectrogram_counter = 1

# Define the specific audio file to test
TEST_AUDIO_FILE = os.getcwd().replace("/_scripts","/_data/Audio/Full/Detection_Test_Files_20250227/Digital_Globe/Ag Ditch - Y/S4A23845_20240501_120000.wav")  # Change to your test file name
OUTPUT_FILE = os.getcwd().replace("/_scripts","/_results/Model/test_audio_results.csv")

# Build full path
audio_path = os.path.join(AUDIO_FOLDER, TEST_AUDIO_FILE)

# Ensure the file exists before processing
if os.path.exists(audio_path):
    results = process_audio_file(audio_path)

    # Convert to DataFrame for analysis
    results_df = pd.DataFrame(results)

    # check
    results_df.head()

    # Save results to CSV if needed
    results_df.to_csv(OUTPUT_FILE, index=False)
else:
    print(f"File not found: {audio_path}")

Processing: S4A23845_20240501_120000.wav, Duration: 1800.00 seconds, SR: 40000 Hz
Saved spectrogram: /Users/ec/Documents/Data/Frog-Call-Classifier/_results/Model/spectrograms/S4A23845_20240501_120000.wav_s_1.jpeg
Saved spectrogram: /Users/ec/Documents/Data/Frog-Call-Classifier/_results/Model/spectrograms/S4A23845_20240501_120000.wav_s_2.jpeg
Saved spectrogram: /Users/ec/Documents/Data/Frog-Call-Classifier/_results/Model/spectrograms/S4A23845_20240501_120000.wav_s_3.jpeg
Saved spectrogram: /Users/ec/Documents/Data/Frog-Call-Classifier/_results/Model/spectrograms/S4A23845_20240501_120000.wav_s_4.jpeg
Saved spectrogram: /Users/ec/Documents/Data/Frog-Call-Classifier/_results/Model/spectrograms/S4A23845_20240501_120000.wav_s_5.jpeg
Saved spectrogram: /Users/ec/Documents/Data/Frog-Call-Classifier/_results/Model/spectrograms/S4A23845_20240501_120000.wav_s_6.jpeg
Saved spectrogram: /Users/ec/Documents/Data/Frog-Call-Classifier/_results/Model/spectrograms/S4A23845_20240501_120000.wav_s_7.jpeg
S

  "prediction": int(label),


In [91]:
# Initialize a counter to assign sequential numbers to spectrograms
spectrogram_counter = 1

# Run the processing and display the DataFrame
results_df = process_all_audio_files()

In [84]:
# results
filename = os.getcwd().replace("/_scripts","/_results/Model/Counts.csv")

results_df.sort_values(by=['audiofile', 'clip_no'], inplace=True)
results_df.to_csv(filename, index=False)

results_df.head()

KeyError: 'audiofile'

In [90]:
# Load the generated results and the saved CSV from GitHub
filename = os.getcwd().replace("/_scripts","/_data/Example/results.csv")
saved_df = pd.read_csv("/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Example/results.csv")
saved_df = saved_df.rename(columns={'recording':'audiofile','capuchin_calls':'prediction' })
saved_df = saved_df.reset_index()
saved_df.head()

Unnamed: 0,index,audiofile,prediction
0,0,recording_00.mp3,5
1,1,recording_01.mp3,0
2,2,recording_02.mp3,0
3,3,recording_03.mp3,0
4,4,recording_04.mp3,4


In [91]:
# Reformat the summary and add column with original data
generated_summary = results_df[results_df['prediction'] == 1].groupby('audiofile').sum()
generated_summary = generated_summary.drop(columns=['clip_no','start_time','end_time'])
generated_summary = generated_summary.rename(columns={'audiofile':'recording'})
generated_summary.head()

Unnamed: 0_level_0,prediction
audiofile,Unnamed: 1_level_1
recording_00.mp3,7
recording_01.mp3,2
recording_04.mp3,5
recording_06.mp3,8
recording_07.mp3,2


In [92]:
generated_summary = generated_summary.reset_index()
generated_summary['prediction_og'] = saved_df['prediction']

filename = os.getcwd().replace("/_scripts","/_results/Example/Model/Counts_Summary.csv")
generated_summary.to_csv(filename, index=False)

generated_summary.head()

Unnamed: 0,audiofile,prediction,prediction_og
0,recording_00.mp3,7,5
1,recording_01.mp3,2,0
2,recording_04.mp3,5,0
3,recording_06.mp3,8,0
4,recording_07.mp3,2,4
