In [2]:
# Import Packages
import os
import librosa
import librosa.display
import numpy as np
import pandas as pd

os.environ["TF_ENABLE_MLIR"] = "1"
os.environ["TF_GPU_THREAD_MODE"] = "gpu_private"
os.environ["TF_USE_LEGACY_GPU_KERNELS"] = "1"

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt

In [3]:
model = os.getcwd().replace("/_scripts","/_results/Example/Model/my_model.keras")
model = load_model(model)

  saveable.load_own_variables(weights_store.get(inner_path))


In [48]:
# Constants
AUDIO_FOLDER = os.getcwd().replace("/_scripts","/_data/Example/Forest Recordings")
SAMPLE_RATE = 16000
CLIP_LENGTH = 5  # seconds
WINDOW_SIZE = SAMPLE_RATE * CLIP_LENGTH

In [85]:
# Function to convert waveform to spectrogram
def waveform_to_spectrogram(wav, sr):
    mel_spec = librosa.feature.melspectrogram(y=wav, sr=sr, n_mels=128)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    mel_spec_db = np.expand_dims(mel_spec_db, axis=-1)
    mel_spec_db = tf.image.resize(mel_spec_db, (128, 100))
    mel_spec_db = (mel_spec_db - tf.reduce_mean(mel_spec_db)) / tf.math.reduce_std(mel_spec_db)
    
    # # Convert TensorFlow tensor to NumPy array for plotting
    # mel_spec_db_np = mel_spec_db.numpy()

    # # Plotting
    # plt.figure(figsize=(10, 4))
    # librosa.display.specshow(mel_spec_db_np[:, :, 0], sr=SAMPLE_RATE, x_axis="time", y_axis="mel")
    # plt.axis('off')  # Turn off axes completely
    # plt.tight_layout()
    # plt.show()

    return mel_spec_db

# Function to process an audio file into 5-second clips and predict labels
def process_audio_file(audio_path):
    # Load audio without resampling (retain original sample rate)
    wav, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    total_duration = librosa.get_duration(y=wav, sr=sr)
    print(f"Processing: {os.path.basename(audio_path)}, Duration: {total_duration:.2f} seconds, SR: {sr} Hz")

    # Dynamic window size based on sample rate
    window_size = sr * CLIP_LENGTH

    # Split into 5-second clips (pad last clip if shorter)
    clips = [wav[i: i + window_size] for i in range(0, len(wav), window_size)]
    if len(clips[-1]) < window_size:
        clips[-1] = np.pad(clips[-1], (0, window_size - len(clips[-1])), mode="constant")

    # Convert to spectrograms and predict
    spectrograms = np.array([waveform_to_spectrogram(clip, sr) for clip in clips])
    predictions = model.predict(spectrograms)
    predicted_labels = (predictions > 0.5).astype(int)

    # Generate results
    results = []
    for i, label in enumerate(predicted_labels):
        start_time = i * CLIP_LENGTH
        end_time = min((i + 1) * CLIP_LENGTH, total_duration)
        results.append({
            "audiofile": os.path.basename(audio_path),
            "clip_no": i + 1,
            "start_time": start_time,
            "end_time": end_time,
            "prediction": int(label)
        })

    return results

# Process all audio files in the folder
def process_all_audio_files():
    all_results = []
    for filename in os.listdir(AUDIO_FOLDER):
        if filename.endswith(('.wav', '.mp3')):
            audio_path = os.path.join(AUDIO_FOLDER, filename)
            all_results.extend(process_audio_file(audio_path))

    df = pd.DataFrame(all_results)
    return df

In [88]:
# Run the processing and display the DataFrame
results_df = process_all_audio_files()

Processing: recording_08.mp3, Duration: 180.04 seconds, SR: 16000 Hz
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


  "prediction": int(label)


Processing: recording_00.mp3, Duration: 180.04 seconds, SR: 16000 Hz
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Processing: recording_01.mp3, Duration: 180.04 seconds, SR: 16000 Hz
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Processing: recording_03.mp3, Duration: 180.04 seconds, SR: 16000 Hz
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Processing: recording_02.mp3, Duration: 180.04 seconds, SR: 16000 Hz
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Processing: recording_06.mp3, Duration: 180.04 seconds, SR: 16000 Hz
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Processing: recording_07.mp3, Duration: 180.04 seconds, SR: 16000 Hz
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Processing: recording_05.mp3, Duration: 180.04 seconds, SR: 16000 Hz
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Processing: r

In [89]:
# results
filename = os.getcwd().replace("/_scripts","/_results/Example/Model/Counts.csv")

results_df.sort_values(by=['audiofile', 'clip_no'], inplace=True)
results_df.to_csv(filename, index=False)

results_df.head()

Unnamed: 0,audiofile,clip_no,start_time,end_time,prediction
37,recording_00.mp3,1,0,5.0,0
38,recording_00.mp3,2,5,10.0,0
39,recording_00.mp3,3,10,15.0,1
40,recording_00.mp3,4,15,20.0,0
41,recording_00.mp3,5,20,25.0,0


In [90]:
# Load the generated results and the saved CSV from GitHub
filename = os.getcwd().replace("/_scripts","/_data/Example/results.csv")
saved_df = pd.read_csv("/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Example/results.csv")
saved_df = saved_df.rename(columns={'recording':'audiofile','capuchin_calls':'prediction' })
saved_df = saved_df.reset_index()
saved_df.head()

Unnamed: 0,index,audiofile,prediction
0,0,recording_00.mp3,5
1,1,recording_01.mp3,0
2,2,recording_02.mp3,0
3,3,recording_03.mp3,0
4,4,recording_04.mp3,4


In [91]:
# Reformat the summary and add column with original data
generated_summary = results_df[results_df['prediction'] == 1].groupby('audiofile').sum()
generated_summary = generated_summary.drop(columns=['clip_no','start_time','end_time'])
generated_summary = generated_summary.rename(columns={'audiofile':'recording'})
generated_summary.head()

Unnamed: 0_level_0,prediction
audiofile,Unnamed: 1_level_1
recording_00.mp3,7
recording_01.mp3,2
recording_04.mp3,5
recording_06.mp3,8
recording_07.mp3,2


In [92]:
generated_summary = generated_summary.reset_index()
generated_summary['prediction_og'] = saved_df['prediction']

filename = os.getcwd().replace("/_scripts","/_results/Example/Model/Counts_Summary.csv")
generated_summary.to_csv(filename, index=False)

generated_summary.head()

Unnamed: 0,audiofile,prediction,prediction_og
0,recording_00.mp3,7,5
1,recording_01.mp3,2,0
2,recording_04.mp3,5,0
3,recording_06.mp3,8,0
4,recording_07.mp3,2,4
