In [19]:
# Import Packages
import os
import librosa
import librosa.display
import numpy as np
import pandas as pd

os.environ["TF_ENABLE_MLIR"] = "1"
os.environ["TF_GPU_THREAD_MODE"] = "gpu_private"
os.environ["TF_USE_LEGACY_GPU_KERNELS"] = "1"

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt

In [20]:
model = os.getcwd().replace("/_scripts","/_results/Example/Model/my_model.keras")
model = load_model(model)

ValueError: File format not supported: filepath=/Users/ec/Documents/Data/Frog-Call-Classifier. Keras 3 only supports V3 `.keras` files and legacy H5 format files (`.h5` extension). Note that the legacy SavedModel format is not supported by `load_model()` in Keras 3. In order to reload a TensorFlow SavedModel as an inference-only layer in Keras 3, use `keras.layers.TFSMLayer(/Users/ec/Documents/Data/Frog-Call-Classifier, call_endpoint='serving_default')` (note that your `call_endpoint` might have a different name).

In [None]:
# Constants
AUDIO_FOLDER = os.getcwd().replace("/_scripts","/_data/Example/Forest Recordings")
SAMPLE_RATE = 16000
CLIP_LENGTH = 5  # seconds
WINDOW_SIZE = SAMPLE_RATE * CLIP_LENGTH

In [8]:
# Function to convert waveform to spectrogram
def waveform_to_spectrogram(wav):
    mel_spec = librosa.feature.melspectrogram(y=wav, sr=SAMPLE_RATE, n_mels=128)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    mel_spec_db = np.expand_dims(mel_spec_db, axis=-1)
    mel_spec_db = tf.image.resize(mel_spec_db, (128, 100))
    mel_spec_db = (mel_spec_db - tf.reduce_mean(mel_spec_db)) / tf.math.reduce_std(mel_spec_db)
    return mel_spec_db

# Function to process an audio file into 5-second clips and predict labels
def process_audio_file(audio_path):
    wav, sr = librosa.load(audio_path, sr=SAMPLE_RATE)
    clips = [wav[i : i + WINDOW_SIZE] for i in range(0, len(wav), WINDOW_SIZE)]
    clips = [clip for clip in clips if len(clip) == WINDOW_SIZE]

    spectrograms = np.array([waveform_to_spectrogram(clip) for clip in clips])
    predictions = model.predict(spectrograms)
    predicted_labels = (predictions > 0.5).astype(int)

    results = []
    for i, label in enumerate(predicted_labels):
        start_time = i * CLIP_LENGTH
        end_time = start_time + CLIP_LENGTH
        results.append({
            "audiofile": os.path.basename(audio_path),
            "clip_no": i + 1,
            "start_time": start_time,
            "end_time": end_time,
            "prediction": int(label)
        })
    return results

# Process all audio files in the folder
def process_all_audio_files():
    all_results = []
    for filename in os.listdir(AUDIO_FOLDER):
        if filename.endswith(('.wav', '.mp3')):
            audio_path = os.path.join(AUDIO_FOLDER, filename)
            all_results.extend(process_audio_file(audio_path))

    df = pd.DataFrame(all_results)
    return df

In [9]:
# Run the processing and display the DataFrame
results_df = process_all_audio_files()

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 364ms/step


  "prediction": int(label)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20

In [17]:
results_df
filename = os.getcwd().replace("/_scripts","/_results/Example/Model/Counts.csv")
print(filename)
results_df.to_csv(filename, index=False)

/Users/ec/Documents/Data/Frog-Call-Classifier


IsADirectoryError: [Errno 21] Is a directory: '/Users/ec/Documents/Data/Frog-Call-Classifier'

In [11]:
# Load the generated results and the saved CSV from GitHub
# generated_df = pd.read_csv("audio_predictions.csv")
saved_df = pd.read_csv("/Users/ec/Documents/Data/Frog-Call-Classifier/_data/Example/results.csv")
saved_df = saved_df.rename(columns={'recording':'audiofile','capuchin_calls':'prediction' })
saved_df = saved_df.reset_index()
saved_df.head()

Unnamed: 0,index,audiofile,prediction
0,0,recording_00.mp3,5
1,1,recording_01.mp3,0
2,2,recording_02.mp3,0
3,3,recording_03.mp3,0
4,4,recording_04.mp3,4


In [12]:
generated_summary = results_df[results_df['prediction'] == 1].groupby('audiofile').sum()
generated_summary = generated_summary.drop(columns=['clip_no','start_time','end_time'])
generated_summary = generated_summary.rename(columns={'audiofile':'recording'})
generated_summary = generated_summary.reset_index()
generated_summary.head()

Unnamed: 0,audiofile,prediction
0,recording_00.mp3,7
1,recording_01.mp3,2
2,recording_04.mp3,5
3,recording_06.mp3,8
4,recording_07.mp3,2


In [13]:
generated_summary['prediction_og'] = saved_df['prediction']
generated_summary

Unnamed: 0,audiofile,prediction,prediction_og
0,recording_00.mp3,7,5
1,recording_01.mp3,2,0
2,recording_04.mp3,5,0
3,recording_06.mp3,8,0
4,recording_07.mp3,2,4
...,...,...,...
66,recording_95.mp3,5,0
67,recording_96.mp3,1,0
68,recording_97.mp3,9,1
69,recording_98.mp3,34,1
