In [None]:
# Powerful Owl Call Detection using Deep Learning
# This first notebook trains a binary call classifier, which we use to make predictions on long-duration audio before generating ROIs and clustering.
# There is a small example training dataset available to download in the supplementary materials, but we encourage you to try it on your own data.
#you can also use our models in any additional notebooks if you'd like to skip this step


In [None]:
#generate spectrograms
#this code generates spectrograms from 5 second .wav file snippets to be used for training our binary classifier
#place your audio files in a folder with 'positive' and 'negative' subfolders, depending on whether each clip contains your call of interest

import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageEnhance
import io

# Spectrogram settings (read as globals by create_spectrogram)

# Mel-spectrogram analysis
n_fft, hop_length = 4096, 512  # FFT size and hop length, in samples
n_mels, fmax = 256, 8000       # Number of Mel bands and highest frequency (Hz) included

# Output image
img_size = (224, 224)          # (width, height) the saved image is resized to
brightness_factor, contrast_factor = 0.8, 2.0  # PIL enhancement factors (1.0 = unchanged)

# Matplotlib colormap used for rendering
# Options: 'viridis', 'inferno', 'magma', 'cividis', 'plasma'
colormap = 'viridis'

def create_spectrogram(filename, save_path, cmap):
    """Render one audio file as a mel-spectrogram image and save it.

    The audio is loaded at its native sample rate, converted to a
    mel-spectrogram in dB relative to its own peak, drawn without axes or
    margins, resized to ``img_size``, and brightness/contrast adjusted before
    being written to ``save_path``.

    Reads the module-level settings ``n_fft``, ``n_mels``, ``hop_length``,
    ``fmax``, ``img_size``, ``brightness_factor`` and ``contrast_factor``.

    Args:
        filename: Path of the audio file to load.
        save_path: Path the finished image is written to.
        cmap: Matplotlib colormap name used to colour the spectrogram.
    """
    top_db = 80  # dynamic range kept below the peak, shared by dB scaling and colour limits

    # sr=None keeps the file's own sample rate instead of resampling
    y, sr = librosa.load(filename, sr=None)

    # Generate mel-spectrogram and express it in dB relative to its maximum
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, n_mels=n_mels, fmax=fmax, hop_length=hop_length)
    S_DB = librosa.power_to_db(S, ref=np.max, amin=1e-10, top_db=top_db)

    fig, ax = plt.subplots(figsize=(3.5, 3.5))
    try:
        librosa.display.specshow(S_DB, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel', fmax=fmax, cmap=cmap, ax=ax, vmin=S_DB.max() - top_db, vmax=S_DB.max())
        ax.axis('off')  # Remove axes
        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)  # Fill the whole figure

        # Render to an in-memory PNG; the buffer is released when the block exits
        with io.BytesIO() as buf:
            fig.savefig(buf, format='png', dpi=100, bbox_inches='tight', pad_inches=0)
            buf.seek(0)
            # convert() decodes the image now, while the buffer is still open
            img = Image.open(buf).convert('RGB')
    finally:
        # Always release the figure, even if rendering or saving failed,
        # so figures do not pile up in the kernel when processing many files
        plt.close(fig)

    img = img.resize(img_size)

    # Adjust brightness first, then contrast
    img = ImageEnhance.Brightness(img).enhance(brightness_factor)
    img = ImageEnhance.Contrast(img).enhance(contrast_factor)

    img.save(save_path)

def process_directory(base_dir, sub_dirs, cmap):
    """Create a spectrogram image for every WAV file in each class subfolder.

    For each name in ``sub_dirs``, audio is read from ``base_dir/<sub_dir>``
    and images are written to ``base_dir/spectrograms/<sub_dir>`` with the
    same file stem and a ``.png`` extension.

    Args:
        base_dir: Folder containing the class subfolders.
        sub_dirs: Iterable of subfolder names to process.
        cmap: Matplotlib colormap name passed on to ``create_spectrogram``.

    Raises:
        FileNotFoundError: If a class subfolder does not exist. This is
            checked before any output folder is created, so a wrong path
            leaves nothing behind.
    """
    for sub_dir in sub_dirs:
        current_dir = os.path.join(base_dir, sub_dir)
        if not os.path.isdir(current_dir):
            raise FileNotFoundError(f"Audio folder not found: {current_dir}")

        save_dir = os.path.join(base_dir, 'spectrograms', sub_dir)
        os.makedirs(save_dir, exist_ok=True)

        # Sorted so files are processed in the same order on every run
        for filename in sorted(os.listdir(current_dir)):
            # Case-insensitive match: many recorders write '.WAV'
            if not filename.lower().endswith('.wav'):
                continue
            file_path = os.path.join(current_dir, filename)
            save_path = os.path.join(save_dir, os.path.splitext(filename)[0] + '.png')
            create_spectrogram(file_path, save_path, cmap)
            print(f"Processed {filename}")

# Folder layout: base_dir must contain one subfolder per class
# ('positive' and 'negative'), each holding that class's audio files
base_dir = r"path_to_your_files"
sub_dirs = ["positive", "negative"]

# Render every clip using the colormap selected in the parameters
process_directory(base_dir=base_dir, sub_dirs=sub_dirs, cmap=colormap)


In [None]:
#hold out data for testing 
#this holds out a percentage of the training data for testing (10% here; you can change this by tweaking test_size)
#you can skip this step and use our test datasets available in the test folder

import os
import shutil
import random
from pathlib import Path

def split_dataset(src_dir, test_size=0.10):
    """Move a random fraction of each class's files into a held-out test folder.

    Files are taken from ``src_dir/positive`` and ``src_dir/negative`` and
    moved (not copied) to ``test/positive`` and ``test/negative``, where
    ``test`` is created next to ``src_dir``.

    Selection uses the global ``random`` state, so call ``random.seed(...)``
    first for a repeatable split. Candidates are sorted by path before
    sampling because directory listing order is not guaranteed.

    Note: this is not idempotent. Each call moves another ``test_size``
    fraction of whatever is still in the source folders.

    Args:
        src_dir: Folder containing the 'positive' and 'negative' subfolders.
        test_size: Fraction (0 to 1) of each class to move to the test set.

    Raises:
        ValueError: If ``test_size`` is outside the range 0 to 1.
        FileNotFoundError: If either class subfolder is missing. This is
            checked before any test folder is created.
    """
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    # Convert to Path object for easier handling
    src_path = Path(src_dir)
    class_names = ['positive', 'negative']

    # Fail early on a wrong path instead of creating empty test folders
    for class_name in class_names:
        if not (src_path / class_name).is_dir():
            raise FileNotFoundError(f"Class folder not found: {src_path / class_name}")

    # Create test directory at same level as source directory
    test_path = src_path.parent / 'test'

    for class_name in class_names:
        test_class_path = test_path / class_name
        test_class_path.mkdir(parents=True, exist_ok=True)

        # Regular, non-hidden files only: skips subfolders and OS metadata
        # such as '.DS_Store'. Sorted so the seeded sample is deterministic.
        src_class_path = src_path / class_name
        all_files = sorted(
            p for p in src_class_path.iterdir()
            if p.is_file() and not p.name.startswith('.')
        )

        # int() truncates, so very small classes may contribute zero test files
        n_test = int(len(all_files) * test_size)

        # Randomly select files for test set
        test_files = random.sample(all_files, n_test)

        # Move files to test directory
        for file_path in test_files:
            shutil.move(str(file_path), str(test_class_path / file_path.name))

        print(f"Moved {n_test} files from {class_name} to test set")

if __name__ == "__main__":
    # Seed Python's RNG before sampling so the selection can be repeated
    random.seed(42)

    # Folder holding the 'positive' and 'negative' spectrogram subfolders
    src_dir = r"path_to_your_spectrograms"

    # Hold out the default fraction of each class
    split_dataset(src_dir=src_dir)

In [None]:
#TRAIN MODEL
#make sure your spectrograms are in folders named 'positive' and 'negative'
#you may need to experiment with tweaking the hyperparameters depending on your training data

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

def create_mobilenet():
    """Build a binary classifier on top of an ImageNet-pretrained MobileNetV2.

    All backbone layers except the last 30 are frozen. The head is global
    average pooling, a 256-unit ReLU layer, batch normalisation, 50% dropout
    and a single sigmoid output unit.

    Returns:
        An uncompiled Keras ``Model`` taking 224x224 RGB images.
    """
    backbone = MobileNetV2(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

    # Only the final 30 backbone layers stay trainable for fine-tuning
    frozen_layers = backbone.layers[:-30]
    for frozen in frozen_layers:
        frozen.trainable = False

    # Classification head, applied in order to the backbone's feature map
    features = backbone.output
    head_layers = (
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
    )
    for head_layer in head_layers:
        features = head_layer(features)

    probability = Dense(1, activation='sigmoid')(features)
    return Model(inputs=backbone.input, outputs=probability)

# Seed Python, NumPy and TensorFlow so weight initialisation, shuffling and
# augmentation are repeatable between runs
tf.keras.utils.set_random_seed(42)

# Create and compile the model
model = create_mobilenet()
model.compile(optimizer=Adam(learning_rate=0.0005), loss='binary_crossentropy', metrics=['accuracy'])

# Training images: rescaled to [0, 1] and randomly augmented
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=(0.8, 1.2),
    shear_range=0.05,
    zoom_range=0.1,
    fill_mode='nearest',
    validation_split=0.2
)

# Validation images: rescaled only. Augmenting them would make val_loss noisy
# and mislead EarlyStopping / ReduceLROnPlateau. validation_split must match
# train_datagen so the two subsets stay disjoint.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

base_dir = r'path_to_your_spectrograms'
train_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(224, 224),
    batch_size=64,
    class_mode='binary',
    subset='training'
)
validation_generator = val_datagen.flow_from_directory(
    base_dir,
    target_size=(224, 224),
    batch_size=64,
    class_mode='binary',
    subset='validation',
    shuffle=False  # fixed order; shuffling has no benefit for evaluation
)

# Set up callbacks
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)
]

# Start training
# steps_per_epoch / validation_steps are left unset so Keras uses the full
# length of each generator. The previous `samples // batch_size` dropped the
# last partial batch and evaluated to 0 when a subset had fewer than 64 images.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=50,
    callbacks=callbacks
)


In [None]:
# Write the trained classifier to disk; the saved file can then be loaded for predictions and testing in the other notebooks
model.save(filepath=r'path_to_your_model.keras')
