In [1]:
import tensorflow as tf
import numpy as np
import librosa
import os
import pandas as pd
from tensorflow.keras import layers, models, metrics, callbacks
import matplotlib.pyplot as plt

# ============================
# Global Parameters & Paths
# ============================
# Sample rate handed to librosa as `sr` when loading audio and computing the
# mel spectrogram.
SR = 32000
# Value passed as librosa.load's `duration` (seconds of audio read per file).
DURATION = 5.0
# Number of paths drawn per batch by the data generator; also used to derive
# steps_per_epoch for training.
BATCH_SIZE = 4
# Upper bound on training epochs (early stopping may end training sooner).
EPOCHS = 100
# Default `n_mels` / `n_mfcc` for load_and_process_audio.
# NOTE(review): N_MFCC (128) is larger than N_MELS (42), while the model below
# is built for 42 features per frame. Presumably only 42 coefficients can be
# produced from 42 mel bands - confirm these two values are not swapped.
N_MELS = 42
N_MFCC = 128
Patience = 5  # Early stopping patience
# The following settings are not referenced by the code visible in this cell:
# TIME_STRETCH_RATE, PITCH_SHIFT_SEMITONES, NOISE_LEVEL, DROPOUT_RATE, alpha, TOP_K.
TIME_STRETCH_RATE = 0.0
PITCH_SHIFT_SEMITONES = 0
NOISE_LEVEL = 0.01
DROPOUT_RATE = 0.1
alpha = 0.01
# Fraction of metadata rows kept by DataFrame.sample below.
frac = 1
TOP_K = 3

# Dataset root; the audio directory and the metadata CSV are resolved beneath it.
BASE_DIR_2024 = r"C:\BIRD_DATA\2024"
TRAIN_AUDIO_DIR_2024 = os.path.join(BASE_DIR_2024, "train_audio")
TRAIN_CSV_PATH_2024 = os.path.join(BASE_DIR_2024, "train_metadata.csv")

# ============================
# Load Metadata (train_metadata.csv)
# ============================
train_df_2024 = pd.read_csv(TRAIN_CSV_PATH_2024)
# Sample `frac` of the rows with a fixed seed so the selection is reproducible.
sampled_df_2024 = train_df_2024.sample(frac=frac, random_state=42)

# ============================
# Load 2024 Training Audio File Paths
# ============================
# Join each entry of the metadata "filename" column onto the audio directory.
train_file_paths_2024 = [os.path.join(TRAIN_AUDIO_DIR_2024, fname) for fname in sampled_df_2024["filename"].values]

# ============================
# Audio Processing Functions (Without Noise Augmentation)
# ============================
def load_and_process_audio(path, sr=SR, duration=DURATION, n_mels=N_MELS, n_mfcc=N_MFCC, target_length=313):
    """Load one audio file and return its MFCC matrix with a fixed frame count.

    The clip is loaded with librosa, converted to a mel power spectrogram,
    mapped to decibels and passed to ``librosa.feature.mfcc``. The result is
    transposed so that axis 0 indexes frames, then cut or zero-padded along
    that axis to exactly ``target_length`` rows.

    Args:
        path: Location of the audio file.
        sr: Sample rate passed to librosa for loading and the mel spectrogram.
        duration: Passed to ``librosa.load`` as ``duration``.
        n_mels: Number of mel bands for the spectrogram.
        n_mfcc: Number of MFCCs requested from librosa.
        target_length: Number of frames (rows) in the returned array. The
            default of 313 is presumably the frame count for the default
            ``sr``/``duration`` with librosa's default hop length - confirm.

    Returns:
        A 2-D array with ``target_length`` rows, or ``None`` if loading or
        feature extraction raised (a message is printed in that case).
    """
    try:
        waveform, _ = librosa.load(path, sr=sr, duration=duration)
        mel_power = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=n_mels)
        log_mel = librosa.power_to_db(mel_power)
        # Transpose so that each row is one time frame.
        frames = librosa.feature.mfcc(S=log_mel, n_mfcc=n_mfcc).T

        # Drop surplus frames first, then zero-pad the tail if still short.
        frames = frames[:target_length, :]
        missing = target_length - frames.shape[0]
        if missing > 0:
            frames = np.pad(frames, ((0, missing), (0, 0)), mode='constant')
        return frames
    except FileNotFoundError:
        print(f"File not found: {path}")
    except librosa.LibrosaError as e:
        print(f"Librosa error processing {path}: {e}")
    except Exception as e:
        print(f"General error processing {path}: {e}")
    # Reached only when one of the handlers above ran.
    return None

# ============================
# Data Generator using MFCC Features for Autoencoder
# ============================
def data_generator_autoencoder(paths, batch_size=BATCH_SIZE):
    """Yield ``(features, features)`` batches forever for autoencoder training.

    Each iteration draws ``batch_size`` paths with ``np.random.choice`` (its
    default samples with replacement, so a path may repeat within a batch),
    extracts features with ``load_and_process_audio`` and drops files that
    failed to load. A batch may therefore hold fewer than ``batch_size``
    items; a batch in which every file failed is skipped entirely.

    Args:
        paths: Sequence of audio file paths to sample from.
        batch_size: Number of paths drawn per batch.

    Yields:
        A tuple ``(batch, batch)`` where both elements are the same array of
        stacked feature matrices (input and reconstruction target).

    Raises:
        ValueError: If ``paths`` is empty.
    """
    # Convert once up front: np.random.choice would otherwise rebuild an array
    # from the entire path list on every single batch.
    candidates = np.asarray(paths)
    if candidates.size == 0:
        raise ValueError("data_generator_autoencoder requires at least one path")

    while True:
        batch_paths = np.random.choice(candidates, size=batch_size)
        loaded = (load_and_process_audio(path) for path in batch_paths)
        batch_features = [feature for feature in loaded if feature is not None]
        if not batch_features:
            # Every file in this draw failed to load; try another draw.
            continue
        batch_array = np.array(batch_features)
        yield batch_array, batch_array  # Input and output are the same for autoencoder

# Probe the first file to learn the per-clip feature shape. Fail with a clear
# message rather than an IndexError on an empty list or an AttributeError on
# None when that file cannot be processed.
if not train_file_paths_2024:
    raise RuntimeError("No training audio files were found in the metadata")
_probe_features = load_and_process_audio(train_file_paths_2024[0])
if _probe_features is None:
    raise RuntimeError(
        f"Could not extract features from {train_file_paths_2024[0]}; "
        "cannot determine feature_dim"
    )
feature_dim = _probe_features.shape

train_generator_2024 = data_generator_autoencoder(train_file_paths_2024)

# ============================
# Model Definition (Autoencoder)
# ============================
def create_autoencoder_mfcc_model(input_shape, dropout_rate=0.0, output_activation='linear'):
    """Build and compile a convolutional autoencoder over MFCC matrices.

    The encoder is three 3x3 convolutions (32, 64, 128 filters) with two 2x2
    max-pooling steps, reduced to a 128-value code by global average pooling.
    The decoder expands the code through a dense layer to
    ``input_shape[0] * input_shape[1]`` units, reshapes it back to the input
    grid and applies a final 3x3 convolution.

    Args:
        input_shape: Shape of one sample; its first two entries are used as
            the height and width of the single-channel image fed to the
            convolutions.
        dropout_rate: Dropout applied to the bottleneck code. Values <= 0
            (the default) add no dropout layer.
        output_activation: Activation of the final reconstruction layer.
            Defaults to ``'linear'`` because the targets are unnormalised
            MFCCs; a bounded activation such as ``'sigmoid'`` is only
            appropriate when the inputs are scaled to that range.

    Returns:
        A compiled ``keras`` model (Adam optimiser, MSE loss, MAE metric)
        whose output has the same shape as its input.
    """
    height, width = input_shape[0], input_shape[1]

    inputs = layers.Input(shape=input_shape)
    x = layers.Reshape((height, width, 1))(inputs)

    # Encoder
    x = layers.Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu')(x)
    encoded = layers.GlobalAveragePooling2D()(x)

    # Previously dropout_rate was accepted but silently ignored.
    if dropout_rate > 0:
        encoded = layers.Dropout(dropout_rate)(encoded)

    # Decoder
    num_units = height * width
    x = layers.Dense(num_units, activation='relu')(encoded)
    x = layers.Reshape((height, width, 1))(x)
    # A sigmoid here would clamp reconstructions to [0, 1], which cannot match
    # raw MFCC values; the output is therefore unbounded by default.
    decoded = layers.Conv2D(1, kernel_size=(3, 3), padding='same', activation=output_activation)(x)
    decoded = layers.Reshape(input_shape)(decoded)

    model = models.Model(inputs, decoded)
    # Reconstruction is a regression task: classification metrics (accuracy,
    # AUC, top-k) carry no meaning here, and AUC rejects predictions outside
    # [0, 1]. Track mean absolute error alongside the MSE loss instead.
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Build the model from the probed feature shape rather than a hard-coded
# (313, 42), so it always matches what the generator actually yields.
model_2024 = create_autoencoder_mfcc_model(feature_dim)

# ============================
# Training (Pretraining on 2024 Data)
# ============================
# No validation data is supplied, so early stopping watches the training loss.
early_stopping = callbacks.EarlyStopping(monitor='loss', patience=Patience, restore_best_weights=True)

# Guarantee at least one step per epoch even with fewer files than BATCH_SIZE.
steps_per_epoch = max(1, len(train_file_paths_2024) // BATCH_SIZE)

if tf.config.list_physical_devices('GPU'):
    print("GPU available, training on GPU.")
    device_label, device_name = "GPU", '/GPU:0'
else:
    print("GPU not available, training on CPU.")
    device_label, device_name = "CPU", '/CPU:0'

try:
    with tf.device(device_name):
        history = model_2024.fit(
            train_generator_2024,
            epochs=EPOCHS,
            steps_per_epoch=steps_per_epoch,
            callbacks=[early_stopping]
        )
except Exception as e:
    print(f"Error during model.fit() on {device_label}: {e}")
    # Re-raise so a failed run is never saved and reported as completed.
    raise

# ============================
# Saving Pretrained Model
# ============================
model_2024.save("pretrained_autoencoder_2024")

print("Pretraining completed. Model saved as 'pretrained_autoencoder_2024'")

GPU available, training on GPU.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100




INFO:tensorflow:Assets written to: pretrained_autoencoder_2024\assets


INFO:tensorflow:Assets written to: pretrained_autoencoder_2024\assets


Pretraining completed. Model saved as 'pretrained_autoencoder_2024'
