<a href="https://colab.research.google.com/github/jkranyak/project_3/blob/main/bird_competition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project Index:
# 1. Install Necessary Libraries
# 2. Set Up Directory Structure for Processed Data
# 3. Define Spectrogram Conversion Function
# 4. Preprocess Audio Files into Spectrograms
# 5. Data Augmentation (Optional)
# 6. Split Dataset
# 7. Prepare Model Input
# 8. Define Model Architecture
# 9. Compile Model
# 10. Prepare for Training (Callbacks)
# 11. Train Model
# 12. Evaluate Model
# 13. Submit to Kaggle

## Step 1: Install Necessary Libraries


In [None]:
!pip install librosa soundfile tensorflow scikit-learn numpy pandas matplotlib seaborn tqdm
!pip install audiomentations


Collecting audiomentations
  Downloading audiomentations-0.35.0-py3-none-any.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.3/82.3 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: audiomentations
Successfully installed audiomentations-0.35.0


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, LSTM, TimeDistributed, Dense, Dropout, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import load_img, img_to_array

from keras.utils import Sequence
from keras.preprocessing.image import load_img, img_to_array

import os
import librosa
import soundfile as sf
import torch
import torch.nn as nn

import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

import joblib
from multiprocessing import Pool
from scipy import signal
from scipy.io import wavfile
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file found.
for current_dir, _subdirs, names_in_dir in os.walk('/kaggle/input'):
    for entry in names_in_dir:
        print(os.path.join(current_dir, entry))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

#Data Setup

---

###Download and unpack dataset


###Set up directory structure for processed data

In [None]:
# Working-directory layout: one sub-folder per kind of artefact the notebook produces.
output_directories = ['processed_data', 'models', 'submissions', 'visualizations']
base_working_dir = '/kaggle/working/'

# Create each sub-folder (a no-op when it already exists) and report its location.
for path in (os.path.join(base_working_dir, folder) for folder in output_directories):
    os.makedirs(path, exist_ok=True)
    print(f"Created directory: {path}")

# Locations of the competition inputs.
train_audio_path = "/kaggle/input/birdclef-2024/train_audio"
unlabeled_soundscapes_path = "/kaggle/input/birdclef-2024/unlabeled_soundscapes"
test_soundscapes_path = "/kaggle/input/birdclef-2024/test_soundscapes"
train_metadata_csv_path = "/kaggle/input/birdclef-2024/train_metadata.csv"
taxonomy_csv_path = "/kaggle/input/birdclef-2024/eBird_Taxonomy_v2021.csv"
sample_submission_csv_path = "/kaggle/input/birdclef-2024/sample_submission.csv"


Created directory: /kaggle/working/processed_data
Created directory: /kaggle/working/models
Created directory: /kaggle/working/submissions
Created directory: /kaggle/working/visualizations


Convert audio to spectrograms


---


Augment the audio before conversion so the saved spectrograms can be used for training later.


Geographic Area of Western Ghats:

Latitude: Approximately ranges from 8°N to 21°N.
Longitude: Approximately ranges from 72°E to 78°E.

We'll add a cushion to these ranges to account for nearby areas and potential data variability or migration patterns slightly beyond these strict boundaries.

In [None]:

# Load metadata
metadata = pd.read_csv('/kaggle/input/birdclef-2024/train_metadata.csv')

# Geographical bounds of the Western Ghats, in degrees.
# NOTE(review): these equal the nominal 8-21 N / 72-78 E range, so no extra
# cushion is actually applied here; widen the tuples if a margin is intended.
lat_bounds = (8, 21)
lon_bounds = (72, 78)

# Keep only recordings inside the bounding box. `.copy()` makes the result an
# independent frame, so the column assignment below does not write into a
# slice of `metadata` (which triggers pandas' SettingWithCopyWarning).
western_ghats_metadata = metadata[
    (metadata['latitude'] >= lat_bounds[0]) & (metadata['latitude'] <= lat_bounds[1]) &
    (metadata['longitude'] >= lon_bounds[0]) & (metadata['longitude'] <= lon_bounds[1])
].copy()

# Point `filename` at the spectrogram image for each recording. The species
# sub-folder is kept because the preprocessing step writes its images to
# <processed_data_dir>/<species>/<id>_augmented_spectrogram.png.
processed_data_dir = '/kaggle/working/processed_data/'
western_ghats_metadata['filename'] = western_ghats_metadata['filename'].apply(
    lambda x: os.path.join(processed_data_dir, x.replace('.ogg', '_augmented_spectrogram.png'))
)

# Save the filtered metadata
western_ghats_metadata.to_csv('/kaggle/working/western_ghats_birds_metadata.csv', index=False)


FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/birdclef-2024/train_metadata.csv'

In [None]:
def advanced_audio_augmentation(audio_signal, sr):
    """Return a randomly time-stretched, pitch-shifted and noise-perturbed copy of a waveform.

    Args:
        audio_signal: Waveform samples as accepted by ``librosa.effects``.
        sr: Sampling rate of ``audio_signal``; forwarded to the pitch shift.

    Returns:
        The augmented waveform. Its length generally differs from the input
        because of the time stretch.
    """
    # 1) Tempo change by a factor drawn uniformly from [0.9, 1.1).
    stretch_rate = np.random.uniform(0.9, 1.1)
    stretched = librosa.effects.time_stretch(audio_signal, rate=stretch_rate)

    # 2) Pitch shift by a whole number of semitones in {-2, ..., 2}.
    semitones = np.random.randint(-2, 3)
    shifted = librosa.effects.pitch_shift(stretched, sr=sr, n_steps=semitones)

    # 3) Additive zero-mean Gaussian noise with a random standard deviation.
    sigma = np.random.uniform(0.001, 0.005)
    shifted += np.random.normal(0, sigma, len(shifted))  # in place: keeps the array's dtype
    return shifted

def create_and_augment_spectrogram(audio_path, save_path):
    """Augment one audio file and save its mel spectrogram plot as an image.

    The audio is loaded at its native sampling rate, passed through
    ``advanced_audio_augmentation``, converted to a dB-scaled mel spectrogram
    and rendered with matplotlib (axes, colorbar and title included).

    Args:
        audio_path: Path of the audio file to read.
        save_path: Destination image path. Its parent directory is created
            when missing; a bare filename is written to the current directory.
    """
    # Import the submodule explicitly: a plain `import librosa` is not
    # guaranteed to expose `librosa.display` on every librosa release.
    import librosa.display

    y, sr = librosa.load(audio_path, sr=None)
    y_augmented = advanced_audio_augmentation(y, sr)
    S = librosa.feature.melspectrogram(y=y_augmented, sr=sr)
    S_DB = librosa.power_to_db(S, ref=np.max)

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Augmented Mel-frequency spectrogram')
        plt.tight_layout()
        # Ensure the directory exists before saving. os.makedirs('') raises,
        # so skip it when save_path has no directory component.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        plt.savefig(save_path)
    finally:
        # Always release the figure, even if plotting or saving failed;
        # otherwise figures pile up when this runs over many files.
        plt.close(fig)

def process_filtered_audio_files(metadata, base_audio_dir, processed_data_dir):
    """Create an augmented spectrogram image for every recording listed in ``metadata``.

    Args:
        metadata: DataFrame with a ``filename`` column holding audio paths
            relative to ``base_audio_dir``.
        base_audio_dir: Directory that contains the audio files.
        processed_data_dir: Directory under which the images are written. The
            relative sub-path of each audio file is preserved, with the
            extension replaced by ``_augmented_spectrogram.png``.

    Recordings whose audio file does not exist are reported on stdout and skipped.
    """
    for _, record in metadata.iterrows():
        relative_name = record['filename']
        source_file = os.path.join(base_audio_dir, relative_name)

        # Guard clause: nothing to convert when the audio is not on disk.
        if not os.path.exists(source_file):
            print(f"File not found: {source_file}")
            continue

        stem, _extension = os.path.splitext(relative_name)
        target_file = os.path.join(processed_data_dir, f"{stem}_augmented_spectrogram.png")
        create_and_augment_spectrogram(source_file, target_file)

# Read the training metadata table.
metadata_path = '/kaggle/input/birdclef-2024/train_metadata.csv'
metadata = pd.read_csv(metadata_path)

# Bounding box used to select Western Ghats recordings (degrees, inclusive).
western_ghats_bounds = {'min_latitude': 10.0, 'max_latitude': 20.0, 'min_longitude': 73.0, 'max_longitude': 78.0}

# Build one boolean mask per axis, then combine them.
inside_latitude = (
    (metadata['latitude'] >= western_ghats_bounds['min_latitude'])
    & (metadata['latitude'] <= western_ghats_bounds['max_latitude'])
)
inside_longitude = (
    (metadata['longitude'] >= western_ghats_bounds['min_longitude'])
    & (metadata['longitude'] <= western_ghats_bounds['max_longitude'])
)
filtered_metadata = metadata[inside_latitude & inside_longitude]

# Convert every selected recording into an augmented spectrogram image.
base_audio_dir = '/kaggle/input/birdclef-2024/train_audio'
processed_data_dir = '/kaggle/working/processed_data'
process_filtered_audio_files(
    metadata=filtered_metadata,
    base_audio_dir=base_audio_dir,
    processed_data_dir=processed_data_dir,
)


In [None]:
# Keep only the columns used downstream. The frame is built from `metadata`
# (the table loaded with pd.read_csv above); the previously referenced
# `train_metadata` was never defined. `.copy()` makes the selection an
# independent frame so the in-place column updates below are safe.
filtered_metadata_df = metadata[['primary_label', 'latitude', 'longitude', 'url', 'filename']].copy()

# Features to standardize (zero mean, unit variance)
features_to_scale = ['latitude', 'longitude']

# Initialize StandardScaler
scaler = StandardScaler()

# Fit scaler to the features and transform them
filtered_metadata_df.loc[:, features_to_scale] = scaler.fit_transform(filtered_metadata_df.loc[:, features_to_scale])

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Encode primary_label as integers; the `+ 1` makes the codes start at 1
filtered_metadata_df['primary_label'] = label_encoder.fit_transform(filtered_metadata_df['primary_label']) + 1

# Mapping from original label to encoded value. The same `+ 1` offset is
# applied here so the printed mapping matches the values stored in the column.
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_) + 1))
print("Label Encoding Mapping:")
print(label_mapping)

# Print the first few rows of the DataFrame
print("Processed DataFrame:")
print(filtered_metadata_df.head())

In [None]:
# Visualize the mel spectrogram of a single, un-augmented training recording
sample_audio_file = '/kaggle/input/birdclef-2024/train_audio/asbfly/XC134896.ogg'  # Example path, adjust as needed

# Load the audio at its native sampling rate (sr=None), compute the mel
# spectrogram and convert power to decibels relative to the loudest bin
y, sr = librosa.load(sample_audio_file, sr=None)
S = librosa.feature.melspectrogram(y=y, sr=sr)
S_DB = librosa.power_to_db(S, ref=np.max)

# Render with time on the x-axis and mel frequency on the y-axis.
# The pyplot calls below all act on the figure created here, so their order matters.
plt.figure(figsize=(10, 4))
librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel-frequency spectrogram of Asbfly')
plt.tight_layout()
plt.show()

Let's check out the metadata.

Normalize and standardize features

Extra augmentation: let's add a function that applies background noise and pitch shifting to the training set.

Model Development
Define the CRNN model architecture
Compile the model with appropriate loss function and optimizer

In [None]:
def build_crnn_model(input_shape=(128, 128, 1), num_metadata_features=2, time_steps=1, num_classes=1):
    """Build and compile a two-input CRNN (CNN + LSTM spectrogram branch, dense metadata branch).

    Args:
        input_shape: Shape of one spectrogram frame, ``(height, width, channels)``.
        num_metadata_features: Length of the metadata feature vector.
        time_steps: Number of spectrogram frames per sample; it is prepended
            to ``input_shape`` to form the spectrogram input shape.
        num_classes: Number of target classes. The default ``1`` keeps the
            original single sigmoid unit trained with binary cross-entropy.
            Any larger value produces a softmax over ``num_classes`` units
            trained with sparse categorical cross-entropy, which expects
            integer labels in ``0 .. num_classes - 1``.

    Returns:
        A compiled ``Model`` taking ``[spectrogram_input, metadata_input]``.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """
    if num_classes < 1:
        raise ValueError(f"num_classes must be >= 1, got {num_classes}")

    # Adjust input shape to include time dimension: (time_steps, height, width, channels)
    adjusted_input_shape = (time_steps,) + tuple(input_shape)

    # Spectrogram input branch: the same small CNN is applied to every frame
    spectrogram_input = Input(shape=adjusted_input_shape, name='spectrogram_input')
    x = TimeDistributed(Conv2D(32, kernel_size=(3, 3), activation='relu'))(spectrogram_input)
    x = TimeDistributed(MaxPooling2D((2, 2)))(x)
    x = TimeDistributed(Conv2D(64, (3, 3), activation='relu'))(x)
    x = TimeDistributed(MaxPooling2D((2, 2)))(x)
    x = TimeDistributed(Flatten())(x)

    # LSTM layer to handle temporal features; only the final state is kept
    x = LSTM(64, return_sequences=False)(x)

    # Metadata input branch
    metadata_input = Input(shape=(num_metadata_features,), name='metadata_input')
    metadata_dense = Dense(32, activation='relu')(metadata_input)

    # Combining both branches
    combined = concatenate([x, metadata_dense])
    combined = Dense(64, activation='relu')(combined)
    combined = Dropout(0.5)(combined)

    # Output head: binary (original behaviour) or multi-class
    if num_classes == 1:
        outputs = Dense(1, activation='sigmoid')(combined)
        loss = 'binary_crossentropy'
    else:
        outputs = Dense(num_classes, activation='softmax')(combined)
        loss = 'sparse_categorical_crossentropy'

    model = Model(inputs=[spectrogram_input, metadata_input], outputs=outputs)

    model.compile(optimizer=Adam(learning_rate=1e-4), loss=loss, metrics=['accuracy'])

    return model

In [None]:
def _to_spectrogram_path(audio_filename):
    """Map a relative ``.ogg`` filename to its spectrogram image path under ``processed_data_dir``."""
    image_name = audio_filename.replace('.ogg', '_augmented_spectrogram.png')
    return os.path.join(processed_data_dir, image_name)


# Re-point the filename column from the raw audio to the generated spectrogram images.
filtered_metadata_df['filename'] = filtered_metadata_df['filename'].apply(_to_spectrogram_path)

In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that yields batches of spectrogram images and their labels.

    Args:
        file_paths: Indexable collection of image paths.
        labels: Indexable collection of integer labels, aligned with
            ``file_paths`` (``labels[k]`` belongs to ``file_paths[k]``).
        batch_size: Number of samples requested per batch.
        dim: Target ``(height, width)`` each image is resized to.
        n_channels: Number of channels in the output array.
        shuffle: Whether to reshuffle the sample order at the end of each epoch.
    """

    def __init__(self, file_paths, labels, batch_size=32, dim=(128, 128), n_channels=1, shuffle=True):
        """Store the configuration and build the initial sample order."""
        # Let the base class initialise itself (newer Keras versions warn otherwise).
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.file_paths = file_paths
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.file_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X, y)``."""
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        return self.__data_generation(batch_indexes)

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.file_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch_indexes):
        """Load the images for the given dataset indexes.

        Each label is looked up with the same dataset index as its file path.
        Indexing labels by position within the batch would pair images with
        the wrong labels as soon as the order is shuffled, a batch other than
        the first is requested, or a file is skipped.

        Returns:
            ``(X, y)`` where ``X`` has shape ``(n_loaded, *dim, n_channels)``
            with pixel values scaled to [0, 1], and ``y`` holds the matching
            integer labels. ``n_loaded`` is smaller than the batch size when
            files are missing.
        """
        X = np.empty((len(batch_indexes), *self.dim, self.n_channels))
        y = np.empty((len(batch_indexes)), dtype=int)
        n_loaded = 0  # Rows actually filled; missing files are skipped

        for k in batch_indexes:
            file_path = self.file_paths[k]
            if not os.path.exists(file_path):
                print(f"File not found: {file_path}")
                continue  # Skip this file if not found
            img = load_img(file_path, target_size=self.dim, color_mode='grayscale')
            img = img_to_array(img) / 255.0
            X[n_loaded,] = img
            y[n_loaded] = self.labels[k]
            n_loaded += 1

        return X[:n_loaded], np.array(y[:n_loaded])


In [None]:
import os

# Sanity check: list what the preprocessing step left in the output directory.
processed_data_dir = '/kaggle/working/processed_data'
all_files = os.listdir(processed_data_dir)

if all_files:
    print(f"Number of files in the directory: {len(all_files)}")
    print("Sample files:", all_files[:5])  # First five entries as a spot check
else:
    print("No files in the directory.")


###Model Training
Since we are using images, we need to set up data generators; then we can split the data into training and validation sets.


In [None]:
# Collect every spectrogram path listed in the metadata that is absent on disk.
missing_files = []
for spectrogram_path in filtered_metadata_df['filename']:
    if not os.path.exists(spectrogram_path):
        missing_files.append(spectrogram_path)

if not missing_files:
    print("All files accounted for.")
else:
    print(f"Missing files: {missing_files[:5]}")  # Show only the first few

In [None]:
# Generate the model
model = build_crnn_model(input_shape=(128, 128, 1))

# Split data into training and validation sets, using 'filename' as the
# sample identifier and 'primary_label' as the target
X = filtered_metadata_df['filename'].values
y = filtered_metadata_df['primary_label'].values

# Stratify so both splits keep the same class proportions
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Generator settings shared by the training and validation generators
params = {'dim': (128, 128), 'batch_size': 32, 'n_channels': 1, 'shuffle': True}

training_generator = DataGenerator(X_train, y_train, **params)
validation_generator = DataGenerator(X_val, y_val, **params)

# Training is deliberately not started in this cell: `callbacks` is only
# defined in the callbacks cell further down, so calling
# model.fit(..., callbacks=callbacks) here raised NameError on a fresh kernel.
# The model.fit call lives in the training cell that follows the callbacks.
#
# NOTE(review): build_crnn_model expects two inputs (a spectrogram tensor with
# a leading time dimension, plus metadata features), while DataGenerator
# yields a single (batch, height, width, channels) array. Confirm the inputs
# are reconciled before training.


###Callbacks

In [None]:
# Checkpoint: keep only the weights with the lowest validation loss seen so far.
checkpoint_cb = ModelCheckpoint(
    filepath=os.path.join(base_working_dir, 'models/best_model.h5'),
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

# Early stopping: end training after 5 epochs without validation-loss improvement.
early_stopping_cb = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

# LR schedule: cut the learning rate tenfold after 3 stagnant epochs, never below 1e-6.
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

callbacks = [checkpoint_cb, early_stopping_cb, reduce_lr_cb]


###build & train the model

In [None]:
# Train on the DataGenerator batches, validating after every epoch.
# Adjust epochs and batch_size as needed.
fit_options = {
    'validation_data': validation_generator,
    'epochs': 100,
    'callbacks': callbacks,
    'verbose': 1,
}
history = model.fit(training_generator, **fit_options)

Evaluation and Testing
Evaluate the model on the test set or soundscapes
Apply any post-processing needed for predictions
Analyze model performance metrics

In [None]:

# After training, summarize the model
model.summary()

# One figure per tracked metric: training curve against validation curve.
for metric_key, metric_label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric_key], label=f'Train {metric_label}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
    plt.title(f'Model {metric_label}')
    plt.ylabel(metric_label)
    plt.xlabel('Epoch')
    plt.legend(loc='upper left')
    plt.show()

Optimization and Tuning
Fine-tune model parameters
Experiment with different architectures or features
Re-train and evaluate the model

submission
