In [308]:
import os
import librosa
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import noisereduce as nr
import sklearn
from sklearn.metrics import roc_curve
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
from scipy.optimize import brentq
from scipy.interpolate import interp1d
import seaborn as sns
import warnings
import statistics as st
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [255]:
train_data = "birdclef-2025/train_audio/"
test_data = "test_soundscapes/"

In [256]:
names = pd.read_csv("new_train.csv")
tax = pd.read_csv("birdclef-2025/taxonomy.csv")
ids = pd.read_csv("birdclef-2025/sample_submission.csv")

In [257]:
paths=[]

In [258]:
for i in range(0,4000):
        p = f'{train_data}{names.filename[i]}'
        paths.append(p)

In [259]:
def extract_audio_features(y, sr):
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    return {
        "rms": np.mean(librosa.feature.rms(y=y)).squeeze(),  
        "zcr": np.mean(librosa.feature.zero_crossing_rate(y)).squeeze(),  
        "flatness": np.mean(librosa.feature.spectral_flatness(y=y)).squeeze(),
        "spectral_centroid": np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)).squeeze(),
        "roll_off_high" : np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.95)).squeeze(),
        "roll_off_low" : np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.01)).squeeze(),
        "mfcc": np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13))  
    }

def load_and_extract_features(path, sr=32000):
    try:
        if not os.path.isfile(path) or not path.lower().endswith(".ogg"):
            raise ValueError(f"Invalid OGG file: {path}")
        y, sr = librosa.load(path, sr=sr)
        y_nr = nr.reduce_noise(y=y,sr=sr)
        features = extract_audio_features(y_nr, sr)
        return path, {
            "audio": y_nr,
            "sr": sr,
            "features": features
        }
    except Exception as e:
        return path, e

def features_threadpool(paths, sr=32000, max_workers=5):
    results = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(load_and_extract_features, path, sr): path for path in paths}
        for future in as_completed(futures):
            path = futures[future]
            try:
                key, value = future.result()
                results[key] = value
            except Exception as e:
                results[path] = e
    return results

In [260]:
results = features_threadpool(paths)

In [262]:
print(type(results))

<class 'dict'>


In [263]:
 first_item = next(iter(results.items()))
 print(first_item)

('birdclef-2025/train_audio/1564122/CSA34200.ogg', {'audio': array([-2.3949902e-05, -3.8392193e-04, -6.6638028e-04, ...,
       -4.9166137e-04, -4.3223481e-04, -4.2514617e-04],
      shape=(95715,), dtype=float32), 'sr': 32000, 'features': {'rms': np.float32(0.00023533333), 'zcr': np.float64(0.3964425969251337), 'flatness': np.float32(0.096430674), 'spectral_centroid': np.float64(7414.9290055651545), 'roll_off_high': np.float64(14131.35026737968), 'roll_off_low': np.float64(652.9913101604278), 'mfcc': np.float32(-66.89174)}})


In [264]:
print(type(first_item))

<class 'tuple'>


In [265]:
embed_f = first_item[1]['features']

In [266]:
print(embed_f)

{'rms': np.float32(0.00023533333), 'zcr': np.float64(0.3964425969251337), 'flatness': np.float32(0.096430674), 'spectral_centroid': np.float64(7414.9290055651545), 'roll_off_high': np.float64(14131.35026737968), 'roll_off_low': np.float64(652.9913101604278), 'mfcc': np.float32(-66.89174)}


In [273]:
print(len(results))

4000


In [274]:
df = pd.DataFrame()

In [275]:
i=0
for path, result in results.items():
    audio_feat = result['features']
    df.loc[i,['path']] = path
    for feature in audio_feat.items():
        df.loc[i,['rms']] = audio_feat['rms']
        df.loc[i,['zcr']] = audio_feat['zcr']
        df.loc[i,['flatness']] = audio_feat['flatness']
        df.loc[i,['spectral_centroid']] = audio_feat['spectral_centroid']
        df.loc[i,['roll_off_high']] = audio_feat['roll_off_high']
        df.loc[i,['roll_off_low']] = audio_feat['roll_off_low']
        df.loc[i,['mfcc']] = audio_feat['mfcc']
    i=i+1

In [276]:
print(len(df))

4000


In [277]:
print(df)

                                                  path       rms       zcr  \
0       birdclef-2025/train_audio/1564122/CSA34200.ogg  0.000235  0.396443   
1        birdclef-2025/train_audio/523060/CSA34181.ogg  0.000096  0.258184   
2         birdclef-2025/train_audio/50186/CSA18282.ogg  0.006677  0.253378   
3        birdclef-2025/train_audio/868458/CSA34218.ogg  0.000476  0.290055   
4         birdclef-2025/train_audio/48124/CSA18785.ogg  0.008967  0.239208   
...                                                ...       ...       ...   
3995  birdclef-2025/train_audio/eardov1/iNat873932.ogg  0.006314  0.073954   
3996    birdclef-2025/train_audio/trokin/iNat48335.ogg  0.001650  0.179769   
3997    birdclef-2025/train_audio/yeofly1/XC249339.ogg  0.004452  0.221909   
3998    birdclef-2025/train_audio/whwswa1/XC211444.ogg  0.009103  0.175306   
3999    birdclef-2025/train_audio/linwoo1/XC961351.ogg  0.000284  0.259656   

      flatness  spectral_centroid  roll_off_high  roll_off_low 

In [278]:
for j in range(0,len(df)):
    str = df.path[j]
    label = str.split('/')
    primary_label = label[2]
    df.loc[j,['primary_label']] = primary_label

In [279]:
tax.head(2)

Unnamed: 0,primary_label,inat_taxon_id,scientific_name,common_name,class_name
0,1139490,1139490,Ragoniella pulchella,Ragoniella pulchella,Insecta
1,1192948,1192948,Oxyprora surinamensis,Oxyprora surinamensis,Insecta


In [280]:
df.head(2)

Unnamed: 0,path,rms,zcr,flatness,spectral_centroid,roll_off_high,roll_off_low,mfcc,primary_label
0,birdclef-2025/train_audio/1564122/CSA34200.ogg,0.000235,0.396443,0.096431,7414.929006,14131.350267,652.99131,-66.891739,1564122
1,birdclef-2025/train_audio/523060/CSA34181.ogg,9.6e-05,0.258184,0.118878,5793.383528,12711.11052,700.223595,-73.397148,523060


In [281]:
df = df.merge(tax[['primary_label', 'scientific_name']], on='primary_label', how='left')

In [282]:
df.head(5)

Unnamed: 0,path,rms,zcr,flatness,spectral_centroid,roll_off_high,roll_off_low,mfcc,primary_label,scientific_name
0,birdclef-2025/train_audio/1564122/CSA34200.ogg,0.000235,0.396443,0.096431,7414.929006,14131.350267,652.99131,-66.891739,1564122,Daedadelus waehnerorum
1,birdclef-2025/train_audio/523060/CSA34181.ogg,9.6e-05,0.258184,0.118878,5793.383528,12711.11052,700.223595,-73.397148,523060,Eschatoceras bipunctatus
2,birdclef-2025/train_audio/50186/CSA18282.ogg,0.006677,0.253378,0.106493,5307.952838,8308.787886,1697.265625,-47.413315,50186,Cicadidae
3,birdclef-2025/train_audio/868458/CSA34218.ogg,0.000476,0.290055,0.024536,5924.033798,13301.201923,700.309066,-69.345619,868458,Typophyllum inflatum
4,birdclef-2025/train_audio/48124/CSA18785.ogg,0.008967,0.239208,0.129556,4890.700882,7507.693528,1427.022903,-40.313438,48124,Tettigoniidae


In [283]:
le = LabelEncoder()
df['primary_label'] = le.fit_transform(df['primary_label'])

In [284]:
df.head(5)

Unnamed: 0,path,rms,zcr,flatness,spectral_centroid,roll_off_high,roll_off_low,mfcc,primary_label,scientific_name
0,birdclef-2025/train_audio/1564122/CSA34200.ogg,0.000235,0.396443,0.096431,7414.929006,14131.350267,652.99131,-66.891739,9,Daedadelus waehnerorum
1,birdclef-2025/train_audio/523060/CSA34181.ogg,9.6e-05,0.258184,0.118878,5793.383528,12711.11052,700.223595,-73.397148,30,Eschatoceras bipunctatus
2,birdclef-2025/train_audio/50186/CSA18282.ogg,0.006677,0.253378,0.106493,5307.952838,8308.787886,1697.265625,-47.413315,28,Cicadidae
3,birdclef-2025/train_audio/868458/CSA34218.ogg,0.000476,0.290055,0.024536,5924.033798,13301.201923,700.309066,-69.345619,56,Typophyllum inflatum
4,birdclef-2025/train_audio/48124/CSA18785.ogg,0.008967,0.239208,0.129556,4890.700882,7507.693528,1427.022903,-40.313438,27,Tettigoniidae


In [285]:
df['scientific_name'] = le.fit_transform(df['scientific_name'])

In [286]:
df.head(5)

Unnamed: 0,path,rms,zcr,flatness,spectral_centroid,roll_off_high,roll_off_low,mfcc,primary_label,scientific_name
0,birdclef-2025/train_audio/1564122/CSA34200.ogg,0.000235,0.396443,0.096431,7414.929006,14131.350267,652.99131,-66.891739,9,56
1,birdclef-2025/train_audio/523060/CSA34181.ogg,9.6e-05,0.258184,0.118878,5793.383528,12711.11052,700.223595,-73.397148,30,75
2,birdclef-2025/train_audio/50186/CSA18282.ogg,0.006677,0.253378,0.106493,5307.952838,8308.787886,1697.265625,-47.413315,28,40
3,birdclef-2025/train_audio/868458/CSA34218.ogg,0.000476,0.290055,0.024536,5924.033798,13301.201923,700.309066,-69.345619,56,195
4,birdclef-2025/train_audio/48124/CSA18785.ogg,0.008967,0.239208,0.129556,4890.700882,7507.693528,1427.022903,-40.313438,27,182


In [287]:
sc = StandardScaler()

df['scientific_name'] = sc.fit_transform(df[['scientific_name']])

In [288]:
df.head(5)

Unnamed: 0,path,rms,zcr,flatness,spectral_centroid,roll_off_high,roll_off_low,mfcc,primary_label,scientific_name
0,birdclef-2025/train_audio/1564122/CSA34200.ogg,0.000235,0.396443,0.096431,7414.929006,14131.350267,652.99131,-66.891739,9,-0.899491
1,birdclef-2025/train_audio/523060/CSA34181.ogg,9.6e-05,0.258184,0.118878,5793.383528,12711.11052,700.223595,-73.397148,30,-0.575525
2,birdclef-2025/train_audio/50186/CSA18282.ogg,0.006677,0.253378,0.106493,5307.952838,8308.787886,1697.265625,-47.413315,28,-1.172304
3,birdclef-2025/train_audio/868458/CSA34218.ogg,0.000476,0.290055,0.024536,5924.033798,13301.201923,700.309066,-69.345619,56,1.470574
4,birdclef-2025/train_audio/48124/CSA18785.ogg,0.008967,0.239208,0.129556,4890.700882,7507.693528,1427.022903,-40.313438,27,1.248913


In [304]:
X = df.drop(['path','primary_label'],axis='columns')
y = df.primary_label

In [306]:
print(X_test)

           rms       zcr  flatness  spectral_centroid  roll_off_high  \
555   0.002812  0.147559  0.015619        2592.161935    4753.021204   
3491  0.009129  0.323161  0.007963        5464.527442   11242.679598   
527   0.000295  0.149928  0.027803        2810.847652    6319.600167   
3925  0.003779  0.163985  0.003238        2720.940870    4939.205786   
2989  0.015988  0.412306  0.004597        6583.688650   12479.798274   
...        ...       ...       ...                ...            ...   
1922  0.000955  0.156207  0.085669        3846.305407   10080.775177   
865   0.000829  0.184915  0.004771        3155.908917    5774.505132   
3943  0.002463  0.133332  0.145313        2251.706020    4662.939504   
1642  0.003507  0.168150  0.065592        3716.706005    8642.472898   
2483  0.002996  0.290532  0.001833        4919.775054    7968.405659   

      roll_off_low       mfcc  scientific_name  
555     530.193505 -57.292465         0.362270  
3491   1152.693966 -48.181530        

In [323]:
X_scaled = sc.fit_transform(X)

In [324]:
X_train, X_test, y_train, y_test = train_test_split( X_scaled, y, test_size=0.2, random_state=42)

In [325]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

In [326]:
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=64)

In [334]:
class MLPClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(MLPClassifier, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        return self.net(x)

In [335]:
input_size = X_train.shape[1]
num_classes = len(np.unique(y))

model = MLPClassifier(input_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

epochs = 500
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

Epoch 1/500, Loss: 264.4925
Epoch 2/500, Loss: 261.3167
Epoch 3/500, Loss: 257.1023
Epoch 4/500, Loss: 251.5706
Epoch 5/500, Loss: 245.3242
Epoch 6/500, Loss: 240.9462
Epoch 7/500, Loss: 237.4148
Epoch 8/500, Loss: 233.9617
Epoch 9/500, Loss: 231.2178
Epoch 10/500, Loss: 228.1169
Epoch 11/500, Loss: 225.6971
Epoch 12/500, Loss: 222.7588
Epoch 13/500, Loss: 220.0495
Epoch 14/500, Loss: 217.6000
Epoch 15/500, Loss: 215.6939
Epoch 16/500, Loss: 213.3933
Epoch 17/500, Loss: 211.2618
Epoch 18/500, Loss: 209.8375
Epoch 19/500, Loss: 208.1539
Epoch 20/500, Loss: 206.5188
Epoch 21/500, Loss: 205.7331
Epoch 22/500, Loss: 203.8690
Epoch 23/500, Loss: 202.9005
Epoch 24/500, Loss: 201.0445
Epoch 25/500, Loss: 200.5184
Epoch 26/500, Loss: 198.5856
Epoch 27/500, Loss: 198.3854
Epoch 28/500, Loss: 197.2837
Epoch 29/500, Loss: 196.5782
Epoch 30/500, Loss: 195.0648
Epoch 31/500, Loss: 194.8623
Epoch 32/500, Loss: 194.7154
Epoch 33/500, Loss: 193.6816
Epoch 34/500, Loss: 191.8512
Epoch 35/500, Loss: 191

In [336]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        output = model(X_batch)
        _, predicted = torch.max(output, 1)
        correct += (predicted == y_batch).sum().item()
        total += y_batch.size(0)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 34.12%
