In [None]:
%pip install kagglehub --quiet
import kagglehub
import shutil
import os

path = kagglehub.dataset_download("rohanrao/xeno-canto-bird-recordings-extended-n-z")

print("Path to dataset files:", path)

os.makedirs("./data", exist_ok=True)
shutil.move(path, "./data")
print("Dataset installed into the ./data directory.")

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import tensorflow as tf
import numpy as np

data =  pd.read_csv("./data/11/train_extended.csv")
data

Unnamed: 0,rating,playback_used,ebird_code,channels,date,duration,filename,species,title,secondary_labels,...,background,xc_id,url,country,author,primary_label,longitude,time,recordist,license
0,0.0,no,aldfly,2 (stereo),2019-06-11,49,XC554809.mp3,Alder Flycatcher,XC554809 Alder Flycatcher (Empidonax alnorum),[],...,,554809,https://www.xeno-canto.org/554809,United States,Ron Overholtz,Empidonax alnorum_Alder Flycatcher,-149.9757,07:00,Ron Overholtz,Creative Commons Attribution-NonCommercial-Sha...
1,0.0,no,aldfly,2 (stereo),2020-04-14,21,XC552408.mp3,Alder Flycatcher,XC552408 Alder Flycatcher (Empidonax alnorum),[],...,,552408,https://www.xeno-canto.org/552408,Colombia,Jerome Fischer,Empidonax alnorum_Alder Flycatcher,-74.6520,17:00,Jerome Fischer,Creative Commons Attribution-NonCommercial-Sha...
2,0.0,yes,aldfly,2 (stereo),2020-04-05,35,XC544552.mp3,Alder Flycatcher,XC544552 Alder Flycatcher (Empidonax alnorum),[],...,,544552,https://www.xeno-canto.org/544552,Colombia,Jerome Fischer,Empidonax alnorum_Alder Flycatcher,-74.6520,07:00,Jerome Fischer,Creative Commons Attribution-NonCommercial-Sha...
3,0.0,yes,aldfly,2 (stereo),2020-04-05,12,XC544551.mp3,Alder Flycatcher,XC544551 Alder Flycatcher (Empidonax alnorum),[],...,,544551,https://www.xeno-canto.org/544551,Colombia,Jerome Fischer,Empidonax alnorum_Alder Flycatcher,-74.6520,07:00,Jerome Fischer,Creative Commons Attribution-NonCommercial-Sha...
4,0.0,yes,aldfly,2 (stereo),2020-04-05,38,XC544550.mp3,Alder Flycatcher,XC544550 Alder Flycatcher (Empidonax alnorum),[],...,,544550,https://www.xeno-canto.org/544550,Colombia,Jerome Fischer,Empidonax alnorum_Alder Flycatcher,-74.6520,07:00,Jerome Fischer,Creative Commons Attribution-NonCommercial-Sha...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23779,4.5,no,yetvir,2 (stereo),2020-05-16,63,XC558915.mp3,Yellow-throated Vireo,XC558915 Yellow-throated Vireo (Vireo flavifrons),['Setophaga striata_Blackpoll Warbler'],...,Blackpoll Warbler (Setophaga striata),558915,https://www.xeno-canto.org/558915,United States,William Whitehead,Vireo flavifrons_Yellow-throated Vireo,-74.2495,07:00,William Whitehead,Creative Commons Attribution-NonCommercial-Sha...
23780,4.0,no,yetvir,1 (mono),2020-07-27,41,XC579823.mp3,Yellow-throated Vireo,XC579823 Yellow-throated Vireo (Vireo flavifrons),[],...,,579823,https://www.xeno-canto.org/579823,United States,Bobby Wilcox,Vireo flavifrons_Yellow-throated Vireo,-91.0756,12:00,Bobby Wilcox,Creative Commons Attribution-NonCommercial-Sha...
23781,4.0,no,yetvir,2 (stereo),2020-07-10,25,XC574737.mp3,Yellow-throated Vireo,XC574737 Yellow-throated Vireo (Vireo flavifrons),[],...,,574737,https://www.xeno-canto.org/574737,Canada,Jon Ruddy,Vireo flavifrons_Yellow-throated Vireo,-76.3558,10:30,Jon Ruddy,Creative Commons Attribution-NonCommercial-Sha...
23782,3.5,no,yetvir,1 (mono),2020-07-03,33,XC573213.mp3,Yellow-throated Vireo,XC573213 Yellow-throated Vireo (Vireo flavifrons),['Geothlypis trichas_Common Yellowthroat'],...,Common Yellowthroat (Geothlypis trichas),573213,https://www.xeno-canto.org/573213,United States,Annette McClellan,Vireo flavifrons_Yellow-throated Vireo,-87.8015,06:00,Annette McClellan,Creative Commons Attribution-NonCommercial-Sha...


In [2]:
data.columns

Index(['rating', 'playback_used', 'ebird_code', 'channels', 'date', 'duration',
       'filename', 'species', 'title', 'secondary_labels', 'bird_seen',
       'sci_name', 'location', 'latitude', 'sampling_rate', 'type',
       'elevation', 'bitrate_of_mp3', 'file_type', 'background', 'xc_id',
       'url', 'country', 'author', 'primary_label', 'longitude', 'time',
       'recordist', 'license'],
      dtype='object')

In [3]:
train_set, test_set = train_test_split(data, test_size=0.2, random_state=42)
train_set.shape, test_set.shape

((19027, 29), (4757, 29))

In [4]:
cols_to_drop = [
    'license', 'filename', 'ebird_code', 'channels',
    'date', 'title', 'secondary_labels', 'sci_name',
    'location', 'sampling_rate', 'bitrate_of_mp3',
    'file_type', 'url', 'author', 'primary_label',
    'time'
]

target_col = 'species'

X_train = train_set.drop(columns=cols_to_drop + [target_col])
y_train = train_set[target_col]

X_test = test_set.drop(columns=cols_to_drop + [target_col])
y_test = test_set[target_col]


In [5]:
X_train.shape, y_train.shape

((19027, 12), (19027,))

In [6]:
preprocessor = Pipeline(steps=[
    ('OHE', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
    ('scaler', StandardScaler())
])

In [7]:
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)
y_train = LabelEncoder().fit_transform(y_train)
y_test = LabelEncoder().fit_transform(y_test)

In [8]:
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(258, activation='relu'),
    tf.keras.layers.Dense(258, activation='relu'),
    tf.keras.layers.Dense(258, activation='relu'),
    tf.keras.layers.Dense(len(np.unique(y_train)), activation='softmax')
])
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()

In [None]:
history = model.fit(
    X_train, y_train,
    epochs=5,
    batch_size=32,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='loss',
            patience=10,
            restore_best_weights=True
        )
    ]
)
y_pred = model.predict(X_train)

label_encoder = LabelEncoder()
label_encoder.fit(train_set[target_col])

class_names = label_encoder.classes_

labels = np.unique(y_train)

y_pred_train = model.predict(X_train)
train_report = classification_report(
	y_train, y_pred_train.argmax(axis=1), target_names=class_names[labels], labels=labels, zero_division=0
)

y_pred_test = model.predict(X_test)
test_report = classification_report(
	y_test, y_pred_test.argmax(axis=1), target_names=class_names[labels], labels=labels, zero_division=0
)

[1m595/595[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


In [16]:
print(train_report)

                               precision    recall  f1-score   support

             Alder Flycatcher       1.00      1.00      1.00       111
              American Avocet       1.00      1.00      1.00         6
             American Bittern       1.00      0.83      0.91         6
             American Bushtit       1.00      1.00      1.00        50
       American Cliff Swallow       1.00      1.00      1.00        25
                American Coot       1.00      1.00      1.00        39
                American Crow       1.00      1.00      1.00       116
    American Dusky Flycatcher       1.00      1.00      1.00        15
           American Goldfinch       0.98      1.00      0.99        65
     American Grey Flycatcher       1.00      1.00      1.00        11
             American Kestrel       1.00      1.00      1.00        10
            American Redstart       1.00      1.00      1.00       162
               American Robin       1.00      1.00      1.00       336
     

In [17]:
print(test_report)

                               precision    recall  f1-score   support

             Alder Flycatcher       0.58      0.55      0.56        20
              American Avocet       0.67      0.67      0.67         3
             American Bittern       0.00      0.00      0.00        13
             American Bushtit       0.00      0.00      0.00         9
       American Cliff Swallow       0.00      0.00      0.00         8
                American Coot       0.00      0.00      0.00        31
                American Crow       0.00      0.00      0.00         2
    American Dusky Flycatcher       0.00      0.00      0.00        23
           American Goldfinch       0.00      0.00      0.00         2
     American Grey Flycatcher       0.00      0.00      0.00         2
             American Kestrel       0.00      0.00      0.00        36
            American Redstart       0.00      0.00      0.00        71
               American Robin       0.00      0.00      0.00         3
     