In [1]:
import os
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as XGB
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from scipy import signal
from skimage import feature
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.metrics import accuracy_score, classification_report, average_precision_score
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import KFold, cross_val_score, cross_validate, train_test_split
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier as DTC


In [2]:
# Put the `src` directory that sits one level above the current working
# directory on the import path, so modules stored there can be imported.
parent_folder = Path().resolve().parent
src_path = parent_folder.joinpath('src')
sys.path.append(os.fspath(src_path))

from tools import get_embedding_birdnet

#env to use: clef

In [3]:
# Folder (relative to the working directory) handed to get_embedding_birdnet.
root_folder = "../data/train_data/embedding/birdnet/"

In [4]:
# Load the two embedding sets from root_folder, positives first.
# The second argument is 1 for the positive set and 0 for the negative set,
# mirroring the `target` values assigned to the frames built from them.
# NOTE(review): presumably a class-label selector — confirm in tools.get_embedding_birdnet.
pos_embeddings, neg_embeddings = (
    get_embedding_birdnet(root_folder, 1),
    get_embedding_birdnet(root_folder, 0),
)

In [7]:
# Wrap each embedding set in a DataFrame and tag it with its class label:
# target=1 for the positive embeddings, target=0 for the negative ones.
df_pos = pd.DataFrame(data=pos_embeddings).assign(target=1)
df_neg = pd.DataFrame(data=neg_embeddings).assign(target=0)

Split only the negative data into 5 folds; each fold of negatives is later combined with all of the positive data.

In [8]:
# Assign every row of df_neg to exactly one of 5 shuffled folds
# (fixed seed, so the assignment is reproducible).
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# KFold.split yields *positional* row indices. Collect the fold ids in a
# positional array instead of writing them through the label-based `.loc`,
# so the result is correct even if df_neg does not have a default RangeIndex.
fold_ids = np.full(len(df_neg), -1, dtype=np.int64)  # -1 = not assigned yet

for fold, (train_idx, test_idx) in enumerate(kf.split(df_neg)):
    # Only the held-out indices matter here; train_idx is unused.
    fold_ids[test_idx] = fold

# Store the fold number of each row; re-running the cell just overwrites the column.
df_neg["fold"] = fold_ids

<IPython.core.display.Javascript object>

In [77]:
fold = 4 # Negative fold to evaluate (0-4 for the 5-fold split); re-run the cells below once per fold

In [78]:
# Keep only the negatives assigned to the selected fold, then discard the
# bookkeeping `fold` column so it does not end up among the features.
in_selected_fold = df_neg["fold"] == fold
df_neg_fold = df_neg.loc[in_selected_fold].drop(columns="fold")

In [79]:
# Stack all positives on top of the selected negative fold (fresh 0..n-1 index),
# then shuffle the rows with a fixed seed; the shuffled index labels are kept.
df_combined = (
    pd.concat([df_pos, df_neg_fold], axis=0, ignore_index=True)
    .sample(frac=1, random_state=232)
)

In [80]:
#Generate Test and Train datasets
X = df_combined.iloc[:, :-1] #All values except the last column
y = df_combined.iloc[:, -1] #All values from the last column

#train, test, train_target, test_target = train_test_split(X, y, test_size=0.20, random_state=23)

SVM

In [81]:
# Linear-kernel SVM, evaluated with 5-fold cross-validation on X / y.
model = SVC(kernel='linear', cache_size=500)

# Compute both metrics in ONE cross-validation pass: every fold is fitted once
# (instead of once per metric) and both scores come from the same fitted model.
cv_scores = cross_validate(model, X, y, cv=5, scoring=['accuracy', 'average_precision'])

accuracies = cv_scores['test_accuracy']  # per-fold accuracy
#print(f"Cross-validation accuracies: {accuracies}")
print(f"Mean Accuracy: {accuracies.mean():.4f}")

ap = cv_scores['test_average_precision']  # per-fold average precision

print(f"Cross-validation AP: {ap}")
print(f"mAP: {ap.mean():.4f}")


<IPython.core.display.Javascript object>

Mean Accuracy: 0.9980


<IPython.core.display.Javascript object>

Cross-validation AP: [1. 1. 1. 1. 1.]
mAP: 1.0000


Random Forest

In [82]:
# Random forest, evaluated with 5-fold cross-validation on X / y.
# random_state pins the forest's randomness so re-running the cell reproduces
# the reported numbers; n_jobs=-1 uses all available cores for fitting.
model = RFC(n_jobs=-1, random_state=42)

# Compute both metrics in ONE cross-validation pass: every fold is fitted once
# (instead of once per metric) and both scores come from the same fitted forest.
cv_scores = cross_validate(model, X, y, cv=5, scoring=['accuracy', 'average_precision'])

accuracies = cv_scores['test_accuracy']  # per-fold accuracy
#print(f"Cross-validation accuracies: {accuracies}")
print(f"Mean Accuracy: {accuracies.mean():.4f}")

ap = cv_scores['test_average_precision']  # per-fold average precision

print(f"Cross-validation AP: {ap}")
print(f"mAP: {ap.mean():.4f}")

<IPython.core.display.Javascript object>

Mean Accuracy: 0.9830


<IPython.core.display.Javascript object>

Cross-validation AP: [1.         1.         0.99960291 0.99862066 0.99960874]
mAP: 0.9996


XGBoost

In [83]:
# Gradient-boosted trees (binary logistic objective), evaluated with 5-fold
# cross-validation on X / y.
model = XGB.XGBClassifier(objective='binary:logistic')

# Compute both metrics in ONE cross-validation pass: every fold is fitted once
# (instead of once per metric) and both scores come from the same fitted model.
cv_scores = cross_validate(model, X, y, cv=5, scoring=['accuracy', 'average_precision'])

accuracies = cv_scores['test_accuracy']  # per-fold accuracy
#print(f"Cross-validation accuracies: {accuracies}")
print(f"Mean Accuracy: {accuracies.mean():.4f}")

ap = cv_scores['test_average_precision']  # per-fold average precision

print(f"Cross-validation AP: {ap}")
print(f"mAP: {ap.mean():.4f}")

<IPython.core.display.Javascript object>

Mean Accuracy: 0.9810


<IPython.core.display.Javascript object>

Cross-validation AP: [0.99961636 0.99961538 0.99901965 0.99933396 0.99753346]
mAP: 0.9990


In [76]:
# Disabled hold-out evaluation: `test` / `test_target` only exist if the
# commented-out train_test_split line is re-enabled.
# NOTE(review): predict() returns hard class labels, while
# average_precision_score and precision_recall_curve are designed for
# continuous scores (decision_function / predict_proba output) — switch to
# scores before re-enabling, otherwise the curve collapses to a single threshold.
#predictions = model.predict(test)
#ap = average_precision_score(test_target, predictions)
#print("Test set average precision:", ap)

#report=classification_report(test_target, predictions, digits=4)
#print(report)

# Only for binary classification (adjust for multi-class)
#precision, recall, thr = precision_recall_curve(test_target, predictions)


In [60]:
# Disabled precision-recall plot: needs `precision` and `recall`, which are
# only produced by the (also disabled) precision_recall_curve call.
#plt.plot(recall, precision, marker='.')
#plt.xlabel('Recall')
#plt.ylabel('Precision')
#plt.title('Precision-Recall Curve')
#plt.show()