In [6]:
from skimage import feature
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
import os
from pathlib import Path
import sys
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import accuracy_score, classification_report, average_precision_score
from sklearn.metrics import precision_recall_curve

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.neighbors import KNeighborsClassifier as KNN
import xgboost as XGB


In [7]:
parent_folder = Path().resolve().parent
src_path = parent_folder / 'src'
sys.path.append(str(src_path))

from tools import get_embedding_birdnet

#env to use: clef

In [8]:
root_folder='C:/Users/dgnhk/Woodcock-CNN/data/train_data/embedding/birdnet/'

In [9]:
pos_embeddings = get_embedding_birdnet(root_folder, 1)
neg_embeddings = get_embedding_birdnet(root_folder, 0)

In [10]:
df_pos = pd.DataFrame(data=pos_embeddings)
df_pos['target'] = 1

df_neg = pd.DataFrame(data=neg_embeddings)
df_neg['target'] = 0
df_neg = df_neg.sample(frac=0.2, random_state=413)

In [11]:
df_combined = pd.concat([df_pos, df_neg], ignore_index=True, axis=0)
df_combined = df_combined.sample(frac=1, random_state=232)

In [12]:
#Generate Test and Train datasets
X = df_combined.iloc[:, :-1] #All values except the last column
y = df_combined.iloc[:, -1] #All values from the last column

train, test, train_target, test_target = train_test_split(X, y, test_size=0.20, random_state=424)

In [13]:
class Config:
    oversample = True
    undersample = False

oversampler = SMOTE()
undersampler = RandomUnderSampler()

if Config.oversample:
    train, train_target = oversampler.fit_resample(train, train_target)
elif Config.undersample:
    train, train_target = undersampler.fit_resample(train, train_target)
else: 
    print("Using the original dataset")

SVM

In [14]:
model = SVC(cache_size=500)
model.fit(train,train_target)

In [15]:
predictions = model.predict(test)
ap = average_precision_score(test_target, predictions)
print("Test set average precision:", ap)

report=classification_report(test_target, predictions, digits=4)
print(report)


Test set average precision: 0.8626404611908184
              precision    recall  f1-score   support

           0     0.7462    0.9700    0.8435       100
           1     0.9691    0.7402    0.8393       127

    accuracy                         0.8414       227
   macro avg     0.8576    0.8551    0.8414       227
weighted avg     0.8709    0.8414    0.8411       227



Random Forest

In [16]:
model = RFC(n_jobs = -1)
model.fit(train,train_target)

In [17]:
predictions = model.predict(test)
ap = average_precision_score(test_target, predictions)
print("Test set average precision:", ap)

report=classification_report(test_target, predictions, digits=4)
print(report)

# Only for binary classification (adjust for multi-class)
precision, recall, thr = precision_recall_curve(test_target, predictions)


Test set average precision: 0.8147594760338353
              precision    recall  f1-score   support

           0     0.7417    0.8900    0.8091       100
           1     0.8972    0.7559    0.8205       127

    accuracy                         0.8150       227
   macro avg     0.8194    0.8230    0.8148       227
weighted avg     0.8287    0.8150    0.8155       227



XGBoost

In [18]:
model = XGB.XGBClassifier(objective='binary:logistic')
model.fit(train,train_target)

In [19]:
predictions = model.predict(test)
ap = average_precision_score(test_target, predictions)
print("Test set average precision:", ap)

report=classification_report(test_target, predictions, digits=4)
print(report)

# Only for binary classification (adjust for multi-class)
precision, recall, thr = precision_recall_curve(test_target, predictions)


Test set average precision: 0.8090330088665098
              precision    recall  f1-score   support

           0     0.7500    0.8700    0.8056       100
           1     0.8829    0.7717    0.8235       127

    accuracy                         0.8150       227
   macro avg     0.8164    0.8208    0.8145       227
weighted avg     0.8243    0.8150    0.8156       227



In [20]:
#plt.plot(recall, precision, marker='.')
#plt.xlabel('Recall')
#plt.ylabel('Precision')
#plt.title('Precision-Recall Curve')
#plt.show()