In [None]:
import pandas as pd                                             # data handling
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler  # convert categorical to numerical
from sklearn.svm import SVC                                     # support vector classifier for ML model
from sklearn.metrics import classification_report               # model evaluation
from pathlib import Path                                        # filesystem path management

# Set base directory: if working inside a notebooks folder, go one level up
BASE_DIR = Path.cwd() if 'notebooks' not in str(Path.cwd()) else Path.cwd().parent

# Define data directories relative to the project root
DATA_DIR = BASE_DIR / 'data'
PROCESSED_DATA_DIR = DATA_DIR / 'processed' / 'birdcall_segments_3s'
FEATURES_PATH = DATA_DIR / 'features' / 'bird_features.csv'

# Load the extracted features CSV
features_df = pd.read_csv(FEATURES_PATH)

# Separate features and labels
X = features_df.drop(columns=["label", "source_file"])
y = features_df["label"]

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)      # standardize features (0 mean, 1 variance)

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# Train SVM
clf = SVC(kernel='rbf', C=1, gamma='scale')
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


                       precision    recall  f1-score   support

       Andean_Tinamou       0.70      1.00      0.82         7
Chestnut-bellied_Guan       1.00      1.00      1.00         7
                  Emu       1.00      1.00      1.00        13
       Little_Tinamou       1.00      0.92      0.96        12
                Maleo       0.35      1.00      0.52         8
           Malleefowl       1.00      0.67      0.80         6
 Micronesian_Megapode       1.00      0.92      0.96        13
    Moluccan_Megapode       0.00      0.00      0.00        13
       Somali_Ostrich       1.00      1.00      1.00         5
  Speckled_Chachalaca       0.90      0.82      0.86        11
           Spixs_Guan       0.80      0.80      0.80        10

             accuracy                           0.80       105
            macro avg       0.80      0.83      0.79       105
         weighted avg       0.78      0.80      0.77       105



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
