In [None]:
import os
import random
import numpy as np

from sklearn.preprocessing import StandardScaler

from core.preprocessing import load_and_preprocess_data
from core.outlier_detection import remove_outliers
from core.classification import (
    classify_with_knn,
    classify_with_knn_without_hyperparameter,
)
from core.visualization import (
    visualize_ground_truth,
    visualize_knn_decision_boundary,
)

from utils import find_project_root

# Seed both NumPy's global RNG and Python's `random` module with the same
# value so that anything drawing from them is repeatable across runs.
seed = 42
np.random.seed(seed)
random.seed(seed)

# Resolve the train/test CSV locations under <project root>/data.
PROJECT_ROOT = find_project_root()
train_path, test_path = (
    os.path.join(PROJECT_ROOT, "data", csv_name)
    for csv_name in ("train_data.csv", "test_data.csv")
)

# Preprocessing: the helper's eight return values are unpacked positionally.
(
    X_train_raw,
    X_test_raw,
    y_train,
    y_test,
    X_train_scaled,
    X_test_scaled,
    label_encoder,
    scaler,
) = load_and_preprocess_data(train_path, test_path)

# Outlier removal is applied to the training split only; the test split is
# passed through untouched.
X_train_clean, y_train_clean = remove_outliers(X_train_scaled, y_train)

# No feature selection in this experiment: the "selected" sets are simply the
# cleaned training features and the (already scaled) test features.
X_train_selected, X_test_selected = X_train_clean, X_test_scaled

# Fit a fresh scaler on the cleaned training features and apply the same
# fitted transform to the test features.
# NOTE(review): both inputs come from variables named *_scaled, so this looks
# like a second standardisation pass -- confirm that is intended.
selected_scaler = StandardScaler()
X_train_selected_scaled = selected_scaler.fit_transform(X_train_selected)
X_test_selected_scaled = selected_scaler.transform(X_test_selected)

# Optional visualisation (disabled). Uncomment to plot the ground-truth labels
# and a KNN decision boundary for the prepared training data.
# visualize_ground_truth(X_train_selected_scaled, y_train_clean, label_encoder)
# visualize_knn_decision_boundary(X_train_selected_scaled, y_train_clean, n_neighbors=5)

# Active experiment: KNN via `classify_with_knn`, given the prepared training
# data, the label encoder, and the held-out test split.
print("Running KNN with hyperparameter tuning AND No Feature Selection...")
knn = classify_with_knn(
    X_train_selected_scaled, y_train_clean, label_encoder,
    X_test=X_test_selected_scaled, y_test=y_test,
)

# Alternative experiment (disabled): same inputs, but using the variant that
# skips hyperparameter tuning. Swap it with the call above to compare.
# print("Running KNN with NO hyperparameter tuning AND No Feature Selection...")
# knn = classify_with_knn_without_hyperparameter(
#     X_train_selected_scaled, y_train_clean, label_encoder,
#     X_test=X_test_selected_scaled, y_test=y_test,
# )
