In [None]:
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../../")))
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from backend.model.preprocessing import split_scale, FEATURE_COLUMNS
from backend.model.evaluation import evaluate_clf

In [None]:
data_path = os.path.abspath(os.path.join(os.getcwd(), "../../dataset/heart.csv"))
df = pd.read_csv(data_path)

In [None]:
X_train, X_test, y_train, y_test, scaler = split_scale(df)


In [None]:
clf = RandomForestClassifier(n_estimators=200, random_state=42, class_weight='balanced')
clf.fit(X_train, y_train)

In [None]:
#  Evaluate model
metrics = evaluate_clf(clf, X_test, y_test)
print("\n Model Evaluation Metrics:")
for k, v in metrics.items():
    if k != "confusion_matrix" and k != "classification_report":
        print(f"{k}: {v}")
print("\nConfusion Matrix:")
print(metrics["confusion_matrix"])
print("\nClassification Report:")
print(metrics["classification_report"])


In [None]:
# Save model and scaler
model_path = os.path.abspath(os.path.join(os.getcwd(), "../heart_model.pkl"))
scaler_path = os.path.abspath(os.path.join(os.getcwd(), "../scaler.pkl"))

In [None]:
joblib.dump(clf, model_path)
joblib.dump(scaler, scaler_path)

In [None]:
print(f"\n Model and scaler saved successfully at:\n{model_path}\n{scaler_path}")


In [6]:
import os, sys, joblib, pandas as pd, numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, confusion_matrix

# Add project root dynamically

# Add project root
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../../")))
from backend.model.preprocessing import FEATURE_COLUMNS
from backend.model.evaluation import evaluate_clf



# Locate dataset safely
df = pd.read_csv('../../dataset/heart.csv')


#  Prepare features and target
X = df[FEATURE_COLUMNS]
y = df["target"]

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ===================================================================================
#  Define all models and their hyperparameter grids
# ===================================================================================
models = {
    "DecisionTree": (
        DecisionTreeClassifier(random_state=42, class_weight="balanced"),
        {"max_depth": [3, 5, 7, 9, None],
         "min_samples_split": [2, 5, 10]}
    ),
    "KNN": (
        KNeighborsClassifier(),
        {"n_neighbors": [3, 5, 7, 9],
         "weights": ["uniform", "distance"]}
    ),
    "SVC": (
        SVC(probability=True, class_weight="balanced", random_state=42),
        {"C": [0.1, 1, 10],
         "kernel": ["linear", "rbf"]}
    ),
    "LogisticRegression": (
        LogisticRegression(max_iter=1000, class_weight="balanced"),
        {"C": [0.01, 0.1, 1, 10],
         "solver": ["liblinear", "lbfgs"]}
    ),
    "RandomForest": (
        RandomForestClassifier(random_state=42, class_weight="balanced"),
        {"n_estimators": [100, 200, 300],
         "max_depth": [5, 10, None],
         "min_samples_split": [2, 5, 10]}
    ),
}

# ===================================================================================
# Train, tune, and evaluate all models
# ===================================================================================
results = []
best_model = None
best_auc = 0

for name, (model, params) in models.items():
    print(f"\n Training {name} ...")
    grid = GridSearchCV(model, params, cv=5, scoring="roc_auc", n_jobs=-1)
    grid.fit(X_train_scaled, y_train)
    
    y_pred = grid.best_estimator_.predict(X_test_scaled)
    y_proba = grid.best_estimator_.predict_proba(X_test_scaled)[:, 1]
    
    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)
    
    print(f" Best Params for {name}: {grid.best_params_}")
    print(f"Accuracy: {acc:.4f} | AUC: {auc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred))
    
    results.append((name, acc, auc, grid.best_estimator_))
    
    # Track best model
    if auc > best_auc:
        best_auc = auc
        best_model = grid.best_estimator_

# ===================================================================================
#  Summary Table
# ===================================================================================
print("\n Model Comparison Summary:")
print(f"{'Model':<20}{'Accuracy':<12}{'AUC':<10}")
print("-" * 45)
for name, acc, auc, _ in results:
    print(f"{name:<20}{acc:<12.4f}{auc:<10.4f}")
print("-" * 45)
print(f" Best Model: {type(best_model).__name__} | AUC = {best_auc:.4f}")



# ===================================================================================
#  Save the best model and scaler (works in script or notebook)
# ===================================================================================

if "__file__" in globals():
    # Running as a Python script
    save_dir = os.path.dirname(os.path.abspath(__file__))
else:
    # Running in a Jupyter notebook or interactive console
    save_dir = os.path.abspath(os.path.join(os.getcwd(), "backend", "model"))

os.makedirs(save_dir, exist_ok=True)

model_path = os.path.join(save_dir, "heart_model.pkl")
scaler_path = os.path.join(save_dir, "scaler.pkl")

joblib.dump(best_model, model_path)
joblib.dump(scaler, scaler_path)

print("\n✅ Model and Scaler Saved Successfully!")
print(f"Model Path: {model_path}")
print(f"Scaler Path: {scaler_path}")




 Training DecisionTree ...
 Best Params for DecisionTree: {'max_depth': 9, 'min_samples_split': 2}
Accuracy: 0.9854 | AUC: 0.9857
Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       100
           1       1.00      0.97      0.99       105

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205


 Training KNN ...
 Best Params for KNN: {'n_neighbors': 7, 'weights': 'distance'}
Accuracy: 1.0000 | AUC: 1.0000
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       100
           1       1.00      1.00      1.00       105

    accuracy                           1.00       205
   macro avg       1.00      1.00      1.00       205
weighted avg       1.00      1.00      1.00       205


 Training SVC ...
 Best Params for SVC: {'C': 10, 'k