# Heart Disease Model Training\nTrain a classifier, evaluate it, and export `heart_model.pkl` and `scaler.pkl`.

In [3]:
import pandas as pd\nimport joblib\nfrom sklearn.ensemble import RandomForestClassifier\nfrom backend.model.preprocessing import split_scale, FEATURE_COLUMNS\nfrom backend.model.evaluation import evaluate_clf\n\ndf = pd.read_csv('dataset/heart.csv')\nX_train, X_test, y_train, y_test, scaler = split_scale(df)\nclf = RandomForestClassifier(n_estimators=200, random_state=42, class_weight='balanced')\nclf.fit(X_train, y_train)\nmetrics = evaluate_clf(clf, X_test, y_test)\nprint(metrics)\njoblib.dump(clf, 'backend/model/heart_model.pkl')\njoblib.dump(scaler, 'backend/model/scaler.pkl')\nprint('Saved model and scaler to backend/model/')\n

SyntaxError: unexpected character after line continuation character (3761497189.py, line 1)

In [4]:
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../../")))
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from backend.model.preprocessing import split_scale, FEATURE_COLUMNS
from backend.model.evaluation import evaluate_clf

In [5]:
data_path = os.path.abspath(os.path.join(os.getcwd(), "../../dataset/heart.csv"))
df = pd.read_csv(data_path)

In [6]:
X_train, X_test, y_train, y_test, scaler = split_scale(df)


In [7]:
clf = RandomForestClassifier(n_estimators=200, random_state=42, class_weight='balanced')
clf.fit(X_train, y_train)

In [8]:
#  Evaluate model
metrics = evaluate_clf(clf, X_test, y_test)
print("\n Model Evaluation Metrics:")
for k, v in metrics.items():
    if k != "confusion_matrix" and k != "classification_report":
        print(f"{k}: {v}")
print("\nConfusion Matrix:")
print(metrics["confusion_matrix"])
print("\nClassification Report:")
print(metrics["classification_report"])



 Model Evaluation Metrics:
accuracy: 1.0
precision: 1.0
recall: 1.0
f1: 1.0
roc_auc: 1.0

Confusion Matrix:
[[100, 0], [0, 105]]

Classification Report:
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000       100
           1     1.0000    1.0000    1.0000       105

    accuracy                         1.0000       205
   macro avg     1.0000    1.0000    1.0000       205
weighted avg     1.0000    1.0000    1.0000       205



In [10]:
# Save model and scaler
model_path = os.path.abspath(os.path.join(os.getcwd(), "../heart_model.pkl"))
scaler_path = os.path.abspath(os.path.join(os.getcwd(), "../scaler.pkl"))

In [11]:
joblib.dump(clf, model_path)
joblib.dump(scaler, scaler_path)

['D:\\PRO_Cdac\\HeartDiseasePrediction\\backend\\scaler.pkl']

In [12]:
print(f"\n Model and scaler saved successfully at:\n{model_path}\n{scaler_path}")



 Model and scaler saved successfully at:
D:\PRO_Cdac\HeartDiseasePrediction\backend\heart_model.pkl
D:\PRO_Cdac\HeartDiseasePrediction\backend\scaler.pkl


In [15]:
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../../")))
import pandas as pd


data_path = os.path.abspath(os.path.join(os.getcwd(), "../../dataset/heart.csv"))
df = pd.read_csv(data_path)
print(df['target'].value_counts())


target
1    526
0    499
Name: count, dtype: int64


In [16]:
import sys, os, pandas as pd, joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler

# Add project root
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../../")))
from backend.model.preprocessing import FEATURE_COLUMNS
from backend.model.evaluation import evaluate_clf

# Load data
df = pd.read_csv('../../dataset/heart.csv')

X = df[FEATURE_COLUMNS]
y = df['target']

# Train/Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ðŸ”§ Tune RandomForest slightly
param_grid = {
    'n_estimators': [200, 300],
    'max_depth': [5, 7, 10, None],
    'min_samples_split': [2, 5, 10],
}
grid = GridSearchCV(
    RandomForestClassifier(random_state=42, class_weight='balanced'),
    param_grid, cv=5, scoring='f1', n_jobs=-1
)
grid.fit(X_train_scaled, y_train)

best_model = grid.best_estimator_

# Evaluate
print("Best Params:", grid.best_params_)
metrics = evaluate_clf(best_model, X_test_scaled, y_test)
print("Metrics:", metrics)
print(classification_report(y_test, best_model.predict(X_test_scaled)))

# Save
os.makedirs('../../backend/model', exist_ok=True)
joblib.dump(best_model, '../../backend/model/heart_model.pkl')
joblib.dump(scaler, '../../backend/model/scaler.pkl')

print("âœ… Saved tuned model and scaler.")


Best Params: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 200}
Metrics: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'roc_auc': 1.0, 'confusion_matrix': [[100, 0], [0, 105]], 'classification_report': '              precision    recall  f1-score   support\n\n           0     1.0000    1.0000    1.0000       100\n           1     1.0000    1.0000    1.0000       105\n\n    accuracy                         1.0000       205\n   macro avg     1.0000    1.0000    1.0000       205\nweighted avg     1.0000    1.0000    1.0000       205\n'}
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       100
           1       1.00      1.00      1.00       105

    accuracy                           1.00       205
   macro avg       1.00      1.00      1.00       205
weighted avg       1.00      1.00      1.00       205

âœ… Saved tuned model and scaler.
