In [None]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Train, tune, evaluate, and persist a logistic-regression classifier that
# predicts the "cardio" column from the remaining columns of the dataset.

# Load data
df = pd.read_csv("cardio_train_cleaned.csv")

# Every column except the target is used as a feature.
X = df.drop(columns="cardio")
y = df["cardio"]

# Split: hold out 20% for the final evaluation; stratify so both splits keep
# the same class proportions as the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Pipeline: the scaler lives inside the pipeline so it is re-fit on the
# training portion of every CV fold instead of seeing validation rows.
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("logreg", LogisticRegression(max_iter=1000)),
])

# Cross Validation
# Run on the training split only: the held-out test rows must not take part
# in any estimate made before the final evaluation, and this keeps the
# baseline comparable with the grid search below (same data, same cv).
cv_scores = cross_val_score(
    pipeline,
    X_train,
    y_train,
    cv=3,
    scoring="accuracy",  # explicit; same as the classifier's default score
    n_jobs=-1,
)
print("Average K-Fold Accuracy:", cv_scores.mean())

# Hyperparameter tuning
# "logreg__C" addresses the C parameter of the pipeline step named "logreg".
params = {
    "logreg__C": [0.1, 1, 10],
}

grid = GridSearchCV(
    pipeline,
    params,
    cv=3,
    scoring="accuracy",  # explicit; same as the classifier's default score
    n_jobs=-1,
    verbose=2,
)

grid.fit(X_train, y_train)

print("\nBest Parameters:", grid.best_params_)

# With the default refit=True, best_estimator_ is the pipeline re-fit on the
# whole training split using the best parameters found.
best_model = grid.best_estimator_

# Evaluation on the untouched test split
y_pred = best_model.predict(X_test)

print("\nTest Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Save model (the whole fitted pipeline, so the scaler travels with it)
joblib.dump(best_model, "cardio_logistic_model.pkl")

print("\nModel saved successfully.")


Average K-Fold Accuracy: 0.7188763783422072
Fitting 3 folds for each of 3 candidates, totalling 9 fits

Best Parameters: {'logreg__C': 10}

Test Accuracy: 0.7212160884427958

Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.77      0.74      6946
           1       0.74      0.67      0.70      6803

    accuracy                           0.72     13749
   macro avg       0.72      0.72      0.72     13749
weighted avg       0.72      0.72      0.72     13749


Confusion Matrix:
 [[5361 1585]
 [2248 4555]]

Model saved successfully.
