In [46]:
import numpy as np
import pandas as pd

# Read datasets
heartDF = pd.read_csv("./data/heart_statlog_cleveland_hungary_final.csv")
wineDF = pd.read_csv("./data/winequality-red.csv")

# Preprocess datasets
wineDF['label'] = wineDF['quality'].apply(lambda x: 'good' if x > 6 else 'bad')
wineDF.drop('quality', axis=1)

wine_Y = wineDF['label'].values
wine_X = wineDF.drop('label', axis=1).values

heart_Y = heartDF['target'].values
heart_X = heartDF.drop('target', axis=1).values

In [47]:
# Standardize datasets
# wine_X = (wine_X - np.mean(wine_X)) / np.std(wine_X)
# heart_X = (heart_X - np.mean(heart_X)) / np.std(heart_X)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
wine_X = scaler.fit_transform(wine_X)

In [58]:
# Get the dataset split for training the models
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(wine_X, wine_Y, test_size=0.3, stratify=wine_Y)
X_train_heart, X_test_heart, y_train_heart, y_test_heart = train_test_split(heart_X, heart_Y, test_size=0.3, stratify=heart_Y)

In [49]:
# Get the best hyperparameters
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from sklearn.exceptions import ConvergenceWarning
from sklearn.exceptions import FitFailedWarning
import warnings

# Define parameter grid
param_grid = {
    'C': np.logspace(-3,3, 7),  # Regularization parameter
    'penalty': ['l1', 'l2', 'elasticnet', None],  # Penalty options
    'solver': ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga', 'newton-cholesky'],  # Algorithm to use in the optimization problem
    'max_iter': [100, 500, 1000]  # Maximum number of iterations
}

# Create a Logistic Regression model
logreg = LogisticRegression()

for i in range(0, 5):
    # Create GridSearchCV
    grid_search = GridSearchCV(logreg, param_grid, cv=5, n_jobs=4)
    # Suppress warnings
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        warnings.filterwarnings("ignore", category=FitFailedWarning)
        warnings.filterwarnings("ignore", category=UserWarning)
        # Fit the grid search to the data
        grid_search.fit(X_train, y_train)

    # Print the best parameters found
    print(f"[{i}] Best Parameters:", grid_search.best_params_)

    # Get the best model
    best_model = grid_search.best_estimator_

    # Evaluate the best model on the test set
    y_pred = best_model.predict(X_test)
    print(classification_report(y_test, y_pred))


[0] Best Parameters: {'C': 0.001, 'max_iter': 100, 'penalty': None, 'solver': 'newton-cg'}
              precision    recall  f1-score   support

         bad       1.00      1.00      1.00       415
        good       1.00      1.00      1.00        65

    accuracy                           1.00       480
   macro avg       1.00      1.00      1.00       480
weighted avg       1.00      1.00      1.00       480

[1] Best Parameters: {'C': 0.001, 'max_iter': 100, 'penalty': None, 'solver': 'newton-cg'}
              precision    recall  f1-score   support

         bad       1.00      1.00      1.00       415
        good       1.00      1.00      1.00        65

    accuracy                           1.00       480
   macro avg       1.00      1.00      1.00       480
weighted avg       1.00      1.00      1.00       480

[2] Best Parameters: {'C': 0.001, 'max_iter': 100, 'penalty': None, 'solver': 'newton-cg'}
              precision    recall  f1-score   support

         bad      

In [60]:
# Get best hyper-parameters for heart dataset
# Define parameter grid
param_grid = {
    'C': np.logspace(-3,3, 7),  # Regularization parameter
    'penalty': ['l1', 'l2', 'elasticnet', None],  # Penalty options
    'solver': ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga', 'newton-cholesky'],  # Algorithm to use in the optimization problem
    'max_iter': [100, 500, 1000]  # Maximum number of iterations
}

# Create a Logistic Regression model
logreg = LogisticRegression()

for i in range(0, 5):
    # Create GridSearchCV
    grid_search = GridSearchCV(logreg, param_grid, cv=5, n_jobs=4)
    # Suppress warnings
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        warnings.filterwarnings("ignore", category=FitFailedWarning)
        warnings.filterwarnings("ignore", category=UserWarning)
        # Fit the grid search to the data
        grid_search.fit(X_train_heart, y_train_heart)

    # Print the best parameters found
    print(f"[{i}] Best Parameters:", grid_search.best_params_)

    # Get the best model
    best_model = grid_search.best_estimator_

    # Evaluate the best model on the test set
    y_pred_heart = best_model.predict(X_test_heart)
    print(classification_report(y_test_heart, y_pred_heart))


[0] Best Parameters: {'C': 1000.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
              precision    recall  f1-score   support

           0       0.86      0.79      0.82       168
           1       0.82      0.88      0.85       189

    accuracy                           0.84       357
   macro avg       0.84      0.83      0.84       357
weighted avg       0.84      0.84      0.84       357

[1] Best Parameters: {'C': 1000.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
              precision    recall  f1-score   support

           0       0.86      0.79      0.82       168
           1       0.82      0.88      0.85       189

    accuracy                           0.84       357
   macro avg       0.84      0.83      0.84       357
weighted avg       0.84      0.84      0.84       357

[2] Best Parameters: {'C': 1000.0, 'max_iter': 500, 'penalty': 'l2', 'solver': 'lbfgs'}
              precision    recall  f1-score   support

           0       0.86    

In [57]:
logreg = LogisticRegression(max_iter=100, penalty=None, solver='newton-cg')
logreg.fit(X_train, y_train)


y_pred = logreg.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         bad       1.00      1.00      1.00       415
        good       1.00      1.00      1.00        65

    accuracy                           1.00       480
   macro avg       1.00      1.00      1.00       480
weighted avg       1.00      1.00      1.00       480



In [73]:
logreg = LogisticRegression(C=1000, max_iter=500, penalty='l2', solver='lbfgs')
logreg.fit(X_train_heart, y_train_heart)


y_pred_heart = logreg.predict(X_test_heart)
print(classification_report(y_test_heart, y_pred_heart))

              precision    recall  f1-score   support

           0       0.86      0.79      0.82       168
           1       0.82      0.88      0.85       189

    accuracy                           0.84       357
   macro avg       0.84      0.83      0.84       357
weighted avg       0.84      0.84      0.84       357

