In [2]:
import optuna
import numpy as np
import pandas as pd
import torch

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct, Matern, RationalQuadratic, WhiteKernel

from optuna import create_study, Trial

# General parameters
Test_Size = 0.2
Random_Seed = 82024
Num_trials = 1000
Num_folds = 10
Study_name = "gp_scikit_heart_1"
Score = "roc_auc"  # Or "f1"

# Kernel setup
Kernels = {
    "rbf": 1 * RBF(),
    "dot": 1 * DotProduct(),
    "matern": 1 * Matern(),
    "quad": 1 * RationalQuadratic(),
    "white": 1 * WhiteKernel(),
}

# Data handling
# Fetch dataset from UCI Repository
from ucimlrepo import fetch_ucirepo
heart_disease = fetch_ucirepo(id=45)
df = heart_disease.data.original

# Drop missing values
df = df.dropna()
df = df.reset_index(drop=True)

# Binarize data
df.loc[df["num"] != 0, "num"] = 1

# Define features and target
X = df.iloc[:, :-1].values
y = df['num'].values

# Split the data into train and test sets
train_index, test_index = train_test_split(list(range(y.size)), test_size=Test_Size, random_state=Random_Seed)

train_df = df.loc[train_index]
test_df = df.loc[test_index]

# Convert to PyTorch tensors (double precision for compatibility with sklearn)
train_X = torch.tensor(train_df.iloc[:, :-1].values).double()
train_y = torch.tensor(train_df['num'].values).double()

test_X = torch.tensor(test_df.iloc[:, :-1].values).double()
test_y = torch.tensor(test_df['num'].values).double()

# Convert back to NumPy arrays for scikit-learn
#train_X_np = train_X.numpy()
#train_y_np = train_y.numpy()

#test_X_np = test_X.numpy()
#test_y_np = test_y.numpy()

# Function to create model instances
def create_instance_model(trial):
    """Create an instance of the model."""
    kernel_id = trial.suggest_categorical("kernel", ["rbf", "white", "dot", "matern", "quad"])

    parameters = {
        "kernel": Kernels[kernel_id],
        "n_restarts_optimizer": trial.suggest_int("n_restarts_optimizer", 0, 10),
        "max_iter_predict": trial.suggest_int("max_iter_predict", 50, 1000, log=True),
        "random_state": Random_Seed,
    }

    model = GaussianProcessClassifier(**parameters)
    return model

# Objective function for Optuna
def objective_function(trial, X, y, Num_folds=Num_folds, random_state=Random_Seed):
    """Optuna's objective function"""
    model = create_instance_model(trial)

    metrics = cross_val_score(model, X, y, scoring=Score, cv=Num_folds)
    return metrics.mean()

# Create the study with Optuna
study = create_study(
    study_name=Study_name,
    storage=f"sqlite:///{Study_name}.db",
    direction="maximize",
    load_if_exists=True,
)

#study.optimize(lambda trial: objective_function(trial, train_X_np, train_y_np), n_trials=Num_trials)
study.optimize(lambda trial: objective_function(trial, train_X, train_y), n_trials=Num_trials)
# Save and display the best results
trialdf = study.trials_dataframe()
trialdf.to_csv("trial_df.csv", index=False)

best_trial = study.best_trial
print(best_trial)

# Train and evaluate the final model
model = create_instance_model(best_trial)
#model.fit(train_X_np, train_y_np)
model.fit(train_X, train_y)

# Test the model
#y_pred = model.predict(test_X_np)
#pred_probs = model.predict_proba(test_X_np)
y_pred = model.predict(test_X)
pred_probs = model.predict_proba(test_X)

# Model evaluation
#acc = accuracy_score(test_y_np, y_pred)
#roc_auc = roc_auc_score(test_y_np, pred_probs[:, 1])
acc = accuracy_score(test_y, y_pred)
roc_auc = roc_auc_score(test_y, pred_probs[:, 1])

print(f"Accuracy: {acc:.2f}")
print(f"AUC-ROC: {roc_auc:.2f}")


[I 2024-09-04 12:28:41,225] A new study created in RDB with name: gp_scikit_heart_1
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
[I 2024-09-04 12:31:27,500] Trial 0 finished with value: 0.889918414918415 and parameters: {'kernel': 'matern', 'n_restarts_optimizer': 8, 'max_iter_predict': 531}. Best is trial 0 with value: 0.889918414918415.
[I 2024-09-04 12:31:53,989] Trial 1 finished with value: 0.

ValueError: 
All the 10 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 741, in fit
    self.base_estimator_.fit(X, y)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 246, in fit
    self._constrained_optimization(obj_func, theta_initial, bounds)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 474, in _constrained_optimization
    opt_res = scipy.optimize.minimize(
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_minimize.py", line 713, in minimize
    res = _minimize_lbfgsb(fun, x0, args, jac, bounds,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_lbfgsb_py.py", line 347, in _minimize_lbfgsb
    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 288, in _prepare_scalar_function
    sf = ScalarFunction(fun, x0, args, grad, hess,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 166, in __init__
    self._update_fun()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 262, in _update_fun
    self._update_fun_impl()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 163, in update_fun
    self.f = fun_wrapped(self.x)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 145, in fun_wrapped
    fx = fun(np.copy(x), *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 79, in __call__
    self._compute_if_needed(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 73, in _compute_if_needed
    fg = self.fun(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 220, in obj_func
    lml, grad = self.log_marginal_likelihood(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 385, in log_marginal_likelihood
    Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 444, in _posterior_mode
    L = cholesky(B, lower=True)
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 88, in cholesky
    c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 36, in _cholesky
    raise LinAlgError("%d-th leading minor of the array is not positive "
numpy.linalg.LinAlgError: 118-th leading minor of the array is not positive definite

--------------------------------------------------------------------------------
7 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 741, in fit
    self.base_estimator_.fit(X, y)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 246, in fit
    self._constrained_optimization(obj_func, theta_initial, bounds)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 474, in _constrained_optimization
    opt_res = scipy.optimize.minimize(
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_minimize.py", line 713, in minimize
    res = _minimize_lbfgsb(fun, x0, args, jac, bounds,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_lbfgsb_py.py", line 347, in _minimize_lbfgsb
    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 288, in _prepare_scalar_function
    sf = ScalarFunction(fun, x0, args, grad, hess,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 166, in __init__
    self._update_fun()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 262, in _update_fun
    self._update_fun_impl()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 163, in update_fun
    self.f = fun_wrapped(self.x)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 145, in fun_wrapped
    fx = fun(np.copy(x), *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 79, in __call__
    self._compute_if_needed(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 73, in _compute_if_needed
    fg = self.fun(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 220, in obj_func
    lml, grad = self.log_marginal_likelihood(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 385, in log_marginal_likelihood
    Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 444, in _posterior_mode
    L = cholesky(B, lower=True)
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 88, in cholesky
    c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 36, in _cholesky
    raise LinAlgError("%d-th leading minor of the array is not positive "
numpy.linalg.LinAlgError: 115-th leading minor of the array is not positive definite

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 741, in fit
    self.base_estimator_.fit(X, y)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 246, in fit
    self._constrained_optimization(obj_func, theta_initial, bounds)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 474, in _constrained_optimization
    opt_res = scipy.optimize.minimize(
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_minimize.py", line 713, in minimize
    res = _minimize_lbfgsb(fun, x0, args, jac, bounds,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_lbfgsb_py.py", line 347, in _minimize_lbfgsb
    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 288, in _prepare_scalar_function
    sf = ScalarFunction(fun, x0, args, grad, hess,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 166, in __init__
    self._update_fun()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 262, in _update_fun
    self._update_fun_impl()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 163, in update_fun
    self.f = fun_wrapped(self.x)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 145, in fun_wrapped
    fx = fun(np.copy(x), *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 79, in __call__
    self._compute_if_needed(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 73, in _compute_if_needed
    fg = self.fun(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 220, in obj_func
    lml, grad = self.log_marginal_likelihood(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 385, in log_marginal_likelihood
    Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 444, in _posterior_mode
    L = cholesky(B, lower=True)
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 88, in cholesky
    c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 36, in _cholesky
    raise LinAlgError("%d-th leading minor of the array is not positive "
numpy.linalg.LinAlgError: 117-th leading minor of the array is not positive definite
