# Dataset Exploratory Data Analysis and Pre-processing Pipeline

In [None]:
# Import needed libraries and modules
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [None]:
# Fetch dataset from UCI Repository
!pip install ucimlrepo
from ucimlrepo import fetch_ucirepo
heart_disease = fetch_ucirepo(id=45)



In [None]:
# Dataset overview
df = heart_disease.data.original
df.dropna(inplace=True)
display(df.head())
display(df.info())

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0,2
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0,0


<class 'pandas.core.frame.DataFrame'>
Index: 297 entries, 0 to 301
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       297 non-null    int64  
 1   sex       297 non-null    int64  
 2   cp        297 non-null    int64  
 3   trestbps  297 non-null    int64  
 4   chol      297 non-null    int64  
 5   fbs       297 non-null    int64  
 6   restecg   297 non-null    int64  
 7   thalach   297 non-null    int64  
 8   exang     297 non-null    int64  
 9   oldpeak   297 non-null    float64
 10  slope     297 non-null    int64  
 11  ca        297 non-null    float64
 12  thal      297 non-null    float64
 13  num       297 non-null    int64  
dtypes: float64(3), int64(11)
memory usage: 34.8 KB


None

In [None]:
# Metadata
print(heart_disease.metadata)

# Variables information
variables = heart_disease.variables
display(variables)


{'uci_id': 45, 'name': 'Heart Disease', 'repository_url': 'https://archive.ics.uci.edu/dataset/45/heart+disease', 'data_url': 'https://archive.ics.uci.edu/static/public/45/data.csv', 'abstract': '4 databases: Cleveland, Hungary, Switzerland, and the VA Long Beach', 'area': 'Health and Medicine', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 303, 'num_features': 13, 'feature_types': ['Categorical', 'Integer', 'Real'], 'demographics': ['Age', 'Sex'], 'target_col': ['num'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1989, 'last_updated': 'Fri Nov 03 2023', 'dataset_doi': '10.24432/C52P4X', 'creators': ['Andras Janosi', 'William Steinbrunn', 'Matthias Pfisterer', 'Robert Detrano'], 'intro_paper': {'title': 'International application of a new probability algorithm for the diagnosis of coronary artery disease.', 'authors': 'R. Detrano, A. Jánosi, W. Steinbrunn, M. Pfisterer, J. Schmid, S. Sa

Unnamed: 0,name,role,type,demographic,description,units,missing_values
0,age,Feature,Integer,Age,,years,no
1,sex,Feature,Categorical,Sex,,,no
2,cp,Feature,Categorical,,,,no
3,trestbps,Feature,Integer,,resting blood pressure (on admission to the ho...,mm Hg,no
4,chol,Feature,Integer,,serum cholestoral,mg/dl,no
5,fbs,Feature,Categorical,,fasting blood sugar > 120 mg/dl,,no
6,restecg,Feature,Categorical,,,,no
7,thalach,Feature,Integer,,maximum heart rate achieved,,no
8,exang,Feature,Categorical,,exercise induced angina,,no
9,oldpeak,Feature,Integer,,ST depression induced by exercise relative to ...,,no


## Binarizing the target:

In [None]:
different_values = df['num'].unique()
print(different_values)

[0 2 1 3 4]


In [None]:
df["num_binarized"] = df["num"].apply(lambda x: 1 if x != 0 else 0)

print(df)

     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0     63    1   1       145   233    1        2      150      0      2.3   
1     67    1   4       160   286    0        2      108      1      1.5   
2     67    1   4       120   229    0        2      129      1      2.6   
3     37    1   3       130   250    0        0      187      0      3.5   
4     41    0   2       130   204    0        2      172      0      1.4   
..   ...  ...  ..       ...   ...  ...      ...      ...    ...      ...   
297   57    0   4       140   241    0        0      123      1      0.2   
298   45    1   1       110   264    0        0      132      0      1.2   
299   68    1   4       144   193    1        0      141      0      3.4   
300   57    1   4       130   131    0        0      115      1      1.2   
301   57    0   2       130   236    0        2      174      0      0.0   

     slope   ca  thal  num  num_binarized  
0        3  0.0   6.0    0              0  

### Train-test split:

In [None]:
Test_Size = 0.2
Random_Seed = 82024
dataset_name = "df"

In [None]:
#index = df.index
#train_index, test_index = train_test_split(index, test_size = Test_Size, random_state=Random_Seed)
#train_df = df.loc[train_index]
#test_df = df.loc[test_index]



#X_train = train_df.reindex(features, axis=1).values
#y_train = train_df.reindex(target, axis=1).values
#X_test = test_df.reindex(features, axis=1).values
#y_test = test_df.reindex(target, axis=1).values

#print("Train:")
#print(train_df)
#print()
#print("Test:")
#print(test_df)



# Extraindo os nomes das colunas para X e y
features = df.columns[:-2]  # Todas as colunas, exceto a última (ou ajuste conforme necessário)
target = "num_binarized"    # Nome da coluna target

# Verificando se os nomes das colunas foram extraídos corretamente
print("Features:", features)
print("Target:", target)

# Dividindo o índice do DataFrame para treino e teste
index = df.index
train_index, test_index = train_test_split(index, test_size=Test_Size, random_state=Random_Seed)

# Separando o DataFrame em treino e teste
train_df = df.loc[train_index]
test_df = df.loc[test_index]

# Extraindo as features e o target para treino e teste
X_train = train_df.reindex(columns=features).values
y_train = train_df[target].values
X_test = test_df.reindex(columns=features).values
y_test = test_df[target].values

# Verificando as shapes dos dados
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)





Features: Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal'],
      dtype='object')
Target: num_binarized
X_train shape: (237, 13)
y_train shape: (237,)
X_test shape: (60, 13)
y_test shape: (60,)


### Gaussian Process Model:

In [None]:
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process.kernels import DotProduct
from sklearn.gaussian_process.kernels import Matern
from sklearn.gaussian_process.kernels import RationalQuadratic
from sklearn.gaussian_process.kernels import WhiteKernel

# Cria o modelo
model_gp = GaussianProcessClassifier(random_state=Random_Seed, kernel=RBF(length_scale=1.0))

In [None]:

# train the model
model_gp.fit(X_train, y_train)

# make predictions using the trained model
y_pred = model_gp.predict(X_test)

# calculates the probabilities of the model predictions
y_pred_proba = model_gp.predict_proba(X_test)

# calculates the incertainty like standard deviation of probabilities to each one of the classes
y_pred_std = np.std(y_pred_proba, axis=1)

# calculates the mean incertainty
mean_std = np.mean(y_pred_std)

#calculates the area under the ROC curve(AUC-ROC)
roc_auc_gp = roc_auc_score(y_test, y_pred_proba[:, 1])

#print(f"Prevision of the gaussian process model: {y_pred}")
#print(f"Score of the gaussian process model: {model_gp.score(X_train, y_train)}")
#print(f"Mean incertainty of the gaussian process model: {mean_std}")
print(f"AUC-ROC of the gaussian process model: {roc_auc_gp}")

AUC-ROC of the gaussian process model: 0.510662177328844


In [None]:
#finished with value: 0.909965034965035 and parameters: {'kernel': 'quad', 'n_restarts_optimizer': 8, 'max_iter_predict': 643}.
# Cria o modelo
#model_gp = GaussianProcessClassifier(random_state=Random_Seed, kernel= 1 * RationalQuadratic(),  n_restarts_optimizer= 8, max_iter_predict= 643)

# train the model
#model_gp.fit(X_train, y_train)

# make predictions using the trained model
#y_pred = model_gp.predict(X_test)

# calculates the probabilities of the model predictions
#y_pred_proba = model_gp.predict_proba(X_test)

# calculates the incertainty like standard deviation of probabilities to each one of the classes
#y_pred_std = np.std(y_pred_proba, axis=1)

# calculates the mean incertainty
#mean_std = np.mean(y_pred_std)

#calculates the area under the ROC curve(AUC-ROC)
#roc_auc_gp = roc_auc_score(y_test, y_pred_proba[:, 1])

#print(f"Prevision of the gaussian process model: {y_pred}")
#print(f"Score of the gaussian process model: {model_gp.score(X_train, y_train)}")
#print(f"Mean incertainty of the gaussian process model: {mean_std}")
#print(f"AUC-ROC of the gaussian process model: {roc_auc_gp}")

# Hyperparameter tuning with optuna


In [None]:
#!/usr/bin/env python
# coding: utf-8
!pip install optuna
import optuna

import numpy as np
import pickle
import seaborn as sns
#import pandas as pd

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error
#from sklearn.metrics import roc_auc_score
#from sklearn.gaussian_process import GaussianProcessClassifier

#from sklearn.gaussian_process.kernels import RBF
#from sklearn.gaussian_process.kernels import DotProduct
#from sklearn.gaussian_process.kernels import Matern
#from sklearn.gaussian_process.kernels import RationalQuadratic
#from sklearn.gaussian_process.kernels import WhiteKernel

from optuna import create_study, Trial


TAMANHO_TESTE = Test_Size
SEMENTE_ALEATORIA = Random_Seed



DATASET_NAME = df
FEATURES = features
TARGET = [target]

NUM_TRIALS = 1000

NUM_FOLDS = 10

STUDY_NAME = "gp_vba_class_ha"

SCORE = "roc_auc"
# SCORE = "f1"



# Ref: https://machinelearningmastery.com/gaussian-processes-for-classification-with-python/
KERNELS = {
    "rbf": 1 * RBF(),
    "dot": 1 * DotProduct(),
    "matern": 1 * Matern(),
    "quad": 1 * RationalQuadratic(),
    "white": 1 * WhiteKernel(),
}

###############################################################################
#                             Tratamento de dados                             #
##############################################################################+

#df = pd.read_excel(DATASET_NAME)

#df = df.reindex(FEATURES + TARGET, axis=1)
#df = df.dropna()

indices = df.index
indices_treino, indices_teste = train_test_split(
    indices, test_size=TAMANHO_TESTE, random_state=SEMENTE_ALEATORIA
)

df_treino = df.loc[indices_treino]
df_teste = df.loc[indices_teste]

X_treino = df_treino.reindex(FEATURES, axis=1).values
y_treino = df_treino.reindex(TARGET, axis=1).values.ravel()

X_teste = df_teste.reindex(FEATURES, axis=1).values
y_teste = df_teste.reindex(TARGET, axis=1).values.ravel()

X = df.reindex(FEATURES, axis=1).values
y = df.reindex(TARGET, axis=1).values.ravel()




def cria_instancia_modelo(trial):
    """Cria uma instância do modelo.

    Args:
      trial: objeto tipo Trial do optuna.

    Returns:
      Uma instância do modelo desejado.
    """

    kernel_id = trial.suggest_categorical(
        "kernel", ["rbf", "white", "dot", "matern", "quad"]
    )

    parametros = {
        "kernel": KERNELS[kernel_id],
        "n_restarts_optimizer": trial.suggest_int(
            "n_restarts_optimizer", 0, 10
        ),
        "max_iter_predict": trial.suggest_int(
            "max_iter_predict", 50, 1000, log=True
        ),
        "n_jobs": -1,
        "random_state": SEMENTE_ALEATORIA,
    }

    model = GaussianProcessClassifier(**parametros)

    return model


def funcao_objetivo(
    trial,
    X,
    y,
    num_folds=NUM_FOLDS,
    random_state=SEMENTE_ALEATORIA,
):
    """Função objetivo do optuna

    Referencia:
      https://medium.com/@walter_sperat/ using-optuna-with-sklearn-the-right-way-part-1-6b4ad0ab2451
    """

    modelo = cria_instancia_modelo(trial)

    metricas = cross_val_score(
        modelo,
        X,
        y,
        scoring=SCORE,
        cv=num_folds,
    )

    return metricas.mean()


study = create_study(
    study_name=STUDY_NAME,
    storage=f"sqlite:///{STUDY_NAME}.db",
    direction="maximize",
    load_if_exists=True,
)

study.optimize(
    lambda trial: funcao_objetivo(trial, X_treino, y_treino),
    n_trials=NUM_TRIALS,
)

trialdf = study.trials_dataframe()
trialdf.to_csv("trial_df.csv", index=False)

melhor_trial = study.best_trial
print(melhor_trial)


modelo = cria_instancia_modelo(melhor_trial)
modelo.fit(X_treino, y_treino)

y_verdadeiro = y_teste
y_previsao = modelo.predict(X_teste)

ROC_AUC = roc_auc_score(y_verdadeiro, y_previsao)

print(ROC_AUC)




[I 2024-08-20 18:16:27,680] Using an existing study with name 'gp_vba_class_ha' instead of creating a new one.
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
ABNORMAL_TERMINATION_IN_LNSRC

ValueError: 
All the 10 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 741, in fit
    self.base_estimator_.fit(X, y)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 246, in fit
    self._constrained_optimization(obj_func, theta_initial, bounds)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 474, in _constrained_optimization
    opt_res = scipy.optimize.minimize(
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_minimize.py", line 713, in minimize
    res = _minimize_lbfgsb(fun, x0, args, jac, bounds,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_lbfgsb_py.py", line 347, in _minimize_lbfgsb
    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 288, in _prepare_scalar_function
    sf = ScalarFunction(fun, x0, args, grad, hess,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 166, in __init__
    self._update_fun()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 262, in _update_fun
    self._update_fun_impl()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 163, in update_fun
    self.f = fun_wrapped(self.x)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 145, in fun_wrapped
    fx = fun(np.copy(x), *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 79, in __call__
    self._compute_if_needed(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 73, in _compute_if_needed
    fg = self.fun(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 220, in obj_func
    lml, grad = self.log_marginal_likelihood(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 385, in log_marginal_likelihood
    Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 444, in _posterior_mode
    L = cholesky(B, lower=True)
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 88, in cholesky
    c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 36, in _cholesky
    raise LinAlgError("%d-th leading minor of the array is not positive "
numpy.linalg.LinAlgError: 118-th leading minor of the array is not positive definite

--------------------------------------------------------------------------------
7 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 741, in fit
    self.base_estimator_.fit(X, y)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 246, in fit
    self._constrained_optimization(obj_func, theta_initial, bounds)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 474, in _constrained_optimization
    opt_res = scipy.optimize.minimize(
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_minimize.py", line 713, in minimize
    res = _minimize_lbfgsb(fun, x0, args, jac, bounds,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_lbfgsb_py.py", line 347, in _minimize_lbfgsb
    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 288, in _prepare_scalar_function
    sf = ScalarFunction(fun, x0, args, grad, hess,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 166, in __init__
    self._update_fun()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 262, in _update_fun
    self._update_fun_impl()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 163, in update_fun
    self.f = fun_wrapped(self.x)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 145, in fun_wrapped
    fx = fun(np.copy(x), *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 79, in __call__
    self._compute_if_needed(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 73, in _compute_if_needed
    fg = self.fun(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 220, in obj_func
    lml, grad = self.log_marginal_likelihood(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 385, in log_marginal_likelihood
    Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 444, in _posterior_mode
    L = cholesky(B, lower=True)
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 88, in cholesky
    c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 36, in _cholesky
    raise LinAlgError("%d-th leading minor of the array is not positive "
numpy.linalg.LinAlgError: 115-th leading minor of the array is not positive definite

--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 741, in fit
    self.base_estimator_.fit(X, y)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 246, in fit
    self._constrained_optimization(obj_func, theta_initial, bounds)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 474, in _constrained_optimization
    opt_res = scipy.optimize.minimize(
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_minimize.py", line 713, in minimize
    res = _minimize_lbfgsb(fun, x0, args, jac, bounds,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_lbfgsb_py.py", line 347, in _minimize_lbfgsb
    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 288, in _prepare_scalar_function
    sf = ScalarFunction(fun, x0, args, grad, hess,
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 166, in __init__
    self._update_fun()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 262, in _update_fun
    self._update_fun_impl()
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 163, in update_fun
    self.f = fun_wrapped(self.x)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_differentiable_functions.py", line 145, in fun_wrapped
    fx = fun(np.copy(x), *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 79, in __call__
    self._compute_if_needed(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/scipy/optimize/_optimize.py", line 73, in _compute_if_needed
    fg = self.fun(x, *args)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 220, in obj_func
    lml, grad = self.log_marginal_likelihood(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 385, in log_marginal_likelihood
    Z, (pi, W_sr, L, b, a) = self._posterior_mode(K, return_temporaries=True)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py", line 444, in _posterior_mode
    L = cholesky(B, lower=True)
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 88, in cholesky
    c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
  File "/usr/local/lib/python3.10/dist-packages/scipy/linalg/_decomp_cholesky.py", line 36, in _cholesky
    raise LinAlgError("%d-th leading minor of the array is not positive "
numpy.linalg.LinAlgError: 117-th leading minor of the array is not positive definite
