In [1]:
import optuna
import joblib
import numpy as np
import pandas as pd
import sys
from project_resources.import_utils import NotebookFinder
sys.meta_path.append(NotebookFinder())
from project_resources.cytochrome_P450 import fp_from_smiles, HyperparamTuner

importing Jupyter notebook from C:\Users\Lukas\Documents\datacytochromy\project_resources\cytochrome_P450.ipynb


In [2]:
# Regressors to tune, the datasets, and the split variants stored for each dataset.
model_identifiers = ["linear", "KRR", "GB", "RF", "ANN"]
isozymes = ["3A4", "RLM", "HLC"]
data_splits = ["train", "test"]
splitters = ["rand", "scaff"]


def _dataset_paths(dataset, source_file, sep):
    """Build the ``rel_paths`` entries that belong to a single dataset.

    Args:
        dataset: dataset key, e.g. ``"3A4"``.
        source_file: file name of the raw export inside ``project_resources``.
        sep: column separator stored under ``"<dataset>_sep"``.

    Returns:
        dict with the keys ``<dataset>_source``, ``<dataset>_sep``, ``<dataset>``
        and ``<dataset>_<train|test>_<scaff|rand>``, in that order.
    """
    root = "project_resources"
    entries = {
        f"{dataset}_source": f"{root}/{source_file}",
        f"{dataset}_sep": sep,
        dataset: f"{root}/{dataset}.csv",
    }
    # short splitter key -> directory that holds the corresponding train/test files
    for split_key, split_dir in (("scaff", "scaffold_splitter"), ("rand", "random")):
        for part in ("train", "test"):
            entries[f"{dataset}_{part}_{split_key}"] = (
                f"{root}/base_splits/{split_dir}/{dataset}_{part}.csv"
            )
    return entries


# All paths are relative to the notebook's working directory.
rel_paths = {
    **_dataset_paths("3A4", "ChEMBL_3A4.csv", ";"),
    **_dataset_paths("RLM", "AID_1508591_datatable_all.csv", ","),
    **_dataset_paths("HLC", "AID_1508603_datatable_all.csv", ","),
}
# Sampler: the strategy Optuna uses to propose the hyperparameters of each new trial.
# The registry maps a name to the sampler *class*; instantiate it before handing it to a study.
samplers = dict(
    RandomSampler=optuna.samplers.RandomSampler,          # independent random draws from the search space
    GridSampler=optuna.samplers.GridSampler,              # grid search over an explicitly supplied search space
    TPESampler=optuna.samplers.TPESampler,                # Tree-structured Parzen Estimator
    CmaEsSampler=optuna.samplers.CmaEsSampler,            # Covariance Matrix Adaptation Evolution Strategy
    NSGAIISampler=optuna.samplers.NSGAIISampler,          # NSGA-II evolutionary algorithm for multi-objective studies
    QMCSampler=optuna.samplers.QMCSampler,                # quasi-Monte Carlo sampling based on low-discrepancy sequences
    BoTorchSampler=optuna.integration.BoTorchSampler,     # Bayesian optimization backed by the BoTorch library
    BruteForceSampler=optuna.samplers.BruteForceSampler,  # exhaustively tries every combination in the search space
)
# Pruner: decides whether an unpromising trial should be stopped early, based on the
# intermediate values the objective reports.
# The registry maps a name to the pruner *class*; instantiate it before handing it to a study.
pruners = dict(
    BasePruner=optuna.pruners.BasePruner,                            # abstract base class for custom pruners (not usable directly)
    MedianPruner=optuna.pruners.MedianPruner,                        # median stopping rule: prune when the trial is worse than the median of earlier trials at the same step
    SuccessiveHalvingPruner=optuna.pruners.SuccessiveHalvingPruner,  # asynchronous successive halving: only the best fraction of trials advances to the next rung
    HyperbandPruner=optuna.pruners.HyperbandPruner,                  # Hyperband: several successive-halving brackets with different resource budgets
    PercentilePruner=optuna.pruners.PercentilePruner,                # like the median rule, but compares against a configurable percentile of earlier trials at the same step
    NopPruner=optuna.pruners.NopPruner,                              # never prunes
    ThresholdPruner=optuna.pruners.ThresholdPruner,                  # prunes when a reported value crosses a lower/upper threshold or becomes NaN
    PatientPruner=optuna.pruners.PatientPruner,                      # wraps another pruner and tolerates a number of steps without improvement before pruning
)
# Empty containers for the loaded SMILES, the half-lives and the computed fingerprints.
smiles, halflives, fingerprints = {}, {}, {}

In [3]:
# load train-test
# Fills `smiles` and `halflives` as nested dicts indexed [splitter][isozyme][data_split],
# and prints a short preview of every dataset as a sanity check.
for splitter in splitters:
    print(splitter)
    smiles[splitter] = {}
    halflives[splitter] = {}
    for isozyme in isozymes:
        smiles[splitter][isozyme] = {}
        halflives[splitter][isozyme] = {}

        for data_split in data_splits:
            # both columns come from the same CSV, so the file is parsed only once
            split_df = pd.read_csv(rel_paths[f"{isozyme}_{data_split}_{splitter}"])

            # load smiles
            split_smi = np.array(split_df["smiles"])
            smiles[splitter][isozyme][data_split] = split_smi

            # load half-life
            split_halflife = np.array(split_df["half-life"])
            halflives[splitter][isozyme][data_split] = split_halflife

        # preview: first SMILES string and first three half-lives of each split
        print(f"""{isozyme}
    x_train: {smiles[splitter][isozyme]["train"][0]}
    x_test: {smiles[splitter][isozyme]["test"][0]}
    y_train: {halflives[splitter][isozyme]["train"][:3]}
    y_test: {halflives[splitter][isozyme]["test"][:3]}
    """)

rand
3A4
    x_train: CC(C)(O)c1cc(F)c2c(c1)C(=O)N(Cc1ccc(Cl)cn1)[C@@]2(OCC1(O)CC1)c1ccc(Cl)cc1
    x_test: Cc1ncsc1-c1ccc([C@H](CC(=O)NCCCCCCNC(=O)COc2c(-c3csc(N4CCOCC4)n3)ccc(F)c2F)NC(=O)[C@@H]2C[C@@H](O)CN2C(=O)[C@@H](NC(=O)C2(F)CC2)C(C)(C)C)cc1
    y_train: [6.   0.02 0.5 ]
    y_test: [0.3767  0.3333  0.01433]
    
RLM
    x_train: O=c1cc(N2CCOCC2)oc2c1ccc1ccccc12
    x_test: Cc1ccc(OCCn2c(CCNC(=O)N3CCCCC3)nc3ccccc32)cc1
    y_train: [30.    4.4  26.58]
    y_test: [ 1.7  1.7 30. ]
    
HLC
    x_train: N#Cc1ccc(CN2CCC(N3CCNC3=O)CC2)cc1
    x_test: c1ccc(Nc2ncc(-c3cncnc3)c3c2OCC3)cc1
    y_train: [93.2 21.  50.8]
    y_test: [120.  111.9  57.4]
    
scaff
3A4
    x_train: COc1cccc([C@@H](CO)NC(=O)[C@@H](C)N2Cc3ccc(-c4nc(NC5CCOCC5)ncc4Cl)cc3C2=O)c1
    x_test: O=C1CCC(N2C(=O)c3cccc(NCCOCCOCCNC(=O)c4ccc5c(c4)nc(Nc4cccc(Cl)c4)c4ccncc45)c3C2=O)C(=O)N1
    y_train: [0.09167 0.08333 0.8167 ]
    y_test: [0.2433 0.055  0.2667]
    
RLM
    x_train: CS(=O)(=O)c1ccccc1-c1csc(N2CCC(C(N)=O)C

In [4]:
# Turn the SMILES of every (splitter, isozyme, data_split) combination into fingerprints
# via fp_from_smiles and store them as numpy arrays in `fingerprints`, mirroring the
# nesting of `smiles`.
for splitter in splitters:
    by_isozyme = fingerprints[splitter] = {}
    for isozyme in isozymes:
        by_split = by_isozyme[isozyme] = {}
        for data_split in data_splits:
            split_smiles = smiles[splitter][isozyme][data_split]
            fps = fp_from_smiles(split_smiles)
            by_split[data_split] = np.array(fps)
            # sanity check: first fingerprint and number of molecules in this split
            print(splitter, isozyme, data_split)
            print(fps[0], len(fps))
        print("\n")

rand 3A4 train
[1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 1 0 1 0 1 0 1
 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 0 1 0 1 1 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0
 1 0 1 0 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 0 0
 1 0 1 0 0 1 0 0 0 1 1 1 0] 56
rand 3A4 test
[1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 0 0 1 1
 1 0 0 0 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 0 0 1 1 0 1 1 0 0 0 0 0
 1 1 1 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 1 1 0 0 1 1 1 0 0 0 0 0 1 0 1
 1 1 0 0 0 1 0 1 1 1 1 1 1] 14


rand RLM train
[0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0
 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0
 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1
 1 0 0 1 0 0 0 0 0 1 0 1 0] 2024
rand RLM test
[1 0 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
 1 1 1 0 1 1 1 1 1 1 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0
 1 0 1 0 0 0 1 0 0 0

In [6]:
# Hyperparameter search: one Optuna study per (splitter, isozyme, model) combination.
# Every study is stored in its own SQLite file and additionally pickled with joblib.
sampler = samplers['TPESampler']
# create_study() expects a pruner *instance*. BasePruner is the abstract base class and
# cannot be instantiated, so the explicit "never prune" pruner is selected instead.
pruner = pruners["NopPruner"]
# Trials run per study on each execution of this cell; with load_if_exists=True an existing
# study is resumed, so re-running the cell adds further trials to it.
n_trials = 200
for splitter in splitters:
    # directory name used on disk for this splitter
    splitter_name = "random" if splitter == "rand" else "scaffold_splitter"

    for isozyme in isozymes:
        X_train = fingerprints[splitter][isozyme]["train"]
        # natural log half-lives -> values are less spread out
        # NOTE(review): a half-life of 0 would become -inf here - confirm such rows are filtered upstream
        y_train = np.log(halflives[splitter][isozyme]["train"])
        X_test = fingerprints[splitter][isozyme]["test"]
        y_test = np.log(halflives[splitter][isozyme]["test"])

        # location of the study files for this splitter/isozyme (the directory must already exist)
        study_dir = f"project_resources/optuna/morgan/{splitter_name}/{isozyme}"
        for model_identifier in model_identifiers:
            print(splitter_name, isozyme, model_identifier)
            study = optuna.create_study(
                study_name=model_identifier,
                directions=['minimize'],
                sampler=sampler(),  # fresh sampler instance per study
                pruner=pruner(),
                storage=f"sqlite:///{study_dir}/db.{model_identifier}",
                load_if_exists=True,
            )
            # NOTE(review): the tuner receives the test split; if its objective scores trials on
            # X_test/y_test, the test set is being used for model selection - confirm in HyperparamTuner
            test = HyperparamTuner(model_identifier, X_train, y_train, X_test, y_test)
            # n_jobs=-1 runs trials in parallel, so trials finish (and are logged) out of order
            study.optimize(test.objective, n_trials=n_trials, n_jobs=-1)  # catch=(ValueError,)
            joblib.dump(study, f"./{study_dir}/{model_identifier}.pkl")

[I 2023-10-11 06:59:03,513] Using an existing study with name 'linear' instead of creating a new one.


random 3A4 linear


[I 2023-10-11 06:59:04,238] Trial 5 finished with value: 1.421083665447745 and parameters: {'alpha': 0.0017206090153465438, 'l1_ratio': 0.3607761285976985}. Best is trial 1 with value: 1.41339201903038.
[I 2023-10-11 06:59:04,506] Trial 7 finished with value: 1.4005567409594584 and parameters: {'alpha': 0.034182316569594876, 'l1_ratio': 0.040218015766864235}. Best is trial 12 with value: 1.397574603354423.
[I 2023-10-11 06:59:04,879] Trial 12 finished with value: 1.397574603354423 and parameters: {'alpha': 0.09634697330993412, 'l1_ratio': 0.8290080184890423}. Best is trial 9 with value: 1.364301123873657.
[I 2023-10-11 06:59:04,883] Trial 11 finished with value: 1.4098424701088776 and parameters: {'alpha': 0.08352605159468512, 'l1_ratio': 0.5331546134265865}. Best is trial 9 with value: 1.364301123873657.
[I 2023-10-11 06:59:05,020] Trial 2 finished with value: 1.4172549203443723 and parameters: {'alpha': 0.009257384420163262, 'l1_ratio': 0.6749086570598711}. Best is trial 9 with value

random 3A4 KRR


[I 2023-10-11 06:59:29,385] Trial 4 finished with value: 1.316615300283318 and parameters: {'alpha': 0.1404454424455278, 'gamma': 8.110123168191861e-15, 'kernel': 'laplacian'}. Best is trial 1 with value: 1.2661800986447964.
[I 2023-10-11 06:59:29,606] Trial 8 finished with value: 1.3013046538052324 and parameters: {'alpha': 0.8539772337915429, 'gamma': 1.1577539461944065e-16, 'kernel': 'linear'}. Best is trial 1 with value: 1.2661800986447964.
[I 2023-10-11 06:59:29,628] Trial 5 finished with value: 1.3048828888122321 and parameters: {'alpha': 0.5843071052819766, 'gamma': 8.811461294954098e-15, 'kernel': 'linear'}. Best is trial 1 with value: 1.2661800986447964.
[I 2023-10-11 06:59:29,838] Trial 9 finished with value: 1.3369142241412042 and parameters: {'alpha': 0.6606570552264435, 'gamma': 3.0699530023231835e-15, 'kernel': 'laplacian'}. Best is trial 1 with value: 1.2661800986447964.
[I 2023-10-11 06:59:29,857] Trial 3 finished with value: 1.3061829769240296 and parameters: {'alpha':

random 3A4 GB


[I 2023-10-11 06:59:59,326] Trial 2 finished with value: 1.4993153917423194 and parameters: {'n_estimators': 50, 'learning_rate': 0.27352058062644286, 'max_depth': 5}. Best is trial 0 with value: 1.2807484970065994.
[I 2023-10-11 07:00:00,436] Trial 5 finished with value: 1.3303992843060943 and parameters: {'n_estimators': 200, 'learning_rate': 0.33641417690272946, 'max_depth': 3}. Best is trial 0 with value: 1.2807484970065994.
[I 2023-10-11 07:00:00,644] Trial 6 finished with value: 1.2744968883687173 and parameters: {'n_estimators': 50, 'learning_rate': 0.6854249289902893, 'max_depth': 4}. Best is trial 6 with value: 1.2744968883687173.
[I 2023-10-11 07:00:00,886] Trial 3 finished with value: 1.2790190352453112 and parameters: {'n_estimators': 50, 'learning_rate': 0.18428421122832164, 'max_depth': 3}. Best is trial 6 with value: 1.2744968883687173.
[I 2023-10-11 07:00:01,072] Trial 7 finished with value: 1.2857239040954518 and parameters: {'n_estimators': 500, 'learning_rate': 0.880

random 3A4 RF


[I 2023-10-11 07:01:29,115] Trial 13 finished with value: 1.1857603294290437 and parameters: {'n_estimators': 200, 'max_features': 'log2', 'max_depth': 2}. Best is trial 1 with value: 1.1498073756776013.
[I 2023-10-11 07:01:29,628] Trial 7 finished with value: 1.2388940928483438 and parameters: {'n_estimators': 50, 'max_features': 'sqrt', 'max_depth': 10}. Best is trial 1 with value: 1.1498073756776013.
[I 2023-10-11 07:01:29,991] Trial 2 finished with value: 1.224281088888996 and parameters: {'n_estimators': 500, 'max_features': 'sqrt', 'max_depth': None}. Best is trial 1 with value: 1.1498073756776013.
[I 2023-10-11 07:01:30,259] Trial 3 finished with value: 1.311034516192712 and parameters: {'n_estimators': 500, 'max_features': 'auto', 'max_depth': 10}. Best is trial 1 with value: 1.1498073756776013.
[I 2023-10-11 07:01:30,270] Trial 6 finished with value: 1.2149335455293393 and parameters: {'n_estimators': 500, 'max_features': 'log2', 'max_depth': 4}. Best is trial 1 with value: 1.

random 3A4 ANN


[I 2023-10-11 07:06:00,432] Trial 3 finished with value: 1.1894837518020298 and parameters: {'learning_rate_init': 0.07561700195430833, 'hidden_layer_sizes': [10, 10]}. Best is trial 0 with value: 1.1678425906626275.
[I 2023-10-11 07:06:00,440] Trial 8 finished with value: 1.2738111678710118 and parameters: {'learning_rate_init': 0.04712655439634574, 'hidden_layer_sizes': [10, 10, 10]}. Best is trial 0 with value: 1.1678425906626275.
[I 2023-10-11 07:06:00,711] Trial 5 finished with value: 1.2917450033140963 and parameters: {'learning_rate_init': 0.029875523118280168, 'hidden_layer_sizes': [10, 10, 10]}. Best is trial 0 with value: 1.1678425906626275.
[I 2023-10-11 07:06:00,888] Trial 7 finished with value: 1.289624832770856 and parameters: {'learning_rate_init': 0.07220667620883432, 'hidden_layer_sizes': [10]}. Best is trial 0 with value: 1.1678425906626275.
[I 2023-10-11 07:06:01,738] Trial 6 finished with value: 1.2669781159462918 and parameters: {'learning_rate_init': 0.06385390981

random RLM linear


[I 2023-10-11 07:14:42,010] Trial 4 finished with value: 1.0978412796094756 and parameters: {'alpha': 0.06423591814606484, 'l1_ratio': 0.4068619292268528}. Best is trial 1 with value: 1.0976486745063534.
[I 2023-10-11 07:14:42,228] Trial 8 finished with value: 1.0980491627831463 and parameters: {'alpha': 0.0574654640197821, 'l1_ratio': 0.8528063816554905}. Best is trial 1 with value: 1.0976486745063534.
[I 2023-10-11 07:14:42,300] Trial 6 finished with value: 1.0977514852557242 and parameters: {'alpha': 0.019604141257395135, 'l1_ratio': 0.42619808492311884}. Best is trial 1 with value: 1.0976486745063534.
[I 2023-10-11 07:14:42,374] Trial 11 finished with value: 1.0977683905768025 and parameters: {'alpha': 0.06305803538194313, 'l1_ratio': 0.7559909252676771}. Best is trial 1 with value: 1.0976486745063534.
[I 2023-10-11 07:14:42,378] Trial 10 finished with value: 1.097784226245443 and parameters: {'alpha': 0.05920979141928423, 'l1_ratio': 0.4214712759271243}. Best is trial 1 with value

random RLM KRR


[I 2023-10-11 07:15:22,344] Trial 6 finished with value: 1.018226598231772 and parameters: {'alpha': 0.6996537839831571, 'gamma': 6.039339677660866e-15, 'kernel': 'linear'}. Best is trial 0 with value: 1.010931164075948.
[I 2023-10-11 07:15:22,355] Trial 5 finished with value: 1.0176221625839261 and parameters: {'alpha': 0.4913747191810986, 'gamma': 8.825186973484492e-15, 'kernel': 'rbf'}. Best is trial 0 with value: 1.010931164075948.
[I 2023-10-11 07:15:23,076] Trial 3 finished with value: 1.0138198071476585 and parameters: {'alpha': 0.8935841791418125, 'gamma': 6.6359248469446296e-15, 'kernel': 'laplacian'}. Best is trial 0 with value: 1.010931164075948.
[I 2023-10-11 07:15:23,144] Trial 4 finished with value: 1.0161115966631524 and parameters: {'alpha': 0.0711867707664586, 'gamma': 6.901110908266384e-16, 'kernel': 'rbf'}. Best is trial 0 with value: 1.010931164075948.
[I 2023-10-11 07:15:23,617] Trial 8 finished with value: 1.0132490758657118 and parameters: {'alpha': 0.06116301431

random RLM GB


[I 2023-10-11 07:18:43,322] Trial 8 finished with value: 0.9724908658862503 and parameters: {'n_estimators': 200, 'learning_rate': 0.612672242469066, 'max_depth': 4}. Best is trial 8 with value: 0.9724908658862503.
[I 2023-10-11 07:18:43,793] Trial 10 finished with value: 0.9755369806309471 and parameters: {'n_estimators': 500, 'learning_rate': 0.8708505935003802, 'max_depth': 5}. Best is trial 8 with value: 0.9724908658862503.
[I 2023-10-11 07:18:44,054] Trial 6 finished with value: 0.9771559028699673 and parameters: {'n_estimators': 20, 'learning_rate': 0.6368041905265752, 'max_depth': 3}. Best is trial 8 with value: 0.9724908658862503.
[I 2023-10-11 07:18:44,083] Trial 3 finished with value: 0.9732003332208139 and parameters: {'n_estimators': 10, 'learning_rate': 0.35553740107250137, 'max_depth': 1}. Best is trial 8 with value: 0.9724908658862503.
[I 2023-10-11 07:18:44,455] Trial 9 finished with value: 0.9750617877901249 and parameters: {'n_estimators': 200, 'learning_rate': 0.6804

random RLM RF


[I 2023-10-11 07:25:03,051] Trial 6 finished with value: 0.9557754163157003 and parameters: {'n_estimators': 50, 'max_features': 'auto', 'max_depth': 2}. Best is trial 1 with value: 0.9536424760789356.
[I 2023-10-11 07:25:03,255] Trial 10 finished with value: 0.9562752394559851 and parameters: {'n_estimators': 500, 'max_features': 'auto', 'max_depth': 4}. Best is trial 1 with value: 0.9536424760789356.
[I 2023-10-11 07:25:03,464] Trial 3 finished with value: 0.9535599949009358 and parameters: {'n_estimators': 10, 'max_features': 'log2', 'max_depth': 2}. Best is trial 3 with value: 0.9535599949009358.
[I 2023-10-11 07:25:03,864] Trial 8 finished with value: 0.9546258478753642 and parameters: {'n_estimators': 20, 'max_features': 'sqrt', 'max_depth': None}. Best is trial 3 with value: 0.9535599949009358.
[I 2023-10-11 07:25:06,112] Trial 11 finished with value: 0.9572139250519718 and parameters: {'n_estimators': 10, 'max_features': 'sqrt', 'max_depth': 10}. Best is trial 3 with value: 0.9

random RLM ANN


[I 2023-10-11 07:42:47,228] Trial 6 finished with value: 1.0646121722414628 and parameters: {'learning_rate_init': 0.08937017672610192, 'hidden_layer_sizes': [50, 50, 50]}. Best is trial 0 with value: 1.0639709567255349.
[I 2023-10-11 07:42:48,367] Trial 8 finished with value: 1.0630574169413078 and parameters: {'learning_rate_init': 0.0308185730724965, 'hidden_layer_sizes': [10, 10, 10]}. Best is trial 8 with value: 1.0630574169413078.
[I 2023-10-11 07:42:48,516] Trial 3 finished with value: 1.0688845272623662 and parameters: {'learning_rate_init': 0.03901440371019488, 'hidden_layer_sizes': [5]}. Best is trial 8 with value: 1.0630574169413078.
[I 2023-10-11 07:42:49,361] Trial 13 finished with value: 1.0636403281760276 and parameters: {'learning_rate_init': 0.07574321276408763, 'hidden_layer_sizes': [5, 5, 5]}. Best is trial 8 with value: 1.0630574169413078.
[I 2023-10-11 07:42:49,854] Trial 7 finished with value: 1.0732253905788116 and parameters: {'learning_rate_init': 0.02927660253

random HLC linear


[I 2023-10-11 08:57:24,677] Trial 9 finished with value: 0.6304503521875685 and parameters: {'alpha': 0.05110925964336348, 'l1_ratio': 0.8500993411091046}. Best is trial 0 with value: 0.626079959918068.
[I 2023-10-11 08:57:24,763] Trial 13 finished with value: 0.632106517346044 and parameters: {'alpha': 0.05948471352874285, 'l1_ratio': 0.005314891431466018}. Best is trial 0 with value: 0.626079959918068.
[I 2023-10-11 08:57:24,910] Trial 7 finished with value: 0.6294641191805092 and parameters: {'alpha': 0.08468118905554574, 'l1_ratio': 0.9124631253075206}. Best is trial 0 with value: 0.626079959918068.
[I 2023-10-11 08:57:24,947] Trial 3 finished with value: 0.6284872643487921 and parameters: {'alpha': 0.035680894362090175, 'l1_ratio': 0.13554142139320513}. Best is trial 0 with value: 0.626079959918068.
[I 2023-10-11 08:57:24,984] Trial 6 finished with value: 0.6302030703061807 and parameters: {'alpha': 0.020929278710843144, 'l1_ratio': 0.6253199438096736}. Best is trial 0 with value:

random HLC KRR


[I 2023-10-11 08:57:56,465] Trial 8 finished with value: 0.6550766010572083 and parameters: {'alpha': 0.9351393100241859, 'gamma': 2.3589669629752373e-15, 'kernel': 'linear'}. Best is trial 3 with value: 0.6117371312388746.
[I 2023-10-11 08:57:56,574] Trial 2 finished with value: 0.6210774127023905 and parameters: {'alpha': 0.4385045484334438, 'gamma': 4.675975407439236e-16, 'kernel': 'rbf'}. Best is trial 3 with value: 0.6117371312388746.
[I 2023-10-11 08:57:56,590] Trial 10 finished with value: 0.6216902162057655 and parameters: {'alpha': 0.043947876849015785, 'gamma': 4.6814487965201545e-15, 'kernel': 'rbf'}. Best is trial 3 with value: 0.6117371312388746.
[I 2023-10-11 08:57:56,688] Trial 3 finished with value: 0.6117371312388746 and parameters: {'alpha': 0.5459377657641703, 'gamma': 2.704196738194997e-16, 'kernel': 'laplacian'}. Best is trial 11 with value: 0.5928673015394378.
[I 2023-10-11 08:57:56,796] Trial 11 finished with value: 0.5928673015394378 and parameters: {'alpha': 0.

random HLC GB


[I 2023-10-11 08:58:30,000] Trial 13 finished with value: 0.5507161523723555 and parameters: {'n_estimators': 10, 'learning_rate': 0.09746137802082219, 'max_depth': 2}. Best is trial 13 with value: 0.5507161523723555.
[I 2023-10-11 08:58:30,254] Trial 5 finished with value: 0.6216049702789 and parameters: {'n_estimators': 10, 'learning_rate': 0.788412109891162, 'max_depth': 1}. Best is trial 13 with value: 0.5507161523723555.
[I 2023-10-11 08:58:30,626] Trial 3 finished with value: 0.5217166785221864 and parameters: {'n_estimators': 200, 'learning_rate': 0.5736306150906966, 'max_depth': 2}. Best is trial 3 with value: 0.5217166785221864.
[I 2023-10-11 08:58:30,821] Trial 11 finished with value: 0.5399774550602695 and parameters: {'n_estimators': 200, 'learning_rate': 0.3283986423817903, 'max_depth': 3}. Best is trial 3 with value: 0.5217166785221864.
[I 2023-10-11 08:58:31,030] Trial 2 finished with value: 0.5473246127004754 and parameters: {'n_estimators': 20, 'learning_rate': 0.52604

random HLC RF


[I 2023-10-11 09:00:33,902] Trial 13 finished with value: 0.5575143003624322 and parameters: {'n_estimators': 20, 'max_features': 'sqrt', 'max_depth': 5}. Best is trial 1 with value: 0.5572311225843343.
[I 2023-10-11 09:00:34,491] Trial 11 finished with value: 0.5419370104606865 and parameters: {'n_estimators': 10, 'max_features': 'auto', 'max_depth': 4}. Best is trial 11 with value: 0.5419370104606865.
[I 2023-10-11 09:00:34,841] Trial 6 finished with value: 0.5445910684001998 and parameters: {'n_estimators': 50, 'max_features': 'sqrt', 'max_depth': 2}. Best is trial 11 with value: 0.5419370104606865.
[I 2023-10-11 09:00:34,862] Trial 5 finished with value: 0.5435073598340968 and parameters: {'n_estimators': 500, 'max_features': 'log2', 'max_depth': 3}. Best is trial 11 with value: 0.5419370104606865.
[I 2023-10-11 09:00:35,310] Trial 8 finished with value: 0.5434969181472199 and parameters: {'n_estimators': 500, 'max_features': 'log2', 'max_depth': 4}. Best is trial 11 with value: 0.

random HLC ANN


[I 2023-10-11 09:07:04,680] Trial 8 finished with value: 1.0652168786186644 and parameters: {'learning_rate_init': 0.07953034277607635, 'hidden_layer_sizes': [50, 50, 50]}. Best is trial 0 with value: 1.0617275470746037.
[I 2023-10-11 09:07:04,688] Trial 9 finished with value: 1.0804689455616634 and parameters: {'learning_rate_init': 0.055340546687001946, 'hidden_layer_sizes': [50, 50, 50]}. Best is trial 0 with value: 1.0617275470746037.
[I 2023-10-11 09:07:04,889] Trial 2 finished with value: 1.037397717792424 and parameters: {'learning_rate_init': 0.009781223443024668, 'hidden_layer_sizes': [20, 20]}. Best is trial 2 with value: 1.037397717792424.
[I 2023-10-11 09:07:05,071] Trial 5 finished with value: 1.0877289414441524 and parameters: {'learning_rate_init': 0.09920250512815944, 'hidden_layer_sizes': [10]}. Best is trial 2 with value: 1.037397717792424.
[I 2023-10-11 09:07:05,186] Trial 4 finished with value: 1.045066298010999 and parameters: {'learning_rate_init': 0.0970693435999

scaffold_splitter 3A4 linear


[I 2023-10-11 09:08:32,762] Trial 4 finished with value: 1.417720241291882 and parameters: {'alpha': 0.09101501615583495, 'l1_ratio': 0.2957172716794997}. Best is trial 1 with value: 1.3892138107378207.
[I 2023-10-11 09:08:32,793] Trial 7 finished with value: 1.4102715346693804 and parameters: {'alpha': 0.043523836744618884, 'l1_ratio': 0.8794337726903543}. Best is trial 1 with value: 1.3892138107378207.
[I 2023-10-11 09:08:32,799] Trial 10 finished with value: 1.402110139016952 and parameters: {'alpha': 0.057419507248870516, 'l1_ratio': 0.13947395470652824}. Best is trial 1 with value: 1.3892138107378207.
[I 2023-10-11 09:08:32,865] Trial 3 finished with value: 1.4294446950961028 and parameters: {'alpha': 0.024304843584971286, 'l1_ratio': 0.7445108041997717}. Best is trial 1 with value: 1.3892138107378207.
[I 2023-10-11 09:08:32,887] Trial 8 finished with value: 1.402415102767276 and parameters: {'alpha': 0.001446232163271412, 'l1_ratio': 0.24454490411891017}. Best is trial 11 with va

scaffold_splitter 3A4 KRR


[I 2023-10-11 09:09:04,459] Trial 13 finished with value: 1.300319626946931 and parameters: {'alpha': 0.21291553394990767, 'gamma': 3.1965048951687946e-15, 'kernel': 'rbf'}. Best is trial 13 with value: 1.300319626946931.
[I 2023-10-11 09:09:04,880] Trial 5 finished with value: 1.5479814402193564 and parameters: {'alpha': 0.7964204796408364, 'gamma': 9.169472664967582e-15, 'kernel': 'rbf'}. Best is trial 13 with value: 1.300319626946931.
[I 2023-10-11 09:09:04,882] Trial 4 finished with value: 1.418541954199035 and parameters: {'alpha': 0.9064696738015855, 'gamma': 8.965503217759695e-15, 'kernel': 'laplacian'}. Best is trial 13 with value: 1.300319626946931.
[I 2023-10-11 09:09:04,896] Trial 2 finished with value: 1.4347860398599834 and parameters: {'alpha': 0.7154695006639592, 'gamma': 2.321018801152902e-15, 'kernel': 'laplacian'}. Best is trial 13 with value: 1.300319626946931.
[I 2023-10-11 09:09:05,004] Trial 3 finished with value: 1.4367402079991154 and parameters: {'alpha': 0.671

scaffold_splitter 3A4 GB


[I 2023-10-11 09:09:37,181] Trial 13 finished with value: 1.3318877057509597 and parameters: {'n_estimators': 500, 'learning_rate': 0.16731165912381382, 'max_depth': 1}. Best is trial 1 with value: 1.1815171088460534.
[I 2023-10-11 09:09:37,872] Trial 5 finished with value: 1.1769243248295969 and parameters: {'n_estimators': 10, 'learning_rate': 0.6880016086784803, 'max_depth': 4}. Best is trial 5 with value: 1.1769243248295969.
[I 2023-10-11 09:09:38,279] Trial 6 finished with value: 1.2480291560902443 and parameters: {'n_estimators': 50, 'learning_rate': 0.25223595804876087, 'max_depth': 5}. Best is trial 5 with value: 1.1769243248295969.
[I 2023-10-11 09:09:38,503] Trial 8 finished with value: 1.2478743782408819 and parameters: {'n_estimators': 50, 'learning_rate': 0.059022159393396674, 'max_depth': 5}. Best is trial 5 with value: 1.1769243248295969.
[I 2023-10-11 09:09:38,955] Trial 12 finished with value: 1.1885809410376593 and parameters: {'n_estimators': 20, 'learning_rate': 0.9

scaffold_splitter 3A4 RF


[I 2023-10-11 09:11:39,827] Trial 2 finished with value: 1.2769189036960167 and parameters: {'n_estimators': 500, 'max_features': 'auto', 'max_depth': 4}. Best is trial 0 with value: 1.2710593957483545.
[I 2023-10-11 09:11:40,033] Trial 3 finished with value: 1.1343234846269226 and parameters: {'n_estimators': 500, 'max_features': 'sqrt', 'max_depth': 5}. Best is trial 3 with value: 1.1343234846269226.
[I 2023-10-11 09:11:40,340] Trial 5 finished with value: 1.2403410077570582 and parameters: {'n_estimators': 20, 'max_features': 'sqrt', 'max_depth': 3}. Best is trial 3 with value: 1.1343234846269226.
[I 2023-10-11 09:11:40,902] Trial 7 finished with value: 1.1913718056406062 and parameters: {'n_estimators': 500, 'max_features': 'log2', 'max_depth': 10}. Best is trial 3 with value: 1.1343234846269226.
[I 2023-10-11 09:11:40,906] Trial 11 finished with value: 1.197582320178831 and parameters: {'n_estimators': 20, 'max_features': 'sqrt', 'max_depth': 2}. Best is trial 3 with value: 1.1343

scaffold_splitter 3A4 ANN


[I 2023-10-11 09:18:19,847] Trial 9 finished with value: 1.244948187954388 and parameters: {'learning_rate_init': 0.09138360915170668, 'hidden_layer_sizes': [50]}. Best is trial 1 with value: 1.240565229473985.
[I 2023-10-11 09:18:20,379] Trial 13 finished with value: 1.2525763313661253 and parameters: {'learning_rate_init': 0.04763293609448332, 'hidden_layer_sizes': [10]}. Best is trial 1 with value: 1.240565229473985.
[I 2023-10-11 09:18:20,613] Trial 11 finished with value: 1.250970828143006 and parameters: {'learning_rate_init': 0.04768684367694925, 'hidden_layer_sizes': [10]}. Best is trial 1 with value: 1.240565229473985.
[I 2023-10-11 09:18:20,841] Trial 3 finished with value: 1.2307527017068232 and parameters: {'learning_rate_init': 0.030147897633607715, 'hidden_layer_sizes': [5]}. Best is trial 3 with value: 1.2307527017068232.
[I 2023-10-11 09:18:20,988] Trial 10 finished with value: 1.2520386723653498 and parameters: {'learning_rate_init': 0.03590356678863882, 'hidden_layer_

scaffold_splitter RLM linear


[I 2023-10-11 09:24:53,611] Trial 7 finished with value: 1.1097087574230147 and parameters: {'alpha': 0.008618013032880525, 'l1_ratio': 0.06622459954467896}. Best is trial 7 with value: 1.1097087574230147.
[I 2023-10-11 09:24:53,621] Trial 2 finished with value: 1.1098564230950998 and parameters: {'alpha': 0.0501476236548844, 'l1_ratio': 0.0022025792282731604}. Best is trial 0 with value: 1.1097657604592588.
[I 2023-10-11 09:24:53,680] Trial 11 finished with value: 1.1098772866596691 and parameters: {'alpha': 0.09927816714190371, 'l1_ratio': 0.4091500624655394}. Best is trial 0 with value: 1.1097657604592588.
[I 2023-10-11 09:24:53,747] Trial 13 finished with value: 1.109876581484191 and parameters: {'alpha': 0.07252683230623047, 'l1_ratio': 0.31129655130300116}. Best is trial 12 with value: 1.1095687669602932.
[I 2023-10-11 09:24:53,813] Trial 4 finished with value: 1.1103052643196154 and parameters: {'alpha': 0.04127723481315957, 'l1_ratio': 0.3637616101971036}. Best is trial 7 with 

scaffold_splitter RLM KRR


[I 2023-10-11 09:25:32,278] Trial 2 finished with value: 1.019569434079575 and parameters: {'alpha': 0.7621289758222093, 'gamma': 9.94296553915148e-15, 'kernel': 'linear'}. Best is trial 2 with value: 1.019569434079575.
[I 2023-10-11 09:25:32,938] Trial 6 finished with value: 1.0212265121723272 and parameters: {'alpha': 0.3835517153393922, 'gamma': 8.553614537525436e-15, 'kernel': 'rbf'}. Best is trial 2 with value: 1.019569434079575.
[I 2023-10-11 09:25:33,986] Trial 10 finished with value: 1.0189543477113516 and parameters: {'alpha': 0.7854526094673281, 'gamma': 1.8190233018937662e-15, 'kernel': 'laplacian'}. Best is trial 10 with value: 1.0189543477113516.
[I 2023-10-11 09:25:34,124] Trial 8 finished with value: 1.0222893179630423 and parameters: {'alpha': 0.6106220774023687, 'gamma': 7.81227408055467e-15, 'kernel': 'laplacian'}. Best is trial 10 with value: 1.0189543477113516.
[I 2023-10-11 09:25:34,165] Trial 9 finished with value: 1.0192388908817913 and parameters: {'alpha': 0.92

scaffold_splitter RLM GB


[I 2023-10-11 09:28:18,065] Trial 2 finished with value: 0.9813264890670181 and parameters: {'n_estimators': 10, 'learning_rate': 0.47725688295607993, 'max_depth': 4}. Best is trial 2 with value: 0.9813264890670181.
[I 2023-10-11 09:28:18,245] Trial 4 finished with value: 0.9827745929684216 and parameters: {'n_estimators': 500, 'learning_rate': 0.9979421210601545, 'max_depth': 1}. Best is trial 2 with value: 0.9813264890670181.
[I 2023-10-11 09:28:18,426] Trial 10 finished with value: 0.9804673975259558 and parameters: {'n_estimators': 500, 'learning_rate': 0.2764245667330527, 'max_depth': 4}. Best is trial 10 with value: 0.9804673975259558.
[I 2023-10-11 09:28:18,436] Trial 8 finished with value: 0.9806851549642431 and parameters: {'n_estimators': 20, 'learning_rate': 0.8378007644353092, 'max_depth': 1}. Best is trial 10 with value: 0.9804673975259558.
[I 2023-10-11 09:28:18,638] Trial 13 finished with value: 0.9756933581827228 and parameters: {'n_estimators': 50, 'learning_rate': 0.9

scaffold_splitter RLM RF


[I 2023-10-11 09:32:50,586] Trial 13 finished with value: 0.9662286163250169 and parameters: {'n_estimators': 10, 'max_features': 'auto', 'max_depth': 3}. Best is trial 1 with value: 0.9624396615817834.
[I 2023-10-11 09:32:50,854] Trial 8 finished with value: 0.9611043453332099 and parameters: {'n_estimators': 500, 'max_features': 'auto', 'max_depth': 10}. Best is trial 8 with value: 0.9611043453332099.
[I 2023-10-11 09:32:51,039] Trial 4 finished with value: 0.9701373863952641 and parameters: {'n_estimators': 10, 'max_features': 'sqrt', 'max_depth': 2}. Best is trial 8 with value: 0.9611043453332099.
[I 2023-10-11 09:32:51,201] Trial 11 finished with value: 0.9676923579197193 and parameters: {'n_estimators': 20, 'max_features': 'sqrt', 'max_depth': 5}. Best is trial 8 with value: 0.9611043453332099.
[I 2023-10-11 09:32:51,372] Trial 2 finished with value: 0.9605047244375418 and parameters: {'n_estimators': 500, 'max_features': 'log2', 'max_depth': 2}. Best is trial 2 with value: 0.960

scaffold_splitter RLM ANN


[I 2023-10-11 09:49:50,293] Trial 6 finished with value: 1.0538669185023652 and parameters: {'learning_rate_init': 0.06321038843591549, 'hidden_layer_sizes': [50, 50]}. Best is trial 0 with value: 1.0467954808042317.
[I 2023-10-11 09:49:50,501] Trial 5 finished with value: 1.0443609989847606 and parameters: {'learning_rate_init': 0.019714490402802133, 'hidden_layer_sizes': [50, 50, 50]}. Best is trial 5 with value: 1.0443609989847606.
[I 2023-10-11 09:49:50,692] Trial 2 finished with value: 1.061123349599571 and parameters: {'learning_rate_init': 0.08237481510712043, 'hidden_layer_sizes': [5, 5]}. Best is trial 5 with value: 1.0443609989847606.
[I 2023-10-11 09:49:51,505] Trial 11 finished with value: 1.055636553298013 and parameters: {'learning_rate_init': 0.015206338538686383, 'hidden_layer_sizes': [5, 5, 5]}. Best is trial 5 with value: 1.0443609989847606.
[I 2023-10-11 09:49:52,131] Trial 12 finished with value: 1.0501756319311024 and parameters: {'learning_rate_init': 0.0656033352

scaffold_splitter HLC linear


[I 2023-10-11 10:52:09,026] Trial 9 finished with value: 0.5061240269261343 and parameters: {'alpha': 0.0841614316752653, 'l1_ratio': 0.6697338867298197}. Best is trial 0 with value: 0.49806108695940704.
[I 2023-10-11 10:52:09,255] Trial 6 finished with value: 0.4996130225446644 and parameters: {'alpha': 0.06412579589112788, 'l1_ratio': 0.11903649592530663}. Best is trial 0 with value: 0.49806108695940704.
[I 2023-10-11 10:52:09,260] Trial 4 finished with value: 0.5011232782021834 and parameters: {'alpha': 0.04471400283680145, 'l1_ratio': 0.45507470905421354}. Best is trial 8 with value: 0.4936385454534244.
[I 2023-10-11 10:52:09,308] Trial 5 finished with value: 0.504705268714817 and parameters: {'alpha': 0.08055542721140167, 'l1_ratio': 0.7339114934171234}. Best is trial 8 with value: 0.4936385454534244.
[I 2023-10-11 10:52:09,324] Trial 2 finished with value: 0.5012842948681753 and parameters: {'alpha': 0.07832509702561602, 'l1_ratio': 0.8478270659637557}. Best is trial 8 with value

scaffold_splitter HLC KRR


[I 2023-10-11 10:52:33,628] Trial 5 finished with value: 0.5982310052970082 and parameters: {'alpha': 0.18086500114151263, 'gamma': 3.942966971598164e-15, 'kernel': 'rbf'}. Best is trial 1 with value: 0.5527180903023075.
[I 2023-10-11 10:52:33,788] Trial 8 finished with value: 0.5845501018873468 and parameters: {'alpha': 0.44992434138920384, 'gamma': 9.420234341606654e-15, 'kernel': 'linear'}. Best is trial 1 with value: 0.5527180903023075.
[I 2023-10-11 10:52:33,881] Trial 10 finished with value: 0.5609360259481903 and parameters: {'alpha': 0.3414561871961838, 'gamma': 9.830952207628223e-15, 'kernel': 'linear'}. Best is trial 1 with value: 0.5527180903023075.
[I 2023-10-11 10:52:33,943] Trial 2 finished with value: 0.5780772425855026 and parameters: {'alpha': 0.7658567401051751, 'gamma': 7.260204266544153e-15, 'kernel': 'linear'}. Best is trial 1 with value: 0.5527180903023075.
[I 2023-10-11 10:52:34,023] Trial 11 finished with value: 0.5684105469731248 and parameters: {'alpha': 0.485

scaffold_splitter HLC GB


[I 2023-10-11 10:53:03,865] Trial 3 finished with value: 0.48412821616591745 and parameters: {'n_estimators': 200, 'learning_rate': 0.3615633780360931, 'max_depth': 5}. Best is trial 0 with value: 0.48155930976594513.
[I 2023-10-11 10:53:04,011] Trial 2 finished with value: 0.47232144666930626 and parameters: {'n_estimators': 200, 'learning_rate': 0.41160039301299556, 'max_depth': 4}. Best is trial 2 with value: 0.47232144666930626.
[I 2023-10-11 10:53:04,159] Trial 11 finished with value: 0.48232700566354714 and parameters: {'n_estimators': 20, 'learning_rate': 0.3298673084576561, 'max_depth': 5}. Best is trial 2 with value: 0.47232144666930626.
[I 2023-10-11 10:53:04,376] Trial 12 finished with value: 0.4775337864945348 and parameters: {'n_estimators': 50, 'learning_rate': 0.6203318178357982, 'max_depth': 5}. Best is trial 2 with value: 0.47232144666930626.
[I 2023-10-11 10:53:04,377] Trial 5 finished with value: 0.4737587603467596 and parameters: {'n_estimators': 10, 'learning_rate'

scaffold_splitter HLC RF


[I 2023-10-11 10:54:30,211] Trial 4 finished with value: 0.4556590114777011 and parameters: {'n_estimators': 50, 'max_features': 'auto', 'max_depth': 5}. Best is trial 4 with value: 0.4556590114777011.
[I 2023-10-11 10:54:30,658] Trial 2 finished with value: 0.4551913641022682 and parameters: {'n_estimators': 200, 'max_features': 'auto', 'max_depth': 10}. Best is trial 2 with value: 0.4551913641022682.
[I 2023-10-11 10:54:30,848] Trial 8 finished with value: 0.44888300172053824 and parameters: {'n_estimators': 500, 'max_features': 'sqrt', 'max_depth': 10}. Best is trial 8 with value: 0.44888300172053824.
[I 2023-10-11 10:54:31,086] Trial 9 finished with value: 0.4567462894697057 and parameters: {'n_estimators': 50, 'max_features': 'auto', 'max_depth': 10}. Best is trial 8 with value: 0.44888300172053824.
[I 2023-10-11 10:54:31,167] Trial 3 finished with value: 0.4773299379194891 and parameters: {'n_estimators': 10, 'max_features': 'log2', 'max_depth': 2}. Best is trial 8 with value: 0.

scaffold_splitter HLC ANN


[I 2023-10-11 10:58:41,173] Trial 3 finished with value: 1.1001960384127079 and parameters: {'learning_rate_init': 0.08734537742177881, 'hidden_layer_sizes': [5]}. Best is trial 3 with value: 1.1001960384127079.
[I 2023-10-11 10:58:42,233] Trial 5 finished with value: 1.1413938897257134 and parameters: {'learning_rate_init': 0.05243182597418623, 'hidden_layer_sizes': [50, 50]}. Best is trial 3 with value: 1.1001960384127079.
[I 2023-10-11 10:58:42,459] Trial 9 finished with value: 1.1389840147883767 and parameters: {'learning_rate_init': 0.011765522341717492, 'hidden_layer_sizes': [50, 50, 50]}. Best is trial 3 with value: 1.1001960384127079.
[I 2023-10-11 10:58:42,738] Trial 6 finished with value: 1.134865027759564 and parameters: {'learning_rate_init': 0.04696993256470445, 'hidden_layer_sizes': [50]}. Best is trial 3 with value: 1.1001960384127079.
[I 2023-10-11 10:58:42,987] Trial 11 finished with value: 1.113020861930817 and parameters: {'learning_rate_init': 0.0684665307863407, 'h