In [1]:
import optuna
import joblib
import numpy as np
import pandas as pd
import sys
from project_resources.import_utils import NotebookFinder
# Register the project's import hook BEFORE the next import: cytochrome_P450 is a
# Jupyter notebook (.ipynb), and NotebookFinder is presumably what makes it importable.
# NOTE(review): the append is unconditional, so re-running this cell adds another finder.
sys.meta_path.append(NotebookFinder())
from project_resources.cytochrome_P450 import HyperparamTuner, parse_jazzy_df

importing Jupyter notebook from C:\Users\Lukas\Documents\datacytochromy\project_resources\cytochrome_P450.ipynb


In [2]:
model_identifiers = ["linear", "KRR", "GB", "RF", "ANN"]
isozymes = ["3A4", "RLM", "HLC"]
data_splits = ["train", "test"]
splitters = ["rand", "scaff"]


def _build_rel_paths():
    """Assemble the lookup table of project-relative file locations.

    For every dataset the table holds:
      "<ds>_source" / "<ds>_sep"       raw download and its CSV delimiter
      "<ds>"                           the cleaned table
      "<ds>_<split>_<splitter>"        base train/test splits
      "<ds>_jazzy_<split>_<splitter>"  train/test splits with Jazzy features

    Keys are inserted in the same order as the original hand-written literal.
    """
    root = "project_resources"
    # dataset -> (raw source file, delimiter used by that file)
    raw_sources = {
        "3A4": ("ChEMBL_3A4.csv", ";"),
        "RLM": ("AID_1508591_datatable_all.csv", ","),
        "HLC": ("AID_1508603_datatable_all.csv", ","),
    }
    # short splitter tag used in keys -> directory name used on disk
    splitter_dirs = {"scaff": "scaffold_splitter", "rand": "random"}
    # (key infix, parent directory, order in which the splitters are listed)
    layouts = (
        ("", "base_splits", ("scaff", "rand")),
        ("jazzy_", "jazzy_splits", ("rand", "scaff")),
    )

    paths = {}
    for dataset, (source_file, delimiter) in raw_sources.items():
        paths[f"{dataset}_source"] = f"{root}/{source_file}"
        paths[f"{dataset}_sep"] = delimiter
        paths[dataset] = f"{root}/{dataset}.csv"
        for infix, parent_dir, splitter_order in layouts:
            for tag in splitter_order:
                for part in ("train", "test"):
                    key = f"{dataset}_{infix}{part}_{tag}"
                    paths[key] = f"{root}/{parent_dir}/{splitter_dirs[tag]}/{dataset}_{part}.csv"
    return paths


rel_paths = _build_rel_paths()
# Sampler: the strategy that proposes the next set of hyperparameters for each trial.
# The registry maps a name to the sampler CLASS; instantiate it before handing it to a study.
samplers = dict(
    RandomSampler=optuna.samplers.RandomSampler,          # draws every parameter independently at random
    GridSampler=optuna.samplers.GridSampler,              # walks an explicit grid; needs a `search_space` at construction
    TPESampler=optuna.samplers.TPESampler,                # Tree-structured Parzen Estimator (model-based)
    CmaEsSampler=optuna.samplers.CmaEsSampler,            # Covariance Matrix Adaptation Evolution Strategy
    NSGAIISampler=optuna.samplers.NSGAIISampler,          # NSGA-II evolutionary algorithm, aimed at multi-objective studies
    QMCSampler=optuna.samplers.QMCSampler,                # quasi-Monte Carlo: low-discrepancy sequences instead of pure randomness
    BoTorchSampler=optuna.integration.BoTorchSampler,     # Bayesian optimization via BoTorch; NOTE(review): depends on optional packages - confirm they are installed
    BruteForceSampler=optuna.samplers.BruteForceSampler,  # exhaustively tries every combination of the search space
)
# Pruner: the rule that stops an unpromising trial early, based on its reported intermediate values.
# The registry maps a name to the pruner CLASS; instantiate it before handing it to a study.
pruners = dict(
    BasePruner=optuna.pruners.BasePruner,                            # abstract interface for custom pruners; not a usable pruner by itself
    MedianPruner=optuna.pruners.MedianPruner,                        # prunes a trial whose best intermediate value is worse than the median of earlier trials at the same step
    SuccessiveHalvingPruner=optuna.pruners.SuccessiveHalvingPruner,  # asynchronous successive halving: only the best fraction of trials advances to the next rung
    HyperbandPruner=optuna.pruners.HyperbandPruner,                  # Hyperband: several successive-halving brackets with different resource budgets
    PercentilePruner=optuna.pruners.PercentilePruner,                # prunes a trial that falls outside the configured percentile of trials at the same step
    NopPruner=optuna.pruners.NopPruner,                              # never prunes
    ThresholdPruner=optuna.pruners.ThresholdPruner,                  # prunes when an intermediate value crosses a fixed lower/upper threshold
    PatientPruner=optuna.pruners.PatientPruner,                      # wraps another pruner and tolerates a number of non-improving steps first
)
# Filled by the loading cell: <dict>[splitter][isozyme][split]
halflives = {}
mol_features = {}

In [3]:
# Read the Jazzy feature tables for every splitter / isozyme / split combination and
# store the parsed results as
#   mol_features[splitter][isozyme][split]  and  halflives[splitter][isozyme][split]
for splitter in splitters:
    print("\n")
    print(splitter)
    # fresh, empty per-isozyme containers for this splitter
    mol_features[splitter] = {name: {} for name in isozymes}
    halflives[splitter] = {name: {} for name in isozymes}
    for isozyme in isozymes:
        # aliases into the nested dicts; writing through them fills the global tables
        features_by_split = mol_features[splitter][isozyme]
        halflives_by_split = halflives[splitter][isozyme]
        for split in data_splits:
            print(isozyme, split)
            df = pd.read_csv(rel_paths[f"{isozyme}_jazzy_{split}_{splitter}"])
            # parse_jazzy_df hands back the feature rows, the half-lives and a NaN flag
            features, thalfs, contains_nan = parse_jazzy_df(df)
            features_by_split[split] = features
            halflives_by_split[split] = thalfs
            # show the first feature row as a quick sanity check
            print(f"     {mol_features[splitter][isozyme][split][0]}")
            print(f"     {isozyme} mol_features {split} contains NaN: {contains_nan}")



rand
3A4 train
     56, [63, 'CC(C)(O)c1cc(F)c2c(c1)C(=O)N(Cc1ccc(Cl)cn1)[C@@]2(OCC1(O)CC1)c1ccc(Cl)cc1', 6.0, 13.1753, 1.4177, 11.3044, -7.3559, -125.8906, -119.4889]
     [  13.1753    1.4177   11.3044   -7.3559 -125.8906 -119.4889]
     3A4 mol_features train contains NaN: False
3A4 test
     14, [23, 'Cc1ncsc1-c1ccc([C@H](CC(=O)NCCCCCCNC(=O)COc2c(-c3csc(N4CCOCC4)n3)ccc(F)c2F)NC(=O)[C@@H]2C[C@@H](O)CN2C(=O)[C@@H](NC(=O)C2(F)CC2)C(C)(C)C)cc1', 0.3767, 28.4327, 3.274, 23.4872, -4.0348, -286.1545, -257.0326]
     [  28.4327    3.274    23.4872   -4.0348 -286.1545 -257.0326]
     3A4 mol_features test contains NaN: False
RLM train
     removed index 1047 corresponding to NaN
     removed index 1517 corresponding to NaN
     2022, [158, 'O=c1cc(N2CCOCC2)oc2c1ccc1ccccc12', 30.0, 8.0486, 0.0, 5.2165, -13.5913, -63.7029, -67.7504]
     [  8.0486   0.       5.2165 -13.5913 -63.7029 -67.7504]
     RLM mol_features train contains NaN: False
RLM test
     removed index 174 corresponding to Na

In [5]:
# Hyperparameter search: one Optuna study per (splitter, isozyme, model) combination.
#
# `sampler` and `pruner` hold the selected CLASSES; a fresh instance is created for
# every study below so that no sampler/pruner state is shared between studies.
sampler = samplers['TPESampler']
# NopPruner replaces the former BasePruner entry: BasePruner is only the abstract
# interface, and it was handed to the study as a class rather than an instance, so it
# could never have pruned anything. NopPruner states the same "no pruning" intent
# with a valid pruner object.
# NOTE(review): presumably HyperparamTuner.objective never calls trial.should_prune();
# pick a real pruner here if it does.
pruner = pruners["NopPruner"]
n_trials = 200
for splitter in splitters:
    if splitter == "rand":
        splitter_name = "random"
    else:
        splitter_name = "scaffold_splitter"

    for isozyme in isozymes:
        X_train = mol_features[splitter][isozyme]["train"]
        y_train = np.log(halflives[splitter][isozyme]["train"])  # natural log half-lives -> values are less spread out
        X_test = mol_features[splitter][isozyme]["test"]
        y_test = np.log(halflives[splitter][isozyme]["test"])
        for model_identifier in model_identifiers:
            print(splitter_name, isozyme, model_identifier)
            # single source of truth for where this study's database and pickle live
            study_dir = f"project_resources/optuna/jazzy/{splitter_name}/{isozyme}"
            # load_if_exists=True resumes a stored study of the same name, so re-running
            # this cell adds another `n_trials` trials on top of the existing ones.
            study = optuna.create_study(study_name=model_identifier, directions=['minimize'],
                                        sampler=sampler(), pruner=pruner(),
                                        storage=f"sqlite:///{study_dir}/db.{model_identifier}", load_if_exists=True)
            test = HyperparamTuner(model_identifier, X_train, y_train, X_test, y_test)
            study.optimize(test.objective, n_trials=n_trials, n_jobs=-1)  # catch=(ValueError,)
            joblib.dump(study, f"./{study_dir}/{model_identifier}.pkl")

[I 2023-10-11 06:59:12,987] Using an existing study with name 'linear' instead of creating a new one.


random 3A4 linear


[I 2023-10-11 06:59:13,671] Trial 8 finished with value: 1.4055792338666269 and parameters: {'alpha': 0.017591569877912345, 'l1_ratio': 0.964680833116307}. Best is trial 0 with value: 1.4038627192898365.
[I 2023-10-11 06:59:13,937] Trial 11 finished with value: 1.361241861694033 and parameters: {'alpha': 0.0812355109698709, 'l1_ratio': 0.08450223915968114}. Best is trial 11 with value: 1.361241861694033.
[I 2023-10-11 06:59:14,092] Trial 7 finished with value: 1.4178962175797631 and parameters: {'alpha': 0.009864948436884355, 'l1_ratio': 0.9901756813079965}. Best is trial 11 with value: 1.361241861694033.
[I 2023-10-11 06:59:14,343] Trial 14 finished with value: 1.3812513765516237 and parameters: {'alpha': 0.08910494275045223, 'l1_ratio': 0.5135890752430664}. Best is trial 11 with value: 1.361241861694033.
[I 2023-10-11 06:59:14,344] Trial 13 finished with value: 1.4122052683532835 and parameters: {'alpha': 0.07428471558688238, 'l1_ratio': 0.15148988414336717}. Best is trial 11 with va

random 3A4 KRR


[I 2023-10-11 06:59:41,765] Trial 13 finished with value: 1.453434684705731 and parameters: {'alpha': 0.833868652227199, 'gamma': 6.1920434330254446e-15, 'kernel': 'rbf'}. Best is trial 9 with value: 1.376115276011723.
[I 2023-10-11 06:59:41,774] Trial 9 finished with value: 1.376115276011723 and parameters: {'alpha': 0.7868905362344104, 'gamma': 5.224902974359913e-15, 'kernel': 'rbf'}. Best is trial 9 with value: 1.376115276011723.
[I 2023-10-11 06:59:41,886] Trial 5 finished with value: 1.3840306290217816 and parameters: {'alpha': 0.2255045958969689, 'gamma': 6.081953865800383e-15, 'kernel': 'linear'}. Best is trial 9 with value: 1.376115276011723.
[I 2023-10-11 06:59:41,983] Trial 6 finished with value: 1.4109942622308378 and parameters: {'alpha': 0.9639486790426622, 'gamma': 7.32624119186187e-16, 'kernel': 'rbf'}. Best is trial 9 with value: 1.376115276011723.
[I 2023-10-11 06:59:42,011] Trial 3 finished with value: 1.3867713855700263 and parameters: {'alpha': 0.4851231136817665, '

random 3A4 GB


[I 2023-10-11 07:00:11,519] Trial 6 finished with value: 1.1746766766563306 and parameters: {'n_estimators': 200, 'learning_rate': 0.7752508595747122, 'max_depth': 1}. Best is trial 1 with value: 1.1273370837555208.
[I 2023-10-11 07:00:12,411] Trial 8 finished with value: 1.295103889038406 and parameters: {'n_estimators': 500, 'learning_rate': 0.13152458935919364, 'max_depth': 2}. Best is trial 1 with value: 1.1273370837555208.
[I 2023-10-11 07:00:12,673] Trial 3 finished with value: 1.2398568865451491 and parameters: {'n_estimators': 500, 'learning_rate': 0.6939824925550618, 'max_depth': 2}. Best is trial 1 with value: 1.1273370837555208.
[I 2023-10-11 07:00:12,860] Trial 4 finished with value: 1.2822310873557992 and parameters: {'n_estimators': 20, 'learning_rate': 0.88295356330156, 'max_depth': 2}. Best is trial 1 with value: 1.1273370837555208.
[I 2023-10-11 07:00:12,871] Trial 7 finished with value: 1.1062570255195852 and parameters: {'n_estimators': 50, 'learning_rate': 0.4303802

random 3A4 RF


[I 2023-10-11 07:01:40,092] Trial 3 finished with value: 1.274729191677178 and parameters: {'n_estimators': 20, 'max_features': 'sqrt', 'max_depth': 2}. Best is trial 1 with value: 1.2481594964941076.
[I 2023-10-11 07:01:40,835] Trial 9 finished with value: 1.2131377438851727 and parameters: {'n_estimators': 500, 'max_features': 'auto', 'max_depth': 2}. Best is trial 9 with value: 1.2131377438851727.
[I 2023-10-11 07:01:40,875] Trial 5 finished with value: 1.2717945474655579 and parameters: {'n_estimators': 500, 'max_features': 'log2', 'max_depth': 4}. Best is trial 9 with value: 1.2131377438851727.
[I 2023-10-11 07:01:41,410] Trial 6 finished with value: 1.213418881723695 and parameters: {'n_estimators': 50, 'max_features': 'sqrt', 'max_depth': 5}. Best is trial 4 with value: 1.1617976235736898.
[I 2023-10-11 07:01:41,471] Trial 4 finished with value: 1.1617976235736898 and parameters: {'n_estimators': 10, 'max_features': 'sqrt', 'max_depth': 2}. Best is trial 7 with value: 1.13177041

random 3A4 ANN


[I 2023-10-11 07:06:08,907] Trial 12 finished with value: 5.329007338350352 and parameters: {'learning_rate_init': 0.09524134809217391, 'hidden_layer_sizes': [20]}. Best is trial 0 with value: 3.2120353102656423.
[I 2023-10-11 07:06:12,135] Trial 7 finished with value: 4.839709670418197 and parameters: {'learning_rate_init': 0.006786317182791311, 'hidden_layer_sizes': [50]}. Best is trial 0 with value: 3.2120353102656423.
[I 2023-10-11 07:06:14,454] Trial 13 finished with value: 4.701366015841191 and parameters: {'learning_rate_init': 0.05639422619014486, 'hidden_layer_sizes': [20]}. Best is trial 0 with value: 3.2120353102656423.
[I 2023-10-11 07:06:15,681] Trial 6 finished with value: 4.362874117490541 and parameters: {'learning_rate_init': 0.03192952448833296, 'hidden_layer_sizes': [10, 10]}. Best is trial 0 with value: 3.2120353102656423.
[I 2023-10-11 07:06:16,299] Trial 8 finished with value: 4.875844480886632 and parameters: {'learning_rate_init': 0.05397213877600072, 'hidden_la

random RLM linear


[I 2023-10-11 07:09:05,122] Trial 13 finished with value: 1.0538883721311816 and parameters: {'alpha': 0.08665732094421544, 'l1_ratio': 0.2758144044066958}. Best is trial 0 with value: 1.0537742063632034.
[I 2023-10-11 07:09:05,329] Trial 9 finished with value: 1.053776487005757 and parameters: {'alpha': 0.07531651941507833, 'l1_ratio': 0.18047669319717408}. Best is trial 0 with value: 1.0537742063632034.
[I 2023-10-11 07:09:05,510] Trial 2 finished with value: 1.0541121047648585 and parameters: {'alpha': 0.036108485857179046, 'l1_ratio': 0.021178051617604665}. Best is trial 0 with value: 1.0537742063632034.
[I 2023-10-11 07:09:05,653] Trial 6 finished with value: 1.054082509111424 and parameters: {'alpha': 0.0476539203989834, 'l1_ratio': 0.5110785018815447}. Best is trial 0 with value: 1.0537742063632034.
[I 2023-10-11 07:09:05,732] Trial 4 finished with value: 1.0543195788924697 and parameters: {'alpha': 0.07443497779314358, 'l1_ratio': 0.7219024132746201}. Best is trial 0 with value

random RLM KRR


[I 2023-10-11 07:09:49,706] Trial 5 finished with value: 1.1090418429557776 and parameters: {'alpha': 0.19075053370348397, 'gamma': 2.8793219414623415e-15, 'kernel': 'linear'}. Best is trial 0 with value: 1.108322815229059.
[I 2023-10-11 07:09:50,785] Trial 2 finished with value: 1.1088542862044672 and parameters: {'alpha': 0.6368354815310948, 'gamma': 3.129086884561939e-15, 'kernel': 'linear'}. Best is trial 0 with value: 1.108322815229059.
[I 2023-10-11 07:09:51,598] Trial 6 finished with value: 1.110303467392411 and parameters: {'alpha': 0.9156040375000805, 'gamma': 7.994864819236114e-16, 'kernel': 'linear'}. Best is trial 0 with value: 1.108322815229059.
[I 2023-10-11 07:09:51,613] Trial 3 finished with value: 1.108951732992194 and parameters: {'alpha': 0.9688395125247294, 'gamma': 1.92428709584352e-15, 'kernel': 'rbf'}. Best is trial 0 with value: 1.108322815229059.
[I 2023-10-11 07:09:52,332] Trial 4 finished with value: 1.1087444829350186 and parameters: {'alpha': 0.720116943865

random RLM GB


[I 2023-10-11 07:12:24,804] Trial 3 finished with value: 0.9971127400594756 and parameters: {'n_estimators': 500, 'learning_rate': 0.305205284905323, 'max_depth': 5}. Best is trial 1 with value: 0.9927576709342737.
[I 2023-10-11 07:12:25,001] Trial 8 finished with value: 0.9948546917147546 and parameters: {'n_estimators': 200, 'learning_rate': 0.42727868398089824, 'max_depth': 5}. Best is trial 1 with value: 0.9927576709342737.
[I 2023-10-11 07:12:25,301] Trial 5 finished with value: 0.9940797272178911 and parameters: {'n_estimators': 200, 'learning_rate': 0.4986562516990842, 'max_depth': 3}. Best is trial 1 with value: 0.9927576709342737.
[I 2023-10-11 07:12:25,426] Trial 2 finished with value: 0.9966898811797016 and parameters: {'n_estimators': 200, 'learning_rate': 0.696856730810548, 'max_depth': 1}. Best is trial 1 with value: 0.9927576709342737.
[I 2023-10-11 07:12:25,628] Trial 9 finished with value: 0.991984353416031 and parameters: {'n_estimators': 200, 'learning_rate': 0.61354

random RLM RF


[I 2023-10-11 07:15:03,872] Trial 7 finished with value: 0.9933711130156806 and parameters: {'n_estimators': 10, 'max_features': 'auto', 'max_depth': 2}. Best is trial 0 with value: 0.9926985913490077.
[I 2023-10-11 07:15:04,043] Trial 9 finished with value: 0.9917688179901746 and parameters: {'n_estimators': 50, 'max_features': 'log2', 'max_depth': 2}. Best is trial 9 with value: 0.9917688179901746.
[I 2023-10-11 07:15:04,268] Trial 3 finished with value: 0.9975601700994127 and parameters: {'n_estimators': 10, 'max_features': 'log2', 'max_depth': 4}. Best is trial 9 with value: 0.9917688179901746.
[I 2023-10-11 07:15:04,309] Trial 8 finished with value: 0.993859852656677 and parameters: {'n_estimators': 20, 'max_features': 'auto', 'max_depth': 4}. Best is trial 9 with value: 0.9917688179901746.
[I 2023-10-11 07:15:04,317] Trial 4 finished with value: 0.9984947014379087 and parameters: {'n_estimators': 200, 'max_features': 'auto', 'max_depth': 5}. Best is trial 9 with value: 0.99176881

random RLM ANN


[I 2023-10-11 07:30:12,912] Trial 11 finished with value: 1.0491034500855028 and parameters: {'learning_rate_init': 0.03552626816613149, 'hidden_layer_sizes': [5]}. Best is trial 0 with value: 1.0440669369038673.
[I 2023-10-11 07:30:38,977] Trial 3 finished with value: 1.0593046474479866 and parameters: {'learning_rate_init': 0.06751507627981412, 'hidden_layer_sizes': [50]}. Best is trial 0 with value: 1.0440669369038673.
[I 2023-10-11 07:30:45,604] Trial 6 finished with value: 1.045311101796673 and parameters: {'learning_rate_init': 0.04826736094443037, 'hidden_layer_sizes': [50, 50]}. Best is trial 0 with value: 1.0440669369038673.
[I 2023-10-11 07:30:51,807] Trial 7 finished with value: 1.0497883646589117 and parameters: {'learning_rate_init': 0.036656095490928194, 'hidden_layer_sizes': [10, 10, 10]}. Best is trial 0 with value: 1.0440669369038673.
[I 2023-10-11 07:30:52,179] Trial 4 finished with value: 1.067119126404242 and parameters: {'learning_rate_init': 0.004454265579415348, 

random HLC linear


[I 2023-10-11 08:12:29,695] Trial 7 finished with value: 0.6006157638973132 and parameters: {'alpha': 0.04741106644007442, 'l1_ratio': 0.5842422880292795}. Best is trial 7 with value: 0.6006157638973132.
[I 2023-10-11 08:12:29,739] Trial 5 finished with value: 0.6144125990227162 and parameters: {'alpha': 0.09305019015954856, 'l1_ratio': 0.6483590830950197}. Best is trial 7 with value: 0.6006157638973132.
[I 2023-10-11 08:12:29,802] Trial 13 finished with value: 0.6082718433158579 and parameters: {'alpha': 0.07471034422850172, 'l1_ratio': 0.7766618521150315}. Best is trial 7 with value: 0.6006157638973132.
[I 2023-10-11 08:12:29,912] Trial 12 finished with value: 0.6087632270890871 and parameters: {'alpha': 0.06600816795664033, 'l1_ratio': 0.2311037627399687}. Best is trial 7 with value: 0.6006157638973132.
[I 2023-10-11 08:12:30,004] Trial 9 finished with value: 0.6158413265312037 and parameters: {'alpha': 0.01769403957862713, 'l1_ratio': 0.6028560368231882}. Best is trial 7 with value

random HLC KRR


[I 2023-10-11 08:13:05,586] Trial 11 finished with value: 0.8811725857609128 and parameters: {'alpha': 0.5208414556754035, 'gamma': 3.200536218683876e-15, 'kernel': 'linear'}. Best is trial 11 with value: 0.8811725857609128.
[I 2023-10-11 08:13:05,636] Trial 13 finished with value: 0.8964109532436854 and parameters: {'alpha': 0.2997724604778846, 'gamma': 2.940162125189184e-15, 'kernel': 'laplacian'}. Best is trial 11 with value: 0.8811725857609128.
[I 2023-10-11 08:13:05,691] Trial 8 finished with value: 0.892894558235282 and parameters: {'alpha': 0.3718831440454651, 'gamma': 8.804120786196344e-15, 'kernel': 'linear'}. Best is trial 11 with value: 0.8811725857609128.
[I 2023-10-11 08:13:05,747] Trial 9 finished with value: 0.8719263970958355 and parameters: {'alpha': 0.5684225077060806, 'gamma': 5.4122570980557125e-15, 'kernel': 'linear'}. Best is trial 9 with value: 0.8719263970958355.
[I 2023-10-11 08:13:05,750] Trial 3 finished with value: 0.8862187473178363 and parameters: {'alpha'

random HLC GB


[I 2023-10-11 08:13:45,183] Trial 12 finished with value: 0.6475700696891846 and parameters: {'n_estimators': 20, 'learning_rate': 0.07620925009967527, 'max_depth': 4}. Best is trial 0 with value: 0.6320929573668942.
[I 2023-10-11 08:13:45,524] Trial 7 finished with value: 0.6535706512124418 and parameters: {'n_estimators': 20, 'learning_rate': 0.8792243444132004, 'max_depth': 5}. Best is trial 0 with value: 0.6320929573668942.
[I 2023-10-11 08:13:45,750] Trial 5 finished with value: 0.6636973669090133 and parameters: {'n_estimators': 20, 'learning_rate': 0.6016126190101292, 'max_depth': 2}. Best is trial 0 with value: 0.6320929573668942.
[I 2023-10-11 08:13:46,058] Trial 11 finished with value: 0.6506207502038145 and parameters: {'n_estimators': 20, 'learning_rate': 0.32575770379604313, 'max_depth': 3}. Best is trial 0 with value: 0.6320929573668942.
[I 2023-10-11 08:13:46,392] Trial 10 finished with value: 0.6451904318150911 and parameters: {'n_estimators': 10, 'learning_rate': 0.979

random HLC RF


[I 2023-10-11 08:16:04,747] Trial 5 finished with value: 0.6070505247613139 and parameters: {'n_estimators': 50, 'max_features': 'auto', 'max_depth': 4}. Best is trial 0 with value: 0.5887749788625218.
[I 2023-10-11 08:16:05,329] Trial 12 finished with value: 0.6055871386941257 and parameters: {'n_estimators': 10, 'max_features': 'sqrt', 'max_depth': 4}. Best is trial 0 with value: 0.5887749788625218.
[I 2023-10-11 08:16:05,764] Trial 10 finished with value: 0.6021925494908782 and parameters: {'n_estimators': 50, 'max_features': 'log2', 'max_depth': 3}. Best is trial 0 with value: 0.5887749788625218.
[I 2023-10-11 08:16:06,170] Trial 2 finished with value: 0.6075575581497167 and parameters: {'n_estimators': 20, 'max_features': 'log2', 'max_depth': 4}. Best is trial 0 with value: 0.5887749788625218.
[I 2023-10-11 08:16:06,188] Trial 7 finished with value: 0.6097579477503889 and parameters: {'n_estimators': 200, 'max_features': 'auto', 'max_depth': 2}. Best is trial 0 with value: 0.58877

random HLC ANN


[I 2023-10-11 08:23:14,303] Trial 4 finished with value: 3.168961171113325 and parameters: {'learning_rate_init': 0.02618605905347908, 'hidden_layer_sizes': [10, 10]}. Best is trial 1 with value: 2.322899019219586.
[I 2023-10-11 08:23:14,629] Trial 11 finished with value: 3.3908673113056556 and parameters: {'learning_rate_init': 0.024464821530531124, 'hidden_layer_sizes': [50, 50, 50]}. Best is trial 1 with value: 2.322899019219586.
[I 2023-10-11 08:23:15,046] Trial 9 finished with value: 2.8696075934766614 and parameters: {'learning_rate_init': 0.08013988582454275, 'hidden_layer_sizes': [20]}. Best is trial 1 with value: 2.322899019219586.
[I 2023-10-11 08:23:15,562] Trial 13 finished with value: 2.5285115674394394 and parameters: {'learning_rate_init': 0.03600403567270159, 'hidden_layer_sizes': [20]}. Best is trial 1 with value: 2.322899019219586.
[I 2023-10-11 08:23:16,710] Trial 2 finished with value: 2.890698807005089 and parameters: {'learning_rate_init': 0.08929414501850622, 'hi

scaffold_splitter 3A4 linear


[I 2023-10-11 08:26:41,557] Trial 13 finished with value: 1.4116554612264807 and parameters: {'alpha': 0.05814610519496384, 'l1_ratio': 0.33507814471673625}. Best is trial 1 with value: 1.3839503875270378.
[I 2023-10-11 08:26:41,953] Trial 4 finished with value: 1.4155840305891445 and parameters: {'alpha': 0.04713704215321248, 'l1_ratio': 0.7293974592650193}. Best is trial 1 with value: 1.3839503875270378.
[I 2023-10-11 08:26:41,961] Trial 7 finished with value: 1.3869662502355027 and parameters: {'alpha': 0.04174586722926792, 'l1_ratio': 0.346640444760719}. Best is trial 1 with value: 1.3839503875270378.
[I 2023-10-11 08:26:41,979] Trial 9 finished with value: 1.3955431008752317 and parameters: {'alpha': 0.07691353485308301, 'l1_ratio': 0.5124650719339627}. Best is trial 1 with value: 1.3839503875270378.
[I 2023-10-11 08:26:42,012] Trial 2 finished with value: 1.3966760092595374 and parameters: {'alpha': 0.07841288295427219, 'l1_ratio': 0.9724373294839493}. Best is trial 1 with value:

scaffold_splitter 3A4 KRR


[I 2023-10-11 08:27:18,241] Trial 5 finished with value: 1.3859945305041779 and parameters: {'alpha': 0.19342333960383826, 'gamma': 4.386713304826955e-15, 'kernel': 'linear'}. Best is trial 5 with value: 1.3859945305041779.
[I 2023-10-11 08:27:18,273] Trial 4 finished with value: 1.4294563793278137 and parameters: {'alpha': 0.2497415864483582, 'gamma': 3.8647042715511284e-15, 'kernel': 'laplacian'}. Best is trial 5 with value: 1.3859945305041779.
[I 2023-10-11 08:27:18,281] Trial 3 finished with value: 1.4416195679875166 and parameters: {'alpha': 0.7641790636875261, 'gamma': 3.255592921132706e-15, 'kernel': 'laplacian'}. Best is trial 5 with value: 1.3859945305041779.
[I 2023-10-11 08:27:18,323] Trial 10 finished with value: 1.4296353669738153 and parameters: {'alpha': 0.004873117267565607, 'gamma': 7.221260371093786e-15, 'kernel': 'linear'}. Best is trial 5 with value: 1.3859945305041779.
[I 2023-10-11 08:27:18,351] Trial 12 finished with value: 1.4294973584056254 and parameters: {'al

scaffold_splitter 3A4 GB


[I 2023-10-11 08:27:56,816] Trial 11 finished with value: 1.0705788636404205 and parameters: {'n_estimators': 50, 'learning_rate': 0.7934659590666656, 'max_depth': 2}. Best is trial 11 with value: 1.0705788636404205.
[I 2023-10-11 08:27:57,295] Trial 13 finished with value: 1.2143792198675443 and parameters: {'n_estimators': 500, 'learning_rate': 0.9893214884986163, 'max_depth': 5}. Best is trial 11 with value: 1.0705788636404205.
[I 2023-10-11 08:27:57,734] Trial 4 finished with value: 1.2081090449570047 and parameters: {'n_estimators': 500, 'learning_rate': 0.8538311063418639, 'max_depth': 3}. Best is trial 11 with value: 1.0705788636404205.
[I 2023-10-11 08:27:58,305] Trial 12 finished with value: 1.1668244588061079 and parameters: {'n_estimators': 20, 'learning_rate': 0.9064943355893861, 'max_depth': 4}. Best is trial 11 with value: 1.0705788636404205.
[I 2023-10-11 08:27:58,348] Trial 6 finished with value: 1.045493001163724 and parameters: {'n_estimators': 50, 'learning_rate': 0.

scaffold_splitter 3A4 RF


[I 2023-10-11 08:30:15,336] Trial 4 finished with value: 1.2129155648188141 and parameters: {'n_estimators': 200, 'max_features': 'log2', 'max_depth': 4}. Best is trial 9 with value: 1.1565868959427201.
[I 2023-10-11 08:30:15,390] Trial 9 finished with value: 1.1565868959427201 and parameters: {'n_estimators': 200, 'max_features': 'auto', 'max_depth': None}. Best is trial 9 with value: 1.1565868959427201.
[I 2023-10-11 08:30:16,126] Trial 6 finished with value: 1.1629253660372132 and parameters: {'n_estimators': 20, 'max_features': 'auto', 'max_depth': 10}. Best is trial 2 with value: 1.1283767222591576.
[I 2023-10-11 08:30:16,278] Trial 2 finished with value: 1.1283767222591576 and parameters: {'n_estimators': 20, 'max_features': 'sqrt', 'max_depth': 4}. Best is trial 2 with value: 1.1283767222591576.
[I 2023-10-11 08:30:16,439] Trial 10 finished with value: 1.115261135752176 and parameters: {'n_estimators': 50, 'max_features': 'sqrt', 'max_depth': 10}. Best is trial 10 with value: 1.

scaffold_splitter 3A4 ANN


[I 2023-10-11 08:37:18,578] Trial 8 finished with value: 5.487372023666164 and parameters: {'learning_rate_init': 0.04598854318431147, 'hidden_layer_sizes': [50, 50, 50]}. Best is trial 0 with value: 3.587870843320496.
[I 2023-10-11 08:37:19,194] Trial 7 finished with value: 4.552444985898613 and parameters: {'learning_rate_init': 0.023104595967124962, 'hidden_layer_sizes': [10]}. Best is trial 0 with value: 3.587870843320496.
[I 2023-10-11 08:37:20,486] Trial 11 finished with value: 4.295285618843409 and parameters: {'learning_rate_init': 0.07841621388023659, 'hidden_layer_sizes': [5, 5]}. Best is trial 0 with value: 3.587870843320496.
[I 2023-10-11 08:37:20,517] Trial 2 finished with value: 5.707887702066501 and parameters: {'learning_rate_init': 0.02597624683660189, 'hidden_layer_sizes': [20]}. Best is trial 0 with value: 3.587870843320496.
[I 2023-10-11 08:37:21,605] Trial 9 finished with value: 4.180265730521675 and parameters: {'learning_rate_init': 0.03557536080878476, 'hidden_l

scaffold_splitter RLM linear


[I 2023-10-11 08:40:22,413] Trial 2 finished with value: 1.0608829060523188 and parameters: {'alpha': 0.020173672330248155, 'l1_ratio': 0.5165156097780443}. Best is trial 0 with value: 1.060306211969177.
[I 2023-10-11 08:40:22,623] Trial 11 finished with value: 1.0605713040524343 and parameters: {'alpha': 0.09098041939503676, 'l1_ratio': 0.7614545702114944}. Best is trial 0 with value: 1.060306211969177.
[I 2023-10-11 08:40:22,633] Trial 3 finished with value: 1.0610938005458481 and parameters: {'alpha': 0.0890888377611793, 'l1_ratio': 0.7699531881304675}. Best is trial 0 with value: 1.060306211969177.
[I 2023-10-11 08:40:22,641] Trial 5 finished with value: 1.060559931358185 and parameters: {'alpha': 0.04447563062593482, 'l1_ratio': 0.6738953575185068}. Best is trial 0 with value: 1.060306211969177.
[I 2023-10-11 08:40:23,047] Trial 12 finished with value: 1.0609152284045111 and parameters: {'alpha': 0.029710497770833597, 'l1_ratio': 0.6712460270791114}. Best is trial 9 with value: 1.

scaffold_splitter RLM KRR


[I 2023-10-11 08:41:04,857] Trial 8 finished with value: 1.111492373101443 and parameters: {'alpha': 0.07134648692222449, 'gamma': 1.9111243196104022e-16, 'kernel': 'linear'}. Best is trial 1 with value: 1.110939859904786.
[I 2023-10-11 08:41:05,722] Trial 13 finished with value: 1.1111956606451592 and parameters: {'alpha': 0.13408849138658857, 'gamma': 7.852295216391335e-15, 'kernel': 'rbf'}. Best is trial 1 with value: 1.110939859904786.
[I 2023-10-11 08:41:05,807] Trial 5 finished with value: 1.111479747725716 and parameters: {'alpha': 0.6975019756405307, 'gamma': 6.854231057381431e-15, 'kernel': 'rbf'}. Best is trial 1 with value: 1.110939859904786.
[I 2023-10-11 08:41:06,912] Trial 9 finished with value: 1.1119642307650304 and parameters: {'alpha': 0.9380036836373646, 'gamma': 6.64212629985226e-15, 'kernel': 'rbf'}. Best is trial 1 with value: 1.110939859904786.
[I 2023-10-11 08:41:07,212] Trial 4 finished with value: 1.112305653805685 and parameters: {'alpha': 0.10572290701791592

scaffold_splitter RLM GB


[I 2023-10-11 08:43:53,780] Trial 13 finished with value: 1.0087369200452228 and parameters: {'n_estimators': 500, 'learning_rate': 0.28339184138269535, 'max_depth': 1}. Best is trial 0 with value: 1.0062548683072043.
[I 2023-10-11 08:43:53,975] Trial 5 finished with value: 1.0042393090579163 and parameters: {'n_estimators': 10, 'learning_rate': 0.039682426217908774, 'max_depth': 1}. Best is trial 5 with value: 1.0042393090579163.
[I 2023-10-11 08:43:54,080] Trial 12 finished with value: 1.0045574957200454 and parameters: {'n_estimators': 10, 'learning_rate': 0.3085919501245398, 'max_depth': 1}. Best is trial 5 with value: 1.0042393090579163.
[I 2023-10-11 08:43:54,422] Trial 3 finished with value: 1.0085355392283992 and parameters: {'n_estimators': 10, 'learning_rate': 0.7022299666861104, 'max_depth': 4}. Best is trial 5 with value: 1.0042393090579163.
[I 2023-10-11 08:43:54,763] Trial 2 finished with value: 1.002922423799623 and parameters: {'n_estimators': 10, 'learning_rate': 0.803

scaffold_splitter RLM RF


[I 2023-10-11 08:46:39,372] Trial 6 finished with value: 1.0022231363834877 and parameters: {'n_estimators': 20, 'max_features': 'log2', 'max_depth': None}. Best is trial 0 with value: 0.996736952308356.
[I 2023-10-11 08:46:39,636] Trial 2 finished with value: 1.004020944053835 and parameters: {'n_estimators': 10, 'max_features': 'sqrt', 'max_depth': 2}. Best is trial 0 with value: 0.996736952308356.
[I 2023-10-11 08:46:40,117] Trial 7 finished with value: 1.0016935917829726 and parameters: {'n_estimators': 500, 'max_features': 'auto', 'max_depth': 5}. Best is trial 0 with value: 0.996736952308356.
[I 2023-10-11 08:46:40,372] Trial 10 finished with value: 0.9966031680985044 and parameters: {'n_estimators': 50, 'max_features': 'auto', 'max_depth': 3}. Best is trial 10 with value: 0.9966031680985044.
[I 2023-10-11 08:46:40,396] Trial 8 finished with value: 1.0011139056188167 and parameters: {'n_estimators': 50, 'max_features': 'auto', 'max_depth': 4}. Best is trial 10 with value: 0.99660

scaffold_splitter RLM ANN


[I 2023-10-11 08:57:29,910] Trial 5 finished with value: 1.0363732224334516 and parameters: {'learning_rate_init': 0.06938000055364091, 'hidden_layer_sizes': [50]}. Best is trial 5 with value: 1.0363732224334516.
[I 2023-10-11 08:57:35,637] Trial 6 finished with value: 1.0466837239814568 and parameters: {'learning_rate_init': 0.08398638001554032, 'hidden_layer_sizes': [5, 5, 5]}. Best is trial 5 with value: 1.0363732224334516.
[I 2023-10-11 08:57:38,290] Trial 8 finished with value: 1.0591310977784383 and parameters: {'learning_rate_init': 0.0993777909915001, 'hidden_layer_sizes': [50, 50, 50]}. Best is trial 5 with value: 1.0363732224334516.
[I 2023-10-11 08:57:39,180] Trial 9 finished with value: 1.0609988371722074 and parameters: {'learning_rate_init': 0.08318928177692181, 'hidden_layer_sizes': [5, 5]}. Best is trial 5 with value: 1.0363732224334516.
[I 2023-10-11 08:57:43,712] Trial 4 finished with value: 1.0504969970840738 and parameters: {'learning_rate_init': 0.00861852531784046

scaffold_splitter HLC linear


[I 2023-10-11 09:35:10,862] Trial 7 finished with value: 0.49122953326268903 and parameters: {'alpha': 0.05820932667614205, 'l1_ratio': 0.08725251752534413}. Best is trial 9 with value: 0.4889365003217509.
[I 2023-10-11 09:35:10,930] Trial 4 finished with value: 0.4942054907495616 and parameters: {'alpha': 0.023070945564157647, 'l1_ratio': 0.3660811658204609}. Best is trial 9 with value: 0.4889365003217509.
[I 2023-10-11 09:35:10,935] Trial 2 finished with value: 0.4899080289901326 and parameters: {'alpha': 0.07925265122773831, 'l1_ratio': 0.5272781425090791}. Best is trial 9 with value: 0.4889365003217509.
[I 2023-10-11 09:35:10,948] Trial 5 finished with value: 0.4897176694942813 and parameters: {'alpha': 0.07052518283770319, 'l1_ratio': 0.09897811666431622}. Best is trial 9 with value: 0.4889365003217509.
[I 2023-10-11 09:35:11,121] Trial 9 finished with value: 0.4889365003217509 and parameters: {'alpha': 0.0034895280165306644, 'l1_ratio': 0.7739813582419609}. Best is trial 6 with v

scaffold_splitter HLC KRR


[I 2023-10-11 09:35:41,457] Trial 2 finished with value: 0.8118450786401424 and parameters: {'alpha': 0.6918015558179385, 'gamma': 2.170713710274548e-15, 'kernel': 'rbf'}. Best is trial 1 with value: 0.7952621275750551.
[I 2023-10-11 09:35:41,941] Trial 9 finished with value: 0.7988344822153398 and parameters: {'alpha': 0.5183317500907673, 'gamma': 5.943599468400056e-15, 'kernel': 'linear'}. Best is trial 8 with value: 0.7938986712331727.
[I 2023-10-11 09:35:42,015] Trial 8 finished with value: 0.7938986712331727 and parameters: {'alpha': 0.34920908659862254, 'gamma': 7.620114703127559e-15, 'kernel': 'laplacian'}. Best is trial 8 with value: 0.7938986712331727.
[I 2023-10-11 09:35:42,102] Trial 7 finished with value: 0.7988263962803521 and parameters: {'alpha': 0.33297988152503344, 'gamma': 3.236065504439756e-16, 'kernel': 'linear'}. Best is trial 8 with value: 0.7938986712331727.
[I 2023-10-11 09:35:42,147] Trial 5 finished with value: 0.8044399313585479 and parameters: {'alpha': 0.55

scaffold_splitter HLC GB


[I 2023-10-11 09:36:17,139] Trial 13 finished with value: 0.48184267592507696 and parameters: {'n_estimators': 20, 'learning_rate': 0.21174785612696173, 'max_depth': 3}. Best is trial 1 with value: 0.4756038094216188.
[I 2023-10-11 09:36:17,308] Trial 3 finished with value: 0.46605549050083095 and parameters: {'n_estimators': 50, 'learning_rate': 0.5242148467570048, 'max_depth': 4}. Best is trial 3 with value: 0.46605549050083095.
[I 2023-10-11 09:36:18,094] Trial 11 finished with value: 0.49355829998698253 and parameters: {'n_estimators': 200, 'learning_rate': 0.05677881541182676, 'max_depth': 3}. Best is trial 3 with value: 0.46605549050083095.
[I 2023-10-11 09:36:18,364] Trial 12 finished with value: 0.4655988010298464 and parameters: {'n_estimators': 10, 'learning_rate': 0.9528752836361396, 'max_depth': 2}. Best is trial 12 with value: 0.4655988010298464.
[I 2023-10-11 09:36:18,378] Trial 8 finished with value: 0.4811918976721066 and parameters: {'n_estimators': 20, 'learning_rate'

scaffold_splitter HLC RF


[I 2023-10-11 09:38:17,692] Trial 4 finished with value: 0.44587166608541434 and parameters: {'n_estimators': 50, 'max_features': 'log2', 'max_depth': 4}. Best is trial 4 with value: 0.44587166608541434.
[I 2023-10-11 09:38:18,267] Trial 5 finished with value: 0.4537877903893184 and parameters: {'n_estimators': 20, 'max_features': 'auto', 'max_depth': 4}. Best is trial 4 with value: 0.44587166608541434.
[I 2023-10-11 09:38:18,810] Trial 9 finished with value: 0.4674159276758301 and parameters: {'n_estimators': 20, 'max_features': 'sqrt', 'max_depth': 5}. Best is trial 4 with value: 0.44587166608541434.
[I 2023-10-11 09:38:18,872] Trial 8 finished with value: 0.46956800727132086 and parameters: {'n_estimators': 50, 'max_features': 'auto', 'max_depth': 3}. Best is trial 4 with value: 0.44587166608541434.
[I 2023-10-11 09:38:18,983] Trial 12 finished with value: 0.4737672949583082 and parameters: {'n_estimators': 200, 'max_features': 'auto', 'max_depth': 5}. Best is trial 4 with value: 0.

scaffold_splitter HLC ANN


[I 2023-10-11 09:44:39,941] Trial 11 finished with value: 3.165180089393108 and parameters: {'learning_rate_init': 0.039937546710066676, 'hidden_layer_sizes': [20, 20]}. Best is trial 1 with value: 2.572426858100426.
[I 2023-10-11 09:44:44,308] Trial 8 finished with value: 2.480471711161741 and parameters: {'learning_rate_init': 0.031787266108631224, 'hidden_layer_sizes': [50]}. Best is trial 8 with value: 2.480471711161741.
[I 2023-10-11 09:44:44,937] Trial 13 finished with value: 2.5675879557894756 and parameters: {'learning_rate_init': 0.07006039827925171, 'hidden_layer_sizes': [50, 50]}. Best is trial 8 with value: 2.480471711161741.
[I 2023-10-11 09:44:45,076] Trial 6 finished with value: 2.5629325131055203 and parameters: {'learning_rate_init': 0.023531691896758335, 'hidden_layer_sizes': [20]}. Best is trial 8 with value: 2.480471711161741.
[I 2023-10-11 09:44:47,686] Trial 5 finished with value: 2.6828633607301473 and parameters: {'learning_rate_init': 0.004503132535918747, 'hid