In [1]:
import optuna
import joblib
import numpy as np
import pandas as pd
import sys
from project_resources.import_utils import NotebookFinder
sys.meta_path.append(NotebookFinder())
from project_resources.cytochrome_P450 import fp_from_smiles, HyperparamTuner

importing Jupyter notebook from C:\Users\Lukas\Documents\datacytochromy\project_resources\cytochrome_P450.ipynb


In [2]:
# Identifiers used to address every experiment in this notebook:
# model family, dataset ("isozyme"), half of the split and splitting strategy.
model_identifiers = ["linear", "KRR", "GB", "RF", "ANN"]
isozymes = ["3A4", "RLM", "HLC"]
data_splits = ["train", "test"]
splitters = ["rand", "scaff", "time"]

# Source file and its CSV delimiter for each dataset.
_raw_sources = {
    "3A4": ("project_resources/ChEMBL_3A4.csv", ";"),
    "RLM": ("project_resources/AID_1508591_datatable_all.csv", ","),
    "HLC": ("project_resources/AID_1508603_datatable_all.csv", ","),
}
# Sub-directory of project_resources/base_splits used by each splitter key.
_split_dirs = {"scaff": "scaffold_splitter", "rand": "random", "time": "time_split"}

# Flat lookup table keyed by
#   "<isozyme>_source"                   -> source CSV
#   "<isozyme>_sep"                      -> delimiter of the source CSV (not a path)
#   "<isozyme>"                          -> "project_resources/<isozyme>.csv"
#   "<isozyme>_<train|test>_<splitter>"  -> CSV of one half of one split
rel_paths = {}
for _name, (_source, _sep) in _raw_sources.items():
    rel_paths[f"{_name}_source"] = _source
    rel_paths[f"{_name}_sep"] = _sep
    rel_paths[_name] = f"project_resources/{_name}.csv"
    for _key, _dir in _split_dirs.items():
        for _half in ("train", "test"):
            rel_paths[f"{_name}_{_half}_{_key}"] = f"project_resources/base_splits/{_dir}/{_name}_{_half}.csv"
# Sampler: the strategy Optuna uses to propose the next set of hyperparameters.
# This registry maps a sampler's name to its class (not an instance), so the
# chosen class still has to be instantiated before it is handed to a study.
samplers = dict(
    # Draws every hyperparameter at random from the search space.
    RandomSampler=optuna.samplers.RandomSampler,
    # Grid search over the hyperparameter space.
    GridSampler=optuna.samplers.GridSampler,
    # Tree-structured Parzen Estimator: models the objective and samples where it looks promising.
    TPESampler=optuna.samplers.TPESampler,
    # Covariance Matrix Adaptation Evolution Strategy.
    CmaEsSampler=optuna.samplers.CmaEsSampler,
    # NSGA-II: multi-objective evolutionary algorithm (non-dominated sorting + crowding distance).
    NSGAIISampler=optuna.samplers.NSGAIISampler,
    # Quasi-Monte Carlo: low-discrepancy sequences that cover the space more evenly than random draws.
    QMCSampler=optuna.samplers.QMCSampler,
    # Bayesian optimisation backed by the BoTorch library.
    BoTorchSampler=optuna.integration.BoTorchSampler,
    # Exhaustively evaluates every combination in the search space.
    BruteForceSampler=optuna.samplers.BruteForceSampler,
)
# Pruner: the rule Optuna uses to stop unpromising trials early.
# This registry maps a pruner's name to its class (not an instance).
pruners = dict(
    # Abstract base class of all pruners; a starting point for custom strategies, not usable as-is.
    BasePruner=optuna.pruners.BasePruner,
    # Prunes a trial whose intermediate result is worse than the median of earlier trials at the same step.
    MedianPruner=optuna.pruners.MedianPruner,
    # Successive halving: keeps only the better-performing fraction of trials at each rung.
    SuccessiveHalvingPruner=optuna.pruners.SuccessiveHalvingPruner,
    # Hyperband: runs successive halving under several resource-allocation schemes.
    HyperbandPruner=optuna.pruners.HyperbandPruner,
    # Like MedianPruner, but with a configurable percentile instead of the median.
    PercentilePruner=optuna.pruners.PercentilePruner,
    # Never prunes anything.
    NopPruner=optuna.pruners.NopPruner,
    # Prunes a trial whose intermediate value crosses a fixed lower/upper threshold.
    ThresholdPruner=optuna.pruners.ThresholdPruner,
    # Adds patience to another pruner: tolerates a number of steps without improvement before pruning.
    PatientPruner=optuna.pruners.PatientPruner,
)
# Empty containers, one independent dict each; later cells fill them as
# nested dicts indexed [splitter][isozyme][data_split].
smiles, halflives, fingerprints = {}, {}, {}

In [3]:
# Load the train/test CSV of every (splitter, isozyme) pair.
# The "smiles" and "half-life" columns are stored as numpy arrays in
#   smiles[splitter][isozyme][data_split]
#   halflives[splitter][isozyme][data_split]
for splitter in splitters:
    print(splitter)
    smiles[splitter] = {}
    halflives[splitter] = {}
    for isozyme in isozymes:
        smiles[splitter][isozyme] = {}
        halflives[splitter][isozyme] = {}

        for data_split in data_splits:
            # Parse the file once; both columns come from the same frame
            # (it was previously read from disk twice per split).
            split_df = pd.read_csv(rel_paths[f"{isozyme}_{data_split}_{splitter}"])

            # SMILES strings (model input before featurisation)
            split_smi = np.array(split_df["smiles"])
            smiles[splitter][isozyme][data_split] = split_smi

            # half-life values (regression target)
            split_halflife = np.array(split_df["half-life"])
            halflives[splitter][isozyme][data_split] = split_halflife

        # Quick sanity preview: first SMILES and first three half-lives of each split.
        print(f"""{isozyme}
    x_train: {smiles[splitter][isozyme]["train"][0]}
    x_test: {smiles[splitter][isozyme]["test"][0]}
    y_train: {halflives[splitter][isozyme]["train"][:3]}
    y_test: {halflives[splitter][isozyme]["test"][:3]}
    """)

rand
3A4
    x_train: CC(C)(O)c1cc(F)c2c(c1)C(=O)N(Cc1ccc(Cl)cn1)[C@@]2(OCC1(O)CC1)c1ccc(Cl)cc1
    x_test: Cc1ncsc1-c1ccc([C@H](CC(=O)NCCCCCCNC(=O)COc2c(-c3csc(N4CCOCC4)n3)ccc(F)c2F)NC(=O)[C@@H]2C[C@@H](O)CN2C(=O)[C@@H](NC(=O)C2(F)CC2)C(C)(C)C)cc1
    y_train: [6.   0.02 0.5 ]
    y_test: [0.3767  0.3333  0.01433]
    
RLM
    x_train: O=c1cc(N2CCOCC2)oc2c1ccc1ccccc12
    x_test: Cc1ccc(OCCn2c(CCNC(=O)N3CCCCC3)nc3ccccc32)cc1
    y_train: [30.    4.4  26.58]
    y_test: [ 1.7  1.7 30. ]
    
HLC
    x_train: N#Cc1ccc(CN2CCC(N3CCNC3=O)CC2)cc1
    x_test: c1ccc(Nc2ncc(-c3cncnc3)c3c2OCC3)cc1
    y_train: [93.2 21.  50.8]
    y_test: [120.  111.9  57.4]
    
scaff
3A4
    x_train: COc1cccc([C@@H](CO)NC(=O)[C@@H](C)N2Cc3ccc(-c4nc(NC5CCOCC5)ncc4Cl)cc3C2=O)c1
    x_test: O=C1CCC(N2C(=O)c3cccc(NCCOCCOCCNC(=O)c4ccc5c(c4)nc(Nc4cccc(Cl)c4)c4ccncc45)c3C2=O)C(=O)N1
    y_train: [0.09167 0.08333 0.8167 ]
    y_test: [0.2433 0.055  0.2667]
    
RLM
    x_train: CS(=O)(=O)c1ccccc1-c1csc(N2CCC(C(N)=O)C

In [4]:
# Featurise every loaded SMILES array with fp_from_smiles (Morgan fingerprints)
# and keep the result as a numpy array in fingerprints[splitter][isozyme][data_split].
for splitter in splitters:
    per_isozyme = fingerprints[splitter] = {}
    for isozyme in isozymes:
        per_split = per_isozyme[isozyme] = {}
        for data_split in data_splits:
            morgan_fps = fp_from_smiles(smiles[splitter][isozyme][data_split])
            per_split[data_split] = np.array(morgan_fps)
            # progress/sanity output: which set was converted, its first fingerprint and the set size
            print(splitter, isozyme, data_split)
            print(morgan_fps[0], len(morgan_fps))
        print("\n")

rand 3A4 train
[1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 1 0 1 0 1 0 1
 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 0 1 0 1 1 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0 1 0
 1 0 1 0 1 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 0 0
 1 0 1 0 0 1 0 0 0 1 1 1 0] 56
rand 3A4 test
[1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 0 0 1 1
 1 0 0 0 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 0 0 1 1 0 1 1 0 0 0 0 0
 1 1 1 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 1 1 0 0 1 1 1 0 0 0 0 0 1 0 1
 1 1 0 0 0 1 0 1 1 1 1 1 1] 14


rand RLM train
[0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0
 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0
 1 0 1 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1
 1 0 0 1 0 0 0 0 0 1 0 1 0] 2024
rand RLM test
[1 0 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
 1 1 1 0 1 1 1 1 1 1 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0
 1 0 1 0 0 0 1 0 0 0

In [7]:
# Hyperparameter search: one Optuna study per (splitter, isozyme, model) combination.
# Trials are persisted in a per-model SQLite file and the finished study is
# additionally pickled with joblib.
sampler = samplers['TPESampler']
# BasePruner is an abstract class and must not be handed to a study as-is;
# NopPruner is the concrete "never prune" strategy.
pruner = pruners["NopPruner"]
n_trials = 200

# Directory name used on disk for each splitter key; any other key falls back to "time_split".
splitter_dirs = {"rand": "random", "scaff": "scaffold_splitter"}

for splitter in splitters:
    splitter_name = splitter_dirs.get(splitter, "time_split")

    for isozyme in isozymes:
        X_train = fingerprints[splitter][isozyme]["train"]
        y_train = np.log(halflives[splitter][isozyme]["train"])  # natural log half-lives -> values are less spread out
        X_test = fingerprints[splitter][isozyme]["test"]
        y_test = np.log(halflives[splitter][isozyme]["test"])
        for model_identifier in model_identifiers:
            print(splitter_name, isozyme, model_identifier)
            # create_study expects sampler/pruner *instances*, so the selected
            # classes are instantiated here, fresh for every study.
            # load_if_exists=True reuses a study already present in the SQLite file,
            # so re-running this cell adds trials to it instead of starting over.
            study = optuna.create_study(study_name=model_identifier, directions=['minimize'],
                                        sampler=sampler(), pruner=pruner(),
                                        storage=f"sqlite:///project_resources/optuna/morgan/{splitter_name}/{isozyme}/db.{model_identifier}", load_if_exists=True)
            # NOTE(review): the test split is passed to the tuner. If the objective scores
            # trials on it, the tuned models' test error will be optimistically biased -
            # confirm against HyperparamTuner and consider a validation split instead.
            test = HyperparamTuner(model_identifier, X_train, y_train, X_test, y_test)
            study.optimize(test.objective, n_trials=n_trials, n_jobs=-1)  # catch=(ValueError,)
            joblib.dump(study, f"./project_resources/optuna/morgan/{splitter_name}/{isozyme}/{model_identifier}.pkl")

time_split 3A4 linear


[I 2023-10-14 14:01:44,618] A new study created in RDB with name: linear
[I 2023-10-14 14:01:45,288] Trial 2 finished with value: 1.4619409953610372 and parameters: {'alpha': 0.014025347454866128, 'l1_ratio': 0.6811910449201463}. Best is trial 2 with value: 1.4619409953610372.
[I 2023-10-14 14:01:45,536] Trial 0 finished with value: 1.4774412888545303 and parameters: {'alpha': 0.07256699432926833, 'l1_ratio': 0.3802995162392181}. Best is trial 2 with value: 1.4619409953610372.
[I 2023-10-14 14:01:45,649] Trial 4 finished with value: 1.4658535075202543 and parameters: {'alpha': 0.04966391325860327, 'l1_ratio': 0.5885824561612225}. Best is trial 2 with value: 1.4619409953610372.
[I 2023-10-14 14:01:45,742] Trial 6 finished with value: 1.4892278545235509 and parameters: {'alpha': 0.010943346357006131, 'l1_ratio': 0.18337811737520093}. Best is trial 2 with value: 1.4619409953610372.
[I 2023-10-14 14:01:45,803] Trial 3 finished with value: 1.4730405131570812 and parameters: {'alpha': 0.0364

time_split 3A4 KRR


[I 2023-10-14 14:02:11,045] Trial 11 finished with value: 1.5331661244770638 and parameters: {'alpha': 0.8572343005604235, 'gamma': 2.9917028045631786e-16, 'kernel': 'rbf'}. Best is trial 11 with value: 1.5331661244770638.
[I 2023-10-14 14:02:11,317] Trial 0 finished with value: 1.5677234361619836 and parameters: {'alpha': 0.007082335182508908, 'gamma': 3.209818494228256e-16, 'kernel': 'rbf'}. Best is trial 1 with value: 1.5063304258835037.
[I 2023-10-14 14:02:11,329] Trial 1 finished with value: 1.5063304258835037 and parameters: {'alpha': 0.021930078006946054, 'gamma': 6.091103551522895e-15, 'kernel': 'linear'}. Best is trial 1 with value: 1.5063304258835037.
[I 2023-10-14 14:02:11,348] Trial 10 finished with value: 1.5948500443184528 and parameters: {'alpha': 0.5631486662195113, 'gamma': 1.8549199592492495e-15, 'kernel': 'linear'}. Best is trial 1 with value: 1.5063304258835037.
[I 2023-10-14 14:02:11,352] Trial 4 finished with value: 1.5819980781518002 and parameters: {'alpha': 0.8

time_split 3A4 GB


[I 2023-10-14 14:02:40,714] Trial 2 finished with value: 1.3726786334172616 and parameters: {'n_estimators': 200, 'learning_rate': 0.3104564639805111, 'max_depth': 5}. Best is trial 2 with value: 1.3726786334172616.
[I 2023-10-14 14:02:40,947] Trial 3 finished with value: 1.4951411225123115 and parameters: {'n_estimators': 200, 'learning_rate': 0.7175441504500244, 'max_depth': 2}. Best is trial 2 with value: 1.3726786334172616.
[I 2023-10-14 14:02:41,121] Trial 5 finished with value: 1.487495477275158 and parameters: {'n_estimators': 200, 'learning_rate': 0.7517702329259015, 'max_depth': 1}. Best is trial 2 with value: 1.3726786334172616.
[I 2023-10-14 14:02:41,188] Trial 6 finished with value: 1.3870541225615673 and parameters: {'n_estimators': 10, 'learning_rate': 0.6750011322767321, 'max_depth': 2}. Best is trial 2 with value: 1.3726786334172616.
[I 2023-10-14 14:02:41,430] Trial 4 finished with value: 1.439444932922748 and parameters: {'n_estimators': 10, 'learning_rate': 0.4070910

time_split 3A4 RF


[I 2023-10-14 14:04:13,389] Trial 5 finished with value: 1.3552317427518397 and parameters: {'n_estimators': 500, 'max_features': 'auto', 'max_depth': 4}. Best is trial 5 with value: 1.3552317427518397.
[I 2023-10-14 14:04:13,605] Trial 1 finished with value: 1.30600953453576 and parameters: {'n_estimators': 10, 'max_features': 'auto', 'max_depth': 2}. Best is trial 1 with value: 1.30600953453576.
[I 2023-10-14 14:04:13,757] Trial 6 finished with value: 1.4112440360605316 and parameters: {'n_estimators': 200, 'max_features': 'auto', 'max_depth': 5}. Best is trial 1 with value: 1.30600953453576.
[I 2023-10-14 14:04:14,065] Trial 8 finished with value: 1.335310370671075 and parameters: {'n_estimators': 500, 'max_features': 'log2', 'max_depth': 2}. Best is trial 1 with value: 1.30600953453576.
[I 2023-10-14 14:04:14,286] Trial 9 finished with value: 1.3423035779427936 and parameters: {'n_estimators': 10, 'max_features': 'sqrt', 'max_depth': 10}. Best is trial 1 with value: 1.3060095345357

time_split 3A4 ANN


[I 2023-10-14 14:08:42,682] Trial 0 finished with value: 1.4621989625324165 and parameters: {'learning_rate_init': 0.004976336662882075, 'hidden_layer_sizes': [50]}. Best is trial 0 with value: 1.4621989625324165.
[I 2023-10-14 14:08:43,204] Trial 10 finished with value: 1.3467647529716837 and parameters: {'learning_rate_init': 0.05443928111700932, 'hidden_layer_sizes': [20, 20]}. Best is trial 10 with value: 1.3467647529716837.
[I 2023-10-14 14:08:43,551] Trial 11 finished with value: 1.5123498200642602 and parameters: {'learning_rate_init': 0.00871692427762134, 'hidden_layer_sizes': [5, 5]}. Best is trial 10 with value: 1.3467647529716837.
[I 2023-10-14 14:08:44,045] Trial 7 finished with value: 1.5307315605182275 and parameters: {'learning_rate_init': 0.08607225392653739, 'hidden_layer_sizes': [5]}. Best is trial 10 with value: 1.3467647529716837.
[I 2023-10-14 14:08:44,551] Trial 6 finished with value: 1.3620477227637313 and parameters: {'learning_rate_init': 0.05379173552024259, '

time_split RLM linear


[I 2023-10-14 14:15:14,195] A new study created in RDB with name: linear
[I 2023-10-14 14:15:15,991] Trial 0 finished with value: 1.1008192984224403 and parameters: {'alpha': 0.00283837538780405, 'l1_ratio': 0.12351246750061984}. Best is trial 0 with value: 1.1008192984224403.
[I 2023-10-14 14:15:16,179] Trial 3 finished with value: 1.1008980144602767 and parameters: {'alpha': 0.0728367984659049, 'l1_ratio': 0.6862705762496326}. Best is trial 8 with value: 1.1006556976025978.
[I 2023-10-14 14:15:16,305] Trial 10 finished with value: 1.1010904930477396 and parameters: {'alpha': 0.05851009806121538, 'l1_ratio': 0.08941280378077354}. Best is trial 8 with value: 1.1006556976025978.
[I 2023-10-14 14:15:16,392] Trial 4 finished with value: 1.1009110366891264 and parameters: {'alpha': 0.09975817112898487, 'l1_ratio': 0.36928864604523837}. Best is trial 8 with value: 1.1006556976025978.
[I 2023-10-14 14:15:16,411] Trial 9 finished with value: 1.10104045408586 and parameters: {'alpha': 0.061108

time_split RLM RF


[I 2023-10-14 14:26:01,789] Trial 0 finished with value: 0.9695888765875393 and parameters: {'n_estimators': 500, 'max_features': 'auto', 'max_depth': 10}. Best is trial 0 with value: 0.9695888765875393.
[I 2023-10-14 14:26:01,963] Trial 7 finished with value: 0.9680653156771232 and parameters: {'n_estimators': 50, 'max_features': 'auto', 'max_depth': 10}. Best is trial 7 with value: 0.9680653156771232.
[I 2023-10-14 14:26:02,327] Trial 9 finished with value: 0.9668846454866301 and parameters: {'n_estimators': 500, 'max_features': 'auto', 'max_depth': 5}. Best is trial 9 with value: 0.9668846454866301.
[I 2023-10-14 14:26:02,506] Trial 3 finished with value: 0.970284263919074 and parameters: {'n_estimators': 500, 'max_features': 'log2', 'max_depth': 4}. Best is trial 9 with value: 0.9668846454866301.
[I 2023-10-14 14:26:02,703] Trial 1 finished with value: 0.9691198016473329 and parameters: {'n_estimators': 200, 'max_features': 'sqrt', 'max_depth': 5}. Best is trial 2 with value: 0.962

time_split RLM ANN


[I 2023-10-14 14:39:29,955] A new study created in RDB with name: ANN
[I 2023-10-14 14:44:20,917] Trial 1 finished with value: 1.0810909615512931 and parameters: {'learning_rate_init': 0.060431666726871905, 'hidden_layer_sizes': [5]}. Best is trial 1 with value: 1.0810909615512931.
[I 2023-10-14 14:44:21,230] Trial 9 finished with value: 1.0802023087564552 and parameters: {'learning_rate_init': 0.01760219424730395, 'hidden_layer_sizes': [10]}. Best is trial 9 with value: 1.0802023087564552.
[I 2023-10-14 14:44:21,872] Trial 10 finished with value: 1.0805679211552193 and parameters: {'learning_rate_init': 0.00874661473426478, 'hidden_layer_sizes': [50]}. Best is trial 9 with value: 1.0802023087564552.
[I 2023-10-14 14:44:22,746] Trial 2 finished with value: 1.069288060469616 and parameters: {'learning_rate_init': 0.06877209619505353, 'hidden_layer_sizes': [10]}. Best is trial 2 with value: 1.069288060469616.
[I 2023-10-14 14:44:22,973] Trial 11 finished with value: 1.0666791066454377 an

time_split HLC linear


[I 2023-10-14 15:59:20,851] Trial 3 finished with value: 0.6143339428549061 and parameters: {'alpha': 0.04176112887122143, 'l1_ratio': 0.8680886543790897}. Best is trial 9 with value: 0.6071532240587403.
[I 2023-10-14 15:59:21,016] Trial 2 finished with value: 0.6088398547904669 and parameters: {'alpha': 0.08543817192589859, 'l1_ratio': 0.6867339001805226}. Best is trial 9 with value: 0.6071532240587403.
[I 2023-10-14 15:59:21,080] Trial 0 finished with value: 0.6114304420193292 and parameters: {'alpha': 0.0017485764979060377, 'l1_ratio': 0.741919247918685}. Best is trial 9 with value: 0.6071532240587403.
[I 2023-10-14 15:59:21,110] Trial 9 finished with value: 0.6071532240587403 and parameters: {'alpha': 0.0406103460305599, 'l1_ratio': 0.5116079404841702}. Best is trial 9 with value: 0.6071532240587403.
[I 2023-10-14 15:59:21,115] Trial 1 finished with value: 0.608790722722488 and parameters: {'alpha': 0.094995814854231, 'l1_ratio': 0.05568970558520503}. Best is trial 9 with value: 0.

time_split HLC KRR


[I 2023-10-14 15:59:46,559] Trial 1 finished with value: 0.752675295555442 and parameters: {'alpha': 0.2007226659034032, 'gamma': 7.987923374070897e-15, 'kernel': 'rbf'}. Best is trial 2 with value: 0.6910924390985898.
[I 2023-10-14 15:59:46,711] Trial 0 finished with value: 0.7325703097642374 and parameters: {'alpha': 0.1659118746692013, 'gamma': 5.16741266011842e-15, 'kernel': 'laplacian'}. Best is trial 2 with value: 0.6910924390985898.
[I 2023-10-14 15:59:46,728] Trial 2 finished with value: 0.6910924390985898 and parameters: {'alpha': 0.2182515576463361, 'gamma': 7.010772970331026e-16, 'kernel': 'rbf'}. Best is trial 2 with value: 0.6910924390985898.
[I 2023-10-14 15:59:46,796] Trial 7 finished with value: 0.714126473319974 and parameters: {'alpha': 0.1613843196834401, 'gamma': 9.824272735688612e-15, 'kernel': 'laplacian'}. Best is trial 2 with value: 0.6910924390985898.
[I 2023-10-14 15:59:46,805] Trial 3 finished with value: 0.7691330599850619 and parameters: {'alpha': 0.0050366

time_split HLC GB


[I 2023-10-14 16:00:16,087] Trial 1 finished with value: 0.5822288619117991 and parameters: {'n_estimators': 200, 'learning_rate': 0.8215104234365851, 'max_depth': 4}. Best is trial 1 with value: 0.5822288619117991.
[I 2023-10-14 16:00:16,237] Trial 3 finished with value: 0.5792470383563 and parameters: {'n_estimators': 50, 'learning_rate': 0.9813225214750986, 'max_depth': 3}. Best is trial 3 with value: 0.5792470383563.
[I 2023-10-14 16:00:16,594] Trial 2 finished with value: 0.5869973647931954 and parameters: {'n_estimators': 200, 'learning_rate': 0.9106095458081934, 'max_depth': 5}. Best is trial 3 with value: 0.5792470383563.
[I 2023-10-14 16:00:16,798] Trial 7 finished with value: 0.5573731524476299 and parameters: {'n_estimators': 500, 'learning_rate': 0.1509342121776812, 'max_depth': 2}. Best is trial 7 with value: 0.5573731524476299.
[I 2023-10-14 16:00:16,807] Trial 0 finished with value: 0.6111298874016114 and parameters: {'n_estimators': 500, 'learning_rate': 0.9693170955167

time_split HLC RF


[I 2023-10-14 16:01:50,713] Trial 0 finished with value: 0.5574671093597502 and parameters: {'n_estimators': 200, 'max_features': 'sqrt', 'max_depth': 2}. Best is trial 0 with value: 0.5574671093597502.
[I 2023-10-14 16:01:51,206] Trial 2 finished with value: 0.567131662216429 and parameters: {'n_estimators': 50, 'max_features': 'auto', 'max_depth': 10}. Best is trial 0 with value: 0.5574671093597502.
[I 2023-10-14 16:01:51,397] Trial 3 finished with value: 0.5643841563319754 and parameters: {'n_estimators': 50, 'max_features': 'log2', 'max_depth': 10}. Best is trial 0 with value: 0.5574671093597502.
[I 2023-10-14 16:01:51,600] Trial 7 finished with value: 0.5775777574132633 and parameters: {'n_estimators': 200, 'max_features': 'log2', 'max_depth': 5}. Best is trial 0 with value: 0.5574671093597502.
[I 2023-10-14 16:01:51,894] Trial 8 finished with value: 0.5497539058086651 and parameters: {'n_estimators': 10, 'max_features': 'sqrt', 'max_depth': 10}. Best is trial 8 with value: 0.5497

time_split HLC ANN


[I 2023-10-14 16:06:24,462] Trial 4 finished with value: 1.088053966962832 and parameters: {'learning_rate_init': 0.051575975715912624, 'hidden_layer_sizes': [50, 50]}. Best is trial 4 with value: 1.088053966962832.
[I 2023-10-14 16:06:24,735] Trial 10 finished with value: 1.0810605300597012 and parameters: {'learning_rate_init': 0.05260997639371628, 'hidden_layer_sizes': [10, 10]}. Best is trial 10 with value: 1.0810605300597012.
[I 2023-10-14 16:06:24,738] Trial 5 finished with value: 1.0864196824880634 and parameters: {'learning_rate_init': 0.054618454876633145, 'hidden_layer_sizes': [5, 5, 5]}. Best is trial 10 with value: 1.0810605300597012.
[I 2023-10-14 16:06:25,115] Trial 9 finished with value: 1.0639815192412205 and parameters: {'learning_rate_init': 0.036395180632753224, 'hidden_layer_sizes': [20, 20]}. Best is trial 9 with value: 1.0639815192412205.
[I 2023-10-14 16:06:25,120] Trial 8 finished with value: 1.07469587479338 and parameters: {'learning_rate_init': 0.060343884482