# Classification

In [13]:
import pandas as pd
import sys;sys.path.append('..');from thesis_package import aimodels as my_ai, utils, metrics
import pandas as pd
import matplotlib.pyplot as plt
from copy import deepcopy
import numpy as np
import os
import optuna

## Max u

In [14]:
exogenous_data = pd.read_csv('..\data\processed\production\exogenous_data_extended.csv').drop(columns=['date'])
y_max_u_sparse_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_max_sparse_bool_constr.csv').drop(columns=['timestamps'])
y_max_u_sparse_bool = pd.DataFrame(y_max_u_sparse_bool['bus_16'], columns=['bus_16'])
# Testing data
train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_max_u_sparse_bool, test_size=0.2, scaling=True)
data_max_class = {'X_train': train_x, 'X_test': valid_x, 'y_train': train_y, 'y_test': valid_y}

In [15]:
num_trials = 30

In [16]:
def objective(trial):
    # import data
    y_max_u_balanced_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_max_balanced_bool_constr.csv')
    y_max_u_balanced_bool = pd.DataFrame(y_max_u_balanced_bool['bus_16'], columns=['bus_16'])
    exogenous_data = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_max_balanced.csv').drop(columns=['date'])
    train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_max_u_balanced_bool, test_size=0.2, scaling=True)
    data = {'X_train': train_x, 'X_test': valid_x, 'y_train': train_y, 'y_test': valid_y}
    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        # use exact for small dataset.
        "tree_method": "exact",
        # defines booster, gblinear for linear functions.
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        # L2 regularization weight.
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        # L1 regularization weight.
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        # sampling ratio for training data.
        "subsample": trial.suggest_float("subsample", 0.2, 1.0),
        # sampling according to each tree.
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
    }

    if param["booster"] in ["gbtree", "dart"]:
        # maximum depth of the tree, signifies complexity of the tree.
        param["max_depth"] = trial.suggest_int("max_depth", 3, 9, step=2)
        # minimum child weight, larger the term more conservative the tree.
        param["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        # defines how selective algorithm is.
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])

    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)
    model = my_ai.Context(my_ai.XGBoostClassifierStrategy(param))
    model.fit(data)
    prediction = model.predict(data_max_class)
    prediction = pd.DataFrame(prediction , columns=valid_y.columns)
    tp, tn, fp, fn = 0, 0, 0, 0
    tp += sum((prediction['bus_16'] == 1) & (data_max_class['y_test']['bus_16'] == 1))
    tn += sum((prediction['bus_16'] == 0) & (data_max_class['y_test']['bus_16'] == 0))
    fp += sum((prediction['bus_16'] == 1) & (data_max_class['y_test']['bus_16'] == 0))
    fn += sum((prediction['bus_16'] == 0) & (data_max_class['y_test']['bus_16'] == 1))
    if (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn) > 0:
        mcc_score = (tp*tn - fp*fn) / np.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
    else: 
        mcc_score = 0
    return mcc_score
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=num_trials)
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
# Write the results to a csv file.
with open("./hyper_params_results_mcc/params_xgboost_balanced_classifier_max_u_test_bus_16.csv", "w") as f:
    f.write("params,value\n")
    for key, value in trial.params.items():
        f.write("{},{}\n".format(key, value))
    f.write("value,{}\n".format(trial.value))

[32m[I 2022-10-21 11:54:40,358][0m A new study created in memory with name: no-name-15dbeefd-eab2-4049-9473-0e512f919b2b[0m
[32m[I 2022-10-21 11:54:48,338][0m Trial 0 finished with value: 0.5714118338843156 and parameters: {'booster': 'dart', 'lambda': 1.8835371958220824e-07, 'alpha': 0.0011088719841518557, 'subsample': 0.38388708006168515, 'colsample_bytree': 0.4229607331161819, 'max_depth': 5, 'min_child_weight': 3, 'eta': 2.949719000498082e-08, 'gamma': 0.000951669624987712, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_type': 'forest', 'rate_drop': 5.29171444639736e-07, 'skip_drop': 2.52446204905082e-07}. Best is trial 0 with value: 0.5714118338843156.[0m
[32m[I 2022-10-21 11:54:53,150][0m Trial 1 finished with value: 0.5577008373282989 and parameters: {'booster': 'dart', 'lambda': 0.38092412350783944, 'alpha': 0.0109230063924983, 'subsample': 0.8200628263535812, 'colsample_bytree': 0.25117596916460444, 'max_depth': 9, 'min_child_weight': 8, 'eta': 0.0035

Number of finished trials:  30
Best trial:
  Value: 0.6404548085149984
  Params: 
    booster: dart
    lambda: 0.14182485122786342
    alpha: 0.0001488031719695729
    subsample: 0.7911264853768102
    colsample_bytree: 0.6710795352112778
    max_depth: 9
    min_child_weight: 9
    eta: 0.11948410602614275
    gamma: 2.970868915721095e-08
    grow_policy: lossguide
    sample_type: weighted
    normalize_type: tree
    rate_drop: 0.06814776932706042
    skip_drop: 0.95055313893407


# Hybrid training and testing

In [17]:
# Classification data sparse
y_max_u_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_max_sparse_bool_constr.csv').drop(columns=['timestamps'])
y_max_u_bool = pd.DataFrame(y_max_u_bool['bus_16'], columns=['bus_16'])
train_x, test_x, train_y, test_y, scaler = utils.split_and_suffle(exogenous_data, y_max_u_bool, test_size=0.2, scaling=True)
data_max_u_bool = {'X_train': deepcopy(train_x), 'X_test': deepcopy(test_x), 'y_train': deepcopy(train_y), 'y_test': deepcopy(test_y), 'scaler': deepcopy(scaler)}
# reg data sparse
y_max_u = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_max_constr.csv').drop(columns=['timestamps'])
y_max_u = pd.DataFrame(y_max_u['bus_16'], columns=['bus_16'])
exogenous_data = pd.read_csv('..\data\processed\production\exogenous_data_extended.csv').drop(columns=['date'])
train_x, valid_x, train_y, valid_y, scaler_max = utils.split_and_suffle(exogenous_data, y_max_u, test_size=0.2, scaling=True)
data_max_reg = {'X_train': deepcopy(train_x), 'X_test': deepcopy(valid_x), 'y_train': deepcopy(train_y), 'y_test': deepcopy(valid_y)}
threshold_max = utils.compute_threshold(y_max_u) / scaler_max['y']
# balanced data reg
y_max_u_balanced = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_max_balanced_constr.csv')
y_max_u_balanced = pd.DataFrame(y_max_u_balanced['bus_16'], columns=['bus_16'])
exogenous_data_vm_pu_max_balanced = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_max_balanced.csv').drop(columns=['date'])
X_max_u_train, X_max_u_test, y_max_u_train, y_max_u_test = utils.split_and_suffle(exogenous_data_vm_pu_max_balanced, y_max_u_balanced)
data_max_u_balanced = {'X_train': deepcopy(X_max_u_train), 'X_test': deepcopy(X_max_u_test), 'y_train': deepcopy(y_max_u_train), 'y_test': deepcopy(y_max_u_test)}
# class data bool balanced
y_max_u_balanced_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_max_balanced_bool_constr.csv')
y_max_u_balanced_bool = pd.DataFrame(y_max_u_balanced_bool['bus_16'], columns=['bus_16'])
exogenous_data_balanced = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_max_balanced.csv').drop(columns=['date'])
train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data_balanced, y_max_u_balanced_bool, test_size=0.2, scaling=True)
data_max_u_bool_balanced = {'X_train': train_x, 'X_test': valid_x, 'y_train': train_y, 'y_test': valid_y}
# hybrid data training
data_max_u_hybrid = {}
data_max_u_hybrid['X_train'] = pd.concat([data_max_u_balanced['X_train'], data_max_u_bool_balanced['y_train']], axis=1)
data_max_u_hybrid['y_train'] = deepcopy(data_max_u_balanced['y_train'])


In [18]:

# hybrid data test
import ast
def get_hyper_params_from_df(df):
    output = {}
    for row in df.iterrows():
        if row[1]['params'] != 'value':
            try:
                output[row[1]['params']] = ast.literal_eval(row[1]['value'])
            except :
                output[row[1]['params']] = row[1]['value']
    return output
# import classifier hyper params 
df = pd.read_csv('hyper_params_results_mcc/params_xgboost_balanced_classifier_max_u_test_bus_16.csv')
hyper_params = get_hyper_params_from_df(df)
classifier_max_u = my_ai.Context(strategy=my_ai.XGBoostClassifierStrategy(hyper_params)) 
classifier_max_u.fit(data_max_u_bool_balanced)
class_prediction = classifier_max_u.predict(data_max_u_bool).to_csv('class_prediction_max_u.csv', index=False)
data_max_u_hybrid['X_test']  = pd.concat([data_max_reg['X_test'], class_prediction], axis=1)
data_max_u_hybrid['y_test'] = deepcopy(data_max_reg['y_test'])


# # 
# if 'hybrid_regressor_max_u.pickle' not in os.listdir('pickles\hybrid_models_benchmark'):
#     print('training max u hybrid model... wait for it...')
#     hybrid_regressor_max_u = my_ai.Context(strategy=my_ai.GradientBoostRegressorStrategy(hyper_params)) 
#     hybrid_regressor_max_u.fit(data=data_max_u_hybrid)
#     utils.serialize_object('pickles\hybrid_models_benchmark\hybrid_regressor_max_u_one_bus', hybrid_regressor_max_u)
# else: 
#     print('loading max u hybrid model...')
#     hybrid_regressor_max_u = utils.deserialize_object('pickles\hybrid_models_benchmark\hybrid_regressor_max_u_one_bus')


In [19]:
print('data_max_u_hybrid X_train shape: ', data_max_u_hybrid['X_train'].shape)
print('data_max_u_hybrid y_train shape: ', data_max_u_hybrid['y_train'].shape)
print('data_max_u_hybrid X_test shape: ', data_max_u_hybrid['X_test'].shape)
print('data_max_u_hybrid y_test shape: ', data_max_u_hybrid['y_test'].shape)


data_max_u_hybrid X_train shape:  (5561, 12)
data_max_u_hybrid y_train shape:  (5561, 1)
data_max_u_hybrid X_test shape:  (9044, 11)
data_max_u_hybrid y_test shape:  (9044, 1)


In [20]:
num_trials = 30
# Same implementation as above, but for Gradient Boosting Regression.
def objective(trial):
    # import data
    y_max_u = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_max_balanced_constr.csv')
    y_max_u = pd.DataFrame(y_max_u['bus_16'], columns=['bus_16'])
    exogenous_data = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_max_balanced.csv').drop(columns=['date'])
    train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_max_u, test_size=0.2, scaling=True)
    data = {'X_train': deepcopy(train_x), 'X_test': deepcopy(valid_x), 'y_train': deepcopy(train_y), 'y_test': deepcopy(valid_y)}
    # class data bool balanced
    y_max_u_balanced_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_max_balanced_bool_constr.csv')
    y_max_u_balanced_bool = pd.DataFrame(y_max_u_balanced_bool['bus_16'], columns=['bus_16'])
    train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_max_u_balanced_bool, test_size=0.2, scaling=True)
    data_max_u_bool_balanced = {'X_train': deepcopy(train_x), 'X_test': deepcopy(valid_x), 'y_train': deepcopy(train_y), 'y_test': deepcopy(valid_y)}
    # hybrid data training
    data_max_u_hybrid = {}
    data_max_u_hybrid['X_train'] =  pd.concat([data['X_train'], data_max_u_bool_balanced['y_train']], axis=1)
    data_max_u_hybrid['y_train'] = data['y_train']
    # Testing data
    class_prediction = pd.read_csv('class_prediction_max_u.csv')
    data_max_u_hybrid['X_test']  = pd.concat([data_max_reg['X_test'], class_prediction], axis=1)
    data_max_u_hybrid['y_test'] = data_max_reg['y_test']
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 1000, log=True),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 1.0, log=True) ,
        'loss': trial.suggest_categorical('loss', ['squared_error', 'absolute_error'])
    }
    model = my_ai.Context(my_ai.GradientBoostRegressorStrategy(param))
    model.fit(data_max_u_hybrid)
    prediction = model.predict(data_max_u_hybrid)
    # evaluate the regression performance with my metrics
    metric = metrics.Metrics()
    metric.get_prediction_scores(prediction, data_max_u_hybrid['y_test'], threshold=threshold_max)
    print('hybrid_recall', metric.hybrid_recall)
    print('hybrid_precision', metric.hybrid_precision)
    print('hybrid_f1', metric.hybrid_f1)
    print('hybrid_accuracy', metric.hybrid_accuracy)
    print('hybrid_mcc', metric.hybrid_mcc)
    return metric.hybrid_mcc
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=num_trials)
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
# Write the results to a csv file.
with open("./hyper_params_results_mcc/params_gradient_boost_regression_balanced_max_u_test_one_bus.csv", "w") as f:
    f.write("params,value\n")
    for key, value in trial.params.items():
        f.write("{},{}\n".format(key, value))
    f.write("value,{}\n".format(trial.value))

[32m[I 2022-10-21 11:55:52,501][0m A new study created in memory with name: no-name-e199fcd9-c8be-4118-9b13-fdecc5c04c25[0m
[32m[I 2022-10-21 11:56:02,280][0m Trial 0 finished with value: 0.4002266609671506 and parameters: {'n_estimators': 417, 'learning_rate': 0.35977006145588475, 'loss': 'squared_error'}. Best is trial 0 with value: 0.4002266609671506.[0m


true_positives_ctr:  591
true_negatives_ctr:  7315
false_positives_ctr:  1035
false_negatives_ctr:  103
69896159893200
hybrid_recall 0.7765614215615608
hybrid_precision 0.2719271446503238
hybrid_f1 0.4028048312892285
hybrid_accuracy 0.8491732157762555
hybrid_mcc 0.4002266609671506


[32m[I 2022-10-21 11:56:15,829][0m Trial 1 finished with value: 0.25309767453785365 and parameters: {'n_estimators': 623, 'learning_rate': 0.5356408547957667, 'loss': 'squared_error'}. Best is trial 0 with value: 0.4002266609671506.[0m


true_positives_ctr:  545
true_negatives_ctr:  6633
false_positives_ctr:  1717
false_negatives_ctr:  149
88898888691600
hybrid_recall 0.6857393435463653
hybrid_precision 0.1719975824556419
hybrid_f1 0.2750155804401447
hybrid_accuracy 0.756767255065997
hybrid_mcc 0.25309767453785365


[32m[I 2022-10-21 11:56:20,495][0m Trial 2 finished with value: 0.5281841316632668 and parameters: {'n_estimators': 160, 'learning_rate': 0.12869537094053457, 'loss': 'absolute_error'}. Best is trial 2 with value: 0.5281841316632668.[0m


true_positives_ctr:  619
true_negatives_ctr:  7716
false_positives_ctr:  634
false_negatives_ctr:  75
56570526572700
hybrid_recall 0.8371331662465814
hybrid_precision 0.3847820863700829
hybrid_f1 0.5272278017451111
hybrid_accuracy 0.9061877354323458
hybrid_mcc 0.5281841316632668


[32m[I 2022-10-21 11:56:22,675][0m Trial 3 finished with value: 0.5003477812636629 and parameters: {'n_estimators': 55, 'learning_rate': 0.26149573493887496, 'loss': 'absolute_error'}. Best is trial 2 with value: 0.5281841316632668.[0m


true_positives_ctr:  599
true_negatives_ctr:  7711
false_positives_ctr:  639
false_negatives_ctr:  95
56000916877200
hybrid_recall 0.790376794269534
hybrid_precision 0.3730161507185551
hybrid_f1 0.5068335865122705
hybrid_accuracy 0.9021138578808843
hybrid_mcc 0.5003477812636629


[32m[I 2022-10-21 11:56:24,253][0m Trial 4 finished with value: 0.49890218503492495 and parameters: {'n_estimators': 21, 'learning_rate': 0.14773255250896278, 'loss': 'squared_error'}. Best is trial 2 with value: 0.5281841316632668.[0m


true_positives_ctr:  683
true_negatives_ctr:  7306
false_positives_ctr:  1044
false_negatives_ctr:  11
73227016259100
hybrid_recall 0.9769701855065157
hybrid_precision 0.3009031443103157
hybrid_f1 0.46009787332908925
hybrid_accuracy 0.8622951944506604
hybrid_mcc 0.49890218503492495


[32m[I 2022-10-21 11:56:26,926][0m Trial 5 finished with value: 0.4943204945994142 and parameters: {'n_estimators': 77, 'learning_rate': 0.24798521893885972, 'loss': 'absolute_error'}. Best is trial 2 with value: 0.5281841316632668.[0m


true_positives_ctr:  591
true_negatives_ctr:  7719
false_positives_ctr:  631
false_negatives_ctr:  103
55390458931600
hybrid_recall 0.7756727330359768
hybrid_precision 0.3722593784123848
hybrid_f1 0.5030810560514538
hybrid_accuracy 0.9024784500428922
hybrid_mcc 0.4943204945994142


[32m[I 2022-10-21 11:56:28,154][0m Trial 6 finished with value: 0.4575268821786998 and parameters: {'n_estimators': 11, 'learning_rate': 0.12531912545843432, 'loss': 'squared_error'}. Best is trial 2 with value: 0.5281841316632668.[0m


true_positives_ctr:  692
true_negatives_ctr:  7047
false_positives_ctr:  1303
false_negatives_ctr:  2
81492258949500
hybrid_recall 0.9960655590754403
hybrid_precision 0.2582978492561267
hybrid_f1 0.4102185856481556
hybrid_accuracy 0.8262977996786877
hybrid_mcc 0.4575268821786998


[32m[I 2022-10-21 11:56:29,840][0m Trial 7 finished with value: 0.3314669289022847 and parameters: {'n_estimators': 35, 'learning_rate': 0.813933865024019, 'loss': 'squared_error'}. Best is trial 2 with value: 0.5281841316632668.[0m


true_positives_ctr:  595
true_negatives_ctr:  6874
false_positives_ctr:  1476
false_negatives_ctr:  99
83684631876700
hybrid_recall 0.7847847383272057
hybrid_precision 0.20785050792211282
hybrid_f1 0.3286562855533785
hybrid_accuracy 0.794061236786938
hybrid_mcc 0.3314669289022847


[32m[I 2022-10-21 11:56:44,186][0m Trial 8 finished with value: 0.18452880547749603 and parameters: {'n_estimators': 665, 'learning_rate': 0.9909196288796606, 'loss': 'squared_error'}. Best is trial 2 with value: 0.5281841316632668.[0m


true_positives_ctr:  535
true_negatives_ctr:  6087
false_positives_ctr:  2263
false_negatives_ctr:  159
101273457229200
hybrid_recall 0.6618712939902469
hybrid_precision 0.13186388552537484
hybrid_f1 0.2199144571027393
hybrid_accuracy 0.6850682525285895
hybrid_mcc 0.18452880547749603


[32m[I 2022-10-21 11:56:45,464][0m Trial 9 finished with value: 0.5046775266559367 and parameters: {'n_estimators': 15, 'learning_rate': 0.6257655382315078, 'loss': 'squared_error'}. Best is trial 2 with value: 0.5281841316632668.[0m


true_positives_ctr:  609
true_negatives_ctr:  7679
false_positives_ctr:  671
false_negatives_ctr:  85
57589252608000
hybrid_recall 0.813719155236725
hybrid_precision 0.36833222515828407
hybrid_f1 0.5071166821904209
hybrid_accuracy 0.8989782533373248
hybrid_mcc 0.5046775266559367


[32m[I 2022-10-21 11:56:50,882][0m Trial 10 finished with value: 0.5287239253184698 and parameters: {'n_estimators': 208, 'learning_rate': 0.17511257188219423, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  600
true_negatives_ctr:  7786
false_positives_ctr:  564
false_negatives_ctr:  94
53152677168000
hybrid_recall 0.7967130895608335
hybrid_precision 0.4045164738610634
hybrid_f1 0.5365894737888819
hybrid_accuracy 0.9126470143055045
hybrid_mcc 0.5287239253184698


[32m[I 2022-10-21 11:56:56,392][0m Trial 11 finished with value: 0.5189808294255156 and parameters: {'n_estimators': 218, 'learning_rate': 0.1772274822375657, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  593
true_negatives_ctr:  7782
false_positives_ctr:  568
false_negatives_ctr:  101
53035869368700
hybrid_recall 0.7819259354881972
hybrid_precision 0.3996096690071596
hybrid_f1 0.5289136663841941
hybrid_accuracy 0.9111869901034131
hybrid_mcc 0.5189808294255156


[32m[I 2022-10-21 11:57:00,440][0m Trial 12 finished with value: 0.5209606611154847 and parameters: {'n_estimators': 140, 'learning_rate': 0.1062566139562704, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  638
true_negatives_ctr:  7617
false_positives_ctr:  733
false_negatives_ctr:  56
60960511016700
hybrid_recall 0.8760704640615262
hybrid_precision 0.35989717978634184
hybrid_f1 0.510199908354015
hybrid_accuracy 0.8960828547261229
hybrid_mcc 0.5209606611154847


[32m[I 2022-10-21 11:57:05,953][0m Trial 13 finished with value: 0.5108025927442006 and parameters: {'n_estimators': 212, 'learning_rate': 0.1846923975914618, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  583
true_negatives_ctr:  7798
false_positives_ctr:  552
false_negatives_ctr:  111
52019165753500
hybrid_recall 0.7588319902170009
hybrid_precision 0.4012146162224169
hybrid_f1 0.5249004376930724
hybrid_accuracy 0.9111141534243593
hybrid_mcc 0.5108025927442006


[32m[I 2022-10-21 11:57:12,970][0m Trial 14 finished with value: 0.5263705658438922 and parameters: {'n_estimators': 293, 'learning_rate': 0.10275982448478943, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  612
true_negatives_ctr:  7738
false_positives_ctr:  612
false_negatives_ctr:  82
55466928432000
hybrid_recall 0.8215554544595527
hybrid_precision 0.38971172582515407
hybrid_f1 0.5286526362305062
hybrid_accuracy 0.9079799176915988
hybrid_mcc 0.5263705658438922


[32m[I 2022-10-21 11:57:16,595][0m Trial 15 finished with value: 0.5211378881443595 and parameters: {'n_estimators': 125, 'learning_rate': 0.19380014986157038, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  613
true_negatives_ctr:  7715
false_positives_ctr:  635
false_negatives_ctr:  81
56380946419200
hybrid_recall 0.8232388738161609
hybrid_precision 0.38278018885071213
hybrid_f1 0.5225780277332814
hybrid_accuracy 0.9054068670227083
hybrid_mcc 0.5211378881443595


[32m[I 2022-10-21 11:57:38,661][0m Trial 16 finished with value: 0.5156274982614288 and parameters: {'n_estimators': 1000, 'learning_rate': 0.38637390604363514, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  580
true_negatives_ctr:  7820
false_positives_ctr:  530
false_negatives_ctr:  114
51034177626000
hybrid_recall 0.7557899607151171
hybrid_precision 0.4086649341003996
hybrid_f1 0.5304883097912008
hybrid_accuracy 0.9136903529892371
hybrid_mcc 0.5156274982614288


[32m[I 2022-10-21 11:57:40,832][0m Trial 17 finished with value: 0.5057940655777319 and parameters: {'n_estimators': 56, 'learning_rate': 0.144394158147231, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  663
true_negatives_ctr:  7444
false_positives_ctr:  906
false_negatives_ctr:  31
67964180797500
hybrid_recall 0.931763014332912
hybrid_precision 0.3226044184382977
hybrid_f1 0.47927083804639964
hybrid_accuracy 0.8772784538872922
hybrid_mcc 0.5057940655777319


[32m[I 2022-10-21 11:57:44,930][0m Trial 18 finished with value: 0.5255116610351447 and parameters: {'n_estimators': 144, 'learning_rate': 0.26773166325854336, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  599
true_negatives_ctr:  7783
false_positives_ctr:  567
false_negatives_ctr:  95
53230491085200
hybrid_recall 0.7935375807746359
hybrid_precision 0.40204698739923334
hybrid_f1 0.5336960717480949
hybrid_accuracy 0.9119302413800753
hybrid_mcc 0.5255116610351447


[32m[I 2022-10-21 11:57:53,213][0m Trial 19 finished with value: 0.513882558673673 and parameters: {'n_estimators': 345, 'learning_rate': 0.22259898498034209, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  582
true_negatives_ctr:  7806
false_positives_ctr:  544
false_negatives_ctr:  112
51665404493200
hybrid_recall 0.7598940120610658
hybrid_precision 0.4043667964898049
hybrid_f1 0.5278472058359119
hybrid_accuracy 0.9123889361453664
hybrid_mcc 0.513882558673673


[32m[I 2022-10-21 11:57:56,083][0m Trial 20 finished with value: 0.5271977735454643 and parameters: {'n_estimators': 87, 'learning_rate': 0.15052057916276904, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  644
true_negatives_ctr:  7614
false_positives_ctr:  736
false_negatives_ctr:  50
61288716768000
hybrid_recall 0.8888233869065337
hybrid_precision 0.3618028355947402
hybrid_f1 0.5142684775672489
hybrid_accuracy 0.8964317527355293
hybrid_mcc 0.5271977735454643


[32m[I 2022-10-21 11:57:59,060][0m Trial 21 finished with value: 0.519158388716859 and parameters: {'n_estimators': 92, 'learning_rate': 0.1405089528919976, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  648
true_negatives_ctr:  7567
false_positives_ctr:  783
false_negatives_ctr:  46
63130816964700
hybrid_recall 0.8975048892915979
hybrid_precision 0.34949847228453346
hybrid_f1 0.5030885999839583
hybrid_accuracy 0.8909767018898098
hybrid_mcc 0.519158388716859


[32m[I 2022-10-21 11:58:04,969][0m Trial 22 finished with value: 0.5256377844298865 and parameters: {'n_estimators': 224, 'learning_rate': 0.16374221568149538, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  595
true_negatives_ctr:  7797
false_positives_ctr:  553
false_negatives_ctr:  99
52528496899200
hybrid_recall 0.7852683488469753
hybrid_precision 0.406244549379448
hybrid_f1 0.5354721497251654
hybrid_accuracy 0.9132822032476946
hybrid_mcc 0.5256377844298865


[32m[I 2022-10-21 11:58:06,697][0m Trial 23 finished with value: 0.49703942968007836 and parameters: {'n_estimators': 33, 'learning_rate': 0.11715890623323941, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  689
true_negatives_ctr:  7273
false_positives_ctr:  1077
false_negatives_ctr:  5
74481548365200
hybrid_recall 0.9895652683109355
hybrid_precision 0.29500281004625045
hybrid_f1 0.4545100252673809
hybrid_accuracy 0.858405460328526
hybrid_mcc 0.49703942968007836


[32m[I 2022-10-21 11:58:09,325][0m Trial 24 finished with value: 0.5137793524137584 and parameters: {'n_estimators': 75, 'learning_rate': 0.1992022973636998, 'loss': 'absolute_error'}. Best is trial 10 with value: 0.5287239253184698.[0m


true_positives_ctr:  634
true_negatives_ctr:  7603
false_positives_ctr:  747
false_negatives_ctr:  60
61325126124700
hybrid_recall 0.867752870180725
hybrid_precision 0.3554157771274527
hybrid_f1 0.5042854252168469
hybrid_accuracy 0.8940172615037714
hybrid_mcc 0.5137793524137584


[32m[I 2022-10-21 11:58:13,873][0m Trial 25 finished with value: 0.5295599848691138 and parameters: {'n_estimators': 164, 'learning_rate': 0.128508806750761, 'loss': 'absolute_error'}. Best is trial 25 with value: 0.5295599848691138.[0m


true_positives_ctr:  620
true_negatives_ctr:  7718
false_positives_ctr:  632
false_negatives_ctr:  74
56532633721600
hybrid_recall 0.8383329629440158
hybrid_precision 0.38594361478413364
hybrid_f1 0.5285558181823694
hybrid_accuracy 0.9064599016640538
hybrid_mcc 0.5295599848691138


[32m[I 2022-10-21 11:58:19,169][0m Trial 26 finished with value: 0.5188199379053209 and parameters: {'n_estimators': 156, 'learning_rate': 0.12242545502777621, 'loss': 'absolute_error'}. Best is trial 25 with value: 0.5295599848691138.[0m


true_positives_ctr:  626
true_negatives_ctr:  7659
false_positives_ctr:  691
false_negatives_ctr:  68
58971562259100
hybrid_recall 0.8496231704033961
hybrid_precision 0.3684899255104966
hybrid_f1 0.5140369639307623
hybrid_accuracy 0.8998770554095317
hybrid_mcc 0.5188199379053209


[32m[I 2022-10-21 11:58:29,805][0m Trial 27 finished with value: 0.5190606697925915 and parameters: {'n_estimators': 475, 'learning_rate': 0.320812259711449, 'loss': 'absolute_error'}. Best is trial 25 with value: 0.5295599848691138.[0m


true_positives_ctr:  590
true_negatives_ctr:  7797
false_positives_ctr:  553
false_negatives_ctr:  104
52332832100700
hybrid_recall 0.7748102159497631
hybrid_precision 0.40323962263245605
hybrid_f1 0.5304260802197778
hybrid_accuracy 0.9122972523512657
hybrid_mcc 0.5190606697925915


[32m[I 2022-10-21 11:58:39,169][0m Trial 28 finished with value: 0.5141810552555246 and parameters: {'n_estimators': 278, 'learning_rate': 0.12981730358890015, 'loss': 'absolute_error'}. Best is trial 25 with value: 0.5295599848691138.[0m


true_positives_ctr:  591
true_negatives_ctr:  7775
false_positives_ctr:  575
false_negatives_ctr:  103
53230491085200
hybrid_recall 0.7777460321124988
hybrid_precision 0.39569038255952527
hybrid_f1 0.5245220297117894
hybrid_accuracy 0.9099643550963987
hybrid_mcc 0.5141810552555246


[32m[I 2022-10-21 11:58:50,955][0m Trial 29 finished with value: 0.5106876856106236 and parameters: {'n_estimators': 414, 'learning_rate': 0.3710401933193387, 'loss': 'absolute_error'}. Best is trial 25 with value: 0.5295599848691138.[0m


true_positives_ctr:  577
true_negatives_ctr:  7816
false_positives_ctr:  534
false_negatives_ctr:  117
51073716228700
hybrid_recall 0.7489019482843028
hybrid_precision 0.4060736177817824
hybrid_f1 0.5266073715125358
hybrid_accuracy 0.9126528259937773
hybrid_mcc 0.5106876856106236
Number of finished trials:  30
Best trial:
  Value: 0.5295599848691138
  Params: 
    n_estimators: 164
    learning_rate: 0.128508806750761
    loss: absolute_error


In [21]:
# Same implementation as above, but for Gradient Boosting Regression.
def objective(trial):
    # import data
    y_max_u = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_max_balanced_constr.csv')
    y_max_u = pd.DataFrame(y_max_u['bus_16'], columns=['bus_16'])
    exogenous_data = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_max_balanced.csv').drop(columns=['date'])
    train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_max_u, test_size=0.2, scaling=True)
    data = {'X_train': train_x, 'X_test': valid_x, 'y_train': train_y, 'y_test': valid_y}
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 1000, log=True),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 1.0, log=True) ,
        'loss': trial.suggest_categorical('loss', ['squared_error', 'absolute_error'])
    }
    model = my_ai.Context(my_ai.GradientBoostRegressorStrategy(param))
    model.fit(data)
    prediction = model.predict(data_max_reg)
    # evaluate the regression performance with my metrics
    metric = metrics.Metrics()
    metric.get_prediction_scores(prediction, data_max_reg['y_test'], threshold=threshold_max)
    print('hybrid_recall', metric.hybrid_recall)
    print('hybrid_precision', metric.hybrid_precision)
    print('hybrid_f1', metric.hybrid_f1)
    print('hybrid_accuracy', metric.hybrid_accuracy)
    print('hybrid_mcc', metric.hybrid_mcc)
    return metric.hybrid_mcc
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=num_trials)
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
# Write the results to a csv file.
with open("./hyper_params_results_mcc/params_gradient_boost_regression_balanced_max_u_one_bus_not hybrid.csv", "w") as f:
    f.write("params,value\n")
    for key, value in trial.params.items():
        f.write("{},{}\n".format(key, value))
    f.write("value,{}\n".format(trial.value))

[32m[I 2022-10-21 11:58:51,057][0m A new study created in memory with name: no-name-545cfb24-d30c-4655-b82d-be79559c7b63[0m
[32m[I 2022-10-21 11:59:04,171][0m Trial 0 finished with value: 0.5200193024610237 and parameters: {'n_estimators': 399, 'learning_rate': 0.14344219081822132, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5200193024610237.[0m


true_positives_ctr:  588
true_negatives_ctr:  7803
false_positives_ctr:  547
false_negatives_ctr:  106
52019165753500
hybrid_recall 0.7717488363399436
hybrid_precision 0.40601020119024916
hybrid_f1 0.5320916933361907
hybrid_accuracy 0.9130892239876028
hybrid_mcc 0.5200193024610237


[32m[I 2022-10-21 11:59:11,520][0m Trial 1 finished with value: 0.5264442861839969 and parameters: {'n_estimators': 171, 'learning_rate': 0.11717159309268428, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  623
true_negatives_ctr:  7696
false_positives_ctr:  654
false_negatives_ctr:  71
57476478059100
hybrid_recall 0.8439509769288046
hybrid_precision 0.379817817064974
hybrid_f1 0.5238695729784729
hybrid_accuracy 0.9041353316883544
hybrid_mcc 0.5264442861839969


[32m[I 2022-10-21 11:59:18,300][0m Trial 2 finished with value: 0.3789623052475237 and parameters: {'n_estimators': 103, 'learning_rate': 0.6079779367052507, 'loss': 'squared_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  559
true_negatives_ctr:  7389
false_positives_ctr:  961
false_negatives_ctr:  135
66273257952000
hybrid_recall 0.7135875155685387
hybrid_precision 0.2718216072752275
hybrid_f1 0.3936811643342777
hybrid_accuracy 0.8537856144065097
hybrid_mcc 0.3789623052475237


[32m[I 2022-10-21 11:59:25,303][0m Trial 3 finished with value: 0.40900462488732386 and parameters: {'n_estimators': 139, 'learning_rate': 0.5053522156590097, 'loss': 'squared_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  583
true_negatives_ctr:  7398
false_positives_ctr:  952
false_negatives_ctr:  111
66793842793500
hybrid_recall 0.7638960988050791
hybrid_precision 0.28491435136167964
hybrid_f1 0.41503202311563836
hybrid_accuracy 0.8582484986390952
hybrid_mcc 0.40900462488732386


[32m[I 2022-10-21 11:59:27,718][0m Trial 4 finished with value: 0.4879103013479564 and parameters: {'n_estimators': 18, 'learning_rate': 0.16204901774020794, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  690
true_negatives_ctr:  7229
false_positives_ctr:  1121
false_negatives_ctr:  4
75907180688700
hybrid_recall 0.9915459146852389
hybrid_precision 0.2854906416715202
hybrid_f1 0.4433343399939155
hybrid_accuracy 0.8528549016317012
hybrid_mcc 0.4879103013479564


[32m[I 2022-10-21 11:59:30,268][0m Trial 5 finished with value: 0.5000953808857983 and parameters: {'n_estimators': 56, 'learning_rate': 0.2904273261299853, 'loss': 'squared_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  620
true_negatives_ctr:  7615
false_positives_ctr:  735
false_negatives_ctr:  74
60374716165500
hybrid_recall 0.8392857963819204
hybrid_precision 0.3516668532315947
hybrid_f1 0.4956519389270159
hybrid_accuracy 0.8930527943155275
hybrid_mcc 0.5000953808857983


[32m[I 2022-10-21 11:59:39,518][0m Trial 6 finished with value: 0.5185091247437454 and parameters: {'n_estimators': 347, 'learning_rate': 0.4558891435596646, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  573
true_negatives_ctr:  7857
false_positives_ctr:  493
false_negatives_ctr:  121
49283005205200
hybrid_recall 0.7379470719216962
hybrid_precision 0.42181568917561973
hybrid_f1 0.5367953915389796
hybrid_accuracy 0.9173419267255394
hybrid_mcc 0.5185091247437454


[32m[I 2022-10-21 11:59:44,033][0m Trial 7 finished with value: 0.49645627505614603 and parameters: {'n_estimators': 175, 'learning_rate': 0.19777998692304216, 'loss': 'squared_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  601
true_negatives_ctr:  7681
false_positives_ctr:  669
false_negatives_ctr:  93
57212931802000
hybrid_recall 0.7987072522443422
hybrid_precision 0.36507790359791326
hybrid_f1 0.5011068714427521
hybrid_accuracy 0.8984197818228625
hybrid_mcc 0.49645627505614603


[32m[I 2022-10-21 11:59:46,631][0m Trial 8 finished with value: 0.4801508816065987 and parameters: {'n_estimators': 83, 'learning_rate': 0.30980463534942915, 'loss': 'squared_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  612
true_negatives_ctr:  7574
false_positives_ctr:  776
false_negatives_ctr:  82
61579667107200
hybrid_recall 0.8198012124893065
hybrid_precision 0.3384083104617531
hybrid_f1 0.47906279086038156
hybrid_accuracy 0.8856541976812446
hybrid_mcc 0.4801508816065987


[32m[I 2022-10-21 11:59:53,181][0m Trial 9 finished with value: 0.5137194182761871 and parameters: {'n_estimators': 246, 'learning_rate': 0.20270771787467193, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  590
true_negatives_ctr:  7778
false_positives_ctr:  572
false_negatives_ctr:  104
53074816891600
hybrid_recall 0.7752298033008319
hybrid_precision 0.39638366626976024
hybrid_f1 0.5245559898634369
hybrid_accuracy 0.9101304607024546
hybrid_mcc 0.5137194182761871


[32m[I 2022-10-21 12:00:13,098][0m Trial 10 finished with value: 0.5098739831677878 and parameters: {'n_estimators': 862, 'learning_rate': 0.10500115674364142, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  578
true_negatives_ctr:  7810
false_positives_ctr:  540
false_negatives_ctr:  116
51350161933200
hybrid_recall 0.7510656394972336
hybrid_precision 0.403585631479024
hybrid_f1 0.5250404308521666
hybrid_accuracy 0.9124402832801068
hybrid_mcc 0.5098739831677878


[32m[I 2022-10-21 12:00:27,158][0m Trial 11 finished with value: 0.5216331936561465 and parameters: {'n_estimators': 580, 'learning_rate': 0.10088639848602945, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  588
true_negatives_ctr:  7808
false_positives_ctr:  542
false_negatives_ctr:  106
51822747618000
hybrid_recall 0.7718161789001349
hybrid_precision 0.4080473588391007
hybrid_f1 0.5338542013306831
hybrid_accuracy 0.9136513240151687
hybrid_mcc 0.5216331936561465


[32m[I 2022-10-21 12:00:47,154][0m Trial 12 finished with value: 0.48855776393925526 and parameters: {'n_estimators': 905, 'learning_rate': 0.9925155986920519, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  536
true_negatives_ctr:  7895
false_positives_ctr:  455
false_negatives_ctr:  158
46246332732700
hybrid_recall 0.6659647237538878
hybrid_precision 0.4220917162167291
hybrid_f1 0.5166978161476062
hybrid_accuracy 0.9170069677036422
hybrid_mcc 0.48855776393925526


[32m[I 2022-10-21 12:00:49,367][0m Trial 13 finished with value: 0.4902680986481996 and parameters: {'n_estimators': 41, 'learning_rate': 0.10201626360144084, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  690
true_negatives_ctr:  7234
false_positives_ctr:  1116
false_negatives_ctr:  4
75749936077200
hybrid_recall 0.9915918805284881
hybrid_precision 0.2878385035177296
hybrid_f1 0.4461646785173439
hybrid_accuracy 0.85397037927733
hybrid_mcc 0.4902680986481996


[32m[I 2022-10-21 12:01:03,226][0m Trial 14 finished with value: 0.5155043560415936 and parameters: {'n_estimators': 433, 'learning_rate': 0.13246615708915002, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  586
true_negatives_ctr:  7797
false_positives_ctr:  553
false_negatives_ctr:  108
52176091645500
hybrid_recall 0.7677353215890591
hybrid_precision 0.4023299422928839
hybrid_f1 0.5279755192566769
hybrid_accuracy 0.9119811039482713
hybrid_mcc 0.5155043560415936


[32m[I 2022-10-21 12:01:24,776][0m Trial 15 finished with value: 0.5163961801314643 and parameters: {'n_estimators': 559, 'learning_rate': 0.22239941403974434, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  586
true_negatives_ctr:  7801
false_positives_ctr:  549
false_negatives_ctr:  108
52019165753500
hybrid_recall 0.7671925791415902
hybrid_precision 0.40381660983172235
hybrid_f1 0.5291249792303258
hybrid_accuracy 0.9122896501246531
hybrid_mcc 0.5163961801314643


[32m[I 2022-10-21 12:01:30,554][0m Trial 16 finished with value: 0.5200900321512266 and parameters: {'n_estimators': 231, 'learning_rate': 0.13461197436062355, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  602
true_negatives_ctr:  7753
false_positives_ctr:  597
false_negatives_ctr:  92
54507727609500
hybrid_recall 0.8003290608499847
hybrid_precision 0.39214126836906693
hybrid_f1 0.5263729341423063
hybrid_accuracy 0.9086851552983005
hybrid_mcc 0.5200900321512266


[32m[I 2022-10-21 12:01:31,803][0m Trial 17 finished with value: 0.47309331819757605 and parameters: {'n_estimators': 11, 'learning_rate': 0.10212976464065862, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  692
true_negatives_ctr:  7160
false_positives_ctr:  1190
false_negatives_ctr:  2
78108784891600
hybrid_recall 0.9957619483239231
hybrid_precision 0.2688221559389857
hybrid_f1 0.42335321604642
hybrid_accuracy 0.8466918602715747
hybrid_mcc 0.47309331819757605


[32m[I 2022-10-21 12:01:33,420][0m Trial 18 finished with value: 0.4977958431876854 and parameters: {'n_estimators': 31, 'learning_rate': 0.24621638788623967, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  640
true_negatives_ctr:  7529
false_positives_ctr:  821
false_negatives_ctr:  54
64200323708700
hybrid_recall 0.879606139593277
hybrid_precision 0.33268526303455087
hybrid_f1 0.4827750147911144
hybrid_accuracy 0.8850270095287126
hybrid_mcc 0.4977958431876854


[32m[I 2022-10-21 12:01:46,983][0m Trial 19 finished with value: 0.5194555000222575 and parameters: {'n_estimators': 621, 'learning_rate': 0.16460961371280966, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  585
true_negatives_ctr:  7813
false_positives_ctr:  537
false_negatives_ctr:  109
51507875931600
hybrid_recall 0.7650383280112855
hybrid_precision 0.40873043584777885
hybrid_f1 0.5328041755349595
hybrid_accuracy 0.913840306335416
hybrid_mcc 0.5194555000222575


[32m[I 2022-10-21 12:01:52,627][0m Trial 20 finished with value: 0.48840407187667084 and parameters: {'n_estimators': 234, 'learning_rate': 0.3706490553170695, 'loss': 'absolute_error'}. Best is trial 1 with value: 0.5264442861839969.[0m


true_positives_ctr:  564
true_negatives_ctr:  7791
false_positives_ctr:  559
false_negatives_ctr:  130
51547275456700
hybrid_recall 0.7229838638762651
hybrid_precision 0.39121677345419004
hybrid_f1 0.5077064309760014
hybrid_accuracy 0.9078515592065679
hybrid_mcc 0.48840407187667084


[32m[I 2022-10-21 12:01:58,658][0m Trial 21 finished with value: 0.5274255263066426 and parameters: {'n_estimators': 247, 'learning_rate': 0.1285425904126249, 'loss': 'absolute_error'}. Best is trial 21 with value: 0.5274255263066426.[0m


true_positives_ctr:  609
true_negatives_ctr:  7752
false_positives_ctr:  598
false_negatives_ctr:  85
54815459979100
hybrid_recall 0.8152324864362275
hybrid_precision 0.3939294052124702
hybrid_f1 0.5311845348579832
hybrid_accuracy 0.9094164237811908
hybrid_mcc 0.5274255263066426


[32m[I 2022-10-21 12:02:02,121][0m Trial 22 finished with value: 0.5228130013113278 and parameters: {'n_estimators': 108, 'learning_rate': 0.12164545790241707, 'loss': 'absolute_error'}. Best is trial 21 with value: 0.5274255263066426.[0m


true_positives_ctr:  645
true_negatives_ctr:  7594
false_positives_ctr:  756
false_negatives_ctr:  49
62050879400700
hybrid_recall 0.8914393102164241
hybrid_precision 0.3559479561335502
hybrid_f1 0.5087529896262588
hybrid_accuracy 0.8937881183350003
hybrid_mcc 0.5228130013113278


[32m[I 2022-10-21 12:02:05,264][0m Trial 23 finished with value: 0.515386435380825 and parameters: {'n_estimators': 94, 'learning_rate': 0.1284149235448451, 'loss': 'absolute_error'}. Best is trial 21 with value: 0.5274255263066426.[0m


true_positives_ctr:  642
true_negatives_ctr:  7578
false_positives_ctr:  772
false_negatives_ctr:  52
62520133018000
hybrid_recall 0.8854627863342148
hybrid_precision 0.34994861591646226
hybrid_f1 0.5016409528982438
hybrid_accuracy 0.8917155610478045
hybrid_mcc 0.515386435380825


[32m[I 2022-10-21 12:02:07,546][0m Trial 24 finished with value: 0.5243927592795506 and parameters: {'n_estimators': 66, 'learning_rate': 0.17257895336666493, 'loss': 'absolute_error'}. Best is trial 21 with value: 0.5274255263066426.[0m


true_positives_ctr:  649
true_negatives_ctr:  7581
false_positives_ctr:  769
false_negatives_ctr:  45
62664124693200
hybrid_recall 0.9014200112594468
hybrid_precision 0.35357988431796034
hybrid_f1 0.5079267088805126
hybrid_accuracy 0.8929376727827313
hybrid_mcc 0.5243927592795506


[32m[I 2022-10-21 12:02:09,718][0m Trial 25 finished with value: 0.5266860198955369 and parameters: {'n_estimators': 60, 'learning_rate': 0.17365992932973964, 'loss': 'absolute_error'}. Best is trial 21 with value: 0.5274255263066426.[0m


true_positives_ctr:  654
true_negatives_ctr:  7569
false_positives_ctr:  781
false_negatives_ctr:  40
63274020533500
hybrid_recall 0.9116006022445424
hybrid_precision 0.3520559397075894
hybrid_f1 0.5079456260566158
hybrid_accuracy 0.892205487698101
hybrid_mcc 0.5266860198955369


[32m[I 2022-10-21 12:02:13,820][0m Trial 26 finished with value: 0.48951304340447166 and parameters: {'n_estimators': 154, 'learning_rate': 0.24775269811428793, 'loss': 'squared_error'}. Best is trial 21 with value: 0.5274255263066426.[0m


true_positives_ctr:  601
true_negatives_ctr:  7657
false_positives_ctr:  693
false_negatives_ctr:  93
58114154650000
hybrid_recall 0.7979433801718376
hybrid_precision 0.35770633760199727
hybrid_f1 0.4939721781524907
hybrid_accuracy 0.8949290246084295
hybrid_mcc 0.48951304340447166


[32m[I 2022-10-21 12:02:15,539][0m Trial 27 finished with value: 0.513889208776058 and parameters: {'n_estimators': 37, 'learning_rate': 0.17101921672131148, 'loss': 'absolute_error'}. Best is trial 21 with value: 0.5274255263066426.[0m


true_positives_ctr:  675
true_negatives_ctr:  7422
false_positives_ctr:  928
false_negatives_ctr:  19
69121120992700
hybrid_recall 0.9575093698269693
hybrid_precision 0.32217763102302993
hybrid_f1 0.4821305526246683
hybrid_accuracy 0.8755078751772197
hybrid_mcc 0.513889208776058


[32m[I 2022-10-21 12:02:17,058][0m Trial 28 finished with value: 0.4932355070774898 and parameters: {'n_estimators': 26, 'learning_rate': 0.14989203091611797, 'loss': 'absolute_error'}. Best is trial 21 with value: 0.5274255263066426.[0m


true_positives_ctr:  689
true_negatives_ctr:  7253
false_positives_ctr:  1097
false_negatives_ctr:  5
75118060181200
hybrid_recall 0.9895636220980973
hybrid_precision 0.2914137669893984
hybrid_f1 0.45023817789118376
hybrid_accuracy 0.8559737558284418
hybrid_mcc 0.4932355070774898


[32m[I 2022-10-21 12:02:19,773][0m Trial 29 finished with value: 0.5106271384203627 and parameters: {'n_estimators': 54, 'learning_rate': 0.11965308228402907, 'loss': 'absolute_error'}. Best is trial 21 with value: 0.5274255263066426.[0m


true_positives_ctr:  675
true_negatives_ctr:  7411
false_positives_ctr:  939
false_negatives_ctr:  19
69492556698000
hybrid_recall 0.9573041317315628
hybrid_precision 0.3187878153415933
hybrid_f1 0.47829922204609054
hybrid_accuracy 0.8740908806935468
hybrid_mcc 0.5106271384203627
Number of finished trials:  30
Best trial:
  Value: 0.5274255263066426
  Params: 
    n_estimators: 247
    learning_rate: 0.1285425904126249
    loss: absolute_error


## Min u

In [22]:
exogenous_data = pd.read_csv('..\data\processed\production\exogenous_data_extended.csv').drop(columns=['date'])
y_min_u_sparse_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_min_sparse_bool_constr.csv').drop(columns=['timestamps'])
y_min_u_sparse_bool = pd.DataFrame(y_min_u_sparse_bool['bus_16'], columns=['bus_16'])
# Testing data
train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_min_u_sparse_bool, test_size=0.2, scaling=True)
data_min_class = {'X_train': train_x, 'X_test': valid_x, 'y_train': train_y, 'y_test': valid_y}

In [23]:
def objective(trial):
    # import data
    y_min_u_balanced_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_min_balanced_bool_constr.csv')
    y_min_u_balanced_bool = pd.DataFrame(y_min_u_balanced_bool['bus_16'], columns=['bus_16'])
    exogenous_data = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_min_balanced.csv').drop(columns=['date'])
    train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_min_u_balanced_bool, test_size=0.2, scaling=True)
    data = {'X_train': train_x, 'X_test': valid_x, 'y_train': train_y, 'y_test': valid_y}
    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        # use exact for small dataset.
        "tree_method": "exact",
        # defines booster, gblinear for linear functions.
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        # L2 regularization weight.
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        # L1 regularization weight.
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        # sampling ratio for training data.
        "subsample": trial.suggest_float("subsample", 0.2, 1.0),
        # sampling according to each tree.
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
    }

    if param["booster"] in ["gbtree", "dart"]:
        # maximum depth of the tree, signifies complexity of the tree.
        param["max_depth"] = trial.suggest_int("max_depth", 3, 9, step=2)
        # minimum child weight, larger the term more conservative the tree.
        param["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        # defines how selective algorithm is.
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])

    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)
    model = my_ai.Context(my_ai.XGBoostClassifierStrategy(param))
    model.fit(data)
    prediction = model.predict(data_min_class)
    prediction = pd.DataFrame(prediction , columns=valid_y.columns)
    tp, tn, fp, fn = 0, 0, 0, 0
    tp += sum((prediction['bus_16'] == 1) & (data_min_class['y_test']['bus_16'] == 1))
    tn += sum((prediction['bus_16'] == 0) & (data_min_class['y_test']['bus_16'] == 0))
    fp += sum((prediction['bus_16'] == 1) & (data_min_class['y_test']['bus_16'] == 0))
    fn += sum((prediction['bus_16'] == 0) & (data_min_class['y_test']['bus_16'] == 1))
    if (tp+fp)*(tp+fn)*(tn+fp)*(tn+fn) > 0:
        mcc_score = (tp*tn - fp*fn) / np.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
    else: 
        mcc_score = 0
    return mcc_score
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=num_trials)
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
# Write the results to a csv file.
with open("./hyper_params_results_mcc/params_xgboost_balanced_classifier_min_u_test_bus_16.csv", "w") as f:
    f.write("params,value\n")
    for key, value in trial.params.items():
        f.write("{},{}\n".format(key, value))
    f.write("value,{}\n".format(trial.value))

[32m[I 2022-10-21 12:02:21,868][0m A new study created in memory with name: no-name-a7aba0fd-ad2b-4b23-ab87-76443f8eae36[0m
[32m[I 2022-10-21 12:02:25,639][0m Trial 0 finished with value: 0.4174425022397923 and parameters: {'booster': 'gbtree', 'lambda': 0.0004956780420458229, 'alpha': 0.09512710689043151, 'subsample': 0.7749314446370554, 'colsample_bytree': 0.9697391306473191, 'max_depth': 5, 'min_child_weight': 3, 'eta': 3.486310198546269e-05, 'gamma': 0.005375091177485281, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.4174425022397923.[0m
[32m[I 2022-10-21 12:02:31,670][0m Trial 1 finished with value: 0.46786591953863593 and parameters: {'booster': 'dart', 'lambda': 1.3453075168902139e-05, 'alpha': 6.291305213032538e-07, 'subsample': 0.523907089727154, 'colsample_bytree': 0.8381216848105304, 'max_depth': 7, 'min_child_weight': 4, 'eta': 7.798974546525333e-05, 'gamma': 2.6470631526961524e-06, 'grow_policy': 'depthwise', 'sample_type': 'weighted', 'normalize_type'

Number of finished trials:  30
Best trial:
  Value: 0.5611822753355297
  Params: 
    booster: dart
    lambda: 0.022918347917256048
    alpha: 0.00739342179575011
    subsample: 0.9822013746871794
    colsample_bytree: 0.5445470368014637
    max_depth: 7
    min_child_weight: 7
    eta: 0.03787709447887554
    gamma: 1.981572606347494e-05
    grow_policy: lossguide
    sample_type: uniform
    normalize_type: tree
    rate_drop: 8.473403749879414e-06
    skip_drop: 1.0678716691937125e-08


In [24]:
# Classification data sparse
y_min_u_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_min_sparse_bool_constr.csv').drop(columns=['timestamps'])
y_min_u_bool = pd.DataFrame(y_min_u_bool['bus_16'], columns=['bus_16'])
train_x, test_x, train_y, test_y, scaler = utils.split_and_suffle(exogenous_data, y_min_u_bool, test_size=0.2, scaling=True)
data_min_u_bool = {'X_train': deepcopy(train_x), 'X_test': deepcopy(test_x), 'y_train': deepcopy(train_y), 'y_test': deepcopy(test_y), 'scaler': deepcopy(scaler)}
# reg data sparse
y_min_u = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_min_constr.csv').drop(columns=['timestamps'])
y_min_u = pd.DataFrame(y_min_u['bus_16'], columns=['bus_16'])
exogenous_data = pd.read_csv('..\data\processed\production\exogenous_data_extended.csv').drop(columns=['date'])
train_x, valid_x, train_y, valid_y, scaler_min = utils.split_and_suffle(exogenous_data, y_min_u, test_size=0.2, scaling=True)
data_min_reg = {'X_train': deepcopy(train_x), 'X_test': deepcopy(valid_x), 'y_train': deepcopy(train_y), 'y_test': deepcopy(valid_y)}
threshold_min = utils.compute_threshold(y_min_u) / scaler_min['y']
# balanced data reg
y_min_u_balanced = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_min_balanced_constr.csv')
y_min_u_balanced = pd.DataFrame(y_min_u_balanced['bus_16'], columns=['bus_16'])
exogenous_data_vm_pu_min_balanced = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_min_balanced.csv').drop(columns=['date'])
X_min_u_train, X_min_u_test, y_min_u_train, y_min_u_test = utils.split_and_suffle(exogenous_data_vm_pu_min_balanced, y_min_u_balanced)
data_min_u_balanced = {'X_train': deepcopy(X_min_u_train), 'X_test': deepcopy(X_min_u_test), 'y_train': deepcopy(y_min_u_train), 'y_test': deepcopy(y_min_u_test)}
# class data bool balanced
y_min_u_balanced_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_min_balanced_bool_constr.csv')
y_min_u_balanced_bool = pd.DataFrame(y_min_u_balanced_bool['bus_16'], columns=['bus_16'])
exogenous_data_balanced = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_min_balanced.csv').drop(columns=['date'])
train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data_balanced, y_min_u_balanced_bool, test_size=0.2, scaling=True)
data_min_u_bool_balanced = {'X_train': train_x, 'X_test': valid_x, 'y_train': train_y, 'y_test': valid_y}
# hybrid data training
data_min_u_hybrid = {}
data_min_u_hybrid['X_train'] = pd.concat([data_min_u_balanced['X_train'], data_min_u_bool_balanced['y_train']], axis=1)
data_min_u_hybrid['y_train'] = deepcopy(data_min_u_balanced['y_train'])


In [25]:

# hybrid data test
import ast
def get_hyper_params_from_df(df):
    output = {}
    for row in df.iterrows():
        if row[1]['params'] != 'value':
            try:
                output[row[1]['params']] = ast.literal_eval(row[1]['value'])
            except :
                output[row[1]['params']] = row[1]['value']
    return output
# import classifier hyper params 
df = pd.read_csv('hyper_params_results_mcc/params_xgboost_balanced_classifier_min_u_test_bus_16.csv')
hyper_params = get_hyper_params_from_df(df)
classifier_min_u = my_ai.Context(strategy=my_ai.XGBoostClassifierStrategy(hyper_params)) 
classifier_min_u.fit(data_min_u_bool_balanced)
class_prediction = classifier_min_u.predict(data_min_u_bool).to_csv('class_prediction_min_u.csv', index=False)
data_min_u_hybrid['X_test']  = pd.concat([data_min_reg['X_test'], class_prediction], axis=1)
data_min_u_hybrid['y_test'] = deepcopy(data_min_reg['y_test'])

In [26]:
print('data_min_u_hybrid X_train shape: ', data_min_u_hybrid['X_train'].shape)
print('data_min_u_hybrid y_train shape: ', data_min_u_hybrid['y_train'].shape)
print('data_min_u_hybrid X_test shape: ', data_min_u_hybrid['X_test'].shape)
print('data_min_u_hybrid y_test shape: ', data_min_u_hybrid['y_test'].shape)


data_min_u_hybrid X_train shape:  (11164, 12)
data_min_u_hybrid y_train shape:  (11164, 1)
data_min_u_hybrid X_test shape:  (9044, 11)
data_min_u_hybrid y_test shape:  (9044, 1)


In [27]:
num_trials = 30
# Same implementation as above, but for Gradient Boosting Regression.
def objective(trial):
    # import data
    y_min_u = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_min_balanced_constr.csv')
    y_min_u = pd.DataFrame(y_min_u['bus_16'], columns=['bus_16'])
    exogenous_data = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_min_balanced.csv').drop(columns=['date'])
    train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_min_u, test_size=0.2, scaling=True)
    data = {'X_train': deepcopy(train_x), 'X_test': deepcopy(valid_x), 'y_train': deepcopy(train_y), 'y_test': deepcopy(valid_y)}
    # class data bool balanced
    y_min_u_balanced_bool = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_min_balanced_bool_constr.csv')
    y_min_u_balanced_bool = pd.DataFrame(y_min_u_balanced_bool['bus_16'], columns=['bus_16'])
    train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_min_u_balanced_bool, test_size=0.2, scaling=True)
    data_min_u_bool_balanced = {'X_train': deepcopy(train_x), 'X_test': deepcopy(valid_x), 'y_train': deepcopy(train_y), 'y_test': deepcopy(valid_y)}
    # hybrid data training
    data_min_u_hybrid = {}
    data_min_u_hybrid['X_train'] =  pd.concat([data['X_train'], data_min_u_bool_balanced['y_train']], axis=1)
    data_min_u_hybrid['y_train'] = data['y_train']
    # Testing data
    class_prediction = pd.read_csv('class_prediction_min_u.csv')
    data_min_u_hybrid['X_test']  = pd.concat([data_min_reg['X_test'], class_prediction], axis=1)
    data_min_u_hybrid['y_test'] = data_min_reg['y_test']
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 1000, log=True),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 1.0, log=True) ,
        'loss': trial.suggest_categorical('loss', ['squared_error', 'absolute_error'])
    }
    model = my_ai.Context(my_ai.GradientBoostRegressorStrategy(param))
    model.fit(data_min_u_hybrid)
    prediction = model.predict(data_min_u_hybrid)
    # evaluate the regression performance with my metrics
    metric = metrics.Metrics()
    metric.get_prediction_scores(prediction, data_min_u_hybrid['y_test'], threshold=threshold_min)
    print('hybrid_recall', metric.hybrid_recall)
    print('hybrid_precision', metric.hybrid_precision)
    print('hybrid_f1', metric.hybrid_f1)
    print('hybrid_accuracy', metric.hybrid_accuracy)
    print('hybrid_mcc', metric.hybrid_mcc)
    return metric.hybrid_mcc
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=num_trials)
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
# Write the results to a csv file.
with open("./hyper_params_results_mcc/params_gradient_boost_regression_balanced_min_u_test_one_bus.csv", "w") as f:
    f.write("params,value\n")
    for key, value in trial.params.items():
        f.write("{},{}\n".format(key, value))
    f.write("value,{}\n".format(trial.value))

[32m[I 2022-10-21 12:05:28,752][0m A new study created in memory with name: no-name-78f35e5d-6a18-480b-98b2-d6af85cc41ec[0m
[32m[I 2022-10-21 12:05:59,645][0m Trial 0 finished with value: 0.35550036638169635 and parameters: {'n_estimators': 801, 'learning_rate': 0.755274254012659, 'loss': 'squared_error'}. Best is trial 0 with value: 0.35550036638169635.[0m


true_positives_ctr:  851
true_negatives_ctr:  6332
false_positives_ctr:  1690
false_negatives_ctr:  171
135472758029532
hybrid_recall 0.7799995430208047
hybrid_precision 0.2658186217859917
hybrid_f1 0.396509470760239
hybrid_accuracy 0.7612030721530926
hybrid_mcc 0.35550036638169635


[32m[I 2022-10-21 12:06:01,652][0m Trial 1 finished with value: 0.3760505210156553 and parameters: {'n_estimators': 18, 'learning_rate': 0.12024991973499621, 'loss': 'squared_error'}. Best is trial 1 with value: 0.3760505210156553.[0m


true_positives_ctr:  1008
true_negatives_ctr:  5504
false_positives_ctr:  2518
false_negatives_ctr:  14
159513541594512
hybrid_recall 0.9808235210681171
hybrid_precision 0.2283099141158045
hybrid_f1 0.37040036664565557
hybrid_accuracy 0.6793032610394036
hybrid_mcc 0.3760505210156553


[32m[I 2022-10-21 12:06:04,341][0m Trial 2 finished with value: 0.5305681974796951 and parameters: {'n_estimators': 43, 'learning_rate': 0.18663688526902306, 'loss': 'absolute_error'}. Best is trial 2 with value: 0.5305681974796951.[0m


true_positives_ctr:  982
true_negatives_ctr:  6804
false_positives_ctr:  1218
false_negatives_ctr:  40
123442933891200
hybrid_recall 0.9478121738718778
hybrid_precision 0.37199605840597216
hybrid_f1 0.5342933680312244
hybrid_accuracy 0.8357080569975418
hybrid_mcc 0.5305681974796951


[32m[I 2022-10-21 12:06:18,838][0m Trial 3 finished with value: 0.3738251946445303 and parameters: {'n_estimators': 362, 'learning_rate': 0.5104337239287468, 'loss': 'squared_error'}. Best is trial 2 with value: 0.5305681974796951.[0m


true_positives_ctr:  950
true_negatives_ctr:  5906
false_positives_ctr:  2116
false_negatives_ctr:  72
150266307521232
hybrid_recall 0.9010594931207634
hybrid_precision 0.24724945450804522
hybrid_f1 0.3880253108075934
hybrid_accuracy 0.7206864549187053
hybrid_mcc 0.3738251946445303


[32m[I 2022-10-21 12:06:21,520][0m Trial 4 finished with value: 0.5099873928703276 and parameters: {'n_estimators': 43, 'learning_rate': 0.19213077654543034, 'loss': 'absolute_error'}. Best is trial 2 with value: 0.5305681974796951.[0m


true_positives_ctr:  982
true_negatives_ctr:  6685
false_positives_ctr:  1337
false_negatives_ctr:  40
127857612563100
hybrid_recall 0.9480482699912073
hybrid_precision 0.3514425673121542
hybrid_f1 0.5127924081911406
hybrid_accuracy 0.8211358845511669
hybrid_mcc 0.5099873928703276


[32m[I 2022-10-21 12:06:27,945][0m Trial 5 finished with value: 0.4994115836013744 and parameters: {'n_estimators': 134, 'learning_rate': 0.1371772900842474, 'loss': 'absolute_error'}. Best is trial 2 with value: 0.5305681974796951.[0m


true_positives_ctr:  979
true_negatives_ctr:  6648
false_positives_ctr:  1374
false_negatives_ctr:  43
129076300812732
hybrid_recall 0.9434912055815048
hybrid_precision 0.3427506128331564
hybrid_f1 0.5028326466858843
hybrid_accuracy 0.8150514466797956
hybrid_mcc 0.4994115836013744


[32m[I 2022-10-21 12:06:29,812][0m Trial 6 finished with value: 0.5175864785401437 and parameters: {'n_estimators': 21, 'learning_rate': 0.1353527764321486, 'loss': 'absolute_error'}. Best is trial 2 with value: 0.5305681974796951.[0m


true_positives_ctr:  989
true_negatives_ctr:  6706
false_positives_ctr:  1316
false_negatives_ctr:  33
127350290373180
hybrid_recall 0.9562427955041263
hybrid_precision 0.35504838926776755
hybrid_f1 0.5178292483552497
hybrid_accuracy 0.8251761463495374
hybrid_mcc 0.5175864785401437


[32m[I 2022-10-21 12:06:34,544][0m Trial 7 finished with value: 0.4044854313574239 and parameters: {'n_estimators': 103, 'learning_rate': 0.17692643246050935, 'loss': 'squared_error'}. Best is trial 2 with value: 0.5305681974796951.[0m


true_positives_ctr:  993
true_negatives_ctr:  5856
false_positives_ctr:  2166
false_negatives_ctr:  29
152415679476060
hybrid_recall 0.9616682109859486
hybrid_precision 0.2549445997867976
hybrid_f1 0.4030404990093344
hybrid_accuracy 0.721420585099856
hybrid_mcc 0.4044854313574239


[32m[I 2022-10-21 12:06:45,073][0m Trial 8 finished with value: 0.4247616114788097 and parameters: {'n_estimators': 259, 'learning_rate': 0.40917207044793713, 'loss': 'squared_error'}. Best is trial 2 with value: 0.5305681974796951.[0m


true_positives_ctr:  963
true_negatives_ctr:  6229
false_positives_ctr:  1793
false_negatives_ctr:  59
142077497732352
hybrid_recall 0.9203132147038934
hybrid_precision 0.28258181492400003
hybrid_f1 0.4323964637047605
hybrid_accuracy 0.7625048153790367
hybrid_mcc 0.4247616114788097


[32m[I 2022-10-21 12:06:57,672][0m Trial 9 finished with value: 0.4097863393013696 and parameters: {'n_estimators': 312, 'learning_rate': 0.4867888524795504, 'loss': 'squared_error'}. Best is trial 2 with value: 0.5305681974796951.[0m


true_positives_ctr:  953
true_negatives_ctr:  6174
false_positives_ctr:  1848
false_negatives_ctr:  69
143363962849212
hybrid_recall 0.9084328831690994
hybrid_precision 0.2734465262246344
hybrid_f1 0.4203606801784364
hybrid_accuracy 0.7539948929704233
hybrid_mcc 0.4097863393013696


[32m[I 2022-10-21 12:07:00,274][0m Trial 10 finished with value: 0.48712062133641143 and parameters: {'n_estimators': 38, 'learning_rate': 0.2660986990443797, 'loss': 'absolute_error'}. Best is trial 2 with value: 0.5305681974796951.[0m


true_positives_ctr:  980
true_negatives_ctr:  6560
false_positives_ctr:  1462
false_negatives_ctr:  42
132176647720656
hybrid_recall 0.9454281811927363
hybrid_precision 0.3303516540399083
hybrid_f1 0.4896201598546332
hybrid_accuracy 0.8050244473860232
hybrid_mcc 0.48712062133641143


[32m[I 2022-10-21 12:07:01,654][0m Trial 11 finished with value: 0.5306621385055988 and parameters: {'n_estimators': 10, 'learning_rate': 0.23634209584878121, 'loss': 'absolute_error'}. Best is trial 11 with value: 0.5306621385055988.[0m


true_positives_ctr:  988
true_negatives_ctr:  6787
false_positives_ctr:  1235
false_negatives_ctr:  34
124314293366172
hybrid_recall 0.9547913541733033
hybrid_precision 0.36844622553583684
hybrid_f1 0.5317099151563252
hybrid_accuracy 0.8350584967992037
hybrid_mcc 0.5306621385055988


[32m[I 2022-10-21 12:07:03,195][0m Trial 12 finished with value: 0.5313756056255516 and parameters: {'n_estimators': 10, 'learning_rate': 0.288120464223893, 'loss': 'absolute_error'}. Best is trial 12 with value: 0.5313756056255516.[0m


true_positives_ctr:  985
true_negatives_ctr:  6804
false_positives_ctr:  1218
false_negatives_ctr:  37
123557081383932
hybrid_recall 0.9512756668567844
hybrid_precision 0.3707843869951827
hybrid_f1 0.533588718562736
hybrid_accuracy 0.8362304551810147
hybrid_mcc 0.5313756056255516


[32m[I 2022-10-21 12:07:04,599][0m Trial 13 finished with value: 0.5032433329370816 and parameters: {'n_estimators': 10, 'learning_rate': 0.2754086360034126, 'loss': 'absolute_error'}. Best is trial 12 with value: 0.5313756056255516.[0m


true_positives_ctr:  995
true_negatives_ctr:  6592
false_positives_ctr:  1430
false_negatives_ctr:  27
131594481570300
hybrid_recall 0.9636963613207715
hybrid_precision 0.33841285647780645
hybrid_f1 0.5009214802475638
hybrid_accuracy 0.8121422185821384
hybrid_mcc 0.5032433329370816


[32m[I 2022-10-21 12:07:06,020][0m Trial 14 finished with value: 0.5257376824068863 and parameters: {'n_estimators': 11, 'learning_rate': 0.3224010333295163, 'loss': 'absolute_error'}. Best is trial 12 with value: 0.5313756056255516.[0m


true_positives_ctr:  985
true_negatives_ctr:  6765
false_positives_ctr:  1257
false_negatives_ctr:  37
125027569672656
hybrid_recall 0.9514927425440428
hybrid_precision 0.36529872021946447
hybrid_f1 0.5279181874706179
hybrid_accuracy 0.8318700529879971
hybrid_mcc 0.5257376824068863


[32m[I 2022-10-21 12:07:07,833][0m Trial 15 finished with value: 0.45028381463847156 and parameters: {'n_estimators': 21, 'learning_rate': 0.8998090396010237, 'loss': 'absolute_error'}. Best is trial 12 with value: 0.5313756056255516.[0m


true_positives_ctr:  968
true_negatives_ctr:  6404
false_positives_ctr:  1618
false_negatives_ctr:  54
136917863811792
hybrid_recall 0.9293933961324327
hybrid_precision 0.3019643718565483
hybrid_f1 0.4558280304335817
hybrid_accuracy 0.7812434995305451
hybrid_mcc 0.45028381463847156


[32m[I 2022-10-21 12:07:09,214][0m Trial 16 finished with value: 0.5307209251045736 and parameters: {'n_estimators': 10, 'learning_rate': 0.23439852678434817, 'loss': 'absolute_error'}. Best is trial 12 with value: 0.5313756056255516.[0m


true_positives_ctr:  988
true_negatives_ctr:  6787
false_positives_ctr:  1235
false_negatives_ctr:  34
124314293366172
hybrid_recall 0.9547883303109492
hybrid_precision 0.36850323770727955
hybrid_f1 0.5317688097591892
hybrid_accuracy 0.8351049309707597
hybrid_mcc 0.5307209251045736


[32m[I 2022-10-21 12:07:13,162][0m Trial 17 finished with value: 0.48276201150053133 and parameters: {'n_estimators': 68, 'learning_rate': 0.3465014130762202, 'loss': 'absolute_error'}. Best is trial 12 with value: 0.5313756056255516.[0m


true_positives_ctr:  971
true_negatives_ctr:  6595
false_positives_ctr:  1427
false_negatives_ctr:  51
130660124944272
hybrid_recall 0.9331074089789828
hybrid_precision 0.33042055600792397
hybrid_f1 0.48802698069788075
hybrid_accuracy 0.8063175552539917
hybrid_mcc 0.48276201150053133


[32m[I 2022-10-21 12:07:14,875][0m Trial 18 finished with value: 0.5257677343884781 and parameters: {'n_estimators': 18, 'learning_rate': 0.5729371992823895, 'loss': 'absolute_error'}. Best is trial 12 with value: 0.5313756056255516.[0m


true_positives_ctr:  997
true_negatives_ctr:  6710
false_positives_ctr:  1312
false_negatives_ctr:  25
127495567509660
hybrid_recall 0.9675673002879454
hybrid_precision 0.35936190582678124
hybrid_f1 0.5240774375073733
hybrid_accuracy 0.826551263889948
hybrid_mcc 0.5257677343884781


[32m[I 2022-10-21 12:07:17,095][0m Trial 19 finished with value: 0.5280930195790821 and parameters: {'n_estimators': 31, 'learning_rate': 0.10179594997349826, 'loss': 'absolute_error'}. Best is trial 12 with value: 0.5313756056255516.[0m


true_positives_ctr:  985
true_negatives_ctr:  6782
false_positives_ctr:  1240
false_negatives_ctr:  37
124389653831100
hybrid_recall 0.9513476551796374
hybrid_precision 0.367545747084445
hybrid_f1 0.5302381285095268
hybrid_accuracy 0.8338087579204286
hybrid_mcc 0.5280930195790821


[32m[I 2022-10-21 12:07:21,723][0m Trial 20 finished with value: 0.5070062142903733 and parameters: {'n_estimators': 73, 'learning_rate': 0.22273549617944655, 'loss': 'absolute_error'}. Best is trial 12 with value: 0.5313756056255516.[0m


true_positives_ctr:  974
true_negatives_ctr:  6713
false_positives_ctr:  1309
false_negatives_ctr:  48
126546576589692
hybrid_recall 0.9372376211610545
hybrid_precision 0.3523661600178018
hybrid_f1 0.5121740900772641
hybrid_accuracy 0.8228281639462042
hybrid_mcc 0.5070062142903733


[32m[I 2022-10-21 12:07:23,193][0m Trial 21 finished with value: 0.5342160137420535 and parameters: {'n_estimators': 12, 'learning_rate': 0.23954789509674657, 'loss': 'absolute_error'}. Best is trial 21 with value: 0.5342160137420535.[0m


true_positives_ctr:  988
true_negatives_ctr:  6803
false_positives_ctr:  1219
false_negatives_ctr:  34
123709048483356
hybrid_recall 0.9549508218713104
hybrid_precision 0.37207502494397726
hybrid_f1 0.5355032861202386
hybrid_accuracy 0.8372746957665324
hybrid_mcc 0.5342160137420535


[32m[I 2022-10-21 12:07:24,729][0m Trial 22 finished with value: 0.5450521849143803 and parameters: {'n_estimators': 14, 'learning_rate': 0.3809648009132193, 'loss': 'absolute_error'}. Best is trial 22 with value: 0.5450521849143803.[0m


true_positives_ctr:  985
true_negatives_ctr:  6858
false_positives_ctr:  1164
false_negatives_ctr:  37
121479847889820
hybrid_recall 0.9511679448973402
hybrid_precision 0.3856066789411775
hybrid_f1 0.5487487655832025
hybrid_accuracy 0.8441935597827374
hybrid_mcc 0.5450521849143803


[32m[I 2022-10-21 12:07:26,296][0m Trial 23 finished with value: 0.5487099680412768 and parameters: {'n_estimators': 14, 'learning_rate': 0.3872266856076122, 'loss': 'absolute_error'}. Best is trial 23 with value: 0.5487099680412768.[0m


true_positives_ctr:  983
true_negatives_ctr:  6883
false_positives_ctr:  1139
false_negatives_ctr:  39
120423301058256
hybrid_recall 0.9486876622896252
hybrid_precision 0.3902600215863058
hybrid_f1 0.5530236498741614
hybrid_accuracy 0.847511362948161
hybrid_mcc 0.5487099680412768


[32m[I 2022-10-21 12:07:27,906][0m Trial 24 finished with value: 0.5342394102821 and parameters: {'n_estimators': 16, 'learning_rate': 0.3468418015965236, 'loss': 'absolute_error'}. Best is trial 23 with value: 0.5487099680412768.[0m


true_positives_ctr:  995
true_negatives_ctr:  6762
false_positives_ctr:  1260
false_negatives_ctr:  27
125512190260380
hybrid_recall 0.964143918716238
hybrid_precision 0.3692649452522916
hybrid_f1 0.5340065766481723
hybrid_accuracy 0.8333183222492243
hybrid_mcc 0.5342394102821


[32m[I 2022-10-21 12:07:29,844][0m Trial 25 finished with value: 0.5172791738985337 and parameters: {'n_estimators': 24, 'learning_rate': 0.38900517974856796, 'loss': 'absolute_error'}. Best is trial 23 with value: 0.5487099680412768.[0m


true_positives_ctr:  983
true_negatives_ctr:  6727
false_positives_ctr:  1295
false_negatives_ctr:  39
126362807570832
hybrid_recall 0.9481874374600244
hybrid_precision 0.35872547724925014
hybrid_f1 0.5205228094333361
hybrid_accuracy 0.8260752680082161
hybrid_mcc 0.5172791738985337


[32m[I 2022-10-21 12:07:31,430][0m Trial 26 finished with value: 0.5459846921994789 and parameters: {'n_estimators': 15, 'learning_rate': 0.4315603825086611, 'loss': 'absolute_error'}. Best is trial 23 with value: 0.5487099680412768.[0m


true_positives_ctr:  983
true_negatives_ctr:  6866
false_positives_ctr:  1156
false_negatives_ctr:  39
121089927990780
hybrid_recall 0.9487648744947422
hybrid_precision 0.38745183428724045
hybrid_f1 0.5502111873236112
hybrid_accuracy 0.8456386467556869
hybrid_mcc 0.5459846921994789


[32m[I 2022-10-21 12:07:33,532][0m Trial 27 finished with value: 0.47459450908453094 and parameters: {'n_estimators': 28, 'learning_rate': 0.6195470744546023, 'loss': 'absolute_error'}. Best is trial 23 with value: 0.5487099680412768.[0m


true_positives_ctr:  993
true_negatives_ctr:  6428
false_positives_ctr:  1594
false_negatives_ctr:  29
136949600143356
hybrid_recall 0.9614851032500823
hybrid_precision 0.3132221669657114
hybrid_f1 0.47251389331804594
hybrid_accuracy 0.7890683844174888
hybrid_mcc 0.47459450908453094


[32m[I 2022-10-21 12:07:36,762][0m Trial 28 finished with value: 0.49005287075360365 and parameters: {'n_estimators': 56, 'learning_rate': 0.4386730353471732, 'loss': 'absolute_error'}. Best is trial 23 with value: 0.5487099680412768.[0m


true_positives_ctr:  967
true_negatives_ctr:  6654
false_positives_ctr:  1368
false_negatives_ctr:  55
128433474079260
hybrid_recall 0.9274646670475292
hybrid_precision 0.33935281160774594
hybrid_f1 0.49689516877129714
hybrid_accuracy 0.8138791544108972
hybrid_mcc 0.49005287075360365


[32m[I 2022-10-21 12:08:03,767][0m Trial 29 finished with value: 0.34042104630329206 and parameters: {'n_estimators': 696, 'learning_rate': 0.7051596801306376, 'loss': 'squared_error'}. Best is trial 23 with value: 0.5487099680412768.[0m


true_positives_ctr:  895
true_negatives_ctr:  5974
false_positives_ctr:  2048
false_negatives_ctr:  127
147205772451612
hybrid_recall 0.8324527123571264
hybrid_precision 0.2391432527106037
hybrid_f1 0.3715494567922626
hybrid_accuracy 0.7215450446331251
hybrid_mcc 0.34042104630329206
Number of finished trials:  30
Best trial:
  Value: 0.5487099680412768
  Params: 
    n_estimators: 14
    learning_rate: 0.3872266856076122
    loss: absolute_error


In [28]:
# Same implementation as above, but for Gradient Boosting Regression.
def objective(trial):
    # import data
    y_min_u = pd.read_csv('..\data\ground_truth\\res_bus_vm_pu_min_balanced_constr.csv')
    y_min_u = pd.DataFrame(y_min_u['bus_16'], columns=['bus_16'])
    exogenous_data = pd.read_csv('..\data\ground_truth\\exogenous_data_vm_pu_min_balanced.csv').drop(columns=['date'])
    train_x, valid_x, train_y, valid_y, scaler = utils.split_and_suffle(exogenous_data, y_min_u, test_size=0.2, scaling=True)
    data = {'X_train': train_x, 'X_test': valid_x, 'y_train': train_y, 'y_test': valid_y}
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 1000, log=True),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 1.0, log=True) ,
        'loss': trial.suggest_categorical('loss', ['squared_error', 'absolute_error'])
    }
    model = my_ai.Context(my_ai.GradientBoostRegressorStrategy(param))
    model.fit(data)
    prediction = model.predict(data_min_reg)
    # evaluate the regression performance with my metrics
    metric = metrics.Metrics()
    metric.get_prediction_scores(prediction, data_min_reg['y_test'], threshold=threshold_min)
    print('hybrid_recall', metric.hybrid_recall)
    print('hybrid_precision', metric.hybrid_precision)
    print('hybrid_f1', metric.hybrid_f1)
    print('hybrid_accuracy', metric.hybrid_accuracy)
    print('hybrid_mcc', metric.hybrid_mcc)
    return metric.hybrid_mcc
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=num_trials)
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
# Write the results to a csv file.
with open("./hyper_params_results_mcc/params_gradient_boost_regression_balanced_min_u_one_bus_not hybrid.csv", "w") as f:
    f.write("params,value\n")
    for key, value in trial.params.items():
        f.write("{},{}\n".format(key, value))
    f.write("value,{}\n".format(trial.value))

[32m[I 2022-10-21 12:08:03,838][0m A new study created in memory with name: no-name-519b5a7c-0c3f-4eba-beae-73b3cb7de3cd[0m
[32m[I 2022-10-21 12:08:05,900][0m Trial 0 finished with value: 0.5613640127535421 and parameters: {'n_estimators': 25, 'learning_rate': 0.9747796838178895, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  974
true_negatives_ctr:  7000
false_positives_ctr:  1022
false_negatives_ctr:  48
115334698803072
hybrid_recall 0.9375450351637
hybrid_precision 0.4085789025755639
hybrid_f1 0.5691320254295182
hybrid_accuracy 0.8581472219387513
hybrid_mcc 0.5613640127535421


[32m[I 2022-10-21 12:08:20,162][0m Trial 1 finished with value: 0.45418007206006566 and parameters: {'n_estimators': 329, 'learning_rate': 0.6656574171461921, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  973
true_negatives_ctr:  6399
false_positives_ctr:  1623
false_negatives_ctr:  49
137234489263872
hybrid_recall 0.93495269923778
hybrid_precision 0.3031306384477307
hybrid_f1 0.4578250914323635
hybrid_accuracy 0.783148038626672
hybrid_mcc 0.45418007206006566


[32m[I 2022-10-21 12:08:42,860][0m Trial 2 finished with value: 0.4315288125411382 and parameters: {'n_estimators': 543, 'learning_rate': 0.11095980486905928, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  972
true_negatives_ctr:  6247
false_positives_ctr:  1775
false_negatives_ctr:  50
141816220245756
hybrid_recall 0.9341491827980155
hybrid_precision 0.2840552859099952
hybrid_f1 0.43564117521864043
hybrid_accuracy 0.7627809731532015
hybrid_mcc 0.4315288125411382


[32m[I 2022-10-21 12:08:48,834][0m Trial 3 finished with value: 0.47646085721102915 and parameters: {'n_estimators': 128, 'learning_rate': 0.11093848138450607, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  982
true_negatives_ctr:  6489
false_positives_ctr:  1533
false_negatives_ctr:  40
134622673620540
hybrid_recall 0.9477108185341983
hybrid_precision 0.3197113272021975
hybrid_f1 0.4781262259251505
hybrid_accuracy 0.7956246238649681
hybrid_mcc 0.47646085721102915


[32m[I 2022-10-21 12:08:52,679][0m Trial 4 finished with value: 0.4731814526758078 and parameters: {'n_estimators': 74, 'learning_rate': 0.18975227465091363, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  982
true_negatives_ctr:  6472
false_positives_ctr:  1550
false_negatives_ctr:  40
135179752409856
hybrid_recall 0.9474532838684044
hybrid_precision 0.31676340498561073
hybrid_f1 0.474789695325103
hybrid_accuracy 0.7931016606761838
hybrid_mcc 0.4731814526758078


[32m[I 2022-10-21 12:08:55,964][0m Trial 5 finished with value: 0.5377299301852745 and parameters: {'n_estimators': 53, 'learning_rate': 0.12491218931958994, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  989
true_negatives_ctr:  6806
false_positives_ctr:  1216
false_negatives_ctr:  33
123633097727580
hybrid_recall 0.956682186620517
hybrid_precision 0.3757284706724383
hybrid_f1 0.5395524764546501
hybrid_accuracy 0.837947469300305
hybrid_mcc 0.5377299301852745


[32m[I 2022-10-21 12:08:58,237][0m Trial 6 finished with value: 0.3921034246066694 and parameters: {'n_estimators': 38, 'learning_rate': 0.2980595103467954, 'loss': 'squared_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  1000
true_negatives_ctr:  5709
false_positives_ctr:  2313
false_negatives_ctr:  22
155663000606652
hybrid_recall 0.9713167665552137
hybrid_precision 0.24248891384275703
hybrid_f1 0.3880910289396704
hybrid_accuracy 0.7034997557077224
hybrid_mcc 0.3921034246066694


[32m[I 2022-10-21 12:09:02,847][0m Trial 7 finished with value: 0.38440892487120715 and parameters: {'n_estimators': 102, 'learning_rate': 0.5274912811384689, 'loss': 'squared_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  973
true_negatives_ctr:  5843
false_positives_ctr:  2179
false_negatives_ctr:  49
152258834278656
hybrid_recall 0.9327129151483575
hybrid_precision 0.24687872604094385
hybrid_f1 0.39041812134511067
hybrid_accuracy 0.7156644739100396
hybrid_mcc 0.38440892487120715


[32m[I 2022-10-21 12:09:22,032][0m Trial 8 finished with value: 0.4749781390789643 and parameters: {'n_estimators': 435, 'learning_rate': 0.14601623715413697, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  974
true_negatives_ctr:  6537
false_positives_ctr:  1485
false_negatives_ctr:  48
132754075147260
hybrid_recall 0.9364542328784651
hybrid_precision 0.3219516880737691
hybrid_f1 0.4791665647137276
hybrid_accuracy 0.7990296231378632
hybrid_mcc 0.4749781390789643


[32m[I 2022-10-21 12:09:26,939][0m Trial 9 finished with value: 0.4637870722617685 and parameters: {'n_estimators': 96, 'learning_rate': 0.6344313423899361, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  964
true_negatives_ctr:  6505
false_positives_ctr:  1517
false_negatives_ctr:  58
133494299870652
hybrid_recall 0.923627616610421
hybrid_precision 0.3155625521907353
hybrid_f1 0.47040768287147255
hybrid_accuracy 0.795533666826066
hybrid_mcc 0.4637870722617685


[32m[I 2022-10-21 12:09:28,256][0m Trial 10 finished with value: 0.32429598226066164 and parameters: {'n_estimators': 10, 'learning_rate': 0.9502470268150945, 'loss': 'squared_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  918
true_negatives_ctr:  5677
false_positives_ctr:  2345
false_negatives_ctr:  104
154651307681052
hybrid_recall 0.8635648276985929
hybrid_precision 0.2205774486065632
hybrid_f1 0.3513983921912963
hybrid_accuracy 0.6853953337918878
hybrid_mcc 0.32429598226066164


[32m[I 2022-10-21 12:09:30,001][0m Trial 11 finished with value: 0.5275590551286762 and parameters: {'n_estimators': 21, 'learning_rate': 0.2963748495621068, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  992
true_negatives_ctr:  6741
false_positives_ctr:  1281
false_negatives_ctr:  30
126178628627772
hybrid_recall 0.9604872462343305
hybrid_precision 0.364005276003393
hybrid_f1 0.527934162395392
hybrid_accuracy 0.8297896361611494
hybrid_mcc 0.5275590551286762


[32m[I 2022-10-21 12:09:31,871][0m Trial 12 finished with value: 0.5149575391588952 and parameters: {'n_estimators': 23, 'learning_rate': 0.43223260671852776, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  976
true_negatives_ctr:  6739
false_positives_ctr:  1283
false_negatives_ctr:  46
125660746790460
hybrid_recall 0.9395954522337172
hybrid_precision 0.3594461947041875
hybrid_f1 0.5199741065467377
hybrid_accuracy 0.8275549252924085
hybrid_mcc 0.5149575391588952


[32m[I 2022-10-21 12:09:34,849][0m Trial 13 finished with value: 0.516144262014283 and parameters: {'n_estimators': 46, 'learning_rate': 0.20977911011982728, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  985
true_negatives_ctr:  6713
false_positives_ctr:  1309
false_negatives_ctr:  37
126949425498000
hybrid_recall 0.9514495241467158
hybrid_precision 0.3562265099237231
hybrid_f1 0.5183723407400381
hybrid_accuracy 0.8244382859352212
hybrid_mcc 0.516144262014283


[32m[I 2022-10-21 12:09:36,121][0m Trial 14 finished with value: 0.30569856755153774 and parameters: {'n_estimators': 10, 'learning_rate': 0.9933647793958948, 'loss': 'squared_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  936
true_negatives_ctr:  5355
false_positives_ctr:  2667
false_negatives_ctr:  86
160722449052732
hybrid_recall 0.885822666394228
hybrid_precision 0.20237749728739973
hybrid_f1 0.32948088090485506
hybrid_accuracy 0.6499345425241178
hybrid_mcc 0.30569856755153774


[32m[I 2022-10-21 12:09:46,249][0m Trial 15 finished with value: 0.47101478503298316 and parameters: {'n_estimators': 233, 'learning_rate': 0.3968203091089596, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  966
true_negatives_ctr:  6542
false_positives_ctr:  1480
false_negatives_ctr:  56
132312939318672
hybrid_recall 0.9261340723757493
hybrid_precision 0.3217587145367017
hybrid_f1 0.4775918440133881
hybrid_accuracy 0.7997984948160756
hybrid_mcc 0.47101478503298316


[32m[I 2022-10-21 12:09:48,038][0m Trial 16 finished with value: 0.5470126514389256 and parameters: {'n_estimators': 22, 'learning_rate': 0.22784945475278984, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  989
true_negatives_ctr:  6857
false_positives_ctr:  1165
false_negatives_ctr:  33
121674192953040
hybrid_recall 0.9565600634959541
hybrid_precision 0.3851690972370044
hybrid_f1 0.5491978364820492
hybrid_accuracy 0.8443421934796521
hybrid_mcc 0.5470126514389256


[32m[I 2022-10-21 12:09:49,771][0m Trial 17 finished with value: 0.5441831536369168 and parameters: {'n_estimators': 20, 'learning_rate': 0.24024514251872656, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  989
true_negatives_ctr:  6843
false_positives_ctr:  1179
false_negatives_ctr:  33
122216178333312
hybrid_recall 0.9565965313198692
hybrid_precision 0.38215386736197987
hybrid_f1 0.5461317722988325
hybrid_accuracy 0.8425987493709198
hybrid_mcc 0.5441831536369168


[32m[I 2022-10-21 12:09:51,493][0m Trial 18 finished with value: 0.40556507066393677 and parameters: {'n_estimators': 16, 'learning_rate': 0.18035515309929723, 'loss': 'squared_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  1009
true_negatives_ctr:  5781
false_positives_ctr:  2241
false_negatives_ctr:  13
154381552962000
hybrid_recall 0.9823071632237509
hybrid_precision 0.24973765995236222
hybrid_f1 0.39823079109334614
hybrid_accuracy 0.7126010507132462
hybrid_mcc 0.40556507066393677


[32m[I 2022-10-21 12:09:53,756][0m Trial 19 finished with value: 0.5022457106737634 and parameters: {'n_estimators': 33, 'learning_rate': 0.393817410784235, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  973
true_negatives_ctr:  6687
false_positives_ctr:  1335
false_negatives_ctr:  49
127459272820992
hybrid_recall 0.935611461373914
hybrid_precision 0.3483673144319535
hybrid_f1 0.5076975699166325
hybrid_accuracy 0.8198328090390371
hybrid_mcc 0.5022457106737634


[32m[I 2022-10-21 12:10:02,407][0m Trial 20 finished with value: 0.47681639575191276 and parameters: {'n_estimators': 193, 'learning_rate': 0.6838976156702928, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  977
true_negatives_ctr:  6522
false_positives_ctr:  1500
false_negatives_ctr:  45
133360303848156
hybrid_recall 0.9406234703501263
hybrid_precision 0.3220182224778197
hybrid_f1 0.4797844070310455
hybrid_accuracy 0.7992791323720879
hybrid_mcc 0.47681639575191276


[32m[I 2022-10-21 12:10:48,365][0m Trial 21 finished with value: 0.4715963551349998 and parameters: {'n_estimators': 907, 'learning_rate': 0.23350360266712714, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  968
true_negatives_ctr:  6539
false_positives_ctr:  1483
false_negatives_ctr:  54
132482934884412
hybrid_recall 0.9288938209036358
hybrid_precision 0.3214442590451079
hybrid_f1 0.4776109610357363
hybrid_accuracy 0.7990910373386523
hybrid_mcc 0.4715963551349998


[32m[I 2022-10-21 12:10:51,213][0m Trial 22 finished with value: 0.5267263828333357 and parameters: {'n_estimators': 16, 'learning_rate': 0.24717055131416, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  985
true_negatives_ctr:  6765
false_positives_ctr:  1257
false_negatives_ctr:  37
125027569672656
hybrid_recall 0.9516114688181954
hybrid_precision 0.3661293448271244
hybrid_f1 0.5288033579904909
hybrid_accuracy 0.8326949045497634
hybrid_mcc 0.5267263828333357


[32m[I 2022-10-21 12:10:53,873][0m Trial 23 finished with value: 0.5272810305778918 and parameters: {'n_estimators': 26, 'learning_rate': 0.14439412652719008, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  987
true_negatives_ctr:  6765
false_positives_ctr:  1257
false_negatives_ctr:  35
125102307052800
hybrid_recall 0.9540249639199788
hybrid_precision 0.3658591034505489
hybrid_f1 0.5288929938590144
hybrid_accuracy 0.832167597892976
hybrid_mcc 0.5272810305778918


[32m[I 2022-10-21 12:10:57,446][0m Trial 24 finished with value: 0.5357427964671378 and parameters: {'n_estimators': 15, 'learning_rate': 0.2670412905494848, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  988
true_negatives_ctr:  6806
false_positives_ctr:  1216
false_negatives_ctr:  34
123595097754240
hybrid_recall 0.9553583574984571
hybrid_precision 0.37388247911376477
hybrid_f1 0.5374372217662775
hybrid_accuracy 0.8375389907249674
hybrid_mcc 0.5357427964671378


[32m[I 2022-10-21 12:11:00,865][0m Trial 25 finished with value: 0.4821020522736364 and parameters: {'n_estimators': 31, 'learning_rate': 0.34017119635151905, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  978
true_negatives_ctr:  6537
false_positives_ctr:  1485
false_negatives_ctr:  44
132889251751452
hybrid_recall 0.9426601364949452
hybrid_precision 0.326745667266507
hybrid_f1 0.48528234925646985
hybrid_accuracy 0.8018923363907472
hybrid_mcc 0.4821020522736364


[32m[I 2022-10-21 12:11:06,793][0m Trial 26 finished with value: 0.3984364422712281 and parameters: {'n_estimators': 63, 'learning_rate': 0.15748308785522458, 'loss': 'squared_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  1008
true_negatives_ctr:  5714
false_positives_ctr:  2308
false_negatives_ctr:  14
155722398623232
hybrid_recall 0.981645296465803
hybrid_precision 0.24461260013770647
hybrid_f1 0.3916350859742346
hybrid_accuracy 0.7048123725287971
hybrid_mcc 0.3984364422712281


[32m[I 2022-10-21 12:11:08,725][0m Trial 27 finished with value: 0.5440545775404398 and parameters: {'n_estimators': 13, 'learning_rate': 0.5127492523303036, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  987
true_negatives_ctr:  6851
false_positives_ctr:  1171
false_negatives_ctr:  35
121829373858192
hybrid_recall 0.9544419668313655
hybrid_precision 0.3834354636666347
hybrid_f1 0.5470858387358475
hybrid_accuracy 0.8423142122495885
hybrid_mcc 0.5440545775404398


[32m[I 2022-10-21 12:11:10,658][0m Trial 28 finished with value: 0.5411236912083278 and parameters: {'n_estimators': 23, 'learning_rate': 0.20101575981605033, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  989
true_negatives_ctr:  6829
false_positives_ctr:  1193
false_negatives_ctr:  33
122754949907856
hybrid_recall 0.9566823332294038
hybrid_precision 0.37907657876970036
hybrid_f1 0.5429959891598464
hybrid_accuracy 0.8403981221119703
hybrid_mcc 0.5411236912083278


[32m[I 2022-10-21 12:11:13,168][0m Trial 29 finished with value: 0.4653264592960774 and parameters: {'n_estimators': 40, 'learning_rate': 0.8044486591442415, 'loss': 'absolute_error'}. Best is trial 0 with value: 0.5613640127535421.[0m


true_positives_ctr:  966
true_negatives_ctr:  6518
false_positives_ctr:  1504
false_negatives_ctr:  56
133125179525520
hybrid_recall 0.9255157400595314
hybrid_precision 0.3163437366930763
hybrid_f1 0.47152051107150406
hybrid_accuracy 0.796056339619123
hybrid_mcc 0.4653264592960774
Number of finished trials:  30
Best trial:
  Value: 0.5613640127535421
  Params: 
    n_estimators: 25
    learning_rate: 0.9747796838178895
    loss: absolute_error
