In [1]:
from functools import partial

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from ..tuner import  OptunaTuner
from evaluation import calc_tpr_at_fpr

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed for the synthetic dataset: passed as `random_state` to make_classification.
# NOTE(review): nothing visible in this notebook passes SEED to the tuner run,
# so trial-to-trial reproducibility depends on OptunaTuner itself — confirm.
SEED = 42
# Passed as `target_fpr` to calc_tpr_at_fpr, both for the tuning objective and
# for the final test-set score.
TARGET_FPR = 0.05
# Passed as `n_trials` to OptunaTuner.run.
N_TRIALS = 30

In [3]:
# Synthetic binary problem with a heavy class imbalance: roughly 99% / 1%
# before `flip_y` randomly reassigns the label of 0.5% of the samples.
X, y = make_classification(
    n_samples=10_000,
    n_features=50,
    n_informative=40,
    n_classes=2,
    weights=[0.99, 0.01],
    flip_y=0.005,
    random_state=SEED,
)

# Hold out the last 25% of rows as the test set. The split is unshuffled
# (and therefore unstratified); make_classification already shuffles rows.
X_dev, X_test, y_dev, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=False,
)

# Carve the validation set out of the remaining rows in the same way,
# giving 5625 train / 1875 validation / 2500 test samples.
X_train, X_val, y_train, y_val = train_test_split(
    X_dev,
    y_dev,
    test_size=0.25,
    shuffle=False,
)

In [4]:
# Objective handed to the tuner: calc_tpr_at_fpr with `target_fpr` pre-bound,
# so the tuner only has to supply the remaining arguments.
# NOTE(review): the captured log reports a higher value as "best", so the
# study appears to maximise this metric — confirm in OptunaTuner.
tpr_at_target_fpr = partial(calc_tpr_at_fpr, target_fpr=TARGET_FPR)

tuner = OptunaTuner(task='binary')

# The search space is given as a relative path; presumably it is resolved
# against the kernel's working directory — confirm in OptunaTuner.
model = tuner.run(
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    hyperparam_space='hyperparam_space.yaml',
    evaluation_function=tpr_at_target_fpr,
    n_trials=N_TRIALS,
)

[32m[I 2022-08-29 19:12:17,278][0m A new study created in memory with name: no-name-4ae6b6f4-7ff8-48e3-a3e4-aac1d5e3fe37[0m
[32m[I 2022-08-29 19:12:19,440][0m Trial 0 finished with value: 0.3793103448275862 and parameters: {'learner_type': 'RF', 'learner_RF_n_estimators': 763, 'learner_RF_max_depth': 19, 'learner_RF_criterion': 'gini', 'learner_RF_min_samples_split': 2, 'learner_RF_min_samples_leaf': 57, 'learner_RF_max_features': 'log2'}. Best is trial 0 with value: 0.3793103448275862.[0m
[32m[I 2022-08-29 19:12:26,145][0m Trial 1 finished with value: 0.5517241379310345 and parameters: {'learner_type': 'RF', 'learner_RF_n_estimators': 1544, 'learner_RF_max_depth': 8, 'learner_RF_criterion': 'entropy', 'learner_RF_min_samples_split': 6, 'learner_RF_min_samples_leaf': 14, 'learner_RF_max_features': 'sqrt'}. Best is trial 1 with value: 0.5517241379310345.[0m
[32m[I 2022-08-29 19:12:26,269][0m Trial 2 finished with value: 0.3793103448275862 and parameters: {'learner_type': 'LGB

In [5]:
# Score the held-out test set with the model returned by the tuner.
# Column 1 of predict_proba is presumably the positive-class probability —
# confirm against the model's class ordering.
y_score = model.predict_proba(X_test)[:, 1].squeeze()

# Same metric and operating point as the tuning objective, now on unseen data.
test_tpr = calc_tpr_at_fpr(
    y_score=y_score,
    y_true=y_test,
    target_fpr=TARGET_FPR,
)
print(f'TPR = {test_tpr:.2f}')

TPR = 0.46
