In [8]:
import pickle
from sklearn.metrics import roc_auc_score, log_loss
project = 'openstack'
is_its = 'in_its'
with open('../resource/'+project+'_train.pkl', 'rb') as f_train, open('../resource/'+project+'_test.pkl', 'rb') as f_test:
        tr_data = pickle.load(f_train)
        te_data = pickle.load(f_test)
pred_list = []
kinds = ['train', 'test']
models = ['lgb', 'code_cnn', 'msg_tf']
for kind in kinds:
    for model in models:
        with open('../pred/'+is_its+'/'+project+'-'+model+'-'+'random'+'-'+kind+'.pkl', 'rb') as f:
            pred_list.append(pickle.load(f))
tr_pred = [[pred_list[0][j], pred_list[1][j], pred_list[2][j]] for j in range(len(pred_list[0]))]
te_pred = [[pred_list[3][j], pred_list[4][j], pred_list[5][j]] for j in range(len(pred_list[3]))]                
tr_data = [tr_pred, tr_data[5]]
te_data = [te_pred, te_data[5]]

In [9]:
import optuna
def objective(trial):
    lgb_weights = trial.suggest_float("lgb_weight", 0.0, 1.0)
    cnn_weights = trial.suggest_float("cnn_weight", 0.0, 1.0 - lgb_weights)
    tf_weights  = 1.0 - lgb_weights - cnn_weights
    weighted_average = lgb_weights * pred_list[0] + cnn_weights * pred_list[1] + tf_weights * pred_list[2]

    # AUCの計算
    auc = roc_auc_score(tr_data[1], weighted_average)

    return auc

# Optunaの最適化
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)



[I 2023-12-15 17:28:39,286] A new study created in memory with name: no-name-9450263b-933e-48d7-afa2-602eb54330b7
[I 2023-12-15 17:28:39,301] Trial 0 finished with value: 0.8023369511849777 and parameters: {'lgb_weight': 0.2359063346496344, 'cnn_weight': 0.28017484089621614}. Best is trial 0 with value: 0.8023369511849777.
[I 2023-12-15 17:28:39,301] Trial 1 finished with value: 0.7951639887720037 and parameters: {'lgb_weight': 0.5590661182780491, 'cnn_weight': 0.0426585762101055}. Best is trial 0 with value: 0.8023369511849777.
[I 2023-12-15 17:28:39,316] Trial 2 finished with value: 0.807242146946531 and parameters: {'lgb_weight': 0.7704504491550669, 'cnn_weight': 0.14831504310462515}. Best is trial 2 with value: 0.807242146946531.
[I 2023-12-15 17:28:39,317] Trial 3 finished with value: 0.7956014021275477 and parameters: {'lgb_weight': 0.802541237637617, 'cnn_weight': 0.009866829906270316}. Best is trial 2 with value: 0.807242146946531.
[I 2023-12-15 17:28:39,317] Trial 4 finished w

In [10]:
# 結果の表示
print("Best trial:")
trial = study.best_trial
print(f"AUC: {trial.value:.3f}")
params = trial.params
print(f"Params: lgb_weights:{params['lgb_weight']:.3f}, cnn_weight:{params['cnn_weight']:.3f}, tf_weights:{1-sum([weight for weight in params.values()]):.3f}")

Best trial:
AUC: 0.818
Params: lgb_weights:0.460, cnn_weight:0.392, tf_weights:0.147


In [317]:
weighted_average = 0.4 * pred_list[0] + 0.45 * pred_list[1] + 0.15 * pred_list[2]
print(f'{roc_auc_score(tr_data[1] ,weighted_average):.3f}')
print(f'{log_loss(tr_data[1], weighted_average):.3f}')


0.814
0.466


In [11]:
weighted_average2 = 0.5 * pred_list[3] + 0.4 * pred_list[4] + 0.1 * pred_list[5]
print(f'{roc_auc_score(te_data[1] ,weighted_average2):.3f}')
print(f'{log_loss(te_data[1], weighted_average2):.3f}')

0.819
0.542


In [320]:
import numpy as np
metrics_weight = [0.471, 0.477, 0.322, 0.324]
code_weight = [0.388, 0.418, 0.516, 0.482]
msg_weight = [0.140, 0.105, 0.162, 0.194]
weights = [np.mean(code_weight), np.mean(metrics_weight), np.mean(msg_weight)]
normalize = weights / sum(weights)
print(f'metrics:{normalize[0]:.3f},code:{normalize[1]:.3f},msg:{normalize[2]:.3f}')

metrics:0.451,code:0.399,msg:0.150


In [None]:
import numpy as np
metrics_weight = [0.471, 0.477, 0.322, 0.324]
code_weight = [0.388, 0.418, 0.516, 0.482]
msg_weight = [0.140, 0.105, 0.162, 0.194]
weights = [np.mean(code_weight), np.mean(metrics_weight), np.mean(msg_weight)]
normalize = weights / sum(weights)
print(f'metrics:{normalize[0]:.3f},code:{normalize[1]:.3f},msg:{normalize[2]:.3f}')