In [53]:
import pickle
from sklearn.metrics import roc_auc_score, log_loss
project = 'openstack'
is_its = 'no_its'
with open('../resource/'+project+'_train.pkl', 'rb') as f_train, open('../resource/'+project+'_test.pkl', 'rb') as f_test:
        tr_data = pickle.load(f_train)
        te_data = pickle.load(f_test)
pred_list = []
kinds = ['train', 'test']
models = ['lgb', 'code_cnn', 'msg_tf']
for kind in kinds:
    for model in models:
        with open('../pred/'+is_its+'/'+project+'-'+model+'-'+'random'+'-'+kind+'.pkl', 'rb') as f:
            pred_list.append(pickle.load(f))
tr_pred = [[pred_list[0][j], pred_list[1][j], pred_list[2][j]] for j in range(len(pred_list[0]))]
te_pred = [[pred_list[3][j], pred_list[4][j], pred_list[5][j]] for j in range(len(pred_list[3]))]                
tr_data = [tr_pred, tr_data[5]]
te_data = [te_pred, te_data[5]]

In [54]:
import optuna
import numpy as np
def objective(trial):
    lgb_weights = trial.suggest_float("lgb_weight", 0.0, 1.0)
    cnn_weights = trial.suggest_float("cnn_weight", 0.0, 1.0 - lgb_weights)
    tf_weights  = 1.0 - lgb_weights - cnn_weights
    weighted_average = lgb_weights * pred_list[0] + cnn_weights * pred_list[1] + tf_weights * pred_list[2]

    # AUCの計算
    auc = roc_auc_score(tr_data[1], weighted_average)

    return np.mean(auc)

# Optunaの最適化
study = optuna.create_study(direction="maximize",sampler=optuna.samplers.RandomSampler(seed=100))
study.optimize(objective, n_trials=100)



[I 2023-12-19 14:18:52,139] A new study created in memory with name: no-name-4f4049e6-6baa-4e5d-81a6-9b2e65c4ba6a
[I 2023-12-19 14:18:52,145] Trial 0 finished with value: 0.8016085712360985 and parameters: {'lgb_weight': 0.5434049417909654, 'cnn_weight': 0.12710208559051503}. Best is trial 0 with value: 0.8016085712360985.
[I 2023-12-19 14:18:52,153] Trial 1 finished with value: 0.8141209825991957 and parameters: {'lgb_weight': 0.4245175907491331, 'cnn_weight': 0.4861538039050873}. Best is trial 1 with value: 0.8141209825991957.
[I 2023-12-19 14:18:52,155] Trial 2 finished with value: 0.744176390346291 and parameters: {'lgb_weight': 0.004718856190972565, 'cnn_weight': 0.12099545358487572}. Best is trial 1 with value: 0.8141209825991957.
[I 2023-12-19 14:18:52,161] Trial 3 finished with value: 0.8113648545344311 and parameters: {'lgb_weight': 0.6707490847267786, 'cnn_weight': 0.27191277549924847}. Best is trial 1 with value: 0.8141209825991957.
[I 2023-12-19 14:18:52,161] Trial 4 finish

In [55]:
# 結果の表示
print("Best trial:")
trial = study.best_trial
print(f"AUC: {trial.value:.3f}")
params = trial.params
print(f"Params: lgb_weights:{params['lgb_weight']:.3f}, cnn_weight:{params['cnn_weight']:.3f}, tf_weights:{1-sum([weight for weight in params.values()]):.3f}")

Best trial:
AUC: 0.814
Params: lgb_weights:0.500, cnn_weight:0.358, tf_weights:0.142


In [5]:
import optuna
def objective(trial):
    lgb_weights = trial.suggest_float("lgb_weight", 0.0, 1.0)
    cnn_weights = trial.suggest_float("cnn_weight", 0.0, 1.0 - lgb_weights)
    tf_weights  = 1.0 - lgb_weights - cnn_weights
    weighted_average = lgb_weights * pred_list[3] + cnn_weights * pred_list[4] + tf_weights * pred_list[5]

    # AUCの計算
    auc = roc_auc_score(te_data[1], weighted_average)

    return auc

# Optunaの最適化
study = optuna.create_study(direction="maximize",sampler=optuna.samplers.RandomSampler(seed=100))
study.optimize(objective, n_trials=100)

[I 2023-12-13 23:35:22,939] A new study created in memory with name: no-name-d2a569d0-162f-4b3b-9933-e38a89c94a0a
[I 2023-12-13 23:35:22,942] Trial 0 finished with value: 0.7976759250001088 and parameters: {'lgb_weight': 0.5434049417909654, 'cnn_weight': 0.12710208559051503}. Best is trial 0 with value: 0.7976759250001088.
[I 2023-12-13 23:35:22,945] Trial 1 finished with value: 0.7926678172562353 and parameters: {'lgb_weight': 0.4245175907491331, 'cnn_weight': 0.4861538039050873}. Best is trial 0 with value: 0.7976759250001088.
[I 2023-12-13 23:35:22,948] Trial 2 finished with value: 0.6761510605276773 and parameters: {'lgb_weight': 0.004718856190972565, 'cnn_weight': 0.12099545358487572}. Best is trial 0 with value: 0.7976759250001088.
[I 2023-12-13 23:35:22,949] Trial 3 finished with value: 0.804714229199181 and parameters: {'lgb_weight': 0.6707490847267786, 'cnn_weight': 0.27191277549924847}. Best is trial 3 with value: 0.804714229199181.
[I 2023-12-13 23:35:22,949] Trial 4 finishe

In [6]:
# 結果の表示
print("Best trial:")
trial = study.best_trial
print(f"AUC: {trial.value:.3f}")
params = trial.params
print(f"Params: lgb_weights:{params['lgb_weight']:.3f}, cnn_weight:{params['cnn_weight']:.3f}, tf_weights:{1.0-sum([weight for weight in params.values()]):.3f}")

Best trial:
AUC: 0.806
Params: lgb_weights:0.778, cnn_weight:0.173, tf_weights:0.049


In [12]:
import optuna
def objective(trial):
    cnn_weights = trial.suggest_float("cnn_weight", 0.0, 1.0)
    tf_weights  = 1.0 - cnn_weights - cnn_weights
    weighted_average = cnn_weights * pred_list[4] + tf_weights * pred_list[5]

    # AUCの計算
    auc = roc_auc_score(te_data[1], weighted_average)

    return auc

# Optunaの最適化
study = optuna.create_study(direction="maximize",sampler=optuna.samplers.RandomSampler(seed=100))
study.optimize(objective, n_trials=100)

[I 2023-12-14 12:22:01,028] A new study created in memory with name: no-name-7cb9ecc4-0b18-426a-8bc1-e7ef53f083f4
[I 2023-12-14 12:22:01,077] Trial 0 finished with value: 0.7865935605627359 and parameters: {'cnn_weight': 0.5434049417909654}. Best is trial 0 with value: 0.7865935605627359.
[I 2023-12-14 12:22:01,094] Trial 1 finished with value: 0.778537115406611 and parameters: {'cnn_weight': 0.27836938509379616}. Best is trial 0 with value: 0.7865935605627359.
[I 2023-12-14 12:22:01,095] Trial 2 finished with value: 0.8028529966830608 and parameters: {'cnn_weight': 0.4245175907491331}. Best is trial 2 with value: 0.8028529966830608.
[I 2023-12-14 12:22:01,095] Trial 3 finished with value: 0.6895051755690266 and parameters: {'cnn_weight': 0.8447761323199037}. Best is trial 2 with value: 0.8028529966830608.
[I 2023-12-14 12:22:01,095] Trial 4 finished with value: 0.7114012638682374 and parameters: {'cnn_weight': 0.004718856190972565}. Best is trial 2 with value: 0.8028529966830608.
[I 2

In [14]:
# 結果の表示
print("Best trial:")
trial = study.best_trial
print(f"AUC: {trial.value:.3f}")
params = trial.params
print(f"Params: cnn_weight:{params['cnn_weight']:.3f}, tf_weights:{1.0-sum([weight for weight in params.values()]):.3f}")

Best trial:
AUC: 0.804
Params: cnn_weight:0.457, tf_weights:0.543


In [4]:
weights = [[0.65, 0.3, 0.05],[0.6, 0.3, 0.1], [0.55, 0.35, 0.1], [0.5, 0.4, 0.1], [0.45, 0.45, 0.1], [0.4, 0.45, 0.15], [0.4, 0.5, 0.1]]

In [11]:
print(project, is_its)
for weight in weights:
    weighted_average = weight[0] * pred_list[0] + weight[1] * pred_list[1] + weight[2] * pred_list[2]
    weighted_average2 = weight[0] * pred_list[3] + weight[1] * pred_list[4] + weight[2] * pred_list[5]
    print(weight)
    print(f'train auc:{roc_auc_score(tr_data[1] ,weighted_average):.3f}')
    print(f'train loss:{log_loss(tr_data[1], weighted_average):.3f}')
    print(f'test auc:{roc_auc_score(te_data[1] ,weighted_average2):.3f}')
    print(f'test loss:{log_loss(te_data[1], weighted_average2):.3f}')

qt in_its
[0.65, 0.3, 0.05]
train auc:0.804
train loss:0.479
test auc:0.799
test loss:0.464
[0.6, 0.3, 0.1]
train auc:0.807
train loss:0.469
test auc:0.797
test loss:0.457
[0.55, 0.35, 0.1]
train auc:0.809
train loss:0.471
test auc:0.795
test loss:0.465
[0.5, 0.4, 0.1]
train auc:0.812
train loss:0.473
test auc:0.793
test loss:0.473
[0.45, 0.45, 0.1]
train auc:0.813
train loss:0.475
test auc:0.790
test loss:0.482
[0.4, 0.45, 0.15]
train auc:0.814
train loss:0.466
test auc:0.785
test loss:0.476
[0.4, 0.5, 0.1]
train auc:0.814
train loss:0.478
test auc:0.786
test loss:0.491


In [317]:
weighted_average = 0.4 * pred_list[0] + 0.45 * pred_list[1] + 0.15 * pred_list[2]
print(f'{roc_auc_score(tr_data[1] ,weighted_average):.3f}')
print(f'{log_loss(tr_data[1], weighted_average):.3f}')


0.814
0.466


In [16]:
weighted_average2 = 0.6 * pred_list[3] + 0.35 * pred_list[4] + 0.05 * pred_list[5]
print(f'{roc_auc_score(te_data[1] ,weighted_average2):.3f}')
print(f'{log_loss(te_data[1], weighted_average2):.3f}')

0.816
0.559


In [417]:
import numpy as np
metrics_weight = [0.461, 0.500, 0.299, 0.313]
code_weight = [0.376, 0.358, 0.600, 0.436]
msg_weight = [0.164, 0.142, 0.100, 0.251]
weights = [np.mean(metrics_weight), np.mean(code_weight), np.mean(msg_weight)]
print(weights)
normalize = weights / sum(weights)
print(f'metrics:{normalize[0]:.3f},code:{normalize[1]:.3f},msg:{normalize[2]:.3f}')

[0.39325, 0.4425, 0.16425]
metrics:0.393,code:0.443,msg:0.164
