In [1]:
import pandas as pd
import torch
import pickle
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,roc_curve
import numpy as np
from keras.models import load_model



In [2]:
import keras

# Attention layer
@keras.saving.register_keras_serializable(package="custom_objects")
class Attention(tf.keras.layers.Layer):
    """Additive-style attention that pools ``values`` into a single context vector.

    The layer is called with a two-element sequence ``[query, values]``. The
    query gets an extra axis inserted at position 1, is projected and added to
    the projected values, and the resulting scores are soft-maxed along axis 1.
    The values are then weighted by those scores and summed along axis 1.

    Args:
        units: Output width of the shared projection layer ``W``.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``,
            ``trainable``, ...), forwarded to the base ``Layer``.
    """

    def __init__(self, units, **kwargs):
        # Forward the base-layer arguments. Dropping them would discard the
        # name/dtype/trainable values that Keras passes back in when the layer
        # is rebuilt from a saved config.
        super().__init__(**kwargs)
        self.units = units
        # NOTE(review): the same Dense layer `W` projects both the query and the
        # values in `call`; confirm that sharing the projection is intended.
        self.W = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, inputs):
        """Return the attention-weighted sum of ``inputs[1]`` along axis 1.

        Args:
            inputs: Sequence whose first element is the query and whose second
                element is the values tensor. Presumably query is
                (batch, features) and values is (batch, steps, features) --
                TODO confirm against the model definition.
        """
        query = inputs[0]
        values = inputs[1]

        # Add an axis at position 1 so the query broadcasts against the values.
        query_with_time_axis = tf.expand_dims(query, 1)

        score = tf.nn.tanh(self.W(query_with_time_axis) + self.W(values))
        # Normalise the scalar scores over axis 1.
        attention_weights = tf.nn.softmax(self.V(score), axis=1)

        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector

    def get_config(self):
        """Return the layer config including ``units`` so the layer can be rebuilt."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

读取测试数据

In [3]:
# Read the test feature table and the test labels from their CSV files
# (paths are relative to the notebook's working directory).
X_test_feature, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        r'data_features_smote/X_test_features_smote.csv',
        r'data_features_smote/y_test_smote.csv',
    )
)

In [4]:
# Load the saved test embeddings onto the CPU and convert them to a NumPy array.
X_test_embedding = torch.load(
    'data_embedding_smote/test_embedding_somte.pt',
    map_location=torch.device('cpu'),
).numpy()
# Alternative label file, currently unused:
# y_test_embedding = pd.read_csv(r"data_embedding_smote/y_test_smote.csv")

In [5]:
# Insert a length-1 axis at position 1 (presumably the time-step axis the
# sequence model expects -- confirm against the model's input shape).
# np.expand_dims works directly on the NumPy array, so no round trip through a
# torch tensor is needed.
# NOTE: this reassigns X_test_embedding from itself, so re-running this cell
# adds yet another axis; re-run the embedding-loading cell first.
X_test_embedding = np.expand_dims(X_test_embedding, axis=1)

读取模型

In [6]:
# Load the XGBoost model
def load_model_pickle(model_name):
    """Unpickle and return the object stored in the file ``model_name``.

    Args:
        model_name: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file.

    Note:
        ``pickle.load`` can execute arbitrary code while deserializing, so only
        use this on files from a trusted source.
    """
    with open(model_name, 'rb') as pickle_file:
        return pickle.load(pickle_file)
# Unpickle the trained XGBoost model (path is relative to the working directory).
xgboost = load_model_pickle(
    r'ML_models_smote/xgboost_model_6.pkl'
)

In [1]:
# Load the bert_bi_lstm_attention model
from keras.models import load_model
import keras.src.models
# The file location below depends on your machine; adjust it as needed.
bert_bi_lstm_attention_model = load_model(
    r'classification_models/DL_models_smote/bert_bi-lstm_attention_smote/bert_bilstm_attention_model_l.keras',
    custom_objects={'Attention': Attention},
)
# Show the structure of the loaded model. summary() prints the table itself and
# returns None, so wrapping it in print() would emit a stray "None" line.
bert_bi_lstm_attention_model.summary()

模型评估

In [26]:
# Evaluate the composite model
def evaluate_composite_model(model1, r1, model2, r2, X_test_features, X_test_embedding, y_test, threshold=0.5):
    """Score a weighted blend of two models' positive-class probabilities.

    The blended score is ``r1 * model1.predict_proba(X_test_features)[:, 1]
    + r2 * model2.predict(X_test_embedding)``.

    Args:
        model1: Estimator exposing ``predict_proba``; column 1 of its output is
            used as the positive-class probability.
        r1: Weight applied to ``model1``'s probability.
        model2: Model exposing ``predict``; it must yield exactly one score per
            sample (any shape that flattens to one value per sample).
        r2: Weight applied to ``model2``'s score.
        X_test_features: Input passed to ``model1.predict_proba``.
        X_test_embedding: Input passed to ``model2.predict``.
        y_test: Ground-truth labels, passed unchanged to the sklearn metrics.
        threshold: Blended scores strictly greater than this are labelled 1,
            everything else 0. Defaults to 0.5.

    Returns:
        ``[accuracy, precision, recall, f1, auc, fpr, tpr]`` where ``fpr`` and
        ``tpr`` are plain Python lists (JSON-serialisable).

    Raises:
        ValueError: If the two models do not produce the same number of scores.
    """
    # Work on flat float64 arrays rather than DataFrames, so the blend does not
    # depend on index/column alignment (a mismatch would silently produce NaN).
    proba_model1 = np.asarray(model1.predict_proba(X_test_features), dtype=float)[:, 1]
    proba_model2 = np.asarray(model2.predict(X_test_embedding), dtype=float).reshape(-1)
    if proba_model1.shape != proba_model2.shape:
        raise ValueError(
            "model outputs disagree in length: "
            f"{proba_model1.shape[0]} vs {proba_model2.shape[0]}"
        )

    # Weighted blend of the two probability vectors
    y_pred_proba = proba_model1 * r1 + proba_model2 * r2

    # Hard labels from the blended probability
    y_pred = np.where(y_pred_proba > threshold, 1, 0)

    # Accuracy, precision, recall, F1-score, and AUC
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred_proba)

    # ROC curve points; the threshold array is not needed by callers
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)

    return [accuracy, precision, recall, f1, auc, fpr.tolist(), tpr.tolist()]

In [2]:
metrics_name = ['accuracy', 'precision', 'recall', 'f1-score', 'auc', 'fpr-score', 'tpr-score']
rate_list = ['xg_0.1', 'xg_0.2', 'xg_0.3', 'xg_0.4', 'xg_0.5', 'xg_0.6', 'xg_0.7', 'xg_0.8', 'xg_0.9', 'xg_1.0']
metrics_dict = {}
# Sweep the XGBoost weight r1 over 0.1, 0.2, ..., 1.0; the deep model gets the
# complementary weight r2 = 1 - r1.
# Iterating over integer tenths paired with their labels avoids relying on
# floating-point rounding, which a float-step np.arange and an int(r*10-1)
# index lookup would both depend on.
for tenth, rate_label in enumerate(rate_list, start=1):
    r1 = tenth / 10
    r2 = 1 - r1
    metrics = evaluate_composite_model(xgboost, r1, bert_bi_lstm_attention_model, r2, X_test_feature, X_test_embedding, y_test)
    # Pair each metric name with the value at the same position.
    metrics_dict[rate_label] = dict(zip(metrics_name, metrics))

In [39]:
import json
# Persist the full metrics dictionary (one entry per weight setting) as JSON.
with open('fixed_model_metrics_dict.json', 'w') as all_metrics_file:
    json.dump(metrics_dict, all_metrics_file)

In [42]:
# Save the evaluation scores of the best composite model.
# print(metrics_dict['xg_0.5'])
# Only the 'xg_0.5' entry is written to the JSON file.
with open('last_model_metrics_dict.json', 'w') as best_metrics_file:
    json.dump(metrics_dict['xg_0.5'], best_metrics_file)