In [18]:
# maoer_data深度学习模型 双层注意力机制
# 加上付费label作为输入的gpu版  添加ACC、F1 score、Precision、Recall
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math
import copy
from torch.utils.data import Dataset, DataLoader, TensorDataset
import datetime
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, recall_score, precision_score, roc_curve, \
    confusion_matrix
from _collections import OrderedDict  # 导入 OrderedDict 来保持字典中键值对的顺序

print('||--------开始时间：', datetime.datetime.now(), '-------------')
# data input
data_time_windows_list = ['0101_0131']  #'0115_0215',

# 参数设置
npratio=50 #负样本数

# train_ratio = 0.6
val_ratio = 0.5
test_ratio = 0.5
num_heads = 10
feature_dim = 200
max_history_len = 15
num_experts = 3
num_tasks = 2
# 设置嵌入维度
continue_embedding_dim = 200
discrete_embedding_dim = 200
lr = 0.1
batch_size = 128
threshold = 0.5


# 获取时间窗内连续与离散特征名的列表
def get_continue_discrete_feature_namelist(time_windows, datapath):
    data = pd.read_csv(datapath)
    time_windows_data = data[(data['DataSet'] == time_windows)]
    user_history_pay_QOE_continue_column = eval([time_windows_data['QOE_continue'].values.tolist()][0][0])
    user_history_pay_CHONGHE_continue_column = eval([time_windows_data['CHONGHE_continue'].values.tolist()][0][0])
    user_history_pay_FUFEI_continue_column = eval([time_windows_data['FUFEI_continue'].values.tolist()][0][0])
    user_history_pay_QOE_discrete_column = eval([time_windows_data['QOE_discrete'].values.tolist()][0][0])
    user_history_pay_CHONGHE_discrete_column = eval([time_windows_data['CHONGHE_discrete'].values.tolist()][0][0])
    user_history_pay_FUFEI_discrete_column = eval([time_windows_data['FUFEI_discrete'].values.tolist()][0][0])

    return user_history_pay_QOE_continue_column, user_history_pay_CHONGHE_continue_column, user_history_pay_FUFEI_continue_column, \
        user_history_pay_QOE_discrete_column, user_history_pay_CHONGHE_discrete_column, user_history_pay_FUFEI_discrete_column


||--------开始时间： 2024-09-04 15:26:09.916045 -------------


In [21]:
# 1.数据处理+划分训练、验证、测试集

# 划分数据集 给定输出后固定结果 输出形式定为存储user_id 形成train_dataset,val_dataset,test_dataset
def split_data_unique(input_train_file, input_test_file, output_file, val_ratio, test_ratio):
    train_df = pd.read_csv(input_train_file)
    test_df = pd.read_csv(input_test_file)
    train_data = train_df[train_df.columns[0]].unique()  # 提取第一列数据并去重
    test_data = test_df[test_df.columns[0]].unique()
    np.random.shuffle(train_data)  # 随机打乱训练数据

    test_df_0 = test_df[test_df['pay_DL'] == 0]
    test_df_1 = test_df[test_df['pay_DL'] == 1]

    len_0 = len(test_df_0)
    len_1 = len(test_df_1)

    val_df_0_sample = test_df_0.sample(n=round(len_0 * (val_ratio / (val_ratio + test_ratio))))
    val_df_1_sample = test_df_1.sample(n=round(len_1 * (val_ratio / (val_ratio + test_ratio))))

    val_df = pd.concat([val_df_0_sample, val_df_1_sample])
    val_data = val_df[val_df.columns[0]].unique()
    test_df = pd.concat([test_df_0.drop(val_df_0_sample.index), test_df_1.drop(val_df_1_sample.index)])
    test_data = test_df[test_df.columns[0]].unique()

    for _ in range(10):
        np.random.shuffle(test_data)
        np.random.shuffle(val_data)

    # 存储结果是去重的user_id
    result = {
        'Train': train_data,
        'Val': val_data,
        'Test': test_data
    }
    # 创建每个子集的DataFrame  
    train_df = pd.DataFrame(train_data, columns=['Train'])
    val_df = pd.DataFrame(val_data, columns=['Val'])
    test_df = pd.DataFrame(test_data, columns=['Test'])
    # 将每个DataFrame转换为一列Series  
    train_series = train_df.iloc[:, 0]
    val_series = val_df.iloc[:, 0]
    test_series = test_df.iloc[:, 0]
    # 为了确保所有Series有相同的长度，我们需要找到最大长度并截断较短的Series  
    max_len = max(len(train_series), len(val_series), len(test_series))
    train_series = train_series.reindex(range(max_len)).fillna(value=pd.NA)
    val_series = val_series.reindex(range(max_len)).fillna(value=pd.NA)
    test_series = test_series.reindex(range(max_len)).fillna(value=pd.NA)
    # 创建一个新的DataFrame，将Series作为列  
    combined_df = pd.DataFrame({
        'Train': train_series,
        'Val': val_series,
        'Test': test_series
    })
    # 写入CSV文件，不包含索引和列名  
    combined_df.to_csv(output_file, index=False)
    print('已输出划分数据集结果')


# 数据预处理 将连续特征变离散特征 分桶 不处理user_id、sound_id、drama_id、time
def data_pre_deal(input_path, continue_feature_list):
    df = pd.read_csv(input_path)
    print('数据预处理结束')
    return df


# 根据划分好的数据集中user_id 找到对应csv文件中对应user_id的所有行数据取出，即包含了历史数据（付费+非付费）+目标数据（最后一次行为）
# def find_data_by_list(user_list, intput_data_df, data_hash):
#     df = intput_data_df
#     # result_list = []
#     # 遍历列表中的值，在CSV文件中找到所有匹配的行数据并加入结果列表
#     for user_id in user_list:
#         result_df = df[df[df.columns[0]] == user_id]
#         # result_list.append(result_df)
#         if user_id in data_hash:
#             data_hash[user_id].update({col: result_df for col in df.columns})  # 使用列名作为键
#         else:
#             data_hash[user_id] = {col: result_df for col in df.columns}
#     #result = pd.concat(result_list)  # 合并所有匹配的行数据
#     return data_hash

def find_data_by_list(user_list, intput_data_df, data_hash):
    # 遍历列表中的值，在DataFrame中找到所有匹配的行数据并加入data_hash  
    for user_id in user_list:
        result_df = intput_data_df[intput_data_df[intput_data_df.columns[0]] == user_id]
        data_hash[user_id] = result_df  # 直接存储DataFrame对象  
    return data_hash


# 获取列唯一值数量表，并对离散特征的值转化为从0开始的索引
def get_unique_feature_num_and_discrete_valueChange(datadf, discrete_feature_column_list):
    # 获取离散特征的类别数量，并存储为字典
    feature_category_num_dict = {}
    for column in datadf.columns:
        unique_values_len = datadf[column].nunique()  # 获取列的唯一值数量
        feature_category_num_dict[column] = unique_values_len
        if column in discrete_feature_column_list:
            unique_values = datadf[column].unique()
            value_mapping_dict = {value: index for index, value in enumerate(unique_values) if
                                  value != -1 and value != '' and value is not None}
            datadf[column] = datadf[column].map(value_mapping_dict)
    return feature_category_num_dict, datadf


# 总的特征输入，生成划分后数据集及其输入
def data_input(data_time_windows, train_path, test_path, spilt_outpath, val_ratio, test_ratio, total_continue_feature):
    train_dataset_path = train_path  # 待修改********
    test_dataset_path = test_path
    dataset_spilt_path = spilt_outpath  # 待修改********
    if os.path.exists(dataset_spilt_path):  # 划分训练、验证、测试集
        print("划分文件已存在，不再进行数据划分")
    else:
        split_data_unique(train_dataset_path, test_dataset_path, dataset_spilt_path, val_ratio, test_ratio)
    train_deal_data_df = data_pre_deal(train_dataset_path, total_continue_feature)  # 数据预处理
    test_deal_data_df = data_pre_deal(test_dataset_path, total_continue_feature)  # 数据预处理
    # 获取离散特征的类别数量，并存储为字典
    _, train_deal_data_df = get_unique_feature_num_and_discrete_valueChange(
        train_deal_data_df,
        total_discrete_feature)

    _, test_deal_data_df = get_unique_feature_num_and_discrete_valueChange(
        test_deal_data_df,
        total_discrete_feature)

    feature_category_num_dict, _ = get_unique_feature_num_and_discrete_valueChange(
        pd.concat([train_deal_data_df, test_deal_data_df]),
        total_discrete_feature
    )
    # 读取划分文件的结果
    spilt_data_df = pd.read_csv(dataset_spilt_path)
    # 输出每一列数据为列表
    train_list = spilt_data_df['Train'].tolist()
    val_list = spilt_data_df['Val'].tolist()
    test_list = spilt_data_df['Test'].tolist()
    train_list = [x for x in train_list if not math.isnan(x)]
    val_list = [x for x in val_list if not math.isnan(x)]
    test_list = [x for x in test_list if not math.isnan(x)]
    # print('训练集、验证集、测试集大小=', len(train_list),len(val_list),len(test_list))
    # 根据划分好的生成以user_id为key的hash（特征集合）将最后一行看做目标数据
    train_data_hash = {}
    data_hash = {}  # 存成一个hash形式
    find_data_by_list(train_list, train_deal_data_df, train_data_hash)
    find_data_by_list(val_list, test_deal_data_df, data_hash)
    find_data_by_list(test_list, test_deal_data_df, data_hash)
    print('数据划分完成')
    # print(feature_category_num_dict)
    return train_list, val_list, test_list, train_data_hash, data_hash, feature_category_num_dict

# test
# 数据集 train、val、test划分及总数据hash表(以user_id为key的存储对应对应行的hash表)及不同类特征数存储的字典
# train_list, val_list, test_list, data_hash, feature_category_num_dict = data_input("0101_0131", path,dataset_spilt_path, train_ratio, val_ratio, test_ratio, total_continue_feature)
# print(data_hash[3617476])
# print(feature_category_num_dict)


In [22]:
# 2. 形成张量矩阵 目标特征为：（batch,1,feature_num; 用户历史行为特征为（batch,max_history_len(固定长度的历史记录数),feature_num）

# mask 对用户历史行为长度的固定
# 转换 history 列为长度为max_history_len的数组
def process_history(history, max_history_len):
    if len(history) >= max_history_len:
        processed_history = history[-max_history_len:]
    else:
        processed_history = [-1] * (max_history_len - len(history)) + history
    return processed_history


# 将填充-1的位置标记为True
def create_mask(history):
    mask = [True if item == -1 else False for item in history]
    return mask


# 将历史行为记录处理为固定长度并进行mask
def history_feature_mask(user_history_feature_index, data_matrix_user_history, max_history_len):
    # user_history_feature_index: 用户历史特征的索引列表
    mask_history_feature_matrix = []
    origin_history_feature_matrix = []
    for feature_index in range(len(user_history_feature_index)):
        feature_data = [data_row[feature_index] for data_row in data_matrix_user_history]  # 获取一列特征值
        processed_feature_data = process_history(feature_data,
                                                 max_history_len)  # 处理为固定长度 max_history_len 假如max_history_len=15,原长度为5，那么处理后为[-1,-1, ..., -1, x1,x2,x3,x4,x5]
        origin_history_feature_matrix.append(processed_feature_data)
        mask_feature_data = create_mask(processed_feature_data)  # 将空的mask,填充-1的位置标记为True,其他为False
        mask_history_feature_matrix.append(mask_feature_data)

    # print('mask',len(origin_history_feature_matrix),len(origin_history_feature_matrix[0]))
    return origin_history_feature_matrix, mask_history_feature_matrix


# 将输入形成的data_hash和连续、离散特征列名,按照划分的训练或测试的user_id的列表，提取用户特征形成张量矩阵存储到data_tensor_hash中，以user_id为key，多个张量矩阵为value
def get_feature_to_matrix(train_or_val_or_test_list, data_hash, feature_column_dict):
    # 存储新的张量hash
    data_tensor_hash = {}
    # 存储历史记录的掩码矩阵
    data_tensor_history_mask_hash = {}
    target_label = []  # 预测目标值的标签

    for user_id in train_or_val_or_test_list:
        user_data = data_hash[user_id]
        # 创建空的二维矩阵
        # data_matrix_user_info_continue = []
        # data_matrix_user_info_discrete = []
        data_matrix_pay_QOE_continue = []
        data_matrix_pay_QOE_discrete = []
        data_matrix_pay_CHONGHE_continue = []
        data_matrix_pay_CHONGHE_discrete = []
        data_matrix_pay_FUFEI_continue = []
        data_matrix_pay_FUFEI_discrete = []
        data_matrix_target_QOE_continue = []
        data_matrix_target_CHONGHE_continue = []
        data_matrix_target_FUFEI_continue = []
        data_matrix_target_QOE_discrete = []
        data_matrix_target_CHONGHE_discrete = []
        data_matrix_target_FUFEI_discrete = []
        # 提取特征列对应的索引
        # user_feature_continue_index = [user_data.columns.get_loc(col) for col in feature_column_dict['user_info_continue'] if col in user_data.columns]
        # user_feature_discrete_index = [user_data.columns.get_loc(col) for col in feature_column_dict['user_info_discrete'] if
        #                                col in user_data.columns]
        user_history_QOE_continue_index = [user_data.columns.get_loc(col) for col in
                                           feature_column_dict['history_QOE_continue'] if
                                           col in user_data.columns]
        user_history_QOE_discrete_index = [user_data.columns.get_loc(col) for col in
                                           feature_column_dict['history_QOE_discrete'] if
                                           col in user_data.columns]
        user_history_CHONGHE_continue_index = [user_data.columns.get_loc(col) for col in
                                               feature_column_dict['history_CHONGHE_continue'] if
                                               col in user_data.columns]
        user_history_CHONGHE_discrete_index = [user_data.columns.get_loc(col) for col in
                                               feature_column_dict['history_CHONGHE_discrete'] if
                                               col in user_data.columns]
        user_history_FUFEI_continue_index = [user_data.columns.get_loc(col) for col in
                                             feature_column_dict['history_FUFEI_continue'] if
                                             col in user_data.columns]
        user_history_FUFEI_discrete_index = [user_data.columns.get_loc(col) for col in
                                             feature_column_dict['history_FUFEI_discrete'] if
                                             col in user_data.columns]
        user_history_CHONGHE_discrete_add_D_index = [user_data.columns.get_loc(col) for col in
                                                     feature_column_dict['history_CHONGHE_discrete_add_D'] if
                                                     col in user_data.columns]

        # 填充数据矩阵
        for i in range(len(user_data)):
            if i != (len(user_data) - 1):  # 除最后一行即所有历史记录，不包括目标记录
                # [[x11, x12, x13, ..., x1n],
                #  [x21, x22, x23, ..., x2n],
                #    .    .    .    .    .
                #    .    .    .    .    .
                #    .    .    .    .    .
                #  [xm1, xm2, xm3, ..., xmn]]
                # m x n: m代表该user_id对应的数据的行数-1，n代表qoe/fufei/chonghe的特征数
                data_matrix_pay_QOE_continue.append(
                    [user_data.iloc[i, col] for col in user_history_QOE_continue_index])  # 用户历史QOE连续特征
                data_matrix_pay_QOE_discrete.append(
                    [user_data.iloc[i, col] for col in user_history_QOE_discrete_index])  # 用户历史QOE离散特征
                data_matrix_pay_CHONGHE_continue.append(
                    [user_data.iloc[i, col] for col in user_history_CHONGHE_continue_index])  # 用户历史CHONGHE连续特征
                data_matrix_pay_CHONGHE_discrete.append(
                    [user_data.iloc[i, col] for col in user_history_CHONGHE_discrete_add_D_index])  # 用户历史CHONGHE离散特征
                data_matrix_pay_FUFEI_continue.append(
                    [user_data.iloc[i, col] for col in user_history_FUFEI_continue_index])  # 用户历史FUFEI连续特征
                data_matrix_pay_FUFEI_discrete.append(
                    [user_data.iloc[i, col] for col in user_history_FUFEI_discrete_index])  # 用户历史FUFEI离散特征
            else:  # 目标记录
                # data_matrix_user_info_continue.append([user_data.iloc[i, col] for col in user_feature_continue_index])  # 用户连续特征
                # data_matrix_user_info_discrete.append([user_data.iloc[i, col] for col in user_feature_discrete_index])  # 用户离散特征
                # 维度：1xn, 其中n代表特征数
                target_label.append(user_data.iloc[i, -1])  # 预测目标的y值：1x1
                data_matrix_target_QOE_continue.append(
                    [user_data.iloc[i, col] for col in user_history_QOE_continue_index])  # 目标QOE连续特征
                data_matrix_target_QOE_discrete.append(
                    [user_data.iloc[i, col] for col in user_history_QOE_discrete_index])  # 目标QOE离散特征
                data_matrix_target_CHONGHE_continue.append(
                    [user_data.iloc[i, col] for col in user_history_CHONGHE_continue_index])  # 目标CHONGHE连续特征
                data_matrix_target_CHONGHE_discrete.append(
                    [user_data.iloc[i, col] for col in user_history_CHONGHE_discrete_index])  # 目标CHONGHE离散特征
                data_matrix_target_FUFEI_continue.append(
                    [user_data.iloc[i, col] for col in user_history_FUFEI_continue_index])  # 目标FUFEI连续特征
                data_matrix_target_FUFEI_discrete.append(
                    [user_data.iloc[i, col] for col in user_history_FUFEI_discrete_index])  # 目标FUFEI离散特征
        # print('data_matrix_pay_QOE_continue:', len(data_matrix_pay_QOE_continue),len(data_matrix_pay_QOE_continue[0]))
        # print(len(data_matrix_target_QOE_continue),len(data_matrix_target_QOE_continue[0]))
        # 将历史行为记录处理为固定长度并进行mask
        data_matrix_pay_QOE_continue, data_matrix_pay_QOE_continue_mask = history_feature_mask(
            user_history_QOE_continue_index, data_matrix_pay_QOE_continue, max_history_len)
        data_matrix_pay_QOE_discrete, data_matrix_pay_QOE_discrete_mask = history_feature_mask(
            user_history_QOE_discrete_index, data_matrix_pay_QOE_discrete, max_history_len)
        data_matrix_pay_CHONGHE_continue, data_matrix_pay_CHONGHE_continue_mask = history_feature_mask(
            user_history_CHONGHE_continue_index, data_matrix_pay_CHONGHE_continue, max_history_len)
        data_matrix_pay_CHONGHE_discrete, data_matrix_pay_CHONGHE_discrete_mask = history_feature_mask(
            user_history_CHONGHE_discrete_add_D_index, data_matrix_pay_CHONGHE_discrete, max_history_len)
        data_matrix_pay_FUFEI_continue, data_matrix_pay_FUFEI_continue_mask = history_feature_mask(
            user_history_FUFEI_continue_index, data_matrix_pay_FUFEI_continue, max_history_len)
        data_matrix_pay_FUFEI_discrete, data_matrix_pay_FUFEI_discrete_mask = history_feature_mask(
            user_history_FUFEI_discrete_index, data_matrix_pay_FUFEI_discrete, max_history_len)
        # print('data_matrix_pay_QOE_discrete',len(data_matrix_pay_QOE_discrete),len(data_matrix_pay_QOE_discrete[0]))
        # print('(ata_matrix_pay_QOE_discrete',data_matrix_pay_QOE_discrete[0])

        # 将numpy数组转换为PyTorch张量       # history   得到的data_matrix_user_history及data_tensor_pay_QOE_continue维度是(feature_num,history_len)需要转成tensor后转置
        data_tensor_pay_QOE_continue = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_QOE_continue), dtype=torch.float32), -2, -1)
        # data_tensor_pay_QOE_discrete = torch.tensor(np.array(data_matrix_pay_QOE_discrete), dtype=torch.float32)
        # print('data_tensor_pay_QOE_discrete1',data_tensor_pay_QOE_discrete[0,:])
        data_tensor_pay_QOE_discrete = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_QOE_discrete), dtype=torch.float32), -2, -1)
        # print('data_tensor_pay_QOE_discrete2',data_tensor_pay_QOE_discrete[0,:])
        # print('data_tensor_pay_QOE_discrete3',data_tensor_pay_QOE_discrete[:,0])
        data_tensor_pay_CHONGHE_continue = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_CHONGHE_continue), dtype=torch.float32), -2, -1)
        data_tensor_pay_CHONGHE_discrete = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_CHONGHE_discrete), dtype=torch.float32), -2, -1)
        data_tensor_pay_FUFEI_continue = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_FUFEI_continue), dtype=torch.float32), -2, -1)
        data_tensor_pay_FUFEI_discrete = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_FUFEI_discrete), dtype=torch.float32), -2, -1)
        #  mask矩阵   得到的data_matrix_user_history及data_tensor_pay_QOE_continue维度是(feature_num,history_len)需要转成tensor后转置
        data_tensor_pay_QOE_continue_mask = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_QOE_continue_mask), dtype=torch.float32), -2, -1)
        data_tensor_pay_QOE_discrete_mask = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_QOE_discrete_mask), dtype=torch.float32), -2, -1)
        data_tensor_pay_CHONGHE_continue_mask = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_CHONGHE_continue_mask), dtype=torch.float32), -2, -1)
        data_tensor_pay_CHONGHE_discrete_mask = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_CHONGHE_discrete_mask), dtype=torch.float32), -2, -1)
        data_tensor_pay_FUFEI_continue_mask = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_FUFEI_continue_mask), dtype=torch.float32), -2, -1)
        data_tensor_pay_FUFEI_discrete_mask = torch.transpose(
            torch.tensor(np.array(data_matrix_pay_FUFEI_discrete_mask), dtype=torch.float32), -2, -1)
        # user + target   输出维度为（1，feature_num）,一处第一个为1的维度变为（feature_num）
        # data_tensor_user_info_continue = torch.tensor(np.array(data_matrix_user_info_continue), dtype=torch.float32)
        # data_tensor_user_info_discrete = torch.tensor(np.array(data_matrix_user_info_discrete), dtype=torch.float32)
        data_tensor_target_QOE_continue = torch.squeeze(
            torch.tensor(np.array(data_matrix_target_QOE_continue), dtype=torch.float32), dim=0)
        data_tensor_target_QOE_discrete = torch.squeeze(
            torch.tensor(np.array(data_matrix_target_QOE_discrete), dtype=torch.float32), dim=0)
        data_tensor_target_CHONGHE_continue = torch.squeeze(
            torch.tensor(np.array(data_matrix_target_CHONGHE_continue), dtype=torch.float32), dim=0)
        data_tensor_target_CHONGHE_discrete = torch.squeeze(
            torch.tensor(np.array(data_matrix_target_CHONGHE_discrete), dtype=torch.float32), dim=0)
        data_tensor_target_FUFEI_continue = torch.squeeze(
            torch.tensor(np.array(data_matrix_target_FUFEI_continue), dtype=torch.float32), dim=0)
        data_tensor_target_FUFEI_discrete = torch.squeeze(
            torch.tensor(np.array(data_matrix_target_FUFEI_discrete), dtype=torch.float32), dim=0)

        # 生成hash值，按user_id为key存储成hash
        tensor_hash_value = {
            'pay_QOE_continue': data_tensor_pay_QOE_continue,
            'pay_QOE_discrete': data_tensor_pay_QOE_discrete,
            'pay_CHONGHE_continue': data_tensor_pay_CHONGHE_continue,
            'pay_CHONGHE_discrete': data_tensor_pay_CHONGHE_discrete,
            'pay_FUFEI_continue': data_tensor_pay_FUFEI_continue,
            'pay_FUFEI_discrete': data_tensor_pay_FUFEI_discrete,
            'target_QOE_continue': data_tensor_target_QOE_continue,
            'target_QOE_discrete': data_tensor_target_QOE_discrete,
            'target_CHONGHE_continue': data_tensor_target_CHONGHE_continue,
            'target_CHONGHE_discrete': data_tensor_target_CHONGHE_discrete,
            'target_FUFEI_continue': data_tensor_target_FUFEI_continue,
            'target_FUFEI_discrete': data_tensor_target_FUFEI_discrete
        }
        tensor_hash_value_history_mask = {
            'pay_QOE_continue': data_tensor_pay_QOE_continue_mask,
            'pay_QOE_discrete': data_tensor_pay_QOE_discrete_mask,
            'pay_CHONGHE_continue': data_tensor_pay_CHONGHE_continue_mask,
            'pay_CHONGHE_discrete': data_tensor_pay_CHONGHE_discrete_mask,
            'pay_FUFEI_continue': data_tensor_pay_FUFEI_continue_mask,
            'pay_FUFEI_discrete': data_tensor_pay_FUFEI_discrete_mask,
        }
        if user_id in data_tensor_hash:
            data_tensor_hash[user_id].update(tensor_hash_value)
            data_tensor_history_mask_hash[user_id].update(tensor_hash_value_history_mask)
        else:
            data_tensor_hash[user_id] = tensor_hash_value
            data_tensor_history_mask_hash[user_id] = tensor_hash_value_history_mask

    # 如果需要合并成一个张量，可以使用torch.cat方法
    # combined_tensor = torch.cat((data_matrix_1_tensor, data_matrix_2_tensor), dim=1)
    # data_tensor_hash中用户历史的输出维度(max_history_len,feature_num)，目标的输出维度是（feature_num）
    return data_tensor_hash, target_label, data_tensor_history_mask_hash


# 张量矩阵添加一个batch维度，并在用户特征与目标特征的张量中再添加一维使其与用户历史行为张量对齐， 形成两种：
# 原数据为：1.用户特征与目标特征都为：（1,feature_num）; 2.用户历史行为特征为（max_history_len(固定长度的历史记录数),feature_num）
# 新数据为：1.用户特征与目标特征都为：（batch,1,1,feature_num); 2.用户历史行为特征为（batch,max_history_len(固定长度的历史记录数),feature_num）
# 形成batch维度的特征
def generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                           feature_category):  # 例:feature_category = 'user_info_continue' 就是上面生成的tensor_hash_value字典的键
    tensor_list = []
    for user_id in train_or_val_or_test_list:  # 遍历data_tensor_hash的所有key (user_id)
        if feature_category in data_tensor_hash[user_id]:
            tensor = data_tensor_hash[user_id][feature_category]  # 获取feature_category对应的张量
            tensor_list.append(tensor)  # 添加到tensor_list中
    #  print(tensor_list)
    batch_feature_tensor = torch.stack(tensor_list, dim=0)  # 在第一个维度上合并所有张量(其实相当于生成一个新维度)
    return batch_feature_tensor


# 生成batch再添加维度对齐张量（三个维度）
def generate_user_feature_alignment_tensor(train_or_val_or_test_list, data_tensor_hash, is_mask=False):
    # 用户历史行为矩阵（max_history_len(固定长度的历史记录数),feature_num）->（batch,max_history_len(固定长度的历史记录数),feature_num）
    pay_QOE_continue_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                   'pay_QOE_continue')
    pay_QOE_discrete_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                   'pay_QOE_discrete')
    pay_CHONGHE_continue_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                       'pay_CHONGHE_continue')
    pay_CHONGHE_discrete_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                       'pay_CHONGHE_discrete')
    pay_FUFEI_continue_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                     'pay_FUFEI_continue')
    pay_FUFEI_discrete_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                     'pay_FUFEI_discrete')
    # print('pay_QOE_discrete_batch_feature_tensor1',pay_QOE_discrete_batch_feature_tensor[0,:,0])
    # 看是否是掩码矩阵，不是则xxx，是则没有user+target
    if is_mask == False:
        # 用户矩阵 (feature_num) ->(batch,feature_num)
        # user_info_continue_batch_feature_tensor = generate_batch_feature(data_tensor_hash, 'user_info_continue')
        # user_info_discrete_batch_feature_tensor = generate_batch_feature(data_tensor_hash, 'user_info_discrete')
        # 目标矩阵 (feature_num) ->(batch,feature_num)
        target_QOE_continue_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                          'target_QOE_continue')
        target_QOE_discrete_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                          'target_QOE_discrete')
        target_CHONGHE_continue_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list,
                                                                              data_tensor_hash,
                                                                              'target_CHONGHE_continue')
        target_CHONGHE_discrete_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list,
                                                                              data_tensor_hash,
                                                                              'target_CHONGHE_discrete')
        target_FUFEI_continue_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                            'target_FUFEI_continue')
        target_FUFEI_discrete_batch_feature_tensor = generate_batch_feature(train_or_val_or_test_list, data_tensor_hash,
                                                                            'target_FUFEI_discrete')

        # 假设原始张量矩阵为 tensor，形状为 (batch_size, feature_num)将其加一个维度变为 (batch_size, 1, feature_num)
        # user_info_continue_batch_feature_tensor = torch.unsqueeze(user_info_continue_batch_feature_tensor, dim=1)
        # user_info_discrete_batch_feature_tensor = torch.unsqueeze(user_info_discrete_batch_feature_tensor, dim=1)
        target_QOE_continue_batch_feature_tensor = torch.unsqueeze(target_QOE_continue_batch_feature_tensor, dim=1)
        target_QOE_discrete_batch_feature_tensor = torch.unsqueeze(target_QOE_discrete_batch_feature_tensor, dim=1)
        target_CHONGHE_continue_batch_feature_tensor = torch.unsqueeze(target_CHONGHE_continue_batch_feature_tensor,
                                                                       dim=1)
        target_CHONGHE_discrete_batch_feature_tensor = torch.unsqueeze(target_CHONGHE_discrete_batch_feature_tensor,
                                                                       dim=1)
        target_FUFEI_continue_batch_feature_tensor = torch.unsqueeze(target_FUFEI_continue_batch_feature_tensor, dim=1)
        target_FUFEI_discrete_batch_feature_tensor = torch.unsqueeze(target_FUFEI_discrete_batch_feature_tensor, dim=1)

        batch_feature_tensor_dict = {
            'pay_QOE_discrete': pay_QOE_discrete_batch_feature_tensor,
            'pay_CHONGHE_discrete': pay_CHONGHE_discrete_batch_feature_tensor,
            'pay_FUFEI_discrete': pay_FUFEI_discrete_batch_feature_tensor,
            'pay_QOE_continue': pay_QOE_continue_batch_feature_tensor,
            'pay_CHONGHE_continue': pay_CHONGHE_continue_batch_feature_tensor,
            'pay_FUFEI_continue': pay_FUFEI_continue_batch_feature_tensor,
            'target_QOE_discrete': target_QOE_discrete_batch_feature_tensor,
            'target_CHONGHE_discrete': target_CHONGHE_discrete_batch_feature_tensor,
            'target_FUFEI_discrete': target_FUFEI_discrete_batch_feature_tensor,
            'target_QOE_continue': target_QOE_continue_batch_feature_tensor,
            'target_CHONGHE_continue': target_CHONGHE_continue_batch_feature_tensor,
            'target_FUFEI_continue': target_FUFEI_continue_batch_feature_tensor,

        }
    else:
        batch_feature_tensor_dict = {
            'pay_QOE_discrete': pay_QOE_discrete_batch_feature_tensor,
            'pay_CHONGHE_discrete': pay_CHONGHE_discrete_batch_feature_tensor,
            'pay_FUFEI_discrete': pay_FUFEI_discrete_batch_feature_tensor,
            'pay_QOE_continue': pay_QOE_continue_batch_feature_tensor,
            'pay_CHONGHE_continue': pay_CHONGHE_continue_batch_feature_tensor,
            'pay_FUFEI_continue': pay_FUFEI_continue_batch_feature_tensor,
        }
    return batch_feature_tensor_dict  # 这里张量输出的全是三维 (batch_size, 1 or max_history_len, feature_num)


# 由于模型输入得是张量，因此在之前将字典转化为了张量，现在将它转换回去
class TensorDatasettoDict(Dataset):
    def __init__(self, dataset, keys):
        self.dataset = dataset
        self.keys = keys

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        data = self.dataset[idx]
        sample = {key: data[i] for i, key in enumerate(self.keys)}
        return sample

# test
# 获取训练、验证、测试集对应的数据形成的向量hash存储及label
# print(data_hash[3617476])
# train_data_tensor_hash, train_label, train_data_tensor_hash_history_mask = get_feature_to_matrix(train_list, data_hash, feature_column_dict)
# first_key = list(train_data_tensor_hash.keys())[0]
# print(train_data_tensor_hash[first_key]['pay_QOE_discrete'][:,0])
# print(train_label)
# # print(train_data_tensor_hash[3617476])
# dimensions1 = train_data_tensor_hash[3617476]['pay_QOE_continue'].size()
# dimensions2 = train_data_tensor_hash[3617476]['pay_QOE_discrete'].size()
# dimensions3 = train_data_tensor_hash[3617476]['pay_CHONGHE_continue'].size()
# dimensions4 = train_data_tensor_hash[3617476]['target_QOE_continue'].size()
# dimensions5 = train_data_tensor_hash[3617476]['target_QOE_discrete'].size()
# dimensions6 = train_data_tensor_hash[3617476]['target_CHONGHE_continue'].size()
# print("PyTorch张量的维度：", dimensions1,dimensions2,dimensions3,dimensions4,dimensions5,dimensions6)
# train_batch_feature_tensor_dict = generate_user_feature_alignment_tensor(train_list,train_data_tensor_hash)
# train_batch_feature_tensor_history_mask_dict = generate_user_feature_alignment_tensor(train_data_tensor_hash_history_mask,is_mask=True)
# print(train_batch_feature_tensor_dict['pay_QOE_discrete'][0,:,0])
# dimensions1 = train_data_tensor_hash[3617476]['pay_QOE_continue'].size()
# dimensions2 = train_data_tensor_hash[3617476]['pay_QOE_discrete'].size()
# dimensions3 = train_data_tensor_hash[3617476]['pay_CHONGHE_continue'].size()
# dimensions4 = train_data_tensor_hash[3617476]['target_QOE_continue'].size()
# dimensions5 = train_data_tensor_hash[3617476]['target_QOE_discrete'].size()
# dimensions6 = train_data_tensor_hash[3617476]['target_CHONGHE_continue'].size()
# print("PyTorch添加batch后张量的维度：", dimensions1,dimensions2,dimensions3,dimensions4,dimensions5,dimensions6)

In [23]:
# 3.基础模型 embedding、attention
# 构建离散特征的embedding
def discrete_embedding(feature_category_num_dict, feature_column_name_list, embedding_dim):  # 输入特征取值大小的集合,特征数,维度
    # 创建一个列表来存储每个嵌入层
    embeddings = []
    for i in range(0, len(feature_column_name_list)):
        # print(feature_column_name_list[i], feature_category_num_dict[feature_column_name_list[i]])
        embedding_layer1 = nn.Embedding(feature_category_num_dict[feature_column_name_list[i]] + 2, embedding_dim)
        embeddings.append(embedding_layer1)
    #     print('embedding维度',feature_category_num_dict[feature_column_name_list[i]]+1)
    # print('本轮embedding层：',len(feature_column_name_list))
    return embeddings


# 全连接层 MLP
def dense_layer(in_features, out_features):
    # in_features=hidden_size,out_features=1
    return nn.Sequential(
        nn.Linear(in_features, out_features, bias=True),
        nn.ReLU())


# 全连接层 MLP
def dense_layer_noReLu(in_features, out_features):
    # in_features=hidden_size,out_features=1
    return nn.Sequential(
        nn.Linear(in_features, out_features, bias=True))


# 连续特征离散化
def continuous_embedding(num_continuous_features, out_features):
    continuous_embedding_layers = []
    for i in range(0, len(num_continuous_features)):
        num_continuous_feature = num_continuous_features[i]
        embedding_layer = dense_layer(1, out_features)
        continuous_embedding_layers.append(embedding_layer)
    return continuous_embedding_layers


# 根据全特征数量表及类别，得到类别下的对应特征数量  feature_column_name_list = feature_column_dict['user_info_continue']
def category_feature_num(feature_category_num_dict, feature_column_name_list):
    category_feature_num_list = []
    for i in range(len(feature_column_name_list)):
        category_feature_num_list.append(feature_category_num_dict[feature_column_name_list[i]])
    # print('category_feature_num',len(category_feature_num_list))
    return category_feature_num_list


# SE层中找到合适的reduction使channel // reduction得到整数
def find_reduction(channel, min_reduction=2, max_reduction=19):
    # 对于质数，直接取自己作为reduction  
    if is_prime(channel):
        return channel

        # 计算介于min_reduction和max_reduction之间的候选reduction值  
    candidates = [i for i in range(min_reduction, max_reduction + 1) if channel % i == 0]

    # 如果候选列表为空，则至少取2作为reduction  
    if not candidates:
        return min_reduction

        # 尝试找到最大的候选值，使得channel // reduction的结果尽可能大  
    reduction = max(candidates)

    return reduction


def is_prime(n):
    """判断一个数是否为质数"""
    if n < 2:
        return False
    for i in range(2, int(math.sqrt(n)) + 1):
        if n % i == 0:
            return False
    return True


# 输入(batch,feature_num,embedding_dim,1) ->(batch,feature_num,embedding_dim,1)->输出特征权重及权重乘后的(batch,embedding_dim) 
class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.reduction = reduction
        self.reduction = find_reduction(channel)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, h, w = x.size()
        # print('b, c, h, w',b, c, h, w)
        y = self.avg_pool(x).view(b, c)
        # print('y',y)
        weight = self.fc(y).view(b, c, 1, 1)
        new_x = x * weight.expand_as(x)  # 利用了 PyTorch 的广播机制，使得张量 weight 被复制成与输入 x 相同的形状，然后进行逐元素相乘 
        # 加权平均 (batch, embedding_dim)
        weighted_avg_out_x = new_x.mean(dim=1, keepdim=True)  # 在 feature_num维度上取平均，保持维度
        # 调整维度
        weighted_avg_out_x = weighted_avg_out_x.view(b, 1, h, w)
        # 去除最后一维
        new_x = new_x.squeeze(dim=3)
        weighted_avg_out_x = weighted_avg_out_x.squeeze(dim=3)

        return weight, weighted_avg_out_x, new_x


# 旧 弃用
# class SELayer(nn.Module):
#     def __init__(self, feature_dim, feature_num, reduction=16):
#         super(SELayer, self).__init__()
#         self.pool = nn.AdaptiveAvgPool2d(1)
#         self.fc = nn.Sequential(
#             nn.Linear(in_features=feature_dim, out_features=feature_dim // reduction, bias=False),
#             nn.ReLU(inplace=True),
#             nn.Linear(in_features=feature_dim // reduction, out_features=feature_num, bias=False),
#             nn.Sigmoid()
#         )
#     def forward(self, x):
#         # Apply average pooling along the feature_dim dimension  x(batch, embedding_dim, feature_num)
#         b, c, h = x.size()
#         print('b, c, h', b, c, h)
#         y = self.pool(x.unsqueeze(-1)).view(b, c, -1)  # (batch, embedding_dim, 1)
#         print('y', y)
#         print('b, h', b, h)
#         # Generate attention weights for each feature
#         attention_weights = self.fc(y).view(b, h, -1)  # 权重batch, 1, feature_num
#         print(attention_weights.shape)
#         # Apply attention weights to the original input
#         weighted_x = x * attention_weights.unsqueeze(1)
#         # 输出的是一个形状为(batch, embedding_dim, feature_num)的张量。
#         # 这个张量是对原始输入x进行加权后的结果，其中每个特征都被相应的注意力权重所乘。

#         # Sum over the feature_num dimension to get (batch, embedding_dim)
#         weighted_sum = torch.sum(weighted_x, dim=2)

#         return attention_weights, weighted_sum, weighted_x
# def forward(self, x):
#     # Apply average pooling along the feature_dim dimension  x(batch, feature_dim, feature_num)
#     b, c, h, w = x.size()
#     print('b, c, h, w',b, c, h, w)
#     y = self.pool(x).view(b, c, -1)  # (batch, feature_dim, 1, 1)
#     print('y',y)
#     print('b, h * w',b, h * w)
#     # Generate attention weights for each feature
#     attention_weights = self.fc(y).view(b, h * w, -1)  # 权重batch, 1, 1, feature_num)
#     print(attention_weights.shape)
#     # Apply attention weights to the original input
#     weighted_x = x.view(b, c, -1) * attention_weights
#     # 输出的是一个形状为(batch, feature_dim, 1, feature_num)的张量。
#     # 这个张量是对原始输入x进行加权后的结果，其中每个特征都被相应的注意力权重所乘。
#     weighted_x = weighted_x.view(b, c, h, w)

#     # Sum over the feature_num dimension to get (batch, feature_dim, 1)
#     # weighted_sum = torch.sum(weighted_x, dim=2, keepdim=True)的具体意思是沿着feature_num维度
#     # （即第三个维度，索引为2）对weighted_x进行求和。由于keepdim=True，求和后的结果保持了一个额外的维度，
#     # 形状为(batch, feature_dim, 1)。这一步实现了对每个样本的所有特征进行加权求和，得到一个新的特征表示。
#     weighted_sum = torch.sum(weighted_x, dim=2, keepdim=True)
#     # 转置最后两维
#     weighted_sum = torch.transpose(weighted_sum, -2, -1)

#     return attention_weights, weighted_sum, weighted_x

# 多头自注意力
class MultiHeadSelfAttention(nn.Module):
    def __init__(self, num_heads, feature_dim, max_history_len):
        super(MultiHeadSelfAttention, self).__init__()
        self.num_heads = num_heads  #10
        self.feature_dim = feature_dim  #200
        self.head_dim = feature_dim // num_heads
        self.max_history_len = max_history_len

        self.WQ = nn.Linear(feature_dim, feature_dim)
        self.WK = nn.Linear(feature_dim, feature_dim)
        self.WV = nn.Linear(feature_dim, feature_dim)

    def forward(self, history_matrix, mask=None):
        batch_size, history_len, _ = history_matrix.size()

        Q = self.WQ(history_matrix)
        K = self.WK(history_matrix)
        V = self.WV(history_matrix)

        Q = Q.view(batch_size, history_len, self.num_heads, self.head_dim).permute(0, 2, 1,
                                                                                   3)  #(batch,num_heads,history_len,head_dim)
        K = K.view(batch_size, history_len, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
        V = V.view(batch_size, history_len, self.num_heads, self.head_dim).permute(0, 2, 1, 3)

        attention_scores = torch.matmul(Q, K.permute(0, 1, 3, 2)) / (self.head_dim ** 0.5)

        if mask is not None:
            mask = mask.permute(0, 2, 1)  # 二、三维度互换  变为(batch, feature_num, history)
            temp_dim = mask.shape[1]
            #（样本数*特征数,历史数）
            mask = mask.reshape(-1, max_history_len)
            attention_scores = attention_scores.masked_fill(mask.unsqueeze(1).unsqueeze(2).bool(), float('-1e30'))  #()

        attention_weights = torch.softmax(attention_scores, dim=-1)  #shape(batch,head,history_len,history_len)
        #(batch,history_len,200)
        weighted_sum = torch.matmul(attention_weights, V).permute(0, 2, 1, 3).contiguous().view(batch_size, history_len,
                                                                                                self.feature_dim)
        # 计算加权平均
        weighted_avg_out = weighted_sum.mean(dim=1, keepdim=True)  # 在 history_len 维度上取平均，保持维度
        # 调整维度
        weighted_avg_out = weighted_avg_out.view(batch_size, 1, self.feature_dim)
        # print('weighted_sum',weighted_avg_out.shape)

        return attention_weights, weighted_avg_out, weighted_sum


# class MultiHeadSelfAttention(nn.Module):
#     def __init__(self, num_heads, feature_dim):
#         super(MultiHeadSelfAttention, self).__init__()
#         self.num_heads = num_heads
#         self.feature_dim = feature_dim
#         self.head_dim = feature_dim // num_heads

#         # 线性变换的权重
#         self.wq = nn.Parameter(torch.Tensor(feature_dim, self.num_heads * self.head_dim))
#         self.wk = nn.Parameter(torch.Tensor(feature_dim, self.num_heads * self.head_dim))
#         self.wv = nn.Parameter(torch.Tensor(feature_dim, self.num_heads * self.head_dim))

#         # 初始化权重
#         nn.init.normal_(self.wq, std=0.02)
#         nn.init.normal_(self.wk, std=0.02)
#         nn.init.normal_(self.wv, std=0.02)

#     def forward(self, history_embedding_vec, mask=None):
#         batch_size, history_len, feature_num, feature_dim = history_embedding_vec.size()
#         # 将feature_num和batch_size合并
#         x = history_embedding_vec.view(batch_size * feature_num, history_len, feature_dim)
#         # 线性变换
#         q = torch.matmul(x, self.wq).view(batch_size * feature_num, history_len, self.num_heads,self.head_dim).transpose(1, 2)
#         k = torch.matmul(x, self.wk).view(batch_size * feature_num, history_len, self.num_heads,self.head_dim).transpose(1, 2)
#         v = torch.matmul(x, self.wv).view(batch_size * feature_num, history_len, self.num_heads,self.head_dim).transpose(1, 2)
#         # 缩放点积注意力
#         scores = torch.matmul(q, k.transpose(-1, -2)) / torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))
#         if mask is not None:
#             mask = mask.view(batch_size * feature_num, history_len)
#             scores = scores.masked_fill(mask.unsqueeze(1).unsqueeze(2).bool(), float('-inf'))
#         attention_weights = nn.Softmax(dim=-1)(scores)
#         out = torch.matmul(attention_weights, v).transpose(1, 2).contiguous().view(batch_size, feature_num, history_len,self.num_heads * self.head_dim)
#         # 合并多头
#         out = torch.matmul(out, self.wq.view(self.num_heads * self.head_dim, feature_dim)).view(batch_size, feature_num,history_len,feature_dim)
#         # 恢复到原始形状
#         # out = out.view(batch_size, feature_num, history_len, feature_dim)

#           # 计算加权平均后的结果
#         # 计算加权平均
#         weighted_avg_out = out.mean(dim=2, keepdim=True)  # 在 history_len 维度上取平均，保持维度
#         # 调整维度
#         weighted_avg_out = weighted_avg_out.view(batch_size, 1, feature_num, feature_dim)

#         return attention_weights, weighted_avg_out

# 注意力机制 关于用
class MultiHeadHistory_TargetAttention(nn.Module):
    def __init__(self, num_heads, embed_dim, dropout=0.1):
        super(MultiHeadHistory_TargetAttention, self).__init__()

        assert embed_dim % num_heads == 0, f"Embedding dimension ({embed_dim}) should be divisible by the number of heads ({num_heads})."

        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        # 定义权重矩阵
        self.q_linear = nn.Linear(embed_dim, embed_dim)
        self.k_linear = nn.Linear(embed_dim, embed_dim)
        self.v_linear = nn.Linear(embed_dim, embed_dim)

        self.out_proj = nn.Linear(embed_dim, embed_dim)

        self.scaling = self.head_dim ** -0.5
        self.dropout = nn.Dropout(dropout)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, query, key, value, attn_mask=None):
        batch_size = query.size(0)
        # 进行线性投影并分离成多个头
        q = self.q_linear(query).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        k = self.k_linear(key).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        v = self.v_linear(value).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        # 计算注意力得分
        scores = torch.matmul(q, k.transpose(-2, -1)) * self.scaling
        if attn_mask is not None:
            scores.masked_fill_(attn_mask.unsqueeze(1), float('-1e30'))
        # 应用softmax函数
        attn_weights = self.softmax(scores)
        # 应用dropout
        attn_weights = self.dropout(attn_weights)
        # 进行值的加权求和
        context = torch.matmul(attn_weights, v).transpose(1, 2).contiguous().view(batch_size, -1, self.embed_dim)
        # 输出层的线性变换
        output = self.out_proj(context)
        return attn_weights, output

# class MultiHeadHistory_TargetAttention(nn.Module):
#     def __init__(self, num_heads, feature_dim):
#         super(MultiHeadHistory_TargetAttention, self).__init__()
#         self.feature_dim = feature_dim
#         self.num_heads = num_heads
#         self.head_dim = feature_dim // num_heads

#         assert (
#                 self.head_dim * num_heads == feature_dim
#         ), "Embedding dimension must be divisible by num_heads."

#         self.values = nn.Linear(self.num_heads * self.head_dim, self.num_heads * self.head_dim, bias=False)
#         # self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
#         self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
#         self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
#         # 其他部分保持不变

#     def forward(self, student_embeddings, unit_embeddings, mask=None):
#         batch_size = student_embeddings.size(0)

#         # Split the embedding into self.num_heads different pieces
#         student_values = student_embeddings.view(batch_size, -1, self.num_heads, self.head_dim)
#         student_keys = student_embeddings.view(batch_size, -1, self.num_heads, self.head_dim)
#         student_queries = student_embeddings.view(batch_size, -1, self.num_heads, self.head_dim)

#         unit_values = unit_embeddings.view(batch_size, -1, self.num_heads, self.head_dim)
#         unit_keys = unit_embeddings.view(batch_size, -1, self.num_heads, self.head_dim)
#         # print('student_queries',student_queries.shape)
#         # print('unit_keys',unit_keys.shape)

#         # Compute the attention weights
#         energy = torch.matmul(student_queries, unit_keys.transpose(-2, -1)) / torch.sqrt(
#             torch.tensor(self.head_dim, dtype=torch.float32))
#         if mask is not None:
#             attention_weights = energy.masked_fill(mask.unsqueeze(1).unsqueeze(2), float('-inf'))
#         attention_weights = torch.softmax(energy, dim=-1)

#         # Apply attention weights to the values
#         out = torch.matmul(attention_weights, unit_values)
#         # print(out.shape)

#         # Concatenate the outputs of the different heads
#         out = out.view(batch_size, -1, self.num_heads * self.head_dim)
#         # print(out.shape)

#         # Finally, apply a linear layer to get the final output
#         out = self.values(out)

#         return attention_weights, out


In [24]:
# 4.Embedding层

# user_history_feature 对于一个user的多个历史行为，将其拼接成一维向量 要先经过一层通道注意力机制得到最后结果
# (样本数,history,20,200) ->多头 ->(样本数,20,200)->转置->(样本数,200,20) ->SE->特征权重->(样本数,200,20) ->转置-> 加权->(样本数,1，200)
# user_pay_history_feature 加上batch的
# 用户历史
class UserPayHistoryEmbedding(nn.Module):
    def __init__(self, continue_embedding_dim, discrete_embedding_dim, feature_category_num_dict, feature_column_dict):
        super(UserPayHistoryEmbedding, self).__init__()
        # 连续特征
        # 离散特征
        self.feature_category_num_dict = feature_category_num_dict
        # 离散embedding
        self.user_pay_history_QOE_discrete_embeddings = discrete_embedding(self.feature_category_num_dict,
                                                                           feature_column_dict['history_QOE_discrete'],
                                                                           discrete_embedding_dim)
        self.user_pay_history_CHONGHE_discrete_embeddings = discrete_embedding(self.feature_category_num_dict,
                                                                               feature_column_dict[
                                                                                   'history_CHONGHE_discrete_add_D'],
                                                                               discrete_embedding_dim)
        self.user_pay_history_FUFEI_discrete_embeddings = discrete_embedding(self.feature_category_num_dict,
                                                                             feature_column_dict[
                                                                                 'history_FUFEI_discrete'],
                                                                             discrete_embedding_dim)
        # MLP  连续embedding
        # category_feature_num_list = category_feature_num(feature_category_num_dict, feature_column_dict['history_QOE_continue'])
        self.user_pay_history_QOE_continue_embedding = continuous_embedding(
            category_feature_num(feature_category_num_dict, feature_column_dict['history_QOE_continue']),
            continue_embedding_dim)
        self.user_pay_history_CHONGHE_continue_embedding = continuous_embedding(
            category_feature_num(feature_category_num_dict, feature_column_dict['history_CHONGHE_continue']),
            continue_embedding_dim)
        self.user_pay_history_FUFEI_continue_embedding = continuous_embedding(
            category_feature_num(feature_category_num_dict, feature_column_dict['history_FUFEI_continue']),
            continue_embedding_dim)

    def forward(self, batch_feature_tensor_pay_QOE_discrete, batch_feature_tensor_pay_CHONGHE_discrete,
                batch_feature_tensor_pay_FUFEI_discrete, batch_feature_tensor_pay_QOE_continue,
                batch_feature_tensor_pay_CHONGHE_continue, batch_feature_tensor_pay_FUFEI_continue):
        # user_history Embedding
        # user_history_continue_features_embedding 得到(batch, 1, continue_feature_num, continue_embedding_dim)
        # user_history_discrete_features_embedding 得到(batch, 1, discrete_feature_num, discrete_embedding_dim)
        # history中有三种：QOE/CHONGHE/FUFEI,将其分别转化为embedding然后合并
        # embedding的数据要求输入是整数类型 因此转为int，输入数据得是从0开始的索引后的数据，因此mask后得到-1以及在输入时得到了从0开始的索引后值，
        # 现在所有discrete数据输入时+1，即 batch_feature_tensor_pay_QOE_discrete[:, :, i]+1 
        # for i in range(batch_feature_tensor_pay_QOE_discrete.shape[2]):
        #     print(i,batch_feature_tensor_pay_QOE_discrete.shape[2],batch_feature_tensor_pay_QOE_discrete[:, :, i]+1,self.user_pay_history_QOE_discrete_embeddings[i].num_embeddings )
        batch_feature_tensor_pay_QOE_discrete = batch_feature_tensor_pay_QOE_discrete.int()
        batch_feature_tensor_pay_CHONGHE_discrete = batch_feature_tensor_pay_CHONGHE_discrete.int()
        batch_feature_tensor_pay_FUFEI_discrete = batch_feature_tensor_pay_FUFEI_discrete.int()

        user_history_pay_QOE_discrete_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_pay_QOE_discrete[:, :, i] + 1) for i, embedding_layer in
             enumerate(self.user_pay_history_QOE_discrete_embeddings)], dim=-2)
        user_history_pay_QOE_continue_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_pay_QOE_continue[:, :, i].unsqueeze(-1).float()) for
             i, embedding_layer in
             enumerate(self.user_pay_history_QOE_continue_embedding)], dim=-2)
        user_history_pay_QOE_vec = torch.cat(
            [user_history_pay_QOE_discrete_column_discrete_features_embedding,
             user_history_pay_QOE_continue_column_discrete_features_embedding], dim=2)  # 特征级合并

        user_history_pay_CHONGHE_discrete_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_pay_CHONGHE_discrete[:, :, i] + 1) for i, embedding_layer in
             enumerate(self.user_pay_history_CHONGHE_discrete_embeddings)], dim=-2)
        user_history_pay_CHONGHE_continue_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_pay_CHONGHE_continue[:, :, i].unsqueeze(2).float()) for
             i, embedding_layer in
             enumerate(self.user_pay_history_CHONGHE_continue_embedding)], dim=-2)
        user_history_pay_CHONGHE_vec = torch.cat(
            [user_history_pay_CHONGHE_discrete_column_discrete_features_embedding,
             user_history_pay_CHONGHE_continue_column_discrete_features_embedding], dim=2)  # 特征级合并

        user_history_pay_FUFEI_discrete_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_pay_FUFEI_discrete[:, :, i] + 1) for i, embedding_layer in
             enumerate(self.user_pay_history_FUFEI_discrete_embeddings)], dim=-2)
        user_history_pay_FUFEI_continue_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_pay_FUFEI_continue[:, :, i].unsqueeze(2).float()) for
             i, embedding_layer in
             enumerate(self.user_pay_history_FUFEI_continue_embedding)], dim=-2)
        user_history_pay_FUFEI_vec = torch.cat(
            [user_history_pay_FUFEI_discrete_column_discrete_features_embedding,
             user_history_pay_FUFEI_continue_column_discrete_features_embedding], dim=2)  # 特征级合并
        # print(user_history_pay_FUFEI_discrete_column_discrete_features_embedding.shape,user_history_pay_FUFEI_continue_column_discrete_features_embedding.shape)

        return user_history_pay_QOE_vec, user_history_pay_CHONGHE_vec, user_history_pay_FUFEI_vec


# target_feature
class TargetEmbedding(nn.Module):
    def __init__(self, continue_embedding_dim, discrete_embedding_dim, feature_category_num_dict, feature_column_dict):
        super(TargetEmbedding, self).__init__()
        # 连续特征  与付费、非付费共享一套特征
        # 离散特征  与付费、非付费共享一套特征
        self.feature_category_num_dict = feature_category_num_dict
        # 离散embedding
        self.target_QOE_discrete_embeddings = discrete_embedding(self.feature_category_num_dict,
                                                                 feature_column_dict['history_QOE_discrete'],
                                                                 discrete_embedding_dim)
        self.target_CHONGHE_discrete_embeddings = discrete_embedding(self.feature_category_num_dict,
                                                                     feature_column_dict['history_CHONGHE_discrete'],
                                                                     discrete_embedding_dim)
        self.target_FUFEI_discrete_embeddings = discrete_embedding(self.feature_category_num_dict,
                                                                   feature_column_dict['history_FUFEI_discrete'],
                                                                   discrete_embedding_dim)
        # MLP  连续embedding
        self.target_QOE_continue_embedding = continuous_embedding(
            category_feature_num(feature_category_num_dict, feature_column_dict['history_QOE_continue']),
            continue_embedding_dim)
        self.target_CHONGHE_continue_embedding = continuous_embedding(
            category_feature_num(feature_category_num_dict, feature_column_dict['history_CHONGHE_continue']),
            continue_embedding_dim)
        self.target_FUFEI_continue_embedding = continuous_embedding(
            category_feature_num(feature_category_num_dict, feature_column_dict['history_FUFEI_continue']),
            continue_embedding_dim)

    def forward(self, batch_feature_tensor_target_QOE_discrete, batch_feature_tensor_target_CHONGHE_discrete,
                batch_feature_tensor_target_FUFEI_discrete, batch_feature_tensor_target_QOE_continue,
                batch_feature_tensor_target_CHONGHE_continue, batch_feature_tensor_target_FUFEI_continue):
        # target Embedding
        # target_continue_features_embedding 得到(batch, 1, continue_feature_num, continue_embedding_dim)
        # target_discrete_features_embedding 得到(batch, 1, discrete_feature_num, discrete_embedding_dim)
        # 有三种：QOE/CHONGHE/FUFEI,将其分别转化为embedding然后合并
        batch_feature_tensor_target_QOE_discrete = batch_feature_tensor_target_QOE_discrete.int()
        batch_feature_tensor_target_CHONGHE_discrete = batch_feature_tensor_target_CHONGHE_discrete.int()
        batch_feature_tensor_target_FUFEI_discrete = batch_feature_tensor_target_FUFEI_discrete.int()
        target_QOE_discrete_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_target_QOE_discrete[:, :, i] + 1) for i, embedding_layer in
             enumerate(self.target_QOE_discrete_embeddings)], dim=-2)
        target_QOE_continue_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_target_QOE_continue[:, :, i].unsqueeze(2).float()) for
             i, embedding_layer in
             enumerate(self.target_QOE_continue_embedding)], dim=-2)
        target_QOE_vec = torch.cat(
            [target_QOE_discrete_column_discrete_features_embedding,
             target_QOE_continue_column_discrete_features_embedding], dim=2)  # 特征级合并

        target_CHONGHE_discrete_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_target_CHONGHE_discrete[:, :, i] + 1) for i, embedding_layer in
             enumerate(self.target_CHONGHE_discrete_embeddings)], dim=-2)
        target_CHONGHE_continue_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_target_CHONGHE_continue[:, :, i].unsqueeze(2).float()) for
             i, embedding_layer in
             enumerate(self.target_CHONGHE_continue_embedding)], dim=-2)
        target_CHONGHE_vec = torch.cat(
            [target_CHONGHE_discrete_column_discrete_features_embedding,
             target_CHONGHE_continue_column_discrete_features_embedding], dim=2)  # 特征级合并

        target_FUFEI_discrete_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_target_FUFEI_discrete[:, :, i] + 1) for i, embedding_layer in
             enumerate(self.target_FUFEI_discrete_embeddings)], dim=-2)
        target_FUFEI_continue_column_discrete_features_embedding = torch.stack(
            [embedding_layer(batch_feature_tensor_target_FUFEI_continue[:, :, i].unsqueeze(2).float()) for
             i, embedding_layer in
             enumerate(self.target_FUFEI_continue_embedding)], dim=-2)
        target_FUFEI_vec = torch.cat(
            [target_FUFEI_discrete_column_discrete_features_embedding,
             target_FUFEI_continue_column_discrete_features_embedding], dim=2)  # 特征级合并

        return target_QOE_vec, target_CHONGHE_vec, target_FUFEI_vec


In [25]:
# 5.Attention层


# 用户历史embedding 多头+SE  (batch, history, feature_num, feature_dim)->(batch, 1，feature_dim)
class HistoryDimScalingLayer(nn.Module):
    def __init__(self, num_heads, feature_dim, feature_category_num_dict, max_history_len):
        super(HistoryDimScalingLayer, self).__init__()
        # 多头注意力
        self.multi_head_attention = MultiHeadSelfAttention(num_heads, feature_dim, max_history_len)
        # SE注意力
        self.se_attention_QOE = SELayer(
            len(category_feature_num(feature_category_num_dict, feature_column_dict['history_QOE_continue'])) + len(
                category_feature_num(feature_category_num_dict, feature_column_dict['history_QOE_discrete'])))
        self.se_attention_CHONGHE = SELayer(
            len(category_feature_num(feature_category_num_dict, feature_column_dict['history_CHONGHE_continue'])) + len(
                category_feature_num(feature_category_num_dict, feature_column_dict['history_CHONGHE_discrete_add_D'])))
        self.se_attention_FUFEI = SELayer(
            len(category_feature_num(feature_category_num_dict, feature_column_dict['history_FUFEI_continue'])) + len(
                category_feature_num(feature_category_num_dict, feature_column_dict['history_FUFEI_discrete'])))

    def forward(self, user_history_QOE_vec, user_history_CHONGHE_vec, user_history_FUFEI_vec, pay_QOE_mask=None,
                pay_CHONGHE_mask=None, pay_FUFEI_mask=None):
        # (batch, history, feature_num, feature_dim) ->多头 ->(batch, feature_num, feature_dim)->转置->(batch, feature_dim, feature_num) ->SE->特征权重->(batch, feature_dim, feature_num) ->转置-> 加权->(batch, 1，feature_dim)
        # 多头注意力  例(batch, history, 20, 200) ->多头 ->(batch, 20, 200)
        # print('user_history_QOE_vec',user_history_QOE_vec.shape,pay_QOE_mask.shape)
        # ********多头注意力前转化************
        # 二、三维度互换  变为(batch, feature_num, history, 200)
        user_history_QOE_vec = user_history_QOE_vec.permute(0, 2, 1, 3)
        user_history_CHONGHE_vec = user_history_CHONGHE_vec.permute(0, 2, 1, 3)
        user_history_FUFEI_vec = user_history_FUFEI_vec.permute(0, 2, 1, 3)
        # 记录特征数
        user_history_QOE_temp_dim = user_history_QOE_vec.shape[1]
        user_history_CHONGHE_temp_dim = user_history_CHONGHE_vec.shape[1]
        user_history_FUFEI_temp_dim = user_history_FUFEI_vec.shape[1]
        #（样本数*特征数,历史数，200）
        user_history_QOE_vec = user_history_QOE_vec.reshape(-1, max_history_len, feature_dim)
        user_history_CHONGHE_vec = user_history_CHONGHE_vec.reshape(-1, max_history_len, feature_dim)
        user_history_FUFEI_vec = user_history_FUFEI_vec.reshape(-1, max_history_len, feature_dim)
        #(样本数*特征数，200）
        mutli_QOE_weight, multi_user_history_QOE_vec, _ = self.multi_head_attention(user_history_QOE_vec,
                                                                                    mask=pay_QOE_mask)
        mutli_CHONGHE_weight, multi_user_history_CHONGHE_vec, _ = self.multi_head_attention(user_history_CHONGHE_vec,
                                                                                            mask=pay_CHONGHE_mask)
        mutli_FUFEI_weight, multi_user_history_FUFEI_vec, _ = self.multi_head_attention(user_history_FUFEI_vec,
                                                                                        mask=pay_FUFEI_mask)
        #(样本数,特征数，200）
        multi_user_history_QOE_vec = multi_user_history_QOE_vec.view(-1, user_history_QOE_temp_dim, feature_dim)
        multi_user_history_CHONGHE_vec = multi_user_history_CHONGHE_vec.view(-1, user_history_CHONGHE_temp_dim,
                                                                             feature_dim)
        multi_user_history_FUFEI_vec = multi_user_history_FUFEI_vec.view(-1, user_history_FUFEI_temp_dim, feature_dim)
        # print('multi_user_history_QOE_vec',multi_user_history_QOE_vec.shape) #,multi_user_history_QOE_vec[0,0,:])

        # 去掉第二维  (batch, 1, 20, 200)->(batch, 20, 200)
        # multi_user_history_QOE_vec = multi_user_history_QOE_vec.squeeze(dim=1)
        # multi_user_history_CHONGHE_vec = multi_user_history_CHONGHE_vec.squeeze(dim=1)
        # multi_user_history_FUFEI_vec= multi_user_history_FUFEI_vec.squeeze(dim=1)
        # 调整维度 (batch, 20, 200)->(batch,20,200,1)  (batch,feature_num.embedding_dim,1)
        multi_user_history_QOE_vec = multi_user_history_QOE_vec.unsqueeze(-1)
        multi_user_history_CHONGHE_vec = multi_user_history_CHONGHE_vec.unsqueeze(-1)
        multi_user_history_FUFEI_vec = multi_user_history_FUFEI_vec.unsqueeze(-1)
        # 转置 交换最后两个维度 (feature_num 和 embedding_dim)
        # multi_user_history_QOE_vec = torch.transpose(multi_user_history_QOE_vec, 1, 2)
        # multi_user_history_CHONGHE_vec = torch.transpose(multi_user_history_CHONGHE_vec, 1, 2)
        # multi_user_history_FUFEI_vec = torch.transpose(multi_user_history_FUFEI_vec, 1, 2)

        # SE注意力  (batch,feature_num,feature_dim,1) ->SE->特征权重->(batch,feature_num,feature_dim,1)->去除最后一列-> 加权->(batch, 1，feature_dim)
        se_QOE_weight, se_user_history_QOE_vec, _ = self.se_attention_QOE(multi_user_history_QOE_vec)
        se_CHONGHE_weight, se_user_history_CHONGHE_vec, _ = self.se_attention_CHONGHE(multi_user_history_CHONGHE_vec)
        se_FUFEI_weight, se_user_history_FUFEI_vec, _ = self.se_attention_FUFEI(multi_user_history_FUFEI_vec)

        HistoryDimScaling_Weight_Result = {
            'mutli_QOE_weight': mutli_QOE_weight,
            'mutli_CHONGHE_weight': mutli_CHONGHE_weight,
            'mutli_FUFEI_weight': mutli_FUFEI_weight,
            'se_QOE_weight': se_QOE_weight,
            'se_CHONGHE_weight': se_CHONGHE_weight,
            'se_FUFEI_weight': se_FUFEI_weight
        }
        return HistoryDimScaling_Weight_Result, se_user_history_QOE_vec, se_user_history_CHONGHE_vec, se_user_history_FUFEI_vec


# 目标产品embedding SE  (batch, 1, feature_num, feature_dim)->(batch, 1，feature_dim)
class TargetDimScalingLayer(nn.Module):
    def __init__(self, feature_dim, feature_category_num_dict):
        super(TargetDimScalingLayer, self).__init__()
        # SE注意力
        self.se_attention_QOE = SELayer(
            len(category_feature_num(feature_category_num_dict, feature_column_dict['history_QOE_continue'])) + len(
                category_feature_num(feature_category_num_dict, feature_column_dict['history_QOE_discrete'])))
        self.se_attention_CHONGHE = SELayer(
            len(category_feature_num(feature_category_num_dict, feature_column_dict['history_CHONGHE_continue'])) + len(
                category_feature_num(feature_category_num_dict, feature_column_dict['history_CHONGHE_discrete'])))
        self.se_attention_FUFEI = SELayer(
            len(category_feature_num(feature_category_num_dict, feature_column_dict['history_FUFEI_continue'])) + len(
                category_feature_num(feature_category_num_dict, feature_column_dict['history_FUFEI_discrete'])))

    def forward(self, target_QOE_vec, target_CHONGHE_vec, target_FUFEI_vec, mask=None):
        # (batch, 1, feature_num, feature_dim) (batch, feature_num, feature_dim)->转置->(batch, feature_dim, feature_num) ->SE->特征权重->(batch, feature_dim, feature_num) ->转置-> 加权->(batch, 1，feature_dim)
        # target_QOE_vec = target_QOE_vec.squeeze(1)  # 使用 squeeze 函数移除大小为 1 的维度
        # target_CHONGHE_vec = target_CHONGHE_vec.squeeze(1)  # 使用 squeeze 函数移除大小为 1 的维度
        # target_FUFEI_vec = target_FUFEI_vec.squeeze(1)  # 使用 squeeze 函数移除大小为 1 的维度
        # 转置 交换最后两个维度 (20 和 200)
        # target_QOE_vec = torch.transpose(target_QOE_vec, -2, -1)
        # target_CHONGHE_vec = torch.transpose(target_CHONGHE_vec, -2, -1)
        # target_FUFEI_vec = torch.transpose(target_FUFEI_vec, -2, -1)
        # 去掉第二维  (batch, 1, 20, 200)->(batch, 20, 200)
        target_QOE_vec = target_QOE_vec.squeeze(dim=1)
        target_CHONGHE_vec = target_CHONGHE_vec.squeeze(dim=1)
        target_FUFEI_vec = target_FUFEI_vec.squeeze(dim=1)
        # 调整维度 (batch, 20, 200)->(batch,20,200,1)  (batch,feature_num.embedding_dim,1)
        target_QOE_vec = target_QOE_vec.unsqueeze(-1)
        target_CHONGHE_vec = target_CHONGHE_vec.unsqueeze(-1)
        target_FUFEI_vec = target_FUFEI_vec.unsqueeze(-1)

        # SE注意力  (batch, feature_dim, feature_num) ->SE->特征权重->(batch, feature_dim, feature_num)->转置-> 加权->(batch, 1，feature_dim)
        # 结果为权重，合并后向量，合并前向量
        se_QOE_weight, se_target_QOE_vec, _ = self.se_attention_QOE(target_QOE_vec)
        se_CHONGHE_weight, se_target_CHONGHE_vec, _ = self.se_attention_CHONGHE(target_CHONGHE_vec)
        se_FUFEI_weight, se_target_FUFEI_vec, _ = self.se_attention_FUFEI(target_FUFEI_vec)

        TargetDimScaling_Weight_Result = {
            'se_QOE_weight': se_QOE_weight,
            'se_CHONGHE_weight': se_CHONGHE_weight,
            'se_FUFEI_weight': se_FUFEI_weight
        }
        return TargetDimScaling_Weight_Result, se_target_QOE_vec, se_target_CHONGHE_vec, se_target_FUFEI_vec


# 用户历史与目标记录的attention层
class History_Target_AttentionLayer(nn.Module):
    def __init__(self, num_heads, feature_dim):
        super(History_Target_AttentionLayer, self).__init__()
        self.target_history_pay_feature_pianhao_QOE_layer = MultiHeadHistory_TargetAttention(num_heads, feature_dim)
        self.target_history_pay_feature_pianhao_CHONGHE_layer = MultiHeadHistory_TargetAttention(num_heads, feature_dim)
        self.target_history_pay_feature_pianhao_FUFEI_layer = MultiHeadHistory_TargetAttention(num_heads, feature_dim)

    def forward(self, se_user_history_pay_QOE_vec, se_user_history_pay_CHONGHE_vec, se_user_history_pay_FUFEI_vec,
                se_target_QOE_vec, se_target_CHONGHE_vec, se_target_FUFEI_vec, pay_QOE_mask=None, pay_CHONGHE_mask=None,
                pay_FUFEI_mask=None):
        # 将QOE、CHONGHE、FUFEI分别做attention
        # 对目标特征求对历史特征的偏好   (batch, 1，feature_dim)输出
        target_history_pay_attention_QOE_weight, target_history_pay_attention_QOE_vec = self.target_history_pay_feature_pianhao_QOE_layer(
            se_target_QOE_vec, se_user_history_pay_QOE_vec, se_user_history_pay_QOE_vec)
        target_history_pay_attention_CHONGHE_weight, target_history_pay_attention_CHONGHE_vec = self.target_history_pay_feature_pianhao_CHONGHE_layer(
            se_target_CHONGHE_vec, se_user_history_pay_CHONGHE_vec, se_user_history_pay_CHONGHE_vec)
        target_history_pay_attention_FUFEI_weight, target_history_pay_attention_FUFEI_vec = self.target_history_pay_feature_pianhao_FUFEI_layer(
            se_target_FUFEI_vec, se_user_history_pay_FUFEI_vec, se_user_history_pay_FUFEI_vec)
        # CONCAT  (batch, 3，feature_dim)输出
        target_history_pay_attention_vec = torch.cat((target_history_pay_attention_QOE_vec,
                                                      target_history_pay_attention_CHONGHE_vec,
                                                      target_history_pay_attention_FUFEI_vec), dim=1)
        return target_history_pay_attention_vec, target_history_pay_attention_QOE_weight, target_history_pay_attention_CHONGHE_weight, target_history_pay_attention_FUFEI_weight

# class History_Target_AttentionLayer(nn.Module):
#     def __init__(self, num_heads, feature_dim):
#         super(History_Target_AttentionLayer, self).__init__()
#         self.target_history_pay_feature_pianhao_layer = MultiHeadHistory_TargetAttention(num_heads, feature_dim)
#         self.target_history_not_pay_feature_pianhao_layer = MultiHeadHistory_TargetAttention(num_heads, feature_dim)

#     def forward(self, se_user_history_pay_QOE_vec, se_user_history_pay_CHONGHE_vec, se_user_history_pay_FUFEI_vec, se_target_QOE_vec, se_target_CHONGHE_vec, se_target_FUFEI_vec, pay_QOE_mask=None, pay_CHONGHE_mask=None, pay_FUFEI_mask=None):
#         # 将QOE、CHONGHE、FUFEI叠加，形成三个特征的向量  (batch, 1，feature_dim)->(batch, 3，feature_dim)
#         user_history_pay_vec = torch.cat((se_user_history_pay_QOE_vec, se_user_history_pay_CHONGHE_vec, se_user_history_pay_FUFEI_vec), dim=1)
#         target_vec = torch.cat((se_target_QOE_vec, se_target_CHONGHE_vec, se_target_FUFEI_vec), dim=1)

#         # 对目标特征求对历史特征的偏好   (batch, 3，feature_dim)输出
#         target_history_pay_attention_weight, target_history_pay_attention_vec = self.target_history_pay_feature_pianhao_layer(target_vec, user_history_pay_vec)

#         return target_history_pay_attention_weight, target_history_pay_attention_vec


In [16]:
# 6.整合模型


# (batch,600)经过网络变成200 +(batch,featuer_user*200)经过网络变成200 -> (batch,200)
# (batch,200) ->MLP ->(batch，1) ->sigmoid -> (batch,1)

# 整合层
class MatchingModel(nn.Module):
    def __init__(self, feature_category_num_dict, feature_column_dict, continue_embedding_dim,
                 discrete_embedding_dim, num_heads, feature_dim, max_history_len):
        super(MatchingModel, self).__init__()
        # Embedding层
        # self.user_info_embedding_layer = UserInfoEmbedding(continue_embedding_dim, discrete_embedding_dim, feature_category_num_dict, feature_column_dict)
        self.user_history_pay_embedding_layer = UserPayHistoryEmbedding(continue_embedding_dim, discrete_embedding_dim,
                                                                        feature_category_num_dict, feature_column_dict)
        #print('embedding user_history结果')
        self.target_embedding_layer = TargetEmbedding(continue_embedding_dim, discrete_embedding_dim,
                                                      feature_category_num_dict, feature_column_dict)

        # User History & Target Attention层
        self.history_pay_attention_layer = HistoryDimScalingLayer(num_heads, feature_dim, feature_category_num_dict,
                                                                  max_history_len)
        self.target_attention_layer = TargetDimScalingLayer(feature_dim, feature_category_num_dict)

        # Target History Attention层
        self.target_history_attention_layer = History_Target_AttentionLayer(num_heads, feature_dim)

        # 维度转换层
        final_dim = 20
        self.target_dim_change = dense_layer_noReLu(3 * feature_dim,
                                                    final_dim)  # (batch,3,200)->(batch,600)->(batch,200)
        # user_info_feature_num = feature_category_num_dict['user_info_continue'].shape[2] + feature_category_num_dict['user_info_discrete'].shape[2]
        # self.user_info_dim_change = dense_layer(user_info_feature_num, 200)  # (batch,user_info_feature,200)->(batch,user_info_feature*200)->(batch,200)
        # MLP
        self.pay_vec_MLP_layer = dense_layer_noReLu(final_dim, 1)

    def forward(self, batch_feature_tensor_pay_QOE_discrete, batch_feature_tensor_pay_CHONGHE_discrete,
                batch_feature_tensor_pay_FUFEI_discrete,
                batch_feature_tensor_pay_QOE_continue, batch_feature_tensor_pay_CHONGHE_continue,
                batch_feature_tensor_pay_FUFEI_continue,
                batch_feature_tensor_target_QOE_discrete, batch_feature_tensor_target_CHONGHE_discrete,
                batch_feature_tensor_target_FUFEI_discrete,
                batch_feature_tensor_target_QOE_continue, batch_feature_tensor_target_CHONGHE_continue,
                batch_feature_tensor_target_FUFEI_continue,
                batch_feature_tensor_pay_QOE_discrete_mask, batch_feature_tensor_pay_CHONGHE_discrete_mask,
                batch_feature_tensor_pay_FUFEI_discrete_mask,
                batch_feature_tensor_pay_QOE_continue_mask, batch_feature_tensor_pay_CHONGHE_continue_mask,
                batch_feature_tensor_pay_FUFEI_continue_mask,
                label_tensor):
        # Embedding层
        user_history_pay_QOE_vec, user_history_pay_CHONGHE_vec, user_history_pay_FUFEI_vec = self.user_history_pay_embedding_layer(
            batch_feature_tensor_pay_QOE_discrete, batch_feature_tensor_pay_CHONGHE_discrete,
            batch_feature_tensor_pay_FUFEI_discrete, batch_feature_tensor_pay_QOE_continue,
            batch_feature_tensor_pay_CHONGHE_continue, batch_feature_tensor_pay_FUFEI_continue)
        target_QOE_vec, target_CHONGHE_vec, target_FUFEI_vec = self.target_embedding_layer(
            batch_feature_tensor_target_QOE_discrete, batch_feature_tensor_target_CHONGHE_discrete,
            batch_feature_tensor_target_FUFEI_discrete, batch_feature_tensor_target_QOE_continue,
            batch_feature_tensor_target_CHONGHE_continue, batch_feature_tensor_target_FUFEI_continue)
        # print('user_history_pay_FUFEI_vec size=',user_history_pay_FUFEI_vec.size())
        # print('target_QOE_vec size=',target_QOE_vec.size())
        # User History & Target Attention层
        # 合并mask输入  
        # print("Shape of mask tensor:", batch_feature_tensor_pay_QOE_discrete_mask.shape,batch_feature_tensor_pay_QOE_continue_mask.shape)
        pay_QOE_mask = torch.cat(
            (batch_feature_tensor_pay_QOE_discrete_mask, batch_feature_tensor_pay_QOE_continue_mask), dim=2)
        pay_CHONGHE_mask = torch.cat(
            (batch_feature_tensor_pay_CHONGHE_discrete_mask, batch_feature_tensor_pay_CHONGHE_continue_mask), dim=2)
        pay_FUFEI_mask = torch.cat(
            (batch_feature_tensor_pay_FUFEI_discrete_mask, batch_feature_tensor_pay_FUFEI_continue_mask), dim=2)
        HistoryDimScaling_Weight_Result, se_user_history_pay_QOE_vec, se_user_history_pay_CHONGHE_vec, se_user_history_pay_FUFEI_vec = self.history_pay_attention_layer(
            user_history_pay_QOE_vec, user_history_pay_CHONGHE_vec, user_history_pay_FUFEI_vec,
            pay_QOE_mask=pay_QOE_mask, pay_CHONGHE_mask=pay_CHONGHE_mask, pay_FUFEI_mask=pay_FUFEI_mask)
        TargetDimScaling_Weight_Result, se_target_QOE_vec, se_target_CHONGHE_vec, se_target_FUFEI_vec = self.target_attention_layer(
            target_QOE_vec, target_CHONGHE_vec, target_FUFEI_vec)
        # print('se_user_history_pay_QOE_vec size=', se_user_history_pay_QOE_vec.shape)
        # print('se_target_QOE_vec size=', se_target_QOE_vec.shape)
        # Target with History Attention层
        target_history_pay_attention_vec, target_history_pay_attention_QOE_weight, \
            target_history_pay_attention_CHONGHE_weight, target_history_pay_attention_FUFEI_weight = self.target_history_attention_layer(
            se_user_history_pay_QOE_vec, se_user_history_pay_CHONGHE_vec, se_user_history_pay_FUFEI_vec,
            se_target_QOE_vec, se_target_CHONGHE_vec, se_target_FUFEI_vec)
        # print('target_history_pay_attention_vec size=', target_history_pay_attention_vec.shape)

        # # 拼接user_info_vec与target_history_pay_attention_vec等
        # user_info_vec = user_info_vec.squeeze(1)  # 使用 squeeze 函数移除大小为 1 的维度
        # FUFEI:(batch,3,200)->(batch,3*200)经过网络->(batch,200) + uer_info:(batch,featuer_user*200)经过网络->(batch,200) 叠加后-> (batch,400)
        # 维度转换 (batch,3,200)->(batch,feature*200)经过网络->(batch,200)
        target_history_pay_attention_vec = target_history_pay_attention_vec.view(batch_size,
                                                                                 -1)  # 将张量 x 重塑为 (batch, 3*200)  使用 -1 作为自动计算的维度       
        target_history_pay_attention_vec = self.target_dim_change(target_history_pay_attention_vec)
        # print('target_history_pay_attention_vec',target_history_pay_attention_vec)

        # MLP
        # (batch,200) ->MLP ->(batch，1) ->sigmoid -> (batch,1)
        out_vec = self.pay_vec_MLP_layer(target_history_pay_attention_vec)
        # print('out_vec size=',out_vec.shape,'out_vec:',out_vec)
        # 使用softmax函数将logits转换为概率分布
        # softmax_score = F.softmax(out_vec, dim=1)  # 在类别维度（dim=1）上应用softmax
        sigmoid_score = torch.sigmoid(out_vec)  # 在类别维度（dim=1）上应用softmax
        # sigmoid_score = out_vec  # 在类别维度（dim=1）上应用softmax
        softmax_score = torch.softmax(out_vec, dim=1)
        # print('softmax_score size=',softmax_score.shape,'score:',softmax_score)
        # print('sigmoid_score size=',sigmoid_score.shape,'score:',sigmoid_score)
        print(sigmoid_score)
        return softmax_score, sigmoid_score, HistoryDimScaling_Weight_Result, TargetDimScaling_Weight_Result, target_history_pay_attention_QOE_weight, target_history_pay_attention_CHONGHE_weight, target_history_pay_attention_FUFEI_weight


# 损失函数
class LossFunction(nn.Module):
    def __init__(self):
        super(LossFunction, self).__init__()

    def forward(self, pred, target_label):
        # pred是未经处理过的原值，target_label是0、1标签
        # 计算第一个任务的二元交叉熵损失
        loss = F.binary_cross_entropy_with_logits(pred, target_label, reduction='none')
        return loss


# 自动评估阈值，计算ACC 、 Precision 等评估指标
def evaluate(y_true, y_pred, digits=4, cutoff='auto'):
    '''
    Args:
        y_true: list, labels, y_pred: list, predictions, digits: The number of decimals to use when rounding the number. Default is 4（保留小数后几位）
        cutoff: float or 'auto'
    Returns:
        evaluation: dict
    '''
    # 根据预测概率值y_pred计算最佳的切分阈值
    if cutoff == 'auto':
        fpr, tpr, thresholds = roc_curve(y_true, y_pred)
        youden = tpr - fpr
        cutoff = thresholds[np.argmax(youden)]
    y_pred_t = [1 if i > cutoff else 0 for i in y_pred]

    evaluation = OrderedDict()
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred_t).ravel()
    evaluation['auc'] = round(roc_auc_score(y_true, y_pred), digits)
    evaluation['acc'] = round(accuracy_score(y_true, y_pred_t), digits)
    evaluation['recall'] = round(recall_score(y_true, y_pred_t), digits)
    evaluation['precision'] = round(precision_score(y_true, y_pred_t), digits)
    evaluation['specificity'] = round(tn / (tn + fp), digits)
    evaluation['F1'] = round(f1_score(y_true, y_pred_t), digits)
    evaluation['cutoff'] = cutoff

    return evaluation


# 输出 Target History 的对特征的注意力得分
def OutputMutliAttentionScore(attention_weights):
    # 将注意力矩阵 reshape 成 (batch_size * head_num, feature_num, head_dim) 的形状
    batch_size, feature_num, feature_num, head_dim = attention_weights.shape
    attention_weights = attention_weights.view(-1, feature_num, head_dim)
    # 定义全连接层和激活函数
    fc_layer = nn.Linear(head_dim, 1).to(attention_weights.device)
    activation = nn.Sigmoid()
    # 将注意力矩阵输入全连接层
    output = fc_layer(attention_weights)
    # 应用激活函数
    output = activation(output)
    # 将输出 reshape 成最终形状 (batch_size, feature_num, 1, 1)
    final_output = output.view(-1, feature_num, 1, 1)
    return final_output


# 输出权重结果到文件夹 首先要压缩维度到特征上，然后根据特征名列表输出
# tensor_dict_idx = ['pay_QOE_continue','pay_QOE_discrete','pay_CHONGHE_continue','pay_CHONGHE_discrete','pay_FUFEI_continue','pay_FUFEI_discrete','target_QOE_continue','target_QOE_discrete','target_CHONGHE_continue','target_CHONGHE_discrete','target_FUFEI_continue','target_FUFEI_discrete']
def WeightResult(HistoryDimScaling_Weight_Result, TargetDimScaling_Weight_Result,
                 target_history_pay_attention_QOE_weight,
                 target_history_pay_attention_CHONGHE_weight, target_history_pay_attention_FUFEI_weight):
    # SE attention  (batch,feature_num,1,1)
    se_user_pay_QOE_weight = HistoryDimScaling_Weight_Result['se_QOE_weight']
    se_user_pay_CHONGHE_weight = HistoryDimScaling_Weight_Result['se_CHONGHE_weight']
    se_user_pay_FUFEI_weight = HistoryDimScaling_Weight_Result['se_FUFEI_weight']
    se_target_QOE_weight = TargetDimScaling_Weight_Result['se_QOE_weight']
    se_target_CHONGHE_weight = TargetDimScaling_Weight_Result['se_CHONGHE_weight']
    se_target_FUFEI_weight = TargetDimScaling_Weight_Result['se_FUFEI_weight']
    # Target History Attention  得到(batch,feature_num,1,1)
    # print('target_history_pay_attention_QOE_weight',target_history_pay_attention_QOE_weight.shape)
    target_history_pay_attention_QOE_weight = OutputMutliAttentionScore(target_history_pay_attention_QOE_weight)
    target_history_pay_attention_CHONGHE_weight = OutputMutliAttentionScore(target_history_pay_attention_CHONGHE_weight)
    target_history_pay_attention_FUFEI_weight = OutputMutliAttentionScore(target_history_pay_attention_FUFEI_weight)
    # 在batch维度上取平均，保持维度 得到(1,feature_num,1,1) 再用.squeeze()去掉为1的维度
    se_user_pay_QOE_weight = se_user_pay_QOE_weight.mean(dim=0, keepdim=True).squeeze()
    se_user_pay_CHONGHE_weight = se_user_pay_CHONGHE_weight.mean(dim=0, keepdim=True).squeeze()
    se_user_pay_FUFEI_weight = se_user_pay_FUFEI_weight.mean(dim=0, keepdim=True).squeeze()
    se_target_QOE_weight = se_target_QOE_weight.mean(dim=0, keepdim=True).squeeze()
    se_target_CHONGHE_weight = se_target_CHONGHE_weight.mean(dim=0, keepdim=True).squeeze()
    se_target_FUFEI_weight = se_target_FUFEI_weight.mean(dim=0, keepdim=True).squeeze()
    target_history_pay_attention_QOE_weight = target_history_pay_attention_QOE_weight.mean(dim=0,
                                                                                           keepdim=True).squeeze()
    target_history_pay_attention_CHONGHE_weight = target_history_pay_attention_CHONGHE_weight.mean(dim=0,
                                                                                                   keepdim=True).squeeze()
    target_history_pay_attention_FUFEI_weight = target_history_pay_attention_FUFEI_weight.mean(dim=0,
                                                                                               keepdim=True).squeeze()

    result = {'se_user_pay_QOE_weight': se_user_pay_QOE_weight.tolist(),
              'se_user_pay_CHONGHE_weight': se_user_pay_CHONGHE_weight.tolist(),
              'se_user_pay_FUFEI_weight': se_user_pay_FUFEI_weight.tolist(),
              'se_target_QOE_weight': se_target_QOE_weight.tolist(),
              'se_target_CHONGHE_weight': se_target_CHONGHE_weight.tolist(),
              'se_target_FUFEI_weight': se_target_FUFEI_weight.tolist(),
              'target_history_pay_attention_QOE_weight': target_history_pay_attention_QOE_weight.tolist(),
              'target_history_pay_attention_CHONGHE_weight': target_history_pay_attention_CHONGHE_weight.tolist(),
              'target_history_pay_attention_FUFEI_weight': target_history_pay_attention_FUFEI_weight.tolist()
              }

    return result

In [8]:
# 创建大模型的实例 'drama_upuser_subscriptions_num,drama_sound_max_traffic_position_in_sound_avg,label1'
# model = MatchingModel(feature_category_num_dict, feature_column_dict, continue_embedding_dim,
#                  discrete_embedding_dim, num_heads, feature_dim, max_history_len)
# print('模型创建完成')

In [9]:
# 7.模型训练 Trainging

def model_training(model, train_loader, val_loader, lossfunction, optimizer, EPOCH, device):
    # 定义早停策略的参数
    best_val_loss = float('inf')  # 初始化最佳验证损失为正无穷
    patience = 1  # 容忍多少个epoch没有验证性能提升
    early_stopping_counter = 0  # 初始化计数器

    for epoch in range(EPOCH):
        model.train()  # 设置模型为训练模式
        total_classfier_loss = 0.0
        total_loss = 0.0
        train_time = 0
        val_time = 0
        for batch in train_loader:
            batch = [data.to(device) for data in batch]
            batch_feature_tensor_pay_QOE_discrete, batch_feature_tensor_pay_CHONGHE_discrete, batch_feature_tensor_pay_FUFEI_discrete, \
                batch_feature_tensor_pay_QOE_continue, batch_feature_tensor_pay_CHONGHE_continue, batch_feature_tensor_pay_FUFEI_continue, \
                batch_feature_tensor_target_QOE_discrete, batch_feature_tensor_target_CHONGHE_discrete, batch_feature_tensor_target_FUFEI_discrete, \
                batch_feature_tensor_target_QOE_continue, batch_feature_tensor_target_CHONGHE_continue, batch_feature_tensor_target_FUFEI_continue, \
                batch_feature_tensor_pay_QOE_discrete_mask, batch_feature_tensor_pay_CHONGHE_discrete_mask, batch_feature_tensor_pay_FUFEI_discrete_mask, \
                batch_feature_tensor_pay_QOE_continue_mask, batch_feature_tensor_pay_CHONGHE_continue_mask, batch_feature_tensor_pay_FUFEI_continue_mask, \
                train_label_tensor = batch
            for param in model.parameters():
                param.requires_grad = True
            optimizer.zero_grad()
            softmax_score, sigmoid_score, HistoryDimScaling_Weight_Result, TargetDimScaling_Weight_Result, \
                target_history_pay_attention_QOE_weight, target_history_pay_attention_CHONGHE_weight, \
                target_history_pay_attention_FUFEI_weight = model(batch_feature_tensor_pay_QOE_discrete,
                                                                  batch_feature_tensor_pay_CHONGHE_discrete,
                                                                  batch_feature_tensor_pay_FUFEI_discrete,
                                                                  batch_feature_tensor_pay_QOE_continue,
                                                                  batch_feature_tensor_pay_CHONGHE_continue,
                                                                  batch_feature_tensor_pay_FUFEI_continue,
                                                                  batch_feature_tensor_target_QOE_discrete,
                                                                  batch_feature_tensor_target_CHONGHE_discrete,
                                                                  batch_feature_tensor_target_FUFEI_discrete,
                                                                  batch_feature_tensor_target_QOE_continue,
                                                                  batch_feature_tensor_target_CHONGHE_continue,
                                                                  batch_feature_tensor_target_FUFEI_continue,
                                                                  batch_feature_tensor_pay_QOE_discrete_mask,
                                                                  batch_feature_tensor_pay_CHONGHE_discrete_mask,
                                                                  batch_feature_tensor_pay_FUFEI_discrete_mask,
                                                                  batch_feature_tensor_pay_QOE_continue_mask,
                                                                  batch_feature_tensor_pay_CHONGHE_continue_mask,
                                                                  batch_feature_tensor_pay_FUFEI_continue_mask,
                                                                  train_label_tensor)

            # weight_result_dict = WeightResult(HistoryDimScaling_Weight_Result, TargetDimScaling_Weight_Result, target_history_pay_attention_QOE_weight,
            #            target_history_pay_attention_CHONGHE_weight,target_history_pay_attention_FUFEI_weight)
            # weight_result_dict = {key: torch.tensor(value).cpu() for key, value in weight_result_dict.items()}
            # print('weight_result_dict_se_user_pay_QOE_weight',weight_result_dict['se_user_pay_QOE_weight'])
            # print('HistoryDimScaling_Weight_Result, TargetDimScaling_Weight_Result, target_history_pay_attention_weight',
            #      HistoryDimScaling_Weight_Result['mutli_QOE_weight'].shape, TargetDimScaling_Weight_Result['se_QOE_weight'].shape, target_history_pay_attention_weight.shape)
            # sigmoid
            # print('sigmoid_score',sigmoid_score)
            sigmoid_score = sigmoid_score[:, 0]  # (样本数，1)
            train_label_tensor = train_label_tensor[:, 0].to(device)  # (样本数，1)
            # train_label_tensor[train_label_tensor == 1] = 0
            # train_label_tensor[train_label_tensor == 2] = 1
            # train_label_tensor = torch.where(train_label_tensor == 1, torch.tensor(0).to(device), torch.tensor(1).to(device))  # 使用 torch.where 将 1 映射为 0，将 2 映射为 1
            loss = lossfunction(sigmoid_score, train_label_tensor.float())
            # softmax
            # softmax_score = softmax_score[:, 0]  # (样本数，1)
            # train_label_tensor = train_label_tensor[:, 0].to(device)  # (样本数，1)
            # train_label_tensor = torch.where(train_label_tensor == 1, torch.tensor(0).to(device), torch.tensor(1).to(device))  # 使用 torch.where 将 1 映射为 0，将 2 映射为 1
            # loss = lossfunction(softmax_score, train_label_tensor.float())
            loss.to(device)

            # loss回传检查
            # for name, parms in model.named_parameters():	
            #     if parms.grad is not None:  # 检查梯度是否为None
            #         grad_mean = torch.mean(parms.grad)  # 计算梯度的均值
            #         print('-->name:', name, '-->grad_requirs:', parms.requires_grad, '-->grad_mean: {:.4f}'.format(grad_mean))
            #     else:
            #         print('-->name:', name, '-->grad_requirs:', parms.requires_grad, '-->grad_mean: None')
            loss.backward()
            optimizer.step()
            # print("=============更新之后===========")
            # for name, parms in model.named_parameters():	
            #     if parms.grad is not None:  # 检查梯度是否为None
            #         grad_mean = torch.mean(parms.grad)  # 计算梯度的均值
            #         print('-->name:', name, '-->grad_requirs:', parms.requires_grad, '-->grad_mean: {:.4f}'.format(grad_mean))
            #     else:
            #         print('-->name:', name, '-->grad_requirs:', parms.requires_grad, '-->grad_mean: None')
            # print(optimizer)
            # input("=====迭代结束=====")

            # 损失
            total_loss += loss.item()
            train_time += 1
            print('||--训练：----------', train_time, '个batch运行时间：', datetime.datetime.now(), '-------------')
        # 平均损失
        average_loss = total_loss / len(train_loader)

        if (epoch + 1) % 5 == 0:
            print(
                f"Epoch {epoch + 1},loss:{average_loss}")

            # 验证集评估
            model.eval()  # 将模型切换为评估模式
            with torch.no_grad():  # 在评估模式下不计算梯度
                total_loss_val = 0.0
                total_auc_val = 0.0
                total_acc_val = 0
                total_f1_val = 0
                total_precision_val = 0
                total_recall_val = 0
                val_time = 0
                for batch_val in val_loader:  # 假设你有一个名为 val_loader 的验证集数据加载器
                    batch_val = [data.to(device) for data in batch_val]
                    val_batch_feature_tensor_pay_QOE_discrete, val_batch_feature_tensor_pay_CHONGHE_discrete, val_batch_feature_tensor_pay_FUFEI_discrete, \
                        val_batch_feature_tensor_pay_QOE_continue, val_batch_feature_tensor_pay_CHONGHE_continue, val_batch_feature_tensor_pay_FUFEI_continue, \
                        val_batch_feature_tensor_target_QOE_discrete, val_batch_feature_tensor_target_CHONGHE_discrete, val_batch_feature_tensor_target_FUFEI_discrete, \
                        val_batch_feature_tensor_target_QOE_continue, val_batch_feature_tensor_target_CHONGHE_continue, val_batch_feature_tensor_target_FUFEI_continue, \
                        val_batch_feature_tensor_pay_QOE_discrete_mask, val_batch_feature_tensor_pay_CHONGHE_discrete_mask, val_batch_feature_tensor_pay_FUFEI_discrete_mask, \
                        val_batch_feature_tensor_pay_QOE_continue_mask, val_batch_feature_tensor_pay_CHONGHE_continue_mask, val_batch_feature_tensor_pay_FUFEI_continue_mask, \
                        val_label_tensor = batch_val
                    softmax_score_val, sigmoid_score_val, HistoryDimScaling_Weight_Result_val, TargetDimScaling_Weight_Result_val, \
                        target_history_pay_attention_QOE_weight_val, target_history_pay_attention_CHONGHE_weight_val, \
                        target_history_pay_attention_FUFEI_weight_val = model(val_batch_feature_tensor_pay_QOE_discrete,
                                                                              val_batch_feature_tensor_pay_CHONGHE_discrete,
                                                                              val_batch_feature_tensor_pay_FUFEI_discrete,
                                                                              val_batch_feature_tensor_pay_QOE_continue,
                                                                              val_batch_feature_tensor_pay_CHONGHE_continue,
                                                                              val_batch_feature_tensor_pay_FUFEI_continue,
                                                                              val_batch_feature_tensor_target_QOE_discrete,
                                                                              val_batch_feature_tensor_target_CHONGHE_discrete,
                                                                              val_batch_feature_tensor_target_FUFEI_discrete,
                                                                              val_batch_feature_tensor_target_QOE_continue,
                                                                              val_batch_feature_tensor_target_CHONGHE_continue,
                                                                              val_batch_feature_tensor_target_FUFEI_continue,
                                                                              val_batch_feature_tensor_pay_QOE_discrete_mask,
                                                                              val_batch_feature_tensor_pay_CHONGHE_discrete_mask,
                                                                              val_batch_feature_tensor_pay_FUFEI_discrete_mask,
                                                                              val_batch_feature_tensor_pay_QOE_continue_mask,
                                                                              val_batch_feature_tensor_pay_CHONGHE_continue_mask,
                                                                              val_batch_feature_tensor_pay_FUFEI_continue_mask,
                                                                              val_label_tensor)

                    # sigmoid                   
                    sigmoid_score_val = sigmoid_score_val[:, 0]  # (样本数，1)
                    sigmoid_score_val = sigmoid_score_val.cpu()  # .detach()  # 转为CPU
                    val_label_tensor = val_label_tensor[:, 0]  # (样本数，1)
                    val_label_tensor = val_label_tensor.cpu()
                    # val_label_tensor[val_label_tensor == 1] = 0
                    # val_label_tensor[val_label_tensor == 2] = 1
                    # val_label_tensor = torch.where(val_label_tensor == 1, torch.tensor(0), torch.tensor(1))  # 使用 torch.where 将 1 映射为 0，将 2 映射为 1
                    loss_val = lossfunction(sigmoid_score_val, val_label_tensor.float())
                    # softmax
                    # softmax_score_val = softmax_score_val[:, 0]  # (样本数，1)
                    # softmax_score_val = softmax_score_val.cpu()# .detach()  # 转为CPU
                    # val_label_tensor = val_label_tensor[:, 0]  # (样本数，1)
                    # val_label_tensor = val_label_tensor.cpu()
                    # val_label_tensor = torch.where(val_label_tensor == 1, torch.tensor(0), torch.tensor(1))  # 使用 torch.where 将 1 映射为 0，将 2 映射为 1
                    # loss_val = lossfunction(softmax_score_val, val_label_tensor.float())

                    # 损失
                    total_loss_val += loss_val.item()
                    # 计算验证集上的精度
                    # predicted_classes_val = (sigmoid_score_val > 0.5).long()
                    # total_acc_val += (predicted_classes_val == val_label_tensor).sum().item() / len(val_label_tensor)
                    # total_f1_val += f1_score(val_label_tensor, predicted_classes_val)
                    # total_recall_val += recall_score(val_label_tensor, predicted_classes_val)
                    # precision_val = ((predicted_classes_val == 1) & (val_label_tensor == 1)).sum().item() / (predicted_classes_val == 1).sum().item()
                    # total_precision_val += precision_val
                    # total_auc_val += roc_auc_score(val_label_tensor, softmax_score_val)
                    evaluation = evaluate(val_label_tensor, sigmoid_score_val)
                    total_acc_val += evaluation['acc']
                    total_f1_val += evaluation['F1']
                    total_recall_val += evaluation['recall']
                    total_precision_val += evaluation['precision']
                    total_auc_val += evaluation['auc']

                    val_time += 1
                    print('||--验证：----------', val_time, '个batch运行时间：', datetime.datetime.now(), '-------------')
                # 平均损失
                average_loss_val = total_loss_val / len(val_loader)
                average_auc_val = total_auc_val / len(val_loader)
                average_acc_val = total_acc_val / len(val_loader)
                average_f1_val = total_f1_val / len(val_loader)
                average_precision_val = total_precision_val / len(val_loader)
                average_recall_val = total_recall_val / len(val_loader)
                print(
                    f"Validation Loss: {average_loss_val},AUC: {average_auc_val},ACC:{average_acc_val},F1:{average_f1_val},Precision:{average_precision_val},Recall:{average_recall_val}")

                if average_loss_val < best_val_loss:
                    best_val_loss = average_loss_val
                    early_stopping_counter = 0
                else:
                    early_stopping_counter += 1
                if early_stopping_counter >= patience:
                    print(f"早停策略触发，停止训练在第 {epoch} 个epoch.")
                    break

In [10]:
# 模型测试 Test

def test_model(model, test_loader):
    model.eval()  # 设置模型为评估模式
    with torch.no_grad():  # 在评估模式下不计算梯度
        total_loss_test = 0.0
        total_auc_test = 0.0
        total_acc_test = 0
        total_f1_test = 0
        total_precision_test = 0
        total_recall_test = 0
        test_time = 0
        results = []  # 用于保存结果的列表
        for batch_test in test_loader:  # 假设你有一个名为 val_loader 的验证集数据加载器
            batch_test = [data.to(device) for data in batch_test]
            test_batch_feature_tensor_pay_QOE_discrete, test_batch_feature_tensor_pay_CHONGHE_discrete, test_batch_feature_tensor_pay_FUFEI_discrete, \
                test_batch_feature_tensor_pay_QOE_continue, test_batch_feature_tensor_pay_CHONGHE_continue, test_batch_feature_tensor_pay_FUFEI_continue, \
                test_batch_feature_tensor_target_QOE_discrete, test_batch_feature_tensor_target_CHONGHE_discrete, test_batch_feature_tensor_target_FUFEI_discrete, \
                test_batch_feature_tensor_target_QOE_continue, test_batch_feature_tensor_target_CHONGHE_continue, test_batch_feature_tensor_target_FUFEI_continue, \
                test_batch_feature_tensor_pay_QOE_discrete_mask, test_batch_feature_tensor_pay_CHONGHE_discrete_mask, test_batch_feature_tensor_pay_FUFEI_discrete_mask, \
                test_batch_feature_tensor_pay_QOE_continue_mask, test_batch_feature_tensor_pay_CHONGHE_continue_mask, test_batch_feature_tensor_pay_FUFEI_continue_mask, \
                test_label_tensor = batch_test
            softmax_score_test, sigmoid_score_test, HistoryDimScaling_Weight_Result_test, TargetDimScaling_Weight_Result_test, \
                target_history_pay_attention_QOE_weight_test, target_history_pay_attention_CHONGHE_weight_test, \
                target_history_pay_attention_FUFEI_weight_test = model(test_batch_feature_tensor_pay_QOE_discrete,
                                                                       test_batch_feature_tensor_pay_CHONGHE_discrete,
                                                                       test_batch_feature_tensor_pay_FUFEI_discrete,
                                                                       test_batch_feature_tensor_pay_QOE_continue,
                                                                       test_batch_feature_tensor_pay_CHONGHE_continue,
                                                                       test_batch_feature_tensor_pay_FUFEI_continue,
                                                                       test_batch_feature_tensor_target_QOE_discrete,
                                                                       test_batch_feature_tensor_target_CHONGHE_discrete,
                                                                       test_batch_feature_tensor_target_FUFEI_discrete,
                                                                       test_batch_feature_tensor_target_QOE_continue,
                                                                       test_batch_feature_tensor_target_CHONGHE_continue,
                                                                       test_batch_feature_tensor_target_FUFEI_continue,
                                                                       test_batch_feature_tensor_pay_QOE_discrete_mask,
                                                                       test_batch_feature_tensor_pay_CHONGHE_discrete_mask,
                                                                       test_batch_feature_tensor_pay_FUFEI_discrete_mask,
                                                                       test_batch_feature_tensor_pay_QOE_continue_mask,
                                                                       test_batch_feature_tensor_pay_CHONGHE_continue_mask,
                                                                       test_batch_feature_tensor_pay_FUFEI_continue_mask,
                                                                       test_label_tensor)
            weight_result_dict = WeightResult(HistoryDimScaling_Weight_Result_test, TargetDimScaling_Weight_Result_test,
                                              target_history_pay_attention_QOE_weight_test,
                                              target_history_pay_attention_CHONGHE_weight_test,
                                              target_history_pay_attention_FUFEI_weight_test)
            weight_result_dict = {key: torch.tensor(value).cpu() for key, value in weight_result_dict.items()}
            # sigmoid
            sigmoid_score_test = sigmoid_score_test[:, 0]  # (样本数，1)
            sigmoid_score_test = sigmoid_score_test.cpu()  #.detach()  # 转为CPU
            test_label_tensor = test_label_tensor[:, 0]  # (样本数，1)
            test_label_tensor = test_label_tensor.cpu()
            # test_label_tensor[test_label_tensor == 1] = 0
            # test_label_tensor[test_label_tensor == 2] = 1
            # test_label_tensor = torch.where(test_label_tensor == 1, torch.tensor(0), torch.tensor(1))  # 使用 torch.where 将 1 映射为 0，将 2 映射为 1
            loss_test = lossfunction(sigmoid_score_test, test_label_tensor.float())
            # softmax
            # softmax_score_test = softmax_score_test[:, 0]  # (样本数，1)
            # softmax_score_test = softmax_score_test.cpu()#.detach()  # 转为CPU
            # test_label_tensor = test_label_tensor[:, 0]  # (样本数，1)
            # test_label_tensor = test_label_tensor.cpu()
            # test_label_tensor = torch.where(test_label_tensor == 1, torch.tensor(0), torch.tensor(1))  # 使用 torch.where 将 1 映射为 0，将 2 映射为 1
            # loss_test = lossfunction(softmax_score_test, test_label_tensor.float())

            # 损失
            total_loss_test += loss_test.item()
            # 计算验证集上的精度
            # predicted_classes_test = (sigmoid_score_test > 0.5).long()
            # total_acc_test += (predicted_classes_test == test_label_tensor).sum().item() / len(test_label_tensor)
            # total_f1_test += f1_score(test_label_tensor, predicted_classes_test)
            # total_recall_test += recall_score(test_label_tensor, predicted_classes_test)
            # precision_test = ((predicted_classes_test == 1) & (test_label_tensor == 1)).sum().item() / (predicted_classes_test == 1).sum().item()
            # total_precision_test += precision_test
            # total_auc_test += roc_auc_score(test_label_tensor, sigmoid_score_test)
            evaluation = evaluate(test_label_tensor, sigmoid_score_test)
            total_acc_test += evaluation['acc']
            total_f1_test += evaluation['F1']
            total_recall_test += evaluation['recall']
            total_precision_test += evaluation['precision']
            total_auc_test += evaluation['auc']

            test_time += 1
            print('||--测试：----------', test_time, '个batch运行时间：', datetime.datetime.now(), '-------------')
        # 平均损失
        average_loss_test = total_loss_test / len(test_loader)
        average_auc_test = total_auc_test / len(test_loader)
        average_acc_test = total_acc_test / len(test_loader)
        average_f1_test = total_f1_test / len(test_loader)
        average_precision_test = total_precision_test / len(test_loader)
        average_recall_test = total_recall_test / len(test_loader)
        print(
            f"Test Loss: {average_loss_test},AUC: {average_auc_test},ACC:{average_acc_test},F1:{average_f1_test},Precision:{average_precision_test},Recall:{average_recall_test}")
        return average_loss_test, average_auc_test, average_acc_test, average_f1_test, average_precision_test, average_recall_test, weight_result_dict


In [17]:
# 模型运行
for data_time_windows in data_time_windows_list:

    data_path = "../Dataset/"
    # path = './Dataset/' + data_time_windows + '_user_pay_pred_feature_deal.csv'
    train_path = data_path + data_time_windows + '_all_feature_FS_deal.csv'
    test_path = data_path + data_time_windows + '_all_feature_DL_deal.csv'
    dataset_spilt_path = data_path + data_time_windows + '_user_pay_pred_feature_spilt.csv'
    output_weight_result_path = data_path + data_time_windows + '_user_pay_pred_result_weight.csv'
    data_feature_continue_discrete_namelist_path = data_path + 'DL_windows_fs_new.csv'  # 连续与离散划分表
    # 获取时间窗内连续与离散特征名的列表(获取列名)
    user_history_pay_QOE_continue_column, user_history_pay_CHONGHE_continue_column, \
        user_history_pay_FUFEI_continue_column, user_history_pay_QOE_discrete_column, \
        user_history_pay_CHONGHE_discrete_column, user_history_pay_FUFEI_discrete_column = get_continue_discrete_feature_namelist(
        data_time_windows, data_feature_continue_discrete_namelist_path)
    user_feature_continue_column = []
    user_feature_discrete_column = []
    # total continue feature
    total_continue_feature = user_feature_continue_column + user_history_pay_QOE_continue_column + user_history_pay_CHONGHE_continue_column + user_history_pay_FUFEI_continue_column
    total_discrete_feature = user_feature_discrete_column + user_history_pay_QOE_discrete_column + user_history_pay_CHONGHE_discrete_column + user_history_pay_FUFEI_discrete_column
    # 付费label(离散特征)
    total_discrete_feature_add_D = user_feature_discrete_column + user_history_pay_QOE_discrete_column + user_history_pay_CHONGHE_discrete_column + user_history_pay_FUFEI_discrete_column
    total_discrete_feature_add_D.append('pay_DL')
    user_history_pay_CHONGHE_discrete_column_add_D = copy.deepcopy(user_history_pay_CHONGHE_discrete_column)
    user_history_pay_CHONGHE_discrete_column_add_D.append('pay_DL')
    tensor_dict_idx = ['pay_QOE_continue', 'pay_QOE_discrete', 'pay_CHONGHE_continue', 'pay_CHONGHE_discrete',
                       'pay_FUFEI_continue', 'pay_FUFEI_discrete', 'target_QOE_continue', 'target_QOE_discrete',
                       'target_CHONGHE_continue', 'target_CHONGHE_discrete', 'target_FUFEI_continue',
                       'target_FUFEI_discrete']

    # 形成对应需要的特征名称列表
    feature_column_dict = {
        'user_info_continue': user_feature_continue_column,  #[]
        'user_info_discrete': user_feature_discrete_column,  #[]
        'history_QOE_continue': user_history_pay_QOE_continue_column,
        'history_QOE_discrete': user_history_pay_QOE_discrete_column,
        'history_CHONGHE_continue': user_history_pay_CHONGHE_continue_column,
        'history_CHONGHE_discrete': user_history_pay_CHONGHE_discrete_column,
        'history_FUFEI_continue': user_history_pay_FUFEI_continue_column,
        'history_FUFEI_discrete': user_history_pay_FUFEI_discrete_column,
        'history_CHONGHE_discrete_add_D': user_history_pay_CHONGHE_discrete_column_add_D
    }
    # 创建一个空的DataFrame来存储结果
    test_auc_df = pd.DataFrame(
        columns=['时间', 'model', '运行位置', 'Type', 'dataset', 'train_ratio', 'feature_embedding', 'batchSize', 'lr',
                 'max_history_len', '实验数', '测试集总损失', 'AUC', 'ACC', 'F1', 'Precision', 'Recall'])
    test_weight_df = pd.DataFrame(
        columns=['时间', 'model', '运行位置', 'Type', 'dataset', 'train_ratio', 'feature_embedding', 'batchSize', 'lr',
                 'max_history_len', '实验数', 'se_user_pay_QOE_weight', 'se_user_pay_CHONGHE_weight',
                 'se_user_pay_FUFEI_weight', 'se_target_QOE_weight', 'se_target_CHONGHE_weight',
                 'se_target_FUFEI_weight', \
                 'target_history_pay_attention_QOE_weight', 'target_history_pay_attention_CHONGHE_weight',
                 'target_history_pay_attention_FUFEI_weight'])
    for i in range(5):
        """
        主要用于在反向传播（backward pass）过程中，如果有任何计算图中的操作产生了异常，比如 NaN（不是数字）或者 inf（无限大）值，它会给出详细的错误信息
        """
        torch.autograd.set_detect_anomaly(True)
        print(f"i=:{i + 1}")
        n = i
        # 数据集 train、val、test划分及总数据hash表(以user_id为key的存储对应对应行的hash表)及不同类特征数存储的字典

        # xxx_list: user_id的列表
        # data_hash: 所有数据（包括训练、验证、测试）
        # feature_category_num_dict: 各列的值的数量的字典 key:列名, value:数量
        train_list, val_list, test_list, train_data_hash, data_hash, feature_category_num_dict = data_input(
            data_time_windows, train_path, test_path, dataset_spilt_path, val_ratio, test_ratio, total_continue_feature)

        # 获取训练、验证、测试集对应的数据形成的向量hash存储及label
        # 数据以key-value形式存储，key为user_id，value的维度为(max_history_len, feature_num), label的维度为(1, batch)
        train_data_tensor_hash, train_label, train_data_tensor_hash_history_mask = get_feature_to_matrix(train_list,
                                                                                                         train_data_hash,
                                                                                                         feature_column_dict)
        val_data_tensor_hash, val_label, val_data_tensor_hash_history_mask = get_feature_to_matrix(val_list, data_hash,
                                                                                                   feature_column_dict)
        test_data_tensor_hash, test_label, test_data_tensor_hash_history_mask = get_feature_to_matrix(test_list,
                                                                                                      data_hash,
                                                                                                      feature_column_dict)
        # 输出查看结果
        # for key1 in train_data_tensor_hash.keys():
        #     dimensions1 = train_data_tensor_hash[key1]['pay_QOE_continue'].size()
        #     dimensions2 = train_data_tensor_hash[key1]['pay_QOE_discrete'].size()
        #     dimensions3 = train_data_tensor_hash[key1]['pay_CHONGHE_continue'].size()
        #     dimensions4 = train_data_tensor_hash[key1]['target_QOE_continue'].size()
        #     dimensions5 = train_data_tensor_hash[key1]['target_QOE_discrete'].size()
        #     dimensions6 = train_data_tensor_hash[key1]['target_CHONGHE_continue'].size()
        #     print("val_data_tensor_hash size=", dimensions1,dimensions2,dimensions3,dimensions4,dimensions5,dimensions6)

        # 生成batch再添加维度对齐张量（三个维度）这里张量输出的全是三维 (batch_size, 1 or max_history_len, feature_num)
        train_batch_feature_tensor_dict = generate_user_feature_alignment_tensor(train_list, train_data_tensor_hash)
        val_batch_feature_tensor_dict = generate_user_feature_alignment_tensor(val_list, val_data_tensor_hash)
        test_batch_feature_tensor_dict = generate_user_feature_alignment_tensor(test_list, test_data_tensor_hash)

        train_label_tensor = torch.tensor(train_label)
        val_label_tensor = torch.tensor(val_label)
        test_label_tensor = torch.tensor(test_label)

        train_label_tensor = train_label_tensor.unsqueeze(-1)
        val_label_tensor = val_label_tensor.unsqueeze(-1)
        test_label_tensor = test_label_tensor.unsqueeze(-1)  # 在最后新增一个维度，因为TensorDataset要第一维大小相同 label变为(batch,1)
        # mask矩阵的字典
        train_batch_feature_tensor_history_mask_dict = generate_user_feature_alignment_tensor(train_list,
                                                                                              train_data_tensor_hash_history_mask,
                                                                                              is_mask=True)
        val_batch_feature_tensor_history_mask_dict = generate_user_feature_alignment_tensor(val_list,
                                                                                            val_data_tensor_hash_history_mask,
                                                                                            is_mask=True)
        test_batch_feature_tensor_history_mask_dict = generate_user_feature_alignment_tensor(test_list,
                                                                                             test_data_tensor_hash_history_mask,
                                                                                             is_mask=True)
        print('张量生成完成')

        # # TensorDataset输入得是张量，因此由字典转为张量
        train_batch_feature_tensor_pay_QOE_discrete = train_batch_feature_tensor_dict['pay_QOE_discrete']
        train_batch_feature_tensor_pay_CHONGHE_discrete = train_batch_feature_tensor_dict['pay_CHONGHE_discrete']
        train_batch_feature_tensor_pay_FUFEI_discrete = train_batch_feature_tensor_dict['pay_FUFEI_discrete']
        train_batch_feature_tensor_pay_QOE_continue = train_batch_feature_tensor_dict['pay_QOE_continue']
        train_batch_feature_tensor_pay_CHONGHE_continue = train_batch_feature_tensor_dict['pay_CHONGHE_continue']
        train_batch_feature_tensor_pay_FUFEI_continue = train_batch_feature_tensor_dict['pay_FUFEI_continue']
        train_batch_feature_tensor_target_QOE_discrete = train_batch_feature_tensor_dict['target_QOE_discrete']
        train_batch_feature_tensor_target_CHONGHE_discrete = train_batch_feature_tensor_dict['target_CHONGHE_discrete']
        train_batch_feature_tensor_target_FUFEI_discrete = train_batch_feature_tensor_dict['target_FUFEI_discrete']
        train_batch_feature_tensor_target_QOE_continue = train_batch_feature_tensor_dict['target_QOE_continue']
        train_batch_feature_tensor_target_CHONGHE_continue = train_batch_feature_tensor_dict['target_CHONGHE_continue']
        train_batch_feature_tensor_target_FUFEI_continue = train_batch_feature_tensor_dict['target_FUFEI_continue']
        train_batch_feature_tensor_pay_QOE_discrete_mask = train_batch_feature_tensor_history_mask_dict[
            'pay_QOE_discrete']
        train_batch_feature_tensor_pay_CHONGHE_discrete_mask = train_batch_feature_tensor_history_mask_dict[
            'pay_CHONGHE_discrete']
        train_batch_feature_tensor_pay_FUFEI_discrete_mask = train_batch_feature_tensor_history_mask_dict[
            'pay_FUFEI_discrete']
        train_batch_feature_tensor_pay_QOE_continue_mask = train_batch_feature_tensor_history_mask_dict[
            'pay_QOE_continue']
        train_batch_feature_tensor_pay_CHONGHE_continue_mask = train_batch_feature_tensor_history_mask_dict[
            'pay_CHONGHE_continue']
        train_batch_feature_tensor_pay_FUFEI_continue_mask = train_batch_feature_tensor_history_mask_dict[
            'pay_FUFEI_continue']

        val_batch_feature_tensor_pay_QOE_discrete = val_batch_feature_tensor_dict['pay_QOE_discrete']
        val_batch_feature_tensor_pay_CHONGHE_discrete = val_batch_feature_tensor_dict['pay_CHONGHE_discrete']
        val_batch_feature_tensor_pay_FUFEI_discrete = val_batch_feature_tensor_dict['pay_FUFEI_discrete']
        val_batch_feature_tensor_pay_QOE_continue = val_batch_feature_tensor_dict['pay_QOE_continue']
        val_batch_feature_tensor_pay_CHONGHE_continue = val_batch_feature_tensor_dict['pay_CHONGHE_continue']
        val_batch_feature_tensor_pay_FUFEI_continue = val_batch_feature_tensor_dict['pay_FUFEI_continue']
        val_batch_feature_tensor_target_QOE_discrete = val_batch_feature_tensor_dict['target_QOE_discrete']
        val_batch_feature_tensor_target_CHONGHE_discrete = val_batch_feature_tensor_dict['target_CHONGHE_discrete']
        val_batch_feature_tensor_target_FUFEI_discrete = val_batch_feature_tensor_dict['target_FUFEI_discrete']
        val_batch_feature_tensor_target_QOE_continue = val_batch_feature_tensor_dict['target_QOE_continue']
        val_batch_feature_tensor_target_CHONGHE_continue = val_batch_feature_tensor_dict['target_CHONGHE_continue']
        val_batch_feature_tensor_target_FUFEI_continue = val_batch_feature_tensor_dict['target_FUFEI_continue']
        val_batch_feature_tensor_pay_QOE_discrete_mask = val_batch_feature_tensor_history_mask_dict['pay_QOE_discrete']
        val_batch_feature_tensor_pay_CHONGHE_discrete_mask = val_batch_feature_tensor_history_mask_dict[
            'pay_CHONGHE_discrete']
        val_batch_feature_tensor_pay_FUFEI_discrete_mask = val_batch_feature_tensor_history_mask_dict[
            'pay_FUFEI_discrete']
        val_batch_feature_tensor_pay_QOE_continue_mask = val_batch_feature_tensor_history_mask_dict['pay_QOE_continue']
        val_batch_feature_tensor_pay_CHONGHE_continue_mask = val_batch_feature_tensor_history_mask_dict[
            'pay_CHONGHE_continue']
        val_batch_feature_tensor_pay_FUFEI_continue_mask = val_batch_feature_tensor_history_mask_dict[
            'pay_FUFEI_continue']

        test_batch_feature_tensor_pay_QOE_discrete = test_batch_feature_tensor_dict['pay_QOE_discrete']
        test_batch_feature_tensor_pay_CHONGHE_discrete = test_batch_feature_tensor_dict['pay_CHONGHE_discrete']
        test_batch_feature_tensor_pay_FUFEI_discrete = test_batch_feature_tensor_dict['pay_FUFEI_discrete']
        test_batch_feature_tensor_pay_QOE_continue = test_batch_feature_tensor_dict['pay_QOE_continue']
        test_batch_feature_tensor_pay_CHONGHE_continue = test_batch_feature_tensor_dict['pay_CHONGHE_continue']
        test_batch_feature_tensor_pay_FUFEI_continue = test_batch_feature_tensor_dict['pay_FUFEI_continue']
        test_batch_feature_tensor_target_QOE_discrete = test_batch_feature_tensor_dict['target_QOE_discrete']
        test_batch_feature_tensor_target_CHONGHE_discrete = test_batch_feature_tensor_dict['target_CHONGHE_discrete']
        test_batch_feature_tensor_target_FUFEI_discrete = test_batch_feature_tensor_dict['target_FUFEI_discrete']
        test_batch_feature_tensor_target_QOE_continue = test_batch_feature_tensor_dict['target_QOE_continue']
        test_batch_feature_tensor_target_CHONGHE_continue = test_batch_feature_tensor_dict['target_CHONGHE_continue']
        test_batch_feature_tensor_target_FUFEI_continue = test_batch_feature_tensor_dict['target_FUFEI_continue']
        test_batch_feature_tensor_pay_QOE_discrete_mask = test_batch_feature_tensor_history_mask_dict[
            'pay_QOE_discrete']
        test_batch_feature_tensor_pay_CHONGHE_discrete_mask = test_batch_feature_tensor_history_mask_dict[
            'pay_CHONGHE_discrete']
        test_batch_feature_tensor_pay_FUFEI_discrete_mask = test_batch_feature_tensor_history_mask_dict[
            'pay_FUFEI_discrete']
        test_batch_feature_tensor_pay_QOE_continue_mask = test_batch_feature_tensor_history_mask_dict[
            'pay_QOE_continue']
        test_batch_feature_tensor_pay_CHONGHE_continue_mask = test_batch_feature_tensor_history_mask_dict[
            'pay_CHONGHE_continue']
        test_batch_feature_tensor_pay_FUFEI_continue_mask = test_batch_feature_tensor_history_mask_dict[
            'pay_FUFEI_continue']

        # 训练集
        train_dataset = TensorDataset(train_batch_feature_tensor_pay_QOE_discrete,
                                      train_batch_feature_tensor_pay_CHONGHE_discrete,
                                      train_batch_feature_tensor_pay_FUFEI_discrete,
                                      train_batch_feature_tensor_pay_QOE_continue,
                                      train_batch_feature_tensor_pay_CHONGHE_continue,
                                      train_batch_feature_tensor_pay_FUFEI_continue,
                                      train_batch_feature_tensor_target_QOE_discrete,
                                      train_batch_feature_tensor_target_CHONGHE_discrete,
                                      train_batch_feature_tensor_target_FUFEI_discrete,
                                      train_batch_feature_tensor_target_QOE_continue,
                                      train_batch_feature_tensor_target_CHONGHE_continue,
                                      train_batch_feature_tensor_target_FUFEI_continue,
                                      train_batch_feature_tensor_pay_QOE_discrete_mask,
                                      train_batch_feature_tensor_pay_CHONGHE_discrete_mask,
                                      train_batch_feature_tensor_pay_FUFEI_discrete_mask,
                                      train_batch_feature_tensor_pay_QOE_continue_mask,
                                      train_batch_feature_tensor_pay_CHONGHE_continue_mask,
                                      train_batch_feature_tensor_pay_FUFEI_continue_mask,
                                      train_label_tensor)
        val_dataset = TensorDataset(val_batch_feature_tensor_pay_QOE_discrete,
                                    val_batch_feature_tensor_pay_CHONGHE_discrete,
                                    val_batch_feature_tensor_pay_FUFEI_discrete,
                                    val_batch_feature_tensor_pay_QOE_continue,
                                    val_batch_feature_tensor_pay_CHONGHE_continue,
                                    val_batch_feature_tensor_pay_FUFEI_continue,
                                    val_batch_feature_tensor_target_QOE_discrete,
                                    val_batch_feature_tensor_target_CHONGHE_discrete,
                                    val_batch_feature_tensor_target_FUFEI_discrete,
                                    val_batch_feature_tensor_target_QOE_continue,
                                    val_batch_feature_tensor_target_CHONGHE_continue,
                                    val_batch_feature_tensor_target_FUFEI_continue,
                                    val_batch_feature_tensor_pay_QOE_discrete_mask,
                                    val_batch_feature_tensor_pay_CHONGHE_discrete_mask,
                                    val_batch_feature_tensor_pay_FUFEI_discrete_mask,
                                    val_batch_feature_tensor_pay_QOE_continue_mask,
                                    val_batch_feature_tensor_pay_CHONGHE_continue_mask,
                                    val_batch_feature_tensor_pay_FUFEI_continue_mask,
                                    val_label_tensor)


        # # 训练集
        # train_dataset = TensorDataset(*train_batch_feature_tensor, *train_batch_feature_tensor_history_mask, train_label_tensor)
        # val_dataset = TensorDataset(*val_batch_feature_tensor, *val_batch_feature_tensor_history_mask, val_label_tensor)

        # 创建数据加载器
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True)  # 记得改回随机
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

        # 确保您的计算机上有CUDA支持的GPU
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # 创建大模型的实例
        model = MatchingModel(feature_category_num_dict, feature_column_dict, continue_embedding_dim,
                              discrete_embedding_dim, num_heads, feature_dim, max_history_len)
        print('模型搭建完成')
        model.to(device)
        # 进一步处理 列表转移到GPU
        for i in range(len(model.user_history_pay_embedding_layer.user_pay_history_QOE_discrete_embeddings)):
            model.user_history_pay_embedding_layer.user_pay_history_QOE_discrete_embeddings[i] = \
                model.user_history_pay_embedding_layer.user_pay_history_QOE_discrete_embeddings[i].to(device)
        for i in range(len(model.user_history_pay_embedding_layer.user_pay_history_CHONGHE_discrete_embeddings)):
            model.user_history_pay_embedding_layer.user_pay_history_CHONGHE_discrete_embeddings[i] = \
                model.user_history_pay_embedding_layer.user_pay_history_CHONGHE_discrete_embeddings[i].to(device)
        for i in range(len(model.user_history_pay_embedding_layer.user_pay_history_FUFEI_discrete_embeddings)):
            model.user_history_pay_embedding_layer.user_pay_history_FUFEI_discrete_embeddings[i] = \
                model.user_history_pay_embedding_layer.user_pay_history_FUFEI_discrete_embeddings[i].to(device)
        for i in range(len(model.user_history_pay_embedding_layer.user_pay_history_QOE_continue_embedding)):
            model.user_history_pay_embedding_layer.user_pay_history_QOE_continue_embedding[i] = \
                model.user_history_pay_embedding_layer.user_pay_history_QOE_continue_embedding[i].to(device)
        for i in range(len(model.user_history_pay_embedding_layer.user_pay_history_CHONGHE_continue_embedding)):
            model.user_history_pay_embedding_layer.user_pay_history_CHONGHE_continue_embedding[i] = \
                model.user_history_pay_embedding_layer.user_pay_history_CHONGHE_continue_embedding[i].to(device)
        for i in range(len(model.user_history_pay_embedding_layer.user_pay_history_FUFEI_continue_embedding)):
            model.user_history_pay_embedding_layer.user_pay_history_FUFEI_continue_embedding[i] = \
                model.user_history_pay_embedding_layer.user_pay_history_FUFEI_continue_embedding[i].to(device)

        for i in range(len(model.target_embedding_layer.target_QOE_discrete_embeddings)):
            model.target_embedding_layer.target_QOE_discrete_embeddings[i] = \
                model.target_embedding_layer.target_QOE_discrete_embeddings[i].to(device)
        for i in range(len(model.target_embedding_layer.target_CHONGHE_discrete_embeddings)):
            model.target_embedding_layer.target_CHONGHE_discrete_embeddings[i] = \
                model.target_embedding_layer.target_CHONGHE_discrete_embeddings[i].to(device)
        for i in range(len(model.target_embedding_layer.target_FUFEI_discrete_embeddings)):
            model.target_embedding_layer.target_FUFEI_discrete_embeddings[i] = \
                model.target_embedding_layer.target_FUFEI_discrete_embeddings[i].to(device)
        for i in range(len(model.target_embedding_layer.target_QOE_continue_embedding)):
            model.target_embedding_layer.target_QOE_continue_embedding[i] = \
                model.target_embedding_layer.target_QOE_continue_embedding[i].to(device)
        for i in range(len(model.target_embedding_layer.target_CHONGHE_continue_embedding)):
            model.target_embedding_layer.target_CHONGHE_continue_embedding[i] = \
                model.target_embedding_layer.target_CHONGHE_continue_embedding[i].to(device)
        for i in range(len(model.target_embedding_layer.target_FUFEI_continue_embedding)):
            model.target_embedding_layer.target_FUFEI_continue_embedding[i] = \
                model.target_embedding_layer.target_FUFEI_continue_embedding[i].to(device)
        print('模型转移到GPU完成')
        lossfunction = nn.BCELoss()
        #     optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, momentum=0.9)

        # 训练
        model_training(model, train_loader, val_loader, lossfunction, optimizer, 500, device)
        print('模型训练完成')
        print('||--------训练结束时间：', datetime.datetime.now(), '-------------')
        # 测试
        test_dataset = TensorDataset(test_batch_feature_tensor_pay_QOE_discrete,
                                     test_batch_feature_tensor_pay_CHONGHE_discrete,
                                     test_batch_feature_tensor_pay_FUFEI_discrete,
                                     test_batch_feature_tensor_pay_QOE_continue,
                                     test_batch_feature_tensor_pay_CHONGHE_continue,
                                     test_batch_feature_tensor_pay_FUFEI_continue,
                                     test_batch_feature_tensor_target_QOE_discrete,
                                     test_batch_feature_tensor_target_CHONGHE_discrete,
                                     test_batch_feature_tensor_target_FUFEI_discrete,
                                     test_batch_feature_tensor_target_QOE_continue,
                                     test_batch_feature_tensor_target_CHONGHE_continue,
                                     test_batch_feature_tensor_target_FUFEI_continue,
                                     test_batch_feature_tensor_pay_QOE_discrete_mask,
                                     test_batch_feature_tensor_pay_CHONGHE_discrete_mask,
                                     test_batch_feature_tensor_pay_FUFEI_discrete_mask,
                                     test_batch_feature_tensor_pay_QOE_continue_mask,
                                     test_batch_feature_tensor_pay_CHONGHE_continue_mask,
                                     test_batch_feature_tensor_pay_FUFEI_continue_mask,
                                     test_label_tensor)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
        average_loss_test, average_auc_test, average_acc_test, average_f1_test, average_precision_test, average_recall_test, weight_result_dict = test_model(
            model, test_loader)
        # 测试的每个样本结果保存到csv
        # 将本次训练的结果添加到DataFrame中
        test_auc_df = test_auc_df.append(
            {'时间': datetime.datetime.now(), 'model': 'model3.1', '运行位置': 'GPU', 'Type': 'Origin',
             'dataset': data_time_windows, 'feature_embedding': feature_dim, 'batchSize': batch_size, 'lr': lr,
             'max_history_len': max_history_len, '实验数': i + 1, '测试集总损失': average_loss_test,
             'AUC': average_auc_test, 'ACC': average_acc_test, 'F1': average_f1_test,
             'Precision': average_precision_test, 'Recall': average_recall_test}, ignore_index=True)
        weight_result = {'时间': datetime.datetime.now(), 'model': 'model3.1', '运行位置': 'GPU', 'Type': 'Origin',
                         'dataset': data_time_windows, 'feature_embedding': feature_dim, 'batchSize': batch_size,
                         'lr': lr, 'max_history_len': max_history_len, '实验数': i + 1, \
                         'se_user_pay_QOE_weight': weight_result_dict['se_user_pay_QOE_weight'],
                         'se_user_pay_CHONGHE_weight': weight_result_dict['se_user_pay_CHONGHE_weight'], \
                         'se_user_pay_FUFEI_weight': weight_result_dict['se_user_pay_FUFEI_weight'],
                         'se_target_QOE_weight': weight_result_dict['se_target_QOE_weight'], \
                         'se_target_CHONGHE_weight': weight_result_dict['se_target_CHONGHE_weight'],
                         'se_target_FUFEI_weight': weight_result_dict['se_target_FUFEI_weight'], \
                         'target_history_pay_attention_QOE_weight': weight_result_dict[
                             'target_history_pay_attention_QOE_weight'], \
                         'target_history_pay_attention_CHONGHE_weight': weight_result_dict[
                             'target_history_pay_attention_CHONGHE_weight'], \
                         'target_history_pay_attention_FUFEI_weight': weight_result_dict[
                             'target_history_pay_attention_FUFEI_weight']}
        test_weight_df = test_weight_df.append(weight_result, ignore_index=True)
    # 将结果保存到CSV文件中
    with open(data_path + 'maoerDL_result_maoer_pay_pred_model3_1.csv', 'a') as f:
        test_auc_df.to_csv(f, index=False)
    with open(data_path + 'maoerDL_result_maoer_pay_pred_weight_model3_1.csv', 'a') as f:
        test_weight_df.to_csv(f, index=False)
    #     test_auc_df.to_csv('./Dataset/maoerDL_result_maoer_pay_pred_model3_1.csv', index=False)
    #     test_weight_df.to_csv('./Dataset/maoerDL_result_maoer_pay_pred_weight_model3_1.csv', index=False)
    print('结果已输出')
    print('||--------当前时间窗', data_time_windows, '结束时间：', datetime.datetime.now(), '-------------')


i=:1
划分文件已存在，不再进行数据划分
数据预处理结束
数据预处理结束
数据划分完成
张量生成完成
模型搭建完成
模型转移到GPU完成
tensor([[0.5517],
        [0.5519],
        [0.5538],
        [0.5515],
        [0.5531],
        [0.5522],
        [0.5551],
        [0.5516],
        [0.5533],
        [0.5519],
        [0.5526],
        [0.5530],
        [0.5524],
        [0.5527],
        [0.5533],
        [0.5537],
        [0.5532],
        [0.5549],
        [0.5513],
        [0.5535],
        [0.5541],
        [0.5516],
        [0.5554],
        [0.5529],
        [0.5530],
        [0.5535],
        [0.5538],
        [0.5521],
        [0.5538],
        [0.5521],
        [0.5523],
        [0.5527],
        [0.5514],
        [0.5518],
        [0.5527],
        [0.5523],
        [0.5533],
        [0.5534],
        [0.5525],
        [0.5518],
        [0.5534],
        [0.5541],
        [0.5549],
        [0.5535],
        [0.5530],
        [0.5520],
        [0.5530],
        [0.5545],
        [0.5528],
        [0.5521],
        [0.5518],
        [0.5

KeyboardInterrupt: 