In [192]:
import pandas as pd
import numpy as np

# Build the pathway-ordered training table: load the expression CSV, randomly
# drop 10% of the samples, keep the KEGG genes (in pathway order) plus the
# label column, and pickle the result.
base_path = 'data(li)delete10/'
new_path = 'data(li)delete10/'
# Expression table processed by this cell.
filename = 'xun.csv'

# Seed NumPy's global generator so the same samples are removed on every run.
np.random.seed(435)

# Load the raw table; the first CSV column becomes the row index.
print(filename)
data_RNA = pd.read_csv(base_path + filename, index_col=0)

# Randomly remove 10% of the rows (the count is truncated towards zero).
print(f"原始样本数量: {data_RNA.shape[0]}")
sample_indices = data_RNA.index.tolist()
delete_count = int(len(sample_indices) * 0.1)

# Draw the row labels to remove, without replacement.
indices_to_delete = np.random.choice(sample_indices, size=delete_count, replace=False)
data_RNA_filtered = data_RNA.drop(index=indices_to_delete)

print(f"删除样本数量: {delete_count}")
print(f"剩余样本数量: {data_RNA_filtered.shape[0]}")
print(f"删除的样本索引: {indices_to_delete}")

# Load the KEGG pathway table and flatten its "/"-separated gene lists into one
# list, preserving order and repeated genes.
KEGG_info = pd.read_csv("训练/KEGG_Pathway_information.csv", header=0)
all_gene_list = [
    gene
    for gene_set in KEGG_info['gene_name']
    for gene in gene_set.split("/")
]

# Keep only genes that exist as columns in the filtered table; each kept
# occurrence becomes a column named gene1, gene2, ... in list order.
genes_present = [gene for gene in all_gene_list if gene in data_RNA_filtered.columns]
KEGG_data_dict = {
    'gene' + str(position): data_RNA_filtered[gene]
    for position, gene in enumerate(genes_present, start=1)
}

# The label column goes last.
KEGG_data_dict['label'] = data_RNA_filtered['label']

# Assemble the new DataFrame and save it under a "_filtered" name so it is not
# confused with the unfiltered data.
data_KEGG = pd.DataFrame(KEGG_data_dict)
print(data_KEGG.shape)
data_KEGG.to_pickle(new_path + filename.replace('.csv', '_filtered.pkl'))

print("处理完成！已随机删除10%的样本")

xun.csv
原始样本数量: 31
删除样本数量: 3
剩余样本数量: 28
删除的样本索引: ['Pat45' 'Pat41' 'Pat33']
(28, 22955)
处理完成！已随机删除10%的样本


In [193]:
import pandas as pd
import numpy as np
import math
import torch
import torch.optim.lr_scheduler as lr_scheduler
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from dataset import MyDataSet
from model import Pathway_Guided_Transformer
from utils import train_one_epoch, train_evaluate
from sklearn.metrics import f1_score, recall_score, precision_score
from collections import Counter
 
# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

def get_integer_mapping(le):
    """Return a dict mapping each class of a fitted encoder to its integer code.

    Args:
        le: An object exposing ``classes_`` (iterable of class labels) and
            ``transform`` (maps a list of labels to their codes), such as a
            fitted ``LabelEncoder``.

    Returns:
        dict: ``{class_label: code}`` with one entry per element of
        ``le.classes_``, in that order.
    """
    # Each class is encoded on its own and the single resulting code is kept.
    return {class_label: le.transform([class_label])[0] for class_label in le.classes_}

# Train the Pathway-Guided Transformer with stratified K-fold cross-validation
# on the pathway-ordered training table, saving the best weights of each fold
# (lowest validation loss) and reporting per-fold and averaged metrics.
import os  # local to this cell; only needed to create the weight directory

# Input / output locations. Forward slashes are used throughout because the
# original back-slash separator in the pickle path only resolved on Windows.
train_data_path = 'data(li)delete10/xun_filtered.pkl'
KEGG_result = '训练/KEGG_Pathway_information.csv'
model_save_path = 'data(li)delete10/Model_weight/'
# torch.save() does not create missing parent directories, so make sure the
# target folder exists before the first checkpoint is written.
os.makedirs(model_save_path, exist_ok=True)

# Load the table and replace missing values with 0.
data = pd.read_pickle(train_data_path)
data = data.replace(np.nan, 0)

# Summarise the data that is about to be used for training.
print("删除10%样本后的数据信息:")
print("数据形状:", data.shape)
print("标签分布:")
print(Counter(data['label']))
print("总样本数:", len(data))

# Features are every column except the last (log2(x + 1) transformed);
# the last column is the label, encoded to consecutive integers.
x_train = data.iloc[:, :-1].values
x_train = np.log2(x_train+1)
y_train = data.iloc[:,-1]
lbl = LabelEncoder()
y_train = lbl.fit_transform(y_train)
print("标签编码映射:", get_integer_mapping(lbl))
# Number of distinct encoded labels; constant across folds, so computed once.
num_classes = len(np.unique(y_train))

# Per-pathway gene counts handed to the model as `pathway_number`.
pathway_df = pd.read_csv(KEGG_result, header=0)
pathway_num = list(pathway_df['count'])

batch_size = 64
epochs = 20
lr, lrf = 0.0001, 0.001  # initial learning rate, final LR as a fraction of it
test_acc, test_f1, test_recall, test_precision  = [], [], [], []

# Choose the number of folds. min(3, n_samples) equals 3 whenever
# n_samples >= 3, so a single splitter covers both the small and large case;
# the notice is only printed for small data sets, as before.
n_samples = len(data)
n_splits = min(3, n_samples)
if n_samples < 50:
    print(f"样本数量较少({n_samples})，使用{n_splits}折交叉验证")
# shuffle=False, so no random_state is passed.
kfold = StratifiedKFold(n_splits=n_splits, shuffle=False)

# Fold counter. Kept as a manually incremented variable (not enumerate) so its
# value after the loop stays "number of folds", which a later cell reads.
KK = 0

for train, val in kfold.split(x_train, y_train):
    print('*'*30, KK, '*'*30)
    loss_m = np.inf
    # Best-epoch metrics are reset for every fold so that a fold which never
    # records a finite validation loss cannot silently report the numbers of
    # the previous fold. They are stored as plain floats.
    max_acc, max_f1, max_recall, max_precision = (float('nan'),) * 4

    train_data_mRNA, train_data_y_label = x_train[train,:], y_train[train]
    val_data_mRNA, val_data_y_label = x_train[val,:], y_train[val]

    print(f"训练集大小: {len(train_data_y_label)}, 验证集大小: {len(val_data_y_label)}")

    # Insert a length-1 axis: (samples, genes) -> (samples, 1, genes).
    train_data_mRNA = train_data_mRNA.reshape((train_data_mRNA.shape[0], 1, train_data_mRNA.shape[1]))
    val_data_mRNA = val_data_mRNA.reshape((val_data_mRNA.shape[0], 1, val_data_mRNA.shape[1]))

    train_data_set_KFold = MyDataSet(train_data_mRNA, train_data_y_label)
    val_data_set_KFold = MyDataSet(val_data_mRNA, val_data_y_label)

    # The batch size is capped at the data set size.
    train_loader_KFold = torch.utils.data.DataLoader(train_data_set_KFold,
                                                   batch_size=min(batch_size, len(train_data_set_KFold)),
                                                   shuffle=True)
    val_loader_KFold = torch.utils.data.DataLoader(val_data_set_KFold,
                                                 batch_size=min(batch_size, len(val_data_set_KFold)),
                                                 shuffle=True)

    # A fresh model, optimizer and scheduler are created for every fold.
    model = Pathway_Guided_Transformer(
            num_classes = num_classes,
            pathway_number = pathway_num,
            dim = 512,
            depth = 6,
            heads = 8,
            mlp_dim = 1024,
            dropout = 0.1,
            emb_dropout = 0.1,
        ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Cosine decay of the LR multiplier from 1 (epoch 0) towards lrf (epoch == epochs).
    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    criterion = torch.nn.CrossEntropyLoss()
    train_acc_list, val_acc_list, train_loss_list, val_loss_list = [], [], [], []

    for epoch in range(epochs):
        print("-"*50)
        print("epoch:", epoch, "K:", KK)
        train_loss, train_acc = train_one_epoch(now_epoch=epoch, all_epoch=epochs, model=model, optimizer=optimizer, data_loader=train_loader_KFold,
                                                num=len(train_data_set_KFold), criterion=criterion, device=device)
        scheduler.step()
        val_loss, val_acc, true_label, pre_label = train_evaluate(model=model, data_loader=val_loader_KFold,
                                                                    num=len(val_data_set_KFold), criterion=criterion, device=device)

        # Checkpoint on the lowest validation loss seen so far in this fold.
        # A NaN loss never compares as smaller, so it never becomes the best.
        if val_loss<loss_m:
            loss_m = val_loss
            max_acc = val_acc.item()
            max_f1 = f1_score(true_label, pre_label, average='weighted')
            max_recall = recall_score(true_label, pre_label, average='weighted')
            max_precision = precision_score(true_label, pre_label, average='weighted', zero_division=0)
            torch.save(model.state_dict(), model_save_path + "weights_" + str(KK) + ".pth")

        train_acc_list.append(train_acc.item())
        val_acc_list.append(val_acc.item())
        train_loss_list.append(train_loss)
        val_loss_list.append(val_loss)
        print("epoch: {}, train loss: {:.8f}, val loss: {:.8f}, train acc: {:.4f}, val acc: {:.4f}".format(epoch, train_loss, val_loss, train_acc, val_acc))

    if math.isnan(max_acc):
        # No epoch improved on the initial infinite loss: nothing was
        # checkpointed for this fold and its metrics are recorded as NaN.
        print("warning: fold {} never produced a finite validation loss; metrics recorded as NaN".format(KK))

    test_acc.append(max_acc)
    test_f1.append(max_f1)
    test_recall.append(max_recall)
    test_precision.append(max_precision)
    print("train_acc_list: ", train_acc_list)
    print('val_acc_list: ', val_acc_list)
    print('train_loss_list: ', train_loss_list)
    print('val_loss_list: ', val_loss_list)
    KK = KK+1

# Cross-validation summary: per-fold best-epoch metrics, then mean and std.
print('*'*60)
print("K-fold test_acc: ", test_acc)
print("K-fold test_f1: ", test_f1)
print("K-fold test_recall: ", test_recall)
print("K-fold test_precision: ", test_precision)
print("平均准确率: {:.4f} ± {:.4f}".format(np.mean(test_acc), np.std(test_acc)))
print("平均F1分数: {:.4f} ± {:.4f}".format(np.mean(test_f1), np.std(test_f1)))

删除10%样本后的数据信息:
数据形状: (28, 22955)
标签分布:
Counter({0: 22, 1: 6})
总样本数: 28
标签编码映射: {0: 0, 1: 1}
样本数量较少(28)，使用3折交叉验证
****************************** 0 ******************************
训练集大小: 18, 验证集大小: 10
--------------------------------------------------
epoch: 0 K: 0
train: 18


Epoch 1/20: 100%|██████████████████████| 1/1 [00:02<00:00,  2.53s/it, Loss=1.12]


test: 10
epoch: 0, train loss: 0.06241520, val loss: 0.06501075, train acc: 0.2222, val acc: 0.8000
--------------------------------------------------
epoch: 1 K: 0
train: 18


Epoch 2/20: 100%|███████████████████████| 1/1 [00:02<00:00,  2.48s/it, Loss=0.7]


test: 10
epoch: 1, train loss: 0.03891335, val loss: 0.08633227, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 2 K: 0
train: 18


Epoch 3/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.43s/it, Loss=0.875]


test: 10
epoch: 2, train loss: 0.04861688, val loss: 0.08570464, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 3 K: 0
train: 18


Epoch 4/20: 100%|██████████████████████| 1/1 [00:02<00:00,  2.41s/it, Loss=0.88]


test: 10
epoch: 3, train loss: 0.04889399, val loss: 0.07495842, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 4 K: 0
train: 18


Epoch 5/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.40s/it, Loss=0.764]


test: 10
epoch: 4, train loss: 0.04245041, val loss: 0.06157652, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 5 K: 0
train: 18


Epoch 6/20: 100%|███████████████████████| 1/1 [00:02<00:00,  2.45s/it, Loss=0.6]


test: 10
epoch: 5, train loss: 0.03333426, val loss: 0.05201373, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 6 K: 0
train: 18


Epoch 7/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.41s/it, Loss=0.524]


test: 10
epoch: 6, train loss: 0.02911040, val loss: 0.05069576, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 7 K: 0
train: 18


Epoch 8/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.42s/it, Loss=0.547]


test: 10
epoch: 7, train loss: 0.03037920, val loss: 0.05582961, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 8 K: 0
train: 18


Epoch 9/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.42s/it, Loss=0.573]


test: 10
epoch: 8, train loss: 0.03184729, val loss: 0.05992680, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 9 K: 0
train: 18


Epoch 10/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.42s/it, Loss=0.582]


test: 10
epoch: 9, train loss: 0.03233032, val loss: 0.05987331, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 10 K: 0
train: 18


Epoch 11/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.47s/it, Loss=0.629]


test: 10
epoch: 10, train loss: 0.03494930, val loss: 0.05688429, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 11 K: 0
train: 18


Epoch 12/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.42s/it, Loss=0.568]


test: 10
epoch: 11, train loss: 0.03155823, val loss: 0.05384642, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 12 K: 0
train: 18


Epoch 13/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.48s/it, Loss=0.575]


test: 10
epoch: 12, train loss: 0.03196268, val loss: 0.05174600, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 13 K: 0
train: 18


Epoch 14/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.47s/it, Loss=0.547]


test: 10
epoch: 13, train loss: 0.03040298, val loss: 0.05063995, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 14 K: 0
train: 18


Epoch 15/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.54s/it, Loss=0.545]


test: 10
epoch: 14, train loss: 0.03028043, val loss: 0.05021614, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 15 K: 0
train: 18


Epoch 16/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.44s/it, Loss=0.518]


test: 10
epoch: 15, train loss: 0.02878457, val loss: 0.05009903, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 16 K: 0
train: 18


Epoch 17/20: 100%|██████████████████████| 1/1 [00:02<00:00,  2.47s/it, Loss=0.5]


test: 10
epoch: 16, train loss: 0.02778884, val loss: 0.05008959, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 17 K: 0
train: 18


Epoch 18/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.53s/it, Loss=0.506]


test: 10
epoch: 17, train loss: 0.02808441, val loss: 0.05010284, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 18 K: 0
train: 18


Epoch 19/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.73s/it, Loss=0.525]


test: 10
epoch: 18, train loss: 0.02916233, val loss: 0.05011159, train acc: 0.7778, val acc: 0.8000
--------------------------------------------------
epoch: 19 K: 0
train: 18


Epoch 20/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.48s/it, Loss=0.504]


test: 10
epoch: 19, train loss: 0.02797623, val loss: 0.05011396, train acc: 0.7778, val acc: 0.8000
train_acc_list:  [0.2222222238779068, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544]
val_acc_list:  [0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.800000011920929]
train_loss_list:  [0.06241520245869955, 0.038913345999187894, 0.048616876204808555, 0

Epoch 1/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.63s/it, Loss=0.937]


test: 9
epoch: 0, train loss: 0.04929189, val loss: 0.08951305, train acc: 0.1579, val acc: 0.7778
--------------------------------------------------
epoch: 1 K: 1
train: 19


Epoch 2/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.57s/it, Loss=0.711]


test: 9
epoch: 1, train loss: 0.03740970, val loss: 0.11432530, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 2 K: 1
train: 19


Epoch 3/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.58s/it, Loss=0.872]


test: 9
epoch: 2, train loss: 0.04588064, val loss: 0.10996813, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 3 K: 1
train: 19


Epoch 4/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.59s/it, Loss=0.845]


test: 9
epoch: 3, train loss: 0.04444985, val loss: 0.09361757, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 4 K: 1
train: 19


Epoch 5/20: 100%|██████████████████████| 1/1 [00:02<00:00,  2.63s/it, Loss=0.72]


test: 9
epoch: 4, train loss: 0.03788873, val loss: 0.07481890, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 5 K: 1
train: 19


Epoch 6/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.57s/it, Loss=0.574]


test: 9
epoch: 5, train loss: 0.03022000, val loss: 0.06156026, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 6 K: 1
train: 19


Epoch 7/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.57s/it, Loss=0.546]


test: 9
epoch: 6, train loss: 0.02873746, val loss: 0.05918509, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 7 K: 1
train: 19


Epoch 8/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.56s/it, Loss=0.507]


test: 9
epoch: 7, train loss: 0.02670148, val loss: 0.06389345, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 8 K: 1
train: 19


Epoch 9/20: 100%|█████████████████████| 1/1 [00:02<00:00,  2.56s/it, Loss=0.587]


test: 9
epoch: 8, train loss: 0.03091865, val loss: 0.06658487, train acc: 0.7368, val acc: 0.7778
--------------------------------------------------
epoch: 9 K: 1
train: 19


Epoch 10/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.56s/it, Loss=0.575]


test: 9
epoch: 9, train loss: 0.03026914, val loss: 0.06564096, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 10 K: 1
train: 19


Epoch 11/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.55s/it, Loss=0.593]


test: 9
epoch: 10, train loss: 0.03123281, val loss: 0.06271389, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 11 K: 1
train: 19


Epoch 12/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.56s/it, Loss=0.535]


test: 9
epoch: 11, train loss: 0.02814836, val loss: 0.06029312, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 12 K: 1
train: 19


Epoch 13/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.63s/it, Loss=0.505]


test: 9
epoch: 12, train loss: 0.02656443, val loss: 0.05907636, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 13 K: 1
train: 19


Epoch 14/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.61s/it, Loss=0.516]


test: 9
epoch: 13, train loss: 0.02715090, val loss: 0.05877008, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 14 K: 1
train: 19


Epoch 15/20: 100%|████████████████████| 1/1 [00:02<00:00,  2.83s/it, Loss=0.489]


test: 9
epoch: 14, train loss: 0.02573891, val loss: 0.05889881, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 15 K: 1
train: 19


Epoch 16/20: 100%|████████████████████| 1/1 [00:03<00:00,  3.88s/it, Loss=0.518]


test: 9
epoch: 15, train loss: 0.02727331, val loss: 0.05913617, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 16 K: 1
train: 19


Epoch 17/20: 100%|████████████████████| 1/1 [00:03<00:00,  3.84s/it, Loss=0.503]


test: 9
epoch: 16, train loss: 0.02647754, val loss: 0.05933411, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 17 K: 1
train: 19


Epoch 18/20: 100%|████████████████████| 1/1 [00:04<00:00,  4.01s/it, Loss=0.525]


test: 9
epoch: 17, train loss: 0.02762213, val loss: 0.05944723, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 18 K: 1
train: 19


Epoch 19/20: 100%|████████████████████| 1/1 [00:03<00:00,  3.98s/it, Loss=0.515]


test: 9
epoch: 18, train loss: 0.02711446, val loss: 0.05949637, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 19 K: 1
train: 19


Epoch 20/20: 100%|████████████████████| 1/1 [00:04<00:00,  4.15s/it, Loss=0.441]


test: 9
epoch: 19, train loss: 0.02320063, val loss: 0.05950745, train acc: 0.7895, val acc: 0.7778
train_acc_list:  [0.15789473056793213, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7368420958518982, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054]
val_acc_list:  [0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544]
train_loss_list:  [0.04929188991847791, 0.037409700845417226, 0.0

Epoch 1/20: 100%|██████████████████████| 1/1 [00:04<00:00,  4.09s/it, Loss=0.87]


test: 9
epoch: 0, train loss: 0.04581349, val loss: 0.10186598, train acc: 0.2105, val acc: 0.7778
--------------------------------------------------
epoch: 1 K: 2
train: 19


Epoch 2/20: 100%|█████████████████████| 1/1 [00:03<00:00,  3.97s/it, Loss=0.724]


test: 9
epoch: 1, train loss: 0.03811570, val loss: 0.12264294, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 2 K: 2
train: 19


Epoch 3/20: 100%|█████████████████████| 1/1 [00:03<00:00,  3.94s/it, Loss=0.924]


test: 9
epoch: 2, train loss: 0.04862968, val loss: 0.11251728, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 3 K: 2
train: 19


Epoch 4/20: 100%|█████████████████████| 1/1 [00:03<00:00,  3.97s/it, Loss=0.847]


test: 9
epoch: 3, train loss: 0.04455372, val loss: 0.09141522, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 4 K: 2
train: 19


Epoch 5/20: 100%|█████████████████████| 1/1 [00:03<00:00,  3.83s/it, Loss=0.713]


test: 9
epoch: 4, train loss: 0.03752793, val loss: 0.07006720, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 5 K: 2
train: 19


Epoch 6/20: 100%|█████████████████████| 1/1 [00:03<00:00,  3.94s/it, Loss=0.548]


test: 9
epoch: 5, train loss: 0.02886701, val loss: 0.05954549, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 6 K: 2
train: 19


Epoch 7/20: 100%|█████████████████████| 1/1 [00:03<00:00,  3.95s/it, Loss=0.481]


test: 9
epoch: 6, train loss: 0.02530543, val loss: 0.06367273, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 7 K: 2
train: 19


Epoch 8/20: 100%|█████████████████████| 1/1 [00:04<00:00,  4.07s/it, Loss=0.564]


test: 9
epoch: 7, train loss: 0.02970768, val loss: 0.06915338, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 8 K: 2
train: 19


Epoch 9/20: 100%|█████████████████████| 1/1 [00:04<00:00,  4.04s/it, Loss=0.657]


test: 9
epoch: 8, train loss: 0.03459511, val loss: 0.06749477, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 9 K: 2
train: 19


Epoch 10/20: 100%|██████████████████████| 1/1 [00:04<00:00,  4.07s/it, Loss=0.6]


test: 9
epoch: 9, train loss: 0.03158896, val loss: 0.06324740, train acc: 0.7368, val acc: 0.7778
--------------------------------------------------
epoch: 10 K: 2
train: 19


Epoch 11/20: 100%|████████████████████| 1/1 [00:04<00:00,  4.23s/it, Loss=0.533]


test: 9
epoch: 10, train loss: 0.02806241, val loss: 0.06047488, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 11 K: 2
train: 19


Epoch 12/20: 100%|████████████████████| 1/1 [00:04<00:00,  4.05s/it, Loss=0.483]


test: 9
epoch: 11, train loss: 0.02542838, val loss: 0.05970088, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 12 K: 2
train: 19


Epoch 13/20: 100%|█████████████████████| 1/1 [00:04<00:00,  4.10s/it, Loss=0.55]


test: 9
epoch: 12, train loss: 0.02895878, val loss: 0.06017155, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 13 K: 2
train: 19


Epoch 14/20: 100%|████████████████████| 1/1 [00:04<00:00,  4.17s/it, Loss=0.529]


test: 9
epoch: 13, train loss: 0.02782485, val loss: 0.06105840, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 14 K: 2
train: 19


Epoch 15/20: 100%|████████████████████| 1/1 [00:04<00:00,  4.10s/it, Loss=0.525]


test: 9
epoch: 14, train loss: 0.02761267, val loss: 0.06182508, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 15 K: 2
train: 19


Epoch 16/20: 100%|█████████████████████| 1/1 [00:04<00:00,  4.14s/it, Loss=0.54]


test: 9
epoch: 15, train loss: 0.02842503, val loss: 0.06229914, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 16 K: 2
train: 19


Epoch 17/20: 100%|████████████████████| 1/1 [00:04<00:00,  4.13s/it, Loss=0.485]


test: 9
epoch: 16, train loss: 0.02550309, val loss: 0.06253608, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 17 K: 2
train: 19


Epoch 18/20: 100%|████████████████████| 1/1 [00:04<00:00,  4.20s/it, Loss=0.494]


test: 9
epoch: 17, train loss: 0.02602469, val loss: 0.06263081, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 18 K: 2
train: 19


Epoch 19/20: 100%|████████████████████| 1/1 [00:03<00:00,  3.95s/it, Loss=0.507]


test: 9
epoch: 18, train loss: 0.02666600, val loss: 0.06265425, train acc: 0.7895, val acc: 0.7778
--------------------------------------------------
epoch: 19 K: 2
train: 19


Epoch 20/20: 100%|████████████████████| 1/1 [00:04<00:00,  4.08s/it, Loss=0.529]


test: 9
epoch: 19, train loss: 0.02783423, val loss: 0.06265447, train acc: 0.7895, val acc: 0.7778
train_acc_list:  [0.21052631735801697, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7368420958518982, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054, 0.7894737124443054]
val_acc_list:  [0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544, 0.7777777910232544]
train_loss_list:  [0.045813494607021936, 0.03811570217734889, 0.0

In [194]:
# Save the state dict of whatever `model` currently is in the kernel, using
# the current value of `KK` in the file name and a ".model" suffix.
final_weights_path = model_save_path + "weights_" + str(KK) + ".model"
torch.save(model.state_dict(), final_weights_path)


In [195]:
# Re-print the metric line using the values currently held in the kernel's
# `epoch`, `train_loss`, `val_loss`, `train_acc` and `val_acc` variables.
epoch_summary = "epoch: {}, train loss: {:.8f}, val loss: {:.8f}, train acc: {:.4f}, val acc: {:.4f}".format(
    epoch, train_loss, val_loss, train_acc, val_acc
)
print(epoch_summary)

epoch: 19, train loss: 0.02783423, val loss: 0.06265447, train acc: 0.7895, val acc: 0.7778


In [196]:
import pandas as pd

# Build the pathway-ordered table for the held-out file: keep the KEGG genes
# (in pathway order) plus the label column and pickle the result.
base_path = 'data(li)delete10/训练/'
new_path = 'data(li)delete10/训练/'
# Expression table processed by this cell.
filename = 'te_xun.csv'

# Flatten the "/"-separated gene lists of every pathway into one list,
# preserving order and repeated genes.
KEGG_info = pd.read_csv("训练/KEGG_Pathway_information.csv", header=0)
all_gene_list = [
    gene
    for gene_set in KEGG_info['gene_name']
    for gene in gene_set.split("/")
]

# Load the expression CSV; its first column becomes the row index.
print(filename)
data_RNA = pd.read_csv(base_path + filename, index_col=0)

# Keep only genes that exist as columns in the table; each kept occurrence
# becomes a column named gene1, gene2, ... in list order.
genes_present = [gene for gene in all_gene_list if gene in data_RNA.columns]
KEGG_data_dict = {
    'gene' + str(position): data_RNA[gene]
    for position, gene in enumerate(genes_present, start=1)
}

# The table is expected to carry a 'label' column; it goes last.
KEGG_data_dict['label'] = data_RNA['label']
data_KEGG = pd.DataFrame(KEGG_data_dict)
print(data_KEGG)
# Same file name with a .pkl extension.
data_KEGG.to_pickle(new_path + filename.replace('.csv', '.pkl'))

te_xun.csv
           gene1      gene2     gene3      gene4      gene5      gene6  \
Pat03   0.423672  11.349284  0.479430   9.926252  23.168080   1.277639   
Pat28   0.677110  34.613793  0.294675  12.827103  35.033630   4.919835   
Pat49   1.084989  15.863534  0.258371  16.475945  40.957265   1.602234   
Pat38   0.272135  17.157354  0.184135  34.860374  41.482285  12.017714   
Pat02   0.297357   6.906578  0.152635  17.407315  37.292708   6.656415   
Pat118  2.163818  14.388787  0.631081   7.674412  31.249620   7.233839   
Pat36   0.327394  29.165064  0.520166  11.338593  34.583755   0.517158   
Pat43   0.966011  15.781033  0.299822  18.233375  36.627173   3.285748   

            gene7      gene8      gene9    gene10  ...  gene22946  gene22947  \
Pat03   86.309893  15.897700  64.521477  0.872070  ...  11.670901   2.591405   
Pat28   18.941887   0.067197  14.813769  2.363295  ...  13.372190   0.957363   
Pat49   36.210708   2.553129  27.195108  2.331151  ...  13.135285   3.612381   
Pa

In [197]:
import pandas as pd
import torch
from model import Pathway_Guided_Transformer
from utils import test_TCGA, test_ICGC, test_GEO_p_m, test_GEO_m, test_GEO_p

# Evaluate a saved fold checkpoint on the held-out pickle (te_xun.pkl).

# Test data path
test_data_path = 'data(li)delete10/训练/'
path_test_data_TCGA = test_data_path + 'te_xun.pkl'
# Model weight file. Forward slashes are used because the original back-slash
# separators only resolved on Windows.
path_weight = 'data(li)delete10/Model_weight/weights_2.pth'

# prediction result
prediction_path = 'data(li)delete10/训练/'
TCGA_prediction = prediction_path + 'prediction.csv'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 64
# Read the pathway table from the same file the training cell used, so the
# model is rebuilt with exactly the pathway sizes the weights were trained on.
pathway_df = pd.read_csv('训练/KEGG_Pathway_information.csv', header=0)
pathway_num = list(pathway_df['count'])
# Hyper-parameters mirror the model constructed in the training cell.
model = Pathway_Guided_Transformer(
        num_classes = 2,
        pathway_number = pathway_num,
        dim = 512,
        depth = 6,
        heads = 8,
        mlp_dim = 1024,
        dropout = 0.1,
        emb_dropout = 0.1,
    ).to(device)
print("*"*20, "test for TCGA", "*"*20)
TCGA_acc = test_TCGA(path_test_data_TCGA,  path_weight, TCGA_prediction, model, batch_size, device)

******************** test for TCGA ********************


  model.load_state_dict(torch.load(path_weight))


test: 8
x_mRNA shape: torch.Size([8, 1, 22954])
y_label: ('0', '0', '0', '1', '0', '0', '0', '0')


In [198]:


# Point the evaluation input at the external test set.
test_data_path = 'data(li)delete10/外部测试/'
path_test_data_TCGA = ''.join((test_data_path, 'test.pkl'))


In [199]:
# Model weight file. Forward slashes are used because the original back-slash
# separators only resolved on Windows.
path_weight = 'data(li)delete10/Model_weight/weights_2.pth'

# prediction result
prediction_path = 'data(li)delete10/外部测试/'
TCGA_prediction = prediction_path + 'prediction.csv'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 64
# Read the pathway table from the same file the training cell used, so the
# pathway sizes match the ones the weights were trained with.
# NOTE(review): no model is constructed in this cell; the evaluation call that
# follows uses whatever `model` already exists in the kernel.
pathway_df = pd.read_csv('训练/KEGG_Pathway_information.csv', header=0)
pathway_num = list(pathway_df['count'])


In [200]:
# Run the evaluation helper with the paths, model, batch size and device
# currently defined in the kernel.
banner = "*"*20
print(banner, "test for TCGA", banner)
TCGA_acc = test_TCGA(
    path_test_data_TCGA,
    path_weight,
    TCGA_prediction,
    model,
    batch_size,
    device,
)

******************** test for TCGA ********************


  model.load_state_dict(torch.load(path_weight))


test: 16
x_mRNA shape: torch.Size([16, 1, 22954])
y_label: ('0', '0', '0', '0', '1', '0', '0', '1', '0', '0', '0', '1', '0', '0', '1', '0')
