In [None]:
import os
import shutil
import tempfile
import matplotlib.pyplot as plt
import PIL
import torch
import numpy as np
from sklearn.metrics import classification_report

from monai.apps import download_and_extract
from monai.config import print_config
from monai.data import decollate_batch, DataLoader
from monai.metrics import ROCAUCMetric
from monai.networks.nets import DenseNet121
from monai.transforms import (
    Activations,
    EnsureChannelFirst,
    AsDiscrete,
    Compose,
    LoadImage,
    RandFlip,
    RandRotate,
    RandZoom,
    ScaleIntensity,
)
from monai.utils import set_determinism

In [None]:
# Directory that receives the model checkpoints and result spreadsheets.
root_dir = "/home/yfang/MRI_110_result"
# Create the output directory up front so that later writes into it
# (checkpoints, Excel exports) do not fail because it is missing.
os.makedirs(root_dir, exist_ok=True)
# Fix the random seeds used during model training.
set_determinism(seed=0)
# Dataset root: one sub-directory per class (relative to the working directory).
data_dir='dataset/MRI_110_merge'

In [None]:
# Discover the dataset: every sub-directory of data_dir is one class and its
# position in the sorted class list is the integer label.
class_names = sorted(x for x in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, x)))
num_class = len(class_names)
# os.listdir() returns entries in arbitrary order; sort the file names so that
# image_files_list (and every index-based split derived from it) is
# reproducible across runs and machines.
image_files = [
    [os.path.join(data_dir, class_name, x) for x in sorted(os.listdir(os.path.join(data_dir, class_name)))]
    for class_name in class_names
]
num_each = [len(files) for files in image_files]
# Flatten into two parallel lists: image path and integer class label.
image_files_list = []
image_class = []
for label, files in enumerate(image_files):
    image_files_list.extend(files)
    image_class.extend([label] * len(files))
num_total = len(image_class)
# Read the size of the first image only; the context manager closes the file handle.
with PIL.Image.open(image_files_list[0]) as first_image:
    image_width, image_height = first_image.size

print(f"Total image count: {num_total}")
print(f"Image dimensions: {image_width} x {image_height}")
print(f"Label names: {class_names}")
print(f"Label counts: {num_each}")

In [None]:
# Visualise nine randomly selected images in a 3 x 3 grid, each labelled
# with its class name.
plt.subplots(3, 3, figsize=(8, 8))
sampled_positions = np.random.randint(num_total, size=9)
for panel, sample_idx in enumerate(sampled_positions, start=1):
    pixels = np.array(PIL.Image.open(image_files_list[sample_idx]))
    plt.subplot(3, 3, panel)
    plt.xlabel(class_names[image_class[sample_idx]])
    plt.imshow(pixels, cmap="gray", vmin=0, vmax=255)
plt.tight_layout()
plt.show()

In [None]:
# Split the images into training and test sets according to the train/test
# assignment of the clinical cohort tables.
import pandas as pd
import os
import numpy as np
import torch
import torch.nn as nn
import timm
import shutil
import tempfile
import matplotlib.pyplot as plt
import PIL
# train_frac = 0.8
# test_frac = 0.2
# Read the three Excel files: clinical training cohort, clinical test cohort,
# and the table that is merged onto both of them by patient name.
train_df = pd.read_excel(r'/home/yfang/prostate/clinical/data/clinic_blood_110_train(1).xlsx')
test_df = pd.read_excel(r'/home/yfang/prostate/clinical/data/clinic_blood_110_test(1).xlsx')
df = pd.read_excel(r'/home/yfang/prostate/clinical/data/name_number.xlsx')

# Rename the name column of the cohort tables to 'name' so it can be used as the merge key.
new_column_names = {'姓名': 'name'}
train_df = train_df.rename(columns=new_column_names)
test_df = test_df.rename(columns=new_column_names)
# Inner-join each cohort table with the lookup table on the 'name' column.
train_df = pd.merge(train_df, df, on='name',how='inner')
test_df = pd.merge(test_df, df, on='name',how='inner')

train_value = train_df['Value']
test_value = test_df['Value']

# Build the ID collections once: membership tests against a set are O(1),
# whereas rebuilding a list inside the loop costs O(cohort size) per image.
train_ids = set(train_value.tolist())
test_ids = set(test_value.tolist())

length = len(image_files_list)
indices = np.arange(length)
train_indices =  []
test_indices = []

for index, img_path in enumerate(image_files_list):
    # The integer before the first '_' of the file name is compared with the 'Value' column.
    patient_id = int(os.path.basename(img_path).split('_')[0])
    if patient_id in train_ids:
        train_indices.append(index)
    elif patient_id in test_ids:
        # An ID present in both cohorts is assigned to training only (checked first).
        test_indices.append(index)
    # Images whose ID is in neither cohort are left out of both splits.

print(train_indices)
print(test_indices)

train_xx = [image_files_list[i] for i in train_indices]
train_yy = [image_class[i] for i in train_indices]

test_xx = [image_files_list[i] for i in test_indices]
test_yy = [image_class[i] for i in test_indices]

print(f"Training count: {len(train_xx)},  Test count: {len(test_xx)}")

In [None]:
# Random augmentations applied to training images only: rotation up to
# pi/12 rad, flip along spatial axis 0, and zoom in [0.9, 1.1], each with
# probability 0.5.
train_augmentations = [
    RandRotate(range_x=np.pi / 12, prob=0.5, keep_size=True),
    RandFlip(spatial_axis=0, prob=0.5),
    RandZoom(min_zoom=0.9, max_zoom=1.1, prob=0.5),
]
# Training pipeline: load -> channel-first -> intensity scaling -> augmentation.
train_transforms = Compose(
    [LoadImage(image_only=True), EnsureChannelFirst(), ScaleIntensity(), *train_augmentations]
)

# Validation / test pipeline: the same preprocessing without augmentation.
val_preprocessing = [LoadImage(image_only=True), EnsureChannelFirst(), ScaleIntensity()]
val_transforms = Compose(val_preprocessing)

# Post-processing used for the ROC-AUC metric: softmax over the model outputs
# and one-hot encoding of the integer labels.
y_pred_trans = Compose([Activations(softmax=True)])
y_trans = Compose([AsDiscrete(to_onehot=num_class)])

In [None]:
# Custom dataset class (MedNISTDataset; the name may be changed).
class MedNISTDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing image file paths with labels.

    Each item is ``(transforms(image_files[index]), labels[index])``.
    """

    def __init__(self, image_files, labels, transforms):
        """Store the parallel sequences of paths and labels and the transform callable."""
        self.image_files, self.labels = image_files, labels
        self.transforms = transforms

    def __len__(self):
        """Return the number of image files."""
        n_items = len(self.image_files)
        return n_items

    def __getitem__(self, index):
        """Return the transformed image at ``index`` together with its label."""
        image = self.transforms(self.image_files[index])
        label = self.labels[index]
        return image, label


# train_ds = MedNISTDataset(train_x, train_y, train_transforms)
# train_loader = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=12)


# Held-out test set: preprocessing only (no augmentation), iterated without shuffling.
test_ds = MedNISTDataset(image_files=test_xx, labels=test_yy, transforms=val_transforms)
test_loader = DataLoader(dataset=test_ds, batch_size=16, num_workers=12)

In [None]:
import torch
import torch.nn as nn
import timm
# Use the first CUDA device when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
def get_model(
    num_classes=3,
    pretrained_num_classes=2,
    model_weight_path="/home/yfang/result/efficientv2_s_best_metric_model.pth",
):
    """Build an EfficientNetV2-S initialised from a checkpoint, with a new classifier head.

    The network is created with a ``pretrained_num_classes``-way classifier so
    that the checkpoint's state dict can be loaded, then the classifier is
    replaced by a freshly initialised ``num_classes``-way linear layer and the
    model is moved to the module-level ``device``.

    Args:
        num_classes: Output size of the new classifier head (default 3).
        pretrained_num_classes: Output size of the classifier stored in the
            checkpoint (default 2).
        model_weight_path: Path of the state-dict checkpoint to load.

    Returns:
        The model, located on ``device``.

    Raises:
        FileNotFoundError: If ``model_weight_path`` does not exist. This is an
            explicit exception rather than an ``assert`` so the check is not
            stripped when Python runs with ``-O``.
    """
    model = timm.create_model('efficientnetv2_s', pretrained=False)
    in_features = model.classifier.in_features
    # Temporary head whose shape matches the checkpoint so load_state_dict succeeds.
    model.classifier = nn.Linear(in_features, pretrained_num_classes)
    if not os.path.exists(model_weight_path):
        raise FileNotFoundError("file {} does not exist.".format(model_weight_path))
    # NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
    model.load_state_dict(torch.load(model_weight_path, map_location=device))
    # Replace the head for the target task; the input width is unchanged.
    model.classifier = nn.Linear(in_features, num_classes)
    model.to(device)
    return model
# Initial model instance and the classification loss shared by training,
# validation and testing.
model = get_model()
loss_function = nn.CrossEntropyLoss()

In [None]:
import torch
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader
#保存跑的结果列表到excel中
import pandas as pd
# Stratified K-fold cross-validation over the training split.
n_splits= 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=2023)
# The indices yielded by skf.split() are positions inside train_xx / train_yy
# (the sequences passed in), NOT positions inside the full image_files_list.
# Dedicated loop-variable names keep the global train/test split indices intact.
for fold, (fold_train_idx, fold_val_idx) in enumerate(skf.split(train_xx, train_yy)):
    # Drop the previous model before building the next one so both are not
    # held in memory at the same time; every fold starts from the same checkpoint.
    if model:
        del model
    model = get_model()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
    print(f"Fold {fold + 1}/{n_splits}")
    # Select this fold's training and validation samples from the training split.
    train_x_fold = [train_xx[i] for i in fold_train_idx]
    train_y_fold = [train_yy[i] for i in fold_train_idx]
    val_x_fold = [train_xx[i] for i in fold_val_idx]
    val_y_fold = [train_yy[i] for i in fold_val_idx]

    train_ds = MedNISTDataset(train_x_fold, train_y_fold, train_transforms)
    train_loader = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=12)
    val_ds = MedNISTDataset(val_x_fold, val_y_fold, val_transforms)
    val_loader = DataLoader(val_ds, batch_size=16,num_workers=12)
    max_epochs = 200
    val_interval = 1
    auc_metric = ROCAUCMetric()
    best_metric = -1
    best_metric_epoch = -1
    # Per-epoch histories; they are reset for every fold, so after the loop
    # they describe the last fold only.
    train_loss_values = []
    train_acc_values = []
    train_auc_values=[]
    val_loss_values=[]
    val_acc_values=[]
    val_auc_values = []
    for epoch in range(max_epochs):
        print("-" * 10)
        print(f"epoch {epoch + 1}/{max_epochs}")
        model.train()
        train_loss = 0
        step = 0
        # len(loader) counts the final partial batch as well, unlike
        # len(dataset) // batch_size.
        steps_per_epoch = len(train_loader)
        train_logit_batches = []
        train_label_batches = []
        for batch_data in train_loader:
            step += 1
            inputs, labels = batch_data[0].to(device), batch_data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            # detach() before storing: otherwise each stored batch keeps its
            # autograd graph alive until the end of the epoch.
            train_logit_batches.append(outputs.detach().cpu())
            train_label_batches.append(labels.cpu())
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            print(f"{step}/{steps_per_epoch}, " f"train_loss: {loss.item():.4f}")
        train_y_pred = torch.cat(train_logit_batches, dim=0)
        train_y = torch.cat(train_label_batches, dim=0)
        # Training AUC from softmax scores against one-hot labels.
        y_onehot = [y_trans(i) for i in decollate_batch(train_y, detach=False)]
        y_pred_act = [y_pred_trans(i) for i in decollate_batch(train_y_pred)]
        auc_metric(y_pred_act, y_onehot)
        train_auc = auc_metric.aggregate()
        auc_metric.reset()
        train_auc_values.append(train_auc)
        del y_pred_act, y_onehot
        acc_value = torch.eq(train_y_pred.argmax(dim=1), train_y)
        train_acc = acc_value.sum().item() / len(acc_value)
        train_acc_values.append(train_acc)
        train_loss /= step
        train_loss_values.append(train_loss)
        print(f"epoch {epoch + 1} average loss: {train_loss:.4f}")
        # Validation set
        if (epoch + 1) % val_interval == 0:
            model.eval()
            val_loss=0
            val_step=0
            with torch.no_grad():
                val_logit_batches = []
                val_label_batches = []
                for val_data in val_loader:
                    val_step += 1
                    val_images, val_labels = (
                        val_data[0].to(device),
                        val_data[1].to(device),
                    )
                    val_outputs=model(val_images)
                    val_logit_batches.append(val_outputs)
                    val_label_batches.append(val_labels)
                    loss = loss_function(val_outputs, val_labels)
                    val_loss += loss.item()
                val_y_pred = torch.cat(val_logit_batches, dim=0)
                val_y = torch.cat(val_label_batches, dim=0)
                val_loss /= val_step
                val_loss_values.append(val_loss)
                y_onehot = [y_trans(i) for i in decollate_batch(val_y, detach=False)]
                y_pred_act = [y_pred_trans(i) for i in decollate_batch(val_y_pred)]
                auc_metric(y_pred_act, y_onehot)
                val_auc = auc_metric.aggregate()
                auc_metric.reset()
                del y_pred_act, y_onehot
                val_auc_values.append(val_auc)
                acc_value = torch.eq(val_y_pred.argmax(dim=1), val_y)
                val_acc = acc_value.sum().item() / len(acc_value)
                val_acc_values.append(val_acc)
                # Keep the weights of the epoch with the highest validation AUC for this fold.
                if val_auc > best_metric:
                    best_metric = val_auc
                    best_metric_epoch = epoch + 1
                    torch.save(model.state_dict(), os.path.join(root_dir, "{}mri_110_efficientv2_s_best_metric_model.pth".format(fold+1)))
                    print("saved new best metric model")
        print(
            f"fold: {fold + 1}"
            f"current epoch: {epoch + 1}"
            f" train AUC: {train_auc:.4f} " 
            f" train loss: {train_loss:.4f}"
            f" train accuracy: {train_acc:.4f}"
            f" val AUC: {val_auc:.4f} " 
            f" val loss: {val_loss:.4f}"
            f" val accuracy: {val_acc:.4f}"
            f" best AUC: {best_metric:.4f}"
            f" at epoch: {best_metric_epoch}"
        )

    print(f"train completed, best_metric: {best_metric:.4f} " f"at epoch: {best_metric_epoch}")

In [None]:
# Draw the loss, accuracy and AUC curves (train vs. validation) side by side.
# Each entry: (panel title, train series, train label, validation series, validation label).
curve_panels = [
    (" loss", train_loss_values, 'train loss', val_loss_values, 'val loss'),
    (" acc", train_acc_values, 'train acc', val_acc_values, 'val acc'),
    (" auc", train_auc_values, 'train auc', val_auc_values, 'val auc'),
]
plt.figure(figsize=(12, 6))
for position, (panel_title, train_series, train_label, val_series, val_label) in enumerate(curve_panels, start=1):
    plt.subplot(1, 3, position)
    plt.plot(train_series, label=train_label)
    plt.plot(val_series, label=val_label)
    plt.legend(loc='upper left')
    plt.xlabel("epoch")
    plt.title(panel_title)
plt.show()

In [None]:
# Evaluate the best checkpoint of fold 1 on the held-out test set.
# The training loop stores its checkpoints as "<fold>mri_110_efficientv2_s_best_metric_model.pth";
# the "mri_95" file name used here before is never written by this notebook.
# NOTE(review): if a checkpoint from another experiment was intended, point this path at it explicitly.
best_checkpoint_path = os.path.join(root_dir, "1mri_110_efficientv2_s_best_metric_model.pth")
model.load_state_dict(torch.load(best_checkpoint_path, map_location='cpu'))
model.to(device)
model.eval()
# Fresh metric object so the result does not depend on state left over from training.
auc_metric = ROCAUCMetric()
y_true = []
with torch.no_grad():
    test_logit_batches = []
    test_label_batches = []
    for test_data in test_loader:
        test_images, test_labels = (
            test_data[0].to(device),
            test_data[1].to(device),
        )
        pred = model(test_images)
        test_logit_batches.append(pred)
        test_label_batches.append(test_labels)
    y_pred = torch.cat(test_logit_batches, dim=0)
    y = torch.cat(test_label_batches, dim=0)
    y_onehot = [y_trans(i) for i in decollate_batch(y, detach=False)]
    y_pred_act = [y_pred_trans(i) for i in decollate_batch(y_pred)]
    # Per-sample maximum of the raw model outputs, kept as a list of 0-d tensors.
    # NOTE(review): despite the name these are max logits, not class labels or probabilities.
    pred_lables = list(y_pred.max(dim=1).values)
    print(pred_lables)
    auc_metric(y_pred_act, y_onehot)
    result = auc_metric.aggregate()
    auc_metric.reset()
    del y_pred_act, y_onehot
    acc_value = torch.eq(y_pred.argmax(dim=1), y)
    print(len(acc_value))
    test_acc = acc_value.sum().item() / len(acc_value)
    print('test acc:',test_acc)
    print('test auc:',result)

In [None]:
# Extract the deep-learning features:
# convert every tensor of the score list into a NumPy value ...
numpy_array_list = []
for score_tensor in pred_lables:
    numpy_array_list.append(score_tensor.cpu().numpy())
# ... and derive one identifier per test image: the file name up to its first '.'.
name = [os.path.basename(image_path).partition(".")[0] for image_path in test_xx]


In [None]:
# Save the per-image results to an Excel file.
import pandas as pd

# Build the table: image identifier, ground-truth label and model score.
data = {'number': name,
        'label': test_yy,
        'pred': numpy_array_list
    }

df = pd.DataFrame(data)
# Write below root_dir instead of a path relative to the current working
# directory: the per-patient averaging step reads DL_result.xlsx from
# root_dir, so both must refer to the same file wherever the notebook is started.
df.to_excel(os.path.join(root_dir, 'DL_result.xlsx'),index=False)

In [None]:
# Average the prediction values of the two images selected for each patient.
import pandas as pd
import numpy as np

path = r'/home/yfang/MRI_110_result/DL_result.xlsx'

path1 = r'/home/yfang/MRI_110_result/data_110.xlsx'
df1 = pd.read_excel(path1)

df = pd.read_excel(path)

# Lookup: image identifier ("<patient>_<k>") -> prediction. The first row wins
# when an identifier occurs more than once.
pred_by_image = {}
for image_id, pred_value in zip(df['number'].astype(str), df['pred']):
    pred_by_image.setdefault(image_id, pred_value)
# Patients that have at least one image in the result table.
patients_with_images = {image_id.split('_')[0] for image_id in pred_by_image}

# Collect one record per patient and build the frame once, instead of
# appending to a DataFrame row by row through the private _append API.
records = []
for i in df1['Value']:
    patient_id = str(i)
    if patient_id not in patients_with_images:
        # No image of this patient is in the result table; skip the patient.
        continue
    first_key = patient_id + '_1'
    second_key = patient_id + '_2'
    missing_keys = [key for key in (first_key, second_key) if key not in pred_by_image]
    if missing_keys:
        # Fail with a clear message rather than an IndexError on an empty array.
        raise KeyError(f"missing prediction(s) for patient {patient_id}: {missing_keys}")
    mean_pred = (pred_by_image[second_key] + pred_by_image[first_key]) / 2
    records.append({'pred': mean_pred, 'name': patient_id})

df2 = pd.DataFrame(records, columns=['pred', 'name'])
df2.to_excel(r'/home/yfang/MRI_110_result/DL_pred_lable_value.xlsx')