In [44]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,roc_auc_score,classification_report,recall_score,precision_score
from sklearn.svm import SVC
import numpy as np
from sklearn.model_selection import KFold
from utils.eval import get_results
import json
from monai.networks.nets import DenseNet121
import torch
from monai.data import DataLoader, ImageDataset
from monai.transforms import RandRotate90, Resize, EnsureChannelFirst, Compose, ScaleIntensity,RandAxisFlip
import os
from tqdm import tqdm
from joblib import dump, load
from sklearn.preprocessing import label_binarize
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.metrics import mean_squared_error

In [45]:
def get_densenet_probabilities(densenet_model, test_dataloader):
    prediction_list = []
    with torch.no_grad(): 
        progress_bar = tqdm(test_dataloader, desc="Testing")
    for X, y in progress_bar:
        X = X.to('cuda')
        pred = densenet_model(X)
        prediction_list.append(torch.nn.functional.softmax(pred, dim=-1).cpu().detach().numpy().reshape(2))
    return np.array(prediction_list)

def get_rf_probabilities(model, radiomics_data):
    probs = model.predict_proba(radiomics_data)  # Output probabilities for each class
    return probs

In [None]:
def train_fusion_model(densenet_model, rf_model, image_dataloader, radiomics_data,labels):
    densenet_probs = get_densenet_probabilities(densenet_model, image_dataloader)
    rf_probs = get_rf_probabilities(rf_model, radiomics_data)
    fusion_features = np.hstack([densenet_probs, rf_probs])
    # fusion_features = np.hstack([densenet_probs[:,1].reshape(len(labels),1), rf_probs[:,1].reshape(len(labels),1)])
    print(fusion_features.shape)
    model = LogisticRegression()
    model.fit(fusion_features, labels)
    return model

def predict_with_fusion_model(densenet_model, rf_model, fusion_model,image_dataloader, radiomics_data):
    densenet_probs = get_densenet_probabilities(densenet_model, image_dataloader)
    rf_probs = get_rf_probabilities(rf_model, radiomics_data)
    # Combine probabilities for the fusion model  
    fusion_features = np.hstack([densenet_probs, rf_probs])
    # fusion_features = np.hstack([densenet_probs[:,1].reshape(len(radiomics_data),1), rf_probs[:,1].reshape(len(radiomics_data),1)])
    fusion_predictoin = fusion_model.predict(fusion_features)
    fusion_proba = fusion_model.predict_proba(fusion_features)
    return fusion_predictoin,fusion_proba

Read radiomics data

In [47]:
# Get features
features = pd.read_csv('./Data/2D_t2/all.csv')
data = features.drop(columns=['Center'])
correlation = data.iloc[:,2:].corr(method='spearman')
selected_features = correlation[(correlation.abs() <= 0.6) | (correlation.abs() == 1)]
for i in range(200):
    for index in range(selected_features.shape[0]):
        row = selected_features.iloc[index]
        to_drop = row[row.isna()].index
        selected_features_c_droped = selected_features.drop(labels=to_drop,axis=0)
        selected_features_r_droped = selected_features_c_droped.drop(labels=to_drop,axis=1)
        if selected_features_r_droped.shape[0]<selected_features.shape[0]:
            break
    selected_features = selected_features_r_droped
selected_data = data[selected_features.index.to_list()+['Label','Name']]
selected_data['Label'] = selected_data['Label'].apply(lambda x: x-1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data['Label'] = selected_data['Label'].apply(lambda x: x-1)


Organize label

In [48]:
with open("Fold_data.json", "r") as file:
    fold_data = json.load(file)

In [49]:
test_acc_list = []
test_auc_list = []
test_recall_list = []
test_precision_list = []

for fold in range(1,6):
    train_list = fold_data[f'Fold{fold}']['train']
    test_list = fold_data[f'Fold{fold}']['test']
    train_data = selected_data[selected_data["Name"].isin(train_list)]
    test_data = selected_data[selected_data["Name"].isin(test_list)]

    test_label = test_data[['Label']]
    test_features = test_data.iloc[:,:-2]
    test_label_and_path = test_data[['Name','Label']]
    test_label_and_path['Name'] = test_label_and_path['Name'].apply(lambda x: x + '.nii.gz')

    train_label = train_data[['Label']]
    train_features = train_data.iloc[:,:-2]
    train_label_and_path = train_data[['Name','Label']]
    train_label_and_path['Name'] = train_label_and_path['Name'].apply(lambda x: x + '.nii.gz')

    rf_model = load(f'F:\Myproject\IPMN-Radiomics-Plus-Deeplearning\RFmodels\RF_{fold}.joblib')
    densenet = DenseNet121(
            spatial_dims=3,  # 3D input
            in_channels=1,   # Typically for grayscale (e.g., MRI/CT scans), change to 3 for RGB
            out_channels=2   # Adjust for binary or multi-class segmentation/classification
        )
    densenet.load_state_dict(torch.load(f'./weights/fold{fold}_best_auc_model.pth', map_location='cpu', weights_only=True))
    densenet.to('cuda')
    data_dir  = r"D:\DATASET\IPMN_t2_cyst\ROI"
    transforms = Compose([ScaleIntensity(), EnsureChannelFirst(), Resize((96, 96, 96))])
    test_ds = ImageDataset(image_files=[os.path.join(data_dir,name) for name in test_label_and_path['Name'].to_list()], labels=test_label_and_path['Label'].to_list(), transform=transforms)
    test_dataloader = DataLoader(test_ds, batch_size=1, shuffle=False,num_workers=1)

    train_ds = ImageDataset(image_files=[os.path.join(data_dir,name) for name in train_label_and_path['Name'].to_list()], labels=train_label_and_path['Label'].to_list(), transform=transforms)
    train_dataloader = DataLoader(train_ds, batch_size=1, shuffle=False,num_workers=1)

    fusion_model = train_fusion_model(densenet_model = densenet, rf_model = rf_model, image_dataloader = train_dataloader,radiomics_data = train_features,labels=train_label)
    final_prediction,final_proba = predict_with_fusion_model(densenet_model=densenet,rf_model=rf_model,fusion_model=fusion_model, 
                            image_dataloader=test_dataloader, radiomics_data=test_features)

    accuracy = accuracy_score(test_label,final_prediction)
    print(f'Accuracy: {accuracy:.4f}')
    auc_score = roc_auc_score(test_label,final_proba[:,1])
    print(f'AUC Score: {auc_score:.4f}')
    recall = recall_score(test_label, final_prediction)
    print(f'Recall Score: {recall:.4f}')
    precision = precision_score(test_label,final_prediction)
    print(f'Recall Score: {precision:.4f}')

    test_acc_list.append(accuracy)
    test_auc_list.append(auc_score)
    test_recall_list.append(recall)
    test_precision_list.append(precision)

  rf_model = load(f'F:\Myproject\IPMN-Radiomics-Plus-Deeplearning\RFmodels\RF_{fold}.joblib')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_label_and_path['Name'] = test_label_and_path['Name'].apply(lambda x: x + '.nii.gz')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_label_and_path['Name'] = train_label_and_path['Name'].apply(lambda x: x + '.nii.gz')
Testing: 100%|██████████| 256/256 [00:18<00:00, 13.95it/s]
  y = column_or_1d(y, warn=True)


(256, 4)


Testing: 100%|██████████| 67/67 [00:08<00:00,  8.30it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_label_and_path['Name'] = test_label_and_path['Name'].apply(lambda x: x + '.nii.gz')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_label_and_path['Name'] = train_label_and_path['Name'].apply(lambda x: x + '.nii.gz')


Accuracy: 0.6567
AUC Score: 0.7786
Recall Score: 0.2759
Recall Score: 0.8000


Testing: 100%|██████████| 261/261 [00:17<00:00, 14.73it/s]
  y = column_or_1d(y, warn=True)


(261, 4)


Testing: 100%|██████████| 62/62 [00:07<00:00,  7.97it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_label_and_path['Name'] = test_label_and_path['Name'].apply(lambda x: x + '.nii.gz')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_label_and_path['Name'] = train_label_and_path['Name'].apply(lambda x: x + '.nii.gz')


Accuracy: 0.7097
AUC Score: 0.7178
Recall Score: 0.5238
Recall Score: 0.5789


Testing: 100%|██████████| 257/257 [00:18<00:00, 14.27it/s]
  y = column_or_1d(y, warn=True)


(257, 4)


Testing: 100%|██████████| 66/66 [00:08<00:00,  8.15it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_label_and_path['Name'] = test_label_and_path['Name'].apply(lambda x: x + '.nii.gz')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_label_and_path['Name'] = train_label_and_path['Name'].apply(lambda x: x + '.nii.gz')


Accuracy: 0.6818
AUC Score: 0.7990
Recall Score: 0.2000
Recall Score: 0.8333


Testing: 100%|██████████| 260/260 [00:18<00:00, 14.05it/s]
  y = column_or_1d(y, warn=True)


(260, 4)


Testing: 100%|██████████| 63/63 [00:07<00:00,  7.89it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_label_and_path['Name'] = test_label_and_path['Name'].apply(lambda x: x + '.nii.gz')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_label_and_path['Name'] = train_label_and_path['Name'].apply(lambda x: x + '.nii.gz')


Accuracy: 0.6032
AUC Score: 0.6211
Recall Score: 0.1600
Recall Score: 0.5000


Testing: 100%|██████████| 258/258 [00:18<00:00, 14.20it/s]
  y = column_or_1d(y, warn=True)


(258, 4)


Testing: 100%|██████████| 65/65 [00:08<00:00,  7.93it/s]

Accuracy: 0.5846
AUC Score: 0.6351
Recall Score: 0.0357
Recall Score: 1.0000





In [50]:

print("Test set")
get_results(test_acc_list,test_auc_list,test_recall_list,test_precision_list)
print('================================================')

Test set
Recall, Average:0.2391, Std:0.1622
precision, Average:0.7425, Std:0.1808
Accuracy, Average:0.6472, Std:0.0470
AUC, Average:0.7103, Std:0.0724
