In [1]:
import sys
import os
# Resolve the working directory once, then put its parent on sys.path so that
# modules located one level above the notebook can be imported.
cwd = os.getcwd()
sep = os.sep
sys.path.append(os.path.dirname(cwd))

In [2]:
# Parent directory of the notebook's working directory, kept as a string WITH a
# trailing separator because later cells build paths as `pardir + "<name>"`.
# os.path.join(..., "") appends the separator only when it is missing.
pardir = os.path.join(os.path.dirname(cwd), "")

# Every sub-directory of use_case/data is treated as one dataset downstream.
datasets_path = os.path.join(pardir, "use_case", "data")
# os.listdir returns entries in arbitrary order and also lists stray files
# (e.g. OS or editor metadata). Keep directories only and sort them so the
# evaluation order is reproducible and no non-dataset entry reaches the loop.
datasets_name = sorted(
    entry
    for entry in os.listdir(datasets_path)
    if os.path.isdir(os.path.join(datasets_path, entry))
)
datasets_name

['bladder_cancer', 'celiac', 'colorectal_cancer', 'parkinson']

In [3]:
# Directory that holds the saved Deep-GONet model files.
models_path = f"{pardir}Deep-GONet{sep}model"
# Keep only the PyTorch checkpoint files (*.pth), in alphabetical order.
models_name = sorted(
    fname for fname in os.listdir(models_path) if fname.endswith('.pth')
)
models_name

['deepgonet_model_l1_bladder_cancer.pth',
 'deepgonet_model_l1_celiac.pth',
 'deepgonet_model_l1_colorectal_cancer.pth',
 'deepgonet_model_l1_parkinson.pth',
 'deepgonet_model_l2_bladder_cancer.pth',
 'deepgonet_model_l2_celiac.pth',
 'deepgonet_model_l2_colorectal_cancer.pth',
 'deepgonet_model_l2_parkinson.pth',
 'deepgonet_model_lgo_bladder_cancer.pth',
 'deepgonet_model_lgo_celiac.pth',
 'deepgonet_model_lgo_colorectal_cancer.pth',
 'deepgonet_model_lgo_parkinson.pth']

In [4]:
# Hyper-parameters handed to the DeepGONet_pl constructor when the saved models
# are rebuilt for evaluation, plus a few training-only settings.
# NOTE(review): no connection matrix is prepared in this cell; the evaluation
# loop passes None in the fourth constructor argument, presumably the
# connection-matrix slot — confirm this is intended for the "lgo" models.

# --- Network shape ---
n_hidden = [1574, 1386, 951, 515, 255, 90]  # Example hidden layer sizes
n_classes = 1
use_bn = False
keep_prob = 0.4

# --- Loss variant and optimiser ---
type_trainings = ["l1", "l2", "lgo"]  # one saved model per entry and per dataset
alpha = 1e-2  # presumably the weight of the regularisation term — TODO confirm
lr_method = 'adam'
lr = 0.001

# --- Training-loop settings (not read by the evaluation loop in this notebook) ---
epochs = 200
batch_size = 4   #2**9
is_training = True  # Set to False for evaluation

In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

def preprocess_data(X, y, scaler=None):
    """Drop incomplete samples and standardize the feature matrix.

    A sample is kept only when none of its features in ``X`` is missing and
    its label in ``y`` is not missing.

    Parameters
    ----------
    X : array-like, 2-D
        Feature matrix with one row per sample.
    y : array-like
        Labels aligned with the rows of ``X`` (expected to be 1-D so that the
        row mask and the label mask combine element-wise).
    scaler : object with a ``transform`` method, optional
        An already-fitted scaler, e.g. the ``StandardScaler`` fitted on the
        training split. When given it is applied as-is, so the data are
        standardized with the statistics it was fitted on. When omitted, a
        new ``StandardScaler`` is fitted on ``X`` itself (the previous
        behaviour of this function).

    Returns
    -------
    tuple
        ``(X_scaled, y_kept)``: the standardized features of the complete
        samples and their labels.
    """
    # True for samples that are complete in both X and y.
    complete_rows = ~pd.isnull(X).any(axis=1) & ~pd.isnull(y)
    X = X[complete_rows]
    y = y[complete_rows]
    if scaler is None:
        # Fitting on the data being transformed is only appropriate for a
        # training split; pass a fitted `scaler` for validation/test data.
        X = StandardScaler().fit_transform(X)
    else:
        X = scaler.transform(X)
    return X, y

In [9]:
from deepgonet import DeepGONet_pl
import torch
import numpy as np
import os
from sklearn.metrics import roc_auc_score

# Run inference on the GPU when one is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Evaluate every saved model (one per loss variant and dataset) on its test split.
for type_training in type_trainings:
    for dataset_name in datasets_name:

        dataset_path = datasets_path + sep + dataset_name + sep

        # np.load on an .npz archive returns an NpzFile that keeps the file
        # open; the context manager closes it once both arrays have been read.
        with np.load(os.path.join(dataset_path, 'X_test.npz')) as test:
            X_test = test['arr_0']
            y_test = test['arr_1']

        # Preprocess the test data (drop incomplete samples, standardize).
        # NOTE(review): preprocess_data fits its scaler on the data it receives,
        # i.e. on the test split here — TODO consider reusing the scaler that
        # was fitted on the training split.
        X_test, y_test = preprocess_data(X_test, y_test)

        n_input = X_test.shape[1]  # X_test is 2-D: (num_samples, num_features)

        model_name = f"deepgonet_model_{type_training}_{dataset_name}.pth"
        model_path = models_path + sep + model_name
        model = DeepGONet_pl(n_input, n_classes, n_hidden, None, keep_prob, use_bn, lr_method, lr, type_training, alpha, device)
        # SECURITY: torch.load unpickles the file; only load checkpoints from a
        # trusted source (recent PyTorch versions offer weights_only=True).
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.to(device)
        model.eval()

        X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
        # The labels only feed the scikit-learn metric, so they stay on the CPU
        # as a float32 numpy array instead of making a round trip to the device.
        y_test = np.asarray(y_test, dtype=np.float32)

        with torch.no_grad():
            # Insert a singleton axis at position 1 before the forward pass
            # (presumably the input layout DeepGONet_pl expects — TODO confirm).
            X_test = X_test.unsqueeze(1)
            pred = model(X_test)

        pred = pred.squeeze().cpu().numpy()  # Convert predictions to numpy array

        auc = roc_auc_score(y_test, pred)
        print(f"Loss: {type_training}, Dataset: {dataset_name}, AUC: {auc:.4f}")
        

Loss: l1, Dataset: bladder_cancer, AUC: 1.0000
Loss: l1, Dataset: celiac, AUC: 0.5909
Loss: l1, Dataset: colorectal_cancer, AUC: 1.0000
Loss: l1, Dataset: parkinson, AUC: 0.5000
Loss: l2, Dataset: bladder_cancer, AUC: 1.0000
Loss: l2, Dataset: celiac, AUC: 0.5844
Loss: l2, Dataset: colorectal_cancer, AUC: 1.0000
Loss: l2, Dataset: parkinson, AUC: 0.5000
Loss: lgo, Dataset: bladder_cancer, AUC: 1.0000
Loss: lgo, Dataset: celiac, AUC: 0.5528
Loss: lgo, Dataset: colorectal_cancer, AUC: 1.0000
Loss: lgo, Dataset: parkinson, AUC: 0.5000


In [36]:
import pandas as pd
# Load the benchmark table of the other models (it has "Model Name" and
# "Dataset" columns alongside the metrics) and display it.
results_csv = "eabin_internal_comparison.csv"
our_results = pd.read_csv(results_csv)
our_results

Unnamed: 0,Model Name,Accuracy,F1,Sensitivity,Specificity,AUC score,Precision,Dataset
0,LR,88.240,0.920000,84.620000,100.000000,1.000000,100.000000,bladder_cancer
1,RF,82.350,0.900000,100.000000,25.000000,0.980000,81.250000,bladder_cancer
2,KNN,76.470,0.870000,100.000000,0.000000,0.620000,76.470000,bladder_cancer
3,DT,64.710,0.750000,69.230000,50.000000,0.600000,81.820000,bladder_cancer
4,SVM,88.240,0.920000,84.620000,100.000000,0.000000,100.000000,bladder_cancer
...,...,...,...,...,...,...,...,...
91,GAE,43.750,0.307692,26.666667,58.823529,0.427451,36.363636,celiac
92,GCN,46.875,0.638298,100.000000,0.000000,0.500000,46.875000,celiac
93,GAAN,46.875,0.260870,20.000000,70.588235,0.452941,37.500000,parkinson
94,GAE,40.625,0.173913,13.333333,64.705882,0.390196,25.000000,parkinson


In [40]:
# For each dataset keep the row with the highest AUC (idxmax returns the first
# such row when several models tie), then save the summary to CSV.
best_row_labels = our_results.groupby('Dataset')['AUC score'].idxmax()
best_models = (
    our_results
    .loc[best_row_labels, ['Dataset', 'Model Name', 'AUC score']]
    .reset_index(drop=True)
)
best_models.to_csv("best_models.csv", index=False)

In [41]:
# Display the best-AUC model per dataset selected in the previous cell.
best_models

Unnamed: 0,Dataset,Model Name,AUC score
0,bladder_cancer,LR,1.0
1,celiac,LR,0.73
2,colorectal_cancer,LR,1.0
3,parkinson,LR,0.64
