In [1]:
from sklearn.metrics import auc, roc_auc_score
from sklearn.metrics import precision_recall_curve
import pandas as pd
import numpy as np
import torch
import random
import scipy.sparse as sp
import os
from tqdm import tqdm
from scipy.sparse import load_npz

def cal_auc(output, labels):
    outputTest = output.cpu().detach().numpy()
    outputTest = np.exp(outputTest)
    outputTest = outputTest[:,1]
    labelsTest = labels.cpu().numpy()
    AUROC = roc_auc_score(labelsTest, outputTest)
    precision, recall, _thresholds = precision_recall_curve(labelsTest, outputTest)
    AUPRC = auc(recall, precision)
    return AUROC,AUPRC

def _generate_G_from_H_weight(H, W):
    n_edge = H.shape[1]
    DV = np.sum(H * W, axis=1)  # the degree of the node
    DE = np.sum(H, axis=0)  # the degree of the hyperedge
    invDE = np.mat(np.diag(1/DE))
    DV2 = np.mat(np.diag(np.power(DV, -0.5)))
    W = np.mat(np.diag(W))
    H = np.mat(H)
    HT = H.T
    G = DV2 * H * W * invDE * HT * DV2
    return G
def generate_P_from_H_weight(H: np.ndarray, W: np.ndarray):
    """
    Generate the RWR-based propagation matrix P = D_u^{-1} * H * D_e^{-1} * W^T

    Args:
        H (np.ndarray): Incidence matrix of shape (n_nodes, n_edges)
        W (np.ndarray): Hyperedge weight vector of shape (n_nodes, n_edges)

    Returns:
        np.ndarray: Propagation matrix P of shape (n_nodes, n_edges)
    """
    H = np.array(H)
    W = np.array(W)
    assert not np.isnan(H).any(), "H has NaNs"
    assert not np.isinf(H).any(), "H has Infs"
    assert not np.isnan(W).any(), "W has NaNs"
    assert not np.isinf(W).any(), "W has Infs"
    # Compute hyperedge degrees
    DE = np.sum(H, axis=0)  # (m,)
    # invDE = np.diag(1.0 / DE)
    invDE = np.diag(np.where(DE != 0, 1.0 / DE, 0.0))
    # Compute node degrees
    DU = np.sum(H * W, axis=1)  # (n,)
    # invDU = np.diag(1.0 / DU)
    invDU = np.diag(np.where(DU != 0, 1.0 / DU, 0.0))

    # Final P = D_u^{-1} * H * D_e^{-1} * W^T
    P = invDU @ H @ invDE @ W.T
    assert not np.isnan(P).any(), "P has NaNs"
    assert not np.isinf(P).any(), "P has Infs"
    return P
def getData(positiveGenePath, negativeGenePath, geneList):
    positiveGene = pd.read_csv(positiveGenePath, header = None)
    positiveGene = list(positiveGene[0].values)
    positiveGene = list(set(geneList)&set(positiveGene))
    positiveGene.sort()
    negativeGene = pd.read_csv(negativeGenePath, header = None)     
    negativeGene = negativeGene[0]
    negativeGene = list(set(negativeGene)&set(geneList))
    negativeGene.sort()

    labelFrame = pd.DataFrame(data = [0]*len(geneList), index = geneList)
    labelFrame.loc[positiveGene,:] = 1
    positiveIndex = np.where(labelFrame == 1)[0]
    labelFrame.loc[negativeGene,:] = -1
    negativeIndex = np.where(labelFrame == -1)[0]
    labelFrame = pd.DataFrame(data = [0]*len(geneList), index = geneList)
    labelFrame.loc[positiveGene,:] = 1
    
    positiveIndex = list(positiveIndex)
    negativeIndex = list(negativeIndex)
    sampleIndex = positiveIndex + negativeIndex
    sampleIndex = np.array(sampleIndex)
    label = pd.DataFrame(data = [1]*len(positiveIndex) + [0]*len(negativeIndex))
    label = label.values.ravel()
    return  sampleIndex, label, labelFrame

def processingIncidenceMatrix(geneList):
    ids = ['c2','c5']
    incidenceMatrix = pd.DataFrame(index= geneList)
    for id in ids:
        geneSetNameList = pd.read_csv('./Data/'+id+'Name.txt',sep='\t',header=None)
        geneSetNameList = list(geneSetNameList[0].values)
        z=0
        idList = list()
        for name in geneSetNameList:
            idList.append(z)
            # if(id=='c2'):
            #     q = name.split('_')
            #     if('CANCER' in q or 'TUMOR' in q or 'NEOPLASM' in q):
            #         print(name)
            #     else:
            #         idList.append(z)
            # elif(name[:2]=='HP'):
            #     q = name.split('_')
            #     if('CANCER' in q or 'TUMOR' in q or 'NEOPLASM' in q):
            #         print(name)
            #     else:
            #         idList.append(z)
            # else:
            #     idList.append(z)
            z=z+1
        genesetData = sp.load_npz('./Data/'+id+'_GenesetsMatrix.npz')
        incidenceMatrixTemp = pd.DataFrame(data = genesetData.A,index= geneList)
        incidenceMatrixTemp = incidenceMatrixTemp.iloc[:,idList]

        incidenceMatrix = pd.concat([incidenceMatrix,incidenceMatrixTemp],axis=1)

    incidenceMatrix.columns = np.arange(incidenceMatrix.shape[1])
    return incidenceMatrix
def getWeightedIncidenceMatrix(incidenceMatrix):
    # Step 2: Load HumanNet
    HUMANNET = pd.read_csv("./Data/humannet_claim.tsv", sep='\t', header=None)
    HUMANNET.columns = ["Gene1", "Gene2", "Score"] if HUMANNET.shape[1] == 3 else ["Gene1", "Gene2"]

    # Step 3: Compute degrees
    gene_to_degree = {}
    for _, row in HUMANNET.iterrows():
        gene_to_degree[row["Gene1"]] = gene_to_degree.get(row["Gene1"], 0) + 1
        gene_to_degree[row["Gene2"]] = gene_to_degree.get(row["Gene2"], 0) + 1

   # Step 4: Apply weights
    weighted_matrix = incidenceMatrix.copy()
    
    for gene in weighted_matrix.index:
        # Convert to string if it's not already
        if isinstance(gene, list):
            gene = gene[0]  # or: gene = "_".join(gene)
        degree = gene_to_degree.get(str(gene), 0)
        weighted_matrix.loc[gene] = weighted_matrix.loc[gene] * degree

    # Step 5: Save to file
    # weighted_matrix.to_csv(output_path)
    # print(f"Saved weighted incidence matrix to {output_path}")
    return weighted_matrix
def getInterlayerMatrix(incidenceMatrix, DGIDB_binary_matrix, dgidb):
    # Jump probability for matching genes
    w = 1

    # Number of genes (assuming they are both of same size or matchable)
    num_genes_dgidb = DGIDB_binary_matrix.shape[0]
    num_genes_msigdb = incidenceMatrix.shape[0]

    # Initialize the inter-layer matrix (D) with zeros
    D = np.zeros((num_genes_dgidb, num_genes_msigdb))
    i = 0
    # Build the inter-layer matrix (D)
    for gene_dgidb, idx_dgidb in dgidb.items():
        # If the gene exists in both gene-to-index mappings
        if gene_dgidb in incidenceMatrix:      
            idx_msigdb = incidenceMatrix.index.get_loc(gene_dgidb)
            D[idx_dgidb, idx_msigdb] = w  # Set jump probability
            i += 1
    return D
def getDGIDBCompleteVector(incidenceMatrix, D, DGIDB_binary_matrix, DGIDB_vector):
    dgidb_vector_complete = np.zeros(incidenceMatrix.shape[0])
    print(dgidb_vector_complete.shape)
    for gene in tqdm(range(incidenceMatrix.shape[0]), desc="DGIDB full vector calculation"):
        # Intra-hypergraph transitions in MSIGDB (moving within MSIGDB)
        row = incidenceMatrix.iloc[gene]  # This gives you the full row as a Series
        connected_pathways = np.where(row.values != 0)[0]
        if len(connected_pathways) == 0:
            continue  # Skip if no pathways are found

        for pathway in connected_pathways:
            # Find genes connected to the selected pathway (weighted transition in MSIGDB)
            # connected_genes = incidenceMatrix[:, pathway].toarray().flatten()
            connected_genes = incidenceMatrix.iloc[:, pathway].values.flatten()
        # for pathway in incidenceMatrix.columns:
        #     connected_genes = incidenceMatrix[pathway].values.flatten()
            neighbor_genes = np.where(connected_genes > 0)[0]  # Get genes with nonzero weight

            # Check if the current gene has a DGIDB connection
            dgidb_gene = np.where(D[:, gene] > 0)[0]  # Find DGIDB neighbors of the current MSIGDB gene
            if len(dgidb_gene) == 1:
                dgidb_drugs = DGIDB_binary_matrix[dgidb_gene[0], :].nonzero()[1]
                neighbor_genes_set = set()  # To avoid duplicates
            
                for drug in dgidb_drugs:
                    # Get genes connected through the same drug (edge)
                    connected_genes = DGIDB_binary_matrix[:, drug].toarray().flatten()
                    neighbor_genes = np.where(connected_genes > 0)[0]
                    # Add unique neighbors to the set
                    neighbor_genes_set.update(neighbor_genes)

                # Sum contributions from unique DGIDB neighbors
                if len(neighbor_genes_set) > 0:
                    neighbor_genes_list = list(neighbor_genes_set)
                    dgidb_contribution = np.sum(DGIDB_vector[neighbor_genes_list])  # Sum unique contributions
                    dgidb_vector_complete[gene] += dgidb_contribution  # Store in the complete vector

    # Normalize dgidb_vector_complete to avoid overflow
    dgidb_vector_complete /= np.sum(dgidb_vector_complete) if np.sum(dgidb_vector_complete) > 0 else 1
    return dgidb_vector_complete

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
import torch.nn as nn
import math
from torch.nn.parameter import Parameter
import torch.nn.functional as F
from torch.nn.modules.module import Module
class RWR_HGNN_Prop(nn.Module):
    def __init__(self, alpha):
        super(RWR_HGNN_Prop, self).__init__()
        self.alpha = alpha

    def forward(self, H0, H_l, P, complete_vector):
        return self.alpha * H0 + (1 - self.alpha) * P.T.matmul(H_l) + complete_vector

class RWR_HGNN_Layer(nn.Module):
    def __init__(self, nhid, alpha, dropout=0.5):
        super(RWR_HGNN_Layer, self).__init__()
        self.propagate = RWR_HGNN_Prop(alpha)
        self.linear = nn.Linear(nhid, nhid)
        self.activation = nn.ReLU()
        self.dropout = dropout

    def forward(self, H0, x, P, complete_vector):
        h = self.propagate(H0, x, P, complete_vector)
        # Possibly to prevent oversmoothing
        # h = h + x
        h = self.activation(h)
        h = F.dropout(h, p=self.dropout, training=self.training)
        return h
class DISHyperNet_RWR(nn.Module):
    def __init__(self, in_ch, n_hid, n_class, alpha=0.2, num_layers=3, dropout=0.5):
        super(DISHyperNet_RWR, self).__init__()
        self.dropout = dropout
        self.alpha = alpha
        self.fc = nn.Linear(in_ch, n_hid)
        self.layers = nn.ModuleList()
        self.n_hid = n_hid
        for i in range(num_layers):
            self.layers.append(
                RWR_HGNN_Layer(n_hid, alpha, dropout=dropout)
            )
        self.outLayer = nn.Linear(n_hid, n_class)

    def forward(self, x, P, complete_vector):
        H0 = F.relu(self.fc(x))
        H0 = F.dropout(H0, self.dropout, training=self.training)
        x = H0
        complete_vector = complete_vector.unsqueeze(1)  # shape [17442, 1]
        complete_vector = complete_vector.expand(-1, self.n_hid)  # shape [17442, 256]
        for layer in self.layers:
            x = layer(H0, x, P, complete_vector)
        out = self.outLayer(x)
        return F.log_softmax(out, dim=1)


In [3]:
import pandas as pd
import os
import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold
import random
import json
from tqdm import tqdm
def train_test(trainIndex, testIndex, labelFrame, incidenceMatrix, geneList, lr, epochs, dropout, n_hid, weight_decay):
    print("Starting train_test function...")
    print("Selecting training labels...")
    trainFrame = labelFrame.iloc[trainIndex]

    print("Finding positive training genes...")
    trainPositiveGene = list(trainFrame.where(trainFrame == 1).dropna().index)
    print(f"Number of positive genes in training set: {len(trainPositiveGene)}")

    print("Computing sum over selected rows in incidence matrix...")
    positiveMatrixSum = incidenceMatrix.loc[trainPositiveGene].sum()

    print("Selecting hyperedges with at least 3 positive gene hits...")
    selHyperedgeIndex = np.where(positiveMatrixSum >= 3)[0]
    print(f"Selected {len(selHyperedgeIndex)} hyperedges")

    selHyperedge = incidenceMatrix.iloc[:, selHyperedgeIndex]
    # RIGHT NOW THIS IS |E| x 1, NEEDS TO BE |V| x |E|
    hyperedgeWeight = positiveMatrixSum[selHyperedgeIndex].values
    print(hyperedgeWeight.shape)
    selHyperedgeWeightSum = incidenceMatrix.iloc[:, selHyperedgeIndex].values.sum(0)

    print("Computing normalized hyperedge weights...")
    hyperedgeWeight = hyperedgeWeight / selHyperedgeWeightSum

    print("Constructing incidence matrix H...")
    H = np.array(selHyperedge).astype('float')
    print(H.shape)

    print("Computing DV vector and applying regularization to isolated nodes...")
    DV = np.sum(H * hyperedgeWeight, axis=1)
    for i in range(DV.shape[0]):
        if DV[i] == 0:
            t = random.randint(0, H.shape[1] - 1)
            H[i][t] = 0.0001
        
    print("GENERATING COMPLETE VECTOR")
    DGIDB_DIRECTORY = "./"
    DGIDB_binary_matrix = load_npz(DGIDB_DIRECTORY + "hypergraph_incidence_matrix_binary.npz")
    # Open the JSON file and load its content into a dictionary
    with open("./gene_claim_to_index.json", "r") as file:
        dgidb = json.load(file)
    DGIDB_vector =  np.load("./DGIDB_vector.npy")

    D = getInterlayerMatrix(incidenceMatrix.iloc[:, selHyperedgeIndex], DGIDB_binary_matrix, dgidb)
    print("D SHAPE", D.shape)
    print("MATRIX SHAPE", incidenceMatrix.iloc[:, selHyperedgeIndex].shape)
    complete_vector = getDGIDBCompleteVector(incidenceMatrix.iloc[:, selHyperedgeIndex], D, DGIDB_binary_matrix, DGIDB_vector)

    print("Generating graph G from H and hyperedge weights...")
    print("COMPUTING P FIRST")
    W_matrix = getWeightedIncidenceMatrix(incidenceMatrix.iloc[:, selHyperedgeIndex])
    # W_matrix = np.array(selHyperedge).astype('float')
    print(W_matrix.shape)  
    # assert not np.isnan(W_matrix).any(), "W_matrix has NaNs"
    # assert not np.isinf(W_matrix).any(), "W_matrix has Infs"
    print(H.shape)
    assert H.shape == W_matrix.shape, f"Mismatch: H {H.shape} vs W {W_matrix.shape}"
    P = generate_P_from_H_weight(H, W_matrix)
    print(P.shape)
    # G = _generate_G_from_H_weight(H, hyperedgeWeight)
    # print(G.shape)

    N = H.shape[0]

    print("Preparing adjacency matrix and features...")
    # adj = torch.Tensor(G).float()
    adj = torch.Tensor(P).float()
    features = torch.eye(N).float()
    theLabels = torch.from_numpy(labelFrame.values.reshape(-1,))
    complete_vector = torch.tensor(complete_vector, dtype=torch.float32)

    print("Initializing model...")
    model = DISHyperNet_RWR(in_ch=N, n_hid=n_hid, n_class=2, dropout=dropout)

    print("Setting up optimizer and learning rate scheduler...")
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    schedular = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 200, 300, 400], gamma=0.5)

    if torch.cuda.is_available():
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        theLabels = theLabels.cuda()
        complete_vector = complete_vector.cuda()
    for epoch in tqdm(range(epochs), desc="Training Epochs"):
        model.train()
        optimizer.zero_grad() 
        output = model(features, adj, complete_vector)
        loss_train = F.nll_loss(output[trainIndex], theLabels[trainIndex]) 
        loss_train.backward()
        optimizer.step()
        schedular.step()
    model.eval()
    with torch.no_grad():
        output = model(features, adj, complete_vector)
        loss_test = F.nll_loss(output[testIndex], theLabels[testIndex])
        AUROC_val, AUPRC_val = cal_auc(output[testIndex], theLabels[testIndex])
        outputFrame = pd.DataFrame(data = output.exp().cpu().detach().numpy(), index = geneList)
    return AUROC_val, AUPRC_val, outputFrame

def trainPred(geneList, incidenceMatrix, positiveGenePath, negativeGenePath, lr, epochs, dropout, n_hid, weight_decay):
    aurocList = list()
    auprcList = list()
    evaluationRes = pd.DataFrame(index = geneList)
    for i in range(5):
        print(f"\n===== Outer Loop Iteration {i+1}/5 =====")
        print("🔄 Loading data...")
        sampleIndex, label, labelFrame = getData(positiveGenePath, negativeGenePath, geneList)

        print("🧮 Reshaping data for StratifiedKFold...")
        sk_X = sampleIndex.reshape([-1, 1])
        sfolder = StratifiedKFold(n_splits=5, random_state=i, shuffle=True)

        for fold_num, (train_index, test_index) in enumerate(sfolder.split(sk_X, label), start=1):
            print(f"\n📂 Fold {fold_num}/5 — splitting train/test sets...")
            trainIndex = sampleIndex[train_index]
            testIndex = sampleIndex[test_index]
            _, __ = label[train_index], label[test_index]

            print("🚀 Starting training and evaluation...")
            AUROC_val, AUPRC_val, outputFrame = train_test(
                trainIndex, testIndex, labelFrame, incidenceMatrix,
                geneList, lr, epochs, dropout, n_hid, weight_decay
            )

            print(f"✅ Finished training — AUROC: {AUROC_val.item():.4f}, AUPRC: {AUPRC_val.item():.4f}")
            aurocList.append(AUROC_val.item())
            auprcList.append(AUPRC_val.item())

            print("📊 Appending results to evaluation dataframe...")
            evaluationRes = pd.concat([evaluationRes, outputFrame[1]], axis=1)
    return aurocList, auprcList, evaluationRes

In [4]:
import pandas as pd
import sys, os, random
import numpy as np
import scipy.sparse as sp
import torch

seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
 

if __name__ == "__main__":    
    _, outputPath = sys.argv
    lr = 5e-3
    dropout = 0.2
    weight_decay = 5e-6
    epochs = 500
    n_hid = 256
    
    positiveGenePath = r'./Data/BreastCancerOTGenes.txt'
    negativeGenePath = r'./Data/BreastCancerFalsegenes.txt'
    print("READING GENE LIST")


    geneList = pd.read_csv(r'./Data/geneList.csv', header=None)
    geneList = list(geneList[0].values)
    print("PROCESSING MATRIX")
    incidenceMatrix = processingIncidenceMatrix(geneList)
    print("TRAINING")

    aurocList, auprcList, evaluationRes = trainPred(geneList, incidenceMatrix, positiveGenePath,
                                          negativeGenePath, lr, epochs, dropout, n_hid, weight_decay) 
    predRes = evaluationRes.sum(1).sort_values(ascending = False) / 25
    predRes.to_csv("UNIDIRCANCER_WITHRESIDUAL.tsv",sep='\t', header = False)
    print(np.mean(aurocList)) # 0.936
    print(np.mean(auprcList)) # 0.894


READING GENE LIST
PROCESSING MATRIX
TRAINING

===== Outer Loop Iteration 1/5 =====
🔄 Loading data...
🧮 Reshaping data for StratifiedKFold...

📂 Fold 1/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20745 hyperedges
(20745,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20745)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20745)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:55<00:00, 99.48it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20745)
(17442, 20745)




(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:38<00:00,  5.07it/s]


✅ Finished training — AUROC: 0.6554, AUPRC: 0.9609
📊 Appending results to evaluation dataframe...

📂 Fold 2/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20730 hyperedges
(20730,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20730)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20730)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [03:21<00:00, 86.68it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20730)
(17442, 20730)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:35<00:00,  5.23it/s]


✅ Finished training — AUROC: 0.6392, AUPRC: 0.9579
📊 Appending results to evaluation dataframe...

📂 Fold 3/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20694 hyperedges
(20694,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20694)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20694)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [03:13<00:00, 89.99it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20694)
(17442, 20694)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:36<00:00,  5.18it/s]


✅ Finished training — AUROC: 0.6640, AUPRC: 0.9636
📊 Appending results to evaluation dataframe...

📂 Fold 4/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20737 hyperedges
(20737,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20737)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20737)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [03:12<00:00, 90.67it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20737)
(17442, 20737)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:34<00:00,  5.30it/s]


✅ Finished training — AUROC: 0.6512, AUPRC: 0.9600
📊 Appending results to evaluation dataframe...

📂 Fold 5/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20740 hyperedges
(20740,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20740)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20740)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [03:07<00:00, 92.96it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20740)
(17442, 20740)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:36<00:00,  5.19it/s]


✅ Finished training — AUROC: 0.6634, AUPRC: 0.9631
📊 Appending results to evaluation dataframe...

===== Outer Loop Iteration 2/5 =====
🔄 Loading data...
🧮 Reshaping data for StratifiedKFold...

📂 Fold 1/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20780 hyperedges
(20780,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20780)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20780)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [03:18<00:00, 88.07it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20780)
(17442, 20780)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:36<00:00,  5.19it/s]


✅ Finished training — AUROC: 0.6381, AUPRC: 0.9572
📊 Appending results to evaluation dataframe...

📂 Fold 2/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20696 hyperedges
(20696,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20696)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20696)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [03:14<00:00, 89.74it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20696)
(17442, 20696)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:36<00:00,  5.17it/s]


✅ Finished training — AUROC: 0.6431, AUPRC: 0.9594
📊 Appending results to evaluation dataframe...

📂 Fold 3/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20700 hyperedges
(20700,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20700)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20700)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [03:05<00:00, 93.88it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20700)
(17442, 20700)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:38<00:00,  5.07it/s]


✅ Finished training — AUROC: 0.6671, AUPRC: 0.9637
📊 Appending results to evaluation dataframe...

📂 Fold 4/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20727 hyperedges
(20727,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20727)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20727)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:45<00:00, 105.27it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20727)
(17442, 20727)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:25<00:00,  5.87it/s]


✅ Finished training — AUROC: 0.6590, AUPRC: 0.9627
📊 Appending results to evaluation dataframe...

📂 Fold 5/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20748 hyperedges
(20748,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20748)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20748)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [01:55<00:00, 151.60it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20748)
(17442, 20748)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:24<00:00,  5.89it/s]


✅ Finished training — AUROC: 0.6582, AUPRC: 0.9615
📊 Appending results to evaluation dataframe...

===== Outer Loop Iteration 3/5 =====
🔄 Loading data...
🧮 Reshaping data for StratifiedKFold...

📂 Fold 1/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20761 hyperedges
(20761,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20761)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20761)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [01:57<00:00, 148.43it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20761)
(17442, 20761)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:25<00:00,  5.88it/s]


✅ Finished training — AUROC: 0.6329, AUPRC: 0.9579
📊 Appending results to evaluation dataframe...

📂 Fold 2/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20720 hyperedges
(20720,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20720)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20720)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:00<00:00, 144.95it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20720)
(17442, 20720)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:25<00:00,  5.88it/s]


✅ Finished training — AUROC: 0.6738, AUPRC: 0.9632
📊 Appending results to evaluation dataframe...

📂 Fold 3/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20757 hyperedges
(20757,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20757)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20757)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:01<00:00, 143.62it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20757)
(17442, 20757)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:24<00:00,  5.89it/s]


✅ Finished training — AUROC: 0.6554, AUPRC: 0.9610
📊 Appending results to evaluation dataframe...

📂 Fold 4/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20760 hyperedges
(20760,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20760)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20760)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:00<00:00, 144.19it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20760)
(17442, 20760)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:25<00:00,  5.88it/s]


✅ Finished training — AUROC: 0.6465, AUPRC: 0.9603
📊 Appending results to evaluation dataframe...

📂 Fold 5/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20723 hyperedges
(20723,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20723)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20723)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:34<00:00, 112.67it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20723)
(17442, 20723)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:34<00:00,  5.28it/s]


✅ Finished training — AUROC: 0.6610, AUPRC: 0.9627
📊 Appending results to evaluation dataframe...

===== Outer Loop Iteration 4/5 =====
🔄 Loading data...
🧮 Reshaping data for StratifiedKFold...

📂 Fold 1/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20742 hyperedges
(20742,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20742)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20742)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [04:24<00:00, 65.83it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20742)
(17442, 20742)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:33<00:00,  5.35it/s]


✅ Finished training — AUROC: 0.6583, AUPRC: 0.9611
📊 Appending results to evaluation dataframe...

📂 Fold 2/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20702 hyperedges
(20702,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20702)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20702)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [04:23<00:00, 66.14it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20702)
(17442, 20702)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:33<00:00,  5.38it/s]


✅ Finished training — AUROC: 0.6649, AUPRC: 0.9627
📊 Appending results to evaluation dataframe...

📂 Fold 3/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20733 hyperedges
(20733,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20733)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20733)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [04:23<00:00, 66.19it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20733)
(17442, 20733)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:36<00:00,  5.20it/s]


✅ Finished training — AUROC: 0.6536, AUPRC: 0.9606
📊 Appending results to evaluation dataframe...

📂 Fold 4/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20771 hyperedges
(20771,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20771)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20771)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:52<00:00, 100.95it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20771)
(17442, 20771)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:33<00:00,  5.36it/s]


✅ Finished training — AUROC: 0.6485, AUPRC: 0.9614
📊 Appending results to evaluation dataframe...

📂 Fold 5/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20734 hyperedges
(20734,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20734)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20734)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:43<00:00, 106.95it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20734)
(17442, 20734)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:32<00:00,  5.40it/s]


✅ Finished training — AUROC: 0.6556, AUPRC: 0.9608
📊 Appending results to evaluation dataframe...

===== Outer Loop Iteration 5/5 =====
🔄 Loading data...
🧮 Reshaping data for StratifiedKFold...

📂 Fold 1/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20768 hyperedges
(20768,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20768)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20768)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [04:52<00:00, 59.67it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20768)
(17442, 20768)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:33<00:00,  5.32it/s]


✅ Finished training — AUROC: 0.6581, AUPRC: 0.9612
📊 Appending results to evaluation dataframe...

📂 Fold 2/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20767 hyperedges
(20767,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20767)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20767)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [04:28<00:00, 64.87it/s] 


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20767)
(17442, 20767)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:35<00:00,  5.21it/s]


✅ Finished training — AUROC: 0.6487, AUPRC: 0.9594
📊 Appending results to evaluation dataframe...

📂 Fold 3/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20699 hyperedges
(20699,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20699)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20699)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:33<00:00, 113.35it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20699)
(17442, 20699)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:25<00:00,  5.85it/s]


✅ Finished training — AUROC: 0.6502, AUPRC: 0.9603
📊 Appending results to evaluation dataframe...

📂 Fold 4/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20724 hyperedges
(20724,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20724)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20724)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [02:02<00:00, 142.85it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20724)
(17442, 20724)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:25<00:00,  5.87it/s]


✅ Finished training — AUROC: 0.6591, AUPRC: 0.9625
📊 Appending results to evaluation dataframe...

📂 Fold 5/5 — splitting train/test sets...
🚀 Starting training and evaluation...
Starting train_test function...
Selecting training labels...
Finding positive training genes...
Number of positive genes in training set: 9708
Computing sum over selected rows in incidence matrix...
Selecting hyperedges with at least 3 positive gene hits...
Selected 20730 hyperedges
(20730,)
Computing normalized hyperedge weights...
Constructing incidence matrix H...
(17442, 20730)
Computing DV vector and applying regularization to isolated nodes...
GENERATING COMPLETE VECTOR
D SHAPE (4774, 17442)
MATRIX SHAPE (17442, 20730)
(17442,)


DGIDB full vector calculation: 100%|██████████| 17442/17442 [01:59<00:00, 146.25it/s]


Generating graph G from H and hyperedge weights...
COMPUTING P FIRST
(17442, 20730)
(17442, 20730)
(17442, 17442)
Preparing adjacency matrix and features...
Initializing model...
Setting up optimizer and learning rate scheduler...


Training Epochs: 100%|██████████| 500/500 [01:24<00:00,  5.88it/s]


✅ Finished training — AUROC: 0.6582, AUPRC: 0.9621
📊 Appending results to evaluation dataframe...
0.654542258868956
0.9610890493317977
