In [11]:
import pandas as pd
import os
from scipy.sparse import dok_matrix, save_npz, load_npz

# --- Configuration -----------------------------------------------------------
# K   : how many leading rows of the score table are taken as seed indices.
# HOP : how many gene -> drug -> gene expansion rounds the BFS performs.
K = 10
HOP = 1

# Results are written to a folder whose name encodes the K / HOP setting.
OUTPUT_FOLDER = f"./output/KHOP/DGIDB_Bipolar/K_{K}_HOP_{HOP}/"
# exist_ok=True creates the folder if needed and is a no-op when it is already
# there, so the cell can be re-run safely without a separate existence check.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Score table; its "Index" column supplies the seed node indices.
SCORES = pd.read_csv("../Methods/output/DGIDB_Bipolar/rwr_results.csv")

# Folder holding the full hypergraph incidence matrices (.npz).
# NOTE(review): this path uses "DGIDB_BIPOLAR" while the other paths use
# "DGIDB_Bipolar" - confirm the casing on a case-sensitive filesystem.
HYPERGRAPH_DIRECTORY = "../Gen_Hypergraph/output/DGIDB_BIPOLAR/"

In [9]:
# Seed nodes: the first K entries of the "Index" column of the score table.
# SCORES is the table read in the configuration cell; referencing it (rather
# than a name that only exists in a stale kernel) keeps the notebook runnable
# from a fresh kernel.
# NOTE(review): assumes the CSV rows are already ordered best-first - confirm.
top_k_indices = SCORES["Index"].iloc[:K].to_list()

# Full incidence matrices; the code below indexes them as [gene, drug].
binary_full_matrix = load_npz(HYPERGRAPH_DIRECTORY + "hypergraph_incidence_matrix_binary.npz")
weighted_full_matrix = load_npz(HYPERGRAPH_DIRECTORY + "hypergraph_incidence_matrix_weighted.npz")

In [10]:
# Collect the HOP-hop neighbourhood of the seed genes in the incidence matrix
# (rows are indexed as genes, columns as drugs). One hop is a round trip:
# gene -> every drug in its row -> every gene in those drugs' columns.
#
# Results:
#   all_genes : seed genes plus every gene reached within HOP hops
#   all_drugs : every drug touched while expanding those genes

# Row lookups are cheap on CSR and column lookups on CSC, so convert once up
# front instead of slicing whatever format load_npz happened to return.
gene_rows = binary_full_matrix.tocsr()
drug_cols = binary_full_matrix.tocsc()

all_genes = set(top_k_indices)
all_drugs = set()

# Frontier: genes discovered in the previous round that still need expanding.
current_genes = set(top_k_indices)

for hop in range(HOP):
    if not current_genes:
        # Nothing new was reached last round; further hops cannot add anything.
        break

    # Step 1: Gene -> Drug
    next_drugs = set()
    for gene in current_genes:
        next_drugs.update(gene_rows[gene, :].nonzero()[1])

    # Drugs seen in an earlier round were already expanded then, so only the
    # newly reached ones can contribute genes that are not yet collected.
    new_drugs = next_drugs - all_drugs

    # Step 2: Drug -> Gene
    next_genes = set()
    for drug in new_drugs:
        next_genes.update(drug_cols[:, drug].nonzero()[0])

    # Only genes not visited before form the next frontier; re-expanding
    # visited genes would just rediscover drugs that are already recorded.
    frontier = next_genes - all_genes

    all_drugs.update(next_drugs)
    all_genes.update(next_genes)
    current_genes = frontier

In [12]:
# Empty matrices with the same shape as the full ones; only the entries that
# belong to the extracted neighbourhood are filled in, so indices stay aligned
# with the full matrices.
binary_incidence_matrix = dok_matrix(binary_full_matrix.shape, dtype=float)
weighted_incidence_matrix = dok_matrix(weighted_full_matrix.shape, dtype=float)

# Copy every (gene, drug) entry whose gene and drug were both collected.
# The binary matrix decides which entries exist; the weighted value is read
# from the same position.
for gene_idx in all_genes:
    drugs_of_gene = binary_full_matrix[gene_idx].nonzero()[1]
    for drug_idx in drugs_of_gene:
        if drug_idx not in all_drugs:
            continue
        binary_incidence_matrix[gene_idx, drug_idx] = binary_full_matrix[gene_idx, drug_idx]
        weighted_incidence_matrix[gene_idx, drug_idx] = weighted_full_matrix[gene_idx, drug_idx]

# DOK is convenient for element-wise filling; convert to CSR before saving.
binary_csr_matrix = binary_incidence_matrix.tocsr()
weighted_csr_matrix = weighted_incidence_matrix.tocsr()

# Write both filtered matrices under the same file names as the full ones.
for file_name, filtered_matrix in (
    ("hypergraph_incidence_matrix_binary.npz", binary_csr_matrix),
    ("hypergraph_incidence_matrix_weighted.npz", weighted_csr_matrix),
):
    save_npz(OUTPUT_FOLDER + file_name, filtered_matrix)
