In [1]:
import numpy as np
import scipy.sparse as sp
import numpy as np
from scipy.sparse import load_npz, csr_matrix
# Hypergraph incidence matrix (genes × drugs), already loaded
# Assume it's in sparse format
# weighted_matrix: shape (num_genes, num_drugs)
FOLDER = "DGIDB_BIPOLAR/"
HYPERGRAPH_DIRECTORY = "../Gen_Hypergraph/output/" + FOLDER
weighted_matrix = load_npz(HYPERGRAPH_DIRECTORY + "hypergraph_incidence_matrix_weighted.npz").tocsr()

# Step 1: Project hypergraph to gene-gene graph
gene_gene_adjacency = weighted_matrix @ weighted_matrix.T  # Shape: (num_genes, num_genes)

# Step 2: Row-normalize the adjacency matrix
def row_normalize(matrix):
    row_sums = np.array(matrix.sum(axis=1)).flatten()
    row_inv = np.divide(1.0, row_sums, out=np.zeros_like(row_sums), where=row_sums != 0)
    D_inv = sp.diags(row_inv)
    return D_inv @ matrix

A_norm = row_normalize(gene_gene_adjacency)

# Step 3: Random Walk with Restart (RWR)
def run_rwr(A_norm, restart_prob=0.85, num_iter=50, tol=1e-10):
    num_genes = A_norm.shape[0]
    
    # Initialize
    v = np.ones(num_genes) / num_genes  # Initial uniform distribution
    teleport = v.copy()                 # Teleport vector stays constant

    for i in range(num_iter):
        v_new = restart_prob * (A_norm @ v) + (1 - restart_prob) * teleport
        delta = np.linalg.norm(v_new - v, ord=1)
        v = v_new
        if delta < tol:
            print(f"Converged at iteration {i+1}")
            break

    return v

# Run it
rwr_scores = run_rwr(A_norm, restart_prob=0.85, num_iter=100)

# Step 4: Rank genes
sorted_indices = np.argsort(rwr_scores)[::-1]
sorted_scores = rwr_scores[sorted_indices]

# Display top 10
for rank, (idx, score) in enumerate(zip(sorted_indices[:10], sorted_scores[:10]), 1):
    print(f"{rank}. Gene index {idx} — score: {score:.5f}")


Converged at iteration 2
1. Gene index 253 — score: 0.00021
2. Gene index 929 — score: 0.00021
3. Gene index 817 — score: 0.00021
4. Gene index 1180 — score: 0.00021
5. Gene index 368 — score: 0.00021
6. Gene index 169 — score: 0.00021
7. Gene index 427 — score: 0.00021
8. Gene index 259 — score: 0.00021
9. Gene index 584 — score: 0.00021
10. Gene index 47 — score: 0.00021


In [7]:
print(sorted_scores)

[2.09467951e-04 2.09467951e-04 2.09467951e-04 ... 3.14201927e-05
 3.14201927e-05 3.14201927e-05]


In [5]:
import json
import pandas as pd
DGIDB = pd.read_csv("../Data/DGIDB/DrugToGene.tsv", sep="\t")

# Load the JSON data from the file
with open(HYPERGRAPH_DIRECTORY + 'gene_to_index.json', 'r') as file:
    gene_to_index = json.load(file)

index_to_gene = {v: k for k, v in gene_to_index.items()}
def get_gene_claim_name(ncbi_gene_id):
    ncbi_gene_id = int(ncbi_gene_id)
    result = DGIDB[(DGIDB['ncbi_gene_id']) == ncbi_gene_id]
    if not result.empty:
        return result['gene_name'].values[0]
    else:
        return "Gene name not found"

In [8]:
results_df = pd.DataFrame(columns=['Index', 'Score'])
results_df["Index"] = sorted_indices
results_df["Score"] = sorted_scores
results_df["ncbi_gene_id"] = results_df["Index"].apply(index_to_gene.get)
results_df["claim_name"] = results_df["ncbi_gene_id"].apply(get_gene_claim_name)
results_df.to_csv("GRAPH_REG_" + "rwr_results.csv", index=False)