In [1]:
import numpy as np
import json
from scipy.sparse import load_npz
binary_matrix = load_npz("../Data/hypergraphs/DGIDB_HumanNet/human/undirected/full/hypergraph_incidence_matrix_binary.npz")
weighted_matrix = load_npz("../Data/hypergraphs/DGIDB_HumanNet/human/undirected/full/hypergraph_incidence_matrix_weighted.npz")
# Load the JSON data from the file
with open('../Data/hypergraphs/DGIDB_HumanNet/human/undirected/full/gene_to_index.json', 'r') as file:
    gene_to_index = json.load(file)

# Invert the dictionary to map indices back to genes
index_to_gene = {v: k for k, v in gene_to_index.items()}

def get_gene_by_index(index):
    return index_to_gene.get(index, "Index not found")

In [2]:
import numpy as np
import networkx as nx
from scipy.sparse import csr_matrix

# Convert binary matrix to a sparse matrix
sparse_matrix = csr_matrix(binary_matrix)

# Compute gene pairwise similarity using dot product (common drugs)
adj_matrix = (sparse_matrix @ sparse_matrix.T).tocoo()

# Create graph (only add edges where there's at least one shared drug)
gene_graph = nx.Graph()
rows, cols = adj_matrix.row, adj_matrix.col

for i, j in zip(rows, cols):
    if i < j:  # Avoid duplicate edges and self-loops
        gene_graph.add_edge(i, j)


In [3]:
nx.write_edgelist(gene_graph, "gene_graph.edgelist", delimiter=",")


In [4]:
from community import community_louvain

# Step 3: Perform community detection using the Louvain method
partition = community_louvain.best_partition(gene_graph)


# Step 4: Output or analyze the partition
print("Communities detected:", partition)

Communities detected: {0: 3, 2936: 20, 2020: 20, 1838: 20, 1662: 20, 55: 20, 1436: 3, 951: 3, 2818: 3, 1388: 3, 2655: 3, 2392: 3, 2241: 3, 559: 3, 4432: 3, 129: 3, 3033: 20, 2147: 20, 1539: 24, 86: 20, 1929: 3, 1739: 3, 2985: 3, 725: 3, 1982: 3, 2082: 3, 1678: 3, 1384: 3, 2637: 3, 3740: 3, 2872: 3, 2235: 3, 4752: 3, 4726: 3, 4675: 3, 4657: 3, 4626: 3, 4600: 3, 4389: 3, 3516: 3, 3401: 3, 3397: 3, 3393: 3, 3370: 3, 3089: 3, 2799: 3, 2421: 3, 2316: 3, 2268: 3, 2032: 3, 1707: 3, 1562: 3, 1524: 3, 843: 3, 3671: 3, 3647: 3, 3308: 3, 3032: 3, 2902: 3, 2733: 3, 2651: 3, 2334: 3, 894: 3, 4099: 3, 1154: 3, 2164: 3, 1521: 3, 3974: 3, 2557: 3, 1205: 3, 3565: 3, 3062: 3, 3039: 3, 3023: 3, 2769: 3, 2768: 3, 2767: 3, 2766: 3, 2569: 3, 2546: 3, 2544: 3, 2514: 3, 2387: 3, 2386: 3, 2385: 3, 2107: 3, 2106: 3, 2105: 3, 2104: 3, 2103: 3, 2102: 3, 2101: 3, 2099: 3, 2098: 3, 2096: 3, 1617: 3, 1533: 3, 651: 3, 649: 3, 648: 3, 432: 3, 25: 3, 2406: 3, 2721: 3, 2967: 3, 2966: 3, 2965: 20, 2964: 3, 2963: 3, 2634:

In [6]:
with open("partition.txt", "w") as file:
    file.write(str(partition))

In [None]:
from networkx.algorithms.community import girvan_newman
from itertools import islice

# Step 3: Apply the Girvan-Newman community detection algorithm
# This returns a generator of communities (connected components after edge removal)
# communities_generator = girvan_newman(gene_graph)

# # Step 4: Get the first set of communities (this is after removing edges)
# # You can change the number of steps to stop the division depending on the number of communities you want.
# top_level_communities = next(communities_generator)

# # Step 5: Display the detected communities
# print("Detected communities:")
# for community in top_level_communities:
#     print(community)

# Step 1: Apply Girvan-Newman community detection
communities_generator = girvan_newman(gene_graph)

# Step 2: Control the number of clusters
num_splits = 30  # Adjust this to get fewer clusters (lower = bigger clusters)
communities = next(islice(communities_generator, num_splits, None))

# Step 3: Display the detected communities
print(f"Detected communities (after {num_splits} edge removals):")
for i, community in enumerate(communities):
    print(f"Community {i+1}: {sorted(community)}")

In [None]:
with open("communities.txt", "w") as file:
    file.write(str(communities))