In [None]:
######## Code for checking the degree threshold using the weighted network. This was done to check Annan's result ###

import pandas as pd
import networkx as nx
import numpy as np
from tqdm import tqdm

# Step 1: Load the STRING links file
# Space-separated table; the columns used below are protein1, protein2 and
# combined_score.
df = pd.read_csv("/Users/piyushagrawal/Downloads/Annan_Project/Briefings_paper/STRING_work/9606.protein.links.v11.5.txt.gz", sep=" ")

# Step 2: Create graph (inverse weights: stronger interaction = shorter path)
# Edge weight is 1000 - combined_score, so a higher score gives a cheaper edge.
# NOTE(review): this presumes combined_score never exceeds 1000; a larger value
# would produce a negative weight, which Dijkstra does not support - confirm.
# The edges are built from whole columns instead of df.iterrows(), which boxes
# every row into a Series and is very slow on a large link table. Insertion
# order is unchanged, so a pair listed more than once still keeps the weight of
# its last occurrence.
G = nx.Graph()
G.add_weighted_edges_from(
    zip(df["protein1"], df["protein2"], 1000 - df["combined_score"]),
    weight="weight",
)

# Step 3: Compute shortest paths (only consider connected pairs)
# single_source_dijkstra_path_length only reports reachable targets, so pairs
# in different components never contribute. Each unordered pair is visited
# from both endpoints, which leaves the median unchanged.
print("Computing shortest paths...")
filtered_lengths = []
for source in tqdm(G.nodes()):
    path_lengths = nx.single_source_dijkstra_path_length(G, source, weight="weight")
    # Drop the source's own entry by key rather than by value: a "> 0" test
    # would also discard distinct pairs joined by zero-weight edges.
    # Filtering here also avoids holding a second full copy of all distances.
    filtered_lengths.extend(
        dist for target, dist in path_lengths.items() if target != source
    )

# Step 4: Compute median (self-paths already excluded above)
if not filtered_lengths:
    # np.median([]) would only warn and return NaN; fail loudly instead.
    raise ValueError("No connected protein pairs found; cannot compute a median path length.")
median_length = np.median(filtered_lengths)


print(f"\nMedian shortest path length: {median_length:.2f}")


In [None]:
######## Code for checking the degree threshold using the unweighted network. This was done to check Annan's result ###

import pandas as pd
import networkx as nx
import numpy as np
import random
from tqdm import tqdm

# Step 1: Load STRING network file
# Space-separated table; only the protein1 and protein2 columns are used here.
df = pd.read_csv("/Users/piyushagrawal/Downloads/Annan_Project/Briefings_paper/STRING_work/9606.protein.links.v11.5.txt.gz", sep=" ")

# Step 2: Build unweighted graph
# Scores are ignored: every listed pair becomes an edge of length 1.
G = nx.Graph()
G.add_edges_from(zip(df['protein1'], df['protein2']))

# Step 3: Keep only largest connected component (LCC)
# Within one component every pair is reachable, so all BFS distances are finite.
largest_cc = max(nx.connected_components(G), key=len)
G_lcc = G.subgraph(largest_cc).copy()

# Step 4: Sample nodes to speed up
# random.sample raises ValueError when asked for more items than exist, so the
# request is clamped to the component size (small networks use every node).
# The sample is unseeded, so the estimate can vary slightly between runs.
sample_size = 2000
effective_sample_size = min(sample_size, G_lcc.number_of_nodes())
sample_nodes = random.sample(list(G_lcc.nodes), effective_sample_size)

# Step 5: Compute shortest path lengths (unweighted)
# One BFS per sampled source, measured against every node in the LCC.
all_path_lengths = []
for node in tqdm(sample_nodes, desc="Computing shortest paths"):
    lengths = nx.single_source_shortest_path_length(G_lcc, node)
    all_path_lengths.extend([l for l in lengths.values() if l > 0])  # exclude self-distances

# Step 6: Compute median
if not all_path_lengths:
    # Without this guard, int(np.median([])) fails with an opaque NaN error.
    raise ValueError("No node pairs found in the largest connected component; cannot compute a median.")
# int() truncates, so a half-integer median (e.g. 2.5) is reported as 2.
median_d = int(np.median(all_path_lengths))
print(f"\nEstimated median shortest path length (d): {median_d}")