## 5.3 Decision point: log2FC/psitedeviation and network betweenescentrality

## 5.3 Decision point: log2FC/psitedeviation and network betweenescentrality

### 5.3.1 Collapse site-level metrics to protein level

In [290]:
# ======================================================
# 1Ô∏è‚É£ Collapse site-level metrics_df ‚Üí protein-level
# ======================================================
import pandas as pd
import numpy as np

# Sort by combined_score and select top phosphosite per UniProt
metrics_df_prot = (
    metrics_df.sort_values("combined_score", ascending=False)
              .groupby("uniprot_id", as_index=False)
              .first()
)

metrics_df_prot = metrics_df_prot.rename(columns={
    "uniprot_id": "UniProt",
    "combined_score": "dyn_deviation"
})

# Define direction (based on delta_slope or delta_auc)
def classify_direction(x, thr=0.05):
    if x > thr:
        return "upregulated"
    elif x < -thr:
        return "downregulated"
    else:
        return "neutral"

metrics_df_prot["direction"] = metrics_df_prot["delta_slope"].apply(classify_direction)

print(f"‚úÖ Collapsed to {len(metrics_df_prot)} unique proteins")
display(metrics_df_prot.head(10))


‚úÖ Collapsed to 1315 unique proteins


Unnamed: 0,UniProt,gene_symbol,cosine,dtw,euclidean,delta_slope,abs_delta_slope,delta_slope_rel,abs_delta_slope_rel,delta_auc,...,dtw_norm,euclidean_norm,delta_slope_norm,abs_delta_slope_norm,delta_slope_rel_norm,abs_delta_slope_rel_norm,delta_auc_norm,abs_delta_auc_norm,dyn_deviation,direction
0,A0A1B0GUW6,,0.00029,0.218825,1.285097,-0.31819,0.31819,-0.016535,0.016535,-1.223503,...,0.282891,0.263915,0.265374,0.406838,0.022481,0.000217,0.395647,0.267925,0.208481,downregulated
1,A0FGR8,ESYT2,0.00017,0.248443,0.836646,0.228243,0.228243,0.01558,0.01558,0.980959,...,0.321783,0.170948,0.630221,0.29182,0.022894,0.000205,0.65656,0.214808,0.214645,upregulated
2,A1A4S6,ARHGAP10,1.7e-05,0.078184,0.244207,0.077003,0.077003,0.004077,0.004077,0.089862,...,0.098215,0.048132,0.52924,0.098427,0.022746,5.4e-05,0.551093,0.01966,0.054036,upregulated
3,A1L390,PLEKHG3,4.3e-05,0.078588,0.410054,0.105279,0.105279,0.004854,0.004854,-0.032295,...,0.098745,0.082514,0.54812,0.134584,0.022756,6.4e-05,0.536635,0.007053,0.051152,upregulated
4,A2A2Y4,FRMD3,0.000472,0.179169,0.856551,0.228233,0.228233,0.018718,0.018718,0.362753,...,0.230819,0.175075,0.630214,0.291807,0.022934,0.000246,0.583391,0.079423,0.135327,upregulated
5,A2RRP1,NBAS,2e-06,0.033302,0.084008,-0.010437,0.010437,-0.000565,0.000565,0.109399,...,0.039279,0.014922,0.470857,0.013308,0.022686,7e-06,0.553405,0.023939,0.025626,neutral
6,A2RUS2,DENND3,4.6e-05,0.162292,1.008272,-0.25008,0.25008,-0.006216,0.006216,-1.051629,...,0.208657,0.206527,0.31085,0.319744,0.022614,8.2e-05,0.41599,0.230285,0.16192,downregulated
7,A6NFX1,MFSD2B,3.3e-05,0.07478,0.295435,0.083011,0.083011,0.006461,0.006461,0.24041,...,0.093745,0.058752,0.533251,0.106109,0.022777,8.5e-05,0.568911,0.05263,0.060051,upregulated
8,A6QL63,ABTB3,2e-05,0.11793,0.639656,0.012998,0.012998,0.00036,0.00036,0.986409,...,0.150406,0.130111,0.486504,0.016582,0.022698,5e-06,0.657205,0.216002,0.129205,neutral
9,A7E2V4,ZSWIM8,0.000405,0.232378,1.06166,-0.125017,0.125017,-0.010064,0.010064,-1.490417,...,0.300689,0.217595,0.394354,0.159823,0.022564,0.000132,0.364056,0.326379,0.231972,downregulated


### 5.3.2 Attach protein metrics to the graph

In [291]:
# ======================================================
# 2Ô∏è‚É£ Integrate with igraph network (giant component)
# ======================================================
import igraph as ig

node_names = g_gcc.vs["name"]  # UniProt IDs
nodes_df = pd.DataFrame({"UniProt": node_names})

merged = (
    nodes_df.merge(metrics_df_prot[["UniProt", "gene_symbol", "delta_slope", "delta_auc", "dyn_deviation", "direction"]],
                   on="UniProt", how="left")
    .fillna({"gene_symbol": "", "delta_slope": 0, "delta_auc": 0, "dyn_deviation": 0, "direction": "neutral"})
)

print(f"‚úÖ Annotated {len(merged)} network nodes with phospho metrics")
display(merged.head(10))

# Push back to igraph
for col in ["gene_symbol", "delta_slope", "delta_auc", "dyn_deviation", "direction"]:
    g_gcc.vs[col] = merged[col].tolist()


‚úÖ Annotated 581 network nodes with phospho metrics


Unnamed: 0,UniProt,gene_symbol,delta_slope,delta_auc,dyn_deviation,direction
0,P0DP24,,0.0,0.0,0.0,neutral
1,Q9Y210,TRPC6,0.079137,-0.009224,0.095635,upregulated
2,P06241,FYN,0.067988,-0.150654,0.047711,upregulated
3,P49137,MAPKAPK2,0.079732,-0.177862,0.066835,upregulated
4,Q16539,MAPK14,-0.009051,0.346202,0.044965,neutral
5,P31749,AKT1,0.045927,-0.903394,0.167479,neutral
6,O15111,,0.0,0.0,0.0,neutral
7,P19838,,0.0,0.0,0.0,neutral
8,P27986,,0.0,0.0,0.0,neutral
9,P78536,ADAM17,-0.096917,-0.131354,0.112439,downregulated


### 5.3.3 Identify top differential nodes (signal origin)

In [292]:
# ======================================================
# 3Ô∏è‚É£ Define "top differential" nodes for reach analysis
# ======================================================
# You can define this based on absolute delta_slope or combined_score
TOP_N = 50
thr = metrics_df_prot["delta_slope"].abs().quantile(0.9)

top_nodes = metrics_df_prot.query("abs(delta_slope) >= @thr")
top_uniprots = set(top_nodes["UniProt"])

print(f"üåü Identified {len(top_uniprots)} top differential proteins (|Œîslope| ‚â• {thr:.3f})")
display(top_nodes.head(10))


üåü Identified 132 top differential proteins (|Œîslope| ‚â• 0.267)


Unnamed: 0,UniProt,gene_symbol,cosine,dtw,euclidean,delta_slope,abs_delta_slope,delta_slope_rel,abs_delta_slope_rel,delta_auc,...,dtw_norm,euclidean_norm,delta_slope_norm,abs_delta_slope_norm,delta_slope_rel_norm,abs_delta_slope_rel_norm,delta_auc_norm,abs_delta_auc_norm,dyn_deviation,direction
0,A0A1B0GUW6,,0.00029,0.218825,1.285097,-0.31819,0.31819,-0.016535,0.016535,-1.223503,...,0.282891,0.263915,0.265374,0.406838,0.022481,0.000217,0.395647,0.267925,0.208481,downregulated
17,O00151,PDLIM1,0.000107,0.24058,1.002421,0.273118,0.273118,0.007955,0.007955,-0.564487,...,0.311459,0.205314,0.660183,0.349202,0.022796,0.000105,0.473646,0.123602,0.186656,upregulated
37,O14639,ABLIM1,0.001204,0.420141,1.969342,0.390976,0.390976,0.021635,0.021635,0.937341,...,0.547241,0.405762,0.738876,0.49991,0.022972,0.000284,0.651397,0.205256,0.325006,upregulated
45,O14980,XPO1,6.2e-05,0.168973,1.061922,0.328569,0.328569,0.008145,0.008145,0.541029,...,0.21743,0.217649,0.697207,0.420109,0.022798,0.000107,0.604491,0.118465,0.138358,upregulated
68,O43182,ARHGAP6,0.000477,0.375599,1.70129,0.541059,0.541059,0.024898,0.024898,1.098064,...,0.488754,0.350194,0.839085,0.691824,0.023014,0.000327,0.67042,0.240454,0.304572,upregulated
85,O43561,LAT,0.001545,0.268038,1.403649,0.32868,0.32868,0.032238,0.032238,1.078904,...,0.347514,0.288491,0.697282,0.420251,0.023108,0.000424,0.668152,0.236258,0.232927,upregulated
91,O43665,RGS10,0.000519,0.575117,2.681708,0.284379,0.284379,0.007731,0.007731,-3.505836,...,0.750742,0.55344,0.667702,0.363602,0.022793,0.000102,0.125518,0.767751,0.567334,upregulated
99,O60229,KALRN,0.000256,0.390351,1.599641,-0.45759,0.45759,-0.01723,0.01723,-1.4812,...,0.508124,0.329121,0.172299,0.58509,0.022472,0.000226,0.365147,0.32436,0.335209,downregulated
115,O60890,OPHN1,0.000131,0.634324,2.623327,0.673443,0.673443,0.01433,0.01433,3.279046,...,0.828487,0.541337,0.927476,0.861106,0.022878,0.000188,0.928553,0.718084,0.593812,upregulated
120,O75113,N4BP1,0.000133,0.273315,1.13821,-0.348876,0.348876,-0.01021,0.01021,0.210078,...,0.354443,0.233464,0.244885,0.446076,0.022562,0.000134,0.565321,0.045987,0.188752,downregulated


### 5.3.4 Compute reach / average distance to differential proteins

In [293]:
# ======================================================
# 4Ô∏è‚É£ Compute reach metrics relative to top differential nodes
# ======================================================
dist_matrix = g_gcc.shortest_paths_dijkstra(weights=None, mode="OUT")

uni_index = {v["name"]: i for i, v in enumerate(g_gcc.vs)}

# Compute mean distance to all top differential nodes
avg_dist_to_top = []
for node in g_gcc.vs:
    i = uni_index[node["name"]]
    dists = [dist_matrix[i][uni_index[t]] for t in top_uniprots if t in uni_index]
    dists = [d for d in dists if d != 0 and np.isfinite(d)]  # exclude self & unreachable
    avg_dist_to_top.append(np.mean(dists) if len(dists) > 0 else np.nan)

g_gcc.vs["avg_dist_to_top"] = avg_dist_to_top
print("‚úÖ Computed average shortest-path distance to top differential nodes")


‚úÖ Computed average shortest-path distance to top differential nodes


  dist_matrix = g_gcc.shortest_paths_dijkstra(weights=None, mode="OUT")


In [295]:
# Check which of your top IDs are not in the graph
graph_nodes = set(g_gcc.vs["name"])
missing_in_graph = [u for u in top_uniprots if u not in graph_nodes]

print(f"üß© Graph contains {len(graph_nodes)} nodes")
print(f"üîé Top differential proteins: {len(top_uniprots)}")
print(f"‚ö†Ô∏è {len(missing_in_graph)} top proteins not found in graph:")
print(missing_in_graph[:10])  # show a few examples


üß© Graph contains 581 nodes
üîé Top differential proteins: 132
‚ö†Ô∏è 91 top proteins not found in graph:
['P01042', 'O00151', 'Q9NSY0', 'O95425', 'P29692', 'Q99719', 'Q147X3', 'Q8ND56', 'Q66K74', 'Q86X29']


In [296]:
import itertools

graph_names = g_gcc.vs["name"]
print("üîç Example graph vertex names:")
print(graph_names[:10])


üîç Example graph vertex names:
['P0DP24', 'Q9Y210', 'P06241', 'P49137', 'Q16539', 'P31749', 'O15111', 'P19838', 'P27986', 'P78536']


### 5.3.5 Classify upstream/downstream influence

In [297]:
# ======================================================
# 5Ô∏è‚É£ Classify upstream/downstream influence (robust)
# ======================================================
import numpy as np
import pandas as pd
from tqdm import tqdm

# --- Keep only top differential proteins present in the graph
graph_nodes = set(g_gcc.vs["name"])
top_uniprots_present = [u for u in top_uniprots if u in graph_nodes]
print(f"‚úÖ {len(top_uniprots_present)} / {len(top_uniprots)} top differential proteins are in the network.")

# --- Precompute index map
name_to_idx = {v["name"]: v.index for v in g_gcc.vs}

# --- Relation classifier (uses shortest paths, direction-aware)
def classify_relation(g, top_set, node_name):
    """Return 'upstream', 'downstream', 'bidirectional', or 'unrelated'."""
    if node_name not in name_to_idx:
        return "unrelated"

    node_idx = name_to_idx[node_name]
    reachable_from_top = False
    reaches_top = False

    for top in top_set:
        if top not in name_to_idx:
            continue
        top_idx = name_to_idx[top]

        # downstream: top ‚Üí node
        dist_down = g.shortest_paths_dijkstra(source=top_idx, target=node_idx, weights=None, mode="OUT")[0][0]
        # upstream: node ‚Üí top
        dist_up = g.shortest_paths_dijkstra(source=node_idx, target=top_idx, weights=None, mode="OUT")[0][0]

        if np.isfinite(dist_down):
            reachable_from_top = True
        if np.isfinite(dist_up):
            reaches_top = True

        if reachable_from_top and reaches_top:
            break

    if reachable_from_top and not reaches_top:
        return "downstream"
    elif reaches_top and not reachable_from_top:
        return "upstream"
    elif reachable_from_top and reaches_top:
        return "bidirectional"
    else:
        return "unrelated"

# --- Apply classifier with progress bar
relations = []
for v in tqdm(g_gcc.vs, desc="Classifying relations"):
    relations.append(classify_relation(g_gcc, top_uniprots_present, v["name"]))

g_gcc.vs["relation_to_top"] = relations
print("‚úÖ Classified nodes by direction of signal relation")

‚úÖ 41 / 132 top differential proteins are in the network.


  dist_down = g.shortest_paths_dijkstra(source=top_idx, target=node_idx, weights=None, mode="OUT")[0][0]
  dist_up = g.shortest_paths_dijkstra(source=node_idx, target=top_idx, weights=None, mode="OUT")[0][0]
Classifying relations: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 581/581 [00:01<00:00, 421.36it/s]

‚úÖ Classified nodes by direction of signal relation





### 5.3.6 Compute distance, betweenness, and reach coverage

In [298]:
# ======================================================
# 6Ô∏è‚É£ Compute distance, betweenness, and reach coverage
# ======================================================
print("‚è≥ Computing shortest-path distances to top differential proteins...")
dist_matrix = np.array(g_gcc.shortest_paths_dijkstra(
    target=[name_to_idx[t] for t in top_uniprots_present],
    weights=None,
    mode="OUT"
))

avg_dists = []
coverage = []

for i, v in enumerate(g_gcc.vs):
    dists = dist_matrix[i, :]
    # Exclude infinities and self (0-distance)
    finite_dists = dists[np.isfinite(dists) & (dists > 0)]
    avg_dists.append(finite_dists.mean() if len(finite_dists) > 0 else np.nan)
    coverage.append((np.isfinite(dists) & (dists > 0)).sum() / len(top_uniprots_present))

g_gcc.vs["avg_dist_to_top"] = avg_dists
g_gcc.vs["reach_coverage"] = coverage

# Compute betweenness centrality (unweighted)
g_gcc.vs["betweenness"] = g_gcc.betweenness()

print("‚úÖ Added avg_dist_to_top, reach_coverage, and betweenness")

‚è≥ Computing shortest-path distances to top differential proteins...
‚úÖ Added avg_dist_to_top, reach_coverage, and betweenness


  dist_matrix = np.array(g_gcc.shortest_paths_dijkstra(


### 5.3.7 Assemble node-level summary table

In [299]:
reach_df = pd.DataFrame({
    "UniProt": g_gcc.vs["name"],
    "gene_symbol": g_gcc.vs["gene_symbol"] if "gene_symbol" in g_gcc.vs.attributes() else ["" for _ in g_gcc.vs],
    "direction": g_gcc.vs["direction"] if "direction" in g_gcc.vs.attributes() else ["" for _ in g_gcc.vs],
    "relation_to_top": g_gcc.vs["relation_to_top"],
    "avg_dist_to_top": g_gcc.vs["avg_dist_to_top"],
    "reach_coverage": g_gcc.vs["reach_coverage"],
    "betweenness": g_gcc.vs["betweenness"],
    "delta_slope": g_gcc.vs["delta_slope"] if "delta_slope" in g_gcc.vs.attributes() else np.nan,
    "delta_auc": g_gcc.vs["delta_auc"] if "delta_auc" in g_gcc.vs.attributes() else np.nan,
    "dyn_deviation": g_gcc.vs["dyn_deviation"] if "dyn_deviation" in g_gcc.vs.attributes() else np.nan,
})

display(reach_df.head(10))


Unnamed: 0,UniProt,gene_symbol,direction,relation_to_top,avg_dist_to_top,reach_coverage,betweenness,delta_slope,delta_auc,dyn_deviation
0,P0DP24,,neutral,upstream,3.40625,0.780488,0.0,0.0,0.0,0.0
1,Q9Y210,TRPC6,upregulated,downstream,,0.0,0.0,0.079137,-0.009224,0.095635
2,P06241,FYN,upregulated,bidirectional,2.78125,0.780488,4200.646077,0.067988,-0.150654,0.047711
3,P49137,MAPKAPK2,upregulated,bidirectional,3.375,0.780488,1238.549449,0.079732,-0.177862,0.066835
4,Q16539,MAPK14,neutral,bidirectional,2.6875,0.780488,12885.704631,-0.009051,0.346202,0.044965
5,P31749,AKT1,neutral,bidirectional,2.125,0.780488,26749.364661,0.045927,-0.903394,0.167479
6,O15111,,neutral,bidirectional,4.40625,0.780488,705.675735,0.0,0.0,0.0
7,P19838,,neutral,bidirectional,4.28125,0.780488,1169.251541,0.0,0.0,0.0
8,P27986,,neutral,bidirectional,2.96875,0.780488,1221.408208,0.0,0.0,0.0
9,P78536,ADAM17,downregulated,bidirectional,3.5,0.780488,571.021685,-0.096917,-0.131354,0.112439


### 5.3.8 Split into upregulated / downregulated subnetworks

In [300]:
up_nodes = reach_df.query("direction == 'upregulated'")["UniProt"].tolist()
down_nodes = reach_df.query("direction == 'downregulated'")["UniProt"].tolist()

g_up = g_gcc.subgraph([name_to_idx[n] for n in up_nodes if n in name_to_idx])
g_down = g_gcc.subgraph([name_to_idx[n] for n in down_nodes if n in name_to_idx])

print(f"üìà Upregulated subgraph: {g_up.vcount()} nodes, {g_up.ecount()} edges")
print(f"üìâ Downregulated subgraph: {g_down.vcount()} nodes, {g_down.ecount()} edges")

üìà Upregulated subgraph: 129 nodes, 91 edges
üìâ Downregulated subgraph: 127 nodes, 95 edges


### 5.3.9 Export annotated graph and table

In [None]:
out_dir = base / "analysis" / "networks" / "reach_analysis"
out_dir.mkdir(parents=True, exist_ok=True)

reach_df.to_csv(out_dir / "phosphosite_network_reach_summary.csv", index=False)
g_gcc.write_graphml(str(out_dir / "phosphosite_network_reach_annotated.graphml"))

print(f"üíæ Saved annotated network and reach summary ‚Üí {out_dir}")


###  Optional: inspect subsets

In [301]:
downstream_high_bc = reach_df.query("relation_to_top == 'downstream' and betweenness > 0.05")
display(downstream_high_bc.head())

far_nodes = reach_df.query("avg_dist_to_top > 5")
display(far_nodes.head())

# ======================================================
# ‚úÖ Output overview
# ======================================================
print("""
Metric                Meaning
-------------------------------------------------------------
avg_dist_to_top       Mean shortest-path distance to top differential nodes (excl. self)
reach_coverage        Fraction of top differential nodes reachable from each node
relation_to_top       'upstream', 'downstream', 'bidirectional', or 'unrelated'
betweenness           Network centrality (information flow potential)
direction             From phospho metrics (up/down/neutral)
dyn_deviation         Dynamic trajectory deviation score
delta_slope, delta_auc Additional phospho-based trend metrics
""")

Unnamed: 0,UniProt,gene_symbol,direction,relation_to_top,avg_dist_to_top,reach_coverage,betweenness,delta_slope,delta_auc,dyn_deviation
10,P05067,,neutral,downstream,,0.0,286.0,0.0,0.0,0.0
34,P78344,EIF4G2,neutral,downstream,,0.0,39.069762,-0.039342,0.106134,0.024069
36,P55212,,neutral,downstream,,0.0,307.669611,0.0,0.0,0.0
44,Q9NR09,BIRC6,downregulated,downstream,,0.0,101.520453,-0.095556,0.383428,0.068309
45,O75385,ULK1,upregulated,downstream,,0.0,294.135714,0.191247,0.130107,0.129918


Unnamed: 0,UniProt,gene_symbol,direction,relation_to_top,avg_dist_to_top,reach_coverage,betweenness,delta_slope,delta_auc,dyn_deviation
30,P55957,,neutral,bidirectional,6.0,0.780488,905.931631,0.0,0.0,0.0
49,Q13188,STK3,downregulated,bidirectional,6.6875,0.780488,823.970221,-0.140163,0.275364,0.106655
51,Q92934,BAD,downregulated,bidirectional,6.0,0.780488,1792.531907,-0.136075,0.106846,0.097038
54,Q07812,,neutral,bidirectional,6.0,0.780488,878.606689,0.0,0.0,0.0
97,P04632,,neutral,upstream,5.125,0.780488,0.0,0.0,0.0,0.0



Metric                Meaning
-------------------------------------------------------------
avg_dist_to_top       Mean shortest-path distance to top differential nodes (excl. self)
reach_coverage        Fraction of top differential nodes reachable from each node
relation_to_top       'upstream', 'downstream', 'bidirectional', or 'unrelated'
betweenness           Network centrality (information flow potential)
direction             From phospho metrics (up/down/neutral)
dyn_deviation         Dynamic trajectory deviation score
delta_slope, delta_auc Additional phospho-based trend metrics



### 5.3.1 old collapse, Attach protein metrics to the graph, Identify top differential nodes (signal origin), Compute reach / average distance to differential proteins, Classify upstream/downstream influence,Compute centrality & coverage metrics

In [253]:
# ======================================================
# üìÇ Load prepared omics data
# ======================================================

from pathlib import Path
import pandas as pd

# Define base and input directories
base = Path(".")
in_dir = base / "analysis" / "networks" / "prepared_data"

# --- Expression mean tables ---
prot_expr_mean = pd.read_csv(in_dir / "proteome_expr_mean.csv", index_col=0)
phospho_expr_mean = pd.read_csv(in_dir / "phosphoproteome_expr_mean.csv", index_col=0)
combined_expr_mean = pd.read_csv(in_dir / "combined_expr_mean.csv", index_col=0)

# --- Fold-change tables ---
prot_fc = pd.read_csv(in_dir / "proteome_fc.csv", index_col=0)
phospho_fc = pd.read_csv(in_dir / "phosphoproteome_fc.csv", index_col=0)

# --- Detected proteins list ---
detected_uniprots = pd.read_csv(in_dir / "detected_uniprots.txt", header=None)[0].tolist()

# Summary info
print(f"‚úÖ Loaded {len(detected_uniprots)} detected UniProt IDs.")
print(f"üß¨ proteome_expr_mean shape: {prot_expr_mean.shape}")
print(f"üß™ phospho_expr_mean shape: {phospho_expr_mean.shape}")
print(f"üîó combined_expr_mean shape: {combined_expr_mean.shape}")
print(f"üìà proteome_fc shape: {prot_fc.shape}")
print(f"üìä phospho_fc shape: {phospho_fc.shape}")

# Quick preview of the phosphoproteome FC data
display(phospho_fc.head())


‚úÖ Loaded 2916 detected UniProt IDs.
üß¨ proteome_expr_mean shape: (2326, 8)
üß™ phospho_expr_mean shape: (1315, 8)
üîó combined_expr_mean shape: (2915, 28)
üìà proteome_fc shape: (2326, 10)
üìä phospho_fc shape: (3150, 11)


Unnamed: 0_level_0,logFC.10,P.Value.10,adj.P.Val.10,logFC.600,P.Value.600,adj.P.Val.600,logFC.1800,P.Value.1800,adj.P.Val.1800,psite_id,gene_symbol
UniProt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
A0A1B0GUW6,-0.48763,0.237964,0.609327,-0.121132,0.768297,0.9403,-1.182801,0.005222,0.070003,A0A1B0GUW6;NA;S773|T786|T789|T791;SPCLTQSPGLHK...,A0A1B0GUW6
A0FGR8,0.112968,0.561838,0.836479,-0.631699,0.001758,0.061496,0.024567,0.899486,0.951119,A0FGR8;ESYT2;S691;SHMSGSPGPGGSNTAPSTPVIGGSDKPG...,ESYT2
A0FGR8,-0.336258,0.216695,0.585516,0.364801,0.180614,0.546526,0.488185,0.074705,0.302857,A0FGR8;ESYT2;S738|S739;SSSSLLASPGHISVK;2581,ESYT2
A0FGR8,0.214166,0.098726,0.41905,0.162217,0.209094,0.569764,0.333042,0.011357,0.106868,A0FGR8;ESYT2;S739;SSSSLLASPGHISVK;2582,ESYT2
A0FGR8,0.177454,0.753409,0.918455,0.484828,0.39187,0.741377,0.658353,0.246055,0.541253,A0FGR8;ESYT2;S758;EPTPSIASDISLPIATQELR;3736,ESYT2


In [254]:
# ======================================================
# üßÆ Collapse phosphosite-level FC table ‚Üí protein-level summary (fixed)
# ======================================================

import numpy as np
import pandas as pd

# Ensure UniProt is a column (not index)
phospho_fc = phospho_fc.reset_index().rename(columns={'index': 'UniProt'})

# Identify logFC columns
fc_cols = [c for c in phospho_fc.columns if c.startswith("logFC")]
print(f"Detected logFC columns: {fc_cols}")

# Compute max absolute log2FC per phosphosite across timepoints
phospho_fc["max_abs_logFC"] = phospho_fc[fc_cols].abs().max(axis=1)

# Sort and select the phosphosite with the largest |logFC| per UniProt
collapsed_fc = (
    phospho_fc
    .sort_values("max_abs_logFC", ascending=False)
    .groupby("UniProt", as_index=False)
    .first()
)

# Compute mean logFC across all timepoints (for coloring or summarization)
collapsed_fc["mean_logFC"] = collapsed_fc[fc_cols].mean(axis=1)

# Keep clean, relevant columns
collapsed_fc = collapsed_fc[["UniProt", "gene_symbol", "mean_logFC", "max_abs_logFC"]]

# Sort for convenience
collapsed_fc = collapsed_fc.sort_values("max_abs_logFC", ascending=False)

print(f"\n‚úÖ Collapsed from {len(phospho_fc)} phosphosites ‚Üí {len(collapsed_fc)} unique UniProt proteins.")
display(collapsed_fc.head(10))

# Save collapsed version for network integration
out_dir = base / "analysis" / "networks" / "summaries"
out_dir.mkdir(parents=True, exist_ok=True)
collapsed_fc.to_csv(out_dir / "phosphoproteome_fc_collapsed.csv", index=False)

print(f"üíæ Saved collapsed phosphoproteome FC table ‚Üí {out_dir/'phosphoproteome_fc_collapsed.csv'}")


Detected logFC columns: ['logFC.10', 'logFC.600', 'logFC.1800']

‚úÖ Collapsed from 3150 phosphosites ‚Üí 1315 unique UniProt proteins.


Unnamed: 0,UniProt,gene_symbol,mean_logFC,max_abs_logFC
167,O95810,CAVIN2,0.157718,4.082586
1188,Q9UDT6,CLIP2,-1.44695,2.325571
725,Q6WCQ1,MPRIP,1.096298,2.187286
212,P07359,GP1BA,0.665256,2.113596
1260,Q9Y2Q0,ATP8A1,0.476136,2.007445
1056,Q9C0C9,UBE2O,-0.805276,1.979999
815,Q8IZ21,PHACTR4,-0.749431,1.920231
564,Q14432,PDE3A,0.976598,1.901809
1189,Q9UDY2,TJP2,0.087316,1.892123
453,Q00577,PURA,-0.616674,1.844774


üíæ Saved collapsed phosphoproteome FC table ‚Üí analysis/networks/summaries/phosphoproteome_fc_collapsed.csv


In [255]:
# Collapse site-level metrics_df ‚Üí protein-level (mean or top site)
metrics_df_prot = (
    metrics_df
    .sort_values("combined_score", ascending=False)
    .groupby("uniprot_id", as_index=False)
    .first()  # top site per protein by dynamic deviation
)

metrics_df_prot = metrics_df_prot[["uniprot_id", "gene_symbol", "combined_score"]]
metrics_df_prot = metrics_df_prot.rename(columns={
    "uniprot_id": "UniProt",
    "combined_score": "dyn_deviation"
})

print(f"‚úÖ Collapsed trajectory deviation to {len(metrics_df_prot)} proteins")
display(metrics_df_prot.head(10))


‚úÖ Collapsed trajectory deviation to 1315 proteins


Unnamed: 0,UniProt,gene_symbol,dyn_deviation
0,A0A1B0GUW6,,0.318928
1,A0FGR8,ESYT2,0.42718
2,A1A4S6,ARHGAP10,0.299838
3,A1L390,PLEKHG3,0.297009
4,A2A2Y4,FRMD3,0.383578
5,A2RRP1,NBAS,0.256866
6,A2RUS2,DENND3,0.345977
7,A6NFX1,MFSD2B,0.299007
8,A6QL63,ABTB3,0.325786
9,A7E2V4,ZSWIM8,0.338478


In [256]:
merged_fc_dyn = pd.merge(
    collapsed_fc,
    metrics_df_prot,
    on=["UniProt", "gene_symbol"],
    how="outer"
).fillna({"mean_logFC": 0, "max_abs_logFC": 0, "dyn_deviation": 0})

display(merged_fc_dyn.head(10))


Unnamed: 0,UniProt,gene_symbol,mean_logFC,max_abs_logFC,dyn_deviation
0,A0A1B0GUW6,A0A1B0GUW6,-0.597188,1.182801,0.0
1,A0A1B0GUW6,,0.0,0.0,0.318928
2,A0FGR8,ESYT2,0.440211,0.658353,0.42718
3,A1A4S6,ARHGAP10,-0.068705,0.244482,0.299838
4,A1L390,PLEKHG3,0.046903,0.279061,0.297009
5,A2A2Y4,FRMD3,0.274965,0.839885,0.383578
6,A2RRP1,NBAS,0.025268,0.05687,0.256866
7,A2RUS2,DENND3,-0.140743,0.263901,0.345977
8,A6NFX1,MFSD2B,0.130159,0.285341,0.299007
9,A6QL63,ABTB3,0.296745,0.516953,0.325786


In [257]:
import pandas as pd
import numpy as np

# ======================================================
# 1Ô∏è‚É£ Collapse phosphosites to one per UniProt (top by combined_score)
# ======================================================
collapsed_sites = (
    top_df.sort_values("combined_score", ascending=False)
          .groupby("uniprot_id", as_index=False)
          .first()
)

# Classify CXCR7 vs DMSO direction
def classify_dir(x, thr=0.05):
    if x > thr:
        return "upregulated"
    elif x < -thr:
        return "downregulated"
    else:
        return "neutral"

collapsed_sites["regulation"] = collapsed_sites["delta_slope"].apply(classify_dir)
collapsed_sites = collapsed_sites.rename(columns={"uniprot_id": "UniProt"})

print(f"‚úÖ Collapsed {len(top_df)} phosphosites ‚Üí {len(collapsed_sites)} proteins")
display(collapsed_sites.head(5))

# ======================================================
# 2Ô∏è‚É£ Merge collapsed phosphosite data with your igraph GCC
# ======================================================
# g_gcc is your giant connected component (igraph object)
node_names = g_gcc.vs["name"]  # UniProt IDs in GCC
nodes_df = pd.DataFrame({"UniProt": node_names})

merged = (
    nodes_df.merge(
        collapsed_sites[["UniProt", "gene_symbol", "delta_slope", "combined_score", "regulation"]],
        on="UniProt",
        how="left"
    )
    .fillna({"gene_symbol": "", "delta_slope": 0, "combined_score": 0, "regulation": "neutral"})
)

print(f"‚úÖ Merged phospho metrics to GCC ({merged.shape[0]} total nodes)")
display(merged.head(10))

# ======================================================
# 3Ô∏è‚É£ Compute signal strength (signed combined score)
# ======================================================
merged["signal_strength"] = merged["combined_score"] * np.sign(merged["delta_slope"])

# Optional color code for plotting
merged["color"] = merged["regulation"].map({
    "upregulated": "red",
    "downregulated": "blue",
    "neutral": "gray"
})

# ======================================================
# 4Ô∏è‚É£ Attach data back to igraph node attributes
# ======================================================
g_gcc.vs["signal_strength"] = merged.set_index("UniProt").loc[g_gcc.vs["name"], "signal_strength"].tolist()
g_gcc.vs["regulation"] = merged.set_index("UniProt").loc[g_gcc.vs["name"], "regulation"].tolist()
g_gcc.vs["color"] = merged.set_index("UniProt").loc[g_gcc.vs["name"], "color"].tolist()

print("üé® Annotated igraph GCC with signal_strength + regulation flags.")

# ======================================================
# 5Ô∏è‚É£ Optional ‚Äî export for Cytoscape
# ======================================================
out_path = in_dir.parent / "cxcr7_vs_dmso_annotated.graphml"
g_gcc.write_graphml(str(out_path))
print(f"üíæ Saved annotated GCC to: {out_path}")

# ======================================================
# 6Ô∏è‚É£ Prepare pseudo-log2FC table for downstream reach analysis
# ======================================================
log2fc_df = merged.set_index("UniProt")[["signal_strength"]].rename(columns={"signal_strength": "Combined"})
print(f"‚úÖ Generated pseudo log2FC table for influence analysis: {log2fc_df.shape}")
display(log2fc_df.head(10))


‚úÖ Collapsed 50 phosphosites ‚Üí 47 proteins


Unnamed: 0,UniProt,gene_symbol,cosine,dtw,euclidean,delta_slope,delta_auc,cosine_norm,dtw_norm,euclidean_norm,delta_slope_norm,delta_auc_norm,combined_score,regulation
0,O14639,ABLIM1,0.001105,0.504391,2.057053,0.212694,1.135433,0.000598,0.442014,0.423697,0.628323,0.659525,0.542969,upregulated
1,O43182,ARHGAP6,0.000385,0.463287,1.834907,0.334844,1.327191,0.000208,0.405626,0.377668,0.740081,0.679708,0.55776,upregulated
2,O43665,RGS10,0.00047,0.725852,2.808364,0.534542,-3.783795,0.000254,0.638067,0.579371,0.922788,0.141767,0.585172,upregulated
3,O60890,OPHN1,4.5e-05,0.952527,2.986638,0.24515,3.754927,2.4e-05,0.838737,0.61631,0.658018,0.935233,0.817681,upregulated
4,O75962,TRIO,0.004389,0.372792,1.679757,0.307192,0.907695,0.002374,0.325513,0.34552,0.714781,0.635555,0.500341,upregulated


‚úÖ Merged phospho metrics to GCC (581 total nodes)


Unnamed: 0,UniProt,gene_symbol,delta_slope,combined_score,regulation
0,P0DP24,,0.0,0.0,neutral
1,Q9Y210,,0.0,0.0,neutral
2,P06241,,0.0,0.0,neutral
3,P49137,,0.0,0.0,neutral
4,Q16539,,0.0,0.0,neutral
5,P31749,,0.0,0.0,neutral
6,O15111,,0.0,0.0,neutral
7,P19838,,0.0,0.0,neutral
8,P27986,,0.0,0.0,neutral
9,P78536,,0.0,0.0,neutral


üé® Annotated igraph GCC with signal_strength + regulation flags.
üíæ Saved annotated GCC to: analysis/networks/cxcr7_vs_dmso_annotated.graphml
‚úÖ Generated pseudo log2FC table for influence analysis: (581, 1)


Unnamed: 0_level_0,Combined
UniProt,Unnamed: 1_level_1
P0DP24,0.0
Q9Y210,0.0
P06241,0.0
P49137,0.0
Q16539,0.0
P31749,0.0
O15111,0.0
P19838,0.0
P27986,0.0
P78536,0.0


In [289]:
# ======================================================
# üß¨ Combine mapping sources
# ======================================================
# Each table must have UniProt ID and gene_symbol columns
prot_map = prot_fc.reset_index()[["UniProt", "gene_symbol"]].drop_duplicates()
phospho_map = phospho_fc[["UniProt", "gene_symbol"]].drop_duplicates()

# Merge both
combined_map = pd.concat([prot_map, phospho_map], ignore_index=True).drop_duplicates(subset="UniProt")

# Create dictionary
id_to_symbol = combined_map.set_index("UniProt")["gene_symbol"].to_dict()
print(f"‚úÖ Created UniProt ‚Üí gene_symbol mapping for {len(id_to_symbol)} unique IDs")

# ======================================================
# üß© Apply mapping to each Top_* table
# ======================================================
top_tables_mapped = {}
for name, tab in top_tables.items():
    # Reset index if necessary
    if tab.index.name == "UniProt":
        tab = tab.reset_index()

    # Add gene symbol column
    tab["gene_symbol"] = tab["UniProt"].map(id_to_symbol).fillna("")

    # Move gene_symbol to front for readability
    cols = ["UniProt", "gene_symbol"] + [c for c in tab.columns if c not in ["UniProt", "gene_symbol"]]
    top_tables_mapped[name] = tab[cols]

# ======================================================
# üßæ Display all mapped role-family tables
# ======================================================
print("\nüåü Top proteins per functional family (with gene symbols):")
for name, tab in top_tables_mapped.items():
    print(f"\n‚Äî {name} ‚Äî")
    display(tab)

# ======================================================
# üíæ Optional: export combined file
# ======================================================
mapped_path = in_dir.parent / "cxcr7_roles_igraph_with_symbols.csv"
pd.concat(top_tables_mapped, names=["role_family"]).to_csv(mapped_path)

print(f"üíæ Saved all top-ranked role-family tables with gene symbols ‚Üí {mapped_path}")


‚úÖ Created UniProt ‚Üí gene_symbol mapping for 2915 unique IDs

üåü Top proteins per functional family (with gene symbols):

‚Äî Top_Receptor_like ‚Äî


Unnamed: 0,UniProt,gene_symbol,ReceptorScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,coverage_to_top
0,P0DP24,CALM2,0.08476,0.0,0.0,0.466842,0.0,3.181818,0.88
1,Q15118,PDK1,0.039636,0.0,0.0,0.142376,0.0,2.857143,0.84
2,Q9Y2U5,MAP3K2,0.030439,0.0,0.0,0.140773,0.0,3.190476,0.84
3,O60890,OPHN1,0.025607,0.817681,0.817681,0.290735,0.0,3.318182,0.88
4,Q12913,PTPRJ,0.025536,0.0,0.0,0.106959,0.0,2.714286,0.84
5,O60271,SPAG9,0.018792,0.0,0.0,0.245385,0.0,3.714286,0.84
6,Q13464,ROCK1,0.017161,0.0,0.0,0.03256,0.0,3.47619,0.84
7,P02751,FN1,0.017128,0.0,0.0,0.084493,0.0,3.47619,0.84
8,O14775,GNB5,0.017089,0.0,0.0,0.152355,0.0,3.809524,0.84
9,P40189,IL6ST,0.017014,0.0,0.0,0.141462,0.0,3.52381,0.84



‚Äî Top_Adapter_like ‚Äî


Unnamed: 0,UniProt,gene_symbol,AdapterScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,coverage_to_top
0,P12931,SRC,0.689919,0.685982,0.685982,0.379839,30798.46386,2.0,0.84
1,P31749,AKT1,0.581198,0.0,0.0,0.33835,26749.364661,2.238095,0.84
2,P17252,PRKCA,0.342299,0.0,0.0,0.078403,19551.671854,2.047619,0.84
3,Q16539,MAPK14,0.285624,0.0,0.0,0.365353,12885.704631,2.714286,0.84
4,P28482,MAPK1,0.279347,0.0,0.0,0.06016,16230.523031,2.47619,0.84
5,P17612,PRKACA,0.258519,0.612314,0.612314,0.427957,11151.562233,2.047619,0.84
6,P63000,RAC1,0.190879,0.0,0.0,0.103893,10651.000049,2.47619,0.84
7,P42574,CASP3,0.169635,0.0,0.0,0.049085,9960.129944,2.809524,0.84
8,P35568,IRS1,0.16119,0.0,0.0,0.127737,8804.192305,2.428571,0.84
9,P27361,MAPK3,0.142298,0.0,0.0,0.063933,8238.432352,2.52381,0.84



‚Äî Top_Kinase_like ‚Äî


Unnamed: 0,UniProt,gene_symbol,KinaseScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,coverage_to_top
0,Q14432,PDE3A,0.393007,0.642709,0.642709,1.0,0.0,0.0,0.04
1,O43665,RGS10,0.357824,0.585172,0.585172,1.0,0.0,0.0,0.04
2,Q9UDT6,CLIP2,0.356699,0.583332,0.583332,1.0,0.0,0.0,0.04
3,O76074,PDE5A,0.322202,0.526917,0.526917,1.0,0.0,0.0,0.04
4,P12931,SRC,0.31866,0.685982,0.685982,0.379839,30798.46386,2.0,0.84
5,Q96SB3,PPP1R9B,0.305022,0.498821,0.498821,1.0,0.0,0.0,0.04
6,P17612,PRKACA,0.272609,0.612314,0.612314,0.427957,11151.562233,2.047619,0.84
7,P50552,VASP,0.187874,0.511568,0.511568,0.59289,209.520699,2.952381,0.84
8,P29353,SHC1,0.174404,0.709608,0.709608,0.368436,1317.106387,2.571429,0.84
9,O60890,OPHN1,0.149143,0.817681,0.817681,0.290735,0.0,3.318182,0.88



‚Äî Top_TF_sinks ‚Äî


Unnamed: 0,UniProt,gene_symbol,TFScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,coverage_to_top
0,Q14432,PDE3A,0.104802,0.642709,0.642709,1.0,0.0,0.0,0.04
1,Q96SB3,PPP1R9B,0.08133919,0.498821,0.498821,1.0,0.0,0.0,0.04
2,O43665,RGS10,0.04770991,0.585172,0.585172,1.0,0.0,0.0,0.04
3,Q9UDT6,CLIP2,0.04755983,0.583332,0.583332,1.0,0.0,0.0,0.04
4,O76074,PDE5A,0.04296024,0.526917,0.526917,1.0,0.0,0.0,0.04
5,P50552,VASP,7.418628e-11,0.511568,0.511568,0.59289,209.520699,2.952381,0.84
6,P29353,SHC1,4.263202e-11,0.709608,0.709608,0.368436,1317.106387,2.571429,0.84
7,P60709,ACTB,1.554914e-11,0.495129,0.495129,0.38518,4021.649431,2.952381,0.84
8,O75962,TRIO,1.153675e-11,0.500341,0.500341,0.282809,0.0,3.380952,0.84
9,P12931,SRC,9.10458e-12,0.685982,0.685982,0.379839,30798.46386,2.0,0.84



‚Äî Top_Local_amplifiers ‚Äî


Unnamed: 0,UniProt,gene_symbol,LocalAmpScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,coverage_to_top
0,O76074,PDE5A,1.0,0.526917,0.526917,1.0,0.0,0.0,0.04
1,O43665,RGS10,1.0,0.585172,0.585172,1.0,0.0,0.0,0.04
2,Q9UDT6,CLIP2,1.0,0.583332,0.583332,1.0,0.0,0.0,0.04
3,Q96SB3,PPP1R9B,1.0,0.498821,0.498821,1.0,0.0,0.0,0.04
4,Q14432,PDE3A,1.0,0.642709,0.642709,1.0,0.0,0.0,0.04
5,Q9Y210,TRPC6,1.0,0.0,0.0,1.0,0.0,0.0,0.04
6,P05067,APP,0.535521,0.0,0.0,0.540541,286.0,0.0,0.04
7,P50552,VASP,0.364874,0.511568,0.511568,0.59289,209.520699,2.952381,0.84
8,Q5JRX3,PITRM1,0.287849,0.0,0.0,0.330418,0.0,1.0,0.04
9,P07339,CTSD,0.287849,0.0,0.0,0.330418,0.0,1.0,0.04



‚Äî Top_Silent_conduits ‚Äî


Unnamed: 0,UniProt,gene_symbol,SilentConduitScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,coverage_to_top
0,Q9Y210,TRPC6,1.0,0.0,0.0,1.0,0.0,0.0,0.04
1,P05067,APP,0.540541,0.0,0.0,0.540541,286.0,0.0,0.04
2,P78536,ADAM17,0.482948,0.0,0.0,0.482948,571.021685,3.333333,0.84
3,P0DP24,CALM2,0.466842,0.0,0.0,0.466842,0.0,3.181818,0.88
4,P49137,MAPKAPK2,0.433149,0.0,0.0,0.433149,1238.549449,3.285714,0.84
5,P06241,FYN,0.390712,0.0,0.0,0.390712,4200.646077,2.47619,0.84
6,Q96SB3,PPP1R9B,0.389956,0.498821,0.498821,1.0,0.0,0.0,0.04
7,O15111,CHUK,0.378586,0.0,0.0,0.378586,705.675735,3.857143,0.84
8,Q16539,MAPK14,0.365353,0.0,0.0,0.365353,12885.704631,2.714286,0.84
9,P27986,PIK3R1,0.358726,0.0,0.0,0.358726,1221.408208,2.809524,0.84



‚Äî Top_Effectors ‚Äî


Unnamed: 0,UniProt,gene_symbol,EffectorScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,coverage_to_top
0,O60890,OPHN1,0.974026,0.817681,0.817681,0.290735,0.0,3.318182,0.88
1,Q14432,PDE3A,0.786015,0.642709,0.642709,1.0,0.0,0.0,0.04
2,P29353,SHC1,0.755197,0.709608,0.709608,0.368436,1317.106387,2.571429,0.84
3,O43665,RGS10,0.715649,0.585172,0.585172,1.0,0.0,0.0,0.04
4,Q9UDT6,CLIP2,0.713398,0.583332,0.583332,1.0,0.0,0.0,0.04
5,O76074,PDE5A,0.644404,0.526917,0.526917,1.0,0.0,0.0,0.04
6,P02671,FGA,0.632989,0.524393,0.524393,0.345982,0.0,3.454545,0.88
7,Q9Y4H4,GPSM3,0.628223,0.520445,0.520445,0.426475,0.0,3.818182,0.88
8,P50552,VASP,0.613307,0.511568,0.511568,0.59289,209.520699,2.952381,0.84
9,Q96SB3,PPP1R9B,0.610044,0.498821,0.498821,1.0,0.0,0.0,0.04



‚Äî Top_FlexibleComposite ‚Äî


Unnamed: 0,UniProt,gene_symbol,FlexComposite,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,coverage_to_top
0,Q14432,PDE3A,0.738077,0.642709,0.642709,1.0,0.0,0.0,0.04
1,O43665,RGS10,0.716967,0.585172,0.585172,1.0,0.0,0.0,0.04
2,Q9UDT6,CLIP2,0.716292,0.583332,0.583332,1.0,0.0,0.0,0.04
3,O76074,PDE5A,0.695594,0.526917,0.526917,1.0,0.0,0.0,0.04
4,Q96SB3,PPP1R9B,0.685286,0.498821,0.498821,1.0,0.0,0.0,0.04
5,P12931,SRC,0.675577,0.685982,0.685982,0.379839,30798.46386,2.0,0.84
6,P17612,PRKACA,0.571495,0.612314,0.612314,0.427957,11151.562233,2.047619,0.84
7,P50552,VASP,0.535557,0.511568,0.511568,0.59289,209.520699,2.952381,0.84
8,P29353,SHC1,0.528737,0.709608,0.709608,0.368436,1317.106387,2.571429,0.84
9,O60890,OPHN1,0.523544,0.817681,0.817681,0.290735,0.0,3.318182,0.88


üíæ Saved all top-ranked role-family tables with gene symbols ‚Üí analysis/networks/cxcr7_roles_igraph_with_symbols.csv


In [305]:
# ======================================================
# ‚öôÔ∏è Configurable parameters
# ======================================================

# Which metric defines "differential" importance
diff_metric = "delta_slope"    # options: "delta_slope", "delta_auc", "combined_score"

# Number of top nodes to include per category
TOP_N = 50

# Significance thresholds
UP_THR = 0.05
DOWN_THR = -0.05

print(f"üìä Differential metric: {diff_metric} | Top N per category: {TOP_N}")

# ======================================================
# üß© 1Ô∏è‚É£ Define directional categories
# ======================================================
def classify_direction(x, up_thr=UP_THR, down_thr=DOWN_THR):
    if x > up_thr:
        return "upregulated"
    elif x < down_thr:
        return "downregulated"
    else:
        return "neutral"

reach_df["direction_refined"] = reach_df[diff_metric].apply(classify_direction)
print(reach_df["direction_refined"].value_counts())

# ======================================================
# üß¨ 2Ô∏è‚É£ Define ranked subsets
# ======================================================
def get_top(df, metric, n=TOP_N, direction="up"):
    if direction == "up":
        return df.sort_values(metric, ascending=False).head(n)
    elif direction == "down":
        return df.sort_values(metric, ascending=True).head(n)
    else:
        raise ValueError("direction must be 'up' or 'down'")

top_up = get_top(reach_df, diff_metric, n=TOP_N, direction="up")
top_down = get_top(reach_df, diff_metric, n=TOP_N, direction="down")

print(f"‚úÖ Selected {len(top_up)} top-up and {len(top_down)} top-down differential nodes")

# ======================================================
# üîÄ 3Ô∏è‚É£ Logical group combinations
# ======================================================
def subset_nodes(df, relation=None, direction=None, top_df=None):
    subset = df.copy()
    if relation:
        subset = subset.query("relation_to_top == @relation")
    if direction:
        subset = subset.query("direction_refined == @direction")
    if top_df is not None:
        subset = subset[subset["UniProt"].isin(top_df["UniProt"])]
    return subset

# Core groups
groups = {
    "Top50_upregulated": subset_nodes(reach_df, top_df=top_up),
    "Top50_downregulated": subset_nodes(reach_df, top_df=top_down),
    "Upstream": subset_nodes(reach_df, relation="upstream"),
    "Downstream": subset_nodes(reach_df, relation="downstream"),
    "Upstream_upregulated": subset_nodes(reach_df, relation="upstream", direction="upregulated"),
    "Downstream_upregulated": subset_nodes(reach_df, relation="downstream", direction="upregulated"),
    "Upstream_downregulated": subset_nodes(reach_df, relation="upstream", direction="downregulated"),
    "Downstream_downregulated": subset_nodes(reach_df, relation="downstream", direction="downregulated"),
}

print(f"üì¶ Created {len(groups)} intelligent subsets.")

# ======================================================
# üìà 4Ô∏è‚É£ Summarize each group
# ======================================================
group_stats = []
for name, gdf in groups.items():
    if gdf.empty:
        continue
    stats = {
        "Group": name,
        "n_nodes": len(gdf),
        "mean_betweenness": gdf["betweenness"].mean(),
        "mean_reach_coverage": gdf["reach_coverage"].mean(),
        "mean_avg_dist_to_top": gdf["avg_dist_to_top"].mean(),
        "mean_dyn_deviation": gdf["dyn_deviation"].mean(),
        "fraction_up": (gdf["direction_refined"] == "upregulated").mean(),
        "fraction_down": (gdf["direction_refined"] == "downregulated").mean(),
    }
    group_stats.append(stats)

group_stats_df = pd.DataFrame(group_stats)
display(group_stats_df.sort_values("mean_betweenness", ascending=False))

# ======================================================
# üßÆ 5Ô∏è‚É£ Assign meta-behavioral roles
# ======================================================
# These are high-level categories derived from network behavior patterns.
def classify_behavior(row):
    if row["relation_to_top"] == "downstream" and row["direction_refined"] == "upregulated":
        return "Effector (activated downstream)"
    elif row["relation_to_top"] == "upstream" and row["direction_refined"] == "upregulated":
        return "Activator (upstream source)"
    elif row["relation_to_top"] == "downstream" and row["direction_refined"] == "downregulated":
        return "Silenced downstream"
    elif row["relation_to_top"] == "upstream" and row["direction_refined"] == "downregulated":
        return "Inhibited upstream"
    else:
        return "Intermediate / Neutral"

reach_df["functional_role"] = reach_df.apply(classify_behavior, axis=1)

print("‚úÖ Assigned high-level behavioral categories:")
print(reach_df["functional_role"].value_counts())


# ======================================================
# üíæ 6Ô∏è‚É£ Save enhanced table for downstream analysis (safe)
# ======================================================
from pathlib import Path

out_dir = base / "analysis" / "networks" / "reach_analysis"
out_dir.mkdir(parents=True, exist_ok=True)   # ‚úÖ make sure directory exists

enhanced_path = out_dir / "phosphosite_network_reach_enhanced.csv"
reach_df.to_csv(enhanced_path, index=False)

print(f"üíæ Saved enhanced reach table ‚Üí {enhanced_path}")


# ======================================================
# üé® 7Ô∏è‚É£ Optional visualization hints (e.g., for Cytoscape)
# ======================================================
# Example node coloring logic for plotting
color_map = {
    "Effector (activated downstream)": "red",
    "Activator (upstream source)": "orange",
    "Silenced downstream": "blue",
    "Inhibited upstream": "darkblue",
    "Intermediate / Neutral": "gray"
}
g_gcc.vs["functional_role"] = reach_df.set_index("UniProt").loc[g_gcc.vs["name"], "functional_role"].tolist()
g_gcc.vs["color_role"] = [color_map.get(role, "gray") for role in g_gcc.vs["functional_role"]]

print("üé® Annotated graph with high-level behavioral roles and colors.")


üìä Differential metric: delta_slope | Top N per category: 50
direction_refined
neutral          325
upregulated      129
downregulated    127
Name: count, dtype: int64
‚úÖ Selected 50 top-up and 50 top-down differential nodes
üì¶ Created 8 intelligent subsets.


Unnamed: 0,Group,n_nodes,mean_betweenness,mean_reach_coverage,mean_avg_dist_to_top,mean_dyn_deviation,fraction_up,fraction_down
0,Top50_upregulated,50,1031.783681,0.369756,3.712744,0.229235,1.0,0.0
1,Top50_downregulated,50,524.860213,0.340976,3.685117,0.21357,0.0,1.0
7,Downstream_downregulated,43,81.181783,0.0,,0.143948,0.0,1.0
6,Upstream_downregulated,18,77.138889,0.780488,4.289931,0.14326,0.0,1.0
2,Upstream,97,63.870187,0.773196,4.503251,0.058081,0.14433,0.185567
3,Downstream,202,39.834078,0.0,,0.080108,0.237624,0.212871
4,Upstream_upregulated,14,28.428571,0.783972,4.676542,0.107774,1.0,0.0
5,Downstream_upregulated,48,12.338409,0.0,,0.135691,1.0,0.0


‚úÖ Assigned high-level behavioral categories:
functional_role
Intermediate / Neutral             458
Effector (activated downstream)     48
Silenced downstream                 43
Inhibited upstream                  18
Activator (upstream source)         14
Name: count, dtype: int64
üíæ Saved enhanced reach table ‚Üí analysis/networks/reach_analysis/phosphosite_network_reach_enhanced.csv
üé® Annotated graph with high-level behavioral roles and colors.


In [306]:
# ======================================================
# ‚öôÔ∏è CONFIGURATION
# ======================================================
TOP_N = 50  # number of nodes to take per category
EXCLUDE_ISOLATED = True  # drop nodes that have no reach to others (avg_dist_to_top == 0)

# Define categories based on biological interpretation
role_definitions = {
    "Receptor_like": {
        "filter": lambda df: (df["relation_to_top"] == "upstream") & (df["direction"] == "neutral"),
        "score": "reach_coverage"
    },
    "Adapter_like": {
        "filter": lambda df: (df["relation_to_top"].isin(["upstream", "bidirectional"])) & (df["betweenness"] > 0),
        "score": "betweenness"
    },
    "Kinase_like": {
        "filter": lambda df: df["gene_symbol"].str.contains("PRK|AKT|MAPK|SRC|PTK|RAF|PIK|GRK|PRKG|JAK", regex=True),
        "score": "dyn_deviation"
    },
    "TF_sinks": {
        "filter": lambda df: (df["relation_to_top"] == "downstream") & (df["direction"] == "downregulated"),
        "score": "reach_coverage"
    },
    "Local_amplifiers": {
        "filter": lambda df: (df["relation_to_top"] == "bidirectional") & (df["direction"] == "upregulated"),
        "score": "betweenness"
    },
    "Silent_conduits": {
        "filter": lambda df: (df["relation_to_top"] == "downstream") & (df["reach_coverage"] == 0),
        "score": "betweenness"
    },
    "Effectors": {
        "filter": lambda df: (df["relation_to_top"] == "downstream") & (df["direction"] == "upregulated"),
        "score": "reach_coverage"
    },
    "FlexibleComposite": {
        "filter": lambda df: df["relation_to_top"].isin(["bidirectional", "downstream", "upstream"]),
        "score": "dyn_deviation"
    },
}

# ======================================================
# üßÆ FILTER + SCORE PER ROLE FAMILY
# ======================================================
top_tables = {}

df_valid = reach_df.copy()
if EXCLUDE_ISOLATED:
    df_valid = df_valid[df_valid["avg_dist_to_top"] > 0]

df_valid["abs_signal_strength"] = df_valid["delta_slope"].abs()
df_valid["signal_strength"] = df_valid["delta_slope"]
df_valid["ppr_reach"] = df_valid["reach_coverage"]

for role_name, info in role_definitions.items():
    filt = info["filter"](df_valid)
    score_col = info["score"]

    subset = df_valid[filt].copy()
    if subset.empty:
        print(f"‚ö†Ô∏è No matches for {role_name}, skipping.")
        continue

    # Rank and select top N
    subset = subset.sort_values(score_col, ascending=False).head(TOP_N)

    # Rename score column dynamically
    subset = subset.rename(columns={score_col: f"{role_name}Score"})

    # Keep essential columns
    cols = ["UniProt", "gene_symbol", f"{role_name}Score", "signal_strength",
            "abs_signal_strength", "ppr_reach", "betweenness",
            "avg_dist_to_top", "reach_coverage"]
    subset = subset[[c for c in cols if c in subset.columns]]

    top_tables[f"Top_{role_name}"] = subset

# ======================================================
# üåü DISPLAY SUMMARIES
# ======================================================
print(f"‚úÖ Created {len(top_tables)} functional role families\n")

for name, tab in top_tables.items():
    print(f"‚Äî {name} ‚Äî ({len(tab)} proteins)")
    display(tab.head(10))

# ======================================================
# üíæ EXPORT
# ======================================================
out_path = base / "analysis" / "networks" / "role_families"
out_path.mkdir(parents=True, exist_ok=True)

mapped_path = out_path / "functional_role_tables.csv"
pd.concat(top_tables, names=["role_family"]).to_csv(mapped_path)
print(f"üíæ Saved all role-family tables ‚Üí {mapped_path}")


‚ö†Ô∏è No matches for TF_sinks, skipping.
‚ö†Ô∏è No matches for Silent_conduits, skipping.
‚ö†Ô∏è No matches for Effectors, skipping.
‚úÖ Created 5 functional role families

‚Äî Top_Receptor_like ‚Äî (50 proteins)


Unnamed: 0,UniProt,gene_symbol,Receptor_likeScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top
0,P0DP24,,0.780488,0.0,0.0,0.780488,0.0,3.40625
38,P11233,,0.780488,0.0,0.0,0.780488,0.0,4.4375
40,P11234,,0.780488,0.0,0.0,0.780488,0.0,4.4375
66,Q13464,ROCK1,0.780488,-0.008596,0.008596,0.780488,0.0,3.71875
96,Q9UHD2,,0.780488,0.0,0.0,0.780488,1741.467,3.125
97,P04632,,0.780488,0.0,0.0,0.780488,0.0,5.125
113,Q9UBN7,,0.780488,0.0,0.0,0.780488,0.0,3.5
131,P10599,,0.780488,0.0,0.0,0.780488,0.0,4.6875
138,P16298,,0.780488,0.0,0.0,0.780488,0.0,7.0
155,P35813,,0.780488,0.0,0.0,0.780488,0.0,3.6875


‚Äî Top_Adapter_like ‚Äî (50 proteins)


Unnamed: 0,UniProt,gene_symbol,Adapter_likeScore,signal_strength,abs_signal_strength,ppr_reach,avg_dist_to_top,reach_coverage
129,P12931,SRC,30798.46386,0.413172,0.413172,0.756098,2.129032,0.756098
5,P31749,AKT1,26749.364661,0.045927,0.045927,0.780488,2.125,0.780488
12,P17252,PRKCA,19551.671854,-0.08651,0.08651,0.780488,2.125,0.780488
60,P28482,,16230.523031,0.0,0.0,0.780488,2.34375,0.780488
4,Q16539,MAPK14,12885.704631,-0.009051,0.009051,0.780488,2.6875,0.780488
136,P17612,PRKACA,11151.562233,-0.49096,0.49096,0.756098,2.193548,0.756098
23,P63000,,10651.000049,0.0,0.0,0.780488,2.53125,0.780488
16,P42574,,9960.129944,0.0,0.0,0.780488,3.0,0.780488
84,P35568,IRS1,8804.192305,0.017956,0.017956,0.780488,2.5625,0.780488
43,P55211,CASP9,8241.389418,0.040176,0.040176,0.780488,4.0,0.780488


‚Äî Top_Kinase_like ‚Äî (23 proteins)


Unnamed: 0,UniProt,gene_symbol,Kinase_likeScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,reach_coverage
136,P17612,PRKACA,0.674963,-0.49096,0.49096,0.756098,11151.562233,2.193548,0.756098
129,P12931,SRC,0.458368,0.413172,0.413172,0.756098,30798.46386,2.129032,0.756098
121,Q05655,PRKCD,0.431786,-0.178325,0.178325,0.780488,2545.676046,2.84375,0.780488
206,Q14289,PTK2B,0.313748,-0.085418,0.085418,0.780488,930.412715,2.90625,0.780488
26,Q96B36,AKT1S1,0.238496,-0.040233,0.040233,0.780488,42.074288,3.84375,0.780488
461,Q9Y478,PRKAB1,0.167564,-0.08535,0.08535,0.780488,0.0,4.78125,0.780488
5,P31749,AKT1,0.167479,0.045927,0.045927,0.780488,26749.364661,2.125,0.780488
182,P15056,BRAF,0.1604,0.086948,0.086948,0.780488,1934.167021,3.28125,0.780488
411,Q9BZL6,PRKD2,0.143834,0.08067,0.08067,0.804878,0.0,5.848485,0.804878
145,Q04759,PRKCQ,0.134451,0.13253,0.13253,0.780488,1215.309725,3.5,0.780488


‚Äî Top_Local_amplifiers ‚Äî (47 proteins)


Unnamed: 0,UniProt,gene_symbol,Local_amplifiersScore,signal_strength,abs_signal_strength,ppr_reach,avg_dist_to_top,reach_coverage
129,P12931,SRC,30798.46386,0.413172,0.413172,0.756098,2.129032,0.756098
89,P49841,GSK3B,6049.81674,0.362015,0.362015,0.756098,4.225806,0.756098
2,P06241,FYN,4200.646077,0.067988,0.067988,0.780488,2.78125,0.780488
559,P60709,ACTB,4021.649431,0.48517,0.48517,0.756098,3.096774,0.756098
152,P29350,PTPN6,2555.361812,0.282983,0.282983,0.756098,2.709677,0.756098
230,Q13043,STK4,1936.791375,0.153381,0.153381,0.780488,4.6875,0.780488
182,P15056,BRAF,1934.167021,0.086948,0.086948,0.780488,3.28125,0.780488
133,P62993,GRB2,1916.219366,0.094486,0.094486,0.780488,2.9375,0.780488
127,Q13976,PRKG1,1529.442594,0.158412,0.158412,0.780488,3.5,0.780488
114,Q14247,CTTN,1527.201971,0.269097,0.269097,0.756098,2.967742,0.756098


‚Äî Top_FlexibleComposite ‚Äî (50 proteins)


Unnamed: 0,UniProt,gene_symbol,FlexibleCompositeScore,signal_strength,abs_signal_strength,ppr_reach,betweenness,avg_dist_to_top,reach_coverage
136,P17612,PRKACA,0.674963,-0.49096,0.49096,0.756098,11151.562233,2.193548,0.756098
531,O60890,OPHN1,0.593812,0.673443,0.673443,0.780488,0.0,3.53125,0.780488
129,P12931,SRC,0.458368,0.413172,0.413172,0.756098,30798.46386,2.129032,0.756098
121,Q05655,PRKCD,0.431786,-0.178325,0.178325,0.780488,2545.676046,2.84375,0.780488
161,P29353,SHC1,0.421468,0.406014,0.406014,0.756098,1317.106387,2.870968,0.756098
19,Q05209,PTPN12,0.332846,0.222116,0.222116,0.780488,195.11631,2.71875,0.780488
59,P51812,RPS6KA3,0.331884,-0.09726,0.09726,0.780488,824.073668,3.34375,0.780488
326,P23528,CFL1,0.331504,-0.142621,0.142621,0.780488,2655.0,4.0,0.780488
260,Q8N4C8,MINK1,0.330732,-0.181276,0.181276,0.780488,0.0,3.6875,0.780488
229,P21333,FLNA,0.327994,-0.337446,0.337446,0.756098,1018.685161,3.129032,0.756098


üíæ Saved all role-family tables ‚Üí analysis/networks/role_families/functional_role_tables.csv


In [308]:
# ======================================================
# üß≠ Compute directional reach breakdown (fixed)
# ======================================================
# Convert to Python sets for safe membership testing
up_nodes = set(df_valid.query("direction == 'upregulated'")["UniProt"])
down_nodes = set(df_valid.query("direction == 'downregulated'")["UniProt"])
upstream_nodes = set(df_valid.query("relation_to_top == 'upstream'")["UniProt"])
downstream_nodes = set(df_valid.query("relation_to_top == 'downstream'")["UniProt"])

# Precompute distance matrix (modern syntax)
print("‚è≥ Computing distance matrix (may take a few seconds)...")
dist_matrix = np.array(g_gcc.distances(weights=None, mode="OUT"))

# Mapping from UniProt ‚Üí row index
uni_index = {u: i for i, u in enumerate(df_valid["UniProt"])}

def fraction_reachable_from_list(source_idx, target_set):
    """Return fraction of target nodes reachable from source."""
    if len(target_set) == 0:
        return np.nan
    valid_targets = [uni_index[t] for t in target_set if t in uni_index]
    if not valid_targets:
        return np.nan
    dists = dist_matrix[source_idx, valid_targets]
    return np.isfinite(dists).sum() / len(valid_targets)

# Compute per-node directional reach
df_valid["reach_upregulated"] = [
    fraction_reachable_from_list(i, up_nodes) for i in range(len(df_valid))
]
df_valid["reach_downregulated"] = [
    fraction_reachable_from_list(i, down_nodes) for i in range(len(df_valid))
]
df_valid["reach_upstream_up"] = [
    fraction_reachable_from_list(i, upstream_nodes & up_nodes) for i in range(len(df_valid))
]
df_valid["reach_upstream_down"] = [
    fraction_reachable_from_list(i, upstream_nodes & down_nodes) for i in range(len(df_valid))
]
df_valid["reach_downstream_up"] = [
    fraction_reachable_from_list(i, downstream_nodes & up_nodes) for i in range(len(df_valid))
]
df_valid["reach_downstream_down"] = [
    fraction_reachable_from_list(i, downstream_nodes & down_nodes) for i in range(len(df_valid))
]

# Overall explanatory coverage = how much of all altered nodes are reachable
altered_nodes = up_nodes | down_nodes
df_valid["reach_to_altered"] = [
    fraction_reachable_from_list(i, altered_nodes) for i in range(len(df_valid))
]

print("‚úÖ Directional reach fractions computed for all nodes")


‚è≥ Computing distance matrix (may take a few seconds)...
‚úÖ Directional reach fractions computed for all nodes


In [309]:
# ======================================================
# üîß CONFIGURATION
# ======================================================
TOP_N = 50  # number of top nodes per role family
EXCLUDE_ISOLATED = True  # drop nodes that have no reach to others (avg_dist_to_top == 0)

# ======================================================
# üß© PREPARE BASE DATA
# ======================================================
df_valid = reach_df.copy()
if EXCLUDE_ISOLATED:
    df_valid = df_valid[df_valid["avg_dist_to_top"] > 0]

df_valid["abs_signal_strength"] = df_valid["delta_slope"].abs()
df_valid["signal_strength"] = df_valid["delta_slope"]
df_valid["ppr_reach"] = df_valid["reach_coverage"]

# ======================================================
# üß≠ COMPUTE DIRECTIONAL REACH FRACTIONS
# ======================================================
# Convert to Python sets for safe membership testing
up_nodes = set(df_valid.query("direction == 'upregulated'")["UniProt"])
down_nodes = set(df_valid.query("direction == 'downregulated'")["UniProt"])
upstream_nodes = set(df_valid.query("relation_to_top == 'upstream'")["UniProt"])
downstream_nodes = set(df_valid.query("relation_to_top == 'downstream'")["UniProt"])

print("‚è≥ Computing distance matrix (may take a few seconds)...")
dist_matrix = np.array(g_gcc.distances(weights=None, mode="OUT"))

uni_index = {u: i for i, u in enumerate(df_valid["UniProt"])}

def fraction_reachable_from_list(source_idx, target_set):
    """Return fraction of target nodes reachable from a given source node."""
    if len(target_set) == 0:
        return np.nan
    valid_targets = [uni_index[t] for t in target_set if t in uni_index]
    if not valid_targets:
        return np.nan
    dists = dist_matrix[source_idx, valid_targets]
    return np.isfinite(dists).sum() / len(valid_targets)

# Compute per-node directional reach
df_valid["reach_upregulated"] = [
    fraction_reachable_from_list(i, up_nodes) for i in range(len(df_valid))
]
df_valid["reach_downregulated"] = [
    fraction_reachable_from_list(i, down_nodes) for i in range(len(df_valid))
]
df_valid["reach_upstream_up"] = [
    fraction_reachable_from_list(i, upstream_nodes & up_nodes) for i in range(len(df_valid))
]
df_valid["reach_upstream_down"] = [
    fraction_reachable_from_list(i, upstream_nodes & down_nodes) for i in range(len(df_valid))
]
df_valid["reach_downstream_up"] = [
    fraction_reachable_from_list(i, downstream_nodes & up_nodes) for i in range(len(df_valid))
]
df_valid["reach_downstream_down"] = [
    fraction_reachable_from_list(i, downstream_nodes & down_nodes) for i in range(len(df_valid))
]

# Overall explanatory coverage = how much of all altered nodes are reachable
altered_nodes = up_nodes | down_nodes
df_valid["reach_to_altered"] = [
    fraction_reachable_from_list(i, altered_nodes) for i in range(len(df_valid))
]

print("‚úÖ Directional reach fractions computed for all nodes")

# ======================================================
# ‚öôÔ∏è DEFINE ROLE FAMILIES
# ======================================================
role_definitions = {
    "Receptor_like": {
        "filter": lambda df: (df["relation_to_top"] == "upstream") & (df["direction"] == "neutral"),
        "score": "reach_coverage"
    },
    "Adapter_like": {
        "filter": lambda df: (df["relation_to_top"].isin(["upstream", "bidirectional"])) & (df["betweenness"] > 0),
        "score": "betweenness"
    },
    "Kinase_like": {
        "filter": lambda df: df["gene_symbol"].str.contains(
            "PRK|AKT|MAPK|SRC|PTK|RAF|PIK|GRK|PRKG|JAK|SOS|SHC|GRB", regex=True, na=False
        ),
        "score": "dyn_deviation"
    },
    "TF_sinks": {
        "filter": lambda df: (df["relation_to_top"] == "downstream") & (df["direction"] == "downregulated"),
        "score": "reach_coverage"
    },
    "Local_amplifiers": {
        "filter": lambda df: (df["relation_to_top"] == "bidirectional") & (df["direction"] == "upregulated"),
        "score": "betweenness"
    },
    "Silent_conduits": {
        "filter": lambda df: (df["relation_to_top"] == "downstream") & (df["reach_coverage"] == 0),
        "score": "betweenness"
    },
    "Effectors": {
        "filter": lambda df: (df["relation_to_top"] == "downstream") & (df["direction"] == "upregulated"),
        "score": "reach_coverage"
    },
    "FlexibleComposite": {
        "filter": lambda df: df["relation_to_top"].isin(["bidirectional", "downstream", "upstream"]),
        "score": "dyn_deviation"
    },
}

# ======================================================
# üßÆ BUILD ROLE-FAMILY TABLES
# ======================================================
top_tables = {}

for role_name, info in role_definitions.items():
    subset = df_valid[info["filter"](df_valid)].copy()
    if subset.empty:
        print(f"‚ö†Ô∏è No matches for {role_name}, skipping.")
        continue

    subset = subset.sort_values(info["score"], ascending=False).head(TOP_N)
    subset = subset.rename(columns={info["score"]: f"{role_name}Score"})

    cols = [
        "UniProt", "gene_symbol", f"{role_name}Score",
        "signal_strength", "abs_signal_strength",
        "delta_slope", "delta_auc", "dyn_deviation",
        "ppr_reach", "betweenness", "avg_dist_to_top", "reach_coverage",
        "reach_upregulated", "reach_downregulated",
        "reach_upstream_up", "reach_upstream_down",
        "reach_downstream_up", "reach_downstream_down",
        "reach_to_altered"
    ]
    subset = subset[[c for c in cols if c in subset.columns]]
    top_tables[f"Top_{role_name}"] = subset

# ======================================================
# üåü DISPLAY SUMMARIES
# ======================================================
print(f"‚úÖ Created {len(top_tables)} functional role families\n")
for name, tab in top_tables.items():
    print(f"‚Äî {name} ‚Äî ({len(tab)} proteins)")
    display(tab.head(10))

# ======================================================
# üíæ EXPORT
# ======================================================
out_path = base / "analysis" / "networks" / "role_families_enhanced"
out_path.mkdir(parents=True, exist_ok=True)

export_path = out_path / "functional_role_tables_enhanced.csv"
pd.concat(top_tables, names=["role_family"]).to_csv(export_path)
print(f"üíæ Saved all enhanced role-family tables ‚Üí {export_path}")


‚è≥ Computing distance matrix (may take a few seconds)...
‚úÖ Directional reach fractions computed for all nodes
‚ö†Ô∏è No matches for TF_sinks, skipping.
‚ö†Ô∏è No matches for Silent_conduits, skipping.
‚ö†Ô∏è No matches for Effectors, skipping.
‚úÖ Created 5 functional role families

‚Äî Top_Receptor_like ‚Äî (50 proteins)


Unnamed: 0,UniProt,gene_symbol,Receptor_likeScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,dyn_deviation,ppr_reach,betweenness,avg_dist_to_top,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
0,P0DP24,,0.780488,0.0,0.0,0.0,0.0,0.0,0.780488,0.0,3.40625,0.786885,0.720588,0.642857,0.5,,,0.751938
38,P11233,,0.780488,0.0,0.0,0.0,0.0,0.0,0.780488,0.0,4.4375,0.786885,0.720588,0.642857,0.5,,,0.751938
40,P11234,,0.780488,0.0,0.0,0.0,0.0,0.0,0.780488,0.0,4.4375,0.016393,0.0,0.071429,0.0,,,0.007752
66,Q13464,ROCK1,0.780488,-0.008596,0.008596,-0.008596,0.500543,0.107813,0.780488,0.0,3.71875,0.786885,0.720588,0.642857,0.5,,,0.751938
96,Q9UHD2,,0.780488,0.0,0.0,0.0,0.0,0.0,0.780488,1741.467,3.125,0.786885,0.720588,0.642857,0.5,,,0.751938
97,P04632,,0.780488,0.0,0.0,0.0,0.0,0.0,0.780488,0.0,5.125,0.786885,0.720588,0.642857,0.5,,,0.751938
113,Q9UBN7,,0.780488,0.0,0.0,0.0,0.0,0.0,0.780488,0.0,3.5,0.786885,0.720588,0.642857,0.5,,,0.751938
131,P10599,,0.780488,0.0,0.0,0.0,0.0,0.0,0.780488,0.0,4.6875,0.786885,0.720588,0.642857,0.5,,,0.751938
138,P16298,,0.780488,0.0,0.0,0.0,0.0,0.0,0.780488,0.0,7.0,0.786885,0.720588,0.642857,0.5,,,0.751938
155,P35813,,0.780488,0.0,0.0,0.0,0.0,0.0,0.780488,0.0,3.6875,0.016393,0.0,0.0,0.0,,,0.007752


‚Äî Top_Adapter_like ‚Äî (50 proteins)


Unnamed: 0,UniProt,gene_symbol,Adapter_likeScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,dyn_deviation,ppr_reach,avg_dist_to_top,reach_coverage,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
129,P12931,SRC,30798.46386,0.413172,0.413172,0.413172,2.490372,0.458368,0.756098,2.129032,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
5,P31749,AKT1,26749.364661,0.045927,0.045927,0.045927,-0.903394,0.167479,0.780488,2.125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
12,P17252,PRKCA,19551.671854,-0.08651,0.08651,-0.08651,-0.005338,0.070294,0.780488,2.125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
60,P28482,,16230.523031,0.0,0.0,0.0,0.0,0.0,0.780488,2.34375,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
4,Q16539,MAPK14,12885.704631,-0.009051,0.009051,-0.009051,0.346202,0.044965,0.780488,2.6875,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
136,P17612,PRKACA,11151.562233,-0.49096,0.49096,-0.49096,-4.566348,0.674963,0.756098,2.193548,0.756098,0.0,0.014706,0.0,0.0,,,0.007752
23,P63000,,10651.000049,0.0,0.0,0.0,0.0,0.0,0.780488,2.53125,0.780488,0.0,0.0,0.0,0.0,,,0.0
16,P42574,,9960.129944,0.0,0.0,0.0,0.0,0.0,0.780488,3.0,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
84,P35568,IRS1,8804.192305,0.017956,0.017956,0.017956,1.067926,0.252015,0.780488,2.5625,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
43,P55211,CASP9,8241.389418,0.040176,0.040176,0.040176,0.512541,0.099164,0.780488,4.0,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938


‚Äî Top_Kinase_like ‚Äî (26 proteins)


Unnamed: 0,UniProt,gene_symbol,Kinase_likeScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,ppr_reach,betweenness,avg_dist_to_top,reach_coverage,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
136,P17612,PRKACA,0.674963,-0.49096,0.49096,-0.49096,-4.566348,0.756098,11151.562233,2.193548,0.756098,0.0,0.014706,0.0,0.0,,,0.007752
129,P12931,SRC,0.458368,0.413172,0.413172,0.413172,2.490372,0.756098,30798.46386,2.129032,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
121,Q05655,PRKCD,0.431786,-0.178325,0.178325,-0.178325,-1.933503,0.780488,2545.676046,2.84375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
161,P29353,SHC1,0.421468,0.406014,0.406014,0.406014,2.833307,0.756098,1317.106387,2.870968,0.756098,0.032787,0.0,0.071429,0.0,,,0.015504
206,Q14289,PTK2B,0.313748,-0.085418,0.085418,-0.085418,-0.849174,0.780488,930.412715,2.90625,0.780488,0.786885,0.735294,0.642857,0.5,,,0.75969
26,Q96B36,AKT1S1,0.238496,-0.040233,0.040233,-0.040233,-1.593033,0.780488,42.074288,3.84375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
461,Q9Y478,PRKAB1,0.167564,-0.08535,0.08535,-0.08535,0.770234,0.780488,0.0,4.78125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
5,P31749,AKT1,0.167479,0.045927,0.045927,0.045927,-0.903394,0.780488,26749.364661,2.125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
182,P15056,BRAF,0.1604,0.086948,0.086948,0.086948,1.020664,0.780488,1934.167021,3.28125,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
411,Q9BZL6,PRKD2,0.143834,0.08067,0.08067,0.08067,1.12335,0.804878,0.0,5.848485,0.804878,0.016393,0.014706,0.071429,0.0,,,0.015504


‚Äî Top_Local_amplifiers ‚Äî (47 proteins)


Unnamed: 0,UniProt,gene_symbol,Local_amplifiersScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,dyn_deviation,ppr_reach,avg_dist_to_top,reach_coverage,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
129,P12931,SRC,30798.46386,0.413172,0.413172,0.413172,2.490372,0.458368,0.756098,2.129032,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
89,P49841,GSK3B,6049.81674,0.362015,0.362015,0.362015,-0.307143,0.220227,0.756098,4.225806,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
2,P06241,FYN,4200.646077,0.067988,0.067988,0.067988,-0.150654,0.047711,0.780488,2.78125,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
559,P60709,ACTB,4021.649431,0.48517,0.48517,0.48517,-0.204652,0.263284,0.756098,3.096774,0.756098,0.016393,0.0,0.0,0.0,,,0.007752
152,P29350,PTPN6,2555.361812,0.282983,0.282983,0.282983,-0.62168,0.208663,0.756098,2.709677,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
230,Q13043,STK4,1936.791375,0.153381,0.153381,0.153381,-0.705693,0.129283,0.780488,4.6875,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
182,P15056,BRAF,1934.167021,0.086948,0.086948,0.086948,1.020664,0.1604,0.780488,3.28125,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
133,P62993,GRB2,1916.219366,0.094486,0.094486,0.094486,0.262989,0.078805,0.780488,2.9375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
127,Q13976,PRKG1,1529.442594,0.158412,0.158412,0.158412,-0.94949,0.086524,0.780488,3.5,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
114,Q14247,CTTN,1527.201971,0.269097,0.269097,0.269097,0.01679,0.167036,0.756098,2.967742,0.756098,0.016393,0.0,0.0,0.0,,,0.007752


‚Äî Top_FlexibleComposite ‚Äî (50 proteins)


Unnamed: 0,UniProt,gene_symbol,FlexibleCompositeScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,ppr_reach,betweenness,avg_dist_to_top,reach_coverage,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
136,P17612,PRKACA,0.674963,-0.49096,0.49096,-0.49096,-4.566348,0.756098,11151.562233,2.193548,0.756098,0.0,0.014706,0.0,0.0,,,0.007752
531,O60890,OPHN1,0.593812,0.673443,0.673443,0.673443,3.279046,0.780488,0.0,3.53125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
129,P12931,SRC,0.458368,0.413172,0.413172,0.413172,2.490372,0.756098,30798.46386,2.129032,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
121,Q05655,PRKCD,0.431786,-0.178325,0.178325,-0.178325,-1.933503,0.780488,2545.676046,2.84375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
161,P29353,SHC1,0.421468,0.406014,0.406014,0.406014,2.833307,0.756098,1317.106387,2.870968,0.756098,0.032787,0.0,0.071429,0.0,,,0.015504
19,Q05209,PTPN12,0.332846,0.222116,0.222116,0.222116,1.574198,0.780488,195.11631,2.71875,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
59,P51812,RPS6KA3,0.331884,-0.09726,0.09726,-0.09726,-1.975463,0.780488,824.073668,3.34375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
326,P23528,CFL1,0.331504,-0.142621,0.142621,-0.142621,-2.171356,0.780488,2655.0,4.0,0.780488,0.0,0.014706,0.0,0.0,,,0.007752
260,Q8N4C8,MINK1,0.330732,-0.181276,0.181276,-0.181276,-1.625709,0.780488,0.0,3.6875,0.780488,0.786885,0.735294,0.642857,0.555556,,,0.75969
229,P21333,FLNA,0.327994,-0.337446,0.337446,-0.337446,-1.808568,0.756098,1018.685161,3.129032,0.756098,0.0,0.014706,0.0,0.0,,,0.007752


üíæ Saved all enhanced role-family tables ‚Üí analysis/networks/role_families_enhanced/functional_role_tables_enhanced.csv


In [310]:
# ======================================================
# üì¶ IMPORTS
# ======================================================
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

# ======================================================
# ‚öôÔ∏è CONFIGURATION
# ======================================================
TOP_N = 50  # Top |Œîslope| for candidate subset analyses
EXCLUDE_ISOLATED = True  # drop nodes with no connectivity
base_path = base / "analysis" / "networks" / "enhanced_reach"
base_path.mkdir(parents=True, exist_ok=True)

# ======================================================
# üß¨ BASE DATASET PREPARATION
# ======================================================
df_valid = reach_df.copy()

# Fix missing gene symbols
name_to_symbol = dict(zip(g_gcc.vs["name"], g_gcc.vs["gene_symbol"]))
df_valid["gene_symbol"] = df_valid["UniProt"].map(name_to_symbol).fillna(df_valid["gene_symbol"])

# Filter isolated
if EXCLUDE_ISOLATED:
    df_valid = df_valid[df_valid["avg_dist_to_top"] > 0]

# Compute derived metrics
df_valid["abs_delta_slope"] = df_valid["delta_slope"].abs()
df_valid["abs_delta_auc"] = df_valid["delta_auc"].abs()
df_valid["abs_dyn_dev"] = df_valid["dyn_deviation"].abs()
df_valid["abs_signal_strength"] = df_valid["abs_delta_slope"]
df_valid["signal_strength"] = df_valid["delta_slope"]

# Normalize betweenness
max_bet = df_valid["betweenness"].max()
df_valid["betweenness_frac"] = df_valid["betweenness"] / max_bet if max_bet > 0 else 0

# ======================================================
# üß≠ COMPUTE DIRECTIONAL REACH FRACTIONS
# ======================================================
print("‚è≥ Computing distance matrix and directional reach metrics...")
dist_matrix = np.array(g_gcc.distances(weights=None, mode="OUT"))
uni_index = {u: i for i, u in enumerate(df_valid["UniProt"])}

def fraction_reachable(source_idx, targets):
    if len(targets) == 0:
        return np.nan
    valid = [uni_index[t] for t in targets if t in uni_index]
    if not valid:
        return np.nan
    dists = dist_matrix[source_idx, valid]
    return np.isfinite(dists).sum() / len(valid)

# Prepare sets
up_nodes = set(df_valid.query("direction == 'upregulated'")["UniProt"])
down_nodes = set(df_valid.query("direction == 'downregulated'")["UniProt"])
upstream_nodes = set(df_valid.query("relation_to_top == 'upstream'")["UniProt"])
downstream_nodes = set(df_valid.query("relation_to_top == 'downstream'")["UniProt"])
altered_nodes = up_nodes | down_nodes

for i in tqdm(range(len(df_valid)), desc="Directional reach"):
    df_valid.loc[df_valid.index[i], "reach_upregulated"] = fraction_reachable(i, up_nodes)
    df_valid.loc[df_valid.index[i], "reach_downregulated"] = fraction_reachable(i, down_nodes)
    df_valid.loc[df_valid.index[i], "reach_upstream_up"] = fraction_reachable(i, upstream_nodes & up_nodes)
    df_valid.loc[df_valid.index[i], "reach_upstream_down"] = fraction_reachable(i, upstream_nodes & down_nodes)
    df_valid.loc[df_valid.index[i], "reach_downstream_up"] = fraction_reachable(i, downstream_nodes & up_nodes)
    df_valid.loc[df_valid.index[i], "reach_downstream_down"] = fraction_reachable(i, downstream_nodes & down_nodes)
    df_valid.loc[df_valid.index[i], "reach_to_altered"] = fraction_reachable(i, altered_nodes)

print("‚úÖ Directional reach fractions computed")

# ======================================================
# üìä SUBSET DEFINITIONS
# ======================================================
top_abs_slope = set(df_valid.nlargest(TOP_N, "abs_delta_slope")["UniProt"])
up_subset = set(df_valid.query("direction == 'upregulated'")["UniProt"])
down_subset = set(df_valid.query("direction == 'downregulated'")["UniProt"])
upstream_subset = set(df_valid.query("relation_to_top == 'upstream'")["UniProt"])
downstream_subset = set(df_valid.query("relation_to_top == 'downstream'")["UniProt"])

subsets = {
    "TopAbsDeltaSlope": top_abs_slope,
    "Upregulated": up_subset,
    "Downregulated": down_subset,
    "Upstream": upstream_subset,
    "Downstream": downstream_subset,
}

# ======================================================
# üßÆ COMPUTE METRIC SUMMARY PER SUBSET
# ======================================================
summary_records = []
for label, nodes in subsets.items():
    sub = df_valid[df_valid["UniProt"].isin(nodes)]
    summary_records.append({
        "subset": label,
        "N": len(sub),
        "mean_betweenness_frac": sub["betweenness_frac"].mean(),
        "mean_ppr_reach": sub["reach_coverage"].mean(),
        "mean_avg_dist_to_top": sub["avg_dist_to_top"].mean(),
        "mean_abs_delta_slope": sub["abs_delta_slope"].mean(),
        "mean_abs_delta_auc": sub["abs_delta_auc"].mean(),
        "mean_abs_dyn_dev": sub["abs_dyn_dev"].mean(),
    })

subset_summary = pd.DataFrame(summary_records)
display(subset_summary.style.background_gradient(cmap="Blues"))
subset_summary.to_csv(base_path / "subset_metric_summary.csv", index=False)
print("üíæ Saved subset summaries")

# ======================================================
# üß† ROLE FAMILY CLASSIFICATION (integrative logic)
# ======================================================
role_definitions = {
    "Receptor_like": {
        "filter": lambda df: (df["relation_to_top"] == "upstream"),
        "score": "reach_coverage"
    },
    "Adapter_like": {
        "filter": lambda df: (df["betweenness_frac"] > 0.1) & (df["relation_to_top"].isin(["upstream", "bidirectional"])),
        "score": "betweenness_frac"
    },
    "Kinase_like": {
        "filter": lambda df: df["gene_symbol"].str.contains("AKT|MAPK|SRC|PRK|PTK|RAF|JAK|GRK", regex=True, na=False),
        "score": "dyn_deviation"
    },
    "Local_amplifiers": {
        "filter": lambda df: (df["relation_to_top"] == "bidirectional") & (df["direction"] == "upregulated"),
        "score": "betweenness_frac"
    },
    "Effectors": {
        "filter": lambda df: (df["relation_to_top"] == "downstream") & (df["direction"] == "downregulated"),
        "score": "reach_to_altered"
    },
    "FlexibleComposite": {
        "filter": lambda df: df["relation_to_top"].isin(["bidirectional", "downstream", "upstream"]),
        "score": "dyn_deviation"
    },
}

role_tables = {}
for role_name, info in role_definitions.items():
    sub = df_valid[info["filter"](df_valid)].copy()
    if sub.empty:
        print(f"‚ö†Ô∏è No matches for {role_name}, skipping.")
        continue
    sub = sub.sort_values(info["score"], ascending=False).head(TOP_N)
    sub = sub.rename(columns={info["score"]: f"{role_name}Score"})
    cols = [
        "UniProt", "gene_symbol", f"{role_name}Score",
        "signal_strength", "abs_signal_strength", "delta_slope", "delta_auc", "dyn_deviation",
        "betweenness", "betweenness_frac", "ppr_reach", "avg_dist_to_top", "reach_coverage",
        "reach_upregulated", "reach_downregulated",
        "reach_upstream_up", "reach_upstream_down",
        "reach_downstream_up", "reach_downstream_down",
        "reach_to_altered"
    ]
    sub = sub[[c for c in cols if c in sub.columns]]
    role_tables[f"Top_{role_name}"] = sub

print(f"‚úÖ Created {len(role_tables)} functional role families\n")
for name, tab in role_tables.items():
    print(f"‚Äî {name} ‚Äî ({len(tab)} proteins)")
    display(tab.head(10))

# ======================================================
# üíæ EXPORT RESULTS
# ======================================================
pd.concat(role_tables, names=["role_family"]).to_csv(base_path / "functional_role_tables.csv")
print(f"üíæ Saved role-family tables ‚Üí {base_path/'functional_role_tables.csv'}")


‚è≥ Computing distance matrix and directional reach metrics...


Directional reach:   0%|          | 0/285 [00:00<?, ?it/s]

‚úÖ Directional reach fractions computed


Unnamed: 0,subset,N,mean_betweenness_frac,mean_ppr_reach,mean_avg_dist_to_top,mean_abs_delta_slope,mean_abs_delta_auc,mean_abs_dyn_dev
0,TopAbsDeltaSlope,50,0.050255,0.773171,3.720444,0.28236,1.066875,0.23743
1,Upregulated,61,0.038546,0.764894,3.848093,0.198987,0.763119,0.176498
2,Downregulated,68,0.039878,0.756456,3.764854,0.139847,0.877658,0.17438
3,Upstream,97,0.002074,0.773196,4.503251,0.041166,0.31334,0.058081
4,Downstream,0,,,,,,


üíæ Saved subset summaries
‚ö†Ô∏è No matches for Effectors, skipping.
‚úÖ Created 5 functional role families

‚Äî Top_Receptor_like ‚Äî (50 proteins)


Unnamed: 0,UniProt,gene_symbol,Receptor_likeScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,dyn_deviation,betweenness,betweenness_frac,avg_dist_to_top,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
479,Q07912,TNK2,0.804878,0.113915,0.113915,0.113915,0.403774,0.133094,0.0,0.0,3.060606,0.803279,0.720588,0.714286,0.5,,,0.75969
411,Q9BZL6,PRKD2,0.804878,0.08067,0.08067,0.08067,1.12335,0.143834,0.0,0.0,5.848485,0.016393,0.014706,0.071429,0.0,,,0.015504
40,P11234,,0.780488,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.4375,0.016393,0.0,0.071429,0.0,,,0.007752
38,P11233,,0.780488,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.4375,0.786885,0.720588,0.642857,0.5,,,0.751938
95,Q7Z434,MAVS,0.780488,-0.068448,0.068448,-0.068448,0.125029,0.085345,402.0,0.013053,3.96875,0.819672,0.735294,0.642857,0.555556,,,0.775194
96,Q9UHD2,,0.780488,0.0,0.0,0.0,0.0,0.0,1741.467,0.056544,3.125,0.786885,0.720588,0.642857,0.5,,,0.751938
97,P04632,,0.780488,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.125,0.786885,0.720588,0.642857,0.5,,,0.751938
66,Q13464,ROCK1,0.780488,-0.008596,0.008596,-0.008596,0.500543,0.107813,0.0,0.0,3.71875,0.786885,0.720588,0.642857,0.5,,,0.751938
131,P10599,,0.780488,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.6875,0.786885,0.720588,0.642857,0.5,,,0.751938
132,Q9Y2U5,MAP3K2,0.780488,-0.050911,0.050911,-0.050911,-0.604702,0.100112,0.0,0.0,3.28125,0.786885,0.720588,0.642857,0.5,,,0.751938


‚Äî Top_Adapter_like ‚Äî (26 proteins)


Unnamed: 0,UniProt,gene_symbol,Adapter_likeScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,dyn_deviation,betweenness,avg_dist_to_top,reach_coverage,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
129,P12931,SRC,1.0,0.413172,0.413172,0.413172,2.490372,0.458368,30798.46386,2.129032,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
5,P31749,AKT1,0.868529,0.045927,0.045927,0.045927,-0.903394,0.167479,26749.364661,2.125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
12,P17252,PRKCA,0.634826,-0.08651,0.08651,-0.08651,-0.005338,0.070294,19551.671854,2.125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
60,P28482,,0.526991,0.0,0.0,0.0,0.0,0.0,16230.523031,2.34375,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
4,Q16539,MAPK14,0.418388,-0.009051,0.009051,-0.009051,0.346202,0.044965,12885.704631,2.6875,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
136,P17612,PRKACA,0.362082,-0.49096,0.49096,-0.49096,-4.566348,0.674963,11151.562233,2.193548,0.756098,0.0,0.014706,0.0,0.0,,,0.007752
23,P63000,,0.345829,0.0,0.0,0.0,0.0,0.0,10651.000049,2.53125,0.780488,0.0,0.0,0.0,0.0,,,0.0
16,P42574,,0.323397,0.0,0.0,0.0,0.0,0.0,9960.129944,3.0,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
84,P35568,IRS1,0.285865,0.017956,0.017956,0.017956,1.067926,0.252015,8804.192305,2.5625,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
43,P55211,CASP9,0.267591,0.040176,0.040176,0.040176,0.512541,0.099164,8241.389418,4.0,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938


‚Äî Top_Kinase_like ‚Äî (23 proteins)


Unnamed: 0,UniProt,gene_symbol,Kinase_likeScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,betweenness,betweenness_frac,avg_dist_to_top,reach_coverage,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
136,P17612,PRKACA,0.674963,-0.49096,0.49096,-0.49096,-4.566348,11151.562233,0.362082,2.193548,0.756098,0.0,0.014706,0.0,0.0,,,0.007752
129,P12931,SRC,0.458368,0.413172,0.413172,0.413172,2.490372,30798.46386,1.0,2.129032,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
121,Q05655,PRKCD,0.431786,-0.178325,0.178325,-0.178325,-1.933503,2545.676046,0.082656,2.84375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
206,Q14289,PTK2B,0.313748,-0.085418,0.085418,-0.085418,-0.849174,930.412715,0.03021,2.90625,0.780488,0.786885,0.735294,0.642857,0.5,,,0.75969
26,Q96B36,AKT1S1,0.238496,-0.040233,0.040233,-0.040233,-1.593033,42.074288,0.001366,3.84375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
461,Q9Y478,PRKAB1,0.167564,-0.08535,0.08535,-0.08535,0.770234,0.0,0.0,4.78125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
5,P31749,AKT1,0.167479,0.045927,0.045927,0.045927,-0.903394,26749.364661,0.868529,2.125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
182,P15056,BRAF,0.1604,0.086948,0.086948,0.086948,1.020664,1934.167021,0.062801,3.28125,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
411,Q9BZL6,PRKD2,0.143834,0.08067,0.08067,0.08067,1.12335,0.0,0.0,5.848485,0.804878,0.016393,0.014706,0.071429,0.0,,,0.015504
145,Q04759,PRKCQ,0.134451,0.13253,0.13253,0.13253,0.742928,1215.309725,0.03946,3.5,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938


‚Äî Top_Local_amplifiers ‚Äî (47 proteins)


Unnamed: 0,UniProt,gene_symbol,Local_amplifiersScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,dyn_deviation,betweenness,avg_dist_to_top,reach_coverage,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
129,P12931,SRC,1.0,0.413172,0.413172,0.413172,2.490372,0.458368,30798.46386,2.129032,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
89,P49841,GSK3B,0.196432,0.362015,0.362015,0.362015,-0.307143,0.220227,6049.81674,4.225806,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
2,P06241,FYN,0.136391,0.067988,0.067988,0.067988,-0.150654,0.047711,4200.646077,2.78125,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
559,P60709,ACTB,0.13058,0.48517,0.48517,0.48517,-0.204652,0.263284,4021.649431,3.096774,0.756098,0.016393,0.0,0.0,0.0,,,0.007752
152,P29350,PTPN6,0.08297,0.282983,0.282983,0.282983,-0.62168,0.208663,2555.361812,2.709677,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
230,Q13043,STK4,0.062886,0.153381,0.153381,0.153381,-0.705693,0.129283,1936.791375,4.6875,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
182,P15056,BRAF,0.062801,0.086948,0.086948,0.086948,1.020664,0.1604,1934.167021,3.28125,0.780488,0.016393,0.0,0.0,0.0,,,0.007752
133,P62993,GRB2,0.062218,0.094486,0.094486,0.094486,0.262989,0.078805,1916.219366,2.9375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
127,Q13976,PRKG1,0.04966,0.158412,0.158412,0.158412,-0.94949,0.086524,1529.442594,3.5,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
114,Q14247,CTTN,0.049587,0.269097,0.269097,0.269097,0.01679,0.167036,1527.201971,2.967742,0.756098,0.016393,0.0,0.0,0.0,,,0.007752


‚Äî Top_FlexibleComposite ‚Äî (50 proteins)


Unnamed: 0,UniProt,gene_symbol,FlexibleCompositeScore,signal_strength,abs_signal_strength,delta_slope,delta_auc,betweenness,betweenness_frac,avg_dist_to_top,reach_coverage,reach_upregulated,reach_downregulated,reach_upstream_up,reach_upstream_down,reach_downstream_up,reach_downstream_down,reach_to_altered
136,P17612,PRKACA,0.674963,-0.49096,0.49096,-0.49096,-4.566348,11151.562233,0.362082,2.193548,0.756098,0.0,0.014706,0.0,0.0,,,0.007752
531,O60890,OPHN1,0.593812,0.673443,0.673443,0.673443,3.279046,0.0,0.0,3.53125,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
129,P12931,SRC,0.458368,0.413172,0.413172,0.413172,2.490372,30798.46386,1.0,2.129032,0.756098,0.786885,0.720588,0.642857,0.5,,,0.751938
121,Q05655,PRKCD,0.431786,-0.178325,0.178325,-0.178325,-1.933503,2545.676046,0.082656,2.84375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
161,P29353,SHC1,0.421468,0.406014,0.406014,0.406014,2.833307,1317.106387,0.042765,2.870968,0.756098,0.032787,0.0,0.071429,0.0,,,0.015504
19,Q05209,PTPN12,0.332846,0.222116,0.222116,0.222116,1.574198,195.11631,0.006335,2.71875,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
59,P51812,RPS6KA3,0.331884,-0.09726,0.09726,-0.09726,-1.975463,824.073668,0.026757,3.34375,0.780488,0.786885,0.720588,0.642857,0.5,,,0.751938
326,P23528,CFL1,0.331504,-0.142621,0.142621,-0.142621,-2.171356,2655.0,0.086206,4.0,0.780488,0.0,0.014706,0.0,0.0,,,0.007752
260,Q8N4C8,MINK1,0.330732,-0.181276,0.181276,-0.181276,-1.625709,0.0,0.0,3.6875,0.780488,0.786885,0.735294,0.642857,0.555556,,,0.75969
229,P21333,FLNA,0.327994,-0.337446,0.337446,-0.337446,-1.808568,1018.685161,0.033076,3.129032,0.756098,0.0,0.014706,0.0,0.0,,,0.007752


üíæ Saved role-family tables ‚Üí analysis/networks/enhanced_reach/functional_role_tables.csv
