In [2]:
import ast
import json
import os
import time

import networkx as nx
import numpy as np
import pandas as pd
# Project root: the current working directory written with forward slashes.
# When the notebook is launched from the "notebooks" folder, that final
# component is dropped. Only a trailing "/notebooks" is removed so that other
# parts of the path that merely contain this text (for example
# ".../notebooks_archive/...") are left untouched.
PATH_ROOT = os.getcwd().replace("\\", "/")
if PATH_ROOT.endswith("/notebooks"):
    PATH_ROOT = PATH_ROOT[: -len("/notebooks")]

In [3]:
def read_json_from(path):
    """Load a JSON file and return the decoded object.

    If the top-level JSON value is a string, that string is treated as the
    text of a Python literal (for example the ``repr`` of a dict) and is
    parsed a second time, so the caller receives the structure it describes.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON value, or the parsed Python literal when the
        top-level JSON value is a string.

    Raises:
        ValueError, SyntaxError: If the top-level string is not a valid
            Python literal.
    """
    with open(path, "r") as read_file:
        payload = json.load(read_file)
    if isinstance(payload, str):
        # literal_eval only accepts literals (dict, list, str, numbers, ...),
        # so a data file can no longer run arbitrary code the way eval() could.
        return ast.literal_eval(payload)
    return payload

# ProteomeHD protein-pair table, read from the project's data_sources folder.
all_proteomeHD_pairs_path = (
    f"{PATH_ROOT}/data_sources/ProteomeHD/all_proteomeHD_pairs.csv"
)
all_proteomeHD_pairs_df = pd.read_csv(filepath_or_buffer=all_proteomeHD_pairs_path)

In [4]:
# Build the list of CORUM proteins, leaving out every protein whose complex-id
# collection contains the excluded complex.
complex_id_to_exclude = 306 # Ribosome Complex ID
all_corum_proteins_path = f"{PATH_ROOT}/data_sources/Corum/protein_to_included_complex_id.json"
all_corum_proteins = read_json_from(all_corum_proteins_path)
all_corum_proteins = [
    protein
    for protein, complex_ids in all_corum_proteins.items()
    if complex_id_to_exclude not in complex_ids
]

In [5]:
# Keep only the pairs in which both proteins appear in the CORUM protein list.
all_proteomeHD_pairs_df_corum_only = all_proteomeHD_pairs_df[
    all_proteomeHD_pairs_df["protein1_simplified_name"].isin(all_corum_proteins)
    & all_proteomeHD_pairs_df["protein2_simplified_name"].isin(all_corum_proteins)
]

In [6]:
# Display the CORUM-only pair table as this cell's output.
all_proteomeHD_pairs_df_corum_only

Unnamed: 0,protein1_majority_name,protein2_majority_name,protein1_simplified_name,protein2_simplified_name,r,r2,observations
10324,A0AVF1-2;A0AVF1-3;A0AVF1,A0FGR8-2;A0FGR8;A0FGR8-4,A0AVF1,A0FGR8,0.083727,0.007010,10
10325,A0AVF1-2;A0AVF1-3;A0AVF1,A0FGR8-6,A0AVF1,A0FGR8,0.946424,0.895718,4
10326,A0AVF1-2;A0AVF1-3;A0AVF1,A0JLT2;A0JLT2-2,A0AVF1,A0JLT2,1.000000,1.000000,2
10332,A0AVF1-2;A0AVF1-3;A0AVF1,A1A4S6,A0AVF1,A1A4S6,-1.000000,1.000000,2
10341,A0AVF1-2;A0AVF1-3;A0AVF1,A2A288;A2A288-3;A2A288-4,A0AVF1,A2A288,0.000000,0.000000,1
...,...,...,...,...,...,...,...
53276888,Q9Y6R4;Q9Y6R4-2,Q9Y6X2,Q9Y6R4,Q9Y6X2,-0.300818,0.090492,9
53276896,Q9Y6R4;Q9Y6R4-2,Q9Y6Y8;Q9Y6Y8-2,Q9Y6R4,Q9Y6Y8,-0.052582,0.002765,120
53276948,Q9Y6W5;Q9Y6W5-2,Q9Y6X2,Q9Y6W5,Q9Y6X2,-0.279174,0.077938,14
53276956,Q9Y6W5;Q9Y6W5-2,Q9Y6Y8;Q9Y6Y8-2,Q9Y6W5,Q9Y6Y8,-0.258342,0.066740,182


In [12]:
def pair_df_to_graph(df):
    """Build an undirected graph with one edge per row of a protein-pair table.

    Each row contributes an edge between its ``protein1_majority_name`` and
    ``protein2_majority_name`` values. The edge carries three attributes
    taken from the same row: ``r`` and ``r2`` (stored as ``float``) and
    ``observations`` (stored as ``int``). Because ``nx.Graph`` holds at most
    one edge per node pair, a pair that occurs in several rows ends up with
    the attributes of its last row.

    Args:
        df: DataFrame with the columns ``protein1_majority_name``,
            ``protein2_majority_name``, ``r``, ``r2`` and ``observations``.

    Returns:
        A ``networkx.Graph``; it has no nodes when ``df`` has no rows.
    """
    ppa_graph = nx.Graph()
    if df.empty:
        return ppa_graph

    # Walk the needed columns in lockstep instead of using iterrows(), which
    # builds a full Series object for every row and is slow on large tables.
    edge_rows = zip(
        df['protein1_majority_name'],
        df['protein2_majority_name'],
        df['r'],
        df['r2'],
        df['observations'],
    )
    for protein1, protein2, r, r2, observations in edge_rows:
        # Cast to built-in float/int so the edge attributes are plain Python
        # values regardless of the column dtypes.
        ppa_graph.add_edge(
            protein1,
            protein2,
            r=float(r),
            r2=float(r2),
            observations=int(observations),
        )
    return ppa_graph

def generate_graph(r_list, obs_list, pairs_df=None):
    """Write one GraphML file per (r threshold, observations threshold) pair.

    For every combination of ``r_val`` in ``r_list`` and ``obs_val`` in
    ``obs_list``, the rows with ``r >= r_val`` and
    ``observations >= obs_val`` are turned into a graph with
    ``pair_df_to_graph`` and written to
    ``{PATH_ROOT}/graph_outputs/graphml_graph_corum_only_no_ribosome/``
    as ``r_<r without the dot>_obs_<obs>.graphml``. Progress is printed
    before each file is written.

    Args:
        r_list: Iterable of minimum ``r`` values.
        obs_list: Iterable of minimum ``observations`` values.
        pairs_df: Pair table to filter. Defaults to the notebook-level
            ``all_proteomeHD_pairs_df_corum_only`` frame.

    Returns:
        None. The function is used for its side effect of writing files.
    """
    if pairs_df is None:
        pairs_df = all_proteomeHD_pairs_df_corum_only

    output_dir = f"{PATH_ROOT}/graph_outputs/graphml_graph_corum_only_no_ribosome"
    # Writing fails if the target folder is missing, so create it up front.
    os.makedirs(output_dir, exist_ok=True)

    for r_val in r_list:
        # Both values depend only on r_val, so compute them once per outer pass.
        r_tag = str(r_val).replace('.', '')
        r_filtered_df = pairs_df[pairs_df["r"] >= r_val]
        for obs_val in obs_list:
            graph_path = f"{output_dir}/r_{r_tag}_obs_{obs_val}.graphml"
            print(f"Currently writing: r>={r_val}, obs>={obs_val} to")
            print(graph_path)
            filtered_df = r_filtered_df[r_filtered_df["observations"] >= obs_val]
            graph_to_write = pair_df_to_graph(filtered_df)
            nx.write_graphml(graph_to_write, graph_path)


In [13]:
# Threshold grids: r from 0.50 to 0.95 in steps of 0.05, and observation
# counts from 50 to 150 in steps of 10.
r_list = np.arange(50, 96, 5) / 100
obs_list = [*range(50, 151, 10)]
print(f"r_list: {r_list!s}")
print(f"obs_list: {obs_list!s}")

r_list: [0.5  0.55 0.6  0.65 0.7  0.75 0.8  0.85 0.9  0.95]
obs_list: [50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150]


In [14]:
# Write one GraphML file for every (r, observations) threshold combination.
generate_graph(r_list,obs_list)

Currently writing: r>=0.5, obs>=50 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_05_obs_50.graphml
Currently writing: r>=0.5, obs>=60 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_05_obs_60.graphml
Currently writing: r>=0.5, obs>=70 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_05_obs_70.graphml
Currently writing: r>=0.5, obs>=80 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_05_obs_80.graphml
Currently writing: r>=0.5, obs>=90 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_05_obs_9

Currently writing: r>=0.65, obs>=130 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_065_obs_130.graphml
Currently writing: r>=0.65, obs>=140 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_065_obs_140.graphml
Currently writing: r>=0.65, obs>=150 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_065_obs_150.graphml
Currently writing: r>=0.7, obs>=50 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_07_obs_50.graphml
Currently writing: r>=0.7, obs>=60 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosom

Currently writing: r>=0.85, obs>=110 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_085_obs_110.graphml
Currently writing: r>=0.85, obs>=120 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_085_obs_120.graphml
Currently writing: r>=0.85, obs>=130 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_085_obs_130.graphml
Currently writing: r>=0.85, obs>=140 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_ribosome/r_085_obs_140.graphml
Currently writing: r>=0.85, obs>=150 to
D:/Desktop/Northeastern_University/Research/Proteomics/ProteinProteinAssociation/Development/graph_outputs/graphml_graph_corum_only_no_r