In [1]:
import pandas as pd
import os
import json
import mygene
from tqdm.notebook import tqdm
import ast
from functools import partial, wraps
from typing import TextIO, Callable

In [2]:
def read_cx(fname):
    with open(fname, "r") as f:
        data = json.load(f)
    
    data_dict = {}
    for d in data:
        for k, v in d.items():
            data_dict[k] = v
    return data_dict

mg = mygene.MyGeneInfo()

# BioPlex3

In [7]:
bioplex_data = read_cx("data/networks/Bioplex3/BioPlex 3 - Shared Interactions.cx")

In [8]:
bioplex_data.keys()

dict_keys(['numberVerification', 'metaData', 'nodeAttributes', 'cyHiddenAttributes', 'nodes', 'networkAttributes', 'cyTableColumn', 'cartesianLayout', 'edgeAttributes', 'edges', 'cyVisualProperties', 'status'])

In [9]:
bioplex_data["nodes"][:10]

[{'@id': 8363, 'n': 'MATR3', 'r': 'ncbigene:9782'},
 {'@id': 8362, 'n': 'ZBTB40', 'r': 'ncbigene:9923'},
 {'@id': 8361, 'n': 'GAB2', 'r': 'ncbigene:9846'},
 {'@id': 8360, 'n': 'CYTIP', 'r': 'ncbigene:9595'},
 {'@id': 8359, 'n': 'PJA2', 'r': 'ncbigene:9867'},
 {'@id': 8358, 'n': 'SH3BP5', 'r': 'ncbigene:9467'},
 {'@id': 8357, 'n': 'PITPNM1', 'r': 'ncbigene:9600'},
 {'@id': 8356, 'n': 'SLC25A51', 'r': 'ncbigene:92014'},
 {'@id': 8355, 'n': 'ACVR2B', 'r': 'ncbigene:93'},
 {'@id': 8354, 'n': 'CABLES1', 'r': 'ncbigene:91768'}]

In [10]:
nodes_data = pd.DataFrame(bioplex_data["nodes"])
nodes_data[["r1", "NCBI_id"]] = nodes_data["r"].str.split(":", expand=True)#.drop(["r", "r1"], axis=1)
nodes_data = nodes_data.drop(["r", "r1"], axis=1)
nodes_data.to_csv("processed_data/networks/BioPlex3_shared/nodes_metadata.csv", index=False)
nodes_data

Unnamed: 0,@id,n,r,r1,NCBI_id
0,8363,MATR3,ncbigene:9782,ncbigene,9782
1,8362,ZBTB40,ncbigene:9923,ncbigene,9923
2,8361,GAB2,ncbigene:9846,ncbigene,9846
3,8360,CYTIP,ncbigene:9595,ncbigene,9595
4,8359,PJA2,ncbigene:9867,ncbigene,9867
...,...,...,...,...,...
8359,4,RNF123,ncbigene:63891,ncbigene,63891
8360,3,KCMF1,ncbigene:56888,ncbigene,56888
8361,2,UBR4,ncbigene:23352,ncbigene,23352
8362,1,UBAC1,ncbigene:10422,ncbigene,10422


In [11]:
nodes_data["r1"].value_counts()

ncbigene    8364
Name: r1, dtype: int64

In [23]:
node2ncbi = {}
for i, row in nodes_data.iterrows():
    node2ncbi[row["@id"]] = row["NCBI_id"]

In [24]:
edges_data = pd.DataFrame(bioplex_data["edges"])
edges_data["node1"] = edges_data["s"].map(node2ncbi)
edges_data["node2"] = edges_data["t"].map(node2ncbi)
edges_data

Unnamed: 0,@id,s,t,i,node1,node2
0,35695,8363,693,interacts-with,9782,9991
1,35538,8356,394,interacts-with,92014,939
2,35522,8353,3418,interacts-with,91452,9218
3,35521,8353,2754,interacts-with,91452,9217
4,35473,8352,6285,interacts-with,9060,9061
...,...,...,...,...,...,...
35699,2191,1,1705,interacts-with,10422,285848
35700,3,0,4,interacts-with,1,63891
35701,2,0,3,interacts-with,1,56888
35702,1,0,2,interacts-with,1,23352


In [25]:
edges_data[["node1", "node2"]].to_csv("processed_data/networks/BioPlex3_shared/edges_list_ncbi.csv", index=False)

# HumanNet

In [27]:
hnet_data = pd.read_csv("data/networks/HumanNetV3/HumanNet-XC.tsv", sep="\t", header=None)
hnet_data.columns = ["node1", "node2", "score"]
hnet_data

Unnamed: 0,node1,node2,score
0,7046,7048,6.044828
1,5211,5213,5.942313
2,10641,9681,5.899878
3,5213,5214,5.898546
4,5211,5214,5.898122
...,...,...,...
1125489,29935,9988,1.460302
1125490,25839,51226,1.460301
1125491,128653,55510,1.460299
1125492,100129543,138009,1.460298


In [28]:
hnet_data.to_csv("processed_data/networks/HumanNetV3/edges_list_ncbi.csv", index=False)

# PCNet

In [33]:
pcnet_data = read_cx("data/networks/PCNet/Updated PCNet v1.3.cx")
pcnet_data["nodes"][:10]
nodes_data = pd.DataFrame(pcnet_data["nodes"])
nodes_data

Unnamed: 0,@id,n
0,0,UBE2Q1
1,1,RNF14
2,2,UBE2Q2
3,3,TMCO1
4,4,UBAC1
...,...,...
18815,18815,CD300LD
18816,18816,SLC10A5
18817,18817,BAGE4
18818,18818,FAM181B


In [38]:
pcnet_data["edges"][:5]

[{'@id': 0, 's': 0, 't': 1, 'i': 'neighbor-of'},
 {'@id': 1, 's': 0, 't': 2, 'i': 'neighbor-of'},
 {'@id': 2, 's': 0, 't': 3, 'i': 'neighbor-of'},
 {'@id': 3, 's': 0, 't': 4, 'i': 'neighbor-of'},
 {'@id': 4, 's': 0, 't': 5, 'i': 'neighbor-of'}]

In [36]:
pcnet_genes_names_set = sorted(list(set(nodes_data["n"])))
pcnet_genes_names_set[:5]

['A1BG', 'A1CF', 'A2M', 'A2ML1', 'A3GALT2']

In [37]:
gene_mapping = mg.querymany(pcnet_genes_names_set, scopes=["symbol", "alias"], species="human", returnall=True)
gene_mapping["out"][:10]

querying 1-1000...done.
querying 1001-2000...done.
querying 2001-3000...done.
querying 3001-4000...done.
querying 4001-5000...done.
querying 5001-6000...done.
querying 6001-7000...done.
querying 7001-8000...done.
querying 8001-9000...done.
querying 9001-10000...done.
querying 10001-11000...done.
querying 11001-12000...done.
querying 12001-13000...done.
querying 13001-14000...done.
querying 14001-15000...done.
querying 15001-16000...done.
querying 16001-17000...done.
querying 17001-18000...done.
querying 18001-18820...done.
Finished.
894 input query terms found dup hits:
	[('A2M', 2), ('AAK1', 2), ('ABO', 3), ('ACAT1', 2), ('ACTB', 2), ('ADA2', 2), ('ADAM18', 2), ('ADAM1
159 input query terms found no hit:
	['AC002310.5', 'AC002996.1', 'AC003002.1', 'AC003688.1', 'AC004080.3', 'AC004223.3', 'AC004691.2', '


[{'query': 'A1BG',
  '_id': '1',
  '_score': 95.32558,
  'entrezgene': '1',
  'name': 'alpha-1-B glycoprotein',
  'symbol': 'A1BG',
  'taxid': 9606},
 {'query': 'A1CF',
  '_id': '29974',
  '_score': 87.59521,
  'entrezgene': '29974',
  'name': 'APOBEC1 complementation factor',
  'symbol': 'A1CF',
  'taxid': 9606},
 {'query': 'A2M',
  '_id': '2',
  '_score': 96.488075,
  'entrezgene': '2',
  'name': 'alpha-2-macroglobulin',
  'symbol': 'A2M',
  'taxid': 9606},
 {'query': 'A2M',
  '_id': '3494',
  '_score': 19.241358,
  'entrezgene': '3494',
  'name': 'immunoglobulin heavy constant alpha 2 (A2m marker)',
  'symbol': 'IGHA2',
  'taxid': 9606},
 {'query': 'A2ML1',
  '_id': '144568',
  '_score': 93.95495,
  'entrezgene': '144568',
  'name': 'alpha-2-macroglobulin like 1',
  'symbol': 'A2ML1',
  'taxid': 9606},
 {'query': 'A3GALT2',
  '_id': '127550',
  '_score': 92.46112,
  'entrezgene': '127550',
  'name': 'alpha 1,3-galactosyltransferase 2',
  'symbol': 'A3GALT2',
  'taxid': 9606},
 {'que

In [41]:
ncbi_ids = []
for gn in tqdm(pcnet_genes_names_set):
    res = [e for e in gene_mapping["out"] if e["query"]==gn]
    res = [r for r in res if "_score" in r.keys() and "entrezgene" in r.keys()]
    if len(res)<1:
        ncbi_ids.append(None)
        continue
#     elif len(res)>1:
#         break
        
    res = sorted(res, key=lambda x: x["_score"], reverse=True)
    ncbi_ids.append(res[0]["entrezgene"])
len(ncbi_ids)

  0%|          | 0/18820 [00:00<?, ?it/s]

18820

In [42]:
pcnet_gene2ncbi = {g: nid for g, nid in zip(pcnet_genes_names_set, ncbi_ids)}
pcnet_id2gene = {row["@id"]: row["n"] for i, row in nodes_data.iterrows()}
# for gname, ncbi_id in zip(pcnet_genes_names_set, ncbi_ids):
#     if ncbi_id is None:
#         pcnet_gene2ncbi[gname] = "pcnet_id_" + 

In [47]:
pcnet_ncbi_edges = []
for e in pcnet_data["edges"]:
    gname1 = pcnet_id2gene[e["s"]]
    gname2 = pcnet_id2gene[e["t"]]    
    ncbi1 = pcnet_gene2ncbi[gname1]
    ncbi2 = pcnet_gene2ncbi[gname2]
    if ncbi1 is None:
        ncbi1 = "pcnet_id_" + str(e["s"])
    if ncbi2 is None:
        ncbi2 = "pcnet_id_" + str(e["t"])
        
    pcnet_ncbi_edges.append({"node1": ncbi1, "node2": ncbi2})
pcnet_ncbi_edges = pd.DataFrame(pcnet_ncbi_edges)
pcnet_ncbi_edges

Unnamed: 0,node1,node2
0,55585,9604
1,55585,92912
2,55585,54499
3,55585,10422
4,55585,5867
...,...,...
2693245,652968,729438
2693246,154790,166752
2693247,25832,149013
2693248,150677,146556


In [49]:
pcnet_ncbi_edges.to_csv("processed_data/networks/PCNet/edges_list_ncbi.csv", index=False)

In [48]:
nodes_data["NCBI_id"]=nodes_data["n"].map(pcnet_gene2ncbi)
nodes_data

Unnamed: 0,@id,n,NCBI_id
0,0,UBE2Q1,55585
1,1,RNF14,9604
2,2,UBE2Q2,92912
3,3,TMCO1,54499
4,4,UBAC1,10422
...,...,...,...
18815,18815,CD300LD,100131439
18816,18816,SLC10A5,347051
18817,18817,BAGE4,85317
18818,18818,FAM181B,220382


In [50]:
nodes_data.to_csv("processed_data/networks/PCNet/nodes_metadata.csv", index=False)

# ProteomeHD

In [46]:
phd_data = read_cx("data/networks/ProteomeHD/ProteomeHD - Top 0.5% Co-Regulated Proteins Network.cx")
phd_data["nodes"][:5]

[{'@id': 0, 'n': 'ARPC3', 'r': 'uniprot:O15145'},
 {'@id': 1, 'n': 'ARPC2', 'r': 'uniprot:O15144'},
 {'@id': 2, 'n': 'MYL1', 'r': 'uniprot:P05976-2'},
 {'@id': 3, 'n': 'MYL12B', 'r': 'uniprot:O14950'},
 {'@id': 4, 'n': 'HNRNPA1', 'r': 'uniprot:P09651'}]

In [47]:
nodes_data = pd.DataFrame(phd_data["nodes"])
nodes_data[["id_type", "gene_id"]] = nodes_data["r"].str.split(":", expand=True)
nodes_data

Unnamed: 0,@id,n,r,id_type,gene_id
0,0,ARPC3,uniprot:O15145,uniprot,O15145
1,1,ARPC2,uniprot:O15144,uniprot,O15144
2,2,MYL1,uniprot:P05976-2,uniprot,P05976-2
3,3,MYL12B,uniprot:O14950,uniprot,O14950
4,4,HNRNPA1,uniprot:P09651,uniprot,P09651
...,...,...,...,...,...
2713,2713,APOBEC3F,uniprot:Q8IUX4,uniprot,Q8IUX4
2714,2714,PEF1,uniprot:Q9UBV8,uniprot,Q9UBV8
2715,2715,CNOT7,uniprot:Q9UIV1,uniprot,Q9UIV1
2716,2716,COG6,uniprot:Q9Y2V7,uniprot,Q9Y2V7


In [48]:
nodes_data["id_type"].value_counts()

uniprot    2718
Name: id_type, dtype: int64

In [49]:
gids = list(nodes_data["gene_id"])
gene_mapping = mg.querymany(gids, scopes="uniprot", species="human", returnall=True)

querying 1-1000...done.
querying 1001-2000...done.
querying 2001-2718...done.
Finished.
17 input query terms found dup hits:
	[('O14950', 2), ('Q6FI13', 2), ('P62805', 10), ('Q01081', 2), ('Q96PK6', 2), ('P68431', 10), ('Q9UG6
384 input query terms found no hit:
	['P05976-2', 'P16157-5', 'P61978-3', 'P62861', 'Q5QNW6-2', 'O14617-5', 'O94925-3', 'P07910-2', 'P257


In [51]:
ncbi_ids = []
for gn in tqdm(gids):
    res = [e for e in gene_mapping["out"] if e["query"]==gn]
    res = [r for r in res if "_score" in r.keys() and "entrezgene" in r.keys()]
    if len(res)<1:
        ncbi_ids.append(None)
        continue
#     elif len(res)>1:
#         break

    res = sorted(res, key=lambda x: x["_score"], reverse=True)
    ncbi_ids.append(res[0]["entrezgene"])
irow = 0
for i, row in nodes_data.iterrows():
    if ncbi_ids[irow] is None:
        ncbi_ids[irow] = row["id_type"] + "_" +(row["gene_id"])
    irow += 1
nodes_data["NCBI_id"] = ncbi_ids
nodes_data

  0%|          | 0/2718 [00:00<?, ?it/s]

Unnamed: 0,@id,n,r,id_type,gene_id,NCBI_id
0,0,ARPC3,uniprot:O15145,uniprot,O15145,10094
1,1,ARPC2,uniprot:O15144,uniprot,O15144,10109
2,2,MYL1,uniprot:P05976-2,uniprot,P05976-2,uniprot_P05976-2
3,3,MYL12B,uniprot:O14950,uniprot,O14950,103910
4,4,HNRNPA1,uniprot:P09651,uniprot,P09651,3178
...,...,...,...,...,...,...
2713,2713,APOBEC3F,uniprot:Q8IUX4,uniprot,Q8IUX4,200316
2714,2714,PEF1,uniprot:Q9UBV8,uniprot,Q9UBV8,553115
2715,2715,CNOT7,uniprot:Q9UIV1,uniprot,Q9UIV1,29883
2716,2716,COG6,uniprot:Q9Y2V7,uniprot,Q9Y2V7,57511


In [52]:
nodes_data.to_csv("processed_data/networks/ProteomeHD/nodes_metadata.csv", index=False)

In [55]:
phd_gid2ncbi = {}
for i, row in nodes_data.iterrows():
    phd_gid2ncbi[row["@id"]] = row["NCBI_id"]

In [56]:
edges_data = pd.DataFrame(phd_data["edges"])
edges_data["node1"] = edges_data["s"].map(phd_gid2ncbi)
edges_data["node2"] = edges_data["t"].map(phd_gid2ncbi)
edges_data

Unnamed: 0,@id,s,t,i,node1,node2
0,0,0,1,correlates-with,10094,10109
1,1,2,3,correlates-with,uniprot_P05976-2,103910
2,2,4,5,correlates-with,3178,10236
3,3,6,7,correlates-with,3434,3437
4,4,8,5,correlates-with,3191,10236
...,...,...,...,...,...,...
63285,63285,961,2067,correlates-with,23383,2073
63286,63286,961,1765,correlates-with,23383,uniprot_Q9Y5U2-2
63287,63287,1814,1021,correlates-with,22880,9646
63288,63288,1814,462,correlates-with,22880,uniprot_Q99504-5


In [57]:
edges_data[["node1", "node2"]].to_csv("processed_data/networks/ProteomeHD/edges_list_ncbi.csv", index=False)

# STRING

In [3]:
string_data = read_cx("data/networks/STRING/STRING - Human Protein Links - High Confidence (Score = 0.7).cx")
string_data["nodes"][:10]

[{'@id': 17184, 'n': 'NEK3', 'r': 'uniprot:P51956'},
 {'@id': 17183, 'n': 'ensembl:ENSP00000478796', 'r': 'uniprot:A0A0G2JMU2'},
 {'@id': 17182, 'n': 'NEURL3', 'r': 'uniprot:Q96EH8'},
 {'@id': 17181, 'n': 'APELA', 'r': 'uniprot:P0DMC3'},
 {'@id': 17180, 'n': 'ensembl:ENSP00000485615', 'r': 'uniprot:A0A096LPH7'},
 {'@id': 17179, 'n': 'CT45A10', 'r': 'uniprot:P0DMU9'},
 {'@id': 17178, 'n': 'ALG1L2', 'r': 'uniprot:C9J202'},
 {'@id': 17177, 'n': 'SLC27A3', 'r': 'uniprot:Q5K4L6'},
 {'@id': 17176, 'n': 'CCER2', 'r': 'uniprot:I3L3R5'},
 {'@id': 17175, 'n': 'ensembl:ENSP00000457689', 'r': 'uniprot:H3BUK9'}]

In [14]:
nodes_data = pd.DataFrame(string_data["nodes"])
nodes_data[["id_type", "gene_id"]] = nodes_data["r"].str.split(":", expand=True)
nodes_data

Unnamed: 0,@id,n,r,id_type,gene_id
0,17184,NEK3,uniprot:P51956,uniprot,P51956
1,17183,ensembl:ENSP00000478796,uniprot:A0A0G2JMU2,uniprot,A0A0G2JMU2
2,17182,NEURL3,uniprot:Q96EH8,uniprot,Q96EH8
3,17181,APELA,uniprot:P0DMC3,uniprot,P0DMC3
4,17180,ensembl:ENSP00000485615,uniprot:A0A096LPH7,uniprot,A0A096LPH7
...,...,...,...,...,...
17180,4,KIF21A,uniprot:Q7Z4S6,uniprot,Q7Z4S6
17181,3,AP1B1,uniprot:Q10567,uniprot,Q10567
17182,2,KIF13B,uniprot:Q9NQT8,uniprot,Q9NQT8
17183,1,SPTBN2,uniprot:O15020,uniprot,O15020


In [38]:
scopes_mapping = {
    "uniprot": "uniprot",
    "hgnc": ["symbol", "alias"],
    "ensembl": "ensembl.gene"
}

mapped_nodes = pd.DataFrame()
for gtype in ["uniprot", "hgnc", "ensembl"]: #
    nd = nodes_data[nodes_data["id_type"]== gtype]
    gids = list(nd["gene_id"])
    
    if gtype=="ensembl":
        nd["NCBI_id"] = nd["id_type"] + "_" +(nd["gene_id"])
        mapped_nodes = pd.concat((mapped_nodes, nd))
        continue
    
    gene_mapping = mg.querymany(gids, scopes=scopes_mapping[gtype], species="human", returnall=True)
    
    ncbi_ids = []
    for gn in tqdm(gids):
        res = [e for e in gene_mapping["out"] if e["query"]==gn]
        res = [r for r in res if "_score" in r.keys() and "entrezgene" in r.keys()]
        if len(res)<1:
            ncbi_ids.append(None)
            continue
    #     elif len(res)>1:
    #         break

        res = sorted(res, key=lambda x: x["_score"], reverse=True)
        ncbi_ids.append(res[0]["entrezgene"])
    irow = 0
    for i, row in nd.iterrows():
        if ncbi_ids[irow] is None:
            ncbi_ids[irow] = row["id_type"] + "_" +(row["gene_id"])
        irow += 1
    nd["NCBI_id"] = ncbi_ids
    mapped_nodes = pd.concat((mapped_nodes, nd))
mapped_nodes
# gene_mapping["out"][:10]

querying 1-1000...done.
querying 1001-2000...done.
querying 2001-3000...done.
querying 3001-4000...done.
querying 4001-5000...done.
querying 5001-6000...done.
querying 6001-7000...done.
querying 7001-8000...done.
querying 8001-9000...done.
querying 9001-10000...done.
querying 10001-11000...done.
querying 11001-12000...done.
querying 12001-13000...done.
querying 13001-14000...done.
querying 14001-15000...done.
querying 15001-16000...done.
querying 16001-16982...done.
Finished.
147 input query terms found dup hits:
	[('E9PI22', 2), ('A0A0B4J1V0', 2), ('Q0WX57', 7), ('B8ZZ74', 2), ('B2RC85', 2), ('A1L429', 4), ('A0A
179 input query terms found no hit:
	['H3BPF8', 'P0CJ85', 'P0CJ87', 'Q8N8P7', 'Q3C1V0', 'D6RF30', 'Q9Y4X1', 'Q8N1T3', 'C9JMV9', 'Q96LS8',


  0%|          | 0/16982 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


querying 1-178...done.
Finished.
32 input query terms found dup hits:
	[('DUX4L2', 3), ('DUX4L5', 2), ('DUX4L6', 2), ('PRR23D2', 2), ('USP17L28', 2), ('USP17L27', 2), ('US


  0%|          | 0/178 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


Unnamed: 0,@id,n,r,id_type,gene_id,NCBI_id
0,17184,NEK3,uniprot:P51956,uniprot,P51956,4752
1,17183,ensembl:ENSP00000478796,uniprot:A0A0G2JMU2,uniprot,A0A0G2JMU2,102723502
2,17182,NEURL3,uniprot:Q96EH8,uniprot,Q96EH8,93082
3,17181,APELA,uniprot:P0DMC3,uniprot,P0DMC3,100506013
4,17180,ensembl:ENSP00000485615,uniprot:A0A096LPH7,uniprot,A0A096LPH7,uniprot_A0A096LPH7
...,...,...,...,...,...,...
8652,8532,ensembl:ENSP00000371886,ensembl:ENSP00000371886,ensembl,ENSP00000371886,ensembl_ENSP00000371886
9789,7395,ensembl:ENSP00000452479,ensembl:ENSP00000452479,ensembl,ENSP00000452479,ensembl_ENSP00000452479
10798,6386,ensembl:ENSP00000485591,ensembl:ENSP00000485591,ensembl,ENSP00000485591,ensembl_ENSP00000485591
13662,3522,ensembl:ENSP00000459962,ensembl:ENSP00000459962,ensembl,ENSP00000459962,ensembl_ENSP00000459962


In [40]:
mapped_nodes.sort_values(by="@id").to_csv("processed_data/networks/STRING/nodes_metadata.csv", index=False)

In [43]:
string_gid2ncbi = {}
for i, row in mapped_nodes.iterrows():
    string_gid2ncbi[row["@id"]] = row["NCBI_id"]

In [44]:
edges_data = pd.DataFrame(string_data["edges"])
edges_data["node1"] = edges_data["s"].map(string_gid2ncbi)
edges_data["node2"] = edges_data["t"].map(string_gid2ncbi)
edges_data

Unnamed: 0,@id,s,t,i,node1,node2
0,420010,17183,14908,interacts-with,102723502,727832
1,419997,17182,15097,interacts-with,93082,7016
2,418861,17176,4141,interacts-with,643669,126432
3,418762,17175,14908,interacts-with,100287399,727832
4,418733,17174,15120,interacts-with,653720,102724631
...,...,...,...,...,...,...
420529,4,0,5,interacts-with,381,100302736
420530,3,0,4,interacts-with,381,55605
420531,2,0,3,interacts-with,381,162
420532,1,0,2,interacts-with,381,23303


In [45]:
edges_data[["node1", "node2"]].to_csv("processed_data/networks/STRING/edges_list_ncbi.csv", index=False)