In [1]:
import pandas as pd
import ndex2
import ndexutil.tsv.tsv2nicecx2 as t2n
import json
#from IPython.lib.pretty import pretty
#from IPython.display import display

### Plans for transforming the single table, row-oriented network format to a CX network and uploading it to NDEx


In [2]:
# 'kinome_interactions-plan.json'


kinome_int_context = {
        "ncbigene": "http://identifiers.org/ncbigene/",
        "pubmed": "http://identifiers.org/pubmed/",
        "biogrid": "http://identifiers.org/biogrid/"
    }

kinome_int_source_plan = {
        "rep_column": "BioGRID ID Interactor A",
        "node_name_column": "Official Symbol Interactor A",
        "rep_prefix": "biogrid",
        "property_columns": [
            {
                "column_name": "Organism Interactor A",
                "attribute_name": "Organism Taxon Id"
            },
            {
                "column_name": "Synonyms Interactor A",
                "attribute_name": "alias",
                "data_type": "list_of_string",
                "delimiter": "|"
            },
            {
                "attribute_name": "type",
                "default_value": "protein"
            }
           ]
    }

# ['Chemical Interaction Count B', 'SubCategory Values B', 'Category Values B', 'PTM Count B', 'Interaction Count B'] 

kinome_int_target_plan = {
        "rep_column": "BioGRID ID Interactor B",
        "node_name_column": "Official Symbol Interactor B",
        "rep_prefix": "biogrid",
        "property_columns": [
            {
                "column_name": "Organism Interactor B",
                "attribute_name": "Organism Taxon Id"
            },
            {
                "column_name": "Synonyms Interactor B",
                "attribute_name": "alias",
                "data_type": "list_of_string",
                "delimiter": "|"
             },
            {
                "attribute_name": "type",
                "default_value": "protein"
            }
        ]
    }

kinome_int_edge_plan = {
        "default_predicate": "interacts-with",
        "property_columns": [
             {
                "column_name": "Source Database",
                "attribute_name": "Source Database",
                "data_type": "list_of_string",
                "delimiter": "|"
             },

            {
                "column_name": "#BioGRID Interaction ID",
                "attribute_name": "BioGRID Interaction ID",
                "data_type": "list_of_string",
                "delimiter": "|"
            },

            {
                "column_name": "Experimental System",
                "data_type": "list_of_string",
                "delimiter": "|"
            },
            {
                "column_name": "Experimental System Type",
                "data_type": "list_of_string",
                "delimiter": "|"
            },
            {
                "column_name": "Author",
                "data_type": "list_of_string",
                "delimiter": "|"
            },
            {
                "column_name": "Pubmed ID",
                "attribute_name": "citation",
                "data_type": "list_of_string",
                "value_prefix": "pubmed",
                "delimiter": "|"
            },
            {
                "column_name": "Throughput",
                "data_type": "list_of_string"
            },
            {
                "column_name": "Score",
                "data_type": "list_of_double"
            },
            {
                "column_name": "Modification",
                "data_type": "list_of_string",
                "delimiter": "|"
            },
            {
                "column_name": "Phenotypes",
                "data_type": "list_of_string",
                "delimiter": "|"
            },
            {
                "column_name": "Qualifications",
                "data_type": "list_of_string",
                "delimiter": "|"
            }
        ]
    }




### Load PPI INTERACTIONS

In [3]:
interactions_filename = "BIOGRID-PROJECT-kinome_project_sc-INTERACTIONS-3.5.177.tab2.txt"
df_interactions = pd.read_csv(interactions_filename,
                                dtype=str,
                                na_filter=False,
                                delimiter='\t',
                                engine='python')

### Parse PPI INTERACTIONS

In the node table:
 - the _name_ should use the 'official symbol (A or B)'
 - _represents_ should use the 'biogrid id (A or B)'. 
 - All other identifiers and the synonyms should become part of the ALIAS attribute

### Functions to make node and edge dataframes from a TSV-schema network dataframe

The transformation by a load plan JSON object that specifies:
 - names of the column to process (any that are not mentioned are ignored) for
 -- source nodes
 -- target nodes
 -- edges
 - New names for those columns in the new dataframe
 -- "name" and "represents" have special syntax, at least one is required
 -- "interaction" is a reserved edge attribute name
 -- optionally, a default value for the interaction can be specified
 -- optionally, a prefix to be added to the value of the "represents" attribute
 - Data types for the new columns if different from "string". Types will be coerced
 - For list datatypes, the delimiter to use to split the original string value 
 - New columns with default values
     

In [7]:
def get_column_names(plan):
    names = [property_column.get("column_name") for property_column in plan.get("property_columns")]
    names = [item for item in names if item is not None]
    return names

def rename_columns(df, plan):
    renames ={}
    name_column = plan.get("node_name_column")
    represents_column = plan.get("rep_column")
    if represents_column is not None: 
        renames[represents_column] = "represents"
    if name_column is not None: 
        renames[name_column] = "name"
    for property_column in plan.get("property_columns"):
        if property_column.get("column_name") is not None and property_column.get("attribute_name") is not None:
            renames[property_column.get("column_name")] = property_column.get("attribute_name")
    # TBD: test to be sure renames are in df columns
    if len(renames) > 0: df = df.rename(columns=renames)
    return df

def process_datatypes(df, plan):
    # TBD
    return None

def edges_from_tsv_df (tsv_df, plan):
    interaction = plan.get("default_predicate")
    # First get the names of columns that we want to use to create the edge dataframe
    property_columns = get_column_names(plan) 
    # make a new dataframe from the selected edge columns
    edges = tsv_df[property_columns]   
    return edges

def source_or_target_nodes_from_tsv_df(tsv_df, plan):
    property_columns = get_column_names(plan)    
    name_column = plan.get("node_name_column")
    represents_column = plan.get("rep_column")
    # TODO : error check
    if name_column is not None: 
        property_columns.insert(0,name_column)
    if represents_column is not None: 
        property_columns.insert(0, represents_column)
    #print(property_columns)
    #print(tsv_df.columns)
    nodes = tsv_df[property_columns].drop_duplicates()
    nodes = rename_columns(nodes, plan)
    return nodes
    
def nodes_from_tsv_df (tsv_df, source_plan, target_plan):
    source_nodes = source_or_target_nodes_from_tsv_df(tsv_df, source_plan)
    target_nodes = source_or_target_nodes_from_tsv_df(tsv_df, target_plan)
    # Concatenate the source and target node tables - they are
    # compatible because we gave them matching column names.
    # Many nodes are the source of some edges and the target
    # of others so we drop duplicate rows again
    nodes = pd.concat([target_nodes, source_nodes]).drop_duplicates()
    return nodes
    

In [9]:
# The edge dataframe
kinome_edges = edges_from_tsv_df(
    df_interactions, 
    kinome_int_edge_plan)

rename_columns(kinome_edges, kinome_int_edge_plan)
kinome_edges

#kinome_target_nodes = source_or_target_nodes_from_tsv_df(df_interactions,kinome_int_target_plan)


Unnamed: 0,Source Database,#BioGRID Interaction ID,Experimental System,Experimental System Type,Author,Pubmed ID,Throughput,Score,Modification,Phenotypes,Qualifications
0,BIOGRID,68842,Affinity Capture-MS,physical,Krogan NJ (2004),14759368,High Throughput,-,-,-,-
1,BIOGRID,68843,Affinity Capture-MS,physical,Krogan NJ (2004),14759368,High Throughput,-,-,-,-
2,BIOGRID,68846,Affinity Capture-MS,physical,Krogan NJ (2004),14759368,High Throughput,-,-,-,-
3,BIOGRID,68847,Affinity Capture-MS,physical,Krogan NJ (2004),14759368,High Throughput,-,-,-,-
4,BIOGRID,68849,Affinity Capture-MS,physical,Krogan NJ (2004),14759368,High Throughput,-,-,-,-
...,...,...,...,...,...,...,...,...,...,...,...
98273,BIOGRID,2606334,Co-fractionation,physical,Lidschreiber M (2018),30247719,Low Throughput,-,-,-,-
98274,BIOGRID,2606352,Synthetic Growth Defect,genetic,Jones MH (2018),30044722,Low Throughput,-,-,vegetative growth,-
98275,BIOGRID,2606354,Synthetic Growth Defect,genetic,Jones MH (2018),30044722,Low Throughput,-,-,heat sensitivity|vegetative growth,-
98276,BIOGRID,2606357,Synthetic Lethality,genetic,Jones MH (2018),30044722,Low Throughput,-,-,inviable,genetic complex|triple mutants are dead


In [10]:
# The node dataframe (includes renames)
kinome_nodes = nodes_from_tsv_df(df_interactions, kinome_int_source_plan,kinome_int_target_plan)

#print(kinome_edge_df)
kinome_nodes

Unnamed: 0,represents,name,Organism Taxon Id,alias
0,34955,CKA1,559292,casein kinase 2 catalytic subunit CKA1|L000000343
1,36917,CHD1,559292,chromatin-remodeling ATPase CHD1|L000003467
2,34460,CKA2,559292,YOR29-12|casein kinase 2 catalytic subunit CKA...
4,33227,CKB1,559292,casein kinase 2 regulatory subunit CKB1|L00000...
8,36877,GLC7,559292,CID1|DIS2|type 1 serine/threonine-protein phos...
...,...,...,...,...
90173,200653,Impact,10090,E430016J11Rik
92269,36638,MNL1,559292,"HTM1|alpha-1,2-mannosidase MNL1"
93666,35892,YNR066C,559292,-
97052,36701,IMT4,559292,L000003790


### Merge Information from the GENES file into the Node Table 
Also, some info must be mapped on certain nodes based on the content of the GENES file. 

here are the fields that need to be used from the GENES file:

- interaction count
- ptm count
- chemical interaction count
- source
- category (values)
- sub-category (values)

In [12]:
genes_filename = "BIOGRID-PROJECT-kinome_project_sc-GENES-3.5.177.projectindex.txt"
df_genes = pd.read_csv(genes_filename,
                                dtype=str,
                                na_filter=False,
                                delimiter='\t',
                                engine='python')
df_genes = df_genes[["#BIOGRID ID",
                     "INTERACTION COUNT", 
                     "PTM COUNT", 
                     "CHEMICAL INTERACTION COUNT" , 
                     "SOURCE",
                     "CATEGORY VALUES", 
                     "SUBCATEGORY VALUES"]]

df_genes = df_genes.rename(columns={"#BIOGRID ID": "represents"})
#df_genes.info()
df_genes

Unnamed: 0,represents,INTERACTION COUNT,PTM COUNT,CHEMICAL INTERACTION COUNT,SOURCE,CATEGORY VALUES,SUBCATEGORY VALUES
0,35540,1418,15,0,BIOGRID,Kinase,STE
1,34656,205,51,0,BIOGRID,Kinase,-
2,31293,97,7,0,BIOGRID,Phosphatase,-
3,34419,699,13,0,BIOGRID,Phosphatase Associated,-
4,33322,101,4,0,BIOGRID,Other,-
...,...,...,...,...,...,...,...
255,33997,1141,5,0,BIOGRID,Kinase,CMGC
256,36072,233,1,0,BIOGRID,Kinase,ATYPICAL
257,31238,3210,29,0,BIOGRID,Other,-
258,34897,260,23,0,BIOGRID,Kinase,-


In [14]:
# join the genes data to the node table by represents
kinome_nodes_plus_gene_info = pd.merge(kinome_nodes, df_genes, on="represents", how="left")
kinome_nodes_plus_gene_info

Unnamed: 0,represents,name,Organism Taxon Id,alias,INTERACTION COUNT,PTM COUNT,CHEMICAL INTERACTION COUNT,SOURCE,CATEGORY VALUES,SUBCATEGORY VALUES
0,34955,CKA1,559292,casein kinase 2 catalytic subunit CKA1|L000000343,561,1,0,BIOGRID,Kinase,CMGC
1,36917,CHD1,559292,chromatin-remodeling ATPase CHD1|L000003467,,,,,,
2,34460,CKA2,559292,YOR29-12|casein kinase 2 catalytic subunit CKA...,613,4,0,BIOGRID,Kinase,CMGC
3,33227,CKB1,559292,casein kinase 2 regulatory subunit CKB1|L00000...,861,11,0,BIOGRID,Kinase Associated,-
4,36877,GLC7,559292,CID1|DIS2|type 1 serine/threonine-protein phos...,1034,14,0,BIOGRID,Phosphatase,PPP
...,...,...,...,...,...,...,...,...,...,...
5762,200653,Impact,10090,E430016J11Rik,,,,,,
5763,36638,MNL1,559292,"HTM1|alpha-1,2-mannosidase MNL1",,,,,,
5764,35892,YNR066C,559292,-,,,,,,
5765,36701,IMT4,559292,L000003790,,,,,,


### Load the Post-Translational Modifications
- load PTM file as df_ptm
PTM network  using the PTM file following this schema:

Source node:



Target node:

name = residue + position

represents = PTM ID

attributes: PTM ID

Edges:

default predicate = "has-post-translational-modification-on"

attributes: PTM ID (list),  position, residue, post-translational mod, author(list), citation = pubmed ID (list), source DB, has relationship, notes(list).
- Each row of the PTM file specifies a known modification of a protein
- Each row therefore specifies an edge between the protein and the modification
- We create a node table from the protein nodes and modification nodes
- The columns for the edges are extracted from the PTM 


In [15]:
ptm_filename = "BIOGRID-PROJECT-kinome_project_sc-PTM-3.5.177.ptmtab.txt"
df_ptm = pd.read_csv(ptm_filename,
                                dtype=str,
                                na_filter=False,
                                delimiter='\t',
                                engine='python')


#df_genes = df_genes.rename(columns={"#BIOGRID ID": "represents"})

Unnamed: 0,#PTM ID,Entrez Gene ID,BioGRID ID,Systematic Name,Official Symbol,Synonymns,Sequence,Refseq ID,Position,Post Translational Modification,Residue,Author,Pubmed ID,Organism ID,Organism Name,Has Relationships,Notes,Source Database
0,491,851812,32278,YDR226W,ADK1,AKY1|AKY2|adenylate kinase ADK1|L000000046,MSSSESIRMVLIGPPGAGKGTQAPNLQERFHAAHLATGDMLRSQIA...,NP_010512,140,Phosphorylation,S,Holt LJ (2009),19779198,559292,Saccharomyces cerevisiae (S288c),FALSE,-,PhosphoGRID
1,492,851812,32278,YDR226W,ADK1,AKY1|AKY2|adenylate kinase ADK1|L000000046,MSSSESIRMVLIGPPGAGKGTQAPNLQERFHAAHLATGDMLRSQIA...,NP_010512,140,Phosphorylation,S,Smolka MB (2007),17563356,559292,Saccharomyces cerevisiae (S288c),FALSE,-,PhosphoGRID
2,503,851802,32269,YDR216W,ADR1,DNA-binding transcription factor ADR1|L000000050,MANVEKPNDCSGFPVVDLNSCFSNGFNNEKQEIEMETDDSPILLMS...,NP_010502,54,Phosphorylation,S,Holt LJ (2009),19779198,559292,Saccharomyces cerevisiae (S288c),FALSE,-,PhosphoGRID
3,504,851802,32269,YDR216W,ADR1,DNA-binding transcription factor ADR1|L000000050,MANVEKPNDCSGFPVVDLNSCFSNGFNNEKQEIEMETDDSPILLMS...,NP_010502,54,Phosphorylation,S,Breitkreutz A (2010),20489023,559292,Saccharomyces cerevisiae (S288c),FALSE,-,PhosphoGRID
4,505,851802,32269,YDR216W,ADR1,DNA-binding transcription factor ADR1|L000000050,MANVEKPNDCSGFPVVDLNSCFSNGFNNEKQEIEMETDDSPILLMS...,NP_010502,98,Phosphorylation,S,Kacherovsky N (2008),18791642,559292,Saccharomyces cerevisiae (S288c),TRUE,Inhibits binding of ADR1 to DNA in response to...,PhosphoGRID
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7608,1302889,856924,36930,YER177W,BMH1,APR6|14-3-3 family protein BMH1|L000000185,MSTSREDSVYLAKLAEQAERYEEMVENMKTVASSGQELSVEERNLL...,NP_011104,74,Ubiquitination,K,Beltrao P (2012),22817900,559292,Saccharomyces cerevisiae (S288c),FALSE,-,BIOGRID: UbiGRID Project
7609,1302890,856924,36930,YER177W,BMH1,APR6|14-3-3 family protein BMH1|L000000185,MSTSREDSVYLAKLAEQAERYEEMVENMKTVASSGQELSVEERNLL...,NP_011104,76,Ubiquitination,K,Beltrao P (2012),22817900,559292,Saccharomyces cerevisiae (S288c),FALSE,-,BIOGRID: UbiGRID Project
7610,1302891,856924,36930,YER177W,BMH1,APR6|14-3-3 family protein BMH1|L000000185,MSTSREDSVYLAKLAEQAERYEEMVENMKTVASSGQELSVEERNLL...,NP_011104,76,Ubiquitination,K,Starita LM (2011),22106047,559292,Saccharomyces cerevisiae (S288c),FALSE,-,BIOGRID: UbiGRID Project
7611,1302892,856924,36930,YER177W,BMH1,APR6|14-3-3 family protein BMH1|L000000185,MSTSREDSVYLAKLAEQAERYEEMVENMKTVASSGQELSVEERNLL...,NP_011104,76,Ubiquitination,K,Swaney DL (2013),23749301,559292,Saccharomyces cerevisiae (S288c),FALSE,-,BIOGRID: UbiGRID Project


### Get the protein nodes referenced in the PTM data
- name = official symbol
- represents = biogrid id
- alias = entrez gene id, systematic name, synonyms, refseq id

In [21]:
df_ptm_protein_nodes = df_ptm[["Official Symbol", "BioGRID ID", "Entrez Gene ID", "Refseq ID", "Synonymns"]].drop_duplicates()
df_ptm_protein_nodes = df_ptm_protein_nodes.rename(columns={"BioGRID ID": "represents", "Official Symbol": "name"})
df_ptm_protein_nodes

Unnamed: 0,name,represents,Entrez Gene ID,Refseq ID,Synonymns
0,ADK1,32278,851812,NP_010512,AKY1|AKY2|adenylate kinase ADK1|L000000046
2,ADR1,32269,851802,NP_010502,DNA-binding transcription factor ADR1|L000000050
70,AKL1,32763,852351,NP_009615,serine/threonine protein kinase AKL1|S000007479
162,ALK1,33225,852863,NP_011494,protein kinase ALK1|L000002852
196,ALK2,32690,852274,NP_009544,protein kinase ALK2
...,...,...,...,...,...
7184,URA6,34107,853844,NP_012901,SOC8|bifunctional uridylate/adenylate kinase|L...
7215,CKA2,34460,854227,NP_014704,YOR29-12|casein kinase 2 catalytic subunit CKA...
7485,PHO85,36147,856076,NP_015294,LDB15|cyclin-dependent serine/threonine-protei...
7492,SKS1,36152,856081,NP_015299,SHA3|putative serine/threonine protein kinase ...


### Get the post-translational modification nodes

In [17]:
ptm_nodes = df_ptm[["#PTM ID", "Official Symbol", "Residue", "Position"]]
# add a name column concatenating symbol, residue, position
ptm_nodes

Unnamed: 0,#PTM ID,Official Symbol,Residue,Position
0,491,ADK1,S,140
1,492,ADK1,S,140
2,503,ADR1,S,54
3,504,ADR1,S,54
4,505,ADR1,S,98
...,...,...,...,...
7608,1302889,BMH1,K,74
7609,1302890,BMH1,K,76
7610,1302891,BMH1,K,76
7611,1302892,BMH1,K,76


### Make edges connecting the proteins to their modifications

In [18]:
ptm_edges = df_ptm[["BioGRID ID",
                       '#PTM ID',
                       "Position", 
                       "Post Translational Modification", 
                       "Residue", 
                       "Author",
                       "Pubmed ID",
                       "Organism ID",
                       "Organism Name",
                       "Has Relationships",
                       "Notes",
                       "Source Database"]]
ptm_edges = ptm_edges.rename(columns={"BioGRID ID": "source", "#PTM ID": "target"})
ptm_edges

Unnamed: 0,source,target,Position,Post Translational Modification,Residue,Author,Pubmed ID,Organism ID,Organism Name,Has Relationships,Notes,Source Database
0,32278,491,140,Phosphorylation,S,Holt LJ (2009),19779198,559292,Saccharomyces cerevisiae (S288c),FALSE,-,PhosphoGRID
1,32278,492,140,Phosphorylation,S,Smolka MB (2007),17563356,559292,Saccharomyces cerevisiae (S288c),FALSE,-,PhosphoGRID
2,32269,503,54,Phosphorylation,S,Holt LJ (2009),19779198,559292,Saccharomyces cerevisiae (S288c),FALSE,-,PhosphoGRID
3,32269,504,54,Phosphorylation,S,Breitkreutz A (2010),20489023,559292,Saccharomyces cerevisiae (S288c),FALSE,-,PhosphoGRID
4,32269,505,98,Phosphorylation,S,Kacherovsky N (2008),18791642,559292,Saccharomyces cerevisiae (S288c),TRUE,Inhibits binding of ADR1 to DNA in response to...,PhosphoGRID
...,...,...,...,...,...,...,...,...,...,...,...,...
7608,36930,1302889,74,Ubiquitination,K,Beltrao P (2012),22817900,559292,Saccharomyces cerevisiae (S288c),FALSE,-,BIOGRID: UbiGRID Project
7609,36930,1302890,76,Ubiquitination,K,Beltrao P (2012),22817900,559292,Saccharomyces cerevisiae (S288c),FALSE,-,BIOGRID: UbiGRID Project
7610,36930,1302891,76,Ubiquitination,K,Starita LM (2011),22106047,559292,Saccharomyces cerevisiae (S288c),FALSE,-,BIOGRID: UbiGRID Project
7611,36930,1302892,76,Ubiquitination,K,Swaney DL (2013),23749301,559292,Saccharomyces cerevisiae (S288c),FALSE,-,BIOGRID: UbiGRID Project


### Add the Protein-PTM Relationships:
- load the RELATIONSHIPS file as ptm_rel
- Add ptm_edges to kinome_edges
- ??? group df_kinome_nodes by source and target to make the final df_kinome_edges

In [20]:
ptm_rel_filename = "BIOGRID-PROJECT-kinome_project_sc-PTM-RELATIONSHIPS-3.5.177.ptmrel.txt"
ptm_rel = pd.read_csv(ptm_rel_filename,
                                dtype=str,
                                na_filter=False,
                                delimiter='\t',
                                engine='python')

ptm_rel

Unnamed: 0,#PTM ID,Entrez Gene ID,BioGRID ID,Systematic Name,Official Symbol,Synonymns,Relationship,Identity,Author,Pubmed ID,Organism ID,Organism Name,Source Database
0,6,852433,32836,YBR136W,MEC1,ESR1|RAD31|SAD3|protein kinase MEC1|L000000586...,kinase,catalytic,Chen SH (2010),20190278,559292,Saccharomyces cerevisiae (S288c),PhosphoGRID
1,6,852190,32616,YBL088C,TEL1,DNA-binding protein kinase TEL1|L000002281,kinase,catalytic,Chen SH (2010),20190278,559292,Saccharomyces cerevisiae (S288c),PhosphoGRID
2,12,852433,32836,YBR136W,MEC1,ESR1|RAD31|SAD3|protein kinase MEC1|L000000586...,kinase,catalytic,Smolka MB (2007),17563356,559292,Saccharomyces cerevisiae (S288c),PhosphoGRID
3,12,852190,32616,YBL088C,TEL1,DNA-binding protein kinase TEL1|L000002281,kinase,catalytic,Smolka MB (2007),17563356,559292,Saccharomyces cerevisiae (S288c),PhosphoGRID
4,13,852433,32836,YBR136W,MEC1,ESR1|RAD31|SAD3|protein kinase MEC1|L000000586...,kinase,catalytic,Smolka MB (2007),17563356,559292,Saccharomyces cerevisiae (S288c),PhosphoGRID
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14002,42895,854946,35110,YML057W,CMP2,CNA2|calcineurin catalytic subunit A|L000000368,phosphatase,catalytic,Bodenmiller B (2010),21177495,559292,Saccharomyces cerevisiae (S288c),PhosphoGRID
14003,42896,854946,35110,YML057W,CMP2,CNA2|calcineurin catalytic subunit A|L000000368,phosphatase,catalytic,Bodenmiller B (2010),21177495,559292,Saccharomyces cerevisiae (S288c),PhosphoGRID
14004,42897,854946,35110,YML057W,CMP2,CNA2|calcineurin catalytic subunit A|L000000368,phosphatase,catalytic,Bodenmiller B (2010),21177495,559292,Saccharomyces cerevisiae (S288c),PhosphoGRID
14005,42898,854946,35110,YML057W,CMP2,CNA2|calcineurin catalytic subunit A|L000000368,phosphatase,catalytic,Bodenmiller B (2010),21177495,559292,Saccharomyces cerevisiae (S288c),PhosphoGRID


### Export as TSV

In [None]:
def nodes_and_edges_to_tsv (node_df, edge_df):
    # for each edge in the edge data frame, add the source node properties 
    # and the target node properties with column names prefixed by "source" and "target"
    
    

### END
scratch cells below


In [None]:
# old create node table
source_node_columns = ['Entrez Gene Interactor A','BioGRID ID Interactor A','Systematic Name Interactor A',
       'Official Symbol Interactor A',
       'Synonyms Interactor A',
       'Organism Interactor A']

target_node_columns = ['Entrez Gene Interactor B','BioGRID ID Interactor B','Systematic Name Interactor B',
       'Official Symbol Interactor B',
       'Synonyms Interactor B',
       'Organism Interactor B']

df_interactions_source_nodes = df_interactions[source_node_columns].drop_duplicates()
df_interactions_source_nodes = df_interactions_source_nodes.rename(columns={'Entrez Gene Interactor A': "Entrez Gene Interactor",
                                             'BioGRID ID Interactor A': "represents",
                                             'Systematic Name Interactor A': 'Systematic Name Interactor',
                                             'Official Symbol Interactor A': "name",
                                             'Synonyms Interactor A': "alias",
                                             'Organism Interactor A': "Organism Taxon Id"})

df_interactions_target_nodes = df_interactions[target_node_columns].drop_duplicates()
df_interactions_target_nodes = df_interactions_target_nodes.rename(columns={'Entrez Gene Interactor B': "Entrez Gene Interactor",
                                             'BioGRID ID Interactor B': "represents",
                                             'Systematic Name Interactor B': 'Systematic Name Interactor',
                                             'Official Symbol Interactor B': "name",
                                             'Synonyms Interactor B': "alias",
                                             'Organism Interactor B': "Organism Taxon Id"})

df_interactions_nodes = pd.concat([df_interactions_target_nodes, df_interactions_source_nodes]).drop_duplicates()
df_interactions_nodes

In [None]:
#Old Edge Table from Interactions
edge_columns = ['BioGRID ID Interactor A', 'BioGRID ID Interactor B','#BioGRID Interaction ID','Experimental System',
       'Experimental System Type', 'Author', 'Pubmed ID',
       'Organism Interactor A', 'Organism Interactor B', 'Throughput', 'Score',
       'Modification', 'Phenotypes', 'Qualifications', 'Tags',
       'Source Database']
df_interactions_edges = df_interactions[edge_columns].rename(columns={'BioGRID ID Interactor B': "target", 
                                                                      'BioGRID ID Interactor A': "source"})
# collapse edges where source and target are the same?
df_interactions_edges

In [None]:
def node_and_edge_dataframes_to_nicecx_builder (node_df, edge_df,
                           source_col, 
                           target_col, 
                           interaction_col):
    nice_cx_builder = NiceCXBuilder()
    # This assumes that node names are unique and that the column is "name"
    node_name_to_node_id_lookup = create_nice_cx_nodes_from_node_df(node_df, nice_cx_builder)
    add_edges_from_edge_df(edge_df, 
                           nice_cx_builder, 
                           node_name_to_node_id_lookup,
                           source_col, 
                           target_col, 
                           interaction_col)  
    return nice_cx_builder
    
def create_nice_cx_nodes_from_node_df(node_df, nice_cx):
    lookup = {}
    attribute_columns = node_df.columns.remove("name")
    # assumes name column is column 1
    for index, row in node_df.iterrows:
        # create the node
        node = nice_cx.create_node(row["name"])
        lookup[node.name] = node.id
        for column_name in attribute_columns:
            nice_cx.set_attribute(node, column_name, row[column_name])    
    return lookup

def add_edges_from_edge_df(edge_df, nice_cx_builder, node_name_to_node_id_lookup, source_col, target_col, interaction_col):
    




### Save to NDEx:
- create a NiceCX network from df_kinome_nodes and df_kinome_edges
- add metadata
- upload to NDEx

In [None]:
nice_cx_builder = node_and_edge_dataframes_to_nicecx_builder (df_kinome_nodes, df_kinome_edges)
nice_cx_builder.set_network_name("...")
nice_cx_builder.add_network_attribute("description", network_description)  

In [46]:

with open(


source_plan = load_plan.get("source_plan")
target_plan = load_plan.get("target_plan")
edge_plan = load_plan.get("edge_plan")
#display(source_plan)

#print(source_plan)
        
# Make empty node table
#node_df_column_names = set()
node_df_source_column_names = []
node_df_target_column_names = []

#print(source_plan.get("property_columns"))
for property_column in source_plan.get("property_columns"):
    if property_column.get("column_name") is not None:
        node_df_source_column_names.append(property_column.get("column_name"))
#    node_df_attribute_names.append(property_column.get("attribute_name"))
    
for property_column in target_plan.get("property_columns"):
    if property_column.get("column_name") is not None:
     node_df_target_column_names.append(property_column.get("attribute_name"))
#node_df_column_names = ["name"] + list(node_df_column_names)
#df_interactions_nodes = pd.DataFrame(columns=node_df_column_names)

# Make empty edge table
edge_df_column_names = []
for property_column in edge_plan.get("property_columns"):
    if property_column.get("column_name") is not None:
        edge_df_column_names.append(property_column.get("attribute_name"))
#df_interactions_edges = pd.DataFrame(columns=edge_df_column_names)
#df_interactions_edges
node_df_source_column_names

['Organism Interactor A',
 'Synonyms Interactor A',
 'Interaction Count A',
 'PTM Count A',
 'Chemical Interaction Count A',
 'Category Values A',
 'SubCategory Values A']

In [None]:
def create_node(row, plan, df_nodes):
    for property_column in source_plan.get("property_columns"):
        if property_column.get("rep_column") not in df_nodes:
            df_nodes[property_column.get[]]
    
def create_edge(row, plan, source_index, target_index, df_nodes, df_edges):

    
for row_id, row in df_interactions:
    source_node = create_node(row, source_plan, df_interactions_nodes)
    target_node = create_node(row, target_plan, df_interactions_nodes)

    if source_node is not None and target_node is not None:
        create_edge(row, edge_plan, df_interactions_edges, source_index, target_index)