In [1]:
import pickle
import re

import networkx as nx
import pandas as pd

In [2]:
def flatten(l):
    """Concatenate the items of every sub-iterable of `l` into one flat list.

    Only one level of nesting is removed; the order of items is preserved.
    """
    return [item for sublist in l for item in sublist]

In [3]:
# Load the Reactome functional-interaction table and keep only the high
# confidence rows: annotation does not mention 'predict' and Score equals 1.
reactome_df = (
    pd.read_csv('../data/reactome/FIsInGene_022717_with_annotations.txt', sep='\t')
    .loc[lambda fi: ~fi['Annotation'].str.contains('predict') & (fi['Score'] == 1)]
)
reactome_df.head()

Unnamed: 0,Gene1,Gene2,Annotation,Direction,Score
2,<ALPHA><BETA>CREB-1,AKT1,activated by,<-,1.0
3,<ALPHA><BETA>CREB-1,ARNT,complex; input,-,1.0
4,<ALPHA><BETA>CREB-1,ASPH,inhibited by,|-,1.0
5,<ALPHA><BETA>CREB-1,CAMK2A,activated by,<-,1.0
6,<ALPHA><BETA>CREB-1,CAMK2G,activated by,<-,1.0


In [10]:
def _populate_edge_attributes(source, target, edge_type, interactions): 

    # Attribute `effect` takes values 0 (---), 1 (-->), 2 (<->), or -1 (--|) to indicate cases where
    # orientation is unknown, the edge is activating, the edge is bidirectional (protein complex), or the edge is inhibitory.
    # Perhaps add `binding` as an attribute? Interactions? 
    edge_attributes = { 'source': source, 'target': target, 'type': edge_type, 
                        'effect': 0, 'indirect': 0, 'modification': "" }

    # Attributes must be updated in two steps, since descriptors examined in the second loop
    # are more specific than those in the first, and should be used to overwrite them. 
    for interaction in interactions: 

        if   interaction == 'binding/association': edge_attributes.update({ 'effect': 2 })
        elif interaction == 'dissociation': 	   edge_attributes.update({ 'effect': 1 })
        elif interaction == 'missing interaction': edge_attributes.update({ 'effect': 0 })
        elif interaction == 'indirect effect':	   edge_attributes.update({ 'effect': 1, 'indirect': 1 })
        else: pass

    for interaction in interactions: 

        if   interaction == 'phosphorylation':	 edge_attributes.update({ 'effect': 1, 'modification': "+p" })
        elif interaction == 'dephosphorylation': edge_attributes.update({ 'effect': 1, 'modification': "-p" })
        elif interaction == 'glycosylation': 	 edge_attributes.update({ 'effect': 1, 'modification': "+g" })
        elif interaction == 'ubiquitination': 	 edge_attributes.update({ 'effect': 1, 'modification': "+u" })
        elif interaction == 'methylation': 		 edge_attributes.update({ 'effect': 1, 'modification': "+m" })

    for interaction in interactions: 

        if   interaction == 'activation': 	   edge_attributes.update({ 'effect':  1 })
        elif interaction == 'inhibition': 	   edge_attributes.update({ 'effect': -1 })
        elif interaction == 'expression': 	   edge_attributes.update({ 'effect':  1, 'modification': 'e'})
        elif interaction == 'repression': 	   edge_attributes.update({ 'effect': -1, 'modification': 'e'})
        else: pass
        
    for interaction in interactions: 
        
        if interaction == 'protein complex':	   edge_attributes.update({ 'effect': 2 }) # not standard type, but including for clarity
        elif interaction == 'bidirected':	   	   edge_attributes.update({ 'effect': 2 }) # not standard type, but including for clarity
        elif interaction == 'binhibited':	   	   edge_attributes.update({ 'effect':-2 }) # not standard type, but including for clarity

    return edge_attributes

In [12]:
interaction_attribs = []

# (annotation substring, interaction label) pairs, tested in the order listed.
_GENERAL_KEYWORDS = (
    ('binding/association', 'binding/association'),
    ('compound', 'protein complex'),
    ('complex', 'protein complex'),
    ('dissociat', 'dissociation'),
    ('indirect', 'indirect effect'),
)
_MODIFICATION_KEYWORDS = (
    ('glycosylat', 'glycosylation'),
    ('ubiquitinat', 'ubiquitination'),
    ('methylat', 'methylation'),
)
_REGULATION_KEYWORDS = (
    ('activ', 'activation'),
    ('inhibit', 'inhibition'),
    ('express', 'expression'),
    ('repress', 'repression'),
)


def _labels_from_annotation(annotation):
    """Return the interaction labels whose keyword occurs in `annotation`."""
    labels = [label for keyword, label in _GENERAL_KEYWORDS if keyword in annotation]

    # 'phosphoryl' is a substring of 'dephosphoryl', so the more specific
    # keyword is tested first and the two labels are mutually exclusive.
    if 'dephosphoryl' in annotation:
        labels.append('dephosphorylation')
    elif 'phosphoryl' in annotation:
        labels.append('phosphorylation')

    labels.extend(label for keyword, label in _MODIFICATION_KEYWORDS if keyword in annotation)
    labels.extend(label for keyword, label in _REGULATION_KEYWORDS if keyword in annotation)
    return labels


def _record_edges(labels, direction_label, *endpoints):
    """Append `direction_label` to `labels`, then record one edge per (source, target) pair."""
    labels.append(direction_label)
    for edge_source, edge_target in endpoints:
        interaction_attribs.append(
            _populate_edge_attributes(edge_source, edge_target, 'reactome', labels)
        )


for gene1, gene2, annotation, direction, _ in reactome_df.itertuples(index=False, name=None):

    # The same list is reused by all three direction checks below, so labels
    # added by an earlier check are still present when a later check emits edges.
    interactions = _labels_from_annotation(annotation)
    forward, backward = (gene1, gene2), (gene2, gene1)

    # Arrow heads: the exact '<->' is tested before the substring checks.
    if direction == '<->':
        _record_edges(interactions, 'bidirected', forward, backward)
    elif '->' in direction:
        _record_edges(interactions, 'activation', forward)
    elif '<-' in direction:
        _record_edges(interactions, 'activation', backward)

    # Bar heads: the exact '|-|' is tested before the substring checks.
    if direction == '|-|':
        _record_edges(interactions, 'binhibited', forward, backward)
    elif '-|' in direction:
        _record_edges(interactions, 'inhibition', forward)
    elif '|-' in direction:
        _record_edges(interactions, 'inhibition', backward)

    # A bare dash carries no orientation: record the edge in both directions.
    if direction == '-':
        _record_edges(interactions, 'bidirected', forward, backward)

In [13]:
# One row per recorded edge, with the columns put in a fixed order.
edge_columns = ['source', 'target', 'effect', 'indirect', 'modification', 'type']
reactome_edges_df = pd.DataFrame(interaction_attribs).loc[:, edge_columns]
reactome_edges_df.head()

Unnamed: 0,source,target,effect,indirect,modification,type
0,AKT1,<ALPHA><BETA>CREB-1,1,0,,reactome
1,<ALPHA><BETA>CREB-1,ARNT,2,0,,reactome
2,ARNT,<ALPHA><BETA>CREB-1,2,0,,reactome
3,ASPH,<ALPHA><BETA>CREB-1,-1,0,,reactome
4,CAMK2A,<ALPHA><BETA>CREB-1,1,0,,reactome


In [14]:
# (rows, columns) of the edge table; each row is one recorded directed edge.
reactome_edges_df.shape

(298833, 6)

In [15]:
# Edges whose effect is -2, the value assigned for the 'binhibited' label.
reactome_edges_df.loc[reactome_edges_df['effect'].eq(-2)]

Unnamed: 0,source,target,effect,indirect,modification,type
11023,AKT1,PPP2CA,-2,0,-p,reactome
11024,PPP2CA,AKT1,-2,0,-p,reactome
11025,AKT1,PPP2CB,-2,0,-p,reactome
11026,PPP2CB,AKT1,-2,0,-p,reactome
11027,AKT1,PPP2R1A,-2,0,-p,reactome
11028,PPP2R1A,AKT1,-2,0,-p,reactome
11031,AKT1,PPP2R2B,-2,0,-p,reactome
11032,PPP2R2B,AKT1,-2,0,-p,reactome
11033,AKT1,PPP2R2C,-2,0,-p,reactome
11034,PPP2R2C,AKT1,-2,0,-p,reactome


In [16]:
# Directed graph built from the edge table; every remaining column of the
# table is attached to its edge as an attribute.
reactomeX = nx.from_pandas_edgelist(
    reactome_edges_df,
    source='source',
    target='target',
    edge_attr=True,
    create_using=nx.DiGraph(),
)

In [18]:
# nx.write_gpickle was deprecated in NetworkX 2.6 and removed in 3.0. It was a
# thin wrapper around pickle.dump with the highest protocol, so writing the
# pickle directly produces the same kind of file on every NetworkX version.
with open('../output/reactomeX_v1.1.gpickle', 'wb') as gpickle_file:
    pickle.dump(reactomeX, gpickle_file, protocol=pickle.HIGHEST_PROTOCOL)

# Extra: check which annotation tags the keyword rules above do not cover

In [41]:
# Split every annotation on ';' or ',' and collect the distinct pieces.
# Replacing 'ed' with 'ion' is a crude way to merge e.g. 'activated' with
# 'activation'; it also rewrites any other occurrence of 'ed' in a piece.
tags = {
    piece.replace('ed', 'ion')
    for annotation in reactome_df['Annotation']
    for piece in re.split("[;,]", annotation)
}
tags

{' ECrel: compound',
 ' GErel: expression',
 ' GErel: expression by',
 ' GErel: repression by',
 ' PPrel',
 ' PPrel: activation',
 ' PPrel: activation binding/association',
 ' PPrel: activation by',
 ' PPrel: binding/association',
 ' PPrel: compound',
 ' PPrel: dephosphorylation',
 ' PPrel: dissociation',
 ' PPrel: expression',
 ' PPrel: expression by',
 ' PPrel: indirect effect',
 ' PPrel: inhibition',
 ' PPrel: inhibition by',
 ' PPrel: phosphorylation',
 ' PPrel: phosphorylation by',
 ' PPrel: ubiquitination',
 ' activate',
 ' activation',
 ' activation by',
 ' binding/association',
 ' catalyze',
 ' catalyzion by',
 ' complex',
 ' dephosphorylation',
 ' dissociation',
 ' expression',
 ' expression regulates',
 ' expression regulation by',
 ' glycosylation',
 ' indirect effect',
 ' inhibit',
 ' inhibite',
 ' inhibition',
 ' inhibition by',
 ' input',
 ' interaction',
 ' methylation',
 ' phosphorylation',
 ' phosphorylation by',
 ' reaction',
 ' ubiquitination',
 'ECrel: activation',


In [40]:
# Fragments that mark a tag as already handled; a tag containing any of them is skipped.
known_fragments = (
    'activ',
    'phosphoryl',
    'inhibit',
    'express',
    'repress',
    'indirect',
    'binding/association',
    'ubiquitinat',
    'compound',
    'complex',
    'glycosylat',
    'dissociat',
    'methylation',
    'PPrel',
    'predict',
)

# Print every tag that none of the known fragments accounts for.
for tag in tags:
    if not any(fragment in tag for fragment in known_fragments):
        print(tag)

interaction
 catalyzed by
 input
 interaction
reaction
 catalyze
 reaction
catalyze
catalyzed by
input
