In [6]:
import pandas as pd

In [7]:
df = pd.read_csv('string-network-1.csv')
df.head()

Unnamed: 0,node1,node2,node1_string_id,node2_string_id,homology,experimentally_determined_interaction,database_annotated,automated_textmining,combined_score
0,AP1B1,HIP1R,9606.ENSP00000350199,9606.ENSP00000253083,0,0.131,0.6,0,0.637
1,AP1B1,CLVS2,9606.ENSP00000350199,9606.ENSP00000275162,0,0.0,0.6,0,0.6
2,ASPSCR1,UFD1L,9606.ENSP00000306625,9606.ENSP00000263202,0,0.438,0.0,0,0.438
3,ASPSCR1,UBXN7,9606.ENSP00000306625,9606.ENSP00000296328,0,0.472,0.0,0,0.472
4,ASPSCR1,NSFL1C,9606.ENSP00000306625,9606.ENSP00000418529,0,0.747,0.0,0,0.747


In [8]:
# get list of nodes

nodes = df.node1.unique()

In [9]:
nodes

array(['AP1B1', 'ASPSCR1', 'ATP6V1A', 'ATP6V1B2', 'ATP6V1D', 'ATP6V1E1',
       'ATP6V1F', 'ATP6V1G1', 'ATP6V1H', 'B2M', 'CALR', 'CD1B', 'CD1D',
       'CD8A', 'CHMP2A', 'CHMP2B', 'CHMP3', 'CHMP4B', 'CLVS2', 'DERL1',
       'GGA1', 'HGS', 'HIP1R', 'HLA-A', 'HLA-B', 'HLA-C', 'HLA-E',
       'HLA-G', 'KLRC1', 'KLRD1', 'KPNA1', 'KPNA2', 'KPNB1', 'NSFL1C',
       'NUTF2', 'RAB5A', 'RAB7A', 'RAN', 'STAM', 'TSG101', 'UBXN7',
       'UFD1L', 'VCP', 'VPS33B', 'XPO1', 'XPO5'], dtype=object)

In [10]:
# build dict of node inputs

node_inputs = {}
for node in nodes:
    inputs = list(df[df.node1 == node].node2)
    node_inputs[node] = inputs

In [11]:
# construct boolean rules as list
rules = []
for key, value in node_inputs.items():
    rule = key + ' *= '
    rule = rule + ' and '.join(value)
    rules.append(rule)

In [12]:
# construct initial conditions
initial_conditions = []
for node in nodes:
    initial_condition = node + ' = Random'
    initial_conditions.append(initial_condition)

In [13]:
# export model
definition = '#initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#rules\n'+'\n'.join(rules)
fp = open('model.txt', 'w')
fp.write(definition)
fp.close()

In [14]:
inp = file('model.txt').read()

In [15]:
#  print(inp)

In [2]:
tabular_text_output = 'string-network-1.csv'
complex_map = 'string-1-complex-map.csv'
initial_value = True

In [76]:
def string2definition(tabular_text_output, complex_map, initial_value=True):
    """
    model 2
    """
    
    # read edges
    df = pd.read_csv(tabular_text_output)

    # build dict of factor node edges
    factor_nodes = list(df.node1.unique())
    node_edges = {}
    for node in factor_nodes:
        edges = list(df[df.node1 == node].node2)
        node_edges[node] = edges

    # read complexes
    df = pd.read_csv(complex_map)

    # build complex maps
    complex_nodes = list(df.complex.unique())
    complex2component = {}
    component2complex = {}
    for node in complex_nodes:
        components = list(df[df.complex == node].component)
        component2complex.update(dict([(component, node) for component in components]))
        complex2component[node] = components

    # build OR edges
    node_edges_or = {}   
    component_nodes = df.component.unique() # all components

    for complex in complex_nodes: # aggregate edges for each complex
        complex_edges = []
        for node, edges in node_edges.items():
            if node in component_nodes: 
                if node in complex2component[complex]: complex_edges+=edges
                # node is a component AND belongs to this complex
            else:
                node_edges_or[node]=edges # if node is not a complex component store down
        node_edges_or[complex] = list(set(complex_edges)) # store down unique set of edges foe each complex
    
    for node, edges in node_edges_or.items(): 
        # now replace edge nodes that are complex components with complex nodes
        or_edges =[] 
        for edge in edges:
            or_edge = component2complex[edge] if edge in component_nodes else edge          
            or_edges.append(or_edge)
        node_edges_or[node] = list(set(or_edges)) # unique values
        
        # for complexes this will enable us to easily add 'OR edges' as a recursive rule
        # complex *= complex OR edge OR ...
        # and then the complex components can be included in a seperate AND rule
        # complex *= component AND ...
        # this rule will be evaluated first before OR edges, resulting in the effect of using brackets

    # build AND edges
    node_edges_and = complex2component # components AND to complex nodes
    
    # generate rules
    complex_and = []
    for node, edges in node_edges_and.items(): # AND rules
        rule = node + ' *= ' + ' and '.join(edges)
        complex_and.append(rule)
    
    factor_or = []
    complex_or = []
    for node, edges in node_edges_or.items(): # OR rules
        if node in complex_nodes:
            edges.remove(node)
            rule = node + ' *= ' + node + ' or ' + ' or '.join(edges) # order the recursive rule for niceness
            complex_or.append(rule)

        else:
            rule = node + ' *= ' + ' or '.join(edges)
            factor_or.append(rule)
    
    
    # generate node initialisations
    initial_conditions = []
    initial_value = 'True' if initial_value else 'False'
    for node in factor_nodes + complex_nodes:
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)
        
    # construct definition 
    return(
            '#initial conditions\n'+
            '\n'.join(initial_conditions)+         
            '\n\n'+
            '#rules\n'+
            '\n'.join(complex_and) + # AND rules first, so OR rules can be added recursively
            '\n\n'+
            '\n'.join(complex_or) +
            '\n\n'+
            '\n'.join(factor_or)
    )       

In [107]:
print string2definition(tabular_text_output,complex_map,)

#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True
ImportinBetaAlpha_complexes = True

#rules
vATPase_complexes *= ATP6V1F and ATP6V1B2 and ATP6V1A and ATP6V1G1 and ATP6V1D and ATP6V1E1 and ATP6V1H
ESCRT_complexes *= HGS and CHMP2A and CHMP4B and CHMP2B and CHMP3 and STAM and TSG101
MHC_I_complexes *= B2M and HLA-C and HLA-B and HLA-G and HLA-E and CD

In [108]:
from string_model_drawer import draw_edgelists
# draw_edgelists('string-network-1.csv', add_mtb='mtb-edgelist.csv', add_process='string-1-process-edgelist.csv')

In [109]:
definition = string2definition(tabular_text_output,complex_map,)
edgelist = 'string-1-process-edgelist.csv'
complex_map = 'string-1-complex-map.csv'


In [110]:
def add_processes2definition(definition, process_edgelist, complex_map, initial_value=True):
    """
    model 2
    """
    
    # read edges
    df = pd.read_csv(process_edgelist)

    # build dict of process node edges
    process_nodes = list(df.process.unique())
    process_edges = {}
    for node in process_nodes:
        edges = list(df[df.process == node].node)
        process_edges[node] = edges

    # remove nodes not in the network (not modelled)
    for edges in process_edges.values():
        for node in edges:
            if node not in definition: edges.remove(node)

    # read complexes
    df = pd.read_csv(complex_map)

    # build complex maps
    complex_nodes = list(df.complex.unique())
    component2complex = {}
    for node in complex_nodes:
        components = list(df[df.complex == node].component)
        component2complex.update(dict([(component, node) for component in components]))

    # replace component factor nodes with complex node
    components = df.component.unique() # all components
    for process, edges in process_edges.items():
        for i, node in enumerate(edges):
            if node in components: edges[i] = component2complex[node]
        process_edges[process] = list(set(edges))

    # generate boolean rules
    rules = []
    for process, edges in process_edges.items():
        rule = process + ' *= ' + ' and '.join(edges) # assume AND
        rules.append(rule)

    # generate process initilaisations
    initial_conditions = []
    initial_value = 'True' if initial_value else 'False'
    for process in process_nodes:
        initial_condition = process + ' = ' + initial_value
        initial_conditions.append(initial_condition)

    # message
    print 'added: '+ str(process_nodes)
    
    # construct definition 
    return(
        definition +
            '\n\n'+
            '#processes\n'+
            '\n'.join(initial_conditions)+         
            '\n\n'+
            '#process rules\n'+
            '\n'.join(rules)
    )    

In [111]:
definition = add_processes2definition(definition,edgelist,complex_map)
print definition

added: ['Phagosome_acidification', 'Phagosome_maturation', 'Phagolysosome_assembly']
#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True
ImportinBetaAlpha_complexes = True

#rules
vATPase_complexes *= ATP6V1F and ATP6V1B2 and ATP6V1A and ATP6V1G1 and ATP6V1D and ATP6V1E1 and ATP6V1H
ESCRT_complexes *= HGS and CHMP2A and CHMP4B and CHMP2B and CHMP3 and 

In [112]:
with open('string_model_3_definition.txt', 'w') as fp: fp.write(definition)

In [113]:
with open('string_model_3_definition.txt', 'r') as fp: print fp.read()

#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True
ImportinBetaAlpha_complexes = True

#rules
vATPase_complexes *= ATP6V1F and ATP6V1B2 and ATP6V1A and ATP6V1G1 and ATP6V1D and ATP6V1E1 and ATP6V1H
ESCRT_complexes *= HGS and CHMP2A and CHMP4B and CHMP2B and CHMP3 and STAM and TSG101
MHC_I_complexes *= B2M and HLA-C and HLA-B and HLA-G and HLA-E and CD

In [None]:
mtb_edgelist = 'mtb-edgelist.csv'
complex_map = 'string-1-complex-map.csv'

In [99]:
def add_mtb2definition(definition, mtb_edgelist, initial_value = False):
    """
    model 2
    """

    # read edges
    df = pd.read_csv(mtb_edgelist)

    # build dict of mtb node edges
    target_nodes = list(df.node.unique())
    target_edges = {}
    for node in target_nodes:
        edges = list(df[df.node == node].mtb)
        target_edges[node] = edges

    # remove targets not in the network (not modelled)
    for target in target_nodes:
        if target not in definition: del target_edges[target]

    # enumerate mtb factors modelled
    mtb_nodes = list(set([node for nodes in target_edges.values() for node in nodes]))

    # generate boolean rules
    rules = []
    for target, mtb in target_edges.items():
        rule = target + ' *= ' + target # add the inhibition rule recursively
        rule = rule + ' and not (' +' or '.join(mtb) + ')'
        rules.append(rule)

    # generate mtb initilaisations
    initial_conditions = []
    initial_value = 'True' if initial_value else 'False'
    for node in mtb_nodes:
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)

    # message
    print 'added: '+str(mtb_nodes)

    # construct definition 
    return (
        definition + 
            '\n\n'+
            '#mtb\n'+
            '\n'.join(initial_conditions)+
            '\n\n'+
            '#mtb rules\n'+
            '\n'.join(rules)
    )


In [100]:
print add_mtb2definition(definition, 'mtb-edgelist.csv')

added: ['SapM', 'esxH_esxG', 'ndkA', 'esxA', 'PtpA', 'PPE2']
#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True
ImportinBetaAlpha_complexes = True

#rules
vATPase_complexes *= ATP6V1F and ATP6V1B2 and ATP6V1A and ATP6V1G1 and ATP6V1D and ATP6V1E1 and ATP6V1H
ESCRT_complexes *= HGS and CHMP2A and CHMP4B and CHMP2B and CHMP3 and STAM and TSG101
MHC_I_co