In [2]:
import pandas as pd

In [None]:
df = pd.read_csv('string-network-1.csv')
df.head()

In [None]:
# get list of nodes

nodes = df.node1.unique()

In [None]:
nodes

In [None]:
# build dict of node inputs

node_inputs = {}
for node in nodes:
    inputs = list(df[df.node1 == node].node2)
    node_inputs[node] = inputs

In [None]:
# construct boolean rules as list
rules = []
for key, value in node_inputs.items():
    rule = key + ' *= '
    rule = rule + ' and '.join(value)
    rules.append(rule)

In [None]:
# construct initial conditions
initial_conditions = []
for node in nodes:
    initial_condition = node + ' = Random'
    initial_conditions.append(initial_condition)

In [None]:
# export model
definition = '#initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#rules\n'+'\n'.join(rules)
fp = open('model.txt', 'w')
fp.write(definition)
fp.close()

In [None]:
inp = file('model.txt').read()

In [None]:
#  print(inp)

In [4]:
tabular_text_output = 'string-network-1.csv'
complex_map = 'string-1-complex-map.csv'
initial_value = True

In [42]:
def string2definition(tabular_text_output, complex_map, initial_value=True):
    """
    model 2
    """
    
    # read edges
    df = pd.read_csv(tabular_text_output)

    # build dict of factor node edges
    factor_nodes = list(df.node1.unique())
    node_edges = {}
    for node in factor_nodes:
        edges = list(df[df.node1 == node].node2)
        node_edges[node] = edges

    # read complexes
    df = pd.read_csv(complex_map)

    # build complex maps
    complex_nodes = list(df.complex.unique())
    complex2component = {}
    component2complex = {}
    for node in complex_nodes:
        components = list(df[df.complex == node].component)
        component2complex.update(dict([(component, node) for component in components]))
        complex2component[node] = components

    # replace component factor nodes with complex node
    components = df.component.unique() # all components
    node_edges_or = dict([(node, edges) # remove nodes that are complex components
                          for node, edges in node_edges.items() 
                          if node not in components]
                        ) 
    for node1, edges in node_edges_or.items():
        or_edges =[] 
        for node2 in edges:
            or_edges.append(node2 if node2 not in components else component2complex[node2])
        node_edges_or[node1] = list(set(or_edges)) # unique values

    # generate OR rules
    rules = []
    for node, edges in node_edges_or.items():
        rule = node + ' *= ' + ' or '.join(edges)
        rules.append(rule)

    # generate AND rules
    node_edges_and = complex2component # complex components and to complex node
    for node, edges in node_edges_and.items():
        rule = node + ' *= ' + ' and '.join(edges)
        rules.append(rule)

    # generate node initialisations
    initial_conditions = []
    initial_value = 'True' if initial_value else 'False'
    for node in factor_nodes + complex_nodes:
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)

    # construct definition 
    return(
            '#initial conditions\n'+
            '\n'.join(initial_conditions)+         
            '\n\n'+
            '#rules\n'+
            '\n'.join(rules)
    )
        

In [43]:
print string2definition(tabular_text_output,complex_map,)

#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True

#rules
NUTF2 *= RAN or KPNB1 or KPNA2
CD1D *= MHC_I_complexes or CD1B
CD1B *= CD1D or MHC_I_complexes
KLRD1 *= KLRC1 or MHC_I_complexes
HIP1R *= AP1B1 or ESCRT_complexes
XPO1 *= RAN or ESCRT_complexes
XPO5 *= RAN
VPS33B *= UFD1L
RAN *= NUTF2 or XPO1 or XPO5 or KPNB1 or KPNA2 or KPNA1
DERL1 *= VCP or 

In [27]:
from string_model_drawer import draw_edgelists
# draw_edgelists('string-network-1.csv', add_mtb='mtb-edgelist.csv', add_process='string-1-process-edgelist.csv')

In [44]:
definition = string2definition(tabular_text_output,complex_map,)
edgelist = 'string-1-process-edgelist.csv'
complex_map = 'string-1-complex-map.csv'


In [56]:
def add_processes2definition(definition, process_edgelist, complex_map, initial_value=True):
    """
    model 2
    """
    
    # read edges
    df = pd.read_csv(process_edgelist)

    # build dict of process node edges
    process_nodes = list(df.process.unique())
    process_edges = {}
    for node in process_nodes:
        edges = list(df[df.process == node].node)
        process_edges[node] = edges

    # remove nodes not in the network (not modelled)
    for edges in process_edges.values():
        for node in edges:
            if node not in definition: edges.remove(node)

    # read complexes
    df = pd.read_csv(complex_map)

    # build complex maps
    complex_nodes = list(df.complex.unique())
    component2complex = {}
    for node in complex_nodes:
        components = list(df[df.complex == node].component)
        component2complex.update(dict([(component, node) for component in components]))

    # replace component factor nodes with complex node
    components = df.component.unique() # all components
    for process, edges in process_edges.items():
        for i, node in enumerate(edges):
            if node in components: edges[i] = component2complex[node]
        process_edges[process] = list(set(edges))

    # generate boolean rules
    rules = []
    for process, edges in process_edges.items():
        rule = process + ' *= ' + ' and '.join(edges) # assume AND
        rules.append(rule)

    # generate process initilaisations
    initial_conditions = []
    initial_value = 'True' if initial_value else 'False'
    for process in process_nodes:
        initial_condition = process + ' = ' + initial_value
        initial_conditions.append(initial_condition)

    # message
    print 'added: '+process_nodes
    
    # construct definition 
    return(
        definition +
            '\n\n'+
            '#processes\n'+
            '\n'.join(initial_conditions)+         
            '\n\n'+
            '#process rules\n'+
            '\n'.join(rules)
    )    

In [58]:
print add_process_edgelist(definition,edgelist,complex_map,)

#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True

#rules
NUTF2 *= RAN or KPNB1 or KPNA2
CD1D *= MHC_I_complexes or CD1B
CD1B *= CD1D or MHC_I_complexes
KLRD1 *= KLRC1 or MHC_I_complexes
HIP1R *= AP1B1 or ESCRT_complexes
XPO1 *= RAN or ESCRT_complexes
XPO5 *= RAN
VPS33B *= UFD1L
RAN *= NUTF2 or XPO1 or XPO5 or KPNB1 or KPNA2 or KPNA1
DERL1 *= VCP or 

In [60]:
definition = add_process_edgelist(definition,edgelist,complex_map)
mtb_edgelist = 'mtb-edgelist.csv'
complex_map = 'string-1-complex-map.csv'


In [72]:
def add_mtb2definition(definition, mtb_edgelist, initial_value = False):
    """
    model 2
    """

    # read edges
    df = pd.read_csv(mtb_edgelist)

    # build dict of mtb node edges
    target_nodes = list(df.node.unique())
    target_edges = {}
    for node in target_nodes:
        edges = list(df[df.node == node].mtb)
        target_edges[node] = edges

    # remove targets not in the network (not modelled)
    for target in target_nodes:
        if target not in definition: del target_edges[target]

    # enumerate mtb factors modelled
    mtb_nodes = list(set([node for nodes in target_edges.values() for node in nodes]))

    # generate boolean rules
    rules = []
    for target, mtb in target_edges.items():
        rule = target + ' *= ' + target # add the inhibition rule recursively
        rule = rule + ' and not (' +' or '.join(mtb) + ')'
        rules.append(rule)

    # generate mtb initilaisations
    initial_conditions = []
    initial_value = 'True' if initial_value else 'False'
    for node in mtb_nodes:
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)

    # message
    print 'added: '+str(mtb_nodes)

    # construct definition 
    return (
        definition + 
            '\n\n'+
            '#mtb\n'+
            '\n'.join(initial_conditions)+
            '\n\n'+
            '#mtb rules\n'+
            '\n'.join(rules)
    )


In [74]:
print add_mtb2definition(definition, 'mtb-edgelist.csv')

added: ['SapM', 'esxH_esxG', 'ndkA', 'esxA', 'PtpA', 'PPE2']
#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True

#rules
NUTF2 *= RAN or KPNB1 or KPNA2
CD1D *= MHC_I_complexes or CD1B
CD1B *= CD1D or MHC_I_complexes
KLRD1 *= KLRC1 or MHC_I_complexes
HIP1R *= AP1B1 or ESCRT_complexes
XPO1 *= RAN or ESCRT_complexes
XPO5 *= RAN
VPS33B *= UFD1L
RAN *= NUTF