In [6]:
import pandas as pd

In [254]:
df = pd.read_csv('string-network-1.csv')
df.head()

Unnamed: 0,node1,node2,node1_string_id,node2_string_id,homology,experimentally_determined_interaction,database_annotated,automated_textmining,combined_score
0,AP1B1,HIP1R,9606.ENSP00000350199,9606.ENSP00000253083,0,0.131,0.6,0,0.637
1,AP1B1,CLVS2,9606.ENSP00000350199,9606.ENSP00000275162,0,0.0,0.6,0,0.6
2,ASPSCR1,UFD1L,9606.ENSP00000306625,9606.ENSP00000263202,0,0.438,0.0,0,0.438
3,ASPSCR1,UBXN7,9606.ENSP00000306625,9606.ENSP00000296328,0,0.472,0.0,0,0.472
4,ASPSCR1,NSFL1C,9606.ENSP00000306625,9606.ENSP00000418529,0,0.747,0.0,0,0.747


In [195]:
# get list of nodes

nodes = df.node1.unique()

In [196]:
nodes

array(['AP1B1', 'ASPSCR1', 'ATP6V1A', 'ATP6V1B2', 'ATP6V1D', 'ATP6V1E1',
       'ATP6V1F', 'ATP6V1G1', 'ATP6V1H', 'B2M', 'CALR', 'CD1B', 'CD1D',
       'CD8A', 'CHMP2A', 'CHMP2B', 'CHMP3', 'CHMP4B', 'CLVS2', 'DERL1',
       'GGA1', 'HGS', 'HIP1R', 'HLA-A', 'HLA-B', 'HLA-C', 'HLA-E',
       'HLA-G', 'KLRC1', 'KLRD1', 'KPNA1', 'KPNA2', 'KPNB1', 'NSFL1C',
       'NUTF2', 'RAB5A', 'RAB7A', 'RAN', 'STAM', 'TSG101', 'UBXN7',
       'UFD1L', 'VCP', 'VPS33B', 'XPO1', 'XPO5'], dtype=object)

In [197]:
# build dict of node inputs

node_inputs = {}
for node in nodes:
    inputs = list(df[df.node1 == node].node2)
    node_inputs[node] = inputs

In [198]:
# construct boolean rules as list
rules = []
for key, value in node_inputs.items():
    rule = key + ' *= '
    rule = rule + ' and '.join(value)
    rules.append(rule)

In [199]:
# construct initial conditions
initial_conditions = []
for node in nodes:
    initial_condition = node + ' = Random'
    initial_conditions.append(initial_condition)

In [200]:
# export model
definition = '#initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#rules\n'+'\n'.join(rules)
fp = open('model.txt', 'w')
fp.write(definition)
fp.close()

In [201]:
inp = file('model.txt').read()

In [202]:
#  print(inp)

In [37]:
def string2definition(tabular_text_output, initial_value):
    """
    model 1
    """
    df = pd.read_csv(tabular_text_output)
    
    # get list of nodes
    nodes = df.node1.unique()
    
    # build dict of node inputs
    node_inputs = {}
    for node in nodes:
        inputs = list(df[df.node1 == node].node2)
        node_inputs[node] = inputs
        
    # construct boolean rules as list
    rules = []
    for key, value in node_inputs.items():
        rule = key + ' *= '
        rule = rule + ' or '.join(value)
        rules.append(rule)
        
    # construct initial conditions
    initial_conditions = []
    initial_value = 'True' if initial_value else 'False'
    for node in nodes:
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)
        
    # definition
    return (
        '#initial conditions\n'+
        '\n'.join(initial_conditions)+         
        '\n\n'+
        '#rules\n'+
        '\n'.join(rules)
    )        

In [38]:
def add_process_edgelist(definition, edgelist, initial_value):
    """
    model 1
    """
    df = pd.read_csv(edgelist)
    
    # get list of nodes
    nodes = df.process.unique()

    # build dict of node inputs
    node_inputs = {}
    for node in nodes:
        inputs = list(df[df.process == node].node)
        node_inputs[node] = inputs
    
    # remove input nodes not in the network (not modelled)
    for inputs in node_inputs.values():
        for input in inputs:
            if input not in definition: inputs.remove(input)

    # construct boolean rules as list
    rules = []
    for key, value in node_inputs.items():
        rule = key + ' *= '
        rule = rule + ' and '.join(value)
        rules.append(rule)
        
    # construct initial conditions
    initial_conditions = []
    initial_value = 'True' if initial_value else 'False'
    for node in nodes:
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)
        
    # definition
    return (
        definition + 
        '\n\n'+
        '#process node initial conditions\n'+
        '\n'.join(initial_conditions)+
        '\n\n'+
        '#process node rules\n'+
        '\n'.join(rules)
    )

In [40]:
def add_mtb_edgelist(definition, mtb_edgelist, initial_value):
    """
    model 1
    """
    df = pd.read_csv(mtb_edgelist)

    # get list of nodes
    target_nodes = df.node.unique()
    
    # build dict of node inputs
    node_inputs = {}
    for node in target_nodes:
        inputs = list(df[df.node == node].mtb)
        node_inputs[node] = inputs
    
    # remove factors without target nodes in the network (not modelled)
    mtb_nodes = []
    for node, mtb in node_inputs.items():
        if node not in definition:
            del node_inputs[node]
        else:
            mtb_nodes+=mtb
            mtb_nodes = list(set(mtb_nodes)) #unique values

    # construct boolean rules as list
    rules = []
    for target_node, mtb in node_inputs.items():
        rule = target_node + ' *= ' + target_node # add the inhibition rule recursively
        rule = rule + ' and not (' +' or '.join(mtb) + ')'
        rules.append(rule)

    # construct initial conditions
    initial_conditions = []
    initial_value = 'True' if initial_value else 'False'
    for node in mtb_nodes:
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)
        
    # definition
    return (
        definition + 
        '\n\n'+
        '#mtb node initial conditions\n'+
        '\n'.join(initial_conditions)+
        '\n\n'+
        '#mtb update rules\n'+
        '\n'.join(rules)
    )

In [41]:
definition = string2definition('string-network-1.csv', True)
definition = add_process_edgelist(definition,'string-1-process-edgelist.csv', True)
print add_mtb_edgelist(definition, 'mtb-edgelist.csv', False)

#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True

#rules
NUTF2 *= KPNA2 or KPNB1 or RAN
ATP6V1D *= ATP6V1G1 or ATP6V1B2 or ATP6V1E1 or ATP6V1H or ATP6V1F or ATP6V1A
CD1D *= CD1B or B2M
ATP6V1F *= ATP6V1D or ATP6V1E1 or ATP6V1A or ATP6V1B2 or UBXN7 or ATP6V1H or ATP6V1G1
ATP6V1A *= ATP6V1D or ATP6V1E1 or ATP6V1G1 or ATP6V1F or ATP6V1H or ATP6V1B2
KLRD1 *= HLA-E or HLA-A or B2M or KLRC1
HLA-C *= CALR or KPNA2 or CD8A or 

In [206]:
# read in complexes and create a dict
# build dict of node inputs

df_complexes = pd.read_csv('string-1-complexes.csv')
complexes = df_complexes.complex.unique()
complexes

array(['ESCRT_complexes', 'MHC_I_complexes', 'vATPase_complexes'],
      dtype=object)

In [207]:
complex_components = {}
for complex in complexes:
    inputs = list(df_complexes[df_complexes.complex == complex].node)
    complex_components[complex] = inputs
complex_components

{'ESCRT_complexes': ['HGS',
  'CHMP2A',
  'CHMP4B',
  'CHMP2B',
  'CHMP3',
  'STAM',
  'TSG101'],
 'MHC_I_complexes': ['B2M',
  'HLA-C',
  'HLA-B',
  'HLA-G',
  'HLA-E',
  'CD8A',
  'CALR',
  'HLA-A'],
 'vATPase_complexes': ['ATP6V1F',
  'ATP6V1B2',
  'ATP6V1A',
  'ATP6V1G1',
  'ATP6V1D',
  'ATP6V1E1',
  'ATP6V1H']}

In [208]:
node_inputs

def reduce_complexes(node_inputs, complex_components):
    # remove nodes in a complex
    for components in complex_components.values():
        for node in node_inputs.keys():
            if node in components:
                del node_inputs[node]
                continue

    # replace complex nodes
    for node, inputs in node_inputs.items():
        for complex, components in complex_components.items():
            for i, input in enumerate(inputs):
                if input in components:
                    node_inputs[node][i] = complex
                    continue
        node_inputs[node] = list(set(node_inputs[node]))
    return node_inputs

node_inputs # get OR'd

{'AP1B1': ['HIP1R', 'CLVS2'],
 'ASPSCR1': ['UFD1L', 'UBXN7', 'NSFL1C', 'VCP'],
 'ATP6V1A': ['ATP6V1D',
  'ATP6V1E1',
  'ATP6V1G1',
  'ATP6V1F',
  'ATP6V1H',
  'ATP6V1B2'],
 'ATP6V1B2': ['ATP6V1D',
  'ATP6V1E1',
  'ATP6V1A',
  'UBXN7',
  'ATP6V1H',
  'ATP6V1F',
  'ATP6V1G1'],
 'ATP6V1D': ['ATP6V1G1',
  'ATP6V1B2',
  'ATP6V1E1',
  'ATP6V1H',
  'ATP6V1F',
  'ATP6V1A'],
 'ATP6V1E1': ['ATP6V1D',
  'ATP6V1H',
  'ATP6V1A',
  'ATP6V1F',
  'ATP6V1G1',
  'ATP6V1B2'],
 'ATP6V1F': ['ATP6V1D',
  'ATP6V1E1',
  'ATP6V1A',
  'ATP6V1B2',
  'UBXN7',
  'ATP6V1H',
  'ATP6V1G1'],
 'ATP6V1G1': ['ATP6V1D',
  'ATP6V1E1',
  'ATP6V1A',
  'ATP6V1B2',
  'ATP6V1H',
  'ATP6V1F'],
 'ATP6V1H': ['ATP6V1D',
  'ATP6V1E1',
  'ATP6V1A',
  'ATP6V1B2',
  'ATP6V1G1',
  'ATP6V1F'],
 'B2M': ['CALR',
  'CD1B',
  'CD1D',
  'HLA-C',
  'HLA-E',
  'KLRD1',
  'HLA-A',
  'CD8A',
  'HLA-B',
  'HLA-G',
  'KLRC1',
  'KPNA2'],
 'CALR': ['VCP', 'HLA-G', 'HLA-C', 'CD8A', 'HLA-E', 'HLA-B', 'HLA-A', 'B2M'],
 'CD1B': ['CD1D', 'B2M'],
 'CD1D':

In [209]:
complex_components # get AND'd

{'ESCRT_complexes': ['HGS',
  'CHMP2A',
  'CHMP4B',
  'CHMP2B',
  'CHMP3',
  'STAM',
  'TSG101'],
 'MHC_I_complexes': ['B2M',
  'HLA-C',
  'HLA-B',
  'HLA-G',
  'HLA-E',
  'CD8A',
  'CALR',
  'HLA-A'],
 'vATPase_complexes': ['ATP6V1F',
  'ATP6V1B2',
  'ATP6V1A',
  'ATP6V1G1',
  'ATP6V1D',
  'ATP6V1E1',
  'ATP6V1H']}

In [220]:
# check complex to complex

complex_inputs = {}
for complex, components in complex_components.items():
    inputs = []
    for component in components:
        inputs+=node_inputs[component]
    complex_inputs[complex] = inputs
complex_inputs=reduce_complexes(complex_inputs, complex_components)
for complex, inputs in complex_inputs.items():
    inputs.remove(complex)
complex_inputs

{'ESCRT_complexes': ['XPO1', 'RAB7A', 'HIP1R'],
 'MHC_I_complexes': ['KLRC1',
  'CD1D',
  'CD1B',
  'KLRD1',
  'VCP',
  'DERL1',
  'KPNA2'],
 'vATPase_complexes': ['UBXN7']}

In [232]:
def get_complex_inputs(node_inputs, complex_components):
    complex_inputs = {}
    for complex, components in complex_components.items():
        inputs = []
        for component in components:
            inputs+=node_inputs[component]
        complex_inputs[complex] = list(set(inputs))
    complex_inputs=reduce_complexes(complex_inputs, complex_components)
    for complex, inputs in complex_inputs.items():
        inputs.remove(complex)
    return complex_inputs

In [233]:
    complex_inputs = get_complex_inputs(node_inputs, complex_components)

complex_inputs

{'ESCRT_complexes': ['XPO1', 'RAB7A', 'HIP1R'],
 'MHC_I_complexes': ['KLRC1',
  'CD1D',
  'KLRD1',
  'VCP',
  'DERL1',
  'KPNA2',
  'CD1B'],
 'vATPase_complexes': ['UBXN7']}

In [250]:
# write export helper functions

def string2definition2(tabular_text_output, complexes, initial_value):
    df = pd.read_csv(tabular_text_output)
    df_complexes = pd.read_csv(complexes)

    # get list of nodes 
    nodes = list(df.node1.unique())
    
    # build dict of node inputs
    node_inputs = {}
    for node in nodes:
        inputs = list(df[df.node1 == node].node2)
        node_inputs[node] = inputs
        
    # get list of complexes 
    complexes = list(df_complexes.complex.unique())

    # build dict of components
    complex_components = {}
    for complex in complexes:
        inputs = list(df_complexes[df_complexes.complex == complex].node)
        complex_components[complex] = inputs

    # add complex inputs
    complex_inputs = get_complex_inputs(node_inputs, complex_components)
    
    # reduce complexes
    node_inputs = reduce_complexes(node_inputs, complex_components)
    
    # OR node inputs
    rules = []
    for node, inputs in node_inputs.items():
        rule = node + ' *= '
        rule = rule + ' or '.join(inputs)
        rules.append(rule)
        
    # AND complex components
    for complex, components in complex_components.items():
        rule = complex + ' *= '
        rule = rule + ' and '.join(components)
        rules.append(rule)
    
    # OR complex inputs
    for complex, inputs in complex_inputs.items():
        rule = complex + ' *= '
        rule = rule + complex + ' or '+' or '.join(inputs)
        rules.append(rule)
        
    # initial conditions
    initial_value = 'True' if initial_value else 'False'
    initial_conditions = []
    for node in (nodes+complexes):
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)
        
    # definition
    definition = '#initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#rules\n'+'\n'.join(rules)
    
    return definition
        

In [251]:
definition2 = string2definition2('string-network-1.csv', 'string-1-complexes.csv', True)

In [252]:
print definition2

#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True

#rules
NUTF2 *= RAN or KPNB1 or KPNA2
CD1D *= MHC_I_complexes or CD1B
KLRD1 *= KLRC1 or MHC_I_complexes
CD1B *= CD1D or MHC_I_complexes
HIP1R *= AP1B1 or ESCRT_complexes
XPO1 *= RAN or ESCRT_complexes
XPO5 *= RAN
VPS33B *= UFD1L
RAN *= NUTF2 or XPO1 or XPO5 or KPNB1 or KPNA2 or KPNA1
DERL1 *= VCP or 

In [190]:
def add_process_edgelist2(definition, edgelist, complexes, initial_value):
    df = pd.read_csv(edgelist)
    df_complexes = pd.read_csv(complexes)

    # get list of nodes
    nodes = df.process.unique()

    # build dict of node inputs
    node_inputs = {}
    for node in nodes:
        inputs = list(df[df.process == node].node)
        node_inputs[node] = inputs
    
    # get list of complexes 
    complexes = list(df_complexes.complex.unique())

    # build dict of components
    complex_components = {}
    for complex in complexes:
        inputs = list(df_complexes[df_complexes.complex == complex].node)
        complex_components[complex] = inputs
    
    
    
    # reduce complexes
    node_inputs = reduce_complexes(node_inputs, complex_components)
    
    # construct boolean rules as list
    rules = []
    for key, value in node_inputs.items():
        rule = key + ' *= '
        rule = rule + ' and '.join(value)
        rules.append(rule)
        
    # initial conditions
    initial_value = 'True' if initial_value else 'False'
    initial_conditions = []
    for node in (nodes):
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)
        
    # definition
    return definition + '\n\n#process node initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#process node rules\n'+'\n'.join(rules)


In [191]:
print add_process_edgelist2(definition2, 'string-1-process-edgelist.csv', 'string-1-complexes.csv', True)

#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True

#rules
NUTF2 *= RAN or KPNB1 or KPNA2
CD1D *= MHC_I_complexes or CD1B
KLRD1 *= KLRC1 or MHC_I_complexes
CD1B *= CD1D or MHC_I_complexes
HIP1R *= AP1B1 or ESCRT_complexes
XPO1 *= RAN or ESCRT_complexes
XPO5 *= RAN
VPS33B *= UFD1L
RAN *= NUTF2 or XPO1 or XPO5 or KPNB1 or KPNA2 or KPNA1
DERL1 *= VCP or 