In [5]:
import pandas as pd

In [6]:
df = pd.read_csv('string-network-1.csv')
df.head()

Unnamed: 0,node1,node2,node1_string_id,node2_string_id,homology,experimentally_determined_interaction,database_annotated,automated_textmining,combined_score
0,AP1B1,HIP1R,9606.ENSP00000350199,9606.ENSP00000253083,0,0.131,0.6,0,0.637
1,AP1B1,CLVS2,9606.ENSP00000350199,9606.ENSP00000275162,0,0.0,0.6,0,0.6
2,ASPSCR1,UFD1L,9606.ENSP00000306625,9606.ENSP00000263202,0,0.438,0.0,0,0.438
3,ASPSCR1,UBXN7,9606.ENSP00000306625,9606.ENSP00000296328,0,0.472,0.0,0,0.472
4,ASPSCR1,NSFL1C,9606.ENSP00000306625,9606.ENSP00000418529,0,0.747,0.0,0,0.747


In [7]:
# get list of nodes

nodes = df.node1.unique()

In [8]:
nodes

array(['AP1B1', 'ASPSCR1', 'ATP6V1A', 'ATP6V1B2', 'ATP6V1D', 'ATP6V1E1',
       'ATP6V1F', 'ATP6V1G1', 'ATP6V1H', 'B2M', 'CALR', 'CD1B', 'CD1D',
       'CD8A', 'CHMP2A', 'CHMP2B', 'CHMP3', 'CHMP4B', 'CLVS2', 'DERL1',
       'GGA1', 'HGS', 'HIP1R', 'HLA-A', 'HLA-B', 'HLA-C', 'HLA-E',
       'HLA-G', 'KLRC1', 'KLRD1', 'KPNA1', 'KPNA2', 'KPNB1', 'NSFL1C',
       'NUTF2', 'RAB5A', 'RAB7A', 'RAN', 'STAM', 'TSG101', 'UBXN7',
       'UFD1L', 'VCP', 'VPS33B', 'XPO1', 'XPO5'], dtype=object)

In [130]:
# build dict of node inputs

node_inputs = {}
for node in nodes:
    inputs = list(df[df.node1 == node].node2)
    node_inputs[node] = inputs

In [10]:
# construct boolean rules as list
rules = []
for key, value in node_inputs.items():
    rule = key + ' *= '
    rule = rule + ' and '.join(value)
    rules.append(rule)

In [11]:
# construct initial conditions
initial_conditions = []
for node in nodes:
    initial_condition = node + ' = Random'
    initial_conditions.append(initial_condition)

In [12]:
# export model
definition = '#initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#rules\n'+'\n'.join(rules)
fp = open('model.txt', 'w')
fp.write(definition)
fp.close()

In [13]:
inp = file('model.txt').read()

In [65]:
#  print(inp)

In [34]:
# write export helper functions

def string2definition1(tabular_text_output, initial_node_value, out='model.txt'):
    df = pd.read_csv(tabular_text_output)
    
    # get list of nodes
    nodes = df.node1.unique()
    
    # build dict of node inputs
    node_inputs = {}
    for node in nodes:
        inputs = list(df[df.node1 == node].node2)
        node_inputs[node] = inputs
        
    # construct boolean rules as list
    rules = []
    for key, value in node_inputs.items():
        rule = key + ' *= '
        rule = rule + ' or '.join(value)
        rules.append(rule)
        
    # construct initial conditions
    initial_conditions = []
    for node in nodes:
        initial_condition = node + ' = ' + initial_node_value
        initial_conditions.append(initial_condition)
        
    # definition
    definition = '#initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#rules\n'+'\n'.join(rules)

#     # export
#     fp = open(out, 'w+')
#     fp.write(definition)
#     fp.close()
    
    return definition
        

In [45]:
def add_process_edgelist1(definition, edgelist, initial_process_value):
    df = pd.read_csv(edgelist)
    
    # get list of nodes
    nodes = df.process.unique()

    # build dict of node inputs
    node_inputs = {}
    for node in nodes:
        inputs = list(df[df.process == node].node)
        node_inputs[node] = inputs
        
    # construct boolean rules as list
    rules = []
    for key, value in node_inputs.items():
        rule = key + ' *= '
        rule = rule + ' and '.join(value)
        rules.append(rule)
        
    # construct initial conditions
    initial_conditions = []
    for node in nodes:
        initial_condition = node + ' = ' + initial_process_value
        initial_conditions.append(initial_condition)
        
    # definition
    return definition + '\n\n#process node initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#process node rules\n'+'\n'.join(rules)


In [63]:
def add_mtb_edgelist(definition, edgelist, initial_mtb_value):
    df = pd.read_csv(edgelist)

    # get list of nodes
    target_nodes = df.node.unique()
    mtb_nodes = df.mtb.unique()
    
    # build dict of node inputs
    node_inputs = {}
    for node in target_nodes:
        inputs = list(df[df.node == node].mtb)
        node_inputs[node] = inputs
    
    # construct boolean rules as list
    rules = []
    for key, value in node_inputs.items():
        rule = key + ' *= ' + key
        rule = rule + ' and not (' +' or '.join(value) + ')'
        rules.append(rule)

    # construct initial conditions
    initial_conditions = []
    for node in mtb_nodes:
        initial_condition = node + ' = ' + initial_mtb_value
        initial_conditions.append(initial_condition)
        
    # definition
    return definition + '\n\n#mtb node initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#mtb update rules\n'+'\n'.join(rules)

    

In [89]:
# read in complexes and create a dict
# build dict of node inputs

df_complexes = pd.read_csv('string-1-complexes.csv')
complexes = df_complexes.complex.unique()
complexes

array(['ESCRT_complexes', 'MHC_I_complexes', 'vATPase_complexes'],
      dtype=object)

In [90]:
complex_inputs = {}
for complex in complexes:
    inputs = list(df_complexes[df_complexes.complex == complex].node)
    complex_inputs[complex] = inputs
complex_inputs

{'ESCRT_complexes': ['HGS',
  'CHMP2A',
  'CHMP4B',
  'CHMP2B',
  'CHMP3',
  'STAM',
  'TSG101'],
 'MHC_I_complexes': ['B2M',
  'HLA-C',
  'HLA-B',
  'HLA-G',
  'HLA-E',
  'CD8A',
  'CALR',
  'HLA-A'],
 'vATPase_complexes': ['ATP6V1F',
  'ATP6V1B2',
  'ATP6V1A',
  'ATP6V1G1',
  'ATP6V1D',
  'ATP6V1E1',
  'ATP6V1H']}

In [174]:
node_inputs

def reduce_complexes(node_inputs, complex_inputs):
    # remove nodes in a complex
    for components in complex_inputs.values():
        for node in node_inputs.keys():
            if node in components:
                del node_inputs[node]
                continue

    # replace complex nodes
    for node, inputs in node_inputs.items():
        for complex, components in complex_inputs.items():
            for i, input in enumerate(inputs):
                if input in components:
                    node_inputs[node][i] = complex
                    continue
        node_inputs[node] = list(set(node_inputs[node]))
    return node_inputs

node_inputs # get OR'd

{'AP1B1': ['CLVS2', 'HIP1R'],
 'ASPSCR1': ['VCP', 'UBXN7', 'UFD1L', 'NSFL1C'],
 'CD1B': ['CD1D', 'MHC_I_complexes'],
 'CD1D': ['MHC_I_complexes', 'CD1B'],
 'CLVS2': ['AP1B1'],
 'DERL1': ['VCP', 'MHC_I_complexes'],
 'GGA1': ['RAB5A'],
 'HIP1R': ['AP1B1', 'ESCRT_complexes'],
 'KLRC1': ['MHC_I_complexes', 'KLRD1'],
 'KLRD1': ['KLRC1', 'MHC_I_complexes'],
 'KPNA1': ['RAN', 'KPNB1'],
 'KPNA2': ['NUTF2', 'RAN', 'KPNB1', 'MHC_I_complexes'],
 'KPNB1': ['NUTF2', 'RAN', 'KPNA2', 'KPNA1'],
 'NSFL1C': ['VCP', 'ASPSCR1', 'UBXN7', 'UFD1L'],
 'NUTF2': ['RAN', 'KPNB1', 'KPNA2'],
 'RAB5A': ['GGA1', 'RAB7A'],
 'RAB7A': ['ESCRT_complexes', 'RAB5A'],
 'RAN': ['NUTF2', 'XPO1', 'XPO5', 'KPNB1', 'KPNA2', 'KPNA1'],
 'UBXN7': ['VCP', 'ASPSCR1', 'vATPase_complexes', 'UFD1L', 'NSFL1C'],
 'UFD1L': ['VCP', 'VPS33B', 'ASPSCR1', 'UBXN7', 'NSFL1C'],
 'VCP': ['ASPSCR1', 'MHC_I_complexes', 'DERL1', 'NSFL1C', 'UBXN7', 'UFD1L'],
 'VPS33B': ['UFD1L'],
 'XPO1': ['RAN', 'ESCRT_complexes'],
 'XPO5': ['RAN']}

In [138]:
complex_inputs # get AND'd

{'ESCRT_complexes': ['HGS',
  'CHMP2A',
  'CHMP4B',
  'CHMP2B',
  'CHMP3',
  'STAM',
  'TSG101'],
 'MHC_I_complexes': ['B2M',
  'HLA-C',
  'HLA-B',
  'HLA-G',
  'HLA-E',
  'CD8A',
  'CALR',
  'HLA-A'],
 'vATPase_complexes': ['ATP6V1F',
  'ATP6V1B2',
  'ATP6V1A',
  'ATP6V1G1',
  'ATP6V1D',
  'ATP6V1E1',
  'ATP6V1H']}

In [175]:
# write export helper functions

def string2definition2(tabular_text_output, complexes, initial_value):
    df = pd.read_csv(tabular_text_output)
    df_complexes = pd.read_csv('string-1-complexes.csv')

    # get list of nodes 
    nodes = list(df.node1.unique())
    
    # build dict of node inputs
    node_inputs = {}
    for node in nodes:
        inputs = list(df[df.node1 == node].node2)
        node_inputs[node] = inputs
        
    # get list of complexes 
    complexes = list(df_complexes.complex.unique())

    # build dict of complexes
    complex_inputs = {}
    for complex in complexes:
        inputs = list(df_complexes[df_complexes.complex == complex].node)
        complex_inputs[complex] = inputs

    # reduce complexes
    node_inputs = reduce_complexes(node_inputs, complex_inputs)
    
    # OR the nodes
    rules = []
    for key, value in node_inputs.items():
        rule = key + ' *= '
        rule = rule + ' or '.join(value)
        rules.append(rule)
        
    # AND the complexes
    for key, value in complex_inputs.items():
        rule = key + ' *= '
        rule = rule + ' and '.join(value)
        rules.append(rule)
        
    # initial conditions
    initial_value = 'True' if initial_value else 'False'
    initial_conditions = []
    for node in (nodes+complexes):
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)
        
    # definition
    definition = '#initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#rules\n'+'\n'.join(rules)
    
    return definition
        

In [181]:
definition2 = string2definition2('string-network-1.csv', 'string-1-complexes.csv', True)

In [184]:
def add_process_edgelist2(definition, edgelist, complexes, initial_value):
    df = pd.read_csv(edgelist)
    
    # get list of nodes
    nodes = df.process.unique()

    # build dict of node inputs
    node_inputs = {}
    for node in nodes:
        inputs = list(df[df.process == node].node)
        node_inputs[node] = inputs
    
    # get list of complexes 
    complexes = list(df_complexes.complex.unique())

    # build dict of complexes
    complex_inputs = {}
    for complex in complexes:
        inputs = list(df_complexes[df_complexes.complex == complex].node)
        complex_inputs[complex] = inputs
    
    # reduce complexes
    node_inputs = reduce_complexes(node_inputs, complex_inputs)
    
    # construct boolean rules as list
    rules = []
    for key, value in node_inputs.items():
        rule = key + ' *= '
        rule = rule + ' and '.join(value)
        rules.append(rule)
        
    # initial conditions
    initial_value = 'True' if initial_value else 'False'
    initial_conditions = []
    for node in (nodes):
        initial_condition = node + ' = ' + initial_value
        initial_conditions.append(initial_condition)
        
    # definition
    return definition + '\n\n#process node initial conditions\n'+'\n'.join(initial_conditions)+'\n\n#process node rules\n'+'\n'.join(rules)


In [185]:
print add_process_edgelist2(definition2, 'string-1-process-edgelist.csv', 'string-1-complexes.csv', True)

#initial conditions
AP1B1 = True
ASPSCR1 = True
ATP6V1A = True
ATP6V1B2 = True
ATP6V1D = True
ATP6V1E1 = True
ATP6V1F = True
ATP6V1G1 = True
ATP6V1H = True
B2M = True
CALR = True
CD1B = True
CD1D = True
CD8A = True
CHMP2A = True
CHMP2B = True
CHMP3 = True
CHMP4B = True
CLVS2 = True
DERL1 = True
GGA1 = True
HGS = True
HIP1R = True
HLA-A = True
HLA-B = True
HLA-C = True
HLA-E = True
HLA-G = True
KLRC1 = True
KLRD1 = True
KPNA1 = True
KPNA2 = True
KPNB1 = True
NSFL1C = True
NUTF2 = True
RAB5A = True
RAB7A = True
RAN = True
STAM = True
TSG101 = True
UBXN7 = True
UFD1L = True
VCP = True
VPS33B = True
XPO1 = True
XPO5 = True
ESCRT_complexes = True
MHC_I_complexes = True
vATPase_complexes = True

#rules
NUTF2 *= RAN or KPNB1 or KPNA2
CD1D *= MHC_I_complexes or CD1B
KLRD1 *= KLRC1 or MHC_I_complexes
CD1B *= CD1D or MHC_I_complexes
HIP1R *= AP1B1 or ESCRT_complexes
XPO1 *= RAN or ESCRT_complexes
XPO5 *= RAN
VPS33B *= UFD1L
RAN *= NUTF2 or XPO1 or XPO5 or KPNB1 or KPNA2 or KPNA1
DERL1 *= VCP or 