# Subsystem Homogenization between GEMs

In [1]:
import cobra
import pandas as pd
import pickle

First, try to assign the subsystems based on the number of reactions they share.

In [2]:
def jaccard_index(list_1, list_2):
    """Return the Jaccard index |A ∩ B| / |A ∪ B| of two collections.

    Both arguments are converted to sets, so duplicates and ordering are
    ignored.

    Parameters
    ----------
    list_1, list_2 : iterable of hashable items

    Returns
    -------
    float
        Value in [0.0, 1.0]. When both inputs are empty the union is empty
        and 0.0 is returned instead of raising ZeroDivisionError.
    """
    s1 = set(list_1)
    s2 = set(list_2)

    union = s1 | s2
    if not union:
        # Nothing to compare: avoid dividing by zero.
        return 0.0

    return len(s1 & s2) / len(union)

def recall(reference, list_2):
    """Return the fraction of distinct reference items that appear in list_2.

    Both arguments are converted to sets. The denominator is the number of
    *distinct* reference items, so duplicates in `reference` do not deflate
    the score and any iterable (not only sized sequences) is accepted.

    Parameters
    ----------
    reference : iterable of hashable items
        The ground-truth collection.
    list_2 : iterable of hashable items
        The collection being evaluated against the reference.

    Returns
    -------
    float
        Value in [0.0, 1.0]. An empty reference yields 0.0 instead of
        raising ZeroDivisionError.
    """
    s1 = set(reference)
    s2 = set(list_2)

    if not s1:
        # No reference items: nothing can be recalled.
        return 0.0

    return len(s1 & s2) / len(s1)

In [11]:
# Load the three genome-scale models from their MATLAB files, keyed by organism.
model_dict = {
    'pseudomonas_putida': cobra.io.load_matlab_model('models/iJN1463.mat'),
    'escherichia_coli': cobra.io.load_matlab_model('models/iJO1366.mat'),
    'synechocystis_sp_pcc_6803': cobra.io.load_matlab_model('models/iJN678_new_annotation.mat'),
}

species = [*model_dict]
subsystem_dict = {}

# For every organism, map each subsystem label to the ids of its reactions.
for specie in species:
    model = model_dict[specie]
    all_subsystems = {rxn.subsystem for rxn in model.reactions}
    # NOTE(review): `ss in rxn.subsystem` is a containment test on the
    # reaction's subsystem value, not an equality test; if subsystems are
    # plain strings, a label that is a substring of another label (or an
    # empty label) also collects that other label's reactions -- confirm
    # this is intended.
    subsystem_dict[specie] = dict(
        (ss, [rxn.id for rxn in model.reactions if ss in rxn.subsystem])
        for ss in all_subsystems
    )

subsystem_dict

This model seems to have metCharge instead of metCharges field. Will use metCharge for what metCharges represents.
No defined compartments in model iJN1463. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, p
This model seems to have metCharge instead of metCharges field. Will use metCharge for what metCharges represents.
No defined compartments in model iML1515. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, p


{'pseudomonas_putida': {'S_PHAs_Metabolism': ['3HAACOAT100',
   '3HAACOAT120',
   '3HAACOAT121',
   '3HAACOAT140',
   '3HAACOAT141',
   '3HAACOAT60',
   '3HAACOAT80',
   'ACSPHAC100',
   'ACSPHAC101',
   'ACSPHAC120',
   'ACSPHAC121',
   'ACSPHAC121d6',
   'ACSPHAC140',
   'ACSPHAC141',
   'ACSPHAC141d5',
   'ACSPHAC142',
   'ACSPHAC160',
   'ACSPHAC40',
   'ACSPHAC50',
   'ACSPHAC60',
   'ACSPHAC70',
   'ACSPHAC80',
   'ACSPHAC90',
   'ACSPHACP100',
   'ACSPHACP40',
   'ACSPHACP50',
   'ACSPHACP60',
   'ACSPHACP70',
   'ACSPHACP80',
   'ACSPHACP90',
   'ACSPHACT40',
   'ACSPHACT60',
   'HATBH',
   'PHADPC100',
   'PHADPC101',
   'PHADPC120',
   'PHADPC121',
   'PHADPC121d6',
   'PHADPC140',
   'PHADPC141',
   'PHADPC141d5',
   'PHADPC142',
   'PHADPC40',
   'PHADPC50',
   'PHADPC60',
   'PHADPC70',
   'PHADPC80',
   'PHADPC90',
   'PHADPCP100',
   'PHADPCP40',
   'PHADPCP50',
   'PHADPCP60',
   'PHADPCP70',
   'PHADPCP80',
   'PHADPCP90',
   'PHADPCT40',
   'PHADPCT60',
   'PHAP2C100'

In [4]:
# Take the reference model out of model_dict so that later loops over
# model_dict only visit the models that must be mapped onto the reference.
# The membership guard keeps this cell re-runnable: an unconditional pop raises
# KeyError the second time because the key has already been removed.
if 'pseudomonas_putida' in model_dict:
    reference_model = model_dict.pop('pseudomonas_putida')

In [5]:
# Load the pickled reaction-id translation table (a dict of per-reaction
# dicts). The context manager closes the file even if unpickling fails.
# NOTE(security): pickle.load can execute arbitrary code; only load
# 'tools/reactionsID.pickle' from a trusted source.
with open('tools/reactionsID.pickle', 'rb') as file:
    reaction_translator = pickle.load(file)

# Normalise the arrow notation of every entry that carries both BiGG fields:
# '<->' becomes '<=>' and '->' becomes '-->'.
# NOTE(review): presumably this matches the arrow style expected downstream
# (e.g. by cobra) -- confirm.
for values in reaction_translator.values():
    if 'BiGG' in values and 'BiGG_all' in values:
        # Order matters: reversible arrows are rewritten first so the
        # following '->' replacement can no longer match inside '<->'.
        reaction = values['Reaction'].replace('<->', '<=>')
        values['Reaction'] = reaction.replace('->', '-->')

In [6]:
reaction_translator

{'DM_4crsol_c': {'BiGG': 'DM_4crsol_c',
  'Description': 'Sink needed to allow p-Cresol to leave system',
  'Reaction': '4crsol_c <=> ',
  'MNX': 'MNXR96888',
  'MNX_all': ['MNXR96888'],
  'MetaCyc': 'TRANS-RXN0-500',
  'MetaCyc_all': ['TRANS-RXN0-500'],
  'BiGG_all': ['DM_4CRSOL', 'DM_4crsol_c']},
 'DM_aacald_c': {'BiGG': 'DM_aacald_c',
  'Description': 'Sink needed to allow aminoacetaldehyde to leave system',
  'Reaction': 'aacald_c <=> ',
  'MNX': 'MNXR142747',
  'MNX_all': ['MNXR142747'],
  'BiGG_all': ['DM_AACALD', 'DM_aacald_c', 'R_DM_AACALD']},
 'DM_amob_c': {'BiGG': 'DM_amob_c',
  'Description': 'Sink needed to allow S-Adenosyl-4-methylthio-2-oxobutanoate to leave system',
  'Reaction': 'amob_c <=> ',
  'MNX': 'MNXR142748',
  'MNX_all': ['MNXR142748'],
  'BiGG_all': ['DM_AMOB', 'DM_amob_c', 'sink_amob']},
 'BIOMASS_Ec_iJO1366_core_53p95M': {'BiGG': 'BIOMASS_Ec_iJO1366_core_53p95M',
  'Description': 'E. coli biomass objective function (iJO1366) - core - with 53.95 GAM estimate',

## Translate all reactions to the reference model

In [7]:
# Collect the reference names and ids once, as sets, so every membership test
# below is a constant-time lookup instead of rebuilding and scanning a list
# for each E. coli reaction.
reference_reaction_names = {rxn.name for rxn in reference_model.reactions}
reference_reaction_ids = {rxn.id for rxn in reference_model.reactions}

# E. coli reaction ids whose *name* also occurs in the reference model.
ecoli_to_translate_by_name = [r.id for r in model_dict['escherichia_coli'].reactions
                              if r.name in reference_reaction_names]

# E. coli reaction ids whose *id* also occurs in the reference model.
ecoli_to_translate_by_id = [r.id for r in model_dict['escherichia_coli'].reactions
                            if r.id in reference_reaction_ids]

In [8]:
len(set(ecoli_to_translate_by_name).intersection(set(ecoli_to_translate_by_id)))

1427

In [9]:
len(ecoli_to_translate_by_id)

1427

In [10]:
len(ecoli_to_translate_by_name)

1480

In [11]:
# Collect the reference names and ids once, as sets, so every membership test
# below is a constant-time lookup instead of rebuilding and scanning a list
# for each Synechocystis reaction.
reference_reaction_names = {rxn.name for rxn in reference_model.reactions}
reference_reaction_ids = {rxn.id for rxn in reference_model.reactions}

# Synechocystis reaction ids whose *name* also occurs in the reference model.
synec_to_translate_by_name = [r.id for r in model_dict['synechocystis_sp_pcc_6803'].reactions
                              if r.name in reference_reaction_names]

# Synechocystis reaction ids whose *id* also occurs in the reference model.
synec_to_translate_by_id = [r.id for r in model_dict['synechocystis_sp_pcc_6803'].reactions
                            if r.id in reference_reaction_ids]

In [12]:
len(synec_to_translate_by_id)

530

In [13]:
len(synec_to_translate_by_name)

563

In [14]:
len(set(synec_to_translate_by_name).intersection(set(synec_to_translate_by_id)))

530

In [15]:
# id -> name for every reaction of the reference model.
reference_reaction_name_dict = {r.id : r.name for r in reference_model.reactions}

# Inverse index built once: name -> reference ids carrying that name, in
# reference-model order. This replaces a full scan of the reference dict (and
# a rebuilt name list) for every single reaction of every model.
reference_ids_by_name = {}
for reference_id, reference_name in reference_reaction_name_dict.items():
    reference_ids_by_name.setdefault(reference_name, []).append(reference_id)

# reaction_parser[specie][reaction_id] -> list of reference reaction ids that
# share the reaction's name. Only reactions whose name exists in the
# reference model get an entry.
reaction_parser = {}

for specie in model_dict:
    to_translate_by_name = [(r.id, r.name) for r in model_dict[specie].reactions
                            if r.name in reference_ids_by_name]

    # Copy each list so entries never alias one another (or the index).
    reaction_parser[specie] = { reaction_id : list(reference_ids_by_name[reaction_name])
                                for reaction_id, reaction_name in to_translate_by_name }
    

In [16]:
reaction_parser

{'escherichia_coli': {'EX_cm_e': ['EX_cm_e'],
  'EX_co2_e': ['EX_co2_e'],
  'EX_cobalt2_e': ['EX_cobalt2_e'],
  'DM_5drib_c': ['DM_5drib_c'],
  'DM_amob_c': ['DM_amob_c'],
  'EX_glc__D_e': ['EX_glc__D_e'],
  'EX_glcn_e': ['EX_glcn_e'],
  'EX_glcr_e': ['EX_glcr_e'],
  'EX_glcur_e': ['EX_glcur_e'],
  'EX_gln__L_e': ['EX_gln__L_e'],
  'EX_glu__L_e': ['EX_glu__L_e'],
  'EX_gly_e': ['EX_gly_e'],
  'EX_glyald_e': ['EX_glyald_e'],
  'EX_glyb_e': ['EX_glyb_e'],
  'EX_glyc_e': ['EX_glyc_e'],
  'EX_15dap_e': ['EX_15dap_e'],
  'EX_glyc__R_e': ['EX_glyc__R_e'],
  'EX_26dap__M_e': ['EX_26dap__M_e'],
  'EX_glyclt_e': ['EX_glyclt_e'],
  'EX_gua_e': ['EX_gua_e'],
  'EX_h_e': ['EX_h_e'],
  'EX_h2o_e': ['EX_h2o_e'],
  'EX_h2o2_e': ['EX_h2o2_e'],
  'EX_h2s_e': ['EX_h2s_e'],
  'EX_hdca_e': ['EX_hdca_e'],
  'EX_hdcea_e': ['EX_hdcea_e'],
  'EX_hg2_e': ['EX_hg2_e'],
  'EX_his__L_e': ['EX_his__L_e'],
  'EX_2ddglcn_e': ['EX_2ddglcn_e'],
  'EX_hom__L_e': ['EX_hom__L_e'],
  'EX_hxa_e': ['EX_hxa_e'],
  'EX_hxan_e

In [17]:
# Show the E. coli reactions whose name matches more than two reference
# reaction ids (entries with one or two matches are filtered out).
{ ids : synonyms for ids, synonyms in reaction_parser['escherichia_coli'].items() if len(synonyms)>2}

{'ASPO3': ['ASPO1', 'ASPO3', 'ASPO5', 'ASPO6'],
 'ASPO4': ['ASPO1', 'ASPO3', 'ASPO5', 'ASPO6'],
 'ASPO5': ['ASPO1', 'ASPO3', 'ASPO5', 'ASPO6'],
 'ASPO6': ['ASPO1', 'ASPO3', 'ASPO5', 'ASPO6'],
 'CTECOAI6': ['CTECOAI7', 'CTECOAI8', 'CTECOAI6'],
 'CTECOAI7': ['CTECOAI7', 'CTECOAI8', 'CTECOAI6'],
 'CTECOAI8': ['CTECOAI7', 'CTECOAI8', 'CTECOAI6'],
 'FMNRx': ['FMNRx2_copy1', 'FMNRx', 'FMNRx2_copy2'],
 'FA100ACPHi': ['FA100ACPHi',
  'FA120ACPHi',
  'FA140ACPHi',
  'FA141ACPHi',
  'FA160ACPHi',
  'FA161ACPHi',
  'FA80ACPHi'],
 'FA120ACPHi': ['FA100ACPHi',
  'FA120ACPHi',
  'FA140ACPHi',
  'FA141ACPHi',
  'FA160ACPHi',
  'FA161ACPHi',
  'FA80ACPHi'],
 'FA140ACPHi': ['FA100ACPHi',
  'FA120ACPHi',
  'FA140ACPHi',
  'FA141ACPHi',
  'FA160ACPHi',
  'FA161ACPHi',
  'FA80ACPHi'],
 'FA141ACPHi': ['FA100ACPHi',
  'FA120ACPHi',
  'FA140ACPHi',
  'FA141ACPHi',
  'FA160ACPHi',
  'FA161ACPHi',
  'FA80ACPHi'],
 'FA160ACPHi': ['FA100ACPHi',
  'FA120ACPHi',
  'FA140ACPHi',
  'FA141ACPHi',
  'FA160ACPHi',
  'F

In [18]:
# Replace reaction ids so they match the putida (reference) ones.
#
# reaction_parser is keyed by species first and by reaction id second, so the
# lookup has to go through reaction_parser[specie]. Testing
# `reaction.id in reaction_parser.values()` compares a string against the
# per-species dicts, is never true, and leaves every id untouched.
for specie in model_dict:
    target_model = model_dict[specie]
    synonyms_by_id = reaction_parser.get(specie, {})

    # Iterate over a snapshot because assigning to reaction.id updates the
    # model's reaction index.
    for reaction in list(target_model.reactions):
        synonyms = synonyms_by_id.get(reaction.id, [])

        # Only unambiguous (one-to-one) matches are renamed.
        if len(synonyms) != 1:
            continue

        new_id = synonyms[0]
        # Skip no-op renames and ids already taken in this model: cobra
        # raises ValueError when an id would be duplicated.
        if new_id == reaction.id or target_model.reactions.has_id(new_id):
            continue

        reaction.id = new_id

In [19]:
# Relabel subsystems of the non-reference models with the reference
# (pseudomonas_putida) subsystem names, using shared reaction ids as evidence.
# NOTE(review): subsystems are overwritten in place while iterating, so the
# outcome depends on the iteration order of the reference subsystems and a
# later reference subsystem can overwrite an earlier assignment -- confirm
# this is acceptable.
for specie in model_dict:

    for subsystem, subsystem_reactions in subsystem_dict['pseudomonas_putida'].items():
        print('-' * 100)
        print('Searching for %s analogues in %s' % (subsystem, specie))

        # Reactions of this model whose id belongs to the reference subsystem.
        subsystem_reactions_in_model = [
            r for r in model_dict[specie].reactions if r.id in subsystem_reactions
        ]
        if not subsystem_reactions_in_model:
            continue

        # Current subsystem labels of those shared reactions.
        subsystems_to_replace = {r.subsystem for r in subsystem_reactions_in_model}
        print('Analogues found : %s' % ' ,'.join(subsystems_to_replace))

        for subsystem_to_replace in subsystems_to_replace:
            # Every reaction carrying the old label is relabelled, not only
            # the ones shared with the reference subsystem.
            reactions_in_subsystem = [
                r.id for r in model_dict[specie].reactions
                if r.subsystem == subsystem_to_replace
            ]
            print('Replacing subsystem %s in reactions : %s'
                  % (subsystem_to_replace, ' ,'.join(reactions_in_subsystem)))

            for reaction in reactions_in_subsystem:
                model_dict[specie].reactions.get_by_id(reaction).subsystem = subsystem
            print('')

----------------------------------------------------------------------------------------------------
Searching for S_Cofactor_and_Prosthetic_Group_Biosynthesis__Vitamin_B6_Metabolism analogues in escherichia_coli
Analogues found : Cofactor and Prosthetic Group Biosynthesis
Replacing subsystem Cofactor and Prosthetic Group Biosynthesis in reactions : 2MAHMP ,5DOAN ,ACBIPGT ,4HTHRS ,ACPS1 ,ADCL ,ADCS ,ADOCBIK ,ADOCBLS ,ALATA_D2 ,ALATA_L2 ,AMAOTr ,AMMQLT8 ,AMPMS2 ,ASP1DC ,ASPO3 ,ASPO4 ,ASPO5 ,ASPO6 ,AOXSr2 ,CBIAT ,CBLAT ,CCGS ,CDGR ,BMOCOS ,BMOGDS1 ,BMOGDS2 ,BSORx ,BSORy ,CDGS ,CDPMEK ,BTS5 ,BWCOGDS1 ,BWCOGDS2 ,BWCOS ,CHRPL ,CPH4S ,CPMPS ,CPPPGO ,CPPPGO2 ,APRAUR ,DB4PS ,DBTS ,DHBD ,DHBS ,DHBSH ,DHFR ,DHFS ,DNMPPA ,DHMPTR ,DHNAOT4 ,DHNCOAS ,DNTPPA ,DHNCOAT ,DHNPA2r ,DHNPTE ,DPCOAK ,DPR ,DHPPDA2 ,DHPS2 ,DHPTPE ,DXPRIi ,DXPS ,DMATT ,DMPPS ,DMQMT ,DXYLK ,E4PD ,EGMEACPR ,ENTCS ,EPMEACPR ,FCLT ,FLVR ,FLVRx ,FMNAT ,FMNRx ,FMNRx2 ,G1SAT ,FADRx ,FADRx2 ,FALDH2 ,FALGTHLs ,GLUCYS ,GLUTRR ,GLUTRS ,FE