In [1]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import pickle
import os

# Credentials are read from the environment so that they are never stored in the notebook.
try:
    biocyc_email = os.environ['BIOCYC_EMAIL']
    biocyc_password = os.environ['BIOCYC_PASSWORD']
except KeyError as missing:
    raise RuntimeError(
        'Set the BIOCYC_EMAIL and BIOCYC_PASSWORD environment variables before running this cell'
    ) from missing

s = requests.Session() # create session
# Post login credentials to session:
login_response = s.post('https://websvc.biocyc.org/credentials/login/', data={'email': biocyc_email, 'password': biocyc_password})
login_response.raise_for_status()  # stop here on a failed login instead of failing in a later cell

# Later cells open files through paths such as 'src/...' relative to the working directory.
# The move is done last so that a failed login does not change the directory; it is still
# relative, so re-running this cell without restarting the kernel moves up one more level.
os.chdir('..')

login_response


<Response [200]>

In [4]:
# Step 1 (EC number -> BioCyc reaction ID): fetch the MetaCyc search page for the EC number
EC = '2.3.3.16'
search_url = 'https://metacyc.org/META/substring-search?type=NIL&object=EC+{ec}'.format(ec=EC)
r = s.get(search_url)

In [9]:
# The ID is the text between this link prefix and the next double quote.
# Only the first instance is used; there may be other reactions associated with this EC.
reaction_link = '/reaction?orgid=META&id='
loc = r.text.find(reaction_link)
if loc == -1:
    # str.find returns -1 when the prefix is absent; slicing from there would
    # silently turn unrelated page text into the "ID".
    raise ValueError('No reaction link found in the search results for EC ' + EC)
BioCycID = r.text[loc+len(reaction_link):].split('"')[0]

In [10]:
# Display the reaction ID extracted from the search page.
# NOTE(review): the execution counts in this notebook are out of order, so the saved
# output may come from a different run than the EC shown above - re-run to confirm.
BioCycID

'MALATE-DEH-RXN'

In [4]:

# from BioCyc Reaction ID to enzymatic reactions
r = s.get('https://websvc.biocyc.org/apixml?fn=enzymes-of-reaction&id=META:{i}&detail=low'.format(i=BioCycID))
# The first child of the reply root is skipped; every other child contributes the first
# attribute value of its <catalyzes>/<Enzymatic-Reaction> element.
reactions = [e.find('catalyzes').find('Enzymatic-Reaction').items()[0][1] for e in list(ET.fromstring(r.text))[1:]]

# from reactions to inhibitors
inhibitors = []
inhibitors_smiles = []
for reaction in reactions:
    r = s.get('https://websvc.biocyc.org/apixml?fn=direct-inhibitors&id={i}&detail=low'.format(i=reaction))
    # Parse each reply once and fill both flat lists directly.
    for c in list(ET.fromstring(r.text))[1:]:
        if len(c) == 0:
            # nothing to read from an element without children
            continue
        inhibitors.append(c[-1].text)  # text of the last child element
        cml = c.find('cml')
        # Not every entry necessarily carries a structure; skip the SMILES when it is absent.
        if cml is not None and len(cml) > 0 and len(cml[0]) > 0:
            inhibitors_smiles.append(cml[0][-1].text)

In [6]:
# Display the flattened list of inhibitor SMILES strings collected above.
inhibitors_smiles

['CCCCCCCCCCCCCCCC(SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(OP(=O)(OC[C@@H]1([C@@H](OP([O-])(=O)[O-])[C@@H](O)[C@@H](O1)N2(C3(\\N=C/N=C(C(\\N=C/2)=3)/N))))[O-])[O-])=O',
 'C(O)[C@H]3(O[C@@H](N1(C2(\\C(\\N=C/1)=C(N)/N=C\\N=2)))[C@H](O)[C@H](OP([O-])(=O)[O-])3)',
 'C(CC([O-])=O)C(=O)C([O-])=O',
 'CCC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(OP(=O)(OC[C@@H]1([C@@H](OP([O-])(=O)[O-])[C@@H](O)[C@@H](O1)N2(C3(\\N=C/N=C(C(\\N=C/2)=3)/N))))[O-])[O-]',
 'CC(C)([C@@H](O)C(=O)NCCC(=O)NCCSCC([O-])=O)COP(=O)(OP(=O)(OC[C@H]1(O[C@H]([C@@H]([C@@H]1OP([O-])(=O)[O-])O)N2(C3(\\N=C/N=C(C(\\N=C/2)=3)/N))))[O-])[O-]',
 'C1(/N(\\C=C/CC(/C(N)=O)=1)[C@@H]5(O[C@H](COP(OP(OC[C@H]4(O[C@@H](N2(C3(\\C(\\N=C/2)=C(N)/N=C\\N=3)))[C@H](O)[C@H](O)4))(=O)[O-])(=O)[O-])[C@@H](O)[C@@H](O)5))']

In [208]:
from equilibrator_api import ComponentContribution
cc = ComponentContribution()

# Collect every identifier in the 'kegg' namespace for each inhibitor name.
# A single name can contribute several accessions or none at all, so this list
# is not index-aligned with `inhibitors`.
inhibitors_kegg = []
for inhibitor in inhibitors:
    try: # the lookup can fail when the inhibitor string is too far off
        for i in cc.search_compound(inhibitor).identifiers:
            if i.registry.namespace == 'kegg':
                inhibitors_kegg.append(i.accession)
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C still interrupts the loop;
        # '' marks an inhibitor whose lookup raised.
        inhibitors_kegg.append('')

In [209]:
# Display the KEGG accessions collected above ('' marks an inhibitor whose lookup raised).
inhibitors_kegg

['C00036',
 'C00002',
 'D08646',
 'C00024',
 'C00010',
 'C00209',
 'C02441',
 'C11592',
 'C00741',
 '']

In [2]:
# Now let's put this in a function and run through all ECs

def _enzymatic_reaction_ids(xml_text):
    """Return the first attribute value of each <catalyzes>/<Enzymatic-Reaction> in an enzymes-of-reaction reply.

    The first child of the root element is skipped.
    """
    return [e.find('catalyzes').find('Enzymatic-Reaction').items()[0][1]
            for e in list(ET.fromstring(xml_text))[1:]]


def _save_biocyc_cache(cache_path, biocyc_cache):
    """Pickle the whole cache dictionary to ``cache_path``, replacing the file."""
    with open(cache_path, 'wb') as handle:
        pickle.dump(biocyc_cache, handle, protocol=pickle.HIGHEST_PROTOCOL)


def get_inhibitors(s, EC, output = 'SMILES'):
    """Look up the direct inhibitors BioCyc lists for the first reaction matching an EC number.

    Results are cached in ``<cwd>/src/kinetic_estimator/biocyc_cache.pickle`` under the
    keys ``EC + '_SMILES'``, ``EC + '_names'`` and, when requested, ``EC + '_kegg'``.
    The cache is consulted before any request is made.

    Parameters
    ----------
    s : object with a ``get(url)`` method returning a response that exposes ``.text``
        and ``.status_code`` (the notebook passes a logged-in ``requests.Session``).
    EC : str
        EC number, e.g. ``'2.3.3.16'``.
    output : str
        ``'SMILES'`` (default), ``'names'`` or ``'kegg'``. Any other value returns
        the tuple ``(names, smiles)``.

    Returns
    -------
    list or tuple
        The requested list, or ``[]`` when no reaction is found for the EC number.
        The names and SMILES lists are not guaranteed to be index-aligned, because
        an entry without a structure contributes a name but no SMILES.

    Raises
    ------
    Any error raised while reading the local fallback XML file, fetching or parsing
    the inhibitor replies, or writing the cache file is propagated.
    """
    cache_path = os.getcwd()+'/src/kinetic_estimator/biocyc_cache.pickle'
    try:
        # NOTE: pickle.load can execute arbitrary code; the cache file must be one
        # written by this function, never a file from an untrusted source.
        with open(cache_path, 'rb') as handle:
            biocyc_cache = pickle.load(handle)
    except Exception:
        # Missing or unreadable cache: start from an empty one (best effort).
        biocyc_cache = {}

    cache_key = EC+'_'+output
    if cache_key in biocyc_cache:
        return biocyc_cache[cache_key]

    r = s.get('https://metacyc.org/META/substring-search?type=NIL&object=EC+'+EC)
    # Only the first reaction link is used; there may be other reactions associated with this EC.
    reaction_link = '/reaction?orgid=META&id='
    loc = r.text.find(reaction_link)
    if loc == -1:
        # str.find returns -1 when the link is absent; slicing from there would
        # turn unrelated page text into a bogus reaction ID.
        print('No reaction found for EC '+EC)
        return []
    BioCycID = r.text[loc+len(reaction_link):].split('"')[0]
    print(BioCycID)

    try:
        # from BioCyc Reaction ID to enzymatic reactions
        r = s.get('https://websvc.biocyc.org/apixml?fn=enzymes-of-reaction&id=META:{i}&detail=low'.format(i=BioCycID))
        if r.status_code != 200:
            print('No reaction found for EC '+EC)
            return []
        reactions = _enzymatic_reaction_ids(r.text)
    except Exception:
        # The request or the parsing failed: fall back to a locally stored copy of the reply.
        with open('src/kinetic_estimator/xmls/'+BioCycID+'.xml', 'r') as handle:
            reactions = _enzymatic_reaction_ids(handle.read())

    # from reactions to inhibitors
    inhibitors = []
    inhibitors_smiles = []
    for reaction in reactions:
        r = s.get('https://websvc.biocyc.org/apixml?fn=direct-inhibitors&id={i}&detail=low'.format(i=reaction))
        # Parse each reply once; the first child of the root is skipped.
        for c in list(ET.fromstring(r.text))[1:]:
            # Explicit length checks replace the deprecated truth-value test on Elements.
            if len(c) == 0:
                continue
            inhibitors.append(c[-1].text)  # text of the last child element
            cml = c.find('cml')
            if cml is not None and len(cml) > 0 and len(cml[0]) > 0:
                inhibitors_smiles.append(cml[0][-1].text)

    biocyc_cache[EC+'_SMILES'] = inhibitors_smiles
    biocyc_cache[EC+'_names'] = inhibitors
    # Saved before the KEGG lookup so these results survive a failure in that step.
    _save_biocyc_cache(cache_path, biocyc_cache)

    if output == 'kegg':
        from equilibrator_api import ComponentContribution
        cc = ComponentContribution()
        inhibitors_kegg = []
        for inhibitor in inhibitors:
            try: # the lookup can fail when the inhibitor string is too far off
                for i in cc.search_compound(inhibitor).identifiers:
                    if i.registry.namespace == 'kegg':
                        inhibitors_kegg.append(i.accession)
            except Exception:
                # `except Exception` (not a bare except) so Ctrl-C still interrupts the loop.
                print("Didn't find kegg id for "+inhibitor)
        biocyc_cache[EC+'_kegg'] = inhibitors_kegg
        _save_biocyc_cache(cache_path, biocyc_cache)
        return inhibitors_kegg

    if output == 'SMILES':
        return inhibitors_smiles
    if output == 'names':
        return inhibitors
    return inhibitors, inhibitors_smiles
    
# get all EC numbers
import gzip
import json

# Map each EC number to the reaction IDs listed for it in the KEGG enzyme dump
with gzip.open("src/thermo_calculations/kegg_enzymes.json.gz", "r") as f:
    ECs = {
        e['EC']: e['reaction_ids']
        for e in json.load(f)
    }

# Map each KEGG reaction ID (RID) to its 'reaction' entry
with gzip.open("src/thermo_calculations/kegg_reactions.json.gz", "r") as f:
    RXNs = {
        r['RID']: r['reaction']
        for r in json.load(f)
    }

# reactions = pd.read_csv('src/frenda_brenda/Files/Reaction_full.csv')
# all_enzymes = []
# all_organisms = []
# for i,row in reactions.iterrows():
#     ec_string = row['EC']
#     try:
#         for r in ECs[ec_string]:
#             try:
#                 if ec_string not in all_enzymes:
#                     all_enzymes.append(ec_string)
#                     if type(row['Species']) is str:
#                         organism = r['Species']
#                     else:
#                         organism = 'Escherichia coli' 
#                     all_organisms.append(organism)
#             except:
#                 pass
#     except:
#         pass

In [3]:
# Load the reaction table and keep three rows by position.
# NOTE(review): the reason for choosing rows 288, 88 and 115 is not recorded in the notebook.
reactions = (
    pd.read_csv('src/frenda_brenda/Files/KEGG_Filtered/Reactions_M8.csv')
    .iloc[[288, 88, 115]]
)
all_enzymes, all_organisms = reactions['EC'].values, reactions['Species'].values

In [10]:
# One list of KEGG accessions per enzyme, in the same order as all_enzymes.
inhibitors_per_enzyme = [get_inhibitors(s, e, output = 'kegg') for e in all_enzymes]

# Flatten: repeat each enzyme/organism once per inhibitor so the three lists stay aligned.
# Comprehensions keep this linear; sum(list_of_lists, []) re-copies the result at every step.
# NOTE: all_enzymes and all_organisms are rebound here, so re-running this cell without
# first re-running the cell that defines them feeds the flattened lists back in.
all_enzymes = [enzyme for enzyme, inhibitors in zip(all_enzymes, inhibitors_per_enzyme) for _ in inhibitors]
all_organisms = [organism for organism, inhibitors in zip(all_organisms, inhibitors_per_enzyme) for _ in inhibitors]
all_inhibitors = [inhibitor for inhibitors in inhibitors_per_enzyme for inhibitor in inhibitors]

1.2.1.2-RXN


KeyError: 'content-md5'

In [10]:
# Restore the three aligned lists from a previous run.
# NOTE: pickle.load can execute arbitrary code; only load an 'inhibitors.pickle' you produced yourself.
with open('inhibitors.pickle', 'rb') as handle:  # `with` closes the file handle after loading
    all_inhibitors, all_enzymes, all_organisms = pickle.load(handle)

In [11]:
from src.kinetic_estimator.estimators import Estimator
import ssl
# NOTE(review): this turns off HTTPS certificate verification for every connection that
# uses the default SSL context in this kernel. It is kept because removing it may break
# downloads in the environment this was written for - confirm whether it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context
# Respect a TORCH_HOME that is already set; otherwise fall back to the original cluster path.
os.environ.setdefault('TORCH_HOME', '/mmfs1/gscratch/stf/dalba')

KMest = Estimator('KM_prediction','Km')
# NOTE(review): the saved output of this cell shows SMILES parse errors for KEGG accessions
# (e.g. 'C00244'); confirm which identifier type estimate() expects for its first argument.
kis = KMest.estimate(all_inhibitors, all_enzymes, all_organisms, True)
pd.DataFrame.from_dict(kis).to_csv('241023_missing_kis.csv')

[20:32:53] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00244'
[20:32:54] SMILES Parse Error: syntax error while parsing: D02313
[20:32:54] SMILES Parse Error: Failed parsing SMILES 'D02313' for input: 'D02313'
[20:32:54] SMILES Parse Error: unclosed ring for input: 'C19935'
[20:32:54] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00004'
[20:32:55] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00177'
[20:32:55] SMILES Parse Error: unclosed ring for input: 'C01326'
[20:32:55] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00177'
[20:32:55] SMILES Parse Error: unclosed ring for input: 'C01326'
[20:32:55] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00284'
[20:32:55] SMILES Parse Error: syntax error while parsing: D00052
[20:32:55] SMILES Parse Error: Failed parsing SMILES 'D00052' for input: 'D00052'
[20:32:56

Request failed: list index out of range
Could not get sequence string for enzyme:2.3.3.8 organism: Escherichia colifrom Uniprot. Returning 'None'


[20:32:59] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00008'
[20:32:59] SMILES Parse Error: syntax error while parsing: G11113
[20:32:59] SMILES Parse Error: Failed parsing SMILES 'G11113' for input: 'G11113'
[20:33:00] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00154'
[20:33:01] SMILES Parse Error: unclosed ring for input: 'C01367'
[20:33:01] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00026'
[20:33:01] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00100'
[20:33:02] SMILES Parse Error: duplicated ring closure 0 bonds atom 0 to itself for input: 'C00004'
[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:02] ERROR: 

[20:33:

28
28
28
28
