In [13]:
import getpass
import os
import pickle
import xml.etree.ElementTree as ET

import numpy as np
import pandas as pd
import requests

s = requests.Session()  # one session, so the login cookie is reused by every later request

# Credentials are read from the environment (or prompted for) instead of being
# hardcoded: a notebook is routinely shared/committed together with its source.
# NOTE(review): the password that used to be written here should be rotated.
biocyc_email = os.environ.get('BIOCYC_EMAIL') or input('BioCyc email: ')
biocyc_password = os.environ.get('BIOCYC_PASSWORD') or getpass.getpass('BioCyc password: ')

# Post login credentials to session:
login_response = s.post(
    'https://websvc.biocyc.org/credentials/login/',
    data={'email': biocyc_email, 'password': biocyc_password},
)
login_response.raise_for_status()  # stop here rather than on a confusing later request
login_response


<Response [200]>

In [195]:
# Step 1: fetch the MetaCyc search page for an EC number (EC -> BioCyc reaction ID)
METACYC_SEARCH_URL = 'https://metacyc.org/META/substring-search?type=NIL&object=EC+'
EC = '1.1.1.37'
r = s.get(METACYC_SEARCH_URL + EC)

In [196]:
# Pull the reaction ID out of the first reaction link on the search page.
# This only finds the first instance; there may be other reactions associated with this EC.
REACTION_LINK_PREFIX = '/META/NEW-IMAGE?type=REACTION&object='
loc = r.text.find(REACTION_LINK_PREFIX)
if loc == -1:
    # str.find returns -1 on a miss; slicing from there would silently produce a bogus ID
    raise ValueError('No MetaCyc reaction link found for EC ' + EC)
# The ID runs from the end of the prefix up to the closing quote of the href attribute.
BioCycID = r.text[loc + len(REACTION_LINK_PREFIX):].split('"')[0]

In [277]:

# from BioCyc Reaction ID to enzymatic reactions
r = s.get('https://websvc.biocyc.org/apixml?fn=enzymes-of-reaction&id=META:{i}&detail=low'.format(i=BioCycID))
# The first child of the response root is skipped (presumably metadata, not a result — TODO confirm).
# The value of the first attribute of each <Enzymatic-Reaction> is used as the reaction ID.
reactions = [e.find('catalyzes').find('Enzymatic-Reaction').items()[0][1] for e in list(ET.fromstring(r.text))[1:]]

# from reactions to inhibitors
inhibitors = []
inhibitors_smiles = []
for reaction in reactions:
    r = s.get('https://websvc.biocyc.org/apixml?fn=direct-inhibitors&id={i}&detail=low'.format(i=reaction))
    compounds = list(ET.fromstring(r.text))[1:]  # parse the response once, reuse for both lists
    for compound in compounds:
        if len(compound) == 0:
            continue  # nothing to read from a childless entry
        # the text of the last child is taken as the inhibitor name
        inhibitors.append(list(compound)[-1].text)
        cml = compound.find('cml')
        # not every entry carries a structure (same guard as in get_inhibitors)
        if cml is not None and len(cml) > 0 and len(cml[0]) > 0:
            inhibitors_smiles.append(cml[0][-1].text)
# Both lists are built flat with append, so no quadratic sum(list_of_lists, []) is needed.

In [278]:
# Display the SMILES strings collected for the inhibitors in the previous cell.
inhibitors_smiles

['C(C([O-])=O)C(=O)C([O-])=O',
 'C(OP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@H]3(O[C@@H](N1(C2(\\C(\\N=C/1)=C(N)/N=C\\N=2)))[C@H](O)[C@H](O)3)',
 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(OP([O-])(OC[C@@H]1([C@@H](OP([O-])([O-])=O)[C@@H](O)[C@@H](O1)N2(C3(\\N=C/N=C(C(\\N=C/2)=3)/N))))=O)([O-])=O',
 'CC(C)(COP([O-])(=O)OP(OC[C@H]3(O[C@@H](N1(C2(\\N=C/N=C(C(\\N=C/1)=2)/N)))[C@H](O)[C@H](OP([O-])(=O)[O-])3))(=O)[O-])[C@@H](O)C(=O)NCCC(=O)NCCS',
 'C([O-])(C(=O)[O-])=O',
 'CCN1(C(=O)\\C=C/C(=O)1)',
 'C2(/C=C(C(/C(=O)[O-])=C\\C(\\SSC1(\\C=C(C(/[N+]([O-])=O)=C\\C=1)\\C(=O)[O-]))=2)/[N+]([O-])=O)',
 'C1(\\C=C(\\C=C/N=1)\\SSC2(\\C=C/N=C\\C=2))',
 'CCOC(OC(OCC)=O)=O',
 'CC(C(C)=O)=O',
 '[Cd+2]']

In [208]:
from equilibrator_api import ComponentContribution
cc = ComponentContribution()

# Map every inhibitor name to its KEGG accession(s) through eQuilibrator's compound search.
# An empty string is stored whenever the lookup fails.
inhibitors_kegg = []
for inhibitor in inhibitors:
    try:  # the inhibitor string may be too far off, or have no usable match
        compound = cc.search_compound(inhibitor)
        if compound is None:  # assumes search_compound returns None on no hit — TODO confirm
            inhibitors_kegg.append('')
            continue
        for identifier in compound.identifiers:
            if identifier.registry.namespace == 'kegg':
                inhibitors_kegg.append(identifier.accession)
    except Exception:
        # Best-effort lookup; `except Exception` (not a bare except) keeps Ctrl-C working.
        inhibitors_kegg.append('')

In [209]:
# Display the KEGG accessions found for the inhibitors ('' marks a failed lookup).
inhibitors_kegg

['C00036',
 'C00002',
 'D08646',
 'C00024',
 'C00010',
 'C00209',
 'C02441',
 'C11592',
 'C00741',
 '']

In [38]:
# Now put the steps above into a function and run it through all ECs

def _biocyc_cache_path():
    """Return the location of the pickle cache, relative to the current working directory."""
    return os.getcwd()+'/src/kinetic_estimator/biocyc_cache.pickle'


def _load_biocyc_cache():
    """Load the on-disk cache; return an empty dict when it is missing or unreadable."""
    try:
        with open(_biocyc_cache_path(), 'rb') as handle:
            # NOTE(security): pickle.load can execute arbitrary code. This is acceptable only
            # because the file is a local cache written by this same function.
            biocyc_cache = pickle.load(handle)
    except FileNotFoundError:
        return {}  # first run: nothing cached yet
    except Exception as err:
        # A corrupt cache must not block the lookup; it is rebuilt from scratch.
        print('Ignoring unreadable BioCyc cache: {e}'.format(e=err))
        return {}
    return biocyc_cache if isinstance(biocyc_cache, dict) else {}


def _save_biocyc_cache(biocyc_cache):
    """Write the cache dict to disk, creating the target directory if needed."""
    path = _biocyc_cache_path()
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as handle:
        pickle.dump(biocyc_cache, handle, protocol=pickle.HIGHEST_PROTOCOL)


def _fetch_biocyc_inhibitors(s, EC):
    """Query MetaCyc/BioCyc for EC; return (names, smiles), or None if no reaction is found."""
    link_prefix = '/META/NEW-IMAGE?type=REACTION&object='
    r = s.get('https://metacyc.org/META/substring-search?type=NIL&object=EC+'+EC)
    # This only finds the first instance; there may be other reactions associated with this EC.
    loc = r.text.find(link_prefix)
    if loc == -1:
        return None  # no reaction link on the page: do not send a bogus API request
    BioCycID = r.text[loc+len(link_prefix):].split('"')[0]

    # from BioCyc Reaction ID to enzymatic reactions
    r = s.get('https://websvc.biocyc.org/apixml?fn=enzymes-of-reaction&id=META:{i}&detail=low'.format(i=BioCycID))
    if r.status_code != 200:
        return None
    reactions = []
    # The first child of the root is skipped (presumably metadata — TODO confirm).
    for e in list(ET.fromstring(r.text))[1:]:
        catalyzes = e.find('catalyzes')
        enzymatic_reaction = catalyzes.find('Enzymatic-Reaction') if catalyzes is not None else None
        attributes = enzymatic_reaction.items() if enzymatic_reaction is not None else []
        if attributes:
            # the value of the first attribute is used as the reaction ID
            reactions.append(attributes[0][1])

    # from reactions to inhibitors
    inhibitors = []
    inhibitors_smiles = []
    for reaction in reactions:
        r = s.get('https://websvc.biocyc.org/apixml?fn=direct-inhibitors&id={i}&detail=low'.format(i=reaction))
        for c in list(ET.fromstring(r.text))[1:]:  # parse once, reuse for names and SMILES
            # Explicit len()/None checks replace the deprecated Element truth-value test.
            if len(c) == 0:
                continue
            inhibitors.append(list(c)[-1].text)
            cml = c.find('cml')
            if cml is not None and len(cml) > 0 and len(cml[0]) > 0:
                inhibitors_smiles.append(cml[0][-1].text)
    return inhibitors, inhibitors_smiles


def _kegg_ids_for_names(inhibitors):
    """Translate inhibitor names into KEGG accessions with eQuilibrator (best effort)."""
    if not inhibitors:
        return []  # avoid building ComponentContribution when there is nothing to look up
    from equilibrator_api import ComponentContribution
    cc = ComponentContribution()
    inhibitors_kegg = []
    for inhibitor in inhibitors:
        try:  # the inhibitor string may be too far off, or have no kegg id
            compound = cc.search_compound(inhibitor)
            if compound is None:  # assumes None means "no hit" — TODO confirm
                print("Didn't find kegg id for "+str(inhibitor))
                continue
            for i in compound.identifiers:
                if i.registry.namespace == 'kegg':
                    inhibitors_kegg.append(i.accession)
        except Exception:
            # `except Exception` keeps the lookup best-effort without swallowing Ctrl-C;
            # str() keeps the message printable when the name itself is None.
            print("Didn't find kegg id for "+str(inhibitor))
    return inhibitors_kegg


def get_inhibitors(s, EC, output = 'SMILES'):
    """Return the direct inhibitors BioCyc lists for the first reaction matching an EC number.

    Results are cached in ``src/kinetic_estimator/biocyc_cache.pickle`` under the current
    working directory, keyed by ``EC + '_' + output``.

    Parameters
    ----------
    s : session object exposing ``get(url)``; the returned response must provide
        ``.text`` and ``.status_code`` (a logged-in ``requests.Session`` in this notebook).
    EC : str
        EC number, e.g. ``'1.1.1.37'``.
    output : str
        ``'SMILES'`` (default), ``'names'`` or ``'kegg'``; any other value returns both
        the names and the SMILES lists.

    Returns
    -------
    list or tuple
        The requested list; ``(names, smiles)`` for an unrecognised ``output``; ``[]``
        when no reaction can be resolved for ``EC`` (a message is printed in that case).
        The names and SMILES lists are not guaranteed to align, because entries without
        a structure contribute a name only.
    """
    biocyc_cache = _load_biocyc_cache()

    if EC+'_'+output in biocyc_cache:
        return biocyc_cache[EC+'_'+output]

    if EC+'_names' in biocyc_cache and EC+'_SMILES' in biocyc_cache:
        # Names/SMILES are already cached (e.g. 'kegg' requested after 'SMILES'):
        # reuse them instead of querying BioCyc again.
        inhibitors = biocyc_cache[EC+'_names']
        inhibitors_smiles = biocyc_cache[EC+'_SMILES']
    else:
        fetched = _fetch_biocyc_inhibitors(s, EC)
        if fetched is None:
            print('No reaction found for EC '+EC)
            return []
        inhibitors, inhibitors_smiles = fetched
        biocyc_cache[EC+'_SMILES'] = inhibitors_smiles
        biocyc_cache[EC+'_names'] = inhibitors
        _save_biocyc_cache(biocyc_cache)

    if output == 'kegg':
        inhibitors_kegg = _kegg_ids_for_names(inhibitors)
        biocyc_cache[EC+'_kegg'] = inhibitors_kegg
        _save_biocyc_cache(biocyc_cache)
        return inhibitors_kegg
    if output == 'SMILES':
        return inhibitors_smiles
    if output == 'names':
        return inhibitors
    return inhibitors, inhibitors_smiles
    
# get all EC numbers
import gzip
import json

# KEGG enzyme table: maps each entry's 'EC' to its 'reaction_ids'
with gzip.open("src/thermo_calculations/kegg_enzymes.json.gz", "r") as f:
    ECs = dict((entry['EC'], entry['reaction_ids']) for entry in json.load(f))

# KEGG reaction table: maps each entry's 'RID' to its 'reaction'
with gzip.open("src/thermo_calculations/kegg_reactions.json.gz", "r") as f:
    RXNs = dict((entry['RID'], entry['reaction']) for entry in json.load(f))

# Build two parallel lists: every distinct EC number that KEGG knows (with at least one
# reaction) and the organism to use for it.
reactions = pd.read_csv('src/frenda_brenda/Files/Reaction_full.csv')
all_enzymes = []
all_organisms = []
seen_enzymes = set()  # O(1) duplicate check instead of scanning all_enzymes
for _, row in reactions.iterrows():
    ec_string = row['EC']
    if ec_string in seen_enzymes:
        continue
    # Skip ECs missing from the KEGG table (including NaN) or listed with no reactions.
    if not ECs.get(ec_string):
        continue
    seen_enzymes.add(ec_string)
    all_enzymes.append(ec_string)
    # The species comes from the CSV row. The previous code indexed the KEGG reaction-id
    # loop variable instead, raised inside a bare except, and left all_organisms shorter
    # than all_enzymes.
    species = row.get('Species')
    organism = species if isinstance(species, str) else 'Escherichia coli'  # default when blank
    all_organisms.append(organism)

assert len(all_enzymes) == len(all_organisms), 'enzyme/organism lists must stay aligned'

In [53]:
# Fetch the KEGG inhibitor IDs of every enzyme, then flatten into three parallel lists with
# one entry per (enzyme, inhibitor) pair.
# NOTE: this cell rebinds all_enzymes / all_organisms, so it is not idempotent; re-run the
# cell that builds them before running this one again.
inhibitors_per_enzyme = [get_inhibitors(s, enzyme, output = 'kegg') for enzyme in all_enzymes]

# flatten lists (nested comprehensions are linear; sum(list_of_lists, []) is quadratic)
all_enzymes = [enzyme for enzyme, hits in zip(all_enzymes, inhibitors_per_enzyme) for _ in hits]
all_organisms = [organism for organism, hits in zip(all_organisms, inhibitors_per_enzyme) for _ in hits]
all_inhibitors = [kegg_id for hits in inhibitors_per_enzyme for kegg_id in hits]

In [52]:
from src.kinetic_estimator.estimators import Estimator

# Run the 'KM_prediction' estimator over the inhibitor / enzyme / organism lists
# and write the result to a CSV report.
KMest = Estimator('KM_prediction','Km')
kis = KMest.estimate(all_inhibitors, all_enzymes, all_organisms, True)
kis_report = pd.DataFrame.from_dict(kis)
kis_report.to_csv('full_report_kis.csv')

OSError: [WinError 182] The operating system cannot run %1. Error loading "c:\Users\Diego Alba\.conda\envs\ODBM2\lib\site-packages\torch\lib\shm.dll" or one of its dependencies.