In [1]:
import gzip
import json
import os
import sys
import math
import re

import numpy as np
import pandas as pd

from brendapyrser import BRENDA
from equilibrator_api import ComponentContribution
from itertools import filterfalse
os.chdir('..')
path = os.getcwd()

ModuleNotFoundError: No module named 'brendapyrser'

In [6]:
path

'/workspaces/ECFERS'

In [7]:
with gzip.open(path+"/src/thermo_calculations/kegg_enzymes.json.gz", "r") as f:
        ECs = {e['EC']:e['reaction_ids'] for e in json.load(f)}

with gzip.open(path+"/src/thermo_calculations/kegg_reactions.json.gz", "r") as f:
        RXNs = {r['RID']:r['reaction'] for r in json.load(f)}

reaction_og = pd.read_csv(path+'/src/frenda_brenda/Files/Reaction.csv')
sbm_og = pd.read_csv(path+'/src/frenda_brenda/Files/SpeciesBaseMechanisms.csv')
inac = pd.read_csv(path+'/src/frenda_brenda/Files/Inaccessible_IDs.csv')
kcats = pd.read_csv(path+'/src/kinetic_estimator/full_report_kcats.csv')
kms = pd.read_csv(path+'/src/kinetic_estimator/full_report_kms.csv')
kis = pd.read_csv(path+'/src/kinetic_estimator/full_report_kis.csv')
metabconc_ref = pd.read_csv(path+'/src/frenda_brenda/Files/Metabolite_Concentrations.csv')
dataFile = path+'/src/frenda_brenda/Files/brenda_download.txt'

In [8]:
brenda = BRENDA(dataFile)
cc = ComponentContribution()
compound_dict = {}

KeyError: 'content-md5'

In [None]:
class ECIndexError(Exception):
    pass

In [None]:
def manualEC(sbm, reaction, inac):
    inac = inac.dropna()
    # Appending data to sbm DataFrame
    sbm_rows = []
    for index, row in inac.iterrows():
        sbm_row = pd.DataFrame({'Label': row['Accession Number'],
                                'EC': row['EC'],
                                'Type': 'Enzyme',
                                'StartingConc': row['Conc'],
                                'Conc': np.nan,
                                'Mechanisms': np.nan,
                                'Parameters': np.nan,
                                'Species': np.nan}, index=[0])
        sbm_rows.append(sbm_row)

    if sbm_rows:
        sbm = pd.concat([sbm] + sbm_rows, ignore_index=True)

    # Appending data to reaction DataFrame
    reaction_rows = []
    for index, row in inac.iterrows():
        reaction_row = pd.DataFrame({'Accession Number': row['Accession Number'],
                                     'EC': row['EC'],
                                     'Species': np.nan,
                                     'Label': np.nan,
                                     'Enzyme': np.nan,
                                     'Reaction ID': np.nan,
                                     'Mechanism': np.nan,
                                     'Substrates': np.nan,
                                     'Products': np.nan,
                                     'Km': np.nan,
                                     'Kcat': np.nan,
                                     'Inhibitors': np.nan,
                                     'KI': np.nan}, index=[0])
        reaction_rows.append(reaction_row)

    if reaction_rows:
        reaction = pd.concat([reaction] + reaction_rows, ignore_index=True)

    return sbm, reaction

In [None]:
def get_enzyme_name(reaction):
    return reaction.name

In [None]:
def get_substrates_and_products(EC):
    try:
        rxn_IDs = ECs[EC]
    except KeyError:
        raise ECIndexError("This EC is not present in the KEGG database")

    rxn_comp = []

    for rxn in rxn_IDs:
        try:
            substrate = [str(abs(species[0])) + ' ' + species[1] for species in RXNs[rxn] if species[0] <= 0]
            products = [str(abs(species[0])) + ' ' + species[1] for species in RXNs[rxn] if species[0] >= 0]
        except KeyError:
            print('Could not index ',rxn)
            continue
        rxn_comp.append([rxn, substrate, products])

    df = pd.DataFrame(rxn_comp, columns=['Reaction ID', 'Substrates', 'Products'])

    # Remove the brackets from the DataFrame
    df = df.applymap(lambda x: '; '.join(x) if isinstance(x, list) else x)

    return df

Add stoichiometry to the existing Reaction CSV instead of fixing the base frenda code

In [1]:
import gzip
import json

In [2]:
import pandas as pd
import os 
os.chdir('..')

os.getcwd()

'/workspaces/ECFERS'

In [3]:
with gzip.open("src/thermo_calculations/kegg_reactions.json.gz", "r") as f:
        RXNs = {r['RID']:r['reaction'] for r in json.load(f)}

In [4]:
filtered_rxn = pd.read_csv('src/frenda_brenda/Files/Model Reduction Approaches/Reaction_filtByMap.csv')

In [9]:
RXNs['R00519']

[[-1, 'C00003'], [1, 'C00004'], [1, 'C00011'], [-1, 'C00058'], [1, 'C00080']]

In [18]:
# Create a copy of the original dataframe
filtered_rxn_copy = filtered_rxn.copy()

for index, row in filtered_rxn_copy.iterrows():
    RID = row['Reaction ID']
    print(row['Reaction ID'])
    subs = []
    prods = []
    for cpd in RXNs[RID]:
        if cpd[0] < 0:
            subs.append(cpd)
        elif cpd[0] > 0:
            prods.append(cpd)
        elif cpd[0] == 0:
            subs.append([-1, cpd[1]])  # Change to -1 for subs
            prods.append([1, cpd[1]])  # Change to 1 for prods

    # Format the substrates and products
    formattedsubs = "; ".join([f"{int(item[0])} {item[1]}" for item in subs])
    formattedprods = "; ".join([f"{int(item[0])} {item[1]}" for item in prods])
    
    # Update the copy of the dataframe with the new values
    filtered_rxn_copy.at[index, 'Substrates'] = formattedsubs
    filtered_rxn_copy.at[index, 'Products'] = formattedprods

# The filtered_rxn_copy dataframe now contains the updated Substrates and Products columns

In [20]:
filtered_rxn_copy.to_csv('src/frenda_brenda/Files/Model Reduction Approaches/Reaction_filtByMap_withStoich.csv')

Fixing concentrations

In [37]:
proteome = pd.read_csv('src/frenda_brenda/Files/proteome_exe.csv', header=None)
sbmplain = pd.read_csv('src/frenda_brenda/Files/SpeciesBaseMechanisms.csv')
sbmfilt = pd.read_csv('src/frenda_brenda/Files/Model Reduction Approaches/SpeciesBaseMechanism_filtByMap.csv')

In [39]:
ecdict = dict(zip(sbmplain['EC'], sbmplain['Label']))

In [45]:
concdict = dict(zip(proteome[proteome.columns[0]], proteome[proteome.columns[1]]))

In [51]:
concdict['WP_001307570.1']*0.36526946

17.35029935

In [54]:
sbmfilt_copy = sbmfilt.copy()
for index, row in sbmfilt.iterrows():
    if pd.notna(row['EC']):  # Check if the value is not NaN
        try:
            print(ecdict[row['EC']])
            conc = ((concdict[ecdict[row['EC']]]*0.36526946)/1000)
            sbmfilt_copy.at[index, 'StartingConc'] = conc
        except KeyError:
            print(row['Label'])

WP_000099534.1
WP_001181473.1
WP_000153502.1
WP_000003829.1
WP_000597260.1
WP_000102485.1
WP_000025458.1
WP_000111269.1
WP_000036723.1
WP_000098614.1
WP_000963518.1
WP_000024939.1
WP_001295461.1
WP_001295272.1
WP_000130189.1
WP_000069410.1
WP_000099823.1
WP_001265681.1
WP_000919159.1
WP_000066639.1
WP_000785834.1
WP_000069437.1
WP_001295373.1
WP_000043460.1
WP_000462687.1
WP_000527955.1
WP_000506490.1
WP_000136788.1
WP_001005579.1
WP_001186650.1
WP_000195061.1
WP_000034372.1
WP_001220233.1
WP_000210878.1
WP_000334099.1
WP_001062128.1
WP_000695655.1
WP_001293003.1
WP_001295403.1
WP_000086722.1
WP_000988027.1
WP_001299507.1
WP_001127419.1
WP_001295247.1
WP_001300978.1
WP_001120798.1
WP_000817178.1
WP_001271717.1
WP_000138270.1
WP_001320168.1
WP_000963837.1
WP_000342648.1
WP_001230087.1
WP_000078239.1
WP_001298109.1
WP_001216325.1
WP_000193841.1
WP_000811065.1
WP_000207665.1
WP_001320171.1
WP_001287154.1
WP_001213837.1
WP_000057149.1
WP_000045290.1
WP_000208515.1
WP_000068701.1
WP_0000957

In [56]:
sbmfilt_copy.to_csv('src/frenda_brenda/Files/Model Reduction Approaches/SpeciesBaseMechanism_filtByMap.csv')

extra to add inhibitors

In [2]:
import pandas as pd

In [11]:
missing_kis = pd.read_csv('/workspaces/ECFERS/notebooks/241023_missing_kis.csv')
rxn = pd.read_csv('/workspaces/ECFERS/src/frenda_brenda/Files/Model Reduction Approaches/Reaction_filtByMap_withStoich.csv')
sbm = pd.read_csv('/workspaces/ECFERS/src/frenda_brenda/Files/Model Reduction Approaches/SpeciesBaseMechanism_filtByMap.csv')

In [13]:
# Filter and group by enzyme, creating lists only for substrates starting with "C"
inhibitor_ki_dict = {}
for enzyme, group in missing_kis.groupby('enzyme'):
    filtered_group = group[group['substrates'].str.startswith('C')]
    
    inhibitors = ';'.join(filtered_group['substrates'])
    KIs = ';'.join([f"{substrate}_KI: {km}" for substrate, km in zip(filtered_group['substrates'], filtered_group['Km'])])
    
    inhibitor_ki_dict[enzyme] = {'Inhibitors': inhibitors, 'KI': KIs}

# Update only rows with matching "EC" in missing_kis
rxn['Inhibitors'] = rxn.apply(
    lambda row: inhibitor_ki_dict[row['EC']]['Inhibitors'] if row['EC'] in inhibitor_ki_dict else row['Inhibitors'],
    axis=1
)
rxn['KI'] = rxn.apply(
    lambda row: inhibitor_ki_dict[row['EC']]['KI'] if row['EC'] in inhibitor_ki_dict else row['KI'],
    axis=1
)

In [27]:
pd.set_option('display.max_colwidth', None)

In [31]:
rxn[rxn['EC']=='2.3.3.8']

Unnamed: 0.1,Unnamed: 0,Accession Number,EC,Species,Label,Enzyme,Reaction ID,Mechanism,Substrates,Products,Km,Kcat,Inhibitors,KI,Keq
122,122,,2.3.3.8,Escherichia coli,R123,Atp citrate synthase,R00352,MRL,-1 C00002; -1 C00010; -1 C00158,1 C00008; 1 C00009; 1 C00024; 1 C00036,Km_C00008: 11.421359055528603; Km_C00009: 11.421359055528603; Km_C00024: 11.421359055528603; Km_C00036: 11.421359055528603; Km_C00002: 11.421359055528603; Km_C00010: 11.421359055528603; Km_C00158: 11.421359055528603,Kcat_F: 19.78221183941891; Kcat_R: 19.78221183941891,C00036;C00008,C00036_KI: nan;C00008_KI: nan,
