In [1]:
import pandas as pd
import numpy as np
import sys
from multiprocessing import Pool
import cobra
import os

In [2]:
# Directory the notebook was started from; the data paths used by later cells
# are built relative to it. Captured only once: after the chdir below, a
# re-run of this cell would otherwise record the dGPredictor directory here
# and silently break every later '../data/...' path.
cwd = globals().get('cwd') or os.getcwd()

# Location of the dGPredictor checkout. Set the DGPREDICTOR_HOME environment
# variable to use another location; the default is the original path.
dgpredictor_home = os.environ.get('DGPREDICTOR_HOME', '/home/fanwc/dGPredictor')

# NOTE(review): the working directory is switched before importing `main`,
# presumably because dGPredictor loads its files via relative paths - confirm.
os.chdir(dgpredictor_home)

# Make dGPredictor's `streamlit/main.py` importable; guarded so that re-running
# the cell does not keep appending the same entry to sys.path.
dgpredictor_streamlit_dir = os.path.join(dgpredictor_home, 'streamlit')
if dgpredictor_streamlit_dir not in sys.path:
    sys.path.append(dgpredictor_streamlit_dir)

from main import load_molsig_rad1, load_molsig_rad2, load_model
from main import parse_novel_molecule, parse_novel_smiles, decompse_novel_mets_rad1, decompse_novel_mets_rad2
from main import get_dG0

# Loaded once here and read as module-level globals by `main()`.
molsig_r1 = load_molsig_rad1()
molsig_r2 = load_molsig_rad2()
loaded_model = load_model()

def main(rxn_dict, add_info, pH=7.0, I=0.1):
    """Predict the standard Gibbs energy of one reaction with dGPredictor.

    Args:
        rxn_dict: Mapping handed to ``get_dG0`` as the reaction description
            (``parse_rxn`` builds it as ``{compound id: coefficient}``).
        add_info: Mapping describing metabolites without a KEGG id, handed to
            ``parse_novel_molecule``.
        pH: Forwarded unchanged to ``get_dG0``.
        I: Forwarded unchanged to ``get_dG0``.

    Returns:
        Tuple ``(mu, std)``: the first two of the four values returned by
        ``get_dG0``.
    """
    # Parsing of novel metabolites is best-effort: if any step raises, all
    # novel-metabolite inputs are passed to get_dG0 as None.
    try:
        parsed_mets = parse_novel_molecule(add_info)
        parsed_smiles = parse_novel_smiles(parsed_mets)
        novel_inputs = (
            parsed_mets,
            decompse_novel_mets_rad1(parsed_smiles),
            decompse_novel_mets_rad2(parsed_smiles),
        )
    except Exception:
        novel_inputs = (None, None, None)

    novel_mets, decomposed_r1, decomposed_r2 = novel_inputs

    # NOTE(review): 'R00801' is a fixed second argument to get_dG0; its role
    # is not visible from this notebook - confirm against dGPredictor.
    prediction = get_dG0(
        rxn_dict, 'R00801', pH, I,
        loaded_model, molsig_r1, molsig_r2,
        decomposed_r1, decomposed_r2, novel_mets,
    )
    mu, std, _, _ = prediction
    return mu, std

2024-08-30 11:41:27.101 
  command:

    streamlit run /home/fanwc/anaconda3/envs/dGPredictor/lib/python3.8/site-packages/ipykernel_launcher.py [ARGUMENTS]
2024-08-30 11:41:27.102 No runtime found, using MemoryCacheStorageManager
2024-08-30 11:41:27.142 No runtime found, using MemoryCacheStorageManager


In [3]:
def parse_rxn(rxn):
    """Translate a reaction into the two dictionaries consumed by ``main``.

    Each metabolite in ``rxn.metabolites`` (a ``{metabolite: coefficient}``
    mapping) is keyed by its ``'kegg.compound'`` annotation when present;
    otherwise by its own ``id``, with its ``'inchi'`` annotation recorded in
    ``add_info``. Coefficients of metabolites that map to the same key are
    summed.

    Args:
        rxn: Object exposing ``metabolites``; every metabolite must expose
            ``id`` and an ``annotation`` mapping.

    Returns:
        ``(rxn_dict, add_info)``, or ``(None, None)`` as soon as a metabolite
        has neither a KEGG id nor an InChI.
    """
    def first_or_none(value):
        # An annotation can be a single string or a list of ids: use the first
        # list entry. An empty list counts as "no annotation" rather than
        # raising IndexError, so the InChI fallback still gets its chance.
        if isinstance(value, list):
            return value[0] if value else None
        return value

    rxn_dict = {}
    add_info = {}
    for met, coeff in rxn.metabolites.items():
        met_kegg_id = first_or_none(met.annotation.get('kegg.compound'))
        met_inchi = first_or_none(met.annotation.get('inchi'))
        if met_kegg_id:
            rxn_dict[met_kegg_id] = rxn_dict.get(met_kegg_id, 0) + coeff
        elif met_inchi:
            rxn_dict[met.id] = rxn_dict.get(met.id, 0) + coeff
            add_info[met.id] = met_inchi
        else:
            # One unidentifiable metabolite makes the whole reaction unusable.
            return None, None
    return rxn_dict, add_info


def predict_rxn(rxn):
    """Predict ``(mu, std)`` for a reaction, or ``(nan, nan)`` if unparseable.

    The reaction is parsed with ``parse_rxn``; when that reports a metabolite
    it cannot identify (``rxn_dict`` is ``None``), no prediction is attempted.
    """
    rxn_dict, add_info = parse_rxn(rxn)
    if rxn_dict is not None:
        return main(rxn_dict, add_info)
    return np.nan, np.nan

class formation_rxn:
    """Minimal reaction stand-in holding one metabolite with coefficient 1.

    It only provides the ``metabolites`` mapping, which is the single
    attribute ``parse_rxn`` reads from a reaction.
    """

    def __init__(self, met):
        # {metabolite: coefficient}, with the given metabolite as only entry.
        self.metabolites = dict([(met, 1)])

def predict_dG_formation(met):
    """Predict ``(mu, std)`` for a single metabolite.

    The metabolite is wrapped in a ``formation_rxn`` (coefficient 1) and sent
    through ``predict_rxn``, so the result is ``(nan, nan)`` when the
    metabolite has neither a KEGG id nor an InChI annotation.
    """
    single_met_rxn = formation_rxn(met)
    return predict_rxn(single_met_rxn)

### 1. Predicting standard Gibbs energy for Recon3D

In [4]:
# Load Recon3D from the MATLAB file located relative to the notebook directory.
recon3d_mat_path = os.path.join(cwd, '../data/Recon3D/Recon3D_301.mat')
recon3d = cobra.io.load_matlab_model(recon3d_mat_path)
S = cobra.util.array.create_stoichiometric_matrix(recon3d)  # shape = [met, rxn]

# patch: annotations set by hand, as (metabolite id, annotation key, ids)
for _patch_met_id, _patch_key, _patch_ids in (
    ('aqcobal[e]', 'pubchem.compound', ['4238']),
    ('aqcobal[c]', 'pubchem.compound', ['4238']),
    ('yvite[e]', 'kegg.compound', ['C02483']),
):
    recon3d.metabolites.get_by_id(_patch_met_id).annotation[_patch_key] = _patch_ids

No defined compartments in model Recon3D. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, g, i, l, m, n, r, x


In [5]:
# Recon3D's standard Gibbs energy of formation
recon3d_dGf_csv = os.path.join(cwd, '../data/Recon3D/Recon3D_standard_dGf_dGPredictor.csv')

# Resume from the results of earlier runs when the CSV exists. Only a missing
# or empty file starts a fresh table: any other read error is raised, because
# the save step at the end would otherwise overwrite the existing results.
try:
    dGf_df = pd.read_csv(recon3d_dGf_csv, index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # NOTE(review): these are formation energies, yet the column is labelled
    # 'standard dGr' (the Human1 table uses 'standard dGf'). Left unchanged
    # because the label ends up in the CSV header - confirm before renaming.
    dGf_df = pd.DataFrame([], columns=['met id', 'standard dGr', 'SD'])

# Upper bound on the number of predictions submitted in one run of this cell.
max_new_tasks = 20000

parallel_p = []
# The with-block guarantees the worker processes are terminated even when
# submitting tasks raises, so a failed run does not leave workers behind.
with Pool(64) as p:
    for i, met in enumerate(recon3d.metabolites):
        # Rows already present in the table were handled by an earlier run.
        if i not in dGf_df.index:
            r = p.apply_async(func=predict_dG_formation, args=(met,))
            parallel_p.append([i, met.id, r])

        if len(parallel_p) >= max_new_tasks:
            print(i, 'done')
            break

    # Wait for every submitted task: leaving the with-block terminates the
    # pool, which would discard work that is still running.
    p.close()
    p.join()

# A task whose worker raised is stored as NaN instead of aborting the run.
for i, met_id, r in parallel_p:
    if r.successful():
        dGf_df.loc[i,:] = [met_id, *r.get()]
    else:
        dGf_df.loc[i,:] = [met_id, np.nan, np.nan]

# save
dGf_df.to_csv(recon3d_dGf_csv)

In [6]:
# Recon3D's standard Gibbs energy of reaction
recon3d_dGr_csv = os.path.join(cwd, '../data/Recon3D/Recon3D_standard_dGr_dGPredictor.csv')

# Resume from the results of earlier runs when the CSV exists. Only a missing
# or empty file starts a fresh table: any other read error is raised, because
# the save step at the end would otherwise overwrite the existing results.
try:
    dGr_df = pd.read_csv(recon3d_dGr_csv, index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    dGr_df = pd.DataFrame([], columns=['standard dGr', 'SD'])

# Upper bound on the number of predictions submitted in one run of this cell.
max_new_tasks = 1000000

parallel_p = []
# The with-block guarantees the worker processes are terminated even when
# submitting tasks raises, so a failed run does not leave workers behind.
with Pool(64) as p:
    for i, rxn in enumerate(recon3d.reactions):
        # Rows already present in the table were handled by an earlier run.
        if i not in dGr_df.index:
            r = p.apply_async(func=predict_rxn, args=(rxn,))
            parallel_p.append([i, r])

        if len(parallel_p) >= max_new_tasks:
            print('done')
            break

    # Wait for every submitted task: leaving the with-block terminates the
    # pool, which would discard work that is still running.
    p.close()
    p.join()

# A task whose worker raised is stored as NaN instead of aborting the run.
for i, r in parallel_p:
    if r.successful():
        dGr_df.loc[i,:] = r.get()
    else:
        dGr_df.loc[i,:] = [np.nan, np.nan]

# save
dGr_df.to_csv(recon3d_dGr_csv)

### 2. Predicting standard Gibbs energy for Human1

In [7]:
# Read model and patch it
human1_sbml_path = os.path.join(cwd, "../data/Human1/Human-GEM/model/Human-GEM.xml")
human1 = cobra.io.read_sbml_model(human1_sbml_path)

# Set the KEGG compound annotation of metabolite MAM01935e by hand.
human1_patched_met = human1.metabolites.get_by_id('MAM01935e')
human1_patched_met.annotation['kegg.compound'] = 'C02483'

In [8]:
# Human1's standard Gibbs energy of formation
human1_dGf_csv = os.path.join(cwd, '../data/Human1/Human1_standard_dGf_dGPredictor.csv')

# Resume from the results of earlier runs when the CSV exists. Only a missing
# or empty file starts a fresh table: any other read error is raised, because
# the save step at the end would otherwise overwrite the existing results.
try:
    dGf_df = pd.read_csv(human1_dGf_csv, index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    dGf_df = pd.DataFrame([], columns=['met id', 'standard dGf', 'SD'])

# Upper bound on the number of predictions submitted in one run of this cell.
max_new_tasks = 20000

parallel_p = []
# The with-block guarantees the worker processes are terminated even when
# submitting tasks raises, so a failed run does not leave workers behind.
with Pool(40) as p:
    for i, met in enumerate(human1.metabolites):
        # Rows already present in the table were handled by an earlier run.
        if i not in dGf_df.index:
            r = p.apply_async(func=predict_dG_formation, args=(met,))
            parallel_p.append([i, met.id, r])

        if len(parallel_p) >= max_new_tasks:
            print(i, 'done')
            break

    # Wait for every submitted task: leaving the with-block terminates the
    # pool, which would discard work that is still running.
    p.close()
    p.join()

# A task whose worker raised is stored as NaN instead of aborting the run.
for i, met_id, r in parallel_p:
    if r.successful():
        dGf_df.loc[i,:] = [met_id, *r.get()]
    else:
        dGf_df.loc[i,:] = [met_id, np.nan, np.nan]

# save
dGf_df.to_csv(human1_dGf_csv)

In [9]:
# Human1's standard Gibbs energy of reaction
human1_dGr_csv = os.path.join(cwd, '../data/Human1/Human1_standard_dGr_dGPredictor.csv')

# Resume from the results of earlier runs when the CSV exists. Only a missing
# or empty file starts a fresh table: any other read error is raised, because
# the save step at the end would otherwise overwrite the existing results.
try:
    dGr_df = pd.read_csv(human1_dGr_csv, index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    dGr_df = pd.DataFrame([], columns=['standard dGr', 'SD'])

# Upper bound on the number of predictions submitted in one run of this cell.
max_new_tasks = 1000000

parallel_p = []
# The with-block guarantees the worker processes are terminated even when
# submitting tasks raises, so a failed run does not leave workers behind.
with Pool(64) as p:
    for i, rxn in enumerate(human1.reactions):
        # Rows already present in the table were handled by an earlier run.
        if i not in dGr_df.index:
            r = p.apply_async(func=predict_rxn, args=(rxn,))
            parallel_p.append([i, r])

        if len(parallel_p) >= max_new_tasks:
            print('done')
            break

    # Wait for every submitted task: leaving the with-block terminates the
    # pool, which would discard work that is still running.
    p.close()
    p.join()

# A task whose worker raised is stored as NaN instead of aborting the run.
for i, r in parallel_p:
    if r.successful():
        dGr_df.loc[i,:] = r.get()
    else:
        dGr_df.loc[i,:] = [np.nan, np.nan]

# save
dGr_df.to_csv(human1_dGr_csv)