In [1]:
import pandas as pd
import re 
import matlab.engine
from rdkit import Chem

# Obtain a MATLAB engine handle and load the SBML model through it.
# NOTE(review): 'MATLAB_10528' looks like the name of an already-running shared
# MATLAB session; if so, matlab.engine.connect_matlab(...) is presumably the
# intended call, and the name would differ after MATLAB restarts — confirm.
m = matlab.engine.start_matlab('MATLAB_10528')
# Later cells index the returned object as model['mets'], model['rxns'] and model['S'].
model = m.readSBML('working_model.xml', float(1000));

In [2]:
from rdkit import Chem
import re

def formula_to_dummy_smiles(formula):
    """Build a placeholder SMILES string from a molecular formula.

    The placeholder is a plain chain of carbon atoms, one per carbon in the
    formula (e.g. ``'C18H32O2SR'`` -> ``'C' * 18``). It carries no real
    structure; it only preserves the carbon count.

    The formula is parsed element by element, so two-letter symbols that start
    with "C" (``Ca``, ``Cl``, ``Co``, ...) are not mistaken for carbon, and a
    carbon without an explicit count (``'CO2'``, ``'CH4'``) counts as one atom.

    The RDKit-based chain builder that used to follow the early ``return`` was
    unreachable and has been removed.

    Args:
        formula: Molecular formula string. Any non-string value (``None`` or
            the NaN that pandas uses for an empty cell) is treated as
            "no formula".

    Returns:
        ``'C' * n`` for a formula with ``n`` > 0 carbon atoms, otherwise
        ``'[H]'``.
    """
    # Missing cells arrive from pandas as NaN (a float), not as a string.
    if not isinstance(formula, str):
        return '[H]'

    carbon_count = 0
    # Each match is (element symbol, optional count); an empty count means 1.
    for element, count in re.findall(r'([A-Z][a-z]*)(\d*)', formula):
        if element == 'C':
            carbon_count += int(count) if count else 1

    if carbon_count == 0:
        return '[H]'
    return 'C' * carbon_count

 
# Quick check of the helper on a formula that contains an 'R' placeholder;
# the value is shown as the cell output.
formula_to_dummy_smiles('C18H32O2SR')

'CCCCCCCCCCCCCCCCCC'

In [10]:
from cobra.io import read_sbml_model

# Load the same SBML file with COBRApy so each reaction can be printed as an equation.
kokel = read_sbml_model("working_model.xml")

# Empty frame with four named columns.
# NOTE(review): `df` is neither filled nor read in the cells visible here.
columns = ['#reaction_ID', 'reactant_IDs(atom)', 'product_IDs(atom)', 'reversibility']
df = pd.DataFrame(columns=columns)

# One line per reaction: "<id> : <equation>".
for rxn in kokel.reactions:
    print(f"{rxn.id} : {rxn.reaction}")


No objective in listOfObjectives
No objective coefficients in model. Unclear what should be optimized


GAPDH_nadp_hi : 13dpg[h] + h[h] + nadph[h] --> g3p[h] + nadp[h] + pi[h]
PRUK : atp[h] + ru5p_D[h] --> adp[h] + h[h] + rb15bp[h]
RBPCh : co2[h] + h2o[h] + rb15bp[h] --> 2.0 3pg[h] + 2.0 h[h]
BFBPh : fdp_B[h] + h2o[h] --> f6p_B[h] + pi[h]
ENO : 2pg[c] <=> h2o[c] + pep[c]
PGM : 3pg[c] <=> 2pg[c]
PDHe2r : adhlam[m] + coa[m] <=> accoa[m] + dhlam[m]
PDHam1mi : h[m] + pyr[m] + thmpp[m] --> 2ahethmpp[m] + co2[m]
PDHam2mi : 2ahethmpp[m] + lpam[m] --> adhlam[m] + thmpp[m]
PYK : adp[c] + h[c] + pep[c] --> atp[c] + pyr[c]
PYKm : adp[m] + h[m] + pep[m] --> atp[m] + pyr[m]
PDHe3mr : dhlam[m] + nad[m] <=> h[m] + lpam[m] + nadh[m]
ATPSm : adp[m] + 3.0 h[i] + pi[m] --> atp[m] + h2o[m] + 2.0 h[m]
CYOO6m : 4.0 focytc[m] + 8.0 h[m] + o2[m] --> 4.0 ficytc[m] + 2.0 h2o[m] + 4.0 h[i]
CYOR_q8_m : 2.0 ficytc[m] + 2.0 h[m] + q8h2[m] --> 2.0 focytc[m] + 4.0 h[i] + q8[m]
NADHOR_2m : h[m] + nadh[m] + q8[m] --> nad[m] + q8h2[m]
RPIh : r5p[h] <=> ru5p_D[h]
ATPSh : adp[h] + 4.0 h[u] + pi[h] --> atp[h] + h2o[h] + 3.0 

## Mark fatty acid synthesis reactions as export
1) Carbon fixation
2) Glycolysis
3) TCA
4) Pentose phosphate pathway
5) Photosynthesis

In [3]:
# Load the metabolite reference table.
ref = pd.read_csv('elements.csv')

# Use underscores instead of hyphens in the abbreviations before comparing
# them with the model's metabolite IDs.
ref['Abbreviation'] = ref['Abbreviation'].str.replace('-', '_')

# Keep only the reference rows whose abbreviation appears in model['mets'].
# NOTE(review): `filtered_ref` is not used in the cells visible here; a later
# cell re-reads the CSV and builds `filtered_df`, additionally appending a
# default compartment tag before filtering.
filtered_ref = ref[ref['Abbreviation'].isin(model['mets'])]

#missing_mets = set(model['mets']) - set(ref['Abbreviation'])

In [4]:
# Caches filled by later cells, keyed by metabolite abbreviation:
# SMILES strings and InChIKeys respectively.
metabolite_smiles = {}
metabolite_inchi = {}

In [5]:
# Re-read the reference table so the transformations below start from the
# unmodified CSV contents.
ref = pd.read_csv('elements.csv')

def add_compartment(compound):
    """Return `compound` with a default ``[c]`` compartment tag if it has none.

    An identifier counts as tagged when it ends in a single lowercase letter
    in square brackets (for example ``atp[h]``); tagged identifiers are
    returned unchanged.
    """
    already_tagged = re.search(r'\[[a-z]\]$', compound) is not None
    return compound if already_tagged else f"{compound}[c]"

# Normalise the abbreviations: add the default compartment tag first, then
# swap hyphens for underscores.
ref['Abbreviation'] = (
    ref['Abbreviation']
    .apply(add_compartment)
    .str.replace('-', '_')
)

# Reference rows for metabolites that are present in the model; the selection
# is also saved to an Excel file.
in_model = ref['Abbreviation'].isin(model['mets'])
filtered_df = ref[in_model]
filtered_df.to_excel('essentmets.xlsx')
filtered_df

Unnamed: 0,Abbreviation,Compartment,Name,Formula,Charge,CAS Number,Formula Neutral,KEGG cmpd ID,PubChem Substance ID
28,13dpg[h],Chloroplast,3-Phospho-D-glyceroyl phosphate,C3H4O10P2,-4,38168-82-0,C3H8O10P2,C00236,3535
68,2ahethmpp[m],Mitochondria,2-(alpha-Hydroxyethyl)thiamine diphosphate,C14H20N4O8P2S,-2,,C14H23N4O8P2S,C05125,7546
96,2pg[c],Cytosol,D-Glycerate 2-phosphate,C3H4O7P,-3,,C3H7O7P,C00631,3904
185,3pg[c],Cytosol,3-Phospho-D-glycerate,C3H4O7P,-3,,C3H7O7P,C00197,3497
187,3pg[h],Chloroplast,3-Phospho-D-glycerate,C3H4O7P,-3,,C3H7O7P,C00197,3497
...,...,...,...,...,...,...,...,...,...
1633,succ[h],Chloroplast,Succinate,C4H4O4,-2,110-15-6,C4H6O4,C00042,3344
1634,succ[m],Mitochondria,Succinate,C4H4O4,-2,110-15-6,C4H6O4,C00042,3344
1635,succoa[m],Mitochondria,Succinyl-CoA,C25H36N7O19P3S,-4,604-98-8,C25H40N7O19P3S,C00091,3391
1712,thmpp[m],Mitochondria,Thiamine diphosphate,C12H16N4O7P2S,-2,154-87-0,C12H19N4O7P2S,C00068,3368


In [6]:
import requests
import pubchempy as pcp
from time import sleep
import cv2
import numpy as np
from io import BytesIO
from PIL import Image
import re
from xml.etree import ElementTree as ET
from bs4 import BeautifulSoup  # Add this for web scraping

def get_smiles_from_id(kegg_id=None, pubchem_sid=None, formula=None, image_path=None, image_data=None, max_retries=3):
    """Resolve a SMILES string for a metabolite from its database identifiers.

    Sources are tried in order and the first non-empty result is returned:

    1. PubChem, via the substance ID and its standardized compound.
    2. ChEBI, via the ChEBI cross-reference found in the KEGG entry for
       `kegg_id` (web service first, then the ChEBI web page).
    3. ``formula_to_dummy_smiles(formula)`` as a last-resort placeholder.

    Args:
        kegg_id: KEGG compound ID such as ``'C00236'``. Missing values
            (``None``, NaN, empty/blank string) skip the KEGG/ChEBI lookup.
        pubchem_sid: PubChem substance ID as an int, a whole-number float
            (pandas turns integer columns with gaps into floats) or a numeric
            string. Missing values skip the PubChem lookup; values that are
            not whole numbers are reported and skipped.
        formula: Molecular formula, passed unchanged to
            ``formula_to_dummy_smiles`` when no database lookup succeeds.
        image_path: Unused; kept for backward compatibility (the image-based
            fallbacks are disabled).
        image_data: Unused; kept for backward compatibility.
        max_retries: Maximum number of attempts per retried network request.
            Failed attempts are followed by a sleep of ``2 ** attempt`` seconds.

    Returns:
        The SMILES string from the first source that yields one, otherwise the
        return value of ``formula_to_dummy_smiles(formula)``.
    """

    def is_missing(value):
        """True for None, '', 0, blank strings and NaN (pandas' empty cell)."""
        if not value:
            return True
        # NaN is the only float that is not equal to itself; it is truthy and
        # would otherwise be sent to the web services as the literal "nan".
        if isinstance(value, float) and value != value:
            return True
        return isinstance(value, str) and not value.strip()

    def normalize_sid(raw):
        """Return `raw` as an int substance ID, or None if it is not a whole number."""
        text = str(raw).strip()
        if text.isdigit():
            return int(text)
        try:
            number = float(text)
        except ValueError:
            return None
        # Accept 3535.0 (float column) but reject 3535.5, inf, ...
        return int(number) if number.is_integer() else None

    # --- PubChem SID Lookup Function ---
    def fetch_pubchem_smiles(sid):
        for attempt in range(max_retries):
            try:
                substance = pcp.Substance.from_sid(sid)
                if substance.standardized_compound:
                    return substance.standardized_compound.canonical_smiles
                return None
            except Exception as e:
                if attempt < max_retries - 1:
                    sleep(2 ** attempt)
                else:
                    print(f"PubChem SID lookup failed for {sid}: {str(e)}")
        return None

    # --- KEGG Image Lookup Function (using DECIMER) ---
    # Not called at present: the image-based fallback is disabled in the
    # execution logic below. It relies on a `predict_SMILES` function being
    # available in the notebook namespace.
    def fetch_kegg_smiles_from_image(kegg_id):
        image_url = f"https://www.kegg.jp/Fig/compound/{kegg_id}.gif"
        for attempt in range(max_retries):
            try:
                response = requests.get(image_url, timeout=10)
                if response.status_code == 200:
                    img = Image.open(BytesIO(response.content))
                    img = img.convert("RGB")
                    smiles = predict_SMILES(np.array(img))
                    # Check for "no prediction" before post-processing, so a
                    # None result is reported instead of raising in re.sub.
                    if not smiles:
                        print(f"DECIMER failed to predict SMILES from KEGG image for {kegg_id}")
                        return None
                    # Replace R-group labels such as [R1] with a wildcard atom.
                    return re.sub(r'\[R[a-zA-Z0-9]*\]', '[*]', smiles)
                elif response.status_code == 404:
                    print(f"KEGG structure image not found for {kegg_id}")
                    return None
            except Exception as e:
                if attempt < max_retries - 1:
                    sleep(2 ** attempt)
                else:
                    print(f"Failed to fetch or process KEGG image for {kegg_id}: {str(e)}")
                    return None
        return None

    # --- ChEBI Lookup Function (via KEGG cross-reference) ---
    def fetch_chebi_smiles_from_kegg(kegg_id):
        # Step 1: Get ChEBI ID from KEGG
        kegg_url = f"http://rest.kegg.jp/get/compound:{kegg_id}"
        chebi_id = None
        for attempt in range(max_retries):
            try:
                response = requests.get(kegg_url, timeout=10)
                if response.status_code == 200:
                    chebi_match = re.search(r'ChEBI:\s*(\d+)', response.text)
                    if chebi_match:
                        chebi_id = f"CHEBI:{chebi_match.group(1)}"
                    else:
                        print(f"No ChEBI cross-reference found for KEGG ID {kegg_id}")
                        return None
                else:
                    print(f"KEGG API request failed for {kegg_id}: Status {response.status_code}")
                    return None
                break
            except Exception as e:
                if attempt < max_retries - 1:
                    sleep(2 ** attempt)
                else:
                    print(f"Failed to fetch KEGG data for {kegg_id}: {str(e)}")
                    return None

        # Only reachable without an ID when max_retries allowed no attempt at all.
        if chebi_id is None:
            return None

        # Step 2: Try the ChEBI web service once; any failure falls through to scraping.
        chebi_api_url = f"https://www.ebi.ac.uk/webservices/chebi/2.0/webservice/getCompleteEntity?chebiId={chebi_id}"
        try:
            headers = {"Accept": "application/xml"}
            response = requests.get(chebi_api_url, headers=headers, timeout=10)
            if response.status_code == 200:
                tree = ET.fromstring(response.content)
                smiles_elem = tree.find(".//{http://www.ebi.ac.uk/webservices/chebi}smiles")
                if smiles_elem is not None and smiles_elem.text:
                    return smiles_elem.text
        except Exception as e:
            print(f"ChEBI API request error for {chebi_id}: {str(e)}")

        # Step 3: Fallback to scraping ChEBI webpage
        chebi_web_url = f"https://www.ebi.ac.uk/chebi/searchId.do?chebiId={chebi_id}"
        for attempt in range(max_retries):
            try:
                response = requests.get(chebi_web_url, timeout=10)
                if response.status_code == 200:
                    soup = BeautifulSoup(response.text, 'html.parser')
                    smiles_tag = soup.find('td', string='SMILES')
                    if smiles_tag:
                        # The value is read from the table cell following the "SMILES" label.
                        return smiles_tag.find_next_sibling('td').text.strip()
                    print(f"No SMILES found on ChEBI webpage for {chebi_id}")
                    return None
                else:
                    print(f"ChEBI webpage request failed for {chebi_id}: Status {response.status_code}")
                    return None
            except Exception as e:
                if attempt < max_retries - 1:
                    sleep(2 ** attempt)
                else:
                    print(f"Failed to scrape ChEBI webpage for {chebi_id}: {str(e)}")
                    return None
        return None

    # --- Execution Logic ---
    if not is_missing(pubchem_sid):
        sid = normalize_sid(pubchem_sid)
        if sid is None:
            print(f"Invalid PubChem SID format: {pubchem_sid}")
        else:
            smiles = fetch_pubchem_smiles(sid)
            if smiles:
                return smiles

    if not is_missing(kegg_id):
        smiles = fetch_chebi_smiles_from_kegg(str(kegg_id).strip())
        if smiles:
            return smiles

    # The image-based fallbacks (KEGG structure image, user-supplied image via
    # DECIMER) are currently disabled, so `image_path` / `image_data` are ignored.

    # Last resort: a structure-free placeholder derived from the formula.
    return formula_to_dummy_smiles(formula)

In [7]:
# Resolve a SMILES string for every reference row selected in `filtered_df`.
# Results are cached in `metabolite_smiles`, so re-running the cell only looks
# up abbreviations that are not in the cache yet.
for index, row in filtered_df.iterrows():
    abbreviation = row['Abbreviation']

    if abbreviation not in metabolite_smiles:
        metabolite_smiles[abbreviation] = get_smiles_from_id(
            row['KEGG cmpd ID'], row['PubChem Substance ID'], row['Formula']
        )

    # Read the value back from the cache so the printed SMILES always belongs
    # to this row, including rows that were already cached (previously the
    # last looked-up value was printed, or a NameError was raised when the
    # first row was already cached).
    result = metabolite_smiles[abbreviation]
    print(result)


O[C@H](COP(O)(O)=O)C(=O)OP(O)(O)=O
CC(O)c1sc(CCOP(O)(=O)OP(O)(O)=O)c(C)[n+]1Cc1cnc(C)nc1N
OC[C@@H](OP(O)(O)=O)C(O)=O
O[C@H](COP(O)(O)=O)C(O)=O
O[C@H](COP(O)(O)=O)C(O)=O
O=C(SCCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)(O)OP(=O)(O)OCC1OC(N2C=NC=3C(=NC=NC32)N)C(O)C1OP(=O)(O)O)C
O=C(SCCNC(=O)CCNC(=O)C(O)C(C)(C)COP(=O)(O)OP(=O)(O)OCC1OC(N2C=NC=3C(=NC=NC32)N)C(O)C1OP(=O)(O)O)C
CC(=O)SC(CCS)CCCCC(N)=O
Nc1ncnc2n(cnc12)[C@@H]1O[C@H](COP(O)(=O)OP(O)(O)=O)[C@@H](O)[C@H]1O
Nc1ncnc2n(cnc12)[C@@H]1O[C@H](COP(O)(=O)OP(O)(O)=O)[C@@H](O)[C@H]1O
Nc1ncnc2n(cnc12)[C@@H]1O[C@H](COP(O)(=O)OP(O)(O)=O)[C@@H](O)[C@H]1O
Nc1ncnc2n(cnc12)[C@@H]1O[C@H](COP(O)(=O)OP(O)(=O)O[C@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H](O)[C@H]1O
[O-]C(=O)CCC(=O)C([O-])=O
Nc1ncnc2n(cnc12)[C@@H]1O[C@H](COP(O)(=O)OP(O)(=O)OP(O)(O)=O)[C@@H](O)[C@H]1O
Nc1ncnc2n(cnc12)[C@@H]1O[C@H](COP(O)(=O)OP(O)(=O)OP(O)(O)=O)[C@@H](O)[C@H]1O
Nc1ncnc2n(cnc12)[C@@H]1O[C@H](COP(O)(=O)OP(O)(=O)OP(O)(O)=O)[C@@H](O)[C@H]1O
OC(CC([O-])=O)(CC([O-])=O)C([O-])=O
OC(CC(

In [8]:
from rdkit import Chem
from rdkit.Chem import inchi
from rdkit import RDLogger

# Silence RDKit warnings before any conversion runs, so the setting applies to
# every compound rather than only to those processed after the first one.
RDLogger.DisableLog('rdApp.warning')

# Matches a '*' that has no '[' directly before it and no ']' directly after it.
pattern = r'(?<!\[)\*(?!\])'

# compound -> reason, for every compound that did not get an InChIKey.
inchi_failures = {}

for compound, smiles in metabolite_smiles.items():

    # A lookup that produced no string cannot be converted; record and move on.
    if not isinstance(smiles, str):
        inchi_failures[compound] = 'no SMILES string available'
        continue

    # Replace standalone * with [*], then turn every wildcard into hydrogen.
    smi = re.sub(pattern, '[*]', smiles)
    smi_fixed = smi.replace('*', 'H')

    try:
        molecule2 = Chem.MolFromSmiles(smi_fixed)
        # Generate InChI, then the InChIKey derived from it.
        inchi_string = inchi.MolToInchi(molecule2)
        inchi_key = inchi.InchiToInchiKey(inchi_string)
    except Exception as e:
        # Keep going (best effort), but remember which compound failed and why
        # instead of dropping it silently.
        inchi_failures[compound] = str(e)
        continue

    print(compound)
    print(inchi_key)
    print('#######################################################################################################')

    metabolite_inchi[compound] = inchi_key
    # Only assigns to existing keys, so the dict size is unchanged while iterating.
    metabolite_smiles[compound] = smi_fixed

# Compounds listed here have no entry in `metabolite_inchi`.
for compound, reason in inchi_failures.items():
    print(f"No InChIKey generated for {compound}: {reason}")

13dpg[h]
LJQLQCAXBUHEAZ-UWTATZPHSA-N
#######################################################################################################
2ahethmpp[m]
RRUVJGASJONMDY-UHFFFAOYSA-O
#######################################################################################################
2pg[c]
GXIURPTVHJPJLF-UWTATZPHSA-N
#######################################################################################################
3pg[c]
OSJPPGNTCRNQQC-UWTATZPHSA-N
#######################################################################################################
3pg[h]
OSJPPGNTCRNQQC-UWTATZPHSA-N
#######################################################################################################
accoa[c]
ZSLZBFCDCINBPY-UHFFFAOYSA-N
#######################################################################################################
accoa[m]
ZSLZBFCDCINBPY-UHFFFAOYSA-N
#######################################################################################################
adhlam[m]
ARGXE

In [9]:
import os

import numpy as np

# Stoichiometric matrix as a labelled table: one row per metabolite ID, one
# column per reaction ID.
S_df = pd.DataFrame(model['S'], model['mets'], model['rxns'])

# NOTE: nothing appends to this list at present (the carbon-balance check that
# used to do so is disabled), so it stays empty.
failed_rxns = list()

# Root folder that receives one sub-folder per exported reaction.
# NOTE(review): hard-coded absolute path on one machine — consider a
# configurable, relative location.
parent_folder = 'C:\\Users\\Illia\\Desktop\\Thesis\\seb_mapping\\reaction_intermediates'

# Kept so existing references to this name keep working; all paths below are
# derived from `parent_folder`.
folder_path = 'C:\\Users\\Illia\\Desktop\\Thesis\\seb_mapping\\reaction_intermediates\\'  # Replace with your folder path

# Metabolites (without compartment tag) that are left out of the reactions.
cofactors = ['atp', 'adp', 'nadh', 'nad', 'nadp', 'nadph', 'ficytc', 'focytc', 'q8', 'q8h2', 'amp', 'fad', 'fadh2',
             'fdxox', 'fdxrd', 'pq', 'pqh2']

for rxn in S_df.columns:

    reactants = list()
    products = list()

    # Stoichiometric coefficients of this reaction, indexed by metabolite.
    col_data = S_df[rxn]

    for met in col_data[col_data != 0].index:
        coeff = col_data[met]

        # met[:-3] drops the last three characters, i.e. a compartment tag such as '[h]'.
        if met[:-3] in cofactors:
            continue

        # Each metabolite is listed once per unit of stoichiometry. int()
        # truncates, so a coefficient with magnitude below 1 adds no entry.
        copies = [met] * abs(int(coeff))
        if coeff < 0:
            reactants.extend(copies)
        else:
            products.extend(copies)

    # Raises KeyError for a metabolite that has no entry in `metabolite_smiles`.
    reactantSM = [metabolite_smiles[r] for r in reactants]
    productSM = [metabolite_smiles[p] for p in products]

    SM = reactantSM + productSM
    compounds = reactants + products

    # Skip reactions whose SMILES contain no uppercase 'C' at all, and
    # reactions that have lost one whole side after cofactor removal.
    if not any('C' in compound for compound in SM):
        continue
    if len(reactantSM) == 0 or len(productSM) == 0:
        continue

    # Build the reaction folder once and use the same path both for creating
    # it and for writing into it (previously the folder was created from
    # `parent_folder` but files were written under `folder_path + rxn`, which
    # only name the same location where '\\' is the path separator).
    rxn_dir = os.path.join(parent_folder, rxn)
    os.makedirs(rxn_dir, exist_ok=True)

    reactants_merged = ".".join(reactantSM)
    products_merged = ".".join(productSM)

    # Reaction SMILES: dot-separated reactants, '>>', dot-separated products.
    fullR = reactants_merged + ">>" + products_merged

    file_path_smiles = os.path.join(rxn_dir, "rxn.smiles")
    file_path_reactants = os.path.join(rxn_dir, "from_species_with_cmp")
    file_path_products = os.path.join(rxn_dir, "to_species_with_cmp")
    file_path_inchi = os.path.join(rxn_dir, "species_id_inchikey.txt")

    with open(file_path_smiles, "w") as file:
        file.write(fullR)

    with open(file_path_reactants, "w") as file:
        for reactant in reactants:
            file.write(reactant + "\n")

    with open(file_path_products, "w") as file:
        for product in products:
            file.write(product + "\n")

    # One "<metabolite>\t<InChIKey>" line per listed species (repeats included).
    # Raises KeyError for a metabolite that has no entry in `metabolite_inchi`.
    with open(file_path_inchi, "w") as file:
        for compound in compounds:
            file.write(f"{compound}\t{metabolite_inchi[compound]}\n")

    #with open(file_path_inchi_to, "w") as file:
    #    for product in products:
    #        file.write(f"{product}\t{metabolite_inchi[product]}\n")


In [191]:
metabolite_smiles

{'13dpg[h]': 'C(C(C(=O)OP(=O)(O)O)O)OP(=O)(O)O',
 '3pg[h]': 'C(C(C(=O)O)O)OP(=O)(O)O',
 'adp[h]': 'C1=NC(=C2C(=N1)N(C=N2)C3C(C(C(O3)COP(=O)(O)OP(=O)(O)O)O)O)N',
 'adpglc[h]': 'C1=NC(=C2C(=N1)N(C=N2)C3C(C(C(O3)COP(=O)(O)OP(=O)(O)OC4C(C(C(C(O4)CO)O)O)O)O)O)N',
 'atp[h]': 'C1=NC(=C2C(=N1)N(C=N2)C3C(C(C(O3)COP(=O)(O)OP(=O)(O)OP(=O)(O)O)O)O)N',
 'co2[c]': 'C(=O)=O',
 'co2[e]': 'C(=O)=O',
 'co2[h]': 'C(=O)=O',
 'dhap[h]': 'C(C(=O)COP(=O)(O)O)O',
 'f6p_B[h]': 'C(C1C(C(C(O1)(CO)O)O)O)OP(=O)(O)O',
 'fdp_B[h]': 'C(C1C(C(C(O1)(COP(=O)(O)O)O)O)O)OP(=O)(O)O',
 'g1p[h]': 'C(C1C(C(C(C(O1)OP(=O)(O)O)O)O)O)O',
 'g3p[h]': 'C(C(C=O)O)OP(=O)(O)O',
 'g6p_A[h]': 'C(C1C(C(C(C(O1)O)O)O)O)OP(=O)(O)O',
 'h[h]': '[H+]',
 'h2o[h]': 'O',
 'nadp[h]': 'C1=CC(=C[N+](=C1)C2C(C(C(O2)COP(=O)(O)OP(=O)(O)OCC3C(C(C(O3)N4C=NC5=C(N=CN=C54)N)OP(=O)(O)O)O)O)O)C(=O)N',
 'nadph[h]': 'C1C=CN(C=C1C(=O)N)C2C(C(C(O2)COP(=O)(O)OP(=O)(O)OCC3C(C(C(O3)N4C=NC5=C(N=CN=C54)N)OP(=O)(O)O)O)O)O',
 'pi[h]': 'OP(=O)(O)O',
 'ppi[h]': 'OP(=O)(O)O

# Normalise the metabolite IDs in three sequential passes (kept separate so
# that key collisions resolve exactly as before):
# 1) hyphens become underscores,
met_smiles = dict(
    (met_id.replace('-', '_'), smi)
    for met_id, smi in metabolite_smiles.items()
)
# 2) IDs that do not end in a bracketed tag get the default '[c]' suffix,
met_smiles = dict(
    (met_id if re.search(r'\[.*\]$', met_id) else met_id + '[c]', smi)
    for met_id, smi in met_smiles.items()
)
# 3) the '_LPAREN' / '_RPAREN' markers are removed.
met_smiles = dict(
    (met_id.replace('_LPAREN', '').replace('_RPAREN', ''), smi)
    for met_id, smi in met_smiles.items()
)

In [54]:
failed_rxns

def highlight(s):
    """Return one CSS string per element of `s`, red where the leading number exceeds 72.

    Each element is split on ':' and the part before the first colon is
    converted to int; elements whose number is greater than 72 get
    ``'background-color: red'``, all others an empty string.
    """
    leading_number = s.str.split(':').str[0].astype(int)
    styles = []
    for over_limit in leading_number > 72:
        styles.append('background-color: red' if over_limit else '')
    return styles

In [52]:
smiles = "c1nc(c2c(n1)[nH]cn2)NC1(O)C(O)C(O)COP(=O)(O)OP(=O)(O)OCC(C)(C)C(O)C(=O)NCC(=O)NCCSC(=O)CCCCCCCCCCCCCCCCCCCCCCCCCC"
print(smiles.count("C"))  # Recorded output: 41 — count("C") is case-sensitive, so lowercase 'c' characters are not counted

41


In [57]:
smiles = "c1nc(c2c(n1)[nH]cn2)NC1(O)C(O)C(O)COP(=O)(O)OP(=O)(O)OCC(C)(C)C(O)C(=O)NCC(=O)NCCSC(=O)CCCC/C=C/CC/C=C/CC/C=C/CCCCC"
# Count the carbon symbol in both cases: uppercase 'C' and lowercase 'c'.
carbon_count = sum(smiles.count(symbol) for symbol in ("C", "c"))
print(carbon_count)  # Output: 39

39


In [78]:
# NOTE(review): the only assignment to `ref2` visible in this notebook is in the
# following cell, so on a fresh kernel that cell has to run before this one.
ref2.to_csv('result.csv')

In [79]:
import pandas as pd

# Load the table and attach the model's reaction IDs as a new column.
ref2 = pd.read_csv('elements2.csv')
ref2['Reaction ID'] = model['rxns']  # `model` comes from the first cell; presumably one ID per row of the CSV, in matching order — confirm

# `failed_rxns` is expected to exist already (it is created by the reaction
# export cell); it is used below to decide which cells to colour.
# failed_rxns = {...}

def color_cells(val):
    """Return the CSS for one cell: red background if `val` is in `failed_rxns`, else ''."""
    if val in failed_rxns:
        return 'background-color: #ff0000'
    return ''

# Apply the cell-wise styling to the "Reaction ID" column.
# Styler.applymap is deprecated in newer pandas in favour of Styler.map, so
# use `map` when this pandas version provides it and fall back otherwise.
styler = ref2.style
apply_cellwise = getattr(styler, 'map', None) or styler.applymap
styled_df = apply_cellwise(color_cells, subset=["Reaction ID"])

# Export to Excel with styling
styled_df.to_excel('styled_output.xlsx', engine='openpyxl', index=False)