In [1]:
import cobra
import pandas as pd
import json
from cobra import Model, Reaction, Metabolite
from cobra.io import load_model, write_sbml_model, read_sbml_model, save_json_model
from pathlib import Path
from collections import defaultdict

import networkx as nx
import matplotlib.pyplot as plt

In [2]:
SBMLmodel = Path("iJV803_M_album_BG8_GEM_R_full_version_updated.sbml")
model = read_sbml_model(SBMLmodel)

'' is not a valid SBML 'SId'.


In [3]:
model.optimize

<bound method Model.optimize of <Model  at 0x24bc1549a90>>

In [None]:
partial_name = "phosphatidyl"

# Find reactions that contain the partial name
matching_reactions = [rxn for rxn in model.metabolites if partial_name.lower() in rxn.name.lower()]

# Display results
if matching_reactions:
    for rxn in matching_reactions:
        print(f"{rxn.id}")
else:
    print("No reactions found containing that name.")

Diisohexadecanoylphosphatidylserine_c
Diisohexadecanoylphosphatidylglycerophosphate_c
Diisohexadecanoylphosphatidylglycerol_c
Diisopentadecanoylphosphatidylglycerophosphate_c
Diisotetradecanoylphosphatidylserine_c
phosphatidylserine_ditetradecanoyl_c
phosphatidylserine_dihexadec_9_enoyl_c
phosphatidylethanolamine_ditetradecanoyl_c
phosphatidylserine_dioctadecanoyl_c
phosphatidylethanolamine_dioctadecanoyl_c
Dianteisopentadecanoylphosphatidylserine_c
L_2_Lysophosphatidylethanolamine_c
Phosphatidylglycerophosphate_dihexadec_9_enoyl_c
Phosphatidylglycerol_dihexadec_9_enoyl_c
Dianteisopentadecanoylphosphatidylglycerophosphate_c
Phosphatidylglycerophosphate_dihexadecanoyl_c
Phosphatidylglycerophosphate_dioctadec_11_enoyl_c
Diisohexadecanoylphosphatidylethanolamine_c
Dianteisoheptadecanoylphosphatidylserine_c
1_Phosphatidyl_1D_myo_inositol_3_4_bisphosphate_c
1_Phosphatidyl_1D_myo_inositol_3_phosphate_c
phosphatidylserine_didodecanoyl_c
phosphatidylethanolamine_didodecanoyl_c
Dianteisopentade

In [None]:
met = model.metabolites.get_by_id("fa4coa_c")
met.summary()


In [None]:
for reaction in model.reactions:
    if "biomass" in reaction.id.lower():  
        print(F"Reaction ID: {reaction.id}")
        print(F"Reaction Name: {reaction.name}")
        print(F"Reaction: {reaction.reaction}")
        print(50 * "-")


In [None]:
def download_SBML():
    try:
        write_sbml_model(model, 'BG8EnhancedModel.xml')

        with open('model.xml', 'r') as file:
            sbml_content = file.read()

        return sbml_content
        
    except Exception as e:
        raise Exception(f"Error saving or loading SBML model: {str(e)}")
    

download_SBML()


In [5]:
solution = model.optimize()
fluxes = solution.fluxes
flux_dict = fluxes.to_dict()
with open("fluxData.json" , "w") as f: 
    json.dump(flux_dict, f)

In [None]:
solution

In [6]:
with open("rxnsAndMetabolitesJSONS/output_reactions.json", "r") as f:
    output_reactions = json.load(f)

unique_metabolites = set()

for rxn_id in output_reactions:
    try:
        rxn = model.reactions.get_by_id(rxn_id)
        
        if rxn.metabolites:
            for metabolite in rxn.metabolites:
                unique_metabolites.add(metabolite.id)

    except KeyError:
        print(f"Reaction {rxn_id} not found in the model")

unique_metabolites_list = sorted(list(unique_metabolites))

with open("rxnsAndMetabolitesJSONS/unique_metabolites.json", "w") as outfile:
    json.dump(unique_metabolites_list, outfile, indent=4)

print("Unique metabolites written to unique_metabolites.json")

Unique metabolites written to unique_metabolites.json


In [7]:
with open("rxnsAndMetabolitesJSONS/output_reactions.json", "r") as f:
    output_reactions = json.load(f)

unique_metabolites = set()

for rxn_id in output_reactions:
    try:
        rxn = model.reactions.get_by_id(rxn_id)
        
        if rxn.metabolites:
            for metabolite in rxn.metabolites:
                metabolite_id = metabolite.id
                
                # Filter: ignore metabolites with less than 5 characters or "ose_c"
                if len(metabolite_id) >= 6 and "ose_c" not in metabolite_id:
                    unique_metabolites.add(metabolite_id)

    except KeyError:
        print(f"Reaction {rxn_id} not found in the model")

unique_metabolites_list = sorted(list(unique_metabolites))


with open("rxnsAndMetabolitesJSONS/filtered_metabolites.json", "w") as outfile:
    json.dump(unique_metabolites_list, outfile, indent=4)

print("Filtered metabolites written to filtered_metabolites.json")


Filtered metabolites written to filtered_metabolites.json


In [8]:
with open("rxnsAndMetabolitesJSONS/filtered_metabolites.json") as f:
    filtered_metabolite_ids = json.load(f)

metabolite_list = []
for met_id in filtered_metabolite_ids:
    metabolite = model.metabolites.get_by_id(met_id) 
    metabolite_list.append({
        "id": met_id,
        "name": metabolite.name if metabolite else "Unknown",
        "compartment": metabolite.compartment if metabolite else "Unknown"
    })

df = pd.DataFrame(metabolite_list)

print(df)

with open("rxnsAndMetabolitesJSONS/metabolites_summary.json", "w") as f:
    json.dump(metabolite_list, f, indent=4)

                                                    id  \
0                 10_methyl_3_hydroxy_dodecanoyl_ACP_c   
1                     10_methyl_3_oxo_dodecanoyl_ACP_c   
2                           10_methyl_dodecanoyl_ACP_c   
3                  10_methyl_trans_dodec_2_enoyl_ACP_c   
4                 11_methyl_3_hydroxy_dodecanoyl_ACP_c   
..                                                 ...   
207                phosphatidylserine_dioctadecanoyl_c   
208            phospho_heptosyl_heptosyl_kdo2_lipidA_c   
209  phospho_heptosyl_phospho_heptosyl_heptosyl_kdo...   
210                                           strcoa_c   
211                                   tetracosanoate_c   

                                                  name compartment  
0                 10-methyl-3-hydroxy-dodecanoyl-ACP-c           c  
1                     10-methyl-3-oxo-dodecanoyl-ACP-c           c  
2                           10-methyl-dodecanoyl-ACP-c           c  
3                  10-methy

In [None]:
#ddca_c  = dodecanoate

reactions = [
    {"id": "rxn08803_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoethanolamine_dodecanoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "ddca_c"]},
    {"id": "rxn08804_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoethanolamine_tetradecanoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "Myristic_acid_c"]},
    {"id": "rxn08806_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoethanolamine_hexadecanoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "Palmitate_c"]},
    {"id": "rxn08808_c", "reactants": ["H2O_c", "H_c", "L_2_Lysophosphatidylethanolamine_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "ocdca_c"]},
    {"id": "rxn08805_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoethanolamine_tetradec_7_enoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "tetradecenoate_c"]},
    {"id": "rxn08807_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoethanolamine_hexadec_9_enoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "hexadecenoate_c"]},
    {"id": "rxn08809_c", "reactants": ["H2O_c", "L_2_Lysophosphatidylethanolamine_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "octadecenoate_c"]},
    {"id": "rxn08838_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoethanolamine_dodecanoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "ddca_c"]},
    {"id": "rxn08839_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoethanolamine_tetradecanoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "Myristic_acid_c"]},
    {"id": "rxn08841_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoethanolamine_n_C16_0_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "Palmitate_c"]},
    {"id": "rxn08843_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoethanolamine_n_C18_0_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "ocdca_c"]},
    {"id": "rxn08840_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoethanolamine_tetradec_7_enoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "tetradecenoate_c"]},
    {"id": "rxn08842_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoethanolamine_hexadec_9_enoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "hexadecenoate_c"]},
    {"id": "rxn08844_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoethanolamine_octadec_11_enoyl_c"], "products": ["H_c", "Glycerophosphoethanolamine_c", "octadecenoate_c"]},
    {"id": "rxn08796_c", "reactants": ["H2O_c", "1_dodecanoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "ddca_c"]},
    {"id": "rxn08797_c", "reactants": ["H2O_c", "1_tetradecanoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "Myristic_acid_c"]},
    {"id": "rxn08799_c", "reactants": ["H2O_c", "1_hexadecanoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "Palmitate_c"]},
    {"id": "rxn08801_c", "reactants": ["H2O_c", "1_octadec_11_enoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "ocdca_c"]},
    {"id": "rxn08798_c", "reactants": ["H2O_c", "1_tetradec_7_enoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "tetradecenoate_c"]},
    {"id": "rxn08800_c", "reactants": ["H2O_c", "1_hexadec_9_enoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "hexadecenoate_c", "Glycerol_3_phosphate_c"]},
    {"id": "rxn08802_c", "reactants": ["H2O_c", "1_octadec_11_enoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "octadecenoate_c"]},
    {"id": "rxn08817_c", "reactants": ["H2O_c", "2_dodecanoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "ddca_c"]},
    {"id": "rxn08818_c", "reactants": ["H2O_c", "2_tetradecanoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "Myristic_acid_c"]},
    {"id": "rxn08820_c", "reactants": ["H2O_c", "2_hexadecanoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "Palmitate_c"]},
    {"id": "rxn08822_c", "reactants": ["H2O_c", "2_octadecanoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "ocdca_c"]},
    {"id": "rxn08819_c", "reactants": ["H2O_c", "2_tetradec_7_enoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "tetradecenoate_c"]},
    {"id": "rxn08821_c", "reactants": ["H2O_c", "2_hexadec_9_enoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "hexadecenoate_c"]},
    {"id": "rxn08823_c", "reactants": ["H2O_c", "2_octadec_11_enoyl_sn_glycerol_3_phosphate_c"], "products": ["H_c", "Glycerol_3_phosphate_c", "octadecenoate_c"]},
    {"id": "rxn08810_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoglycerol_dodecanoyl_c"], "products": ["H_c", "ddca_c", "Glycerophosphoglycerol_c"]},
    {"id": "rxn08811_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoglycerol_tetradecanoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "Myristic_acid_c"]},
    {"id": "rxn08813_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoglycerol_hexadecanoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "Palmitate_c"]},
    {"id": "rxn08815_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoglycerol_octadecanoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "ocdca_c"]},
    {"id": "rxn08812_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoglycerol_tetradec_7_enoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "tetradecenoate_c"]},
    {"id": "rxn08814_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoglycerol_hexadec_9_enoyl_c"], "products": ["H_c", "hexadecenoate_c", "Glycerophosphoglycerol_c"]},
    {"id": "rxn08816_c", "reactants": ["H2O_c", "1_Acyl_sn_glycero_3_phosphoglycerol_octadec_11_enoyl_c"], "products": ["H_c", "octadecenoate_c", "Glycerophosphoglycerol_c"]},
    {"id": "rxn08845_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoglycerol_dodecanoyl_c"], "products": ["H_c", "ddca_c", "Glycerophosphoglycerol_c"]},
    {"id": "rxn08846_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoglycerol_tetradecanoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "Myristic_acid_c"]},
    {"id": "rxn08848_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoglycerol_hexadecanoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "Palmitate_c"]},
    {"id": "rxn08850_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoglycerol_octadecanoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "ocdca_c"]},
    {"id": "rxn08847_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoglycerol_tetradec_7_enoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "tetradecenoate_c"]},
    {"id": "rxn08849_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoglycerol_hexadec_9_enoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "hexadecenoate_c"]},
    {"id": "rxn08851_c", "reactants": ["H2O_c", "2_Acyl_sn_glycero_3_phosphoglycerol_octadec_11_enoyl_c"], "products": ["H_c", "Glycerophosphoglycerol_c", "octadecenoate_c"]},
]

In [81]:
met = model.metabolites.get_by_id("1_Acyl_sn_glycero_3_phosphoethanolamine_dodecanoyl_c")
met

0,1
Metabolite identifier,1_Acyl_sn_glycero_3_phosphoethanolamine_dodecanoyl_c
Name,1-Acyl-sn-glycero-3-phosphoethanolamine-...
Memory address,0x197df98b470
Formula,C17H36NO7P
Compartment,c
In 1 reaction(s),rxn08803_c


In [78]:
rxn = model.reactions.get_by_id("rxn08803_c")
rxn

0,1
Reaction identifier,rxn08803_c
Name,"Lysophospholipase L1 (2-acylglycerophosphoethanolamine, n-C12:0) (periplasm)"
Memory address,0x197dfeb5d30
Stoichiometry,1_Acyl_sn_glycero_3_phosphoethanolamine_dodecanoyl_c + H2O_c <=> Glycerophosphoethanolamine_c + H_c + ddca_c  1-Acyl-sn-glycero-3-phosphoethanolamine-dodecanoyl-c + H2O-e <=> Glycerophosphoethanolamine-c + H-e + ddca-c
GPR,Malb_1519 or Malb_14
Lower bound,-1000.0
Upper bound,1000.0


In [None]:
G = nx.DiGraph()

reactions = [
    ("1-Acyl-PE (dodecanoyl)", "Glycerophosphoethanolamine")
    ("1-Acyl-PE (dodecanoyl)", "Dodecanoate (ddca)")
    ("1-Acyl-PE (tetradecanoyl)", "Glycerophosphoethanolamine")
    ("1-Acyl-PE (tetradecanoyl)", "Myristic acid")
    ("1-Acyl-PE (hexadecanoyl)", "Glycerophosphoethanolamine")
    ("1-Acyl-PE (hexadecanoyl)", "Palmitate")
    ("Lysophosphatidylethanolamine", "Glycerophosphoethanolamine")
    ("Lysophosphatidylethanolamine", "ocdca")
    ("1-Acyl-PE (tetradec_7_enoyl)", "Glycerophosphoethanolamine")
    ("1-Acyl-PE (tetradec_7_enoyl)", "Tetradecenoate")

    ("2-Acyl-PE (dodecanoyl)", "Glycerophosphoethanolamine")
    ("2-Acyl-PE (dodecanoyl)", "Dodecanoate (ddca)")
    ("2-Acyl-PE (tetradecanoyl)", "Glycerophosphoethanolamine")
    ("2-Acyl-PE (tetradecanoyl)", "Myristic acid")
    ("2-Acyl-PE (hexadecanoyl)", "Glycerophosphoethanolamine")
    ("2-Acyl-PE (hexadecanoyl)", "Palmitate")
    
    ("2-Acyl-PE (octadec_11_enoyl)", "Glycerophosphoethanolamine")
    ("2-Acyl-PE (octadec_11_enoyl)", "Octadecenoate")
    ("1-Acyl-G3P (dodecanoyl)", "Glycerophosphoglycerol")
    ("1-Acyl-G3P (dodecanoyl)", "Dodecanoate (ddca)")
    ("1-Acyl-G3P (tetradecanoyl)", "Glycerophosphoglycerol")
    ("1-Acyl-G3P (tetradecanoyl)", "Myristic acid")
    ("1-Acyl-G3P (hexadecanoyl)", "Glycerophosphoglycerol")
    ("1-Acyl-G3P (hexadecanoyl)", "Palmitate")
    ("1-Acyl-G3P (octadecanoyl)", "Glycerophosphoglycerol")
    ("1-Acyl-G3P (octadecanoyl)", "ocdca")
    ("1-Acyl-G3P (tetradec_7_enoyl)", "Glycerophosphoglycerol")
    ("1-Acyl-G3P (tetradec_7_enoyl)", "Tetradecenoate")
    ("2-Acyl-G3P (dodecanoyl)", "Glycerophosphoglycerol")
    ("2-Acyl-G3P (dodecanoyl)", "Dodecanoate (ddca)")
    ("2-Acyl-G3P (tetradecanoyl)", "Glycerophosphoglycerol")
    ("2-Acyl-G3P (tetradecanoyl)", "Myristic acid")
    ("2-Acyl-G3P (hexadecanoyl)", "Glycerophosphoglycerol")
    ("2-Acyl-G3P (hexadecanoyl)", "Palmitate")
    ("2-Acyl-G3P (octadecanoyl)", "Glycerophosphoglycerol")
    ("2-Acyl-G3P (octadecanoyl)", "ocdca")
    ("2-Acyl-G3P (tetradec_7_enoyl)", "Glycerophosphoglycerol")
    ("2-Acyl-G3P (tetradec_7_enoyl)", "Tetradecenoate")
    ("2-Acyl-G3P (hexadec_9_enoyl)", "Glycerophosphoglycerol")
    ("2-Acyl-G3P (hexadec_9_enoyl)", "Hexadecenoate")
    ("2-Acyl-G3P (octadec_11_enoyl)", "Glycerophosphoglycerol")
    ("2-Acyl-G3P (octadec_11_enoyl)", "Octadecenoate")
]

# Add nodes and edges
for reactant, product in reactions:
    G.add_edge(reactant, product)

# Define categories
fatty_acids = {"Dodecanoate", "Myristic acid", "Tetradecenoate", "Palmitate", "Octadecenoate"}
backbones = {"Glycerophosphoethanolamine", "Glycerophosphoglycerol", "Glycerol-3-phosphate"}

# Assign colors
node_colors = []
for node in G.nodes():
    if node in fatty_acids:
        node_colors.append("lightcoral")  # Fatty acids = Red
    elif node in backbones:
        node_colors.append("lightgreen")  # Backbones = Green
    else:
        node_colors.append("lightblue")  # Others = Blue

# Draw graph
plt.figure(figsize=(100, 60))
pos = nx.spring_layout(G, seed=42, k=1.5)
nx.draw(G, pos, with_labels=True, node_size=20000, node_color=node_colors, edge_color="gray", font_size=40, font_weight="bold")
plt.title("Phospholipid Deacylation Pathway")
plt.show()


In [9]:
partial_names = [
    "dodec", "tetra", "tridec", "penta", "hexa", "octa", "hepta", "methyl",
    "propyl", "decanoyl", "palmi", "doco", "erucic", "icosen", "lauroyl",
    "myrist", "nervon", "propionyl", "tiglyl", "lipid", "fa11", "fa12",
    "fa1", "fa3", "fa4", "fa6", "ethanolamine", "ddca", "ocdca",
    "phosphoglycerol", "phosphotidate", "phosphotidyl"
]

excluded_metabolites = {
    "tetrahydrofolate",
    "5_10_Methylenetetrahydrofolate",
    "cyclohexanone",
    "dimethylbenzimidazole",
    "5_Methylcytosine",
    "S_Aminomethyldihydrolipoylprotein",
    "propionyl_phosphate",
    "hexanesulfonate",
    "6_Hydroxymethyl_dihydropterin"
}

matching_metabolites = {
    met.id: {
        "id": met.id,
        "name": met.name,
        "compartment": met.compartment
    }
    for met in model.metabolites
    if any(partial.lower() in met.name.lower() for partial in partial_names)  
    and not any(excluded.lower() in met.name.lower() for excluded in excluded_metabolites)  
}

with open("rxnsAndMetabolitesJSONS/whole_model_filtered_metabolites.json", "w") as f:
    json.dump(list(matching_metabolites.keys()), f, indent=4)

with open("rxnsAndMetabolitesJSONS/whole_model_metabolites_summary.json", "w") as f:
    json.dump(list(matching_metabolites.values()), f, indent=4)

df = pd.DataFrame(matching_metabolites.values())
print(df)


                                                    id  \
0    2_Amino_4_hydroxy_6_hydroxymethyl_7_8_dihydrop...   
1                                                fa6_c   
2                                             fa6coa_c   
3                     7_methyl_trans_oct_2_enoyl_ACP_c   
4                              7_methyl_octanoyl_ACP_c   
..                                                 ...   
439                     core_oligosaccharide_lipid_A_c   
440                           Lauroyl_KDO2_lipid_IVA_c   
441                                 Palmitoleic_acid_c   
442                    cpd16330_PQQ_dicarboxylicAcid_c   
443            5_10_Methylenetetrahydromethanopterin_c   

                                                  name compartment  
0    2-Amino-4-hydroxy-6-hydroxymethyl-7-8-dihydrop...           c  
1                                                fa6-c           c  
2                                             fa6coa-c           c  
3                     7-met

In [10]:
with open("rxnsAndMetabolitesJSONS/filtered_metabolites.json") as f:
    filtered_metabolites = set(json.load(f)) 

with open("rxnsAndMetabolitesJSONS/whole_model_filtered_metabolites.json") as f:
    whole_model_filtered_metabolites = set(json.load(f))  

count_filtered = len(filtered_metabolites)
count_whole_model_filtered = len(whole_model_filtered_metabolites)

in_filtered_not_in_model = filtered_metabolites - whole_model_filtered_metabolites
in_model_not_in_filtered = whole_model_filtered_metabolites - filtered_metabolites

print(f"Number of metabolites in filtered_metabolites.json: {count_filtered}")
print(f"Number of metabolites in whole_model_filtered_metabolites.json: {count_whole_model_filtered}\n")

print("Metabolites in filtered but NOT in whole model filtered:")
print("--------------------------------------------------------")
if in_filtered_not_in_model:
    print("\n".join(in_filtered_not_in_model))
else:
    print("None")
print() 

print("Metabolites in whole model filtered but NOT in filtered:")
print("--------------------------------------------------------")
if in_model_not_in_filtered:
    print("\n".join(in_model_not_in_filtered))
else:
    print("None")
print()  



Number of metabolites in filtered_metabolites.json: 212
Number of metabolites in whole_model_filtered_metabolites.json: 444

Metabolites in filtered but NOT in whole model filtered:
--------------------------------------------------------
Lignoceroyl_CoA_c
NADP_c
4MOP_c
Stearoylcardiolipin_B_subtilis_c
Acetyl_ACP_c
Malonyl_CoA_c
strcoa_c
Malonyl_acyl_carrierprotein__c
Acetoacetyl_ACP_c
isovaleryl_ACP_c
Glycerol_3_phosphate_c
L_Glutamate_c
Acetyl_CoA_c
NADH_c
2_Oxoglutarate_c
Phosphate_c
L_Leucine_c
dTDP_c
NADPH_c
Behenoyl_CoA_c
CMP_KDO_c
Isovaleryl_CoA_c
Glycerol_c
L_Serine_c

Metabolites in whole model filtered but NOT in filtered:
--------------------------------------------------------
2_tetradec_7_enoyl_sn_glycerol_3_phosphate_c
all_trans_Hexaprenyl_diphosphate_c
Phosphoethanolamine_c
D_methylmalonyl_CoA_c
1_Acyl_sn_glycero_3_phosphoethanolamine_hexadecanoyl_c
Lipid_A_disaccharide_c
Glycerophosphoglycerol_c
Hexanoyl_CoA_c
2_octadec_11_enoyl_sn_glycerol_3_phosphate_c
Octadecenoyl_AC