In [7]:
from hypergraphs import *
from kegg import *

In [8]:
superpathwayDict = {
    "Carbohydrate metabolism":
[
	("00010", "Glycolysis / Gluconeogenesis"),
	("00020", "Citrate cycle (TCA cycle)"),
	("00030", "Pentose phosphate pathway"),
	("00040", "Pentose and glucuronate interconversions"),
	("00051", "Fructose and mannose metabolism"),
	("00052", "Galactose metabolism"),
	("00053", "Ascorbate and aldarate metabolism"),
	("00500", "Starch and sucrose metabolism"),
	("00520", "Amino sugar and nucleotide sugar metabolism"),
	("00620", "Pyruvate metabolism"),
	("00630", "Glyoxylate and dicarboxylate metaboliscm"),
	("00640", "Propanoate metabolism"),
	("00650", "Butanoate metabolism"),
	("00562", "Inositol phosphate metabolism")
],
"Energy metabolism":
[
	("00190", "Oxidative phosphorylation"),
	("00910", "Nitrogen metabolism"),
	("00920", "Sulfur metabolism")
],
",Lipid metabolism":
[	("00061", "Fatty acid biosynthesis"),
	("00062", "Fatty acid elongation"),	
	("00071", "Fatty acid degradation"),
	("00100", "Steroid biosynthesis"),	
	("00120", "Primary bile acid biosynthesis"),
	("00140", "Steroid hormone biosynthesis"),
	("00561", "Glycerolipid metabolism"),
	("00564", "Glycerophospholipid metabolism"),
	("00565", "Ether lipid metabolism"),
	("00600", "Sphingolipid metabolism"),
	("00590", "Arachidonic acid metabolism"),
	("00591", "Linoleic acid metabolism"),
	("00592", "alpha-Linolenic acid metabolism"),
	("01040", "Biosynthesis of unsaturated fatty acids")
],
"Nucleotide metabolism":
[
	("00230", "Purine metabolism"),
	("00240", "Pyrimidine metabolism")
],
"Amino acid metabolism":
[
	("00250", "Alanine, aspartate and glutamate metabolism"),
	("00260", "Glycine, serine and threonine metabolism"),
	("00270", "Cysteine and methionine metabolism"),
	("00280", "Valine, leucine and isoleucine degradation"),
	("00290", "Valine, leucine and isoleucine biosynthesis"),
	("00310", "Lysine degradation"),
	("00220", "Arginine biosynthesis"),
	("00330", "Arginine and proline metabolism"),
	("00340", "Histidine metabolism"),
	("00350", "Tyrosine metabolism"),
	("00360", "Phenylalanine metabolism"),
	("00380", "Tryptophan metabolism"),
	("00400", "Phenylalanine, tyrosine and tryptophan biosynthesis")
],
"Metabolism of other amino acids":
[
	("00410", "beta-Alanine metabolism"),
	("00430", "Taurine and hypotaurine metabolism"),
	("00440", "Phosphonate and phosphinate metabolism"),
	("00450", "Selenocompound metabolism"),
	("00470", "D-Amino acid metabolism"),
	("00480", "Glutathione metabolism")
],
"Glycan biosynthesis and metabolism":
[
	("00510", "N-Glycan biosynthesis"),
	("00513", "Various types of N-glycan biosynthesis"),
	("00512", "Mucin type O-glycan biosynthesis"),
	("00515", "Mannose type O-glycan biosynthesis"),
	("00514", "Other types of O-glycan biosynthesis"),
	("00532", "Glycosaminoglycan biosynthesis - chondroitin sulfate / dermatan sulfate"),
	("00534", "Glycosaminoglycan biosynthesis - heparan sulfate / heparin"),
	("00533", "Glycosaminoglycan biosynthesis - keratan sulfate"),
	("00531", "Glycosaminoglycan degradation"),
	("00563", "Glycosylphosphatidylinositol (GPI)-anchor biosynthesis"),
	("00601", "Glycosphingolipid biosynthesis - lacto and neolacto series"),
	("00603", "Glycosphingolipid biosynthesis - globo and isoglobo series"),
	("00604", "Glycosphingolipid biosynthesis - ganglio series"),
	("00511", "Other glycan degradation")
],
"Metabolism of cofactors and vitamins":
[
	("00730", "Thiamine metabolism"),
	("00740", "Riboflavin metabolism"),
	("00750", "Vitamin B6 metabolism"),
	("00760", "Nicotinate and nicotinamide metabolism"),
	("00770", "Pantothenate and CoA biosynthesis"),
	("00780", "Biotin metabolism"),
	("00785", "Lipoic acid metabolism"),
	("00790", "Folate biosynthesis"),
	("00670", "One carbon pool by folate"),
	("00830", "Retinol metabolism"),
	("00860", "Porphyrin metabolism"),
	("00130", "Ubiquinone and other terpenoid-quinone biosynthesis")
],
"Metabolism of terpenoids and polyketides":
[
	("00900", "Terpenoid backbone biosynthesis")
],
"Biosynthesis of other secondary metabolites":
[
	("00232", "Caffeine metabolism"),
	("00524", "Neomycin, kanamycin and gentamicin biosynthesis")
],
"Xenobiotics biodegradation and metabolism":
[
	("00980", "Metabolism of xenobiotics by cytochrome P450"),
	("00982", "Drug metabolism - cytochrome P450"),
	("00983", "Drug metabolism - other enzymes")
]
}

In [9]:
organismDict = {"mmu": "Mus musculus (house mouse)", "dre": "Danio rerio (zebrafish)", "nfu": "Nothobranchius furzeri (turquoise killifish)"}

In [10]:
outputDirectory = "/Users/boldi/Desktop/CENTAI/"
dataDirectory = "../../LaTeX/Data/KEGG-Pathways/"

In [29]:
def convert_SBML_to_csv(outfile, filenames):
    """
    """
    if not isinstance(filenames, list):
        filenames = [filenames]
    totalR = 0
    for filename in filenames:
        document = libsbml.readSBMLFromFile(filename)
        model = document.getModel()
        if model is None:
            print("Ignoring file {}, couldn't get a model out of it".format(filename))
            continue
        totalR += model.getNumReactions()
        for i in range(model.getNumReactions()):
            reaction = model.getReaction(i)
            reactionId = reaction.id
            reactantsIds = [p.species for p in reaction.getListOfReactants()]
            productsIds = [p.species for p in reaction.getListOfProducts()]
            numProducts = len(productsIds)
            numReactants = len(reactantsIds)
            if reaction.reversible:
                revFlag = "R"
            else:
                revFlag = "I"
            outfile.write(f"{reaction.name},{numReactants},{numProducts},{revFlag}," +
                 ",".join(reactantsIds) + "," + ",".join(productsIds) + "\n")
    print("****** Number of reactions: ", outfile, totalR)


In [30]:
for organism in organismDict.keys():
    for superpathway in superpathwayDict.keys():
        safeSuperpathway = makesafe(superpathway)
        os.makedirs(outputDirectory, exist_ok=True)
        outFilename = os.path.join(outputDirectory, organism + "-" + safeSuperpathway + ".csv")
        with open(outFilename, "w") as file:
            convert_SBML_to_csv(file,
                [os.path.join(dataDirectory, organism, f"{organism}{x[0]}-sbml.xml") for x in superpathwayDict[superpathway]])

****** Number of reactions:  <_io.TextIOWrapper name='/Users/boldi/Desktop/CENTAI/mmu-Carbohydratemetabolism.csv' mode='w' encoding='UTF-8'> 318
****** Number of reactions:  <_io.TextIOWrapper name='/Users/boldi/Desktop/CENTAI/mmu-Energymetabolism.csv' mode='w' encoding='UTF-8'> 11
****** Number of reactions:  <_io.TextIOWrapper name='/Users/boldi/Desktop/CENTAI/mmu-Lipidmetabolism.csv' mode='w' encoding='UTF-8'> 476
****** Number of reactions:  <_io.TextIOWrapper name='/Users/boldi/Desktop/CENTAI/mmu-Nucleotidemetabolism.csv' mode='w' encoding='UTF-8'> 144
****** Number of reactions:  <_io.TextIOWrapper name='/Users/boldi/Desktop/CENTAI/mmu-Aminoacidmetabolism.csv' mode='w' encoding='UTF-8'> 311
****** Number of reactions:  <_io.TextIOWrapper name='/Users/boldi/Desktop/CENTAI/mmu-Metabolismofotheraminoacids.csv' mode='w' encoding='UTF-8'> 78
****** Number of reactions:  <_io.TextIOWrapper name='/Users/boldi/Desktop/CENTAI/mmu-Glycanbiosynthesisandmetabolism.csv' mode='w' encoding='UTF