# Supplemental Notebook A

This notebook exists to link the metabolite names in the GC-MS and LC-MS data sets to KEGG metabolite IDs.

### Load imports

In [1]:
from bioservices import KEGG
import pandas as pd
import sys

kegg = KEGG()

source_dir = '../'
sys.path.append(source_dir)

from functions.get_pathways_for_metabolite import get_pathways_for_metabolite

### Load Data

In [2]:
intracellular_df = pd.read_csv('../processed_data/intra_gcms.csv', index_col=0)
extracellular_1_df = pd.read_csv('../processed_data/extra_gcms_1.csv', index_col=0)
extracellular_4_df = pd.read_csv('../processed_data/extra_gcms_4.csv', index_col=0)
intracellular_pos_df = pd.read_csv('../processed_data/intra_lcms_pos.csv', index_col=0)
intracellular_neg_df = pd.read_csv('../processed_data/intra_lcms_neg.csv', index_col=0)
extracellular_pos_df = pd.read_csv('../processed_data/extra_lcms_pos.csv', index_col=0)
extracellular_neg_df = pd.read_csv('../processed_data/extra_lcms_neg.csv', index_col=0)

intracellular_df.head()

Unnamed: 0,2-aminoadipic acid,2'-Deoxycytidine 5'-monophosphoric acid,3-phosphoglycerate,4-aminobutyric acid (GABA),Adenine,Adipic acid,Arachidic acid,Beta-alanine,Carbonate ion,Citric acid,...,Unknown 220,Unknown 221,Unknown 222,Unknown 223,Unknown 224,Unknown 225,Unknown 226,Unknown 227,Unknown 228,Unknown 229
Tags,High Confidence,High Confidence,High Confidence,High Confidence,High Confidence,High Confidence,High Confidence,High Confidence,High Confidence,High Confidence,...,,,,,,,,,,
in_switchgrass_pvhg6_early_stat_1,206675.073240236,265628.2073572,398155.546965357,461693.997122307,553917.012625364,0.0,39884.14605205,1751763.14357969,1766960.21177728,98325.766858502,...,429928.1,49456.507927,371788.05341,234280.995486,5832.703213,1645217.0,351925.677818,24385.393103,217630.729539,198363.596018
in_switchgrass_pvhg6_early_stat_3,341996.103780714,358143.914807286,448290.918266103,496714.999168787,638854.84035566,0.0,84423.2394990384,2040960.80587233,2032543.136365,115405.400973157,...,523340.0,54438.918937,417617.364083,370183.713935,30622.454946,2054061.0,492993.409911,61659.348813,214347.074793,200325.541072
in_switchgrass_pvhg6_early_stat_2,263361.804510689,316901.937516122,382225.895834572,549619.810167144,802743.275250288,0.0,65523.7689777048,2144767.05673166,2175518.31265378,157969.941265746,...,522363.2,78126.743977,411800.779492,501043.347306,528.570981,2814759.0,305358.590677,0.0,267134.158139,243310.544402
in_switchgrass_wt_early_stat_1,361520.397329639,158448.34341112,322605.073303065,677195.048082466,710902.55546792,0.0,21587.08362939,1742924.76926073,2187317.70629786,73643.828000748,...,1094674.0,68781.500733,389899.251774,53068.346347,41841.431914,459118.5,0.0,0.0,0.0,0.0


### Get a list of the named and measured metabolites

In [3]:
all_metabolites = []

# loop through all dataframes and get all metabolites
for df in [intracellular_df, extracellular_1_df, extracellular_4_df, intracellular_pos_df, intracellular_neg_df, extracellular_pos_df, extracellular_neg_df]:

    # get all metabolites in the dataframe
    metabolite_list = df.columns

    # add to the list of all metabolites
    all_metabolites.extend(df.columns)

# get unique metabolites
all_metabolites = list(set(all_metabolites))

# remove the metabolites with 'Unknown' or 'Unnamed' in the name
all_metabolites = [met for met in all_metabolites if 'Unknown' not in met]
all_metabolites = [met for met in all_metabolites if 'Unnamed' not in met]

# remove the duplicates
all_metabolites = list(set(all_metabolites))

print(f'There are {len(all_metabolites)} unique metabolites in the dataset')

all_metabolites

There are 560 unique metabolites in the dataset


['Arecoline',
 'DL-Glutamic acid',
 'Sugars -Disaccharride',
 'L-citrulline',
 'Phosphonoacetic acid',
 'L-Cysteic acid',
 'D-glucose-6-phosphate',
 '4-Methyl-2-oxopentanoic acid',
 'Lumichrome',
 'L-proline',
 'Nicotinic acid',
 'D-Pantothenic acid',
 'Succinamide',
 'Malonic acid',
 'N,N-dimethyl-L-histidine',
 'O-Acetyl-DL-serine',
 'N-Methyl-DL-Glutamic acid',
 'Isoxazolin-5-one',
 'L-valine',
 '12-Hydroxydodecanoic acid',
 "2',4'-Dihdyroxyacetophenone",
 'Vinyl Carbamate',
 '16-Hydroxyhexadecanoic acid',
 'Dialanine',
 'Homoserine',
 'Picolinamide',
 'Adipic acid',
 "Inosine-5'-monophosphate (IMP)",
 'Cyclopentanone',
 "Uridine-5'-monophosphate (UMP)",
 'Sugars-Monosaccharides-Hexoses.1',
 'N-Acetyl-DL-ornithine',
 'L-glutamine',
 'Pipecolic acid/Homoproline',
 'Ethyl 3-Ureidopropionic acid',
 'Phosphoric acid',
 'DL-Arginine',
 '1-Vinylimidazole',
 'DL-Histidine',
 'Glyceric acid',
 '3-Amino-5-Hydroxybenzoic acid',
 '4-Imidazolemethanol',
 'Erythritol',
 'DL-Mandelic acid',
 "2'-

### Look up the KEGG compound id for each compound

In [4]:
metabolite_ids = {}

for metabolite in all_metabolites:
    result = kegg.find("compound", metabolite)
    
    if isinstance(result, str) and result.strip():
        # Extracting the first result which should be the closest match
        entry = result.split("\n")[0]
        print(entry)
        try:
            compound_id, compound_name = entry.split("\t")
        except ValueError:
            compound_id = entry
        metabolite_ids[metabolite] = compound_id
    elif isinstance(result, int):
        print(f"Error retrieving {metabolite}: Bad Request")
        metabolite_ids[metabolite] = "Request Error"
    else:
        print(f'{metabolite} not found')
        metabolite_ids[metabolite] = "Not Found"

# Make a dataframe of the metabolite IDs
metabolite_df = pd.DataFrame(metabolite_ids.items(), columns=['Metabolite', 'KEGG ID'])

metabolite_df

cpd:C10129	Arecoline
DL-Glutamic acid not found




Error retrieving Sugars -Disaccharride: Bad Request
cpd:C00327	L-Citrulline; 2-Amino-5-ureidovaleric acid; Citrulline
cpd:C05682	Phosphonoacetate; Phosphonoacetic acid; Fosfonet
cpd:C00506	L-Cysteate; L-Cysteic acid; 3-Sulfoalanine; 2-Amino-3-sulfopropionic acid
D-glucose-6-phosphate not found
4-Methyl-2-oxopentanoic acid not found
cpd:C01727	Lumichrome; 7,8-Dimethylalloxazine
cpd:C00148	L-Proline; 2-Pyrrolidinecarboxylic acid
cpd:C00153	Nicotinamide; Nicotinic acid amide; Niacinamide; Vitamin PP
D-Pantothenic acid not found
Succinamide not found
cpd:C00383	Malonate; Malonic acid; Propanedioic acid




Error retrieving N,N-dimethyl-L-histidine: Bad Request
O-Acetyl-DL-serine not found
N-Methyl-DL-Glutamic acid not found
cpd:C20891	Isoxazolin-5-one; 5-Hydroxyisoxazole
cpd:C00183	L-Valine; 2-Amino-3-methylbutyric acid
cpd:C08317	12-Hydroxydodecanoic acid; omega-Hydroxydodecanoic acid




Error retrieving 2',4'-Dihdyroxyacetophenone: Bad Request
Vinyl Carbamate not found
cpd:C18218	16-Hydroxypalmitate; 16-Hydroxypalmitic acid; 16-Hydroxyhexadecanoic acid; Juniperic acid
Dialanine not found
cpd:C00263	L-Homoserine; 2-Amino-4-hydroxybutyric acid
cpd:C01950	Picolinamide
cpd:C00322	2-Oxoadipate; 2-Oxoadipic acid; 2-Oxohexanedioic acid
Inosine-5'-monophosphate (IMP) not found
cpd:C00557	Cyclopentanone
Uridine-5'-monophosphate (UMP) not found
Sugars-Monosaccharides-Hexoses.1 not found
N-Acetyl-DL-ornithine not found
cpd:C00064	L-Glutamine; L-2-Aminoglutaramic acid




Error retrieving Pipecolic acid/Homoproline: Bad Request
Ethyl 3-Ureidopropionic acid not found
cpd:C00009	Orthophosphate; Phosphate; Phosphoric acid; Orthophosphoric acid
DL-Arginine not found
1-Vinylimidazole not found
DL-Histidine not found
cpd:C00258	D-Glycerate; Glycerate; (R)-Glycerate; Glyceric acid
cpd:C12107	3-Amino-5-hydroxybenzoate; AHBA; 3-Amino-5-hydroxybenzoic acid; 3-Ahba
4-Imidazolemethanol not found
cpd:C00503	Erythritol; Erythrol; Phycitol; Phycite; Erythrite
DL-Mandelic acid not found
cpd:C00239	dCMP; Deoxycytidylic acid; Deoxycytidine monophosphate; Deoxycytidylate; 2'-Deoxycytidine 5'-monophosphate
cpd:C00498	ADP-glucose; ADP-alpha-D-glucose; Adenosine diphosphoglucose
Sugars-Monosaccharides-Pentoses.2 not found
cpd:C00049	L-Aspartate; L-Aspartic acid; 2-Aminosuccinic acid; L-Asp
cpd:C00262	Hypoxanthine; Purine-6-ol
Triisopropanolamine not found
cpd:C00712	(9Z)-Octadecenoic acid; (Z)-Octadec-9-enoic acid; Oleate; Oleic acid
Prolylleucine not found
Methacrylamide no



Error retrieving DL-Isoleucine/DL-Leucine: Bad Request
cpd:C00096	GDP-mannose; GDP-D-mannose; GDP-alpha-D-mannose
Possible peptide (Glu-Leu) not found
cpd:C00047	L-Lysine; Lysine acid; 2,6-Diaminohexanoic acid
cpd:C01606	Phthalate; o-Phthalic acid; 1,2-Benzenedicarboxylic acid
cis-4-Hydroxyproline not found
cpd:C02240	5-Valerolactone; delta-Valerolactone; gamma-Valerolactone
cpd:C01835	Maltotriose; Amylotriose
cpd:C00122	Fumarate; Fumaric acid; trans-Butenedioic acid
Sugars-Hexoses-Phosphate  not found
cpd:C00186	(S)-Lactate; L-Lactate; L-Lactic acid




Error retrieving 3,4-Dehydroproline: Bad Request
cpd:C22921	Lactobionate; Lactobionic acid; 4-O-beta-D-Galactopyranosyl-D-gluconic acid
Glycolaldehyde Dimer not found
Possible purine not found
cpd:C00035	GDP; Guanosine 5'-diphosphate; Guanosine diphosphate
Atrazin-2-hydroxy not found
cpd:C00188	L-Threonine; 2-Amino-3-hydroxybutyric acid
Citroflex A-4 not found
Possible Peptide.1 not found
Monomethyl glutaric acid not found
cpd:C06044	4-Hydroxyphenylethanol; Tyrosol
cpd:C00029	UDP-glucose; UDPglucose; UDP-D-glucose; Uridine diphosphate glucose; UDP-alpha-D-glucose
cpd:C00093	sn-Glycerol 3-phosphate; Glycerophosphoric acid; D-Glycerol 1-phosphate
cpd:C19456	N-Methylolacrylamide; N-(Hydroxymethyl)acrylamide
cpd:C00160	Glycolate; Glycolic acid; Hydroxyacetic acid
cpd:C00178	Thymine; 5-Methyluracil




Error retrieving Υ-Aminobutyric acid (GABA): Bad Request
cpd:C00755	4-Hydroxy-3-methoxy-benzaldehyde; Vanillin; Vanillaldehyde; 4-Hydroxy-3-methoxybenzaldehyde
cpd:C00476	D-Lyxose
cpd:C02230	3-Methylguanine; 2-Amino-3,7-dihydro-3-methyl-6H-purin-6-one
D-xylulose-5-phosphate not found
Uridine 5'-diphosphate (UDP) not found
DL-Serine not found




Error retrieving Pentulose, 5-phosphate: Bad Request
cpd:C00036	Oxaloacetate; Oxalacetic acid; Oxaloacetic acid; 2-Oxobutanedioic acid; 2-Oxosuccinic acid; keto-Oxaloacetate
cpd:C02341	trans-Aconitate; trans-Aconitic acid
Sugars-Hexoses-Phosphate.1 not found
cpd:C05382	Sedoheptulose 7-phosphate; D-Sedoheptulose 7-phosphate; D-altro-Heptulose 7-phosphate; altro-Heptulose 7-phosphate
cpd:C00870	4-Nitrophenol; p-Nitrophenol; PNP; Niphen; 4-Hydroxynitrobenzene
cpd:C00252	Isomaltose; Brachiose




Error retrieving 2,3-Dimethyl-3-hydroxyglutaric acid: Bad Request




Error retrieving Nicotinic acid/Niacin: Bad Request




Error retrieving DL-Valine/DL-Norvaline: Bad Request
cpd:C01168	Pseudouridine 5'-phosphate
2'-Deoxycytidine 5'-diphosphate (dCDP) not found
cpd:C00059	Sulfate; Sulfuric acid




Error retrieving 3,4-dihydroxybenzoic acid (protocatechuic acid): Bad Request
Possible Fatty Acid not found
cpd:C04308	Phosphatidyl-N-dimethylethanolamine
cpd:C01152	N(pi)-Methyl-L-histidine; N-pros-Methyl-L-histidine; 3-Methylhistidine; 1-Methylhistidine
2-(Dimethylamino)acetonitrile not found
cpd:C01546	2-Furoate; 2-Furoic acid; 2-Furancarboxylic acid; Pyromucic acid
cpd:C01546	2-Furoate; 2-Furoic acid; 2-Furancarboxylic acid; Pyromucic acid
cpd:C02216	1-Methyladenine
Aminolevulinic acid not found
cpd:C00077	L-Ornithine; (S)-2,5-Diaminovaleric acid; (S)-2,5-Diaminopentanoic acid; (S)-2,5-Diaminopentanoate
N-Succinyl-L-diaminopimelic acid not found
N-alpha-Acetyl-L-lysine not found
Glutarylglycine not found
cpd:C00052	UDP-alpha-D-galactose; UDP-D-galactose; UDP-galactose; UDP-D-galactopyranose; UDP-alpha-D-galactopyranose
cpd:C00166	Phenylpyruvate; Phenylpyruvic acid; alpha-Ketohydrocinnamic acid; keto-Phenylpyruvate; 3-Phenyl-2-oxopropanoate; 2-Oxo-3-phenylpropanoate
cpd:C00805	Salic



Error retrieving cis,cis-muconic acid: Bad Request
3-(2-Hydroxyphenyl)propanoic acid not found
cpd:C16884	D-Threitol
Bis(methylbenzylidene)sorbitol not found




Error retrieving 3,5-dihydroxybenzoic acid: Bad Request
cpd:C00431	5-Aminopentanoate; 5-Aminopentanoic acid; 5-Aminovaleric acid
cpd:C02502	2-Hydroxypyridine; 2-Pyridone; 2-Pyridinol
Carbohydrate Phosphate not found
Tributylamine not found
D-Pantothenicacid not found
Dimethyldecylamine not found
cpd:C14514	Chinomethionat; Quinomethionate; 6-Methyl-2,3-quinoxalinedithiol cyclic S,S-dithiocarbonate
cpd:C00198	D-Glucono-1,5-lactone; Gluconic lactone; Gluconic acid lactone; 1,5-Gluconolactone; delta-Gluconolactone; D-Gluconolactone; Gluconolactone
DL-Aspartic acid not found
cpd:C12262	4,4-Disubstituted cyclohexenone
3-Isopropylmalic acid not found
2-Aminopurine not found
Adenosine 3'5'-cyclic monophosphate not found
Allopurinol not found
Riboflavin (Vit B2) not found
Aurantioobtusin not found
cpd:C10684	2',6'-Dimethoxy-4'-hydroxyacetophenone




Error retrieving 2,3-dihydroxyisovaleric acid: Bad Request
Possible Carbohydrate.1 not found
cpd:C00047	L-Lysine; Lysine acid; 2,6-Diaminohexanoic acid
D-Xylonic acid not found
N-Acetyl-DL-aspartic acid not found
cpd:C01877	4-Oxoproline; 4-Oxo-L-proline
DL-Lactic Acid not found
Sugars-Monosaccharides-Pentoses not found
Acetylsulfaphenazole not found
cpd:C02774	10-Hydroxydecanoic acid; 10-Hydroxydecanoate
Acetylpyrazine not found




Error retrieving Benzene-1,2,4-triol: Bad Request
cpd:C10996	Daminozide




Error retrieving N-Acetylvaline\N-Acetylnorvaline: Bad Request
3-Pyridinol not found
cpd:C00239	dCMP; Deoxycytidylic acid; Deoxycytidine monophosphate; Deoxycytidylate; 2'-Deoxycytidine 5'-monophosphate




Error retrieving N,N-Dimethylglycine: Bad Request
cpd:C00388	Histamine; 1H-Imidazole-4-ethanamine; 2-(4-Imidazolyl)ethylamine




Error retrieving 2,4-Dimethyloxazole: Bad Request
cpd:C01127	4-Hydroxy-2-oxoglutarate; 4-Hydroxy-2-oxoglutaric acid
cpd:C00380	Cytosine
cpd:C07599	Alloxanthine; Oxipurinol; Oxypurinol
cpd:C02220	2-Aminomuconate; 2-Aminomuconic acid; o-Aminomuconate; (2E,4Z)-2-Aminohexa-2,4-dienedioate; (2E,4Z)-2-Aminohexa-2,4-dienedioic acid




Error retrieving N,N-Dimethyl-histidine: Bad Request
cpd:C00013	Diphosphate; Diphosphoric acid; Pyrophosphate; Pyrophosphoric acid; PPi
cpd:C00117	D-Ribose 5-phosphate; Ribose 5-phosphate
1-Acetylimidazole not found
cpd:C13728	4-Aminopyridine; 4-AP
cpd:C00022	Pyruvate; Pyruvic acid; 2-Oxopropanoate; 2-Oxopropanoic acid; Pyroracemic acid
Cyclic adenosine monophosphate (cAMP) not found
Phlorobenzophenone not found
2'-Deoxyadenosine-5'-monophosphate (dAMP) not found
Adenosine-5-monophosphate not found
N-Acetylasparagine not found
11(E)-Eicosenoic Acid not found
cpd:C10726	Xanthoxylin
4-aminobutyric acid (GABA) not found
2-Foryml-1H-pyrrol not found
Methyl acetoacetic acid not found
Methylmalonic acid semialdehyde not found
cpd:C00490	Itaconate; Itaconic acid; Methylenesuccinic acid
cpd:C02305	Phosphocreatine; N-Phosphocreatine; Creatine phosphate
cpd:C05616	3-O-Methylgallate; 3-O-Methylgallic acid; 5-Hydroxyvanillic acid; 3,4-Dihydroxy-5-methoxybenzoate
Sugars-Deoxy-Hexoses (Rhamnose) not



Error retrieving beta-Nicotinamide adenine dinucleotide (NAD+): Bad Request
cpd:C00025	L-Glutamate; L-Glutamic acid; L-Glutaminic acid; Glutamate
cpd:C01799	D-Norvaline; D-2-Aminovaleric acid; D-2-Aminopentanoic acid
N-acetyl-L-2-aminoadipic acid not found
cpd:C01020	6-Hydroxynicotinate; 6-Hydroxynicotinic acid
cpd:C02170	Methylmalonate; Methylmalonic acid
Deoxycarnitine not found
cpd:C00021	S-Adenosyl-L-homocysteine; S-Adenosylhomocysteine




Error retrieving trans,trans-muconic acid: Bad Request
cpd:C00197	3-Phospho-D-glycerate; D-Glycerate 3-phosphate; 3-Phospho-(R)-glycerate; 3-Phosphoglycerate
Tetraglyme not found
cpd:C01268	5-Amino-6-(5'-phosphoribosylamino)uracil; 5-Amino-6-(ribosylamino)-2,4-(1H,3H)-pyrimidinedione 5'-phosphate; 5-Amino-6-(5-phosphoribosylamino)uracil
cpd:C11341	N-Acetylphenylalanine beta-naphthyl ester; N-Acetyl-DL-phenylalanine beta-naphthyl ester
cpd:C14514	Chinomethionat; Quinomethionate; 6-Methyl-2,3-quinoxalinedithiol cyclic S,S-dithiocarbonate
Heptadecanoic acid not found
cpd:C10269	Lunularin; 3,4'-Ethylenebisphenol
cpd:C11711	Succinylproline; Succinyl-L-proline
4-hydroxybenzoic acid (p-salicylic acid) not found
cpd:C00108	Anthranilate; Anthranilic acid; o-Aminobenzoic acid; Vitamin L1; 2-Aminobenzoate
4-Hydroxy-DL-phenylglycine not found
cpd:C06554	Cyanuric acid
N-Methylglutamine not found
cpd:C00149	(S)-Malate; L-Malate; L-Apple acid; L-Malic acid; L-2-Hydroxybutanedioic acid; Malate; Malic 



Error retrieving (S,S)-Tartaric acid: Bad Request
Sedoheptulose-7-phosphate not found




Error retrieving D-Ribulose 1,5-bisphosphate: Bad Request
Beta-Ketoadipic acid not found
cpd:C00990	5-Aminopentanamide
cpd:C00041	L-Alanine; L-2-Aminopropionic acid; L-alpha-Alanine
cpd:C18492	Tricyclazole
cpd:C00015	UDP; Uridine 5'-diphosphate
2-Hydroxyatrazine not found
cpd:C01005	O-Phospho-L-serine; L-O-Phosphoserine; 3-Phosphoserine; Dexfosfoserine; 3-Phospho-L-serine
D-fructose-6-phosphate not found
cpd:C05984	2-Hydroxybutanoic acid; 2-Hydroxybutyrate; 2-Hydroxybutyric acid; (S)-2-Hydroxybutanoate; (2S)-2-Hydroxybutanoic acid
3-Methyl-2-oxovaleric acid not found
cpd:C06317	Vanillyl alcohol; 4-Hydroxy-3-methoxy-benzenemethanol; 4-Hydroxy-3-methoxybenzyl alcohol; 4-Hydroxy-3-methoxybenzenemethanol
DL-Normetanephrine not found
Sugars-Disaccharides.1 not found




Error retrieving 4,5-Pyrimidinediamine,N5,2-dimehtyl-: Bad Request
DL-Lysine lactam not found
cpd:C01607	Phytanate; Phytanic acid; 3,7,11,15-Tetramethylhexadecanoic acid
cpd:C03665	2-Amino-2-methylpropanoate; 2-Aminoisobutyric acid
cpd:C11735	N-Ethylglycine
Gycolic acid not found
Possible Peptide not found
1-Methyl-L-proline not found




Error retrieving 2,4-Dihydroxypyrimidine-5-Carboxylic acid: Bad Request
cpd:C00120	Biotin; D-Biotin; Vitamin H; Coenzyme R
thymidine diphosphate (dTDP) not found




Error retrieving Υ-Glutamylcysteine: Bad Request
1-pyrroline-5-carboxylic acid not found
cpd:C03672	3-(4-Hydroxyphenyl)lactate; 4-Hydroxyphenyllactate; p-Hydroxyphenyllactate; 2-Hydroxy-3-(4-hydroxyphenyl)propanoate
Desaminometribuzin not found
cpd:C01879	5-Oxoproline; Pidolic acid; Pyroglutamic acid; 5-Pyrrolidone-2-carboxylic acid; Pyroglutamate; 5-Oxo-L-proline; L-Pyroglutamic acid; L-5-Pyrrolidone-2-carboxylic acid
cpd:C03137	N-Acetyl-D-tryptophan
cpd:C00108	Anthranilate; Anthranilic acid; o-Aminobenzoic acid; Vitamin L1; 2-Aminobenzoate
cpd:C01214	1-Amino-1-deoxy-scyllo-inositol; scyllo-Inosamine




Error retrieving 2,3-dihydro-1H-pyrrole-2-carboxylic acid: Bad Request




Error retrieving 1,4-Diamino-2-butyne: Bad Request
cpd:C00544	Homogentisate; Homogentisic acid; 2,5-Dihydroxyphenylacetic acid; 2,5-Dihydroxyphenylacetate
DL-Proline.1 not found
cpd:C00105	UMP; Uridylic acid; Uridine monophosphate; Uridine 5'-monophosphate; 5'Uridylic acid; Uridylate
4-Dodecylbenzenesulfonic acid not found
Trifluoroacetic acid.1 not found
cpd:C00407	L-Isoleucine; 2-Amino-3-methylvaleric acid
cpd:C00741	Diacetyl; Biacetyl; Dimethylglyoxal; 2,3-Butanedione
2-Hydroxycinnamaldehyde not found
O-propenoyl-D-carnitine not found
DL-Proline not found
cpd:C00956	L-2-Aminoadipate; L-alpha-Aminoadipate; L-alpha-Aminoadipic acid; L-2-Aminoadipic acid; L-2-Aminohexanedioate
DL-Homoserine not found
2'-Deoxyguanosine-5'-diphosphate (dGDP) not found
cpd:C00181	D-Xylose; Wood sugar
Aminoproline not found




Error retrieving Cytidine 5′-monophosphate (CMP): Bad Request
cpd:C00134	Putrescine; 1,4-Butanediamine; 1,4-Diaminobutane; Tetramethylenediamine; Butane-1,4-diamine




Error retrieving 1,4-Dimethylimidazole: Bad Request
3-oxopalmitic acid not found
2'-Deoxycytidine 5'-monophosphate (dCMP) not found




Error retrieving 2,6-Diamino-4-hexenoic acid: Bad Request
N-Methyl-DL-Aspartic acid not found




Error retrieving (2S,4S)-4-Amino-2-hydroxy-2-methylpentanedioic acid: Bad Request
cpd:C00431	5-Aminopentanoate; 5-Aminopentanoic acid; 5-Aminovaleric acid
cpd:C06771	Triethanolamine; Trolamine
cpd:C05400	Epimelibiose; 6-O-(alpha-D-Galactopyranosyl)-D-mannopyranose
cpd:C00079	L-Phenylalanine; (S)-alpha-Amino-beta-phenylpropionic acid
alpha-Methylene-gamma-butyrolactone (Tulipalin A) not found
cpd:C00483	Tyramine; 2-(p-Hydroxyphenyl)ethylamine




Error retrieving Trans-Cyclohexane-1,2-diol: Bad Request
cpd:C00394	GDP-glucose; GDP-D-glucose; GDP-alpha-D-glucose
cpd:C00003	NAD+; NAD; Nicotinamide adenine dinucleotide; DPN; Diphosphopyridine nucleotide; Nadide; beta-NAD+
Sugars-Deoxy-Hexoses not found
cpd:C00322	2-Oxoadipate; 2-Oxoadipic acid; 2-Oxohexanedioic acid
3-Amino-2-piperidone not found
Methyl mesylate not found
DL-3-Aminoisobutyric acid not found
cpd:C09541	(S)-Isoboldine; (+)-Isoboldine; Isoboldine; 2,10-Dimethoxy-6aalpha-aporphine-1,9-diol
Uridine diphosphate-N-acetylgalactosamine not found
PPG n4 not found
2'-Deoxyadenosine-5'-diphosphate (dADP) not found
cpd:C06424	Tetradecanoic acid; Tetradecanoate; Myristic acid
cpd:C00633	4-Hydroxybenzaldehyde; p-Hydroxybenzaldehyde
cpd:C05472	Urocortisol; Tetrahydrocortisol; 5beta-Pregnane-3alpha,11beta,17alpha,21-tetrol-20-one
Levulinic acid not found
cpd:C06425	Icosanoic acid; Eicosanoic acid; Arachidic acid
cpd:C00262	Hypoxanthine; Purine-6-ol
Possible Peptide.4 not found
cpd:



Error retrieving 2,3-dihydroxybenzoic acid: Bad Request
Trisaccharide-phosphate not found
N-Alpha-Acetyl-L-Lysine not found
cpd:C00048	Glyoxylate; Glyoxalate; Glyoxylic acid
cpd:C01077	O-Acetyl-L-homoserine; O-Acetylhomoserine
cpd:C00666	LL-2,6-Diaminoheptanedioate; LL-2,6-Diaminopimelate; LL-2,6-Diaminopimelic acid; (2S,6S)-2,6-Diaminoheptanedioic acid
cpd:C19636	Turanose; 3-O-alpha-D-Glucopyranosyl-D-fructose; D-Turanose
1-(Diethylamino)ethanol not found
DL-Threonine not found




Error retrieving Prostaglandin F3α: Bad Request




Error retrieving (2S)-2-Piperazinecarboxamide\(2R)-2-Piperazinecarboxamide: Bad Request
cpd:C01050	UDP-N-acetylmuramate; UDP-N-acetyl-alpha-D-muramate; UDP-N-acetylmuramic acid; UDP-MurNAc
cpd:C00170	5'-Methylthioadenosine; Methylthioadenosine; S-Methyl-5'-thioadenosine; 5-Methylthioadenosine; 5'-Deoxy-5'-(methylthio)adenosine; Thiomethyladenosine; MTA; 5'-Deoxy-5'-(methylsulfanyl)adenosine




Error retrieving cis,cis-Muconic acid: Bad Request
Possible peptide.1 not found
DL-Isoleucine not found
Inosine 5'-monophosphate (IMP) not found
Methyl Dehydrojasmonate not found
cpd:C15311	N,N-Bis(2-chloroethyl)-DL-alanine hydrochloride
cpd:C00794	D-Sorbitol; D-Glucitol; L-Gulitol; Sorbitol
cpd:C00019	S-Adenosyl-L-methionine; S-Adenosylmethionine; AdoMet; SAM
N-Acetylglycine not found
cpd:C06231	Ectoine; L-Ectoine; (4S)-2-Methyl-1,4,5,6-tetrahydropyrimidine-4-carboxylate
Possible Carbohydrate (disaccharide) not found
Shikimate-3-phosphate not found
cpd:C01904	D-Arabitol; D-Arabinitol; D-Arabinol; D-Lyxitol
cpd:C02647	4-Guanidinobutanal
cpd:C14457	Simetryn; 2,4-Di(ethylamino)-6-methylthio-1,3,5-triazine
Amino-Sugars-C8 not found
cpd:C00934	Sugar phosphate
Cytidine 5'-monophosphate (CMP) not found
cpd:C21554	Torcitabine; L-dC; 2'-Deoxy-beta-L-cytidine; beta-L-2'-Deoxycytidine
cpd:C00242	Guanine; 2-Amino-6-hydroxypurine
cpd:C00450	(S)-2,3,4,5-Tetrahydropyridine-2-carboxylate; Delta1-Pipe



Error retrieving LL-2,6-diaminoheptanedioate: Bad Request
cpd:C00847	4-Pyridoxate; 4-Pyridoxic acid
Possible Peptide.3 not found
cpd:C03623	N-Methyl-2-oxoglutaramate; N-Methyl-2-oxoglutaramic acid




Error retrieving 1,3-Diphenylurea: Bad Request
Uridine 5'-diphospho-monosaccharide not found
cpd:C13426	I-123 BMIPP; 123I-15-(p-iodophenyl)-3-R,S-methylpentadecanoic acid; I-123-beta-methyl-p-iodophenyl-methylpentadecanoic acid
cpd:C01909	Dethiobiotin; Desthiobiotin
Cyclic guanosine monophosphate (cGMP) not found
Iminodiacetic acid not found
cpd:C00956	L-2-Aminoadipate; L-alpha-Aminoadipate; L-alpha-Aminoadipic acid; L-2-Aminoadipic acid; L-2-Aminohexanedioate
cpd:C00263	L-Homoserine; 2-Amino-4-hydroxybutyric acid
cpd:C05227	UDP-sugar; UDP-monosaccharide
4-hydroxy-3-methoxybenzoic acid (isovanillic acid) not found
cpd:C01632	N-Carbobenzoxyglycylproline; Z-Gly-Pro; Z-Gly-Pro-OH
DL-Tyrosine not found
cpd:C00157	Phosphatidylcholine; Lecithin; Phosphatidyl-N-trimethylethanolamine; 1,2-Diacyl-sn-glycero-3-phosphocholine; Choline phosphatide; 3-sn-Phosphatidylcholine




Error retrieving N6,N6,N6-Trimethyl-L-lysine: Bad Request
cpd:C01037	7,8-Diaminononanoate; 7,8-Diaminopelargonic acid; DAPA; 7,8-Diaminononanoic acid
Benzealdehyde not found
cpd:C00301	ADP-ribose; ADP-D-ribose; Adenosine diphosphate ribose
Adenosine 5' monophosphate (AMP) not found
Sedoheptulose anhydride monohydrate not found
cpd:C14205	4-tert-Octylphenol; 4-t-Octylphenol; 4-(1,1,3,3-Tetramethylbutyl)phenol
cpd:C00209	Oxalate; Oxalic acid; Ethanedioic acid
Possibe Peptide not found
Sugars-Hexoses-Phosphate not found
Flavanone glycoside.1 not found
4-Acetamidobutyric acid not found




Error retrieving 3,4-Diaminopyridine: Bad Request
Flavin adenine dinucleotide (FAD) not found




Error retrieving 2,2-Bis(hydroxymethyl)propionic acid: Bad Request
DL-Allothreonine not found
Inosine diphosphate (IDP) not found
cpd:C00043	UDP-N-acetyl-alpha-D-glucosamine; UDP-N-acetyl-D-glucosamine; UDP-N-acetylglucosamine
cpd:C08362	(9Z)-Hexadecenoic acid; Palmitoleic acid; cis-9-Hexadecenoic acid; (9Z)-Hexadec-9-enoate
cpd:C00037	Glycine; Aminoacetic acid; Gly
cpd:C00689	alpha,alpha'-Trehalose 6-phosphate; Trehalose 6-phosphate
cpd:C00188	L-Threonine; 2-Amino-3-hydroxybutyric acid
cpd:C01327	Hydrochloric acid; HCl; Hydrogen chloride; Hydrochloride
cpd:C06695	2-Oxazolidinone




Error retrieving Pentulose, 5-phosphate.1: Bad Request
cpd:C00013	Diphosphate; Diphosphoric acid; Pyrophosphate; Pyrophosphoric acid; PPi
cpd:C01771	2-Butenoate; 2-Butenoic acid; Crotonic acid; 3-Methylacrylic acid; (E)-But-2-enoic acid
cpd:C18812	Thidiazuron
cpd:C02679	Dodecanoic acid; Dodecanoate; Dodecylcarboxylate; Lauric acid
Trehalose-6-phosphate not found
cpd:C00135	L-Histidine; (S)-alpha-Amino-1H-imidazole-4-propionic acid




Error retrieving 1,4-butanediol: Bad Request
8-Hydroxyoctanoic acid not found
Nicotinic acid mononucleotide not found
cpd:C00123	L-Leucine; 2-Amino-4-methylvaleric acid; (2S)-alpha-2-Amino-4-methylvaleric acid; (2S)-alpha-Leucine
cpd:C02183	Phloroglucinol; 1,3,5-Benzenetriol; 1,3,5-Trihydroxybenzene; Benzene-1,3,5-triol
cpd:C01551	Allantoin; 5-Ureidohydantoin; Glyoxyldiureide
cpd:C02614	(S)-2-Methylmalate; (S)-2-Methylmalic acid; (S)-Citramalate; (S)-Citramalic acid; (S)-alpha-Hydroxypyrotartaric acid; L-Citramalate; L-Citramalic acid; L-alpha-Hydroxypyrotartaric acid; (2S)-2-Hydroxy-2-methylbutanedioate; (S)-2-Hydroxy-2-methylsuccinic acid
Methyl stearate not found
Prolylglycine not found




Error retrieving 2,5-Dimethyloxazole: Bad Request
Monomethyl phthalate not found
Glycerylphosphorylethanolamine not found
Possible Lipid (C16) not found
cpd:C00632	3-Hydroxyanthranilate; 3-Hydroxyanthranilic acid
cpd:C06423	Octanoic acid; Caprylic acid; Octylic acid; Octanoate
cpd:C02218	Dehydroalanine; 2-Aminoacrylate; 2-Aminoprop-2-enoate
cpd:C00208	Maltose; Malt sugar; alpha-D-Glucopyranosyl-(1->4)-D-glucopyranose
cpd:C00081	ITP; Inosine 5'-triphosphate; Inosine triphosphate; Inosine tripolyphosphate
Ophthalmic acid not found
cpd:C18534	1-Naphthaleneacetic acid sodium salt
Sugars-Monosaccharides-Hexoses not found
DL-Ornithine.1 not found
cpd:C01530	Octadecanoic acid; Stearate; Stearic acid
Hydroxymethanesulfonic acid not found
MHPG not found
cpd:C00544	Homogentisate; Homogentisic acid; 2,5-Dihydroxyphenylacetic acid; 2,5-Dihydroxyphenylacetate
D-Ribose-5-phosphate not found
Sugar-Tetrasacharride not found
N-Acetylvaline not found
cpd:C00015	UDP; Uridine 5'-diphosphate
cpd:C00158	Cit



Error retrieving Adenosine 3',5'-Biphosphate: Bad Request
N-Acetyl-DL-Leucine not found
Possible Peptide.2 not found
cpd:C00869	2-Oxooctadecanoic acid; 2-Oxostearate
cpd:C07997	Eflornithine; DL-Ornithine, 2-(difluoromethyl)-
N-Acetyl-L-proline not found
cpd:C11584	4-Methylumbelliferone glucuronide; 4-Methylumbelliferyl glucuronide
cpd:C00245	Taurine; 2-Aminoethanesulfonic acid; Aminoethylsulfonic acid
cpd:C01709	Hesperetin; 3',5,7-Trihydroxy-4'-methoxyflavanone
DL-Glutamine not found
5'-S-Methyl-5'-thioadenosine not found
cpd:C00852	Chlorogenate; Chlorogenic acid; Caffeoyl quinic acid; trans-5-O-Caffeoyl-D-quinate
4-Acetamidobutanoic acid not found
cpd:C00230	3,4-Dihydroxybenzoate; 3,4-Dihydroxybenzoic acid; Protocatechuate; Protocatechuic acid
Myristyl sulfate not found
DL-Carnitine not found
4-hydroxyphenylpyruvic acid not found
N-methylalanine not found
Guanosine-5'-diphosphate (GDP) not found
Thymidine-5'-phosphate (dTMP) not found
cpd:C11101	5-Hydroxymethyl-2-furaldehyde; HMF; 5-H



Error retrieving 2,2,6,6-Tetramethyl-4-piperidinol: Bad Request
cpd:C06316	Dehydro-D-arabinono-1,4-lactone; (5R)-3,4-Dihydroxy-5-(hydroxymethyl)furan-2(5H)-one
cpd:C00074	Phosphoenolpyruvate; Phosphoenolpyruvic acid; PEP
cpd:C03661	1F-beta-D-Fructosylsucrose; beta-D-Fructofuranosyl-(2->1)-beta-D-fructofuranosyl alpha-D-glucopyranoside; 1-Kestose
cpd:C03067	3-Hydroxybenzaldehyde
cpd:C00106	Uracil
cpd:C00097	L-Cysteine; L-2-Amino-3-mercaptopropionic acid
cpd:C05198	5'-Deoxyadenosine
Dodecamethylcyclohexasiloxane not found
N-epsilon-Acetyl-L-lysine not found
cpd:C01108	1,2,3-Trihydroxybenzene; Pyrogallol; Pyrogallic acid; 1,2,3-Benzenetriol
cpd:C00137	myo-Inositol; D-myo-Inositol; 1D-myo-Inositol; L-myo-Inositol; 1L-myo-Inositol; meso-Inositol; Inositol; Dambose; Cyclohexitol; Meat sugar; Bios I
Sugars-Monosaccharides-Hexoses.2 not found
2-phosphoglyceric acid not found
cpd:C16754	Aflatoxin G2
Sugars-Hexoses-Phosphate.2 not found
cpd:C00082	L-Tyrosine; (S)-3-(p-Hydroxyphenyl)alanine; (S)-

Unnamed: 0,Metabolite,KEGG ID
0,Arecoline,cpd:C10129
1,DL-Glutamic acid,Not Found
2,Sugars -Disaccharride,Request Error
3,L-citrulline,cpd:C00327
4,Phosphonoacetic acid,cpd:C05682
...,...,...
555,Uridine 5'-diphospho-N-acetylglucosamine,Not Found
556,Catechol,cpd:C00090
557,DL-allylglycine,Not Found
558,Phosphoglycolic acid,cpd:C00988


### Make a dataframe of only the known metabolites

In [5]:
known_metabolite_df = metabolite_df.copy()

# filter out the metabolites that were not found and request errors
known_metabolite_df = known_metabolite_df[metabolite_df['KEGG ID'] != 'Not Found']
known_metabolite_df = known_metabolite_df[metabolite_df['KEGG ID'] != 'Request Error']

# reindex so that the index is continuous
known_metabolite_df.reset_index(drop=True, inplace=True)

known_metabolite_df

  known_metabolite_df = known_metabolite_df[metabolite_df['KEGG ID'] != 'Request Error']


Unnamed: 0,Metabolite,KEGG ID
0,Arecoline,cpd:C10129
1,L-citrulline,cpd:C00327
2,Phosphonoacetic acid,cpd:C05682
3,L-Cysteic acid,cpd:C00506
4,Lumichrome,cpd:C01727
...,...,...
277,Aflatoxin G2,cpd:C16754
278,L-tyrosine,cpd:C00082
279,Catechol,cpd:C00090
280,Phosphoglycolic acid,cpd:C00988


### Make a dataframe of only unknown metabolites

In [6]:
# Make a dataframe of only unknown metabolites
unknown_metabolite_df = metabolite_df.copy()

# make a list of the known metabolites
known_metabolites = known_metabolite_df['Metabolite'].tolist()

# filter out the known metabolites
unknown_metabolite_df = unknown_metabolite_df[~unknown_metabolite_df['Metabolite'].isin(known_metabolites)]

# reindex so that the index is continuous
unknown_metabolite_df.reset_index(drop=True, inplace=True)

# save the dataframes to csv
unknown_metabolite_df.to_csv('../data/unknown_metabolites.csv')

unknown_metabolite_df



Unnamed: 0,Metabolite,KEGG ID
0,DL-Glutamic acid,Not Found
1,Sugars -Disaccharride,Request Error
2,D-glucose-6-phosphate,Not Found
3,4-Methyl-2-oxopentanoic acid,Not Found
4,D-Pantothenic acid,Not Found
...,...,...
273,Sugars-Monosaccharides-Hexoses.2,Not Found
274,2-phosphoglyceric acid,Not Found
275,Sugars-Hexoses-Phosphate.2,Not Found
276,Uridine 5'-diphospho-N-acetylglucosamine,Not Found


### Make a dataframe connecting metabolite name, KEGG ID, and KEGG Pathway

In [7]:
metabolite_pathway_list = []

metabolite_pathway_df = known_metabolite_df.copy()

# loop over the metabolites and get the pathways
for index, row in metabolite_pathway_df.iterrows():
    # keep track of progress
    if index % 10 == 0:
        print(f'Processing metabolite {index} of {len(metabolite_pathway_df)}')

    # get the KEGG ID
    kegg_id = row['KEGG ID']

    # get the pathways for the metabolite
    pathways = get_pathways_for_metabolite(kegg_id)

    # make a string of the pathways
    pathways = ';'.join(pathways)

    # add to the list
    metabolite_pathway_list.append(pathways)

# add the pathways to the dataframe
metabolite_pathway_df['KEGG Pathways'] = metabolite_pathway_list

metabolite_pathway_df

Processing metabolite 0 of 282
Processing metabolite 10 of 282
Processing metabolite 20 of 282
Processing metabolite 30 of 282
Processing metabolite 40 of 282
Processing metabolite 50 of 282
Processing metabolite 60 of 282
Processing metabolite 70 of 282
Processing metabolite 80 of 282
Processing metabolite 90 of 282
Processing metabolite 100 of 282
Processing metabolite 110 of 282
Processing metabolite 120 of 282
Processing metabolite 130 of 282
Processing metabolite 140 of 282
Processing metabolite 150 of 282
Processing metabolite 160 of 282
Processing metabolite 170 of 282
Processing metabolite 180 of 282
Processing metabolite 190 of 282
Processing metabolite 200 of 282
Processing metabolite 210 of 282
Processing metabolite 220 of 282
Processing metabolite 230 of 282
Processing metabolite 240 of 282
Processing metabolite 250 of 282
Processing metabolite 260 of 282
Processing metabolite 270 of 282
Processing metabolite 280 of 282


Unnamed: 0,Metabolite,KEGG ID,KEGG Pathways
0,Arecoline,cpd:C10129,
1,L-citrulline,cpd:C00327,map00220;map01100;map01110;map01230
2,Phosphonoacetic acid,cpd:C05682,map00440;map01100;map01120
3,L-Cysteic acid,cpd:C00506,map00270;map00430;map01100;map04080
4,Lumichrome,cpd:C01727,map00740
...,...,...,...
277,Aflatoxin G2,cpd:C16754,map00254;map01100;map01110
278,L-tyrosine,cpd:C00082,map00130;map00261;map00350;map00360;map00400;m...
279,Catechol,cpd:C00090,map00361;map00362;map00364;map00621;map00624;m...
280,Phosphoglycolic acid,cpd:C00988,map00630;map01100;map01110;map01200


### Save the metabolite name -> KEGG ID -> KEGG pathways dataframe as a .csv

In [8]:
metabolite_pathway_df.to_csv('../data/kegg_metabolite_pathways.csv', index=False)