In [1]:
import pandas as pd
import json

In [2]:
# Read the combined metabolite table and use its 'i' column as the row index.
data = (
    pd.read_csv(r'../data/processed/combined_metabolites_data_with_model_params.csv')
    .set_index('i')
)

In [3]:
# Rows that are metabolites and whose ID has been resolved (not 'unknown').
is_known_metabolite = (data['Type'] == 'metabolite') & (data['ID'] != 'unknown')

# Print one ID per line, walking the selected rows in reverse row order.
for metabolite_id in data.loc[is_known_metabolite, 'ID'][::-1]:
    print(metabolite_id)

1,5-Anhydro-D-glucitol
2-Mercaptoethanol
2-Naphthalenesulfonic acid
3-Hydroxybutyric acid
3-Indoxyl sulphate
3-Methylhistidine
4-Guanidinobutyric acid
4-Hydroxybenzaldehyde
4-Hydroxybutyric acid (GHB)
4-Oxoproline
5,5-Dimethylhydantoin
8-Hydroxyquinoline
Acetyl-β-methylcholine
Acrylic acid
Adenosine 5'-monophosphate
Alanine
alpha-Glycerylphosphorylcholine
Anhydrohexose
Anserine
Arginine
Ascorbic acid 2-sulfate
Asparagine
Beta alanine
Beta-D-Glucopyranuronic acid
Betaine
Carnitine
Choline
Citramalic acid
Creatine
Creatinine
Cytidine
Ergothioneine
Ethyl-beta-D-glucuronide
Gluconic acid
Glucose
Glutamic acid
Glutamine
Glyceric acid
Guanidinosuccinic acid
Hexose sugar
Iditol
Hippuric acid
Histidine
Hydrocinnamic acid
Hydroxycinnamic acid
Isoleucine
Lactic Acid
Leucine
Methylaminopyrimidine
Methylhistidine
N-Acetyl-L-aspartic acid
Pyroglutamic acid
N-Acetylneuraminic acid
N-Acetylornithine
N-Isovalerylglycine
N-Methyl-2-pyrrolidone
N3,N4-Dimethyl-L-arginine
N6,N6,N6-Trimethyl-L-lysine
Nicot

In [4]:
# Manual assignment of each identified metabolite ID to one of six coarse
# chemical classes: 'Alcohol', 'Amino acid', 'Amino acid derivative',
# 'Carbohydrate', 'Nitrogen heterocycle', 'Organic acid'.
#
# Keys must match the strings in data['ID'] exactly (case-sensitive), otherwise
# a lookup by ID silently misses the metabolite. Where two spellings of the same
# compound are listed, both are kept on purpose so either one resolves.
metab_classes = {
    "1,5-Anhydro-D-glucitol":           'Carbohydrate',
    "2-Mercaptoethanol":                'Alcohol',
    "2-Naphthalenesulfonic acid":       'Organic acid',
    "3-Hydroxybutyric acid":            'Organic acid',
    "3-Indoxyl sulphate":               'Organic acid',
    "3-Methylhistidine":                'Amino acid derivative',
    "4-Guanidinobutyric acid":          'Organic acid',
    "4-Hydroxybenzaldehyde":            'Alcohol',

    "4-Hydroxybutyric acid (GHB)":      'Organic acid',
    "4-Oxoproline":                     'Amino acid derivative',
    "5,5-Dimethylhydantoin":            'Amino acid derivative',
    "8-Hydroxyquinoline":               'Alcohol',
    "Acetyl-β-methylcholine":           'Amino acid derivative',
    "Acetyl-beta-methylcholine":        'Amino acid derivative',  # ASCII spelling of the entry above
    "Acrylic acid":                     'Organic acid',
    "Adenosine 5'-monophosphate":       'Nitrogen heterocycle',
    "Alanine":                          'Amino acid',
    "alpha-Glycerylphosphorylcholine":  'Organic acid',
    "Anhydrohexose":                    'Carbohydrate',

    "Anserine":                       'Amino acid derivative',
    "Arginine":                       'Amino acid',
    "Ascorbic acid 2-sulfate":        'Organic acid',
    "Asparagine":                     'Amino acid',
    "Beta alanine":                   'Amino acid derivative',
    "Beta-D-Glucopyranuronic acid":   'Carbohydrate',
    "Betaine":                        'Amino acid derivative',  # N-trimethyl glycine (ammonium)
    "Carnitine":                      'Organic acid',
    "Choline":                        'Alcohol',
    "Citramalic acid":                'Organic acid',
    "Creatine":                       'Organic acid',

    "Creatinine":               'Nitrogen heterocycle',
    "Cytidine":                 'Nitrogen heterocycle',
    "Ergothioneine":            'Amino acid derivative',
    "Ethyl-beta-D-glucuronide": 'Carbohydrate',
    "Gluconic acid":            'Carbohydrate',
    "Glucose":                  'Carbohydrate',
    "Glutamic acid":            'Amino acid',
    "Glutamine":                'Amino acid',
    "Glyceric acid":            'Carbohydrate',
    "Guanidinosuccinic acid":   'Amino acid derivative',
    "Hexose sugar":             'Carbohydrate',

    "Iditol":                   'Carbohydrate',
    "Hippuric acid":            'Organic acid',
    "Histidine":                'Amino acid',
    "Hydrocinnamic acid":       'Organic acid',
    "Hydroxycinnamic acid":     'Organic acid',
    "Isoleucine":               'Amino acid',
    "Lactic Acid":              'Organic acid',
    "Leucine":                  'Amino acid',
    "Methylaminopyrimidine":    'Nitrogen heterocycle',  # spelling used by data['ID']
    "MethylAminopyrimidine":    'Nitrogen heterocycle',  # legacy capitalisation, kept for compatibility
    "Methylhistidine":          'Amino acid derivative',
    "N-Acetyl-L-aspartic acid": 'Amino acid derivative',

    "Pyroglutamic acid":           'Amino acid derivative',
    "N-Acetylneuraminic acid":     'Carbohydrate',
    "N-Acetylornithine":           'Amino acid derivative',
    "N-Isovalerylglycine":         'Amino acid derivative',
    "N-Methyl-2-pyrrolidone":      'Nitrogen heterocycle',
    "N3,N4-Dimethyl-L-arginine":   'Amino acid derivative',
    "N6,N6,N6-Trimethyl-L-lysine": 'Amino acid derivative',
    "Nicotinamide":                'Nitrogen heterocycle',
    "Nicotinamide 1-oxide":        'Nitrogen heterocycle',
    "Ornithine":                   'Amino acid derivative',
    "Pentose sugar":               'Carbohydrate',
    "Phenylacetylglycine":         'Amino acid derivative',

    # "Pyroglutamic acid" is defined once, in the group above; a second identical
    # entry used to sit in this group and was redundant (duplicate dict key).
    "Phenylalanine":     'Amino acid',
    "Pipecolic acid":    'Organic acid',
    "Proline":           'Amino acid',
    "Pseudouridine":     'Nitrogen heterocycle',
    "Quinic acid":       'Organic acid',
    "Ribose":            'Carbohydrate',
    "Serine":            'Amino acid',
    "Stachydrine":       'Amino acid derivative',  # Proline Betaine (N-dimethyl ammonium)

    "Taurine":               'Amino acid derivative',
    "Threonic acid":         'Carbohydrate',  # sugar acid derived from Threose
    "Threonine":             'Amino acid',
    "Indole-3-acrylic acid": 'Organic acid',  # Also Nitrogen heterocycle (indole)
    "Trigonelline":          'Organic acid',  # also Nitrogen heterocycle (N-methylpyridinium)
    "Tryptophan":            'Amino acid',
    "Tyrosine":              'Amino acid',
    "Uric acid":             'Nitrogen heterocycle',
    "Uridine":               'Nitrogen heterocycle',
    "Urocanic acid":         'Nitrogen heterocycle',
}

In [5]:
# Show the distinct class labels in use, as a quick check for typos in the mapping.
{*metab_classes.values()}

{'Alcohol',
 'Amino acid',
 'Amino acid derivative',
 'Carbohydrate',
 'Nitrogen heterocycle',
 'Organic acid'}

In [7]:
# Disabled export step: uncomment the line below to write the class mapping to JSON.
# NOTE(review): as written, the file object returned by open() is never closed
# explicitly; if this is re-enabled, prefer a `with open(...) as f:` block.
# json.dump also escapes non-ASCII characters by default (ensure_ascii=True), so a
# key such as "Acetyl-β-methylcholine" is written with a \u escape sequence.
# json.dump(metab_classes, open('../data/metadata/metabolite_class_assignments.json', 'w'))