In [3]:
import pandas as pd
import json

In [6]:
# Read the processed metabolite table and use its 'i' column as the row index.
data = (
    pd.read_csv(r'../data/processed/combined_metabolites_data_with_model_params.csv')
    .set_index('i')
)

In [13]:
# Print the ID of every row typed 'metabolite' whose ID is not 'unknown',
# iterating the selection in reverse row order.
named_metabolite = (data['ID'] != 'unknown') & (data['Type'] == 'metabolite')
for m in data.loc[named_metabolite, 'ID'][::-1]:
    print(m)

1,5-Anhydro-D-glucitol
2-Mercaptoethanol
2-Naphthalenesulfonic acid
3-Hydroxybutyric acid
3-Indoxyl sulphate
3-Methylhistidine
4-Guanidinobutyric acid
4-Hydroxybenzaldehyde
4-Hydroxybutyric acid (GHB)
4-Oxoproline
5,5-Dimethylhydantoin
8-Hydroxyquinoline
Acetyl-β-methylcholine
Acrylic acid
Adenosine 5'-monophosphate
Alanine
alpha-Glycerylphosphorylcholine
Anhydrohexose
Anserine
Arginine
Ascorbic acid 2-sulfate
Asparagine
Beta alanine
Beta-D-Glucopyranuronic acid
Betaine
Carnitine
Choline
Citramalic acid
Creatine
Creatinine
Cytidine
Ergothioneine
Ethyl-beta-D-glucuronide
Gluconic acid
Glucose
Glutamic acid
Glutamine
Glyceric acid
Guanidinosuccinic acid
Hexose sugar
Iditol
Hippuric acid
Histidine
Hydrocinnamic acid
Hydroxycinnamic acid
Isoleucine
Lactic Acid
Leucine
Methylaminopyrimidine
Methylhistidine
N-Acetyl-L-aspartic acid
Pyroglutamic acid
N-Acetylneuraminic acid
N-Acetylornithine
N-Isovalerylglycine
N-Methyl-2-pyrrolidone
N3,N4-Dimethyl-L-arginine
N6,N6,N6-Trimethyl-L-lysine
Nicot

In [4]:
# Manual assignment of each metabolite ID to one of six coarse chemical classes:
# 'alcohol', 'amino acid', 'amino acid derivative', 'carbohydrate',
# 'nitrogen heterocycle', 'organic acid'.
# Every key must appear exactly once: Python silently keeps only the last value
# of a repeated key in a dict literal, so a duplicate would hide conflicting edits.
metab_classes = {
    "1,5-Anhydro-D-glucitol":           'carbohydrate',
    "2-Mercaptoethanol":                'alcohol',
    "2-Naphthalenesulfonic acid":       'organic acid',
    "3-Hydroxybutyric acid":            'organic acid',
    "3-Indoxyl sulphate":               'organic acid',
    "3-Methylhistidine":                'amino acid derivative',
    "4-Guanidinobutyric acid":          'organic acid',
    "4-Hydroxybenzaldehyde":            'alcohol',

    "4-Hydroxybutyric acid (GHB)":      'organic acid',
    "4-Oxoproline":                     'amino acid derivative',
    "5,5-Dimethylhydantoin":            'amino acid derivative',
    "8-Hydroxyquinoline":               'alcohol',
    # Same compound under two spellings (Greek letter and ASCII);
    # presumably both forms occur in the data -- TODO confirm.
    "Acetyl-β-methylcholine":           'amino acid derivative',
    "Acetyl-beta-methylcholine":        'amino acid derivative',
    "Acrylic acid":                     'organic acid',
    "Adenosine 5'-monophosphate":       'nitrogen heterocycle',
    "Alanine":                          'amino acid',
    "alpha-Glycerylphosphorylcholine":  'organic acid',
    "Anhydrohexose":                    'carbohydrate',

    "Anserine":                       'amino acid derivative',
    "Arginine":                       'amino acid',
    "Ascorbic acid 2-sulfate":        'organic acid',
    "Asparagine":                     'amino acid',
    "Beta alanine":                   'amino acid derivative',
    "Beta-D-Glucopyranuronic acid":   'carbohydrate',
    "Betaine":                        'amino acid derivative',  # N-trimethyl glycine (ammonium)
    "Carnitine":                      'organic acid',
    "Choline":                        'alcohol',
    "Citramalic acid":                'organic acid',
    "Creatine":                       'organic acid',

    "Creatinine":               'nitrogen heterocycle',
    "Cytidine":                 'nitrogen heterocycle',
    "Ergothioneine":            'amino acid derivative',
    "Ethyl-beta-D-glucuronide": 'carbohydrate',
    "Gluconic acid":            'carbohydrate',
    "Glucose":                  'carbohydrate',
    "Glutamic acid":            'amino acid',
    "Glutamine":                'amino acid',
    "Glyceric acid":            'carbohydrate',
    "Guanidinosuccinic acid":   'amino acid derivative',
    "Hexose sugar":             'carbohydrate',

    "Iditol":                   'carbohydrate',
    "Hippuric acid":            'organic acid',
    "Histidine":                'amino acid',
    "Hydrocinnamic acid":       'organic acid',
    "Hydroxycinnamic acid":     'organic acid',
    "Isoleucine":               'amino acid',
    "Lactic Acid":              'organic acid',
    "Leucine":                  'amino acid',
    "Methylaminopyrimidine":    'nitrogen heterocycle',
    "Methylhistidine":          'amino acid derivative',
    "N-Acetyl-L-aspartic acid": 'amino acid derivative',

    # Single entry for pyroglutamic acid (a second, identical entry further
    # down was removed); kept here so the key order of the dict is unchanged.
    "Pyroglutamic acid":           'amino acid derivative',
    "N-Acetylneuraminic acid":     'carbohydrate',
    "N-Acetylornithine":           'amino acid derivative',
    "N-Isovalerylglycine":         'amino acid derivative',
    "N-Methyl-2-pyrrolidone":      'nitrogen heterocycle',
    "N3,N4-Dimethyl-L-arginine":   'amino acid derivative',
    "N6,N6,N6-Trimethyl-L-lysine": 'amino acid derivative',
    "Nicotinamide":                'nitrogen heterocycle',
    "Nicotinamide 1-oxide":        'nitrogen heterocycle',
    "Ornithine":                   'amino acid derivative',
    "Pentose sugar":               'carbohydrate',
    "Phenylacetylglycine":         'amino acid derivative',

    "Phenylalanine":     'amino acid',
    "Pipecolic acid":    'organic acid',
    "Proline":           'amino acid',
    "Pseudouridine":     'nitrogen heterocycle',
    "Quinic acid":       'organic acid',
    "Ribose":            'carbohydrate',
    "Serine":            'amino acid',
    "Stachydrine":       'amino acid derivative',  # Proline Betaine (N-dimethyl ammonium)

    "Taurine":               'amino acid derivative',
    "Threonic acid":         'carbohydrate',  # sugar acid derived from Threose
    "Threonine":             'amino acid',
    "Indole-3-acrylic acid": 'organic acid',  # Also nitrogen heterocycle (indole)
    "Trigonelline":          'organic acid',  # also nitrogen heterocycle (pyridinium)
    "Tryptophan":            'amino acid',
    "Tyrosine":              'amino acid',
    "Uric acid":             'nitrogen heterocycle',
    "Uridine":               'nitrogen heterocycle',
    "Urocanic acid":         'nitrogen heterocycle',
}

In [5]:
# Show the distinct class labels in use (quick check for typos in the mapping).
{*metab_classes.values()}

{'alcohol',
 'amino acid',
 'amino acid derivative',
 'carbohydrate',
 'nitrogen heterocycle',
 'organic acid'}

In [8]:
# Export intentionally disabled; uncomment to (re)write the class assignments.
# with open('../data/metadata/metabolite_class_assignments.json', 'w') as fh:
#     json.dump(metab_classes, fh)