In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import pathlib
import pickle

# Put the parent directory on the import path; presumably this is where the
# `pals` package imported below lives — TODO confirm.
sys.path.append('..')

In [3]:
import pandas as pd

In [4]:
from pals.pimp_tools import get_pimp_API_token_from_env, PIMP_HOST, get_ms1_intensities, get_ms1_peaks, get_annotation_df, get_experimental_design
from pals.feature_extraction import DataSource
from pals.pathway_analysis import PALS
from pals.common import *

2019-11-25 14:10:17.451 | INFO     | pals.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)


# Load data

Generate token by logging in to PiMP

In [5]:
# Alternative to reading the token from the environment: request one with
# explicit credentials. Left disabled; never commit a real password here.
# NOTE(review): get_authentication_token is not among the names imported
# explicitly in this notebook — confirm where it comes from before enabling.
# username = 'joewandy' # PiMP username
# password = 'enter' # PiMP password
# host = 'localhost:8000' # server address and port
# token = get_authentication_token(host, username, password)

Assume the token is stored in the environment variable *PIMP_API_TOKEN*

In [6]:
# Obtain the PiMP API token from the environment; it is passed to every
# PiMP request made in the cells below.
token = get_pimp_API_token_from_env()

In [7]:
# PiMP analysis whose intensities, annotations and design are loaded below.
analysis_id = 1321 # example beer analysis

In [8]:
# Load the MS1 intensity table for `analysis_id`, using a local pickle as a
# cache so that PiMP is only queried when no cached copy exists.
int_df_filename = os.path.join(os.getcwd(), 'test_data', 'beer', 'int_df.p')
try:
    int_df = pd.read_pickle(int_df_filename)
except FileNotFoundError:
    # Cache miss: fetch from PiMP, then persist the result for later runs.
    int_df = get_ms1_intensities(token, PIMP_HOST, analysis_id)
    # Create the cache directory first; otherwise a checkout without
    # test_data/beer fails on the write after a successful download.
    os.makedirs(os.path.dirname(int_df_filename), exist_ok=True)
    int_df.to_pickle(int_df_filename)

int_df.head()

Unnamed: 0_level_0,Beer_1_full1.mzXML,Beer_1_full2.mzXML,Beer_1_full3.mzXML,Beer_2_full1.mzXML,Beer_2_full2.mzXML,Beer_2_full3.mzXML,Beer_3_full1.mzXML,Beer_3_full2.mzXML,Beer_3_full3.mzXML,Beer_4_full1.mzXML,Beer_4_full2.mzXML,Beer_4_full3.mzXML
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3033929,2235291000.0,2000478000.0,2170697000.0,2242760000.0,2279882000.0,1959480000.0,2079356000.0,2110473000.0,2243653000.0,1817065000.0,1746443000.0,1779827000.0
3033930,44334910.0,42873870.0,48948530.0,47604480.0,42172800.0,39084520.0,38257780.0,37701920.0,40871890.0,33304770.0,31536300.0,31024100.0
3033931,1723985000.0,1764235000.0,1585143000.0,1543961000.0,1579320000.0,1555666000.0,1698130000.0,1481824000.0,1508645000.0,1642510000.0,1723919000.0,1697806000.0
3033932,625423700.0,650341700.0,591497500.0,463592900.0,429838200.0,403874700.0,429283700.0,370876100.0,477893200.0,390316500.0,408099500.0,430989200.0
3033933,1075022000.0,929347400.0,1092635000.0,1130720000.0,1118146000.0,1192834000.0,1231442000.0,1262046000.0,1460653000.0,1009838000.0,908511100.0,996717600.0


In [10]:
# Load the peak annotation table (row_id -> entity_id) for `analysis_id`,
# using a local pickle as a cache so that PiMP is only queried on a cache miss.
annotation_df_filename = os.path.join(os.getcwd(), 'test_data', 'beer', 'annotation_df.p')
try:
    annotation_df = pd.read_pickle(annotation_df_filename)
except FileNotFoundError:
    annotation_df = get_annotation_df(token, PIMP_HOST, analysis_id)
    # Create the cache directory first so the write cannot fail after a
    # successful download.
    os.makedirs(os.path.dirname(annotation_df_filename), exist_ok=True)
    # Write to the same path that is read above. The previous code used the
    # undefined name `formula_df_filename`, which raised NameError on a miss.
    annotation_df.to_pickle(annotation_df_filename)

annotation_df.head()

Unnamed: 0_level_0,entity_id
row_id,Unnamed: 1_level_1
3033929,C00148
3036581,C00148
3036855,C00148
3038249,C00148
3033929,C00163


In [14]:
# Load the experimental design (a dict with 'comparisons' and 'groups'),
# using a local pickle as a cache so that PiMP is only queried on a cache miss.
experimental_design_filename = os.path.join(os.getcwd(), 'test_data', 'beer', 'experimental_design.p')
try:
    # NOTE: unpickling can execute arbitrary code; only load cache files
    # that were produced by this notebook.
    with open(experimental_design_filename, 'rb') as f:
        experimental_design = pickle.load(f)
except FileNotFoundError:
    experimental_design = get_experimental_design(token, PIMP_HOST, analysis_id)
    # Create the cache directory first so the write cannot fail after a
    # successful download.
    os.makedirs(os.path.dirname(experimental_design_filename), exist_ok=True)
    with open(experimental_design_filename, 'wb') as f:
        pickle.dump(experimental_design, f)

experimental_design

{'comparisons': [{'case': 'beer1', 'control': 'beer2', 'name': 'beer1/beer2'},
  {'case': 'beer3', 'control': 'beer4', 'name': 'beer3/beer4'}],
 'groups': {'beer4': ['Beer_4_full3.mzXML',
   'Beer_4_full2.mzXML',
   'Beer_4_full1.mzXML'],
  'beer3': ['Beer_3_full3.mzXML', 'Beer_3_full2.mzXML', 'Beer_3_full1.mzXML'],
  'beer2': ['Beer_2_full3.mzXML', 'Beer_2_full1.mzXML', 'Beer_2_full2.mzXML'],
  'beer1': ['Beer_1_full2.mzXML', 'Beer_1_full1.mzXML', 'Beer_1_full3.mzXML']}}

# Set-up KEGG Data Source

In [12]:
# Let pandas display up to 500 rows before truncating a frame.
pd.set_option('display.max_rows', 500)

In [13]:
# Build the data source from the intensities, annotations and experimental
# design loaded above, against the PiMP KEGG database. DATABASE_PIMP_KEGG is
# presumably supplied by `from pals.common import *` — TODO confirm.
ds = DataSource(int_df, annotation_df, experimental_design, DATABASE_PIMP_KEGG)

2019-11-25 14:10:37.005 | DEBUG    | pals.feature_extraction:__init__:34 - Loading C:\Users\joewa\Work\git\PALS\pals\data\PiMP_KEGG.json.zip
2019-11-25 14:10:37.027 | DEBUG    | pals.feature_extraction:__init__:85 - Mapping pathway to unique ids
2019-11-25 14:10:37.032 | DEBUG    | pals.feature_extraction:__init__:99 - Creating dataset to pathway mapping
2019-11-25 14:10:37.914 | DEBUG    | pals.feature_extraction:__init__:124 - Computing unique id counts


We pick one pathway, `map00730`, to check in detail

In [14]:
# Pathway id inspected in the cells that follow.
mapid = 'map00730'

In [15]:
# Look up the display name of the selected pathway.
ds.pathway_dict[mapid]

'Thiamine metabolism'

In [16]:
# Number of unique ids (formulas) the database assigns to this pathway,
# followed by the ids themselves.
len(ds.pathway_to_unique_ids_dict[mapid]), ds.pathway_to_unique_ids_dict[mapid]

(20,
 {'C11H13N4',
  'C12H15N4O2S',
  'C12H15N4OS',
  'C12H17N4OS',
  'C12H18N4O4PS',
  'C12H19N4O7P2S',
  'C12H20N4O10P3S',
  'C2H3NO2',
  'C2H5NO2',
  'C3H4O3',
  'C3H7NO2S',
  'C3H7O6P',
  'C5H11O7P',
  'C6H10N3O4P',
  'C6H10NO4PS',
  'C6H11N3O7P2',
  'C6H9N3O',
  'C6H9NOS',
  'C8H14N3O7P',
  'C9H11NO3'})

PALS finds 6 of these 20 formulas in the dataset

In [17]:
# How many of this pathway's unique ids the data source counts as present in
# the dataset. A list of pathway ids goes in; the output shows one count for
# this single-element list.
ds.get_pathway_dataset_unique_counts([mapid])

[6]

But PiMP reports only 4 assigned formulas for the same pathway
```
From PiMP
Assigned formulas 4
Total formulas 20

cid	Name				Formula		pid
C00022	Pyruvate			C3H4O3		66, 4595
C11437	1-Deoxy-D-xylulose-5-phosphate	C5H11O7P	758, 1109, 5208, 5209
C03373	Aminoimidazole ribotide		C8H14N3O7P	2307
C00082	L-Tyrosine			C9H11NO3	143
```

Here are the 6 formulas we found

In [18]:
# The pathway's unique ids that also occur among the dataset's unique ids.
ds.pathway_to_unique_ids_dict[mapid].intersection(ds.dataset_unique_ids)

{'C11H13N4', 'C3H4O3', 'C3H7O6P', 'C5H11O7P', 'C8H14N3O7P', 'C9H11NO3'}

And their corresponding peaks

In [19]:
# Row (peak) ids of the dataset that the data source maps to this pathway.
pids = ds.dataset_pathways_to_row_ids[mapid]
pids

[3033994,
 3036235,
 3034686,
 3035037,
 3035414,
 3035634,
 3034071,
 3034508,
 3035214,
 3035622,
 3035754,
 3035904,
 3036017,
 3037535]

In [20]:
# Cross-check: pathway formulas that appear among the annotations of the
# peaks in `pids`, sorted for stable display.
# NOTE(review): `formula_df` is not defined in any visible cell (the loader
# above creates `annotation_df`); on a fresh kernel this raises NameError —
# confirm which frame is intended.
sorted(ds.pathway_to_unique_ids_dict[mapid].intersection(formula_df.loc[pids]['unique_id'].unique()))

['C11H13N4', 'C3H4O3', 'C3H7O6P', 'C5H11O7P', 'C8H14N3O7P', 'C9H11NO3']

Here are the peaks annotated as the 4 found formulas

In [21]:
# Annotation rows for the four formulas listed in the PiMP excerpt above,
# grouped by formula.
# NOTE(review): `formula_df` is not defined in any visible cell — confirm its source.
formula_df[formula_df['unique_id'].isin(['C3H4O3', 'C5H11O7P', 'C8H14N3O7P', 'C9H11NO3'])].sort_values('unique_id')

Unnamed: 0_level_0,sec_id,mass,rt,polarity,cmpd_id,unique_id,adduct,identified,rc_id,compound,db,entity_id,frank_annot,inchikey
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3033994,66,130.049887,483.560944,positive,641,C3H4O3,M+ACN+H,True,15368350,Pyruvate,kegg,C00022,"{'frank_cmpd_name': 'L-Pyroglutamic acid', 'in...",LCTONWCANYUPML-UHFFFAOYSA-N
3033994,66,130.049887,483.560944,positive,642,C3H4O3,M+ACN+H,True,15368353,3-Oxopropanoate,kegg,C00222,"{'frank_cmpd_name': 'L-Pyroglutamic acid', 'in...",OAKURXIZZOAYBC-UHFFFAOYSA-N
3033994,66,130.049887,483.560944,positive,650,C3H4O3,M+ACN+H,True,15368363,Ethylene carbonate,kegg,C20363,"{'frank_cmpd_name': 'L-Pyroglutamic acid', 'in...",KMTRUDSVKNLOMY-UHFFFAOYSA-N
3033994,66,130.049887,483.560944,positive,652,C3H4O3,M+ACN+H,True,15368365,3-Hydroxypropenoate,kegg,C12069,"{'frank_cmpd_name': 'L-Pyroglutamic acid', 'in...",ZJKIBABOSPFBNO-OWOJBTEDSA-N
3035037,1109,256.058814,1255.710347,positive,5511,C5H11O7P,M+ACN+H,True,15375924,5-Deoxyribose-1-phosphate,kegg,C16637,,XXQFKXPJJNBLSU-TXICZTDVSA-N
3034686,758,256.057793,422.286094,positive,5514,C5H11O7P,M+ACN+H,True,15373309,1-Deoxy-D-xylulose 5-phosphate,kegg,C11437,,AJPADPZSRRUGHI-RFZPGFLSSA-N
3035037,1109,256.058814,1255.710347,positive,5514,C5H11O7P,M+ACN+H,True,15375928,1-Deoxy-D-xylulose 5-phosphate,kegg,C11437,,AJPADPZSRRUGHI-RFZPGFLSSA-N
3034686,758,256.057793,422.286094,positive,5525,C5H11O7P,M+ACN+H,True,15373321,2-Deoxy-D-ribose 5-phosphate,kegg,C00673,,KKZFLSZAWCYPOC-PYHARJCCSA-N
3035037,1109,256.058814,1255.710347,positive,5525,C5H11O7P,M+ACN+H,True,15375932,2-Deoxy-D-ribose 5-phosphate,kegg,C00673,,KKZFLSZAWCYPOC-PYHARJCCSA-N
3034686,758,256.057793,422.286094,positive,5526,C5H11O7P,M+ACN+H,True,15373322,2-Deoxy-D-ribose 1-phosphate,kegg,C00672,,KBDKAJNTYKVSEK-VPENINKCSA-N


And here are the peaks that are not found in PiMP

In [22]:
# Annotation rows for the two formulas found here but absent from the PiMP excerpt.
# NOTE(review): `formula_df` is not defined in any visible cell — confirm its source.
formula_df[formula_df['unique_id'].isin(['C11H13N4', 'C3H7O6P'])]

Unnamed: 0_level_0,sec_id,mass,rt,polarity,cmpd_id,unique_id,adduct,identified,rc_id,compound,db,entity_id,frank_annot,inchikey
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3035414,1486,224.102981,454.038681,positive,8828,C11H13N4,M+Na,True,15378999,Heteropyrithiamine,kegg,C02691,,SPQICHFDXHERAC-UHFFFAOYSA-N
3035634,1706,212.032018,430.311987,positive,9471,C3H7O6P,M+ACN+H,True,15381038,Glycerone phosphate,kegg,C00111,,GNGACRATGGDKBX-UHFFFAOYSA-N
3035634,1706,212.032018,430.311987,positive,9472,C3H7O6P,M+ACN+H,True,15381040,DL-Glyceraldehyde 3-phosphate,kegg,C00661,,LXJXRIRHZLFYRP-UHFFFAOYSA-N
3035634,1706,212.032018,430.311987,positive,9473,C3H7O6P,M+ACN+H,True,15381045,(2S)-2-Phospholactate,kegg,C19156,,CSZRNWHGZPKNKY-REOHCLBHSA-N
3035634,1706,212.032018,430.311987,positive,9474,C3H7O6P,M+ACN+H,True,15381046,D-Glyceraldehyde 3-phosphate,kegg,C00118,,LXJXRIRHZLFYRP-VKHMYHEASA-N


### Another check with ingenza00001 (Glycerol Utilisation)

In [23]:
# Switch to the second pathway under inspection and show its name.
mapid = 'ingenza00001'
ds.pathway_dict[mapid]

'Glycerol Utilisation'

Found 3 formulas

In [24]:
# Number of unique ids (formulas) assigned to this pathway, followed by the ids.
len(ds.pathway_to_unique_ids_dict[mapid]), ds.pathway_to_unique_ids_dict[mapid]

(3, {'C3H6O3', 'C3H7O6P', 'C3H8O3'})

For the following peaks

In [25]:
# Row (peak) ids of the dataset that the data source maps to this pathway.
pids = ds.dataset_pathways_to_row_ids[mapid]
pids

[3034525,
 3034533,
 3034656,
 3034768,
 3034769,
 3035956,
 3035966,
 3037407,
 3035634]

In [26]:
# Pathway formulas that appear among the annotations of the peaks in `pids`.
# NOTE(review): `formula_df` is not defined in any visible cell — confirm its source.
sorted(ds.pathway_to_unique_ids_dict[mapid].intersection(formula_df.loc[pids]['unique_id'].unique()))

['C3H6O3', 'C3H7O6P', 'C3H8O3']

Here are the peaks found in PiMP

In [27]:
# Annotation rows for the two formulas that PiMP also reports, grouped by formula.
# NOTE(review): `formula_df` is not defined in any visible cell — confirm its source.
formula_df[formula_df['unique_id'].isin(['C3H6O3', 'C3H8O3'])].sort_values('unique_id')

Unnamed: 0_level_0,sec_id,mass,rt,polarity,cmpd_id,unique_id,adduct,identified,rc_id,compound,db,entity_id,frank_annot,inchikey
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3034525,597,132.065497,566.824783,positive,5155,C3H6O3,M+ACN+H,True,15372296,3-Hydroxypropanoate,kegg,C01013,,ALRHLSYJTWAHJZ-UHFFFAOYSA-N
3034533,605,132.065518,606.690979,positive,5165,C3H6O3,M+ACN+H,True,15372479,L-Glyceraldehyde,kegg,C02426,"{'frank_cmpd_name': 'Hydroxyproline', 'inchike...",MNQZXJOMYWMBOU-GSVOUGTGSA-N
3034525,597,132.065497,566.824783,positive,5165,C3H6O3,M+ACN+H,True,15372332,L-Glyceraldehyde,kegg,C02426,,MNQZXJOMYWMBOU-GSVOUGTGSA-N
3034533,605,132.065518,606.690979,positive,5164,C3H6O3,M+ACN+H,True,15372477,Lactate,kegg,C01432,"{'frank_cmpd_name': 'Hydroxyproline', 'inchike...",JVTAAEKCZFNVCJ-UHFFFAOYSA-N
3034525,597,132.065497,566.824783,positive,5164,C3H6O3,M+ACN+H,True,15372330,Lactate,kegg,C01432,,JVTAAEKCZFNVCJ-UHFFFAOYSA-N
3034533,605,132.065518,606.690979,positive,5163,C3H6O3,M+ACN+H,True,15372476,D-Glyceraldehyde,kegg,C00577,"{'frank_cmpd_name': 'Hydroxyproline', 'inchike...",MNQZXJOMYWMBOU-VKHMYHEASA-N
3034533,605,132.065518,606.690979,positive,5159,C3H6O3,M+ACN+H,True,15372458,Glyceraldehyde,kegg,C02154,"{'frank_cmpd_name': 'Hydroxyproline', 'inchike...",MNQZXJOMYWMBOU-UHFFFAOYSA-N
3034525,597,132.065497,566.824783,positive,5159,C3H6O3,M+ACN+H,True,15372311,Glyceraldehyde,kegg,C02154,,MNQZXJOMYWMBOU-UHFFFAOYSA-N
3034525,597,132.065497,566.824783,positive,5163,C3H6O3,M+ACN+H,True,15372329,D-Glyceraldehyde,kegg,C00577,,MNQZXJOMYWMBOU-VKHMYHEASA-N
3034525,597,132.065497,566.824783,positive,5158,C3H6O3,M+ACN+H,True,15372309,(R)-Lactate,kegg,C00256,,JVTAAEKCZFNVCJ-UWTATZPHSA-N


And here are the peaks not found in PiMP

In [28]:
# Annotation rows for the one formula found here but not in PiMP.
# NOTE(review): `formula_df` is not defined in any visible cell — confirm its source.
formula_df[formula_df['unique_id'].isin(['C3H7O6P'])]

Unnamed: 0_level_0,sec_id,mass,rt,polarity,cmpd_id,unique_id,adduct,identified,rc_id,compound,db,entity_id,frank_annot,inchikey
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3035634,1706,212.032018,430.311987,positive,9471,C3H7O6P,M+ACN+H,True,15381038,Glycerone phosphate,kegg,C00111,,GNGACRATGGDKBX-UHFFFAOYSA-N
3035634,1706,212.032018,430.311987,positive,9472,C3H7O6P,M+ACN+H,True,15381040,DL-Glyceraldehyde 3-phosphate,kegg,C00661,,LXJXRIRHZLFYRP-UHFFFAOYSA-N
3035634,1706,212.032018,430.311987,positive,9473,C3H7O6P,M+ACN+H,True,15381045,(2S)-2-Phospholactate,kegg,C19156,,CSZRNWHGZPKNKY-REOHCLBHSA-N
3035634,1706,212.032018,430.311987,positive,9474,C3H7O6P,M+ACN+H,True,15381046,D-Glyceraldehyde 3-phosphate,kegg,C00118,,LXJXRIRHZLFYRP-VKHMYHEASA-N


### Checking with map05033 (Nicotine addiction)

In [40]:
# Switch to the third pathway under inspection and show its name.
mapid = 'map05033'
ds.pathway_dict[mapid]

'Nicotine addiction'

Found total 7 formulas

In [42]:
# Number of unique ids (formulas) assigned to this pathway, followed by the ids.
len(ds.pathway_to_unique_ids_dict[mapid]), ds.pathway_to_unique_ids_dict[mapid]

(7, {'C10H14N2', 'C4H9NO2', 'C5H9NO4', 'C7H16NO2', 'Ca', 'Cl', 'Na'})

For the following peaks

In [43]:
# Row (peak) ids of the dataset that the data source maps to this pathway.
pids = ds.dataset_pathways_to_row_ids[mapid]
pids

[3033997,
 3034141,
 3034555,
 3034847,
 3035772,
 3037313,
 3034314,
 3035608,
 3036129,
 3037438]

Found 3 formulas in the dataset

In [44]:
# Pathway formulas that appear among the annotations of the peaks in `pids`.
# NOTE(review): `formula_df` is not defined in any visible cell — confirm its source.
sorted(ds.pathway_to_unique_ids_dict[mapid].intersection(formula_df.loc[pids]['unique_id'].unique()))

['C4H9NO2', 'C5H9NO4', 'C7H16NO2']

From PiMP, we should find 2 formulas?

In [45]:
# Formulas recorded from PiMP for this pathway.
# NOTE(review): 'C7H15NO2' here differs by one hydrogen from the 'C7H16NO2'
# reported by the data source above — confirm this is the discrepancy being
# investigated and not a typo.
found_in_pimp = ['C4H9NO2', 'C5H9NO4', 'C7H15NO2']

The following are peaks found in PALS

In [46]:
# Annotation rows for the three formulas found by PALS, grouped by formula.
# NOTE(review): `formula_df` is not defined in any visible cell — confirm its source.
formula_df[formula_df['unique_id'].isin(['C4H9NO2', 'C5H9NO4', 'C7H16NO2'])].sort_values('unique_id')

Unnamed: 0_level_0,sec_id,mass,rt,polarity,cmpd_id,unique_id,adduct,identified,rc_id,compound,db,entity_id,frank_annot,inchikey
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3033997,69,104.07057,645.219111,positive,662,C4H9NO2,M+H,True,15368382,(S)-2-Aminobutanoate,kegg,C02356,"{'frank_cmpd_name': '4-AMINOBUTANOATE', 'inchi...",QWCKQJZIFLGMSD-VKHMYHEASA-N
3035772,1844,104.070577,576.966899,positive,675,C4H9NO2,M+H,True,15382577,3-Aminoisobutyric acid,kegg,C05145,,QCHPKSFMDHPSNR-UHFFFAOYSA-N
3034555,627,104.070572,417.54307,positive,675,C4H9NO2,M+H,True,15372641,3-Aminoisobutyric acid,kegg,C05145,,QCHPKSFMDHPSNR-UHFFFAOYSA-N
3034141,213,104.070573,627.612061,positive,675,C4H9NO2,M+H,True,15369172,3-Aminoisobutyric acid,kegg,C05145,{'frank_cmpd_name': 'gamma-Amino-n-butyric aci...,QCHPKSFMDHPSNR-UHFFFAOYSA-N
3033997,69,104.07057,645.219111,positive,675,C4H9NO2,M+H,True,15368412,3-Aminoisobutyric acid,kegg,C05145,"{'frank_cmpd_name': '4-AMINOBUTANOATE', 'inchi...",QCHPKSFMDHPSNR-UHFFFAOYSA-N
3037313,3385,145.097203,490.451894,positive,671,C4H9NO2,M+ACN+H,True,15396101,L-3-Amino-isobutanoate,kegg,C03284,,QCHPKSFMDHPSNR-VKHMYHEASA-N
3035772,1844,104.070577,576.966899,positive,671,C4H9NO2,M+H,True,15382571,L-3-Amino-isobutanoate,kegg,C03284,,QCHPKSFMDHPSNR-VKHMYHEASA-N
3037313,3385,145.097203,490.451894,positive,675,C4H9NO2,M+ACN+H,True,15396106,3-Aminoisobutyric acid,kegg,C05145,,QCHPKSFMDHPSNR-UHFFFAOYSA-N
3034847,919,104.070582,398.307777,positive,671,C4H9NO2,M+H,True,15374497,L-3-Amino-isobutanoate,kegg,C03284,"{'frank_cmpd_name': 'N,N-Dimethylglycine', 'in...",QCHPKSFMDHPSNR-VKHMYHEASA-N
3034141,213,104.070573,627.612061,positive,671,C4H9NO2,M+H,True,15369166,L-3-Amino-isobutanoate,kegg,C03284,{'frank_cmpd_name': 'gamma-Amino-n-butyric aci...,QCHPKSFMDHPSNR-VKHMYHEASA-N


The following are peaks found in PiMP

In [47]:
# Annotation rows for the formulas PiMP reports, grouped by formula. Reuses
# the `found_in_pimp` list defined in the earlier cell instead of repeating
# the literal, so the two cannot drift apart.
# NOTE(review): `formula_df` is not defined in any visible cell — confirm its source.
formula_df[formula_df['unique_id'].isin(found_in_pimp)].sort_values('unique_id')

Unnamed: 0_level_0,sec_id,mass,rt,polarity,cmpd_id,unique_id,adduct,identified,rc_id,compound,db,entity_id,frank_annot,inchikey
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3033997,69,104.07057,645.219111,positive,662,C4H9NO2,M+H,True,15368382,(S)-2-Aminobutanoate,kegg,C02356,"{'frank_cmpd_name': '4-AMINOBUTANOATE', 'inchi...",QWCKQJZIFLGMSD-VKHMYHEASA-N
3035772,1844,104.070577,576.966899,positive,675,C4H9NO2,M+H,True,15382577,3-Aminoisobutyric acid,kegg,C05145,,QCHPKSFMDHPSNR-UHFFFAOYSA-N
3034847,919,104.070582,398.307777,positive,675,C4H9NO2,M+H,True,15374503,3-Aminoisobutyric acid,kegg,C05145,"{'frank_cmpd_name': 'N,N-Dimethylglycine', 'in...",QCHPKSFMDHPSNR-UHFFFAOYSA-N
3034141,213,104.070573,627.612061,positive,675,C4H9NO2,M+H,True,15369172,3-Aminoisobutyric acid,kegg,C05145,{'frank_cmpd_name': 'gamma-Amino-n-butyric aci...,QCHPKSFMDHPSNR-UHFFFAOYSA-N
3033997,69,104.07057,645.219111,positive,675,C4H9NO2,M+H,True,15368412,3-Aminoisobutyric acid,kegg,C05145,"{'frank_cmpd_name': '4-AMINOBUTANOATE', 'inchi...",QCHPKSFMDHPSNR-UHFFFAOYSA-N
3037313,3385,145.097203,490.451894,positive,671,C4H9NO2,M+ACN+H,True,15396101,L-3-Amino-isobutanoate,kegg,C03284,,QCHPKSFMDHPSNR-VKHMYHEASA-N
3035772,1844,104.070577,576.966899,positive,671,C4H9NO2,M+H,True,15382571,L-3-Amino-isobutanoate,kegg,C03284,,QCHPKSFMDHPSNR-VKHMYHEASA-N
3037313,3385,145.097203,490.451894,positive,675,C4H9NO2,M+ACN+H,True,15396106,3-Aminoisobutyric acid,kegg,C05145,,QCHPKSFMDHPSNR-UHFFFAOYSA-N
3034847,919,104.070582,398.307777,positive,671,C4H9NO2,M+H,True,15374497,L-3-Amino-isobutanoate,kegg,C03284,"{'frank_cmpd_name': 'N,N-Dimethylglycine', 'in...",QCHPKSFMDHPSNR-VKHMYHEASA-N
3034141,213,104.070573,627.612061,positive,671,C4H9NO2,M+H,True,15369166,L-3-Amino-isobutanoate,kegg,C03284,{'frank_cmpd_name': 'gamma-Amino-n-butyric aci...,QCHPKSFMDHPSNR-VKHMYHEASA-N
