# Assess and Monitor QCs, Internal Standards, and Common Metabolites

## This notebook will guide people to
* ## Identify their files
* ## Specify the LC/MS method used
* ## Specify the text-string used to differentiate blanks, QCs, and experimental injections
* ## Populate the run log with the pass/fail outcome for each run

## Run each block below.  They will indicate "ok" when completed.  Clearing all output prior to starting makes it easier to tell when cells are completed.

# 1. Import required packages

In [1]:
import sys
# Alternative (disabled) location of a metatlas checkout in a user home directory:
# sys.path.insert(0,'/global/homes/b/bpb/metatlas/' )
# Put the shared site-packages directory first on the module search path so
# that `metatlas` is imported from there.
# NOTE(review): this is a hard-coded absolute path; the notebook only runs on
# systems where it exists.
sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )

import pandas as pd
import warnings
# Silences every warning for the rest of the session, including ones that
# might point at real problems.
warnings.filterwarnings('ignore')
# All notebook helpers are reached through the `mtools` alias.
from metatlas import ms_monitor_util as mtools
# Render matplotlib figures inside the notebook output.
%matplotlib inline

('Metatlas live in ', '/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages/metatlas')
you're running on fb89848c8ac0 at 172.17.0.2 


# 2. Select your experiment. 

In [2]:
# Ask how far back to look. raw_input returns text, so the answer is kept as
# typed in `num_days` and converted to an integer before it is passed on.
num_days = raw_input('How many days back to search: ')
days_back = int(num_days)
# Later cells read `experiment.value`, so the returned object is presumably
# an interactive selector -- TODO confirm.
experiment = mtools.get_recent_experiments(num_days=days_back)

How many days back to search: 5


# 3. Get files for that experiment.

In [3]:
# Fetch the files for the experiment chosen in step 2, then report how many
# were found.
selected_experiment = experiment.value
files = mtools.get_files_for_experiment(selected_experiment)
print(len(files))

54


# 4. Get strings used in your file naming and the method you used.


In [7]:
# Collect the text strings used in file names to tell QC, blank, negative and
# positive injections apart. Step 6 reads `.value` on each returned object,
# so these are presumably interactive widgets -- TODO confirm.
# NOTE(review): the helper's name lists blank, qc, pos, neg but the result is
# unpacked as qc, blank, neg, pos -- confirm the actual return order.
qc_str,blank_str,neg_str,pos_str = mtools.get_blank_qc_pos_neg_string()
# Selector for the LC/MS method; `method.value` is read in steps 5b and 6.
method = mtools.get_method_dropdown()

# 5. Get Data from Reference
### You can also view the source of these references [here](https://docs.google.com/a/lbl.gov/spreadsheets/d/1SCvTvVloqkrsvT5uoCLP4gGaFO_BolptkiT3uAk_exM/edit?usp=sharing "Reference data spreadsheet").

## 5a. Get the Data as a Dataframe to Explore on Your Own

In [8]:
# Load the reference table for free-form exploration. The call itself prints
# the table's column index (the ('keys', Index([...])) output of this cell).
df = mtools.get_ms_monitor_reference_data()


('keys', Index([u'label', u'Formula', u'Neutral Mass', u'Old Metacyc based name', u'',
       u'name', u'inchi_key', u'Comments', u'mz_POS', u'mz_NEG',
       u'Permanent Charge', u'Pos Neg Detected', u'COMMON-HILIC',
       u'ISTD-HILIC', u'QC-HILIC', u'SSM-1', u'rt_peak_6550_ZIC-HILIC',
       u'rt_min_6550_ZIC-HILIC', u'rt_max_6550_ZIC-HILIC',
       u'rt_peak_QE139_ZIC-HILIC', u'rt_min_QE139_ZIC-HILIC',
       u'rt_max_QE139_ZIC-HILIC', u'rt_peak_QE144_ZIC-HILIC',
       u'rt_min_QE144_ZIC-HILIC', u'rt_max_QE144_ZIC-HILIC',
       u'rt_peak_QE119_ZIC-HILIC', u'rt_min_QE119_ZIC-HILIC',
       u'rt_max_QE119_ZIC-HILIC', u'rt_peak_6550_pHILIC',
       u'rt_min_6550_pHILIC', u'rt_max_6550_pHILIC',
       u'rt_peak_6550_pHILIC-noguard', u'rt_peak_QE139_pHILIC',
       u'rt_min_QE139_pHILIC', u'rt_max_QE139_pHILIC', u'ISTD_RP', u'QC_RP',
       u'rt_peak_6550_RP', u'rt_min_6550_RP', u'rt_max_6550_RP',
       u'Pol_6550_RP', u'rt_peak_QE119_RP', u'rt_min_QE119_RP',
       u'rt_max_QE119_R

In [9]:
# Preview the first rows of the reference table loaded above.
df.head()

Unnamed: 0,label,Formula,Neutral Mass,Old Metacyc based name,Unnamed: 5,name,inchi_key,Comments,mz_POS,mz_NEG,...,file_rt_QE139_pHILIC_NEG,file_mz_QE139_pHILIC_NEG,file_msms_6550_pHILIC_POS,file_msms_6520_pHILIC_POS,file_msms_QE139_pHILIC_POS,file_msms_6550_pHILIC_NEG,file_msms_6520_pHILIC_NEG,file_msms_QE139_pHILIC_NEG,Source,Compound category
0,ABMBA,C8H8BrNO2,228.973846,ABMBA,ABMBA-QC,ABMBA,LCMZECCEEOQWLQ-UHFFFAOYSA-N,,229.981122,227.96657,...,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,,,20160119_pHILIC___POS_MSMS_KZ__KBL_QCMix_V2___...,,,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,,
2,Uracil,C4H4N2O2,112.0273,uracil,Uracil,uracil,ISAKRJDGNUQOIC-UHFFFAOYSA-N,,113.0346,111.02,...,20160119_pHILIC___NEG_MSMS_KZ_RootCass_8_MeOH_...,20160119_pHILIC___NEG_MSMS_KZ_RootCass_8_MeOH_...,,,20160119_pHILIC___POS_MSMS_KZ_RootCass_8_MeOH_...,,,20160119_pHILIC___NEG_MSMS_KZ_RootCass_8_MeOH_...,Standard_KZ_cassettes,
3,pyridoxine,C8H11NO3,169.073898,|pyridoxine|,pyridoxine-QC,|pyridoxine|,LXNHXLLTXMVWPM-UHFFFAOYSA-N,,170.081174,168.066622,...,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,,,20160119_pHILIC___POS_MSMS_KZ__KBL_QCMix_V2___...,,,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,,
5,ACA,C15H10O2,222.068085,9-anthracene carboxylic acid,ACA-QC,9-anthracene carboxylic acid,XGWFJBFNAQHLEF-UHFFFAOYSA-N,,223.075361,221.060809,...,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,,,20160119_pHILIC___POS_MSMS_KZ__KBL_QCMix_V2___...,,,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,,
7,d5-benzoic acid,C7HD5O2,127.068161,d5-benzoic acid,d5-benzoic acid,d5-benzoic acid,WPYMKLBDIGXBTP-RALIUCGRSA-N,,128.075437,126.060885,...,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,20160119_pHILIC___NEG_MSMS_KZ__KBL_QCMix_V2___...,,,,,,,,


## 5b. Get the data for proceeding onto steps 6 and 7

In [None]:
reference_data

In [None]:
mtools = reload(mtools)

reference_data = mtools.filter_istd_qc_by_method(method.value)
print "ok"

# 6. Check that you have entered everything correctly by running the next cell

In [None]:
# Echo every selection made so far so it can be checked before the long
# processing step. Nothing is modified here.
# Requires steps 2-5b to have been run: `method`, `experiment`, `files`, the
# four filter-string objects and `reference_data` must all exist.
print "Method = ",method.value
print "Experiment = ",experiment.value
print len(files), " files queued for assessment"
# Printed in the order qc, blank, pos, neg.
print "filter strings are: ", qc_str.value, blank_str.value, pos_str.value, neg_str.value
print "parameters: ",reference_data['parameters']

In [None]:
##################################################################
##################################################################
##### YOU SHOULD NEVER HAVE TO UNCOMMENT AND RUN THIS BLOCK ######
# If uncommented, the line below overwrites the 'mz_ppm_tolerance' and
# 'rt_minutes_tolerance' entries of reference_data['parameters'] with the two
# values returned by mtools.get_rt_mz_tolerance_from_user().
# reference_data['parameters']['mz_ppm_tolerance'], reference_data['parameters']['rt_minutes_tolerance'] = mtools.get_rt_mz_tolerance_from_user()
##### YOU SHOULD NEVER HAVE TO UNCOMMENT AND RUN THIS BLOCK ######
##################################################################
##################################################################

# 7. Get values and make plots and tables.  This saves an xls and figures to your current folder.

In [None]:
# Re-import the helper module so that edits made to it since the first import
# take effect without restarting the kernel.
mtools = reload(mtools)
# Build the result table for all queued files. The selection objects are
# passed as-is (not their `.value`), so the helper presumably reads the
# current selections itself -- TODO confirm.
# NOTE(review): this rebinds `df`, which step 5a used for the reference
# table; after this cell, re-running `df.head()` shows the results instead.
df = mtools.construct_result_table_for_files(files,qc_str,blank_str,neg_str,pos_str,method,reference_data,experiment)

# 8. Assert for each file whether it has passed or failed QC

In [None]:
# TODO: not implemented yet -- record the QC pass/fail outcome for each file.