In [1]:
from srm_helper import *
import pandas as pd
import random as rd
import numpy as np
from sklearn.model_selection import KFold 
from sklearn.metrics import r2_score


In [2]:
# ---- run configuration -------------------------------------------------
# MS2 fragment tolerance used when matching QqQ-optimized transitions.
tol = 0.5
# m/z tolerance (ppm) applied to the HRMS data.
ppmTol = 10
# Number of CPU cores handed to SRM_maker.
numCores = 20
# Number of compounds used to learn the conversion equation.
num2Train = 10
# Number of random resampling iterations.
numIters = 100

In [3]:
# Lookup from polarity label to signed charge (not referenced in the visible cells).
switcher = dict([("Positive", 1), ("Negative", -1)])

# Load the complete optimized transition list (all compounds) from CSV.
totalTransitionInfoFn = "../data/IDX/M3T_transitions_ALTIS_optimized_allCpds.csv"
totalTransitions = pd.read_csv(totalTransitionInfoFn)



In [4]:

# Unique compound names, kept in order of first appearance in the transition
# list. A plain set() gives strings an ordering that can differ between
# interpreter runs, which makes any random sampling from this list
# irreproducible; first-appearance order is stable for a given input file.
allCpds = totalTransitions["Name"].drop_duplicates().tolist()
numCpds = len(allCpds)

In [5]:
if __name__ == '__main__':

    # One target per (compound, charge): keep the first transition row of every
    # (Name, Charge) pair and only the precursor-level columns. drop_duplicates
    # replaces a row-by-row scan with list membership tests (quadratic).
    goodCols = ["Name","rt_start","rt_end","mz","Charge"]
    targets = totalTransitions.drop_duplicates(subset=["Name", "Charge"], keep="first")
    targets = targets[goodCols]
    targets.to_csv("tmp_targets_for_evaluation.csv",index=False)

    # create srm_maker object
    srm_maker = SRM_maker(ppm=ppmTol, numCores=numCores)

    msFilenames = ["../data/IDX/IDX_MS2_data/M3T_10uM_pos_DDA_10NCEs_25-35_50ms_5e4_DE5s_updatedRT.mzML",
                   "../data/IDX/IDX_MS2_data/M3T_10uM_neg_DDA_10NCEs_25-35_50ms_5e4_DE5s_updatedRT.mzML",
                   "../data/IDX/IDX_MS2_data/M3T_10uM_pos_DDA_10NCEs_25-35_80ms_1e4_DE5s_updatedRT_missing.mzML"]

    # Re-read the targets from the file that was just written, so the frame
    # passed on is exactly what the CSV round trip produces.
    targets = pd.read_csv("tmp_targets_for_evaluation.csv")

    # Initialised here but not filled in by any visible cell.
    breakdownCurves = {}

    # Build one SRM table per data file and concatenate once at the end;
    # growing a DataFrame with concat inside the loop is quadratic and
    # concatenating onto an empty frame is deprecated in recent pandas.
    perFileTables = []
    for msFilename in msFilenames:
        # createSRMsCE returns two values; only the SRM table is used here.
        srm_table1, _ = srm_maker.createSRMsCE(msFilename, targets)
        perFileTables.append(srm_table1)

    if perFileTables:
        srm_table = pd.concat(perFileTables,axis=0,ignore_index=True)
    else:
        srm_table = pd.DataFrame()

        

Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  39
Number of spectra to deconvolve:  2362
read data
starting to find transitions
writing results
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  36
Number of spectra to deconvolve:  1621
read data
starting to find transitions
writing results
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  4
Number of spectra to deconvolve:  378
read data
starting to find transitions
writing results


In [6]:
# Maps (compound name, product m/z, charge) -> {annotation column: value}.
# A row is folded into the first existing key with the same name and charge
# whose product m/z lies within `tol`; otherwise it opens a new key.
transition_indices = {}

# Pass 1: QqQ-optimized collision energies from the full transition list.
for qqqIdx, qqqRow in totalTransitions.iterrows():
    for known in transition_indices:
        if known[0] == qqqRow["Name"] and np.abs(qqqRow["Product mz"] - known[1]) < tol and qqqRow["Charge"] == known[2]:
            transition_indices[known]["QqQ Optimized CE"] = qqqRow["CE"]
            break
    else:
        # no existing transition matched -> register a new one
        freshKey = (qqqRow["Name"], qqqRow["Product mz"], qqqRow["Charge"])
        transition_indices[freshKey] = {"QqQ Optimized CE": qqqRow["CE"]}

# Pass 2: HRMS-derived optima from srm_table, stored as (precursor mz, CE).
for hrmsIdx, hrmsRow in srm_table.iterrows():
    hrmsValue = (hrmsRow["mz"], hrmsRow["CE"])
    for known in transition_indices:
        if known[0] == hrmsRow["Name"] and np.abs(hrmsRow["Product mz"] - known[1]) < tol and hrmsRow["Charge"] == known[2]:
            transition_indices[known]["HRMS Optimized CE (converted)"] = hrmsValue
            break
    else:
        # no existing transition matched -> register a new one
        freshKey = (hrmsRow["Name"], hrmsRow["Product mz"], hrmsRow["Charge"])
        transition_indices[freshKey] = {"HRMS Optimized CE (converted)": hrmsValue}

# One row per transition key; columns are the two CE annotations.
evaluation_results = pd.DataFrame.from_dict(transition_indices, orient="index")
evaluation_results



Unnamed: 0,Unnamed: 1,Unnamed: 2,QqQ Optimized CE,HRMS Optimized CE (converted)
6-PGA,96.970,-1,15.74,"(275.0173364, 20.0)"
6-PGA,257.196,-1,11.70,"(275.0173364, 10.0)"
6-PGA,79.042,-1,35.12,"(275.0173364, 70.0)"
6-PGA,177.071,-1,14.86,
Acetoacetyl-CoA,766.167,-1,28.55,"(850.12904, 20.0)"
...,...,...,...,...
Dopamine,67.030,1,,"(154.0863, 35.0)"
Serotonine,78.990,1,,"(177.102263, 50.0)"
Uracil,95.060,1,,"(113.03458, 20.0)"
Uracil,84.080,1,,"(113.03458, 50.0)"


In [None]:
if __name__ == "__main__":
    # Learning-curve experiment: for several training-set sizes, repeatedly
    # draw random training compounds, learn the CE conversion from them, and
    # score the conversion (R2) on the transitions of all remaining compounds.
    # r2s maps training-set size -> list of R2 values, one per random draw.
    r2s = {}

    # The data files are the same for every draw, so define them once.
    msFilenames = ["../data/IDX/IDX_MS2_data/M3T_10uM_pos_DDA_10NCEs_25-35_50ms_5e4_DE5s_updatedRT.mzML",
                   "../data/IDX/IDX_MS2_data/M3T_10uM_neg_DDA_10NCEs_25-35_50ms_5e4_DE5s_updatedRT.mzML",
                   "../data/IDX/IDX_MS2_data/M3T_10uM_pos_DDA_10NCEs_25-35_80ms_1e4_DE5s_updatedRT_missing.mzML"]

    # Requested training-set sizes. random.sample raises ValueError when asked
    # for more items than the population holds, and a size equal to the whole
    # population would leave nothing to evaluate on, so only sizes strictly
    # between 0 and len(allCpds) are run. The set also removes a duplicate if
    # len(allCpds)-1 coincides with one of the fixed sizes.
    requestedSizes = [3,5,10,15,20,40,60,len(allCpds)-1]
    trainSizes = sorted({k for k in requestedSizes if 0 < k < len(allCpds)})
    skippedSizes = sorted(set(requestedSizes) - set(trainSizes))
    if skippedSizes:
        print("skipping training sizes not in (0, %d): %s" % (len(allCpds), skippedSizes))

    # Private seeded generator plus a sorted pool, so the draws are repeatable
    # regardless of the order in which allCpds was built and without touching
    # the global `random` state.
    samplingSeed = 0
    sampler = rd.Random(samplingSeed)
    cpdPool = sorted(allCpds, key=str)

    # A dedicated loop variable is used so the configuration constant
    # num2Train is not overwritten by this sweep.
    for trainSize in trainSizes:

        r2s[trainSize] = []

        # numIters independent draws of `trainSize` distinct compounds
        trainingCpds = [sampler.sample(cpdPool, k=trainSize) for _ in range(numIters)]

        for cpds in trainingCpds:
            # transitions belonging to the training compounds
            filt = totalTransitions[totalTransitions["Name"].isin(cpds)]
            filt.to_csv("tmp_to_learn_conv.csv",index=False)

            # create a fresh srm_maker object for every draw
            srm_maker = SRM_maker(ppm=ppmTol, numCores=numCores)

            # set datafiles for learning conversion
            trainingData = pd.read_csv("tmp_to_learn_conv.csv")

            # build conversion; the returned table is not needed here, only the
            # state left on srm_maker (used by CE_converter below).
            # frag_ppm_tolerance evaluates to 5000 ppm.
            srm_maker.buildConversion(msFilenames, trainingData, tic_cutoff=0, frag_cutoff=0,
                                      frag_ppm_tolerance=2 * 1e6 * .5 / 200)

            predCEs = []
            trueCEs = []

            # Score only transitions of held-out compounds that have both a
            # QqQ-optimized CE and an HRMS (mz, CE) tuple.
            for index,row in evaluation_results.iterrows():
                if index[0] in cpds:
                    continue
                hrmsOpt = row["HRMS Optimized CE (converted)"]
                if pd.isna(row["QqQ Optimized CE"]) or not isinstance(hrmsOpt, tuple):
                    continue
                trueCEs.append(row["QqQ Optimized CE"])
                predCEs.append(srm_maker.CE_converter(hrmsOpt))

            # R2 is undefined for fewer than two points (and r2_score raises on
            # empty input); record NaN instead of aborting the whole sweep.
            if len(trueCEs) < 2:
                r2 = np.nan
            else:
                r2 = r2_score(trueCEs,predCEs)
            r2s[trainSize].append(r2)
            

Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  152
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  51
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  178
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvolve:  272
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
buil

read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvolve:  72
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  61
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  60
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconv

read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  30
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  65
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvolve:  121
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  75
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deco

gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvolve:  156
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  23
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  43
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  177
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target 

read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  53
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  103
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvolve:  84
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to decon

read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvolve:  142
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvo

gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  93
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  40
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  97
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  90
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target tr

reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvolve:  126
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  87
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  142
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  4
Number of spectra to deconvolve:  279
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvolve:  120
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0

read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  5
Number of spectra to deconvolve:  234
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  0
Number of spectra to deconvolve:  0
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  2
Number of spectra to deconvolve:  280
read data
reading data...
2566  MS2 spectra detected
Number of compounds with acquired MS2:  3
Number of spectra to deconvolve:  92
read data
reading data...
779  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to deconvolve:  178
read data
Finding target transitions
gathering optimal CEs
built conversion
Library loaded successfully: 0 spectra found
reading data...
2950  MS2 spectra detected
Number of compounds with acquired MS2:  1
Number of spectra to de

In [None]:
# Distribution of the R2 values collected for a training-set size of 10.
r2AtTen = r2s[10]
plt.hist(r2AtTen)
plt.xlabel("R2")
plt.ylabel("frequency")

In [None]:
if __name__ == "__main__":
    # Learn the CE conversion from every compound in the transition list,
    # save the merged table and print the resulting equation.

    # MS2 data files used to build the conversion
    msFilenames = ["../data/IDX/IDX_MS2_data/M3T_10uM_pos_DDA_10NCEs_25-35_50ms_5e4_DE5s_updatedRT.mzML",
                   "../data/IDX/IDX_MS2_data/M3T_10uM_neg_DDA_10NCEs_25-35_50ms_5e4_DE5s_updatedRT.mzML",
                   "../data/IDX/IDX_MS2_data/M3T_10uM_pos_DDA_10NCEs_25-35_80ms_1e4_DE5s_updatedRT_missing.mzML"]

    # write the full (unfiltered) transition list to the temporary training file
    filt = totalTransitions
    filt.to_csv("tmp_to_learn_conv.csv",index=False)

    # create srm_maker object
    srm_maker = SRM_maker(ppm=ppmTol, numCores=numCores)

    # read the training data back from the temporary file
    trainingData = pd.read_csv("tmp_to_learn_conv.csv")

    # build conversion (frag_ppm_tolerance evaluates to 5000 ppm)
    conversionOptions = dict(
        tic_cutoff=0,
        frag_cutoff=0,
        frag_ppm_tolerance=2 * 1e6 * .5 / 200,
    )
    merged = srm_maker.buildConversion(msFilenames, trainingData, **conversionOptions)

    # persist the merged table, then report the learned equation
    merged.to_csv("../data/IDX/all_cpds_merged_to_learn_conversion.csv")

    print(srm_maker.getConversionEquationString())

In [None]:
# Show which training-set sizes currently have R2 results stored in r2s.
print(r2s.keys())

In [None]:
# Learning curve: mean +/- standard deviation of R2 per training-set size.
keys = sorted(r2s.keys())
# Leave out the largest training-set size.
# NOTE(review): presumably because the len(allCpds)-1 run leaves a single
# held-out compound, where R2 is unreliable - confirm.
keys = keys[:-1]
# NaN-aware statistics: an undefined R2 (NaN) from one draw would otherwise
# turn the mean and std of the whole training-set size into NaN.
vals = [np.nanmean(r2s[k]) for k in keys]
errs = [np.nanstd(r2s[k]) for k in keys]
plt.scatter(keys,vals,color="black")
plt.errorbar(keys,vals,yerr=errs,color="black",capsize=3)
plt.ylim((0,1.2))
plt.xlabel("# of compounds")
plt.ylabel("R2")
plt.savefig("num_cpds_plot.png")