In [1]:
import pandas as pd
import numpy as np
import sys
import pickle
import sqlite3
import peakutils
from scipy import signal
import random
from numba import njit
import warnings


In [2]:
# Run configuration. The estimator paths are derived from run_name and estimators_dir so that
# switching to another run only requires changing run_name (previously the run name was
# repeated inside each hard-coded path and could silently drift out of sync).
file_idx = 0  # NOTE(review): not referenced by any cell visible here - confirm it is still needed
run_name = '190719_Hela_Ecoli_1to1_01'
# directory holding the merged sequence attributes and the pickled per-run estimators
estimators_dir = '/Users/darylwilding-mcbride/Downloads/estimators'
merged_df_file_name = '{}/merged-run-library-sequence-attribs.pkl'.format(estimators_dir)
mz_estimator_file_name = '{}/run-{}-mz-estimator.pkl'.format(estimators_dir, run_name)
scan_estimator_file_name = '{}/run-{}-scan-estimator.pkl'.format(estimators_dir, run_name)
rt_estimator_file_name = '{}/run-{}-rt-estimator.pkl'.format(estimators_dir, run_name)

In [3]:
# Subtracted once per charge when converting an m/z to a neutral monoisotopic mass
# (see calculate_monoisotopic_mass_from_mz). Presumably in Da, like CARBON_MASS_DIFFERENCE - confirm.
PROTON_MASS = 1.007276

# frame types for PASEF mode
# These values are compared against the MsMsType / frame_type columns of the converted database.
FRAME_TYPE_MS1 = 0
FRAME_TYPE_MS2 = 8

# SQLite database of converted raw data for this run; queried for frame properties and raw points.
# NOTE(review): absolute, machine-specific path that also embeds the run name - keep in step with run_name.
CONVERTED_DATABASE_NAME = '/Users/darylwilding-mcbride/Downloads/experiments/dwm-test/converted-databases/exp-dwm-test-run-190719_Hela_Ecoli_1to1_01-converted.sqlite'

# Mass difference between Carbon-12 and Carbon-13 isotopes, in Da. For calculating the spacing between isotopic peaks.
CARBON_MASS_DIFFERENCE = 1.003355

In [4]:
def _load_estimator(file_name):
    """Unpickle and return the estimator stored in file_name."""
    # NOTE: pickle.load can execute arbitrary code; only load estimator files from a trusted source.
    with open(file_name, 'rb') as estimator_file:
        return pickle.load(estimator_file)

# load the estimators used to predict the m/z, scan and RT coordinates for this run
mz_estimator = _load_estimator(mz_estimator_file_name)
scan_estimator = _load_estimator(scan_estimator_file_name)
rt_estimator = _load_estimator(rt_estimator_file_name)




In [5]:
# Load the pickled DataFrame named by merged_df_file_name.
# NOTE: extract_feature_metrics_at_coords also assigns a *local* variable called merged_df;
# that local does not modify this module-level frame.
merged_df = pd.read_pickle(merged_df_file_name)

In [6]:
# get the ms1 frame ids with a range of RT as a tuple
def get_ms1_frame_ids(converted_db_name, rt_lower, rt_upper):
    """Return the ids of the MS1 frames whose Time lies in [rt_lower, rt_upper].

    Parameters
    ----------
    converted_db_name : path of the converted SQLite database containing a frame_properties table
    rt_lower, rt_upper : inclusive bounds on the frame_properties.Time column

    Returns
    -------
    tuple of frame ids, ordered by ascending Time (empty if no frame matches)
    """
    # values are bound as parameters rather than formatted into the SQL text
    query = "select Id,Time from frame_properties where Time >= ? and Time <= ? and MsMsType == ? order by Time"
    # cast to built-in types so that numpy scalars are accepted by the sqlite3 driver
    query_params = (float(rt_lower), float(rt_upper), int(FRAME_TYPE_MS1))
    db_conn = sqlite3.connect(converted_db_name)
    try:
        ms1_frame_properties_df = pd.read_sql_query(query, db_conn, params=query_params)
    finally:
        # release the connection even if the query fails
        db_conn.close()
    return tuple(ms1_frame_properties_df.Id)

In [7]:
def calculate_monoisotopic_mass_from_mz(monoisotopic_mz, charge):
    """Convert a monoisotopic m/z at the given charge state to a monoisotopic mass.

    The m/z is scaled by the charge, and one PROTON_MASS per charge is removed.
    """
    charged_mass = monoisotopic_mz * charge
    mass_of_protons = PROTON_MASS * charge
    return charged_mass - mass_of_protons

In [8]:
# calculate the r-squared value of series_2 against series_1, where series_1 is the original data (source: https://stackoverflow.com/a/37899817/1184799)
def calculate_r_squared(series_1, series_2):
    """Return 1 - (residual sum of squares / total sum of squares).

    series_1 is the observed data and series_2 the values being compared against it.
    """
    # spread of the observed data around its own mean
    deviation_from_mean = series_1 - np.mean(series_1)
    total_sum_of_squares = np.sum(deviation_from_mean**2)
    # disagreement between the observed data and the comparison values
    fit_error = series_1 - series_2
    residual_sum_of_squares = np.sum(fit_error**2)
    return 1 - (residual_sum_of_squares / total_sum_of_squares)

In [9]:
def estimate_target_coordinates(row_as_series, mz_estimator, scan_estimator, rt_estimator):
    """Estimate where a sequence should appear in this run.

    The seven sequence attributes are taken from row_as_series and passed, as a single-row
    2D array, to each estimator's predict(). The m/z estimator's output is applied as a ppm
    delta to the theoretical m/z; the scan and RT outputs are applied as fractional deltas
    to the experiment-wide scan and RT means.

    Returns a dict with keys 'mono_mz', 'scan_apex' and 'rt_apex'.
    """
    attrib_names = [
        'theoretical_mz',
        'experiment_rt_mean',
        'experiment_rt_std_dev',
        'experiment_scan_mean',
        'experiment_scan_std_dev',
        'experiment_intensity_mean',
        'experiment_intensity_std_dev',
    ]
    attribs_s = row_as_series[attrib_names]
    # predict() is given one row with one column per attribute
    attribs_2d = attribs_s.values.reshape(1, -1)

    # monoisotopic m/z: the prediction is a delta in ppm of the theoretical m/z
    predicted_mz_delta_ppm = mz_estimator.predict(attribs_2d)[0]
    library_mz = attribs_s['theoretical_mz']
    mono_mz = (predicted_mz_delta_ppm / 1e6 * library_mz) + library_mz

    # scan apex: the prediction is a fraction of the experiment's mean scan
    predicted_scan_delta = scan_estimator.predict(attribs_2d)[0]
    scan_mean = attribs_s['experiment_scan_mean']
    scan_apex = (predicted_scan_delta * scan_mean) + scan_mean

    # RT apex: the prediction is a fraction of the experiment's mean RT
    predicted_rt_delta = rt_estimator.predict(attribs_2d)[0]
    rt_mean = attribs_s['experiment_rt_mean']
    rt_apex = (predicted_rt_delta * rt_mean) + rt_mean

    return dict(mono_mz=mono_mz, scan_apex=scan_apex, rt_apex=rt_apex)


In [10]:
def get_decoy_coordinates(target_mz, target_scan, peak_width_scan, target_rt, peak_width_rt):
    """Generate decoy coordinates displaced from a target in m/z, scan and RT.

    In each dimension a direction is picked at random, a fixed base offset is applied in
    that direction (100 ppm in m/z, two peak widths in scan and RT), and a random integer
    jitter is added (+/- 20 ppm in m/z, +/- 10 in scan and RT).

    Returns the tuple (decoy_mz, decoy_scan, decoy_rt).
    """
    # m/z: 100 ppm either side of the target, jittered by up to 20 ppm
    mz_direction = -1 if random.random() >= 0.5 else 1
    mz_offset_ppm = (mz_direction * 100) + random.randint(-20, +20)
    decoy_mz = target_mz + (mz_offset_ppm / 1e6 * target_mz)

    # scan: two peak widths either side of the target, jittered by up to 10
    scan_direction = -1 if random.random() >= 0.5 else 1
    scan_offset = (scan_direction * 2 * peak_width_scan) + random.randint(-10, +10)
    decoy_scan = target_scan + scan_offset

    # RT: two peak widths either side of the target, jittered by up to 10
    rt_direction = -1 if random.random() >= 0.5 else 1
    rt_offset = (rt_direction * 2 * peak_width_rt) + random.randint(-10, +10)
    decoy_rt = target_rt + rt_offset

    return (decoy_mz, decoy_scan, decoy_rt)

In [11]:
def calculate_decoy_coordinates(row_as_series):
    """Build decoy coordinates for a row from its estimated target coordinates.

    Reads the row's target_coords dict and its experiment scan / RT peak widths, and
    returns a dict with the same keys as target_coords: 'mono_mz', 'scan_apex', 'rt_apex'.
    """
    scan_width = row_as_series.experiment_scan_peak_width
    rt_width = row_as_series.experiment_rt_peak_width
    target_coords = row_as_series.target_coords

    decoy_mz, decoy_scan, decoy_rt = get_decoy_coordinates(
        target_mz=target_coords['mono_mz'],
        target_scan=target_coords['scan_apex'],
        peak_width_scan=scan_width,
        target_rt=target_coords['rt_apex'],
        peak_width_rt=rt_width,
    )
    return dict(mono_mz=decoy_mz, scan_apex=decoy_scan, rt_apex=decoy_rt)


In [23]:
# assumes the isotope's raw points have already been flattened to a particular dimension (e.g. scan, RT, m/z) and
# sorted by ascending order in that dimension
def fit_curve_to_flattened_isotope(flattened_points_df, estimated_apex, estimated_peak_width):
    """Isolate the peak nearest the estimated apex, fit a gaussian to it and return its metrics.

    Parameters
    ----------
    flattened_points_df : DataFrame with at least the columns 'x' (position in the flattened
        dimension) and 'intensity'. A 'filtered_intensity' column is added to the passed
        frame in place.
    estimated_apex : expected apex position, in the units of 'x'
    estimated_peak_width : expected peak width; a quarter of it is used as the minimum
        distance between detected peaks (and between detected valleys)

    Returns
    -------
    dict of metrics if a gaussian could be fitted, otherwise None. Besides the fit metrics
    ('fitted', 'std_dev', 'full_width_half_max', 'base_width', 'base_lower', 'base_upper',
    'r_squared', 'apex_x', 'intensity_at_apex_x', 'area_under_curve', 'peak_symmetry',
    'fitted_curve_df', 'isolated_peak_df') the dict holds 'number_of_peaks' and
    'original_intensity_filtered', plus peak/valley diagnostics when more than one peak
    was detected. 'selected_peak_idx', 'valley_idx_below' and 'valley_idx_above' are
    positions (row numbers) in the untrimmed input, or -1 when no valley was found.
    """
    results_d = {}
    found_curve_fit = False

    if len(flattened_points_df) > 0:
        # apply a filter to make curve fitting easier, if there are enough points
        window_length = 11
        if len(flattened_points_df) > window_length:
            flattened_points_df['filtered_intensity'] = signal.savgol_filter(flattened_points_df.intensity, window_length=window_length, polyorder=3)
        else:
            # there are fewer points than the window length, so we'll try to fit the gaussian to the unfiltered points
            flattened_points_df['filtered_intensity'] = flattened_points_df.intensity

        # determine whether there's more than one peak
        peak_maxima_indexes = peakutils.indexes(flattened_points_df.filtered_intensity.values, thres=0.05, min_dist=estimated_peak_width/4, thres_abs=False)
        results_d["number_of_peaks"] = len(peak_maxima_indexes)

        results_d["original_intensity_filtered"] = flattened_points_df.filtered_intensity.values

        if len(peak_maxima_indexes) > 0:
            # if there's more than one peak, we need to trim the data to a single peak
            if len(peak_maxima_indexes) > 1:
                # snapshot of x before any trimming, so the positional indexes returned by
                # peakutils can be looked up directly regardless of the frame's index labels
                x_values = flattened_points_df.x.values

                peaks_df = flattened_points_df.iloc[peak_maxima_indexes].copy()
                # find the peak closest to the estimated apex; keep its *position* so it is
                # comparable with the positional valley indexes below
                peaks_df['apex_delta'] = abs(peaks_df.x - estimated_apex)
                peak_idx = peak_maxima_indexes[peaks_df.apex_delta.values.argmin()]

                valleys_idx = peakutils.indexes(-flattened_points_df.filtered_intensity.values, thres=0.5, min_dist=estimated_peak_width/4, thres_abs=False)
                valleys_df = flattened_points_df.iloc[valleys_idx].copy()
                # find the closest valley above the peak
                if (len(valleys_idx) > 0) and (max(valleys_idx) > peak_idx):
                    valley_idx_above = valleys_idx[valleys_idx > peak_idx].min()
                    base_upper = x_values[valley_idx_above]
                    flattened_points_df = flattened_points_df[(flattened_points_df.x <= base_upper)]
                else:
                    # could not find a valley above the peak
                    valley_idx_above = -1
                # find the closest valley below the peak
                if (len(valleys_idx) > 0) and (min(valleys_idx) < peak_idx):
                    valley_idx_below = valleys_idx[valleys_idx < peak_idx].max()
                    base_lower = x_values[valley_idx_below]
                    flattened_points_df = flattened_points_df[(flattened_points_df.x >= base_lower)]
                else:
                    # could not find a valley below the peak
                    valley_idx_below = -1

                results_d["peaks_df"] = peaks_df
                results_d["selected_peak_idx"] = peak_idx
                results_d["peak_maxima_indexes"] = peak_maxima_indexes
                results_d["valleys_df"] = valleys_df
                results_d["valley_idx_below"] = valley_idx_below
                results_d["valley_idx_above"] = valley_idx_above

            # fit to the top most interesting bit of the curve
            flattened_points_df = flattened_points_df[flattened_points_df.filtered_intensity >= (flattened_points_df.filtered_intensity.max() * 0.02)]
            if len(flattened_points_df) > 0:
                # fit a gaussian curve to the data
                try:
                    gaussian_params = peakutils.peak.gaussian_fit(flattened_points_df.x.values, flattened_points_df.filtered_intensity.values, center_only=False)
                    apex_x = gaussian_params[1]
                    std_dev = abs(gaussian_params[2])
                    fitted_intensity_values = peakutils.peak.gaussian(flattened_points_df.x.values, gaussian_params[0], gaussian_params[1], gaussian_params[2])
                    fitted_curve_df = flattened_points_df.copy()
                    fitted_curve_df['fitted_intensity'] = fitted_intensity_values
                    area_under_curve = fitted_intensity_values.sum()
                    intensity_at_apex = peakutils.peak.gaussian(apex_x, gaussian_params[0], gaussian_params[1], gaussian_params[2])

                    # calculate the r-squared value of the fitted curve
                    r_squared = calculate_r_squared(series_1=flattened_points_df.intensity, series_2=fitted_intensity_values)

                    # calculate the peak symmetry
                    lhs_auc = flattened_points_df[(flattened_points_df.x <= apex_x)].intensity.sum()
                    rhs_auc = flattened_points_df[(flattened_points_df.x > apex_x)].intensity.sum()
                    if rhs_auc == 0:
                        peak_symmetry = 0
                    else:
                        peak_symmetry = lhs_auc / rhs_auc

                    # only flag success once every metric has been computed, so a failure
                    # part-way through cannot leave the collation below with unbound names
                    found_curve_fit = True
                except Exception:
                    # best effort: any failure while fitting or measuring (including warnings
                    # that have been promoted to exceptions) is reported as "no fit";
                    # KeyboardInterrupt and SystemExit are deliberately not swallowed
                    pass

    if not found_curve_fit:
        return None

    # collate the metrics into the results dictionary
    results_d["fitted"] = found_curve_fit
    results_d["std_dev"] = std_dev
    results_d["full_width_half_max"] = 2.355 * std_dev  # https://ned.ipac.caltech.edu/level5/Leo/Stats2_3.html
    results_d["base_width"] = 6 * std_dev  # https://ned.ipac.caltech.edu/level5/Leo/Stats2_3.html
    results_d["base_lower"] = apex_x - (3 * std_dev)  # https://ned.ipac.caltech.edu/level5/Leo/Stats2_3.html
    results_d["base_upper"] = apex_x + (3 * std_dev)  # https://ned.ipac.caltech.edu/level5/Leo/Stats2_3.html
    results_d["r_squared"] = r_squared
    results_d["apex_x"] = apex_x
    results_d["intensity_at_apex_x"] = intensity_at_apex
    results_d["area_under_curve"] = area_under_curve
    results_d["peak_symmetry"] = peak_symmetry
    results_d["fitted_curve_df"] = fitted_curve_df
    results_d["isolated_peak_df"] = flattened_points_df
    return results_d

In [46]:
# takes a numpy array of intensity, and another of mz
def mz_centroid(_int_f, _mz_f):
    """Return the intensity-weighted mean of the m/z values."""
    # each point's share of the total intensity
    intensity_weights = _int_f / _int_f.sum()
    weighted_mz = intensity_weights * _mz_f
    return weighted_mz.sum()

In [14]:
# Calculate the r-squared value for isotope peaks 1:0, 2:1 compared to the theoretical model
#
# Coefficient tables for the theoretical isotope peak-height-ratio model used by peak_ratio().
# NOTE(review): the source of these coefficients is not recorded here - TODO add a citation.

# number of sulphur-count slots in model_params (sulphur counts 0, 1 and 2)
MAX_NUMBER_OF_SULPHUR_ATOMS = 3
# ratios are tabulated for peak numbers 1..6; slot 0 of each table is left unassigned
MAX_NUMBER_OF_PREDICTED_RATIOS = 6

# Each S<n>_r[peak_number] holds the five polynomial coefficients (beta0..beta4) that peak_ratio()
# evaluates in (monoisotopic mass / 1000) for molecules containing n sulphur atoms.
S0_r = np.empty(MAX_NUMBER_OF_PREDICTED_RATIOS+1, dtype=np.ndarray)
S0_r[1] = np.array([-0.00142320578040, 0.53158267080224, 0.00572776591574, -0.00040226083326, -0.00007968737684])
S0_r[2] = np.array([0.06258138406507, 0.24252967352808, 0.01729736525102, -0.00427641490976, 0.00038011211412])
S0_r[3] = np.array([0.03092092306220, 0.22353930450345, -0.02630395501009, 0.00728183023772, -0.00073155573939])
S0_r[4] = np.array([-0.02490747037406, 0.26363266501679, -0.07330346656184, 0.01876886839392, -0.00176688757979])
S0_r[5] = np.array([-0.19423148776489, 0.45952477474223, -0.18163820209523, 0.04173579115885, -0.00355426505742])
S0_r[6] = np.array([0.04574408690798, -0.05092121193598, 0.13874539944789, -0.04344815868749, 0.00449747222180])

S1_r = np.empty(MAX_NUMBER_OF_PREDICTED_RATIOS+1, dtype=np.ndarray)
S1_r[1] = np.array([-0.01040584267474, 0.53121149663696, 0.00576913817747, -0.00039325152252, -0.00007954180489])
S1_r[2] = np.array([0.37339166598255, -0.15814640001919, 0.24085046064819, -0.06068695741919, 0.00563606634601])
S1_r[3] = np.array([0.06969331604484, 0.28154425636993, -0.08121643989151, 0.02372741957255, -0.00238998426027])
S1_r[4] = np.array([0.04462649178239, 0.23204790123388, -0.06083969521863, 0.01564282892512, -0.00145145206815])
S1_r[5] = np.array([-0.20727547407753, 0.53536509500863, -0.22521649838170, 0.05180965157326, -0.00439750995163])
S1_r[6] = np.array([0.27169670700251, -0.37192045082925, 0.31939855191976, -0.08668833166842, 0.00822975581940])

S2_r = np.empty(MAX_NUMBER_OF_PREDICTED_RATIOS+1, dtype=np.ndarray)
S2_r[1] = np.array([-0.01937823810470, 0.53084210514216, 0.00580573751882, -0.00038281138203, -0.00007958217070])
S2_r[2] = np.array([0.68496829280011, -0.54558176102022, 0.44926662609767, -0.11154849560657, 0.01023294598884])
S2_r[3] = np.array([0.04215807391059, 0.40434195078925, -0.15884974959493, 0.04319968814535, -0.00413693825139])
S2_r[4] = np.array([0.14015578207913, 0.14407679007180, -0.01310480312503, 0.00362292256563, -0.00034189078786])
S2_r[5] = np.array([-0.02549241716294, 0.32153542852101, -0.11409513283836, 0.02617210469576, -0.00221816103608])
S2_r[6] = np.array([-0.14490868030324, 0.33629928307361, -0.08223564735018, 0.01023410734015, -0.00027717589598])

# model_params[number_of_sulphur][peak_number] -> array of beta0..beta4
model_params = np.empty(MAX_NUMBER_OF_SULPHUR_ATOMS, dtype=np.ndarray)
model_params[0] = S0_r
model_params[1] = S1_r
model_params[2] = S2_r

# Find the ratio of H(peak_number)/H(peak_number-1) for peak_number=1..6
# peak_number = 0 refers to the monoisotopic peak
# number_of_sulphur = number of sulphur atoms in the molecule
def peak_ratio(monoisotopic_mass, peak_number, number_of_sulphur):
    """Return the modelled peak-height ratio, or None when the inputs are outside the model.

    The model covers peak numbers 1 to 6 and 0 to 2 sulphur atoms, each combination over
    its own inclusive monoisotopic mass range. Within range, the ratio is a 4th-order
    polynomial in (monoisotopic_mass / 1000) using the coefficients in model_params.
    """
    # lowest supported mass for this peak number, indexed by number of sulphur atoms
    if 1 <= peak_number <= 3:
        mass_lower_limits = (498, 530, 562)
    elif peak_number == 4:
        mass_lower_limits = (907, 939, 971)
    elif peak_number == 5:
        mass_lower_limits = (1219, 1251, 1283)
    elif peak_number == 6:
        mass_lower_limits = (1559, 1591, 1623)
    else:
        return None

    # the highest supported mass depends only on the number of sulphur atoms
    if number_of_sulphur == 0:
        mass_lower, mass_upper = mass_lower_limits[0], 3915
    elif number_of_sulphur == 1:
        mass_lower, mass_upper = mass_lower_limits[1], 3947
    elif number_of_sulphur == 2:
        mass_lower, mass_upper = mass_lower_limits[2], 3978
    else:
        return None

    if not (mass_lower <= monoisotopic_mass <= mass_upper):
        return None

    beta0, beta1, beta2, beta3, beta4 = model_params[number_of_sulphur][peak_number]
    scaled_m = monoisotopic_mass / 1000.0
    return beta0 + (beta1*scaled_m) + beta2*(scaled_m**2) + beta3*(scaled_m**3) + beta4*(scaled_m**4)


In [15]:
# Promote every warning to an exception for all code run after this cell (process-wide setting),
# so that conditions which would otherwise only emit a warning surface as errors that the
# try/except and assert handling in the cells below can react to.
warnings.filterwarnings('error')

In [49]:
def extract_feature_metrics_at_coords(coordinates_d, experiment_scan_peak_width, experiment_rt_peak_width, charge):
    mono_mz = coordinates_d['mono_mz']
    scan_apex = coordinates_d['scan_apex']
    rt_apex = coordinates_d['rt_apex']
    
    # distance for looking either side of the scan and RT apex, based on the other times this sequence has been seen in this experiment
    SCAN_WIDTH = experiment_scan_peak_width
    RT_WIDTH = experiment_rt_peak_width

    # the width to use for isotopic width, in Da
    MZ_TOLERANCE_PPM = 5  # +/- this amount
    MZ_TOLERANCE_PERCENT = MZ_TOLERANCE_PPM * 10**-4
    MS1_PEAK_DELTA = mono_mz * MZ_TOLERANCE_PERCENT / 100

    # the number of isotopes to look for in the m/z dimension - the theoretical model includes 7 (the monoisotopic plus 6 isotopes)
    NUMBER_OF_ISOTOPES = 7
    expected_spacing_mz = CARBON_MASS_DIFFERENCE / charge

    # define the region we will look in for the feature
    feature_region_mz_lower = mono_mz - MS1_PEAK_DELTA
    feature_region_mz_upper = mono_mz + (NUMBER_OF_ISOTOPES * expected_spacing_mz) + MS1_PEAK_DELTA
    scan_lower = scan_apex - (2 * SCAN_WIDTH)
    scan_upper = scan_apex + (2 * SCAN_WIDTH)
    rt_lower = rt_apex - (2 * RT_WIDTH)
    rt_upper = rt_apex + (2 * RT_WIDTH)

    # extract the raw data within this area of interest
    db_conn = sqlite3.connect(CONVERTED_DATABASE_NAME)
    feature_region_raw_points_df = pd.read_sql_query("select frame_id,mz,scan,intensity,retention_time_secs from frames where mz >= {} and mz <= {} and scan >= {} and scan <= {} and retention_time_secs >= {} and retention_time_secs <= {} and frame_type == {}".format(feature_region_mz_lower, feature_region_mz_upper, scan_lower, scan_upper, rt_lower, rt_upper, FRAME_TYPE_MS1), db_conn)
    db_conn.close()
    
    if len(feature_region_raw_points_df) > 0:
        # derive peaks for the monoisotopic and the isotopes
        isotope_peaks_l = []
        isotope_raw_points_l = []
        mz = mono_mz
        for isotope_idx in range(NUMBER_OF_ISOTOPES):
            estimated_isotope_midpoint = mz + (isotope_idx * expected_spacing_mz)
            isotope_mz_lower = estimated_isotope_midpoint - MS1_PEAK_DELTA
            isotope_mz_upper = estimated_isotope_midpoint + MS1_PEAK_DELTA
            isotope_raw_points_df = feature_region_raw_points_df[(feature_region_raw_points_df.mz >= isotope_mz_lower) & (feature_region_raw_points_df.mz <= isotope_mz_upper)]
            # add the isotope's raw points to the list
            isotope_raw_points_l.append(isotope_raw_points_df)
            # centroid the raw points to get the peak for the isotope
            isotope_raw_points_a = isotope_raw_points_df[['mz','intensity']].values
            assert(None not in isotope_raw_points_a), "found NaN in isotope {}".format(isotope_idx)
            mz_cent = mz_centroid(isotope_raw_points_a[:,1], isotope_raw_points_a[:,0])
            summed_intensity = isotope_raw_points_a[:,1].sum()
            # add the peak to the list of isotopic peaks
            isotope_peaks_l.append((mz_cent, summed_intensity))
            if isotope_idx == 0:
                # set the m/z reference point to be the monoisotope's centroided m/z
                mz = mz_cent
        isotope_peaks_df = pd.DataFrame(isotope_peaks_l, columns=['mz_centroid','summed_intensity'])

        try:

            # We have confidence in the accuracy of each dimension in decreasing order: m/z, RT, scan. Therefore we constrain 
            # the cuboid by m/z first to find the peak in RT, then constrain the points to the RT peak's FWHM, then find 
            # the peak in the scan dimension.

            # Monoisotopic peak
            mono_raw_points_df = isotope_raw_points_l[0]

            # Collapse the points onto the RT dimension
            rt_0_df = mono_raw_points_df.groupby(['frame_id','retention_time_secs'], as_index=False).intensity.sum()
            rt_0_df.sort_values(by=['retention_time_secs'], ascending=True, inplace=True)
            rt_0_df['x'] = rt_0_df.retention_time_secs
            rt_0_metrics = fit_curve_to_flattened_isotope(rt_0_df, rt_apex, RT_WIDTH)

            assert(rt_0_metrics is not None), "could not calculate metrics for monoisotopic peak in RT dimension"

            # Collapse the points onto the mobility dimension, constraining the points to the FWHM of the peak in RT
            rt_peak_fwhm_lower = rt_0_metrics['apex_x'] - (rt_0_metrics['full_width_half_max'])
            rt_peak_fwhm_upper = rt_0_metrics['apex_x'] + (rt_0_metrics['full_width_half_max'])
            mono_raw_points_df = mono_raw_points_df[(mono_raw_points_df.retention_time_secs >= rt_peak_fwhm_lower) & (mono_raw_points_df.retention_time_secs <= rt_peak_fwhm_upper)].copy()
            scan_0_df = mono_raw_points_df.groupby(['scan'], as_index=False).intensity.sum()
            scan_0_df.sort_values(by=['scan'], ascending=True, inplace=True)
            scan_0_df['x'] = scan_0_df.scan
            scan_0_metrics = fit_curve_to_flattened_isotope(scan_0_df, scan_apex, SCAN_WIDTH)

            assert(scan_0_metrics is not None), "could not calculate metrics for monoisotopic peak in scan dimension"

            # Isotope 1 peak
            isotope_1_raw_points_df = isotope_raw_points_l[1]

            # Collapse the points onto the RT dimension
            rt_1_df = isotope_1_raw_points_df.groupby(['frame_id','retention_time_secs'], as_index=False).intensity.sum()
            rt_1_df.sort_values(by=['retention_time_secs'], ascending=True, inplace=True)
            rt_1_df['x'] = rt_1_df.retention_time_secs
            rt_1_metrics = fit_curve_to_flattened_isotope(rt_1_df, rt_apex, RT_WIDTH)

            assert(rt_1_metrics is not None), "could not calculate metrics for isotopic peak 1 in RT dimension"

            # Collapse the points onto the mobility dimension
            rt_peak_fwhm_lower = rt_1_metrics['apex_x'] - rt_1_metrics['full_width_half_max']
            rt_peak_fwhm_upper = rt_1_metrics['apex_x'] + rt_1_metrics['full_width_half_max']
            isotope_1_raw_points_df = isotope_1_raw_points_df[(isotope_1_raw_points_df.retention_time_secs >= rt_peak_fwhm_lower) & (isotope_1_raw_points_df.retention_time_secs <= rt_peak_fwhm_upper)].copy()
            scan_1_df = isotope_1_raw_points_df.groupby(['scan'], as_index=False).intensity.sum()
            scan_1_df.sort_values(by=['scan'], ascending=True, inplace=True)
            scan_1_df['x'] = scan_1_df.scan
            scan_1_metrics = fit_curve_to_flattened_isotope(scan_1_df, scan_apex, SCAN_WIDTH)    

            assert(scan_1_metrics is not None), "could not calculate metrics for isotopic peak 1 in scan dimension"

            # Isotope 2 peak
            isotope_2_raw_points_df = isotope_raw_points_l[2]

            # Collapse the points onto the RT dimension
            rt_2_df = isotope_2_raw_points_df.groupby(['frame_id','retention_time_secs'], as_index=False).intensity.sum()
            rt_2_df.sort_values(by=['retention_time_secs'], ascending=True, inplace=True)
            rt_2_df['x'] = rt_2_df.retention_time_secs
            rt_2_metrics = fit_curve_to_flattened_isotope(rt_2_df, rt_apex, RT_WIDTH)

            assert(rt_2_metrics is not None), "could not calculate metrics for isotopic peak 2 in the RT dimension"

            # Collapse the points onto the mobility dimension
            rt_peak_fwhm_lower = rt_2_metrics['apex_x'] - rt_2_metrics['full_width_half_max']
            rt_peak_fwhm_upper = rt_2_metrics['apex_x'] + rt_2_metrics['full_width_half_max']
            isotope_2_raw_points_df = isotope_2_raw_points_df[(isotope_2_raw_points_df.retention_time_secs >= rt_peak_fwhm_lower) & (isotope_2_raw_points_df.retention_time_secs <= rt_peak_fwhm_upper)].copy()
            scan_2_df = isotope_2_raw_points_df.groupby(['scan'], as_index=False).intensity.sum()
            scan_2_df.sort_values(by=['scan'], ascending=True, inplace=True)
            scan_2_df['x'] = scan_2_df.scan
            scan_2_metrics = fit_curve_to_flattened_isotope(scan_2_df, scan_apex, SCAN_WIDTH)    

            assert(scan_2_metrics is not None), "could not calculate metrics for isotopic peak 2 in the scan dimension"

            #####################################
            # Build a dictionary of the metrics
            #####################################
            feature_metrics = {}

            # Calculate the feature metrics
            calculated_monoisotopic_mz = isotope_peaks_df.iloc[0].mz_centroid
            delta_mz_ppm = (calculated_monoisotopic_mz - mono_mz) / mono_mz * 1e6
            feature_metrics['delta_mz_ppm'] = delta_mz_ppm
            feature_metrics['delta_scan'] = (scan_0_metrics['apex_x'] - scan_apex) / scan_apex
            feature_metrics['delta_rt'] = (rt_0_metrics['apex_x'] - rt_apex) / rt_apex

            # Calculate the delta ppm of the de-isotoped first and second isotopic peaks

            # monoisotopic
            monoisotopic_mz_centroid = isotope_peaks_df.iloc[0].mz_centroid

            # first isotope
            assert(isotope_peaks_df.iloc[1].summed_intensity > 0), "isotope 1 summed intensity is zero"
            isotope_1_mz_centroid = isotope_peaks_df.iloc[1].mz_centroid

            # second isotope
            assert(isotope_peaks_df.iloc[2].summed_intensity > 0), "isotope 2 summed intensity is zero"
            isotope_2_mz_centroid = isotope_peaks_df.iloc[2].mz_centroid

            isotope_0_1_mz_delta_ppm = (monoisotopic_mz_centroid - (isotope_1_mz_centroid - (1 * expected_spacing_mz))) / monoisotopic_mz_centroid * 1e6
            feature_metrics['isotope_0_1_mz_delta_ppm'] = isotope_0_1_mz_delta_ppm
            isotope_0_2_mz_delta_ppm = (monoisotopic_mz_centroid - (isotope_2_mz_centroid - (2 * expected_spacing_mz))) / monoisotopic_mz_centroid * 1e6
            feature_metrics['isotope_0_2_mz_delta_ppm'] = isotope_0_2_mz_delta_ppm

            # Calculate the RT apex deltas from the monoisotopic peak for the first and second isotopes
            isotope_0_1_rt_delta = (rt_1_metrics['apex_x'] - rt_0_metrics['apex_x']) / rt_0_metrics['apex_x']
            isotope_0_2_rt_delta = (rt_2_metrics['apex_x'] - rt_0_metrics['apex_x']) / rt_0_metrics['apex_x']
            feature_metrics['isotope_0_1_rt_delta'] = isotope_0_1_rt_delta
            feature_metrics['isotope_0_2_rt_delta'] = isotope_0_2_rt_delta

            # Calculate the scan apex deltas from the monoisotopic peak for the first and second isotopes
            isotope_0_1_scan_delta = (scan_1_metrics['apex_x'] - scan_0_metrics['apex_x']) / scan_0_metrics['apex_x']
            isotope_0_2_scan_delta = (scan_2_metrics['apex_x'] - scan_0_metrics['apex_x']) / scan_0_metrics['apex_x']
            feature_metrics['isotope_0_1_scan_delta'] = isotope_0_1_scan_delta
            feature_metrics['isotope_0_2_scan_delta'] = isotope_0_2_scan_delta

            # Calculate the monoisotopic peak intensity divided by the sum of the isotope peaks
            monoisotope_int_over_isotope_peak_int_sum = isotope_peaks_df.iloc[0].summed_intensity / isotope_peaks_df.iloc[:3].summed_intensity.sum()
            feature_metrics['monoisotope_int_over_isotope_peak_int_sum'] = monoisotope_int_over_isotope_peak_int_sum

            # Calculate the monoisotopic peak AUC divided by the sum of the isotope peak AUCs
            monoisotope_auc_over_isotope_peak_auc_sum = rt_0_metrics['area_under_curve'] / (rt_0_metrics['area_under_curve'] + rt_1_metrics['area_under_curve'] + rt_2_metrics['area_under_curve'])
            feature_metrics['monoisotope_auc_over_isotope_peak_auc_sum'] = monoisotope_auc_over_isotope_peak_auc_sum

            # calculate the theoretical and observed isotopic peak height ratios
            monoisotopic_mass = calculate_monoisotopic_mass_from_mz(monoisotopic_mz_centroid, charge)
            ratios = []
            for isotope in range(1,4):
                expected_ratio = peak_ratio(monoisotopic_mass=monoisotopic_mass, peak_number=isotope, number_of_sulphur=0)
                observed_ratio = isotope_peaks_df.iloc[isotope].summed_intensity / isotope_peaks_df.iloc[isotope-1].summed_intensity
                ratios.append((expected_ratio, observed_ratio))

            ratios_a = np.array(ratios)
            assert((None not in ratios_a[:,0]) and (None not in ratios_a[:,1])), "r-squared for PHR cannot be calculated"
            r_squared_phr = calculate_r_squared(ratios_a[:,0], ratios_a[:,1])
            feature_metrics['r_squared_phr'] = r_squared_phr

            # Calculate the geometric mean of the isotope peak intensities
            geometric_mean_0_1 = np.log(isotope_peaks_df.iloc[0].summed_intensity * isotope_peaks_df.iloc[1].summed_intensity) / 2
            geometric_mean_0_1_2 = np.log(isotope_peaks_df.iloc[0].summed_intensity * isotope_peaks_df.iloc[1].summed_intensity * isotope_peaks_df.iloc[2].summed_intensity) / 3
            feature_metrics['geometric_mean_0_1'] = geometric_mean_0_1
            feature_metrics['geometric_mean_0_1_2'] = geometric_mean_0_1_2

            # Calculate the m/z ppm standard deviation for isotopes 0 and 1
            mz_centroid_0 = isotope_peaks_df.iloc[0].mz_centroid
            mono_raw_points_df['mz_ppm_delta'] = (mono_raw_points_df.mz - mz_centroid_0) / mz_centroid_0 * 1e6
            mz_delta_ppm_std_dev_0 = np.std(mono_raw_points_df.mz_ppm_delta)

            mz_centroid_1 = isotope_peaks_df.iloc[1].mz_centroid
            isotope_1_raw_points_df['mz_ppm_delta'] = (isotope_1_raw_points_df.mz - mz_centroid_1) / mz_centroid_1 * 1e6
            mz_delta_ppm_std_dev_1 = np.std(isotope_1_raw_points_df.mz_ppm_delta)

            feature_metrics['mz_delta_ppm_std_dev_0'] = mz_delta_ppm_std_dev_0
            feature_metrics['mz_delta_ppm_std_dev_1'] = mz_delta_ppm_std_dev_1

            # Calculate the symmetry of the isotopes in RT and CCS
            feature_metrics['rt_peak_symmetry_0'] = rt_0_metrics['peak_symmetry']
            feature_metrics['rt_peak_symmetry_1'] = rt_1_metrics['peak_symmetry']
            feature_metrics['rt_peak_symmetry_2'] = rt_2_metrics['peak_symmetry']

            feature_metrics['scan_peak_symmetry_0'] = scan_0_metrics['peak_symmetry']
            feature_metrics['scan_peak_symmetry_1'] = scan_1_metrics['peak_symmetry']
            feature_metrics['scan_peak_symmetry_2'] = scan_2_metrics['peak_symmetry']

            # Calculate the isotopic peak correlation with each other in RT
            rt_isolated_peak_0_df = rt_0_metrics['isolated_peak_df'][['frame_id','intensity']]
            rt_isolated_peak_1_df = rt_1_metrics['isolated_peak_df'][['frame_id','intensity']]
            rt_isolated_peak_2_df = rt_2_metrics['isolated_peak_df'][['frame_id','intensity']]

            rt_combined_df = pd.merge(rt_isolated_peak_0_df, rt_isolated_peak_1_df, on='frame_id', how='left', suffixes=('_0', '_1')).sort_values(by='frame_id')
            rt_combined_df = pd.merge(rt_combined_df, rt_isolated_peak_2_df, on='frame_id', how='left', suffixes=('_0', '_2')).sort_values(by='frame_id')
            rt_combined_df.rename(columns={'intensity': 'intensity_2'}, inplace=True)
            rt_combined_df.fillna(1, inplace=True)
            rt_isotope_correlation = np.corrcoef(rt_combined_df[['intensity_0','intensity_1','intensity_2']].values, rowvar=False)[1,0]
            feature_metrics['rt_isotope_correlation'] = rt_isotope_correlation

            scan_isolated_peak_0_df = scan_0_metrics['isolated_peak_df'][['scan','intensity']]
            scan_isolated_peak_1_df = scan_1_metrics['isolated_peak_df'][['scan','intensity']]
            scan_isolated_peak_2_df = scan_2_metrics['isolated_peak_df'][['scan','intensity']]

            scan_combined_df = pd.merge(scan_isolated_peak_0_df, scan_isolated_peak_1_df, on='scan', how='left', suffixes=('_0', '_1')).sort_values(by='scan')
            scan_combined_df = pd.merge(scan_combined_df, scan_isolated_peak_2_df, on='scan', how='left', suffixes=('_0', '_2')).sort_values(by='scan')
            scan_combined_df.rename(columns={'intensity': 'intensity_2'}, inplace=True)
            scan_combined_df.fillna(1, inplace=True)
            scan_isotope_correlation = np.corrcoef(scan_combined_df[['intensity_0','intensity_1','intensity_2']].values, rowvar=False)[1,0]
            feature_metrics['scan_isotope_correlation'] = scan_isotope_correlation

            # Calculate the FWHM and peak base width of the monoisotopic peak in RT and CCS dimensions
            feature_metrics['fwhm_rt_0'] = rt_0_metrics['full_width_half_max']
            feature_metrics['peak_base_width_rt_0'] = rt_0_metrics['base_width']

            feature_metrics['fwhm_scan_0'] = scan_0_metrics['full_width_half_max']
            feature_metrics['peak_base_width_scan_0'] = scan_0_metrics['base_width']

            # Calculate the number of points and missing points in consecutive frames
            rt_lower_0 = rt_0_metrics['isolated_peak_df'].retention_time_secs.min()
            rt_upper_0 = rt_0_metrics['isolated_peak_df'].retention_time_secs.max()
            ms1_frame_ids_0 = get_ms1_frame_ids(CONVERTED_DATABASE_NAME, rt_lower_0, rt_upper_0)
            ms1_frame_ids_0_df = pd.DataFrame(ms1_frame_ids_0, columns=['frame_id'])
            ms1_frame_ids_0_df['intensity'] = 0
            merged_df = pd.merge(ms1_frame_ids_0_df, rt_isolated_peak_0_df, on='frame_id', how='left', suffixes=('_0', '_1')).sort_values(by='frame_id')
            number_of_missing_frames_0 = merged_df.intensity_1.isna().sum()
            feature_metrics['number_of_missing_frames_0'] = number_of_missing_frames_0
            feature_metrics['number_of_frames_0'] = len(ms1_frame_ids_0_df)

            rt_lower_1 = rt_1_metrics['isolated_peak_df'].retention_time_secs.min()
            rt_upper_1 = rt_1_metrics['isolated_peak_df'].retention_time_secs.max()
            ms1_frame_ids_1 = get_ms1_frame_ids(CONVERTED_DATABASE_NAME, rt_lower_1, rt_upper_1)
            ms1_frame_ids_1_df = pd.DataFrame(ms1_frame_ids_1, columns=['frame_id'])
            ms1_frame_ids_1_df['intensity'] = 0
            merged_df = pd.merge(ms1_frame_ids_1_df, rt_isolated_peak_1_df, on='frame_id', how='left', suffixes=('_0', '_1')).sort_values(by='frame_id')
            number_of_missing_frames_1 = merged_df.intensity_1.isna().sum()
            feature_metrics['number_of_missing_frames_1'] = number_of_missing_frames_1
            feature_metrics['number_of_frames_1'] = len(ms1_frame_ids_1_df)

            rt_lower_2 = rt_2_metrics['isolated_peak_df'].retention_time_secs.min()
            rt_upper_2 = rt_2_metrics['isolated_peak_df'].retention_time_secs.max()
            ms1_frame_ids_2 = get_ms1_frame_ids(CONVERTED_DATABASE_NAME, rt_lower_2, rt_upper_2)
            ms1_frame_ids_2_df = pd.DataFrame(ms1_frame_ids_2, columns=['frame_id'])
            ms1_frame_ids_2_df['intensity'] = 0
            merged_df = pd.merge(ms1_frame_ids_2_df, rt_isolated_peak_2_df, on='frame_id', how='left', suffixes=('_0', '_1')).sort_values(by='frame_id')
            number_of_missing_frames_2 = merged_df.intensity_1.isna().sum()
            feature_metrics['number_of_missing_frames_2'] = number_of_missing_frames_2
            feature_metrics['number_of_frames_2'] = len(ms1_frame_ids_2_df)

        except (AssertionError, Warning) as e:
            print('Error: {}'.format(e))
            feature_metrics = None
    else:
        feature_metrics = None

    return feature_metrics

In [17]:
# use the estimators to extract the target and decoy metrics for each sequence-charge found in this run
# take an explicit copy of the selection: later cells add columns to this frame, and assigning a column
# to a filtered slice of merged_df triggers pandas' SettingWithCopyWarning
library_sequences_in_this_run_df = merged_df[merged_df.file_idx == file_idx].copy()

In [18]:
# estimate the target coordinates for each sequence in the run by applying the estimators row by row
library_sequences_in_this_run_df['target_coords'] = library_sequences_in_this_run_df.apply(
    estimate_target_coordinates,
    axis=1,
    args=(mz_estimator, scan_estimator, rt_estimator),
)

In [27]:
# list the columns now present on the run's sequences, including the newly added 'target_coords'
library_sequences_in_this_run_df.columns

Index(['file_idx', 'sequence', 'charge', 'run_mz', 'run_scan', 'run_rt',
       'run_intensity', 'cv_mz', 'cv_scan', 'cv_rt', 'cv_intensity',
       'theoretical_mz', 'experiment_scan_mean', 'experiment_scan_std_dev',
       'experiment_scan_peak_width', 'experiment_rt_mean',
       'experiment_rt_std_dev', 'experiment_rt_peak_width',
       'experiment_intensity_mean', 'experiment_intensity_std_dev',
       'number_of_runs_identified', 'delta_mz', 'delta_mz_ppm', 'delta_scan',
       'delta_rt', 'target_coords'],
      dtype='object')

In [33]:
# extract the feature metrics at the target coordinates of each sequence-charge in the run, keeping the
# identifying (file_idx, sequence, charge) alongside each result so it can be joined back later
# NOTE: results are appended one row at a time, so an uncaught exception part-way through the loop
# leaves target_metrics holding only the rows processed up to that point
target_metrics = []
for row in library_sequences_in_this_run_df.itertuples():
    metrics = extract_feature_metrics_at_coords(row.target_coords, row.experiment_scan_peak_width, row.experiment_rt_peak_width, row.charge)
    target_metrics.append((row.file_idx, row.sequence, row.charge, metrics))

Error: could not calculate metrics for isotopic peak 2 in the scan dimension
Error: could not calculate metrics for isotopic peak 2 in the RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: invalid value encountered in true_divide
Error: could not calculate metrics for isotopic peak 1 in scan dimension
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for isotopic peak 2 in the scan dimension
Error: invalid value encountered in true_divide
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for isotopic peak 2 in the RT dimension
Error: invalid value encountered in true_divide
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: r-squared for PHR cannot be 

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: r-squared for PHR cannot be calculated
Error: could not calculate metrics for isotopic peak 2 in the scan dimension
Error: invalid value encountered in true_divide
Error: invalid value encountered in true_divide
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: invalid value encountered in true_divide
Error: invalid value encountered in true_divide
Error: invalid value encountered in true_divide
Error: invalid value encountered in true_divide
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for isotopic peak 2 in the scan dimension


TypingError: Failed at nopython (nopython frontend)
Internal error at <numba.typeinfer.ArgConstraint object at 0x12fb57b38>:
--%<----------------------------------------------------------------------------
Traceback (most recent call last):
  File "/Users/darylwilding-mcbride/anaconda3/envs/py36/lib/python3.6/site-packages/numba/errors.py", line 491, in new_error_context
    yield
  File "/Users/darylwilding-mcbride/anaconda3/envs/py36/lib/python3.6/site-packages/numba/typeinfer.py", line 194, in __call__
    assert ty.is_precise()
AssertionError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/darylwilding-mcbride/anaconda3/envs/py36/lib/python3.6/site-packages/numba/typeinfer.py", line 138, in propagate
    constraint(typeinfer)
  File "/Users/darylwilding-mcbride/anaconda3/envs/py36/lib/python3.6/site-packages/numba/typeinfer.py", line 195, in __call__
    typeinfer.add_type(self.dst, ty, loc=self.loc)
  File "/Users/darylwilding-mcbride/anaconda3/envs/py36/lib/python3.6/contextlib.py", line 99, in __exit__
    self.gen.throw(type, value, traceback)
  File "/Users/darylwilding-mcbride/anaconda3/envs/py36/lib/python3.6/site-packages/numba/errors.py", line 499, in new_error_context
    six.reraise(type(newerr), newerr, tb)
  File "/Users/darylwilding-mcbride/anaconda3/envs/py36/lib/python3.6/site-packages/numba/six.py", line 659, in reraise
    raise value
numba.errors.InternalError: [33m[1m[33m[1m[0m
[0m[37m[1m[1] During: typing of argument at <ipython-input-13-afaa2571f442> (4)[0m
--%<----------------------------------------------------------------------------

[37m[1m
File "<ipython-input-13-afaa2571f442>", line 4:[0m
[34m[1mdef mz_centroid(_int_f, _mz_f):
[31m[1m    return ((_int_f/_int_f.sum()) * _mz_f).sum()
[0m    [32m[1m^[0m[0m

This error may have been caused by the following argument(s):
- argument 0: Unsupported array dtype: object
- argument 1: Unsupported array dtype: object

This is not usually a problem with Numba itself but instead often caused by
the use of unsupported features or an issue in resolving types.

To see Python/NumPy features supported by the latest release of Numba visit:
http://numba.pydata.org/numba-doc/dev/reference/pysupported.html
and
http://numba.pydata.org/numba-doc/dev/reference/numpysupported.html

For more information about typing errors and how to debug them visit:
http://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#my-code-doesn-t-compile

If you think your code should work with Numba, please report the error message
and traceback, along with a minimal reproducer at:
https://github.com/numba/numba/issues/new


In [34]:
# tabulate the collected target metrics, one row per (file_idx, sequence, charge)
target_metrics_df = pd.DataFrame(
    data=target_metrics,
    columns=['file_idx','sequence','charge','target_metrics'],
)

In [35]:
# preview the first few rows of the tabulated target metrics
target_metrics_df.head()

Unnamed: 0,file_idx,sequence,charge,target_metrics
0,0,AAAAAAAAAPAAAATAPTTAATTAATAAQ,3,"{'delta_mz_ppm': 1.5902111145813935, 'delta_sc..."
1,0,AAAAAAAAVPSAGPAGPAPTSAAGR,2,"{'delta_mz_ppm': 0.2730240796475787, 'delta_sc..."
2,0,AAAAALSQQQSLQER,2,"{'delta_mz_ppm': -0.6714100379718795, 'delta_s..."
3,0,AAAAATVVPPMVGGPPFVGPVGFGPGDR,3,"{'delta_mz_ppm': -0.48732784030103726, 'delta_..."
4,0,AAAAAWEEPSSGNGTAR,2,"{'delta_mz_ppm': -0.29029132632778526, 'delta_..."


In [36]:
# count the rows whose target_metrics entry is missing (null)
target_metrics_df.target_metrics.isna().sum()

138

In [37]:
# total number of rows for which target metrics extraction was attempted
len(target_metrics_df)

1108

In [38]:
# attach the target metrics to each library sequence, matching on file_idx, sequence and charge;
# the left join keeps every sequence even when it has no corresponding row of metrics
library_sequences_in_this_run_df = library_sequences_in_this_run_df.merge(
    target_metrics_df,
    how='left',
    on=['file_idx','sequence','charge'],
)

In [39]:
# list the columns after the merge, which now include 'target_metrics'
library_sequences_in_this_run_df.columns

Index(['file_idx', 'sequence', 'charge', 'run_mz', 'run_scan', 'run_rt',
       'run_intensity', 'cv_mz', 'cv_scan', 'cv_rt', 'cv_intensity',
       'theoretical_mz', 'experiment_scan_mean', 'experiment_scan_std_dev',
       'experiment_scan_peak_width', 'experiment_rt_mean',
       'experiment_rt_std_dev', 'experiment_rt_peak_width',
       'experiment_intensity_mean', 'experiment_intensity_std_dev',
       'number_of_runs_identified', 'delta_mz', 'delta_mz_ppm', 'delta_scan',
       'delta_rt', 'target_coords', 'target_metrics'],
      dtype='object')

In [40]:
# show the first few entries of the merged target_metrics column
library_sequences_in_this_run_df.target_metrics.head()

0    {'delta_mz_ppm': 1.5902111145813935, 'delta_sc...
1    {'delta_mz_ppm': 0.2730240796475787, 'delta_sc...
2    {'delta_mz_ppm': -0.6714100379718795, 'delta_s...
3    {'delta_mz_ppm': -0.48732784030103726, 'delta_...
4    {'delta_mz_ppm': -0.29029132632778526, 'delta_...
Name: target_metrics, dtype: object

In [41]:
# save the sequences with their target coordinates and metrics so they can be reloaded later
library_sequences_file_name = '/Users/darylwilding-mcbride/Downloads/library_sequences_in_this_run_df.pkl'
library_sequences_in_this_run_df.to_pickle(library_sequences_file_name)

In [42]:
# calculate the decoy coordinates for each sequence in the run, one row at a time
library_sequences_in_this_run_df['decoy_coords'] = library_sequences_in_this_run_df.apply(
    calculate_decoy_coordinates,
    axis=1,
)

In [50]:
# extract the feature metrics at the decoy coordinates of each sequence-charge in the run, keeping the
# identifying (file_idx, sequence, charge) alongside each result so it can be joined back later
# NOTE: results are appended one row at a time, so an exception or interrupt part-way through the loop
# leaves decoy_metrics holding only the rows processed up to that point
decoy_metrics = []
for row in library_sequences_in_this_run_df.itertuples():
    metrics = extract_feature_metrics_at_coords(row.decoy_coords, row.experiment_scan_peak_width, row.experiment_rt_peak_width, row.charge)
    decoy_metrics.append((row.file_idx, row.sequence, row.charge, metrics))


Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for monoisot

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 2 in the scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisot

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 2 in the scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotop

Error: could not calculate metrics for isotopic peak 2 in the RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for isotopic peak 2 in the RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: invalid value encountered in true_divide
Err

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic

Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: invalid value encountered in true_divide
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: 

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 2 in the RT dimension
Error: could not calculate metrics for isotopic peak 2 in the RT dimension
Error: could not calculate metrics for mono

Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic pea

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: invalid value encountered in true_divide
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: invalid value encountered in true_divide
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: invalid value encountered in true_divide
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metri

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: invalid value encountered in true_divide
Error

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 2 in the scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisot

Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for isotopic peak 1 in scan dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension
Error: could not calculate metrics for monoisotopic peak in RT dimension


KeyboardInterrupt: 

In [None]:
# the decoy metrics are collected as a list of (file_idx, sequence, charge, metrics) tuples and are not yet
# a column of the sequences frame, so selecting 'decoy_metrics' directly would raise a KeyError.
# Tabulate and left-join them on the same keys used for the target metrics; the guard keeps the cell
# idempotent, so re-running it does not produce duplicated, suffixed columns
if 'decoy_metrics' not in library_sequences_in_this_run_df.columns:
    decoy_metrics_df = pd.DataFrame(decoy_metrics, columns=['file_idx','sequence','charge','decoy_metrics'])
    library_sequences_in_this_run_df = pd.merge(library_sequences_in_this_run_df, decoy_metrics_df, how='left', on=['file_idx','sequence','charge'])

# compare the target and decoy coordinates and metrics side by side for the first few sequences
library_sequences_in_this_run_df[['target_coords','decoy_coords','target_metrics','decoy_metrics']].head()