In [16]:
import sqlite3
import pandas as pd
import numpy as np
import sys
from matplotlib import colors, cm, pyplot as plt
import peakutils

In [12]:
# --- Input ---
# MaxQuant allPeptides table (read as tab-separated text further down).
# NOTE(review): this is a machine-specific absolute path; adjust when running elsewhere.
ALLPEPTIDES_FILENAME = '/Users/darylwilding-mcbride/Downloads/maxquant_results/txt/allPeptides.txt'

# --- Feature filters ---
# Lowest isotope correlation a feature may have and still be kept.
MIN_ISOTOPE_CORRELATION = 0.9

# Inclusive retention-time window applied to each feature's 'rt' value.
rt_lower, rt_upper = 3000, 3060

# Scan range; SCAN_MAX also sets the scale for the minimum scan extent below.
SCAN_MIN, SCAN_MAX = 1, 910

# Features must span more than 5% of SCAN_MAX to be kept (drops the small-extent features).
SCAN_LENGTH_MINIMUM = 0.05 * SCAN_MAX

# --- Bounding box ---
# Proportion of (RT length / 2) taken on each side of a feature's RT.
RT_EACH_SIDE = 1.0

In [13]:
# Load the allPeptides table, shorten the column names used below, filter the
# features, derive each feature's RT bounds, and number the features by intensity.
allpeptides_df = (
    pd.read_csv(ALLPEPTIDES_FILENAME, sep='\t')
    .rename(
        columns={
            'Number of isotopic peaks': 'isotope_count',
            'm/z': 'mz',
            'Number of data points': 'number_data_points',
            'Intensity': 'intensity',
            'Ion mobility index': 'scan',
            'Ion mobility index length': 'scan_length',
            'Ion mobility index length (FWHM)': 'scan_length_fwhm',
            'Retention time': 'rt',
            'Retention length': 'rt_length',
            'Retention length (FWHM)': 'rt_length_fwhm',
            'Charge': 'charge_state',
            'Number of pasef MS/MS': 'number_pasef_ms2_ids',
            'Isotope correlation': 'isotope_correlation',
        }
    )
    # Keep a feature only if it has an intensity, a high enough isotope correlation,
    # an RT inside the inclusive [rt_lower, rt_upper] window, more than two isotopic
    # peaks, and a scan extent above SCAN_LENGTH_MINIMUM.
    # (rt_lower / rt_upper here are the notebook-level scalars; the columns of the
    # same name are only added afterwards.)
    .loc[
        lambda df: df['intensity'].notnull()
        & (df['isotope_correlation'] >= MIN_ISOTOPE_CORRELATION)
        & df['rt'].between(rt_lower, rt_upper)
        & (df['isotope_count'] > 2)
        & (df['scan_length'] > SCAN_LENGTH_MINIMUM)
    ]
    # Half of the retention length ...
    .assign(rt_delta=lambda df: df['rt_length'] / 2)
    # ... scaled by RT_EACH_SIDE and applied on both sides of the feature's RT.
    .assign(
        rt_lower=lambda df: df['rt'] - (df['rt_delta'] * RT_EACH_SIDE),
        rt_upper=lambda df: df['rt'] + (df['rt_delta'] * RT_EACH_SIDE),
    )
    # Most intense features first, then number them 1..N in that order.
    .sort_values(by='intensity', ascending=False)
    .assign(mq_feature_id=lambda df: np.arange(1, len(df) + 1))
)


In [14]:
# Preview the first two rows of the feature table.
allpeptides_df.head(n=2)

Unnamed: 0,Raw file,charge_state,mz,Mass,Resolution,number_data_points,Number of frames,isotope_count,isotope_correlation,Mass fractional part,...,scan_length_fwhm,intensity,Intensities,number_pasef_ms2_ids,Pasef MS/MS IDs,MS/MS scan number,rt_delta,rt_lower,rt_upper,mq_feature_id
115765,HeLa_20KInt_2KIT_Slot1-46_01_1179,3,591.64124,1771.9019,24759.921836,126898,62,5,0.999577,0.901895,...,45,5333400.0,,14,201260;201269;201396;201523;201533;201617;2016...,70194.0,36.0445,3013.6285,3085.7175,1
68165,HeLa_20KInt_2KIT_Slot1-46_01_1179,3,529.5855,1585.7347,24764.949178,62822,48,6,0.999682,0.734679,...,36,3737400.0,,13,197875;197885;198053;198071;198394;198403;1984...,58323.0,27.7765,2979.3535,3034.9065,2


In [15]:
# List the column names available on the feature table.
allpeptides_df.columns

Index(['Raw file', 'charge_state', 'mz', 'Mass', 'Resolution',
       'number_data_points', 'Number of frames', 'isotope_count',
       'isotope_correlation', 'Mass fractional part', 'Mass deficit', 'rt',
       'rt_length', 'rt_length_fwhm', 'Min frame index', 'Max frame index',
       'scan', 'scan_length', 'scan_length_fwhm', 'intensity', 'Intensities',
       'number_pasef_ms2_ids', 'Pasef MS/MS IDs', 'MS/MS scan number',
       'rt_delta', 'rt_lower', 'rt_upper', 'mq_feature_id'],
      dtype='object')

In [20]:
# Estimate the standard deviation of each feature's RT profile from its FWHM.
# NOTE(review): for a Gaussian peak FWHM = 2*sqrt(2*ln 2)*sigma, i.e. about 2.355*sigma.
# The divisor 2.634 is kept as-is so existing results do not change - confirm which
# value is intended.
allpeptides_df['rt_std_dev'] = allpeptides_df['rt_length_fwhm'] / 2.634

# Three standard deviations. This must read the 'rt_std_dev' column created above;
# the frame has no 'std_dev' column, so the previous attribute access raised
# AttributeError and '3_rt_std_dev' was never created.
allpeptides_df['3_rt_std_dev'] = 3 * allpeptides_df['rt_std_dev']

In [28]:
# Express half the retention length (rt_delta) as a multiple of the RT standard deviation.
allpeptides_df['number_of_std_devs'] = allpeptides_df['rt_delta'].div(allpeptides_df['rt_std_dev'])

In [29]:
# Show rt_delta next to its size in standard deviations and the full retention length
# for the first five rows.
allpeptides_df.loc[:, ['rt_delta', 'number_of_std_devs', 'rt_length']].head(5)

Unnamed: 0,rt_delta,number_of_std_devs,rt_length
115765,36.0445,8.922208,72.089
68165,27.7765,6.873666,55.553
77387,31.908,3.741349,63.816
91041,25.997,6.438749,51.994
128790,23.044,5.708982,46.088
