In [614]:
import sqlite3
import pandas as pd
import numpy as np
import sys

In [615]:
# Lower and upper bounds of the retention-time window used to select MaxQuant features.
RT_LIMIT_LOWER, RT_LIMIT_UPPER = 4340, 4580

In [616]:
# Directory named after the RT window, and the converted SQLite database inside it.
_rt_window = (RT_LIMIT_LOWER, RT_LIMIT_UPPER)
BASE_NAME = "/home/ubuntu/HeLa_20KInt-rt-{}-{}".format(*_rt_window)
CONVERTED_DATABASE_NAME = '{}/HeLa_20KInt.sqlite'.format(BASE_NAME)

In [617]:
# Load the MaxQuant evidence table (tab-separated text).
EVIDENCE_FILE_NAME = '/home/ubuntu/maxquant_results/txt/evidence.txt'
evidence_df = pd.read_csv(EVIDENCE_FILE_NAME, sep='\t')

In [618]:
# Load the MaxQuant allPeptides table (tab-separated text); one row per detected feature.
ALLPEPTIDES_FILE_NAME = '/home/ubuntu/maxquant_results/txt/allPeptides.txt'
allpeptides_df = pd.read_csv(ALLPEPTIDES_FILE_NAME, sep='\t')

In [619]:
# list the column names as loaded from allPeptides.txt
allpeptides_df.columns

Index(['Raw file', 'Charge', 'm/z', 'Mass', 'Resolution',
       'Number of data points', 'Number of frames', 'Number of isotopic peaks',
       'Isotope correlation', 'Mass fractional part', 'Mass deficit',
       'Retention time', 'Retention length', 'Retention length (FWHM)',
       'Min frame index', 'Max frame index', 'Ion mobility index',
       'Ion mobility index length', 'Ion mobility index length (FWHM)',
       'Intensity', 'Intensities', 'Number of pasef MS/MS', 'Pasef MS/MS IDs',
       'MS/MS scan number'],
      dtype='object')

In [620]:
# Map the MaxQuant column headings used in this notebook onto short snake_case names.
MQ_COLUMN_NAMES = {
    'Charge': 'charge_state',
    'm/z': 'mz',
    'Intensity': 'intensity',
    'Number of data points': 'number_data_points',
    'Number of isotopic peaks': 'isotope_count',
    'Isotope correlation': 'isotope_correlation',
    'Retention time': 'rt',
    'Retention length': 'rt_length',
    'Retention length (FWHM)': 'rt_length_fwhm',
    'Ion mobility index': 'scan',
    'Ion mobility index length': 'scan_length',
    'Ion mobility index length (FWHM)': 'scan_length_fwhm',
    'Number of pasef MS/MS': 'number_pasef_ms2_ids',
}
# rename in place; columns not listed in the mapping keep their original headings
allpeptides_df.rename(columns=MQ_COLUMN_NAMES, inplace=True)

In [621]:
# list the column names again to check the result of the rename
allpeptides_df.columns

Index(['Raw file', 'charge_state', 'mz', 'Mass', 'Resolution',
       'number_data_points', 'Number of frames', 'isotope_count',
       'isotope_correlation', 'Mass fractional part', 'Mass deficit', 'rt',
       'rt_length', 'rt_length_fwhm', 'Min frame index', 'Max frame index',
       'scan', 'scan_length', 'scan_length_fwhm', 'intensity', 'Intensities',
       'number_pasef_ms2_ids', 'Pasef MS/MS IDs', 'MS/MS scan number'],
      dtype='object')

In [622]:
# keep only the features that have an intensity, lie inside the RT window (bounds inclusive)
# and have at least one PASEF MS/MS
has_intensity = allpeptides_df['intensity'].notnull()
in_rt_window = allpeptides_df['rt'].between(RT_LIMIT_LOWER, RT_LIMIT_UPPER)
has_pasef_ms2 = allpeptides_df['number_pasef_ms2_ids'] > 0
# copy so that later in-place changes do not touch allpeptides_df
allpeptides_subset_df = allpeptides_df[has_intensity & in_rt_window & has_pasef_ms2].copy()

In [623]:
# order the features in the RT window by decreasing intensity (in place)
allpeptides_subset_df.sort_values(
    'intensity',
    ascending=False,
    inplace=True,
)

In [624]:
# number the features 1..N following the current row order of the subset
feature_count = len(allpeptides_subset_df)
allpeptides_subset_df["mq_feature_id"] = np.arange(1, feature_count + 1)

In [625]:
# preview the first rows of the subset, including the new mq_feature_id column
allpeptides_subset_df.head()

Unnamed: 0,Raw file,charge_state,mz,Mass,Resolution,number_data_points,Number of frames,isotope_count,isotope_correlation,Mass fractional part,...,Max frame index,scan,scan_length,scan_length_fwhm,intensity,Intensities,number_pasef_ms2_ids,Pasef MS/MS IDs,MS/MS scan number,mq_feature_id
273238,HeLa_20KInt_2KIT_Slot1-46_01_1179,2,895.9567,1789.8988,24866.281465,207657,86,6,0.99299,0.898847,...,3820,363,486,111,26707000.0,,21,320702;320713;320725;320875;320898;320909;3210...,71215.0,1
271311,HeLa_20KInt_2KIT_Slot1-46_01_1179,2,877.90152,1753.7885,24678.668667,407176,65,7,0.99776,0.788495,...,3821,411,285,54,16367000.0,,30,322727;322738;322914;322925;323008;323121;3231...,69116.0,2
258239,HeLa_20KInt_2KIT_Slot1-46_01_1179,2,830.4567,1658.8988,25134.174793,83295,43,6,0.997026,0.898846,...,3759,435,357,84,10780000.0,,10,318034;318045;318145;318157;318276;318287;3183...,63395.0,3
272371,HeLa_20KInt_2KIT_Slot1-46_01_1179,2,890.44758,1778.8806,24407.579354,131211,61,9,0.999581,0.880605,...,3872,348,219,78,9524100.0,,28,329333;329357;329402;329411;329420;329430;3294...,70576.0,4
257004,HeLa_20KInt_2KIT_Slot1-46_01_1179,2,821.89394,1641.7733,24395.375297,249643,66,9,0.999171,0.773335,...,3893,465,399,60,8506200.0,,16,331217;331229;331467;331553;331665;331771;3318...,62224.0,5


In [626]:
# highest feature ID assigned in the subset
allpeptides_subset_df.mq_feature_id.max()

5442

### Set MQ feature to visualise

In [627]:
# value of mq_feature_id identifying the feature to visualise
MQ_FEATURE_ID = 4

In [628]:
# pick out the row(s) whose mq_feature_id matches the chosen feature, and display them
is_selected_feature = allpeptides_subset_df['mq_feature_id'] == MQ_FEATURE_ID
mq_feature_df = allpeptides_subset_df.loc[is_selected_feature]
mq_feature_df

Unnamed: 0,Raw file,charge_state,mz,Mass,Resolution,number_data_points,Number of frames,isotope_count,isotope_correlation,Mass fractional part,...,Max frame index,scan,scan_length,scan_length_fwhm,intensity,Intensities,number_pasef_ms2_ids,Pasef MS/MS IDs,MS/MS scan number,mq_feature_id
272371,HeLa_20KInt_2KIT_Slot1-46_01_1179,2,890.44758,1778.8806,24407.579354,131211,61,9,0.999581,0.880605,...,3872,348,219,78,9524100.0,,28,329333;329357;329402;329411;329420;329430;3294...,70576.0,4


In [629]:
# Mass difference between Carbon-12 and Carbon-13 isotopes, in Da.
# For calculating the spacing between isotopic peaks.
DELTA_MZ = 1.003355

# Mass of a proton in unified atomic mass units, or Da.
# For calculating the monoisotopic mass.
PROTON_MASS = 1.007276

In [630]:
# m/z spacing expected between adjacent isotopic peaks of the selected feature.
# The charge state is read from the selected feature here, because the cell that unpacks the
# feature's other attributes runs after this one; without this line a fresh kernel raises
# NameError on charge_state.
charge_state = mq_feature_df.charge_state.values[0]
expected_isotope_spacing_mz = DELTA_MZ / charge_state
expected_isotope_spacing_mz

0.5016775

In [631]:
# m/z matching tolerance, in parts per million
MZ_TOLERANCE_PPM = 5
# the same tolerance expressed as a percentage (1 ppm = 10**-4 percent)
MZ_TOLERANCE_PERCENT = MZ_TOLERANCE_PPM * 10**-4

In [632]:
def _feature_attribute(column_name):
    """Return the value of the given column in the first row of mq_feature_df."""
    return mq_feature_df[column_name].values[0]

# m/z window: the feature's m/z plus/minus the tolerance
mz = _feature_attribute('mz')
mz_delta = mz * MZ_TOLERANCE_PERCENT / 100
mz_lower, mz_upper = mz - mz_delta, mz + mz_delta

# scan window: half the feature's scan length (truncated to an integer) either side of its scan
scan = _feature_attribute('scan')
scan_delta = int(_feature_attribute('scan_length') / 2)
scan_lower, scan_upper = scan - scan_delta, scan + scan_delta

# RT window: half the feature's retention length either side of its retention time
rt = _feature_attribute('rt')
rt_delta = _feature_attribute('rt_length') / 2
rt_lower, rt_upper = rt - rt_delta, rt + rt_delta

# remaining attributes used for the isotope windows and the plot title
charge_state = _feature_attribute('charge_state')
isotope_count = _feature_attribute('isotope_count')
isotope_correlation = _feature_attribute('isotope_correlation')

In [633]:
# load the summed frame points in the feature's region
db_conn = sqlite3.connect(CONVERTED_DATABASE_NAME)
summed_frames_df = pd.read_sql_query("select * from summed_frames where retention_time_secs >= {} and retention_time_secs <= {} and scan >= {} and scan <= {} and mz >= {}".format(rt_lower,rt_upper,scan_lower,scan_upper,mz_lower), db_conn)
db_conn.close()

In [634]:
# preview the first rows loaded from the summed_frames table
summed_frames_df.head()

Unnamed: 0,frame_id,point_id,mz,scan,intensity,retention_time_secs,peak_id
0,58,13686,1127.059093,239,1021,4477.717421,1844
1,58,13692,1126.559367,239,822,4477.629811,2580
2,58,13696,1128.069732,239,592,4477.687513,0
3,58,13700,930.446712,239,284,4477.955461,0
4,58,13708,943.012751,239,246,4477.784656,0


In [635]:
# text shown when hovering over a point in the plot: m/z, scan and retention time
mz_label = summed_frames_df["mz"].map('{:,.4f} m/z'.format)
scan_label = summed_frames_df["scan"].map('{} scan'.format)
rt_label = summed_frames_df["retention_time_secs"].map('RT {:.1f} secs'.format)
summed_frames_df["hover"] = mz_label + ', ' + scan_label + ', ' + rt_label

In [636]:
# preview the points again, now with the hover text column
summed_frames_df.head()

Unnamed: 0,frame_id,point_id,mz,scan,intensity,retention_time_secs,peak_id,hover
0,58,13686,1127.059093,239,1021,4477.717421,1844,"1,127.0591 m/z, 239 scan, RT 4477.7 secs"
1,58,13692,1126.559367,239,822,4477.629811,2580,"1,126.5594 m/z, 239 scan, RT 4477.6 secs"
2,58,13696,1128.069732,239,592,4477.687513,0,"1,128.0697 m/z, 239 scan, RT 4477.7 secs"
3,58,13700,930.446712,239,284,4477.955461,0,"930.4467 m/z, 239 scan, RT 4478.0 secs"
4,58,13708,943.012751,239,246,4477.784656,0,"943.0128 m/z, 239 scan, RT 4477.8 secs"


In [637]:
# For each expected isotopic peak, collect the points that fall inside its m/z window.
# The RT and scan limits are the same for every isotope, so that mask is built once.
in_feature_region = (
    summed_frames_df.retention_time_secs.between(rt_lower, rt_upper) &
    summed_frames_df.scan.between(scan_lower, scan_upper)
)

isotope_point_df_list = []
for isotope_idx in range(0, isotope_count):
    # shift the first peak's m/z window up by one isotope spacing per isotope index
    in_isotope_mz_window = summed_frames_df.mz.between(
        mz_lower + (isotope_idx * expected_isotope_spacing_mz),
        mz_upper + (isotope_idx * expected_isotope_spacing_mz),
    )
    isotope_point_df_list.append(summed_frames_df[in_isotope_mz_window & in_feature_region])

In [638]:
# plotly's offline helpers; iplot renders a figure inside the notebook
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# show the installed plotly version
print(__version__) # requires version >= 1.9.0

3.5.0


In [639]:
# set up plotly's offline notebook mode before any figure is drawn with iplot
init_notebook_mode(connected=True)

In [640]:
# Palette of twenty (red, green, blue) tuples, each component in the 0-255 range.
tableau20 = [
    (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
    (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
    (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
    (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
    (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
]

In [641]:
# shallow copy of the palette, used for the plot's marker colours
tableau20_py = list(tableau20)

In [642]:
import plotly.plotly as py
import plotly.graph_objs as go

# Build one 3D scatter trace per isotopic peak that has points, then draw them in one figure.
palette_size = len(tableau20_py)

traces = []
for isotope_idx in range(0, isotope_count):
    df = isotope_point_df_list[isotope_idx]
    if len(df) == 0:
        # nothing was found in this isotope's window, so it gets no trace
        continue
    # wrap around the palette so a feature with more isotopic peaks than colours
    # does not raise an IndexError
    isotope_colour = tableau20_py[isotope_idx % palette_size]
    traces.append(
        go.Scatter3d(
            x = df.mz,
            y = df.scan,
            z = df.intensity,
            mode = 'markers',
            marker = dict(
                size = 5,
                # formatting the tuple gives a colour string such as "rgb(31, 119, 180)"
                color = "rgb{}".format(isotope_colour),
                opacity = 0.8
            ),
            text = df.hover,
            hoverinfo = 'text',
            name = "isotope {}".format(isotope_idx)
        )
    )

data = traces
layout = go.Layout(
    autosize=False,
    width=900,
    height=600,
    title='Summed points (across the feature\'s RT extent) comprising the isotopic peaks for MQ feature {}<br>Isotope correlation {:,.2f}'.format(MQ_FEATURE_ID,isotope_correlation),
    # no side or bottom margins; the top margin leaves room for the title
    margin=dict(l=0, r=0, b=0, t=40),
    scene = dict(
        # the m/z axis is drawn reversed
        xaxis = dict(title='m/z', autorange='reversed'),
        yaxis = dict(title='scan'),
        zaxis = dict(title='intensity')
    )
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)