In [1]:
import pandas as pd
import numpy as np
import sys
import pickle
import glob
import os
import shutil
import sqlite3
import matplotlib.pyplot as plt
from matplotlib import colors, cm, pyplot as plt
from PIL import Image, ImageDraw

In [2]:
# the experiment, and the run within it, that the feature is visualised for;
# both are substituted into the input and output paths built in the next cell
EXPERIMENT_NAME = 'P3856'
RUN_NAME = 'P3856_YHE114_1_Slot1-1_1_5115'

In [3]:
# locations derived from the experiment and run names
EXPERIMENT_DIR = '/data2/experiments/{}'.format(EXPERIMENT_NAME)
# NOTE(review): FEATURES_DIR is not referenced by any other cell shown in this notebook
FEATURES_DIR = '{}/features/{}'.format(EXPERIMENT_DIR, RUN_NAME)
# the sqlite database the raw points are read from (its 'frames' table is queried below)
CONVERTED_DB = '{}/converted-databases/exp-{}-run-{}-converted.sqlite'.format(EXPERIMENT_DIR, EXPERIMENT_NAME, RUN_NAME)
# the image slices are written to the 'slices' directory under the run's encoded-features directory
ENCODED_FEATURES_DIR = '{}/encoded-features/{}'.format(EXPERIMENT_DIR, RUN_NAME)
FEATURE_SLICES_DIR = '{}/slices'.format(ENCODED_FEATURES_DIR)

In [4]:
# frame types for PASEF mode
# these are matched against the frame_type column of the frames table;
# only FRAME_TYPE_MS1 is used by the raw-point query in this notebook
FRAME_TYPE_MS1 = 0
FRAME_TYPE_MS2 = 8

In [5]:
# the sequence of interest
# together with the charge, this selects the row of the library-sequences table
# whose estimated and extracted coordinates are visualised
selected_sequence = 'TAIESALTALETALK'
selected_sequence_charge = 2

In [6]:
# load the library sequences for this run and pick out the estimated coordinates
# of the first row matching the selected sequence and charge
library_sequences_file_name = '{}/target-decoy-models/library-sequences-in-run-{}.pkl'.format(EXPERIMENT_DIR, RUN_NAME)
estimated_coords_df = pd.read_pickle(library_sequences_file_name)
is_selected_sequence = (estimated_coords_df.sequence == selected_sequence) & (estimated_coords_df.charge == selected_sequence_charge)
estimated_coords = estimated_coords_df[is_selected_sequence].iloc[0]['target_coords']
estimated_coords

{'mono_mz': 766.4324120546552,
 'scan_apex': 474.3991264771071,
 'rt_apex': 2327.4625731350775}

In [7]:
# the coordinates that were extracted for the same sequence and charge are held in
# the 'attributes' column of the first matching row
selected_sequence_rows_df = estimated_coords_df[(estimated_coords_df.sequence == selected_sequence) & (estimated_coords_df.charge == selected_sequence_charge)]
extracted_coords = selected_sequence_rows_df.iloc[0]['attributes']
extracted_rt_apex, extracted_scan_apex, extracted_mz = (
    extracted_coords['rt_apex'],
    extracted_coords['scan_apex'],
    extracted_coords['monoisotopic_mz_centroid'],
)

In [8]:
# how far either side of the estimated feature coordinates the images should extend

# m/z window: subtracted from / added to the estimated monoisotopic m/z
OFFSET_MZ_LOWER = 10.0
OFFSET_MZ_UPPER = 20.0

# despite the CCS in the name, these are subtracted from / added to the estimated scan apex
OFFSET_CCS_LOWER = 100
OFFSET_CCS_UPPER = 100

# retention-time window around the estimated RT apex; the bounds are compared against
# the retention_time_secs column, so these are presumably seconds
OFFSET_RT_LOWER = 10
OFFSET_RT_UPPER = 10

In [9]:
# image dimensions
# the m/z window is mapped across PIXELS_X columns and the scan window down PIXELS_Y rows
PIXELS_X = 600
PIXELS_Y = 600

In [10]:
def pixel_xy(mz, scan, mz_lower, mz_upper, scan_lower, scan_upper):
    """Map a point's (mz, scan) onto the (x, y) pixel of the image it falls in.

    The m/z window [mz_lower, mz_upper] is stretched linearly across pixel columns
    0..PIXELS_X-1 and the scan window [scan_lower, scan_upper] across pixel rows
    0..PIXELS_Y-1. The scaled positions are truncated to integers with int().

    Returns a tuple (pixel_x, pixel_y).
    """
    mz_scale = (PIXELS_X-1) / (mz_upper - mz_lower)
    scan_scale = (PIXELS_Y-1) / (scan_upper - scan_lower)
    return (int((mz - mz_lower) * mz_scale), int((scan - scan_lower) * scan_scale))

In [11]:
# create the colour mapping
# an intensity is first normalised on a log scale by norm, and the result is then
# looked up in the 'rainbow' colour map to get the pixel's colour
colour_map = plt.get_cmap('rainbow')
norm = colors.LogNorm(vmin=1, vmax=5000, clip=True)  # aiming to get good colour variation in the lower range, and clipping everything else

In [12]:
# clear out any previous feature slices
# NOTE: this removes the whole slices directory for this run and recreates it empty,
# so image files written by an earlier execution of the notebook are deleted
if os.path.exists(FEATURE_SLICES_DIR):
    shutil.rmtree(FEATURE_SLICES_DIR)
os.makedirs(FEATURE_SLICES_DIR)

In [13]:
# determine the cuboid dimensions: a window in m/z, scan and retention time
# placed around the estimated coordinates of the feature
estimated_mono_mz = estimated_coords['mono_mz']
estimated_scan_apex = estimated_coords['scan_apex']
rt_apex = estimated_coords['rt_apex']

mz_lower, mz_upper = estimated_mono_mz - OFFSET_MZ_LOWER, estimated_mono_mz + OFFSET_MZ_UPPER
scan_lower, scan_upper = estimated_scan_apex - OFFSET_CCS_LOWER, estimated_scan_apex + OFFSET_CCS_UPPER
rt_lower, rt_upper = rt_apex - OFFSET_RT_LOWER, rt_apex + OFFSET_RT_UPPER

# whole number of pixels spanned by one unit of m/z and by one scan
mz_extent = mz_upper - mz_lower
scan_extent = scan_upper - scan_lower
x_pixels_per_mz = int((PIXELS_X-1) / mz_extent)
y_pixels_per_scan = int((PIXELS_Y-1) / scan_extent)

In [14]:
# get the raw data for this feature
# the cuboid bounds are passed as bound parameters rather than being formatted into the SQL text
raw_points_query = (
    'select mz,scan,intensity,frame_id,retention_time_secs from frames '
    'where mz >= ? and mz <= ? and scan >= ? and scan <= ? and frame_type == ? '
    'and retention_time_secs >= ? and retention_time_secs <= ?'
)
# cast to plain Python numbers so sqlite3 can bind them whatever numeric type the coordinates were stored as
raw_points_params = (
    float(mz_lower), float(mz_upper),
    float(scan_lower), float(scan_upper),
    int(FRAME_TYPE_MS1),
    float(rt_lower), float(rt_upper),
)
db_conn = sqlite3.connect(CONVERTED_DB)
try:
    raw_df = pd.read_sql_query(raw_points_query, db_conn, params=raw_points_params)
finally:
    # release the database even if the query fails
    db_conn.close()
if len(raw_df) == 0:
    # report the sequence and charge; this notebook has no feature ID to report
    print("found no raw points for sequence {}, charge {}".format(selected_sequence, selected_sequence_charge))

In [15]:
# get the frame ID closest to the estimated RT apex
# idxmin locates the raw point with the smallest RT distance in a single pass, so there
# is no need to sort every raw point just to take the first one
apex_frame_id = int(raw_df.loc[(raw_df['retention_time_secs'] - rt_apex).abs().idxmin(), 'frame_id'])
apex_frame_id

21281

In [16]:
# get the frame ID closest to the extracted RT apex
# idxmin locates the raw point with the smallest RT distance in a single pass, so there
# is no need to sort every raw point just to take the first one
extracted_apex_frame_id = int(raw_df.loc[(raw_df['retention_time_secs'] - extracted_rt_apex).abs().idxmin(), 'frame_id'])
extracted_apex_frame_id

21281

In [17]:
# calculate the raw point coordinates in scaled pixels
# iterate over the two columns directly; a row-wise DataFrame.apply would build a Series
# for every raw point just to read its mz and scan
pixel_coords_l = [pixel_xy(mz, scan, mz_lower, mz_upper, scan_lower, scan_upper) for mz,scan in zip(raw_df.mz, raw_df.scan)]
pixel_df = pd.DataFrame(pixel_coords_l, columns=['pixel_x','pixel_y'])
# both frames carry the same default row index, so the pixel columns line up with their raw points
raw_pixel_df = pd.concat([raw_df, pixel_df], axis=1)

# sum the intensity of raw points that have been assigned to each pixel
pixel_intensity_df = raw_pixel_df.groupby(by=['frame_id', 'pixel_x', 'pixel_y'], as_index=False).intensity.sum()

print('maximum intensity is {}'.format(pixel_intensity_df.intensity.max()))

maximum intensity is 5777


In [18]:
# calculate the colour to represent the intensity
# each distinct intensity is normalised and looked up in the colour map once; the alpha
# channel is dropped to leave an (R, G, B) tuple of bytes
colours_l = [(i, colour_map(norm(i), bytes=True)[:3]) for i in pixel_intensity_df.intensity.unique()]
colours_df = pd.DataFrame(colours_l, columns=['intensity','colour'])
# leave out any colour column left by a previous execution of this cell, so that re-running it
# does not produce colour_x / colour_y columns in place of the single colour column used below
columns_to_keep = [c for c in pixel_intensity_df.columns if c != 'colour']
pixel_intensity_df = pd.merge(pixel_intensity_df[columns_to_keep], colours_df, how='left', left_on=['intensity'], right_on=['intensity'])

In [19]:
# write out the images to files
def _draw_crosshair(tile, x, y, colour):
    """Draw a vertical line through x and a horizontal line through y across the whole tile."""
    draw = ImageDraw.Draw(tile)
    draw.line((x,0, x,PIXELS_Y), fill=colour, width=2)
    draw.line((0,y, PIXELS_X,y), fill=colour, width=2)

feature_slice = 0
# group by the column name rather than a one-element list: with a list, newer versions of
# pandas yield each group key as a 1-tuple, which never compares equal to the integer apex
# frame IDs, and the coordinates would silently not be highlighted
for frame_id,group_df in pixel_intensity_df.groupby('frame_id'):
    # create an intensity array
    tile_im_array = np.zeros([PIXELS_Y, PIXELS_X, 3], dtype=np.uint8)  # container for the image
    for x,y,c in zip(group_df.pixel_x, group_df.pixel_y, group_df.colour):
        # each point is painted one pixel wide and y_pixels_per_scan pixels tall
        tile_im_array[y:y+y_pixels_per_scan,x,:] = c

    # create an image of the intensity array
    feature_slice += 1
    tile = Image.fromarray(tile_im_array, 'RGB')

    # if this is the estimated apex frame, highlight the estimated coordinates
    if frame_id == apex_frame_id:
        estimated_x, estimated_y = pixel_xy(estimated_coords['mono_mz'], estimated_coords['scan_apex'], mz_lower, mz_upper, scan_lower, scan_upper)
        _draw_crosshair(tile, estimated_x, estimated_y, 'green')

    # if this is the extracted apex frame, highlight the extracted coordinates
    # (drawn second, so it is on top when both apexes fall in the same frame)
    if frame_id == extracted_apex_frame_id:
        extracted_x, extracted_y = pixel_xy(extracted_coords['monoisotopic_mz_centroid'], extracted_coords['scan_apex'], mz_lower, mz_upper, scan_lower, scan_upper)
        _draw_crosshair(tile, extracted_x, extracted_y, 'blue')

    # save the image as a file, numbered in frame ID order starting from 001
    tile_file_name = '{}/feature-slice-{:03d}.png'.format(FEATURE_SLICES_DIR, feature_slice)
    tile.save(tile_file_name)