In [1]:
import json
from PIL import Image, ImageDraw
import pandas as pd
import sqlite3
import numpy as np
import glob
import os
import shutil

In [2]:
# tile image dimensions in pixels
PIXELS_X = 910
PIXELS_Y = 910  # equal to the number of scan lines

# scan range; SCAN_MAX is tied to the tile height
SCAN_MIN = 1
SCAN_MAX = PIXELS_Y

# m/z range covered by a frame, and the m/z width of a single tile
MZ_MIN = 100.0
MZ_MAX = 1700.0
MZ_PER_TILE = 18.0

# number of tiles needed to span MZ_MIN..MZ_MAX (the last tile may be only partly covered)
TILES_PER_FRAME = int((MZ_MAX - MZ_MIN) / MZ_PER_TILE) + 1

In [3]:
# frame types for PASEF mode; FRAME_TYPE_MS1 is matched against the
# MsMsType column when selecting frames from the converted database
FRAME_TYPE_MS1, FRAME_TYPE_MS2 = 0, 8

In [4]:
def mz_range_for_tile(tile_id):
    """Return the (lower, upper) m/z bounds covered by a tile.

    Tiles are MZ_PER_TILE wide and laid out consecutively starting at MZ_MIN.

    Args:
        tile_id: zero-based tile index, from 0 to TILES_PER_FRAME-1 inclusive.

    Returns:
        A tuple (mz_lower, mz_upper) for the tile.

    Raises:
        AssertionError: if tile_id is outside the valid range.
    """
    assert 0 <= tile_id <= TILES_PER_FRAME-1, "tile_id not in range"

    lower_bound = MZ_MIN + (tile_id * MZ_PER_TILE)
    return (lower_bound, lower_bound + MZ_PER_TILE)

In [5]:
def tile_pixel_x_from_mz(mz):
    """Map an m/z value to the tile it falls in and its x pixel within that tile.

    Args:
        mz: m/z value, from MZ_MIN to MZ_MAX inclusive.

    Returns:
        A tuple (tile_id, pixel_x): the zero-based tile index, and the x offset
        into that tile obtained by scaling the position within the tile's m/z
        width to PIXELS_X and truncating to an int.

    Raises:
        AssertionError: if mz is outside MZ_MIN..MZ_MAX.
    """
    assert MZ_MIN <= mz <= MZ_MAX, "m/z not in range"

    mz_offset = mz - MZ_MIN
    tile_id = int(mz_offset / MZ_PER_TILE)
    # fraction of the way across the tile, scaled to the tile's pixel width
    fraction_across_tile = (mz_offset % MZ_PER_TILE) / MZ_PER_TILE
    pixel_x = int(fraction_across_tile * PIXELS_X)
    return (tile_id, pixel_x)

In [6]:
# Locations of the inputs and outputs for this notebook.
# NOTE(review): EXPERIMENT_DIR is an absolute path on one machine; change it to run elsewhere.
EXPERIMENT_DIR = '/Users/darylwilding-mcbride/Downloads/experiments/dwm-test'
# name of the run being examined; kept in one place so the derived paths cannot drift apart
RUN_NAME = '190719_Hela_Ecoli_1to1_01'
# pickled dataframe of detected features for the run
FEATURES_NAME = '{}/features/{}/exp-dwm-test-run-{}-features-all.pkl'.format(EXPERIMENT_DIR, RUN_NAME, RUN_NAME)
# SQLite database holding the converted frames for the run
CONVERTED_DATABASE_NAME = '{}/converted-databases/exp-dwm-test-run-{}-converted.sqlite'.format(EXPERIMENT_DIR, RUN_NAME)
# directory the labelled tile images are written to
OVERLAY_TILE_DIR = '{}/tiles/test-tfd'.format(EXPERIMENT_DIR)


In [8]:
# retention-time window of the tile set; compared against frame_properties.Time
# and against each feature's RT extent
rt_lower_tile_set, rt_upper_tile_set = 200, 800

In [9]:
# Load the Id and Time of every MS1 frame inside the tile set's retention-time window.
db_conn = sqlite3.connect(CONVERTED_DATABASE_NAME)
try:
    # bind the values as query parameters rather than formatting them into the SQL text
    ms1_frame_properties_df = pd.read_sql_query(
        "select Id,Time from frame_properties where Time >= ? and Time <= ? and MsMsType == ?",
        db_conn,
        params=(rt_lower_tile_set, rt_upper_tile_set, FRAME_TYPE_MS1)
    )
finally:
    # release the connection even if the query fails
    db_conn.close()

In [10]:
# Load the detected features from the pickle file at FEATURES_NAME.
# NOTE(review): unpickling can execute arbitrary code, so only load files from a trusted source.
features_df = pd.read_pickle(FEATURES_NAME)

In [11]:
# show which attributes are available for each feature
features_df.columns

Index(['candidate_phr_error', 'charge', 'envelope', 'feature_id', 'intensity',
       'intensity_full_rt_extent', 'mono_adjusted', 'monoisotopic_mz',
       'original_phr', 'original_phr_error', 'precursor_id', 'rt_apex',
       'rt_curve_fit', 'rt_lower', 'rt_upper', 'scan_apex', 'scan_curve_fit',
       'scan_lower', 'scan_upper', 'monoisotopic_mass'],
      dtype='object')

In [12]:
# the tile to be labelled
TILE_ID = 34
# directory holding this tile's frame images; derived from EXPERIMENT_DIR so that
# it always agrees with the other experiment paths
TILE_DIR = '{}/tiles/190719_Hela_Ecoli_1to1_01/tile-{}'.format(EXPERIMENT_DIR, TILE_ID)

In [13]:
# m/z extent of each feature, taken from the first value of the first and last envelope entries.
# NOTE(review): assumes each envelope is a non-empty sequence ordered by m/z whose
# entries start with the m/z value — TODO confirm.
# Iterating over the column directly avoids building a Series for every row, and
# also behaves sensibly when the dataframe is empty.
features_df['mz_lower'] = [envelope[0][0] for envelope in features_df['envelope']]
features_df['mz_upper'] = [envelope[-1][0] for envelope in features_df['envelope']]

In [14]:
# RT extent used to decide which frames a feature appears in: the feature's own
# rt_lower/rt_upper where rt_curve_fit is set, otherwise a window of 2 either side
# of rt_apex. Computed column-wise rather than with a Python call per row.
rt_curve_fitted = features_df.rt_curve_fit.astype(bool)
features_df['rt_lower_frame'] = np.where(rt_curve_fitted, features_df.rt_lower, features_df.rt_apex - 2)
features_df['rt_upper_frame'] = np.where(rt_curve_fitted, features_df.rt_upper, features_df.rt_apex + 2)

In [15]:
# spot-check the derived frame RT bounds against the columns they were computed from
features_df[['rt_curve_fit','rt_lower','rt_upper','rt_apex','rt_lower_frame','rt_upper_frame']].head()

Unnamed: 0,rt_curve_fit,rt_lower,rt_upper,rt_apex,rt_lower_frame,rt_upper_frame
0,True,1059.91,1067.34,1063.62,1059.91,1067.34
0,True,1002.55,1055.2,1028.87,1002.55,1055.2
1,False,1022.36,1042.36,1027.04,1025.04,1029.04
2,True,1003.81,1058.32,1031.07,1003.81,1058.32
0,True,1011.49,1022.56,1017.02,1011.49,1022.56


In [16]:
# m/z bounds covered by the tile being labelled
tile_mz_lower, tile_mz_upper = mz_range_for_tile(TILE_ID)

In [17]:
# keep only the features whose RT extent overlaps the tile set's RT window
# and whose m/z extent lies entirely within this tile
overlaps_rt_window = (features_df.rt_lower_frame <= rt_upper_tile_set) & (features_df.rt_upper_frame >= rt_lower_tile_set)
within_tile_mz = (features_df.mz_lower >= tile_mz_lower) & (features_df.mz_upper <= tile_mz_upper)
features_df = features_df[overlaps_rt_window & within_tile_mz]

In [18]:
# number of features remaining after the RT and m/z filter
len(features_df)

2462

In [19]:
# collect this tile's frame images, ordered by file name
tile_file_pattern = "{}/frame-*-tile-{}-mz-*.png".format(TILE_DIR, TILE_ID)
file_list = glob.glob(tile_file_pattern)
file_list.sort()

In [20]:
# number of tile images found for this tile
len(file_list)

1120

In [26]:
# start from an empty overlay directory: remove any existing one, then create it afresh
overlay_dir_present = os.path.exists(OVERLAY_TILE_DIR)
if overlay_dir_present:
    shutil.rmtree(OVERLAY_TILE_DIR)
os.makedirs(OVERLAY_TILE_DIR)

In [27]:
# For each tile image, draw a red rectangle around every feature that is present in
# that frame and has raw points in its region, then save the labelled image to
# OVERLAY_TILE_DIR under the same naming scheme.

# extra m/z drawn either side of a feature's extent
mz_buffer = 0.25

# only the existence of a point matters, so stop at the first matching row
points_query = "select 1 from frames where frame_id == ? and mz >= ? and mz <= ? and scan >= ? and scan <= ? limit 1"

# retention time of each MS1 frame, built once rather than filtering the dataframe for every file
unique_frames_df = ms1_frame_properties_df.drop_duplicates(subset='Id')
frame_rt_by_id = dict(zip(unique_frames_df.Id, unique_frames_df.Time))

# one connection for the whole run, closed even if something fails part-way through
db_conn = sqlite3.connect(CONVERTED_DATABASE_NAME)
try:
    for tile_file_name in file_list:
        base_name = os.path.basename(tile_file_name)
        # file names look like frame-<frame id>-tile-<tile id>-mz-<lower>-<upper>.png
        frame_id = int(base_name.split('-')[1])
        # get the retention time for this frame
        frame_rt = frame_rt_by_id.get(frame_id)
        if frame_rt is None:
            # the frame is not among the MS1 frames loaded for the RT window, so there is nothing to label
            continue
        # find the features intersecting with this frame
        intersecting_features_df = features_df[(features_df.rt_lower_frame <= frame_rt) & (features_df.rt_upper_frame >= frame_rt)]
        # draw the labels; the context manager closes the image file when we're done with it
        with Image.open(tile_file_name) as img:
            draw = ImageDraw.Draw(img)
            for _, feature in intersecting_features_df.iterrows():
                # clamp the buffered m/z to the valid range so a feature near the edge
                # of the m/z range doesn't trip the range assertion
                (t, x0_buffer) = tile_pixel_x_from_mz(max(feature.mz_lower - mz_buffer, MZ_MIN))
                if t < TILE_ID:
                    # the buffer spills into the previous tile, so start at the left edge
                    x0_buffer = 1
                (t, x1_buffer) = tile_pixel_x_from_mz(min(feature.mz_upper + mz_buffer, MZ_MAX))
                if t > TILE_ID:
                    # the buffer spills into the next tile, so stop at the right edge
                    x1_buffer = PIXELS_X
                y0 = feature.scan_lower
                y1 = feature.scan_upper
                # are there any points in this region of the frame?
                region_bounds = (feature.mz_lower, feature.mz_upper, feature.scan_lower, feature.scan_upper)
                # sqlite3 cannot bind numpy scalar types, so convert them to native Python values
                query_params = [frame_id] + [b.item() if hasattr(b, 'item') else b for b in region_bounds]
                has_points = db_conn.execute(points_query, query_params).fetchone() is not None
                # if so, draw the label
                if has_points:
                    draw.rectangle(xy=[(x0_buffer, y0), (x1_buffer, y1)], fill=None, outline='red')
            img.save('{}/frame-{}-tile-{}-mz-{}-{}.png'.format(OVERLAY_TILE_DIR, frame_id, TILE_ID, int(tile_mz_lower), int(tile_mz_upper)))
finally:
    db_conn.close()
