In [4]:
import pandas as pd
import numpy as np
import sys
import pickle
import glob
import os
import sqlite3
import matplotlib.pyplot as plt
from matplotlib import colors, cm, pyplot as plt
from PIL import Image

In [8]:
EXPERIMENT_NAME = 'P3856'
EXPERIMENT_DIR = '/data2/experiments/{}'.format(EXPERIMENT_NAME)
RUN_NAME = 'P3856_YHE114_1_Slot1-1_1_5115'
FEATURES_DIR = '{}/features/{}'.format(EXPERIMENT_DIR, RUN_NAME)
CONVERTED_DB = '{}/converted-databases/exp-{}-run-{}-converted.sqlite'.format(EXPERIMENT_DIR, EXPERIMENT_NAME, RUN_NAME)
ENCODED_FEATURES_DIR = '{}/encoded-features/{}'.format(EXPERIMENT_DIR, RUN_NAME)
FEATURE_SLICES_DIR = '{}/slices'.format(ENCODED_FEATURES_DIR)

In [9]:
# frame types for PASEF mode
FRAME_TYPE_MS1 = 0
FRAME_TYPE_MS2 = 8

In [10]:
# get the frame properties
db_conn = sqlite3.connect(CONVERTED_DB)
frame_properties_df = pd.read_sql_query('select Id,Time from frame_properties where MsMsType == {}'.format(FRAME_TYPE_MS1), db_conn)
db_conn.close()


In [11]:
frame_properties_df.sample(n=5)

Unnamed: 0,Id,Time
1720,3297,373.213999
5643,13543,1508.875812
2527,7179,782.533948
6656,18556,2039.154444
7013,20341,2228.092378


In [12]:
# image dimensions
PIXELS_X = 224
PIXELS_Y = 224

In [13]:
def pixel_xy(mz, scan, mz_lower, mz_upper, scan_lower, scan_upper):
    x_pixels_per_mz = (PIXELS_X-1) / (mz_upper - mz_lower)
    y_pixels_per_scan = (PIXELS_Y-1) / (scan_upper - scan_lower)
    
    pixel_x = int((mz - mz_lower) * x_pixels_per_mz)
    pixel_y = int((scan - scan_lower) * y_pixels_per_scan)
    return (pixel_x, pixel_y)

In [15]:
precursor_id = 2917
feature_pkl = '{}/exp-dwm-test-run-{}-features-precursor-{}.pkl'.format(FEATURES_DIR, RUN_NAME, precursor_id)

In [16]:
# create the colour mapping
colour_map = plt.get_cmap('rainbow')
norm = colors.LogNorm(vmin=1, vmax=1000, clip=True)  # aiming to get good colour variation in the lower range, and clipping everything else

In [18]:
# load the features for this precursor
features_df = pd.read_pickle(feature_pkl)
# for each feature, generate image slices for its cuboid
for feature in features_df.itertuples():
    feature_id = feature.feature_id
#     print("feature ID {}".format(feature_id))
    print(feature)

    # determine the feature cuboid dimensions
    mz_lower = feature.envelope[0][0] - 0.5
    mz_upper = feature.envelope[-1][0] + 0.5
    scan_lower = feature.scan_lower
    scan_upper = feature.scan_upper
    rt_apex = feature.rt_apex
    rt_lower = feature.rt_lower
    rt_upper = feature.rt_upper
    monoisotopic_mz = feature.monoisotopic_mz

    # get the raw data for this feature
    db_conn = sqlite3.connect(CONVERTED_DB)
    raw_df = pd.read_sql_query('select mz,scan,intensity,frame_id,retention_time_secs from frames where mz >= {} and mz <= {} and scan >= {} and scan <= {} and frame_type == {} and retention_time_secs >= {} and retention_time_secs <= {}'.format(mz_lower, mz_upper, scan_lower, scan_upper, FRAME_TYPE_MS1, rt_lower, rt_upper), db_conn)
    if len(raw_df) == 0:
        print("found no raw points for feature {}".format(feature_id))
    db_conn.close()

    # calculate the raw point coordinates in scaled pixels
    pixel_df = pd.DataFrame(raw_df.apply(lambda row: pixel_xy(row.mz, row.scan, monoisotopic_mz-0.5, monoisotopic_mz+(10*0.5), scan_lower, scan_upper), axis=1).tolist(), columns=['pixel_x','pixel_y'])
    raw_pixel_df = pd.concat([raw_df, pixel_df], axis=1)

    # sum the intensity of raw points that have been assigned to each pixel
    pixel_intensity_df = raw_pixel_df.groupby(by=['frame_id', 'pixel_x', 'pixel_y'], as_index=False).intensity.sum()

    # calculate the colour to represent the intensity
    colours_l = []
    for i in pixel_intensity_df.intensity.unique():
        colours_l.append((i, colour_map(norm(i), bytes=True)[:3]))
    colours_df = pd.DataFrame(colours_l, columns=['intensity','colour'])
    pixel_intensity_df = pd.merge(pixel_intensity_df, colours_df, how='left', left_on=['intensity'], right_on=['intensity'])

    # get the frame IDs closest to the RT apex
    frame_ids = frame_properties_df.iloc[(frame_properties_df['Time'] - rt_apex).abs().argsort()[:20]].sort_values(by=['Time'], ascending=[True], inplace=False).Id.tolist()    

    # write out the images to files
    feature_slice = 0
    for frame_id in frame_ids:
        frame_df = pixel_intensity_df[(pixel_intensity_df.frame_id == frame_id)]
        # create an intensity array
        tile_im_array = np.zeros([PIXELS_Y, PIXELS_X, 3], dtype=np.uint8)  # container for the image
        for r in zip(frame_df.pixel_x, frame_df.pixel_y, frame_df.colour):
            x = r[0]
            y = r[1]
            c = r[2]
            tile_im_array[y,x,:] = c

        # create an image of the intensity array
        feature_slice += 1
        tile = Image.fromarray(tile_im_array, 'RGB')
        tile_file_name = '{}/feature-{}-slice-{:03d}.png'.format(FEATURE_SLICES_DIR, feature_id, feature_slice)
        tile.save(tile_file_name)

Pandas(Index=0, monoisotopic_mz=680.807471132394, charge=2, intensity=543545, intensity_full_rt_extent=276595, scan_apex=771.81, scan_curve_fit=True, scan_lower=752.55, scan_upper=791.07, rt_apex=206.0, rt_curve_fit=True, rt_lower=206.0, rt_upper=206.01, precursor_id=2917, envelope=[(680.8058, 272788.00), (681.3075, 177628.00), (681.8089, 70475.00), (682.3110, 22654.00)], feature_id=291701, mono_adjusted=False, original_phr_error=-0.10876178198401788, candidate_phr_error=None, original_phr=0.6511576755575759, monoisotopic_mass=1359.600342264788)
found no raw points for feature 291701


AttributeError: 'DataFrame' object has no attribute 'tolist'

In [13]:
len(features_df)

1