In [21]:
import json
import os
import pickle
import shutil
import tempfile
import zipfile

import pandas as pd

In [23]:
def load_precursor_cuboid_zip(filename):
    """Load the metadata, ms1 points, and ms2 points from the specified zip file.

    Members are taken in the order the archive reports them: the first is
    parsed as JSON, the second and third are read with ``pd.read_pickle``.

    Args:
        filename: path of the precursor cuboid zip file.

    Returns:
        A tuple ``(metadata, ms1_df, ms2_df)`` holding the parsed JSON content
        and the two unpickled objects (presumably DataFrames - the function
        itself does not check).

    Raises:
        IndexError: if the archive contains fewer than three members.
    """
    # The context manager owns the scratch directory, so it is removed even
    # when extraction or parsing raises. Taking `.name` from an unreferenced
    # TemporaryDirectory lets the object be finalised straight away, and a
    # trailing rmtree is skipped on any error, leaking the extracted files.
    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(filename, "r") as zf:
            names = zf.namelist()
            if len(names) < 3:
                # fail before extracting anything, with a message that names the file
                raise IndexError('expected at least 3 members in {}, found {}'.format(filename, len(names)))
            zf.extractall(path=temp_dir)
        with open('{}/{}'.format(temp_dir, names[0])) as json_file:
            metadata = json.load(json_file)
        # NOTE: unpickling can execute arbitrary code - only load archives from a trusted source
        ms1_df = pd.read_pickle('{}/{}'.format(temp_dir, names[1]))
        ms2_df = pd.read_pickle('{}/{}'.format(temp_dir, names[2]))
    return (metadata, ms1_df, ms2_df)


In [24]:
# Root directory holding the experiments. Set the EXPERIMENT_BASE_DIR environment
# variable to override it; the default is resolved against the current user's home
# directory instead of a path hardcoded to one machine.
experiment_base_dir = os.environ.get('EXPERIMENT_BASE_DIR', os.path.expanduser('~/Downloads/experiments'))
# run and experiment names; both appear in the cuboid directory and file names
run_name = 'P3856_YHE211_1_Slot1-1_1_5104'
experiment_name = 'P3856'
# the precursor cuboid to inspect, and the total that appears in its file name ("-of-<total>")
precursor_id = 1000
total_precursors = 52464

In [25]:
# Build the path of this precursor's cuboid zip; the experiment and run names
# each appear twice (once in the directory part, once in the file name).
precursor_zip_filename = '{}/{}/precursor-cuboids/{}/exp-{}-run-{}-precursor-{}-of-{}.zip'.format(
    experiment_base_dir,
    experiment_name,
    run_name,
    experiment_name,
    run_name,
    precursor_id,
    total_precursors,
)
# unpack the metadata plus the ms1 and ms2 point tables from the archive
(precursor_metadata, ms1_points_df, ms2_points_df) = load_precursor_cuboid_zip(precursor_zip_filename)

In [26]:
# display the metadata returned by load_precursor_cuboid_zip for this cuboid
precursor_metadata

{'precursor_id': 1000,
 'window_mz_lower': 606.3989053402448,
 'window_mz_upper': 609.7989053402449,
 'wide_mz_lower': 605.3955503402448,
 'wide_mz_upper': 609.7989053402449,
 'window_scan_width': 25,
 'fe_scan_lower': 811,
 'fe_scan_upper': 836,
 'wide_scan_lower': 786,
 'wide_scan_upper': 861,
 'wide_rt_lower': 447.34784298957885,
 'wide_rt_upper': 467.7675776990704,
 'fe_ms1_frame_lower': 4078,
 'fe_ms1_frame_upper': 4108,
 'fe_ms2_frame_lower': 4090,
 'fe_ms2_frame_upper': 4094,
 'wide_frame_lower': 3993,
 'wide_frame_upper': 4189,
 'number_of_windows': 4}

#### raw ms1 points

In [27]:
# preview the first five raw ms1 points
ms1_points_df.head(n=5)

Unnamed: 0,frame_id,frame_type,mz,scan,intensity,retention_time_secs
0,3993,0,605.725731,861,55,447.157516
1,3993,0,605.74503,824,76,447.157516
2,3993,0,605.802929,839,85,447.157516
3,3993,0,605.806789,847,78,447.157516
4,3993,0,605.810649,816,78,447.157516


In [28]:
# keep only the ms1 points whose m/z lies inside the window bounds (both ends inclusive)
fe_raw_points_df = ms1_points_df.loc[
    ms1_points_df['mz'].between(
        precursor_metadata['window_mz_lower'],
        precursor_metadata['window_mz_upper'],
    )
]

In [29]:
# preview the first five points that fall inside the m/z window
fe_raw_points_df.head(n=5)

Unnamed: 0,frame_id,frame_type,mz,scan,intensity,retention_time_secs
11,3993,0,606.598351,819,9,447.157516
12,3993,0,606.764452,816,52,447.157516
13,3993,0,607.146956,816,23,447.157516
14,3993,0,607.59143,843,25,447.157516
15,3993,0,607.730603,861,92,447.157516


#### fragment ions

In [36]:
# preview the first three ms2 points
ms2_points_df.head(3)

Unnamed: 0,frame_id,frame_type,mz,scan,intensity,retention_time_secs
0,3994,8,173.739556,814,9,447.263026
1,3994,8,176.262261,828,9,447.263026
2,3994,8,182.614667,837,9,447.263026


#### derived feature attributes for this precursor cuboid

In [30]:
# load the pickled feature table stored under the experiment's features directory
feature_df = pd.read_pickle(
    '{}/{}/features/experiment-features.pkl'.format(
        experiment_base_dir,
        experiment_name,
    )
)

In [31]:
# show the feature rows whose precursor_id matches the precursor being inspected
feature_df.loc[feature_df['precursor_id'] == precursor_id]

Unnamed: 0,monoisotopic_mz,charge,intensity,intensity_full_rt_extent,scan_apex,scan_curve_fit,scan_lower,scan_upper,rt_apex,rt_curve_fit,rt_lower,rt_upper,precursor_id,envelope,feature_id,candidate_phr_error,mono_adjusted,original_phr_error,original_phr,monoisotopic_mass
95150,607.789175,2,34799.0,84296,822.76,True,808.66,836.86,456.91,True,446.91,466.91,1000,"((607.7875, 17619.00), (608.2890, 11221.00), (...",100001,,False,-0.0220516,0.636869,1213.563749
95151,606.798859,2,2019.0,1395,833.32,False,813.32,853.32,457.91,False,447.91,467.91,1000,"((606.7935, 1019.00), (607.3003, 1000.00), (60...",100002,-0.316447,False,0.509418,0.981354,1211.583118
