In [90]:
%load_ext autoreload
%autoreload all
import argparse
from typing import Dict, Tuple, List, Literal
import logging
import pandas as pd
import molmass
from pyteomics import mzml as pytmzml
from draft import *

import matplotlib.pyplot as plt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [106]:
# --- Input spectra -----------------------------------------------------------
# NOTE(review): absolute, machine-specific directory. It is defined once here so
# the notebook can be pointed at another location by editing a single line.
DATA_DIR = "/Users/gkreder/Downloads/2024-07-15_mschemutils-refactor"
mzml_1 = f"{DATA_DIR}/Pos_01.mzML"
mzml_2 = f"{DATA_DIR}/Pos_05.mzML"
# Base of the index numbering used below; it is subtracted from index_1/index_2
# before the spectra are looked up.
starting_index = 1
index_1 = 750  # spectrum to take from mzml_1
index_2 = 633  # spectrum to take from mzml_2

# --- Quasicount conversion ---------------------------------------------------
# x / y parameters of the intensity -> quasicount conversion.
quasi_x = 114.5
quasi_y = -0.238

# --- m/z tolerances, all derived from R --------------------------------------
R = 10000  # presumably the instrument resolving power — TODO confirm
match_acc = 100.0 / R             # 0.01: match accuracy for parent filtering and merging
res_clearance = 200.0 / R         # 0.02: resolution-clearance filter width
subformula_tolerance = 100.0 / R  # 0.01: tolerance passed to formula assignment
du_min = -0.5  # presumably the minimum degree of unsaturation allowed — TODO confirm

# --- Filtering thresholds ----------------------------------------------------
# NOTE(review): these two are not referenced by the pipeline cell that follows;
# confirm whether they should be enforced there.
min_spectrum_quasi_sum = 20
min_total_peaks = 2
abs_cutoff = None  # no absolute cutoff for this run
rel_cutoff = None  # no relative cutoff for this run
QUASI_CUTOFF_DEFAULT = 5
quasi_cutoff = QUASI_CUTOFF_DEFAULT  # quasicount cutoff used when filtering peaks
exclude_peaks = None  # no peak-exclusion list supplied for this run
pdpl = None  # forwarded to both filtering and formula assignment; unset for this run

# --- Parent ion --------------------------------------------------------------
parent_mz = 271.0601
# Round-trip through molmass so the formula string is in molmass's own format.
parent_formula = molmass.Formula("C15H11O5").formula

# --- Output / plotting -------------------------------------------------------
out_prefix = "Genistein_v_DMG_0V-Pos_01_750_v_Pos_05_633"
suffixes = ['A', 'B']  # labels for the first and second spectrum in results and plots
log_plots = True
gain_control = False  # forwarded to get_spectra_by_indices

In [127]:
# End-to-end comparison of two spectra: read -> validate -> filter -> merge ->
# (optionally) assign formulas -> compute metrics -> write results and plots.

# Root logger at DEBUG so the helpers' per-step DEBUG messages are shown.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# The module-level logging.info() calls are kept deliberately: unlike
# logger.info(), they install a default root handler when none exists, which is
# what makes the INFO/DEBUG lines appear in the cell output.
logging.info("Reading spectra")
# index_1/index_2 are expressed relative to starting_index; subtracting it
# gives the offsets handed to the reader.
spectra = get_spectra_by_indices([mzml_1, mzml_2], [index_1 - starting_index, index_2 - starting_index], gain_control)
logging.info("Validating spectra")
validate_spectrum_pair(spectra)

# Full set of filter options applied to the spectra that are actually compared.
kw = {'abs_cutoff' : abs_cutoff,
      'quasi_x' : quasi_x,
      'quasi_y' : quasi_y,
      'rel_cutoff' : rel_cutoff,
      'quasi_cutoff' : quasi_cutoff,
      'pdpl' : pdpl,
      'exclude_peaks' : exclude_peaks,
      'match_acc' : match_acc,
      'parent_mz' : parent_mz,
      'res_clearance' : res_clearance,
      'sort_intensity' : True
      }
logging.info("Filtering and converting spectra")
spectra_filtered = [filter_and_convert_spectrum_complete(spectrum, **kw) for spectrum in spectra]

# Reduced option set (no cutoffs, exclusions or resolution clearance) for the
# spectra passed to the plots — presumably drawn as a gray background; TODO confirm.
gray_kw = {
    'quasi_x' : quasi_x,
    'quasi_y' : quasi_y,
    'parent_mz' : parent_mz,
    'match_acc' : match_acc
}
gray_spectra = [filter_and_convert_spectrum_complete(spectrum, **gray_kw) for spectrum in spectra]

logging.info("Merging spectra")
merged_spectrum = merge_spectra(spectra_filtered[0], spectra_filtered[1], match_acc)
# Work on a copy so merged_spectrum stays available unchanged for inspection.
final_spectrum = merged_spectrum.copy()

# Always bind formula_spectrum. Previously it was only assigned inside the
# `if`, so parent_formula=None raised NameError on a fresh kernel, or silently
# reused a stale value left over from an earlier run.
# NOTE(review): assumes calc_spectra_metrics accepts a spectrum without formula
# annotations — TODO confirm.
formula_spectrum = final_spectrum
if parent_formula is not None:
    logging.info("Parent formula specified - Calculating formulas")
    formula_spectrum = add_spectra_formulas(final_spectrum, parent_formula, subformula_tolerance, du_min, pdpl)

logging.info("Calculating metrics")
metrics = calc_spectra_metrics(formula_spectrum)
df_stats, df_intersection, df_union, spectra_df = get_results_dfs(spectra, metrics, parent_mz, quasi_x, quasi_y, parent_formula, suffixes)
# NOTE(review): output path is hard-coded and machine-specific; out_prefix is
# not used here — confirm whether the file name should be derived from it.
write_results_xlsx("/Users/gkreder/Downloads/test.xlsx", df_stats, df_intersection, df_union, spectra_df)
summary_plots(df_stats, df_intersection, df_union, gray_spectra, suffixes=suffixes, log_plots=log_plots)

INFO:root:Reading spectra
DEBUG:root:Getting spectrum 749 from /Users/gkreder/Downloads/2024-07-15_mschemutils-refactor/Pos_01.mzML
DEBUG:root:Getting spectrum 632 from /Users/gkreder/Downloads/2024-07-15_mschemutils-refactor/Pos_05.mzML
INFO:root:Validating spectra
INFO:root:Filtering and converting spectra
DEBUG:root:Filtering spectrum with filters: abs_cutoff=None, quasi_x=114.5, quasi_y=-0.238, rel_cutoff=None, quasi_cutoff=5, pdpl=None, exclude_peaks=None, match_acc=0.01, parent_mz=271.0601, res_clearance=0.02
DEBUG:root:Converting spectrum intensities to quasicounts with x = 114.5 and y = -0.238
DEBUG:root:Filtering spectrum by quasi count cutoff of 5
DEBUG:root:Filtering spectrum by parent m/z 271.0601 with match accuracy of 0.01
DEBUG:root:Filtering spectrum by resolution clearance of 0.02
DEBUG:root:Filtering spectrum by peak exclusion [271.0606417691041] (keep_exact=True) with match accuracy of 0.02
DEBUG:root:Filtering spectrum by peak exclusion [215.06548681993976] (keep_ex