In [1]:
import ms_deisotope as ms_ditp
import numpy as np
import polars as pl
import importlib
from oliglow import utils
from oliglow import averageine
from oliglow import deisotope

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
from matplotlib import pyplot as plt, rcParams
# Default figure size for every plot in this notebook: 16 wide x 4 tall (inches).
rcParams["figure.figsize"] = 16, 4

# Options and libraries for reading the instrument files.

from clr_loader import get_mono
# get_mono (from clr_loader) selects which Mono runtime library is used.

# Use the Mono library from the Homebrew installation instead of the default
# one (per the original author, the default only works on the Intel architecture).
# NOTE(review): the path below is absolute and pins a specific Homebrew Cellar
# version (6.12.0.206), so it will break on other machines or after a Mono
# upgrade -- consider making it configurable.
# NOTE(review): `rt` is not referenced again in the visible cells; presumably
# the runtime is picked up elsewhere (e.g. by pythonnet) -- TODO confirm.
rt = get_mono(libmono="/opt/homebrew/Cellar/mono/6.12.0.206/lib/libmono-2.0.1.dylib")

# Leftover runtime-debugging snippet, kept disabled:
# import clr
# import pythonnet
# pythonnet.get_runtime_info()

In [2]:
# Deisotoping settings; handed to deisotope.deisotope_all_ms2_scans through its
# `deisotoping_parameters` argument.
# NOTE(review): "scale" and "scale_method" are both "sum", and there are two
# tolerance-like keys ("mass_error_tol" and "error_tol") -- confirm which of
# these the consumer actually reads.
params_dict = dict(
    avgine=averageine.averagine_rna_with_backbone,
    min_score=150.0,
    mass_error_tol=0.02,
    truncate_after=0.8,
    scale="sum",
    max_missed_peaks=0,
    error_tol=2e-5,
    scale_method="sum",
    minimum_intensity=0.0,
)

In [None]:
# Deisotope all MS2 scans of the raw file using `params_dict`, order the
# result by precursor neutral mass, and keep only the rows where the
# "ms1_deisotope_success" column is true.
df = (
    deisotope.deisotope_all_ms2_scans(
        "../data/2025-04-16_SJH_10merRNA01_mC_ms2_17,20,25.raw",
        deisotoping_parameters=params_dict,
        aggregate_masses=None,
        min_num_peaks_per_ms2_scan=0,
    )
    .sort("precursor_neutral_mass")
    .filter(pl.col("ms1_deisotope_success"))
)

Scan number 73 processed with 4 deconvoluted peaks
Scan number 74 processed with 1 deconvoluted peaks
Scan number 76 processed with 2 deconvoluted peaks
Scan number 78 processed with 5 deconvoluted peaks
Scan number 79 processed with 4 deconvoluted peaks
Scan number 80 processed with 6 deconvoluted peaks
Scan number 81 processed with 3 deconvoluted peaks
Scan number 83 processed with 4 deconvoluted peaks
Scan number 84 processed with 3 deconvoluted peaks
Scan number 85 processed with 3 deconvoluted peaks
Scan number 86 processed with 3 deconvoluted peaks
Scan number 88 processed with 5 deconvoluted peaks
Scan number 89 processed with 4 deconvoluted peaks
Scan number 90 processed with 9 deconvoluted peaks
Scan number 91 processed with 1 deconvoluted peaks
Scan number 93 processed with 4 deconvoluted peaks
Scan number 94 processed with 6 deconvoluted peaks
Scan number 95 processed with 4 deconvoluted peaks
Scan number 96 processed with 2 deconvoluted peaks
Scan number 98 processed with 3

### Aggregate based on the MS1 mass:

In [7]:
# Aggregate `df` on "precursor_neutral_mass" using a relative tolerance of 2e-5,
# with "ms1_intensity" passed as the intensity column.
# The keyword `name_intesity_col` is spelled exactly as the helper is called here.
df_agg = utils.custom_aggregate_aggregate_all(
    df=df,
    index_column="precursor_neutral_mass",
    tolerance_type="relative",
    tolerance=2e-5,
    name_intesity_col="ms1_intensity",
)

In [8]:
# Print the group index, precursor mass and MS1 intensity of every aggregated
# group. The table row limit is lifted (-1) only inside this `with` block.
with pl.Config() as cfg:
    cfg.set_tbl_rows(-1)
    print(
        df_agg.select(
            ["group_index", "precursor_neutral_mass", "ms1_intensity"]
        )
    )


shape: (39, 3)
┌─────────────┬────────────────────────┬───────────────┐
│ group_index ┆ precursor_neutral_mass ┆ ms1_intensity │
│ ---         ┆ ---                    ┆ ---           │
│ i64         ┆ f64                    ┆ f64           │
╞═════════════╪════════════════════════╪═══════════════╡
│ 0           ┆ 560.919729             ┆ 2.5655e7      │
│ 1           ┆ 563.94073              ┆ 4.3770e8      │
│ 2           ┆ 583.929633             ┆ 616878.171875 │
│ 3           ┆ 587.959245             ┆ 3.8563e8      │
│ 4           ┆ 587.970654             ┆ 1.5931e7      │
│ 5           ┆ 599.908387             ┆ 979255.085938 │
│ 6           ┆ 605.925437             ┆ 1.0608e8      │
│ 7           ┆ 618.064737             ┆ 6.6378e7      │
│ 8           ┆ 680.223748             ┆ 226113.205078 │
│ 9           ┆ 715.955136             ┆ 7.1074e8      │
│ 10          ┆ 731.932365             ┆ 1.9338e9      │
│ 11          ┆ 737.94972              ┆ 2.0469e9      │
│ 12          ┆ 

In [11]:
# Select the aggregated group(s) whose MS1 intensity equals the column maximum,
# show them, and take the precursor neutral mass of the first such row.
highest_intensity_row = df_agg.filter(
    pl.col("ms1_intensity") == pl.col("ms1_intensity").max()
)
print(highest_intensity_row)
MS1_mass = highest_intensity_row.get_column("precursor_neutral_mass").to_numpy()[0]
print("MS1 mass = ", MS1_mass)

shape: (1, 27)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ group_ind ┆ precursor ┆ individua ┆ ms1_inten ┆ … ┆ precursor ┆ isolation ┆ isolation ┆ isolatio │
│ ex        ┆ _neutral_ ┆ l_intensi ┆ sity      ┆   ┆ _charge   ┆ _window_t ┆ _window_l ┆ n_window │
│ ---       ┆ mass      ┆ ties      ┆ ---       ┆   ┆ ---       ┆ arget     ┆ ower      ┆ _upper   │
│ i64       ┆ ---       ┆ ---       ┆ f64       ┆   ┆ list[i64] ┆ ---       ┆ ---       ┆ ---      │
│           ┆ f64       ┆ list[f64] ┆           ┆   ┆           ┆ list[f64] ┆ list[f64] ┆ list[f64 │
│           ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆ ]        │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 29        ┆ 4130.7202 ┆ [3.8898e8 ┆ 1.9099e11 ┆ … ┆ [-5, -5,  ┆ [825.5381 ┆ [824.0381 ┆ [827.038 │
│           ┆ 44        ┆ ,         ┆           ┆   ┆ … -5]     ┆ 47, 825.5 

In [12]:
# Order `df` by "neutral_mass" and keep the rows whose precursor neutral mass
# lies in the closed interval [MS1_mass - 0.01, MS1_mass + 1.01].
# NOTE(review): the window is asymmetric (-0.01 / +1.01) -- presumably to also
# accept precursors assigned one mass unit too high; confirm this is intended.
df_ms1 = (
    df.sort("neutral_mass")
    .filter(
        (pl.col("precursor_neutral_mass") >= MS1_mass - 0.01)
        & (pl.col("precursor_neutral_mass") <= MS1_mass + 1.01)
    )
)


In [None]:
# Export the selected rows as a tab-separated (.tsv) file next to the raw data.
df_ms1.write_csv(
    "../data/2025-04-16_SJH_10merRNA01_mC_ms2_17,20,25_deisotoped.tsv",
    separator="\t",
)
