# Immonium ion detection - ion peaks over time

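To count the immonium ions present in the data, each ion's intensity curve over time is reduced to its highest peaks: detections weaker than one eighth of the curve's maximum are treated as noise and discarded, and only the remaining peaks are counted.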

In [None]:
import pandas as pd
from pyopenms import MzMLFile, MSExperiment
import numpy as np
import re
import matplotlib.pyplot as plt

In [None]:
# Input locations, relative to the notebook's working directory.
mzml_path = "ptm-search-data/data/230928_JL_Immonium_ions_Modified_DIA.mzML"
detected_ions_csv_path = "ptm-search-data/results_thesis/workflow_test_tolerance_snr3_10ppm_all_mods_with_decoys/detected_ions.csv"

# Read the mzML run into an experiment and keep its spectra at module level;
# the plotting helpers below index into `spectra_all`.
exp = MSExperiment()
MzMLFile().load(mzml_path, exp)
spectra_all = exp.getSpectra()

# Table of detected ions, read from the results directory above.
detections_df = pd.read_csv(detected_ions_csv_path)

In [None]:
def plot_ions_behaviour_for_spectrum_id_within_mz_range(
    s_id_number, detected_df, cycle_length=73, noise_divisor=8
):
    """Plot one ion's detected intensity over every ``cycle_length``-th spectrum.

    The spectrum whose native ID is
    ``controllerType=0 controllerNumber=1 scan=<s_id_number>`` is located in
    the module-level ``spectra_all``. Starting from that position modulo
    ``cycle_length``, every ``cycle_length``-th spectrum is visited and the
    largest ``detected_intensity`` recorded for it in ``detected_df`` is taken
    (0 when the spectrum has no rows). The resulting curve is plotted twice:
    as is, and with every value below ``max / noise_divisor`` set to 0. The
    figure is saved as a PNG under ``../../data/plots/ion_detection/``.

    Args:
        s_id_number: Scan number used to build the native ID of the starting
            spectrum.
        detected_df: Detections for a single ion. Must provide the columns
            ``spectrum_id``, ``detected_intensity``,
            ``letter_and_unimod_format_mod`` and ``type``; the last two are
            read from the first row to name the output file.
        cycle_length: Stride between visited spectra. The default of 73 is
            presumably the number of spectra per acquisition cycle of this
            run -- confirm before reusing on other data.
        noise_divisor: Values below ``max intensity / noise_divisor`` are
            treated as noise and zeroed in the second curve.

    Returns:
        A tuple ``(num_detected, num_detected_peaks)``: the number of visited
        spectra with a non-zero intensity before and after the noise cut.
        ``(0, 0)`` is returned, and nothing is plotted or saved, when no
        visited spectrum has a detection.

    Raises:
        IndexError: If no spectrum in ``spectra_all`` has the requested
            native ID.
    """
    target_native_id = f"controllerType=0 controllerNumber=1 scan={s_id_number}"
    # Stop at the first match instead of scanning (and listing) every spectrum.
    initial_spectrum_idx = next(
        (
            idx
            for idx, spectrum in enumerate(spectra_all)
            if spectrum.getNativeID() == target_native_id
        ),
        None,
    )
    if initial_spectrum_idx is None:
        raise IndexError(
            f"no spectrum with native ID {target_native_id!r} in spectra_all"
        )

    # One pass over the detections instead of one index scan per spectrum.
    max_intensity_by_spectrum = detected_df.groupby("spectrum_id")[
        "detected_intensity"
    ].max()

    spectrum_idcs = list(
        range(initial_spectrum_idx % cycle_length, len(spectra_all), cycle_length)
    )
    detected_intensities = np.array(
        [
            max_intensity_by_spectrum.get(spectra_all[idx].getNativeID(), 0)
            for idx in spectrum_idcs
        ]
    )

    # Bail out before a figure exists, so nothing is left open on this path.
    if np.all(detected_intensities == 0):
        return 0, 0

    num_detected = (detected_intensities != 0).sum()

    # Build a separate array for the filtered curve so the unfiltered data
    # handed to the first plot call is never modified.
    noise_threshold = np.max(detected_intensities) / noise_divisor
    filtered_intensities = np.where(
        detected_intensities < noise_threshold, 0, detected_intensities
    )
    num_detected_peaks = (filtered_intensities != 0).sum()

    ion_name = f"{detected_df['letter_and_unimod_format_mod'].iloc[0]}_{detected_df['type'].iloc[0]}"

    fig = plt.figure(figsize=(16, 5))
    try:
        plt.plot(spectrum_idcs, detected_intensities, label="Peaks before noise reduction")
        plt.xlabel("Spectrum ID")
        plt.ylabel("Ion intensity")
        plt.plot(spectrum_idcs, filtered_intensities, color="orange", label="Peaks after noise reduction")
        plt.legend()
        plt.savefig(f"../../data/plots/ion_detection/ions_over_time_{ion_name}_{s_id_number}.png")
    finally:
        # Close this figure even if saving fails.
        plt.close(fig)

    return num_detected, num_detected_peaks


In [None]:
def plot_ions_over_time(detected_df):
    """Plot the ion's curves for scan numbers 37 to 72 and total the counts.

    Calls ``plot_ions_behaviour_for_spectrum_id_within_mz_range`` once per
    scan number with the same ``detected_df`` and adds up the two counts each
    call returns.

    Returns:
        A tuple ``(num_detected, num_detected_peaks)`` summed over all calls.
    """
    counts_per_scan = [
        plot_ions_behaviour_for_spectrum_id_within_mz_range(scan_number, detected_df)
        for scan_number in range(37, 73)
    ]
    # Close whatever figure is still current after the last call.
    plt.close()

    total_detected = sum(detected for detected, _ in counts_per_scan)
    total_detected_peaks = sum(peaks for _, peaks in counts_per_scan)
    return total_detected, total_detected_peaks


In [None]:
# Collect one summary row per (modification, ion type) group and write the
# table next to the plots.
summary_columns = ("ion_name", "num_ion_detected", "num_ion_detected_peaks")
ions_detected_peaks = {column: [] for column in summary_columns}

ions_grouped = detections_df.groupby(["letter_and_unimod_format_mod", "type"])
for ion_name, detected_ion_df in ions_grouped:
    # Progress output: the group key joined the same way as in the plot names.
    print("_".join(ion_name))
    num_ion_detected, num_ion_detected_peaks = plot_ions_over_time(detected_ion_df)
    summary_row = (ion_name, num_ion_detected, num_ion_detected_peaks)
    for column, value in zip(summary_columns, summary_row):
        ions_detected_peaks[column].append(value)

ions_detected_peaks_df = pd.DataFrame(ions_detected_peaks)
ions_detected_peaks_df.to_csv("../../data/plots/ion_detection/ions_over_time.csv", index=False)