# Immonium ion detection
Analysis of immonium ion detection results: the effect of the detection parameters and of the collision energy, followed by a summary of the detected ions.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def print_results(df):
    """Print the distinct modification names found for every amino acid.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``amino_acid`` and ``mod_name``.

    For each amino acid (in the sorted order produced by ``groupby``) two
    things are written to stdout: the amino acid itself, then the result of
    ``pd.unique`` applied to that group's ``mod_name`` values.
    """
    # Group by the column label itself, not by a one-element list: with a list
    # key, recent pandas versions hand back 1-tuples such as ``('K',)`` as the
    # group name, and that tuple would be printed instead of the amino acid.
    for amino_acid, mod_names in df.groupby("amino_acid")["mod_name"]:
        print(amino_acid)
        print(pd.unique(mod_names))

## Results of parameter and collision energy effect evaluations

In [5]:
def grouped_bar_mods(dfs_unmod, dfs_mod, xticklabels, xlabel, out_name=None, legend_loc="upper right"):
    """Draw a bar chart of the number of rows in each result dataframe.

    Parameters
    ----------
    dfs_unmod : sequence of pandas.DataFrame
        Results for the dataset without modifications, one frame per setting.
        May be empty, in which case only ``dfs_mod`` is plotted.
    dfs_mod : sequence of pandas.DataFrame
        Results for the dataset with modifications, one frame per setting.
    xticklabels : sequence
        One tick label per setting.
    xlabel : str
        Label of the x-axis.
    out_name : str, optional
        If given, the figure is saved as ``../../data/plots/{out_name}.svg``.
    legend_loc : str or None
        Passed to ``ax.legend(loc=...)``; ``None`` suppresses the legend.

    The bar height is ``len(df)`` for every frame; each bar is annotated with
    its value. The figure is shown with ``plt.show()``; nothing is returned.
    """
    x = np.arange(len(dfs_mod))
    width = 0.25

    fig, ax = plt.subplots(layout='constrained')

    if len(dfs_unmod) > 0:
        unmod_bars = ax.bar(x, [len(df) for df in dfs_unmod], width, label="Dataset without mods")
        ax.bar_label(unmod_bars, padding=3)
        # Two series: the "with mods" bar sits right of the "without mods" bar
        # and the tick is centred between the two.
        mod_offset, tick_offset = width, width / 2
    else:
        # Single series: put bar and tick on the same position. Previously the
        # bar was drawn at x + width while the tick stayed at x + width/2,
        # which placed every tick label on the left edge of its bar.
        mod_offset, tick_offset = 0, 0

    mod_bars = ax.bar(x + mod_offset, [len(df) for df in dfs_mod], width, label="Dataset with mods")
    ax.bar_label(mod_bars, padding=3)

    ax.set_ylabel('Number of detected diagnostic ions')
    ax.set_xlabel(xlabel)
    ax.set_xticks(x + tick_offset, xticklabels)

    if legend_loc is not None:
        ax.legend(loc=legend_loc)

    if out_name is not None:
        plt.savefig(f"../../data/plots/{out_name}.svg", bbox_inches="tight")
    plt.show()


In [4]:
def grouped_bar_mods_spectra(dfs_unmod, dfs_mod, xticklabels, xlabel, out_name=None, legend_loc="upper right"):
    """Draw a bar chart of the number of distinct spectra in each result dataframe.

    Parameters
    ----------
    dfs_unmod : sequence of pandas.DataFrame
        Results for the dataset without modifications, one frame per setting.
        May be empty, in which case only ``dfs_mod`` is plotted.
    dfs_mod : sequence of pandas.DataFrame
        Results for the dataset with modifications, one frame per setting.
    xticklabels : sequence
        One tick label per setting.
    xlabel : str
        Label of the x-axis.
    out_name : str, optional
        If given, the figure is saved as ``../../data/plots/{out_name}.svg``.
    legend_loc : str or None
        Passed to ``ax.legend(loc=...)``; ``None`` suppresses the legend.

    The bar height is the number of unique ``spectrum_id`` values of every
    frame; each bar is annotated with its value. The figure is shown with
    ``plt.show()``; nothing is returned.
    """
    x = np.arange(len(dfs_mod))
    width = 0.25

    fig, ax = plt.subplots(layout='constrained')

    if len(dfs_unmod) > 0:
        unmod_counts = [len(df["spectrum_id"].unique()) for df in dfs_unmod]
        ax.bar_label(ax.bar(x, unmod_counts, width, label="dataset without mods"), padding=3)
        # Two series: tick centred between the neighbouring bars.
        mod_offset, tick_offset = width, width / 2
    else:
        # Single series: bar and tick share the same position.
        mod_offset, tick_offset = 0, 0

    mod_counts = [len(df["spectrum_id"].unique()) for df in dfs_mod]
    ax.bar_label(ax.bar(x + mod_offset, mod_counts, width, label="dataset with mods"), padding=3)

    ax.set_ylabel('Number of spectra with detected diagnostic ions')
    ax.set_xlabel(xlabel)
    ax.set_xticks(x + tick_offset, xticklabels)

    # The legend_loc argument used to be ignored, so the labelled series were
    # never explained in the figure; honour it the same way grouped_bar_mods does.
    if legend_loc is not None:
        ax.legend(loc=legend_loc)

    if out_name is not None:
        plt.savefig(f"../../data/plots/{out_name}.svg", bbox_inches="tight")
    plt.show()

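Result dataframes for the different collision energies (30–60 NCE) on the single-energy files, once with a 5 ppm and once with a 10 ppm tolerance (SNR threshold 3 in both cases).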

In [None]:
# One result file per collision energy (30, 35, ..., 60 NCE); per the file
# names these were produced with a 5 ppm tolerance and an SNR threshold of 3.
dfs_different_collision_energies_5ppm = [
    pd.read_csv(
        "ptm-search-data/results_thesis/detection_result_csvs/"
        f"230228_Immonium_{nce}NCE_mod.mzML_diagnostic_ions_ppm_tolerance_5_snr_threshold_3_unimod.csv"
    )
    for nce in range(30, 65, 5)
]

In [None]:
# One result file per collision energy (30, 35, ..., 60 NCE); per the file
# names these were produced with a 10 ppm tolerance and an SNR threshold of 3.
dfs_different_collision_energies = [
    pd.read_csv(
        "ptm-search-data/results_thesis/detection_result_csvs/"
        f"230228_Immonium_{nce}NCE_mod.mzML_diagnostic_ions_ppm_tolerance_10_snr_threshold_3_unimod.csv"
    )
    for nce in range(30, 65, 5)
]

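Result dataframes for the different detection parameters on the stepped fragmentation (DIA) files: varying SNR thresholds at a 10 ppm tolerance and varying ppm tolerances at an SNR threshold of 3, each for the modified and the unmodified dataset.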

In [None]:
# Modified DIA file: one result file per SNR threshold, all with a 10 ppm
# tolerance (per the file names).
dfs_dia_mod_10ppm_snr = [
    pd.read_csv(
        "ptm-search-data/results_thesis/detection_result_csvs/"
        f"230928_JL_Immonium_ions_Modified_DIA.mzML_diagnostic_ions_ppm_tolerance_10_snr_threshold_{snr}_unimod.csv"
    )
    for snr in (1, 2, 3, 5, 10)
]

In [None]:
# Modified DIA file: one result file per ppm tolerance (5 to 10), all with an
# SNR threshold of 3 (per the file names).
dfs_dia_mod_ppm = [
    pd.read_csv(
        "ptm-search-data/results_thesis/detection_result_csvs/"
        f"230928_JL_Immonium_ions_Modified_DIA.mzML_diagnostic_ions_ppm_tolerance_{ppm}_snr_threshold_3_unimod.csv"
    )
    for ppm in range(5, 11)
]

In [None]:
# Unmodified DIA file: one result file per SNR threshold, all with a 10 ppm
# tolerance (per the file names).
dfs_dia_unmod_10ppm_snr = [
    pd.read_csv(
        "ptm-search-data/results_thesis/detection_result_csvs/"
        f"230928_JL_Immonium_ions_Unmodified_DIA.mzML_diagnostic_ions_ppm_tolerance_10_snr_threshold_{snr}_unimod.csv"
    )
    for snr in (1, 2, 3, 5, 10)
]

In [None]:
# Unmodified DIA file: one result file per ppm tolerance (5 to 10), all with an
# SNR threshold of 3 (per the file names).
dfs_dia_unmod_ppm = [
    pd.read_csv(
        "ptm-search-data/results_thesis/detection_result_csvs/"
        f"230928_JL_Immonium_ions_Unmodified_DIA.mzML_diagnostic_ions_ppm_tolerance_{ppm}_snr_threshold_3_unimod.csv"
    )
    for ppm in range(5, 11)
]

In [None]:
# Detections per collision energy for the 5 ppm results; only one series is
# plotted, so the legend is switched off.
grouped_bar_mods(
    dfs_unmod=[],
    dfs_mod=dfs_different_collision_energies_5ppm,
    xticklabels=[30, 35, 40, 45, 50, 55, 60],
    xlabel="Collision energy",
    out_name="collision_energies_5ppm",
    legend_loc=None,
)

In [None]:
# Detections per collision energy for the 10 ppm results; only one series is
# plotted, so the legend is switched off.
grouped_bar_mods(
    dfs_unmod=[],
    dfs_mod=dfs_different_collision_energies,
    xticklabels=[30, 35, 40, 45, 50, 55, 60],
    xlabel="Collision energy",
    out_name="collision_energies",
    legend_loc=None,
)

In [None]:
# Detections per ppm tolerance, unmodified vs. modified DIA dataset.
# The output name previously contained a space ("dia_ppm tolerances"), unlike
# every other plot name in this notebook; it now uses underscores throughout.
grouped_bar_mods(
    dfs_dia_unmod_ppm,
    dfs_dia_mod_ppm,
    [5, 6, 7, 8, 9, 10],
    "Ppm tolerance",
    legend_loc="upper left",
    out_name="dia_ppm_tolerances",
)

In [None]:
# Spectra with detections per ppm tolerance, unmodified vs. modified DIA dataset.
grouped_bar_mods_spectra(
    dfs_unmod=dfs_dia_unmod_ppm,
    dfs_mod=dfs_dia_mod_ppm,
    xticklabels=[5, 6, 7, 8, 9, 10],
    xlabel="Ppm tolerance",
    legend_loc="upper left",
    out_name="dia_ppm_tolerances_spectra",
)

In [None]:
# Detections per SNR threshold (10 ppm results), unmodified vs. modified DIA dataset.
grouped_bar_mods(
    dfs_unmod=dfs_dia_unmod_10ppm_snr,
    dfs_mod=dfs_dia_mod_10ppm_snr,
    xticklabels=[1, 2, 3, 5, 10],
    xlabel="SNR threshold",
    out_name="dia_snr_thresholds_10ppm",
)

In [None]:
# Spectra with detections per SNR threshold (10 ppm results), unmodified vs.
# modified DIA dataset.
grouped_bar_mods_spectra(
    dfs_unmod=dfs_dia_unmod_10ppm_snr,
    dfs_mod=dfs_dia_mod_10ppm_snr,
    xticklabels=[1, 2, 3, 5, 10],
    xlabel="SNR threshold",
    out_name="dia_snr_thresholds_10ppm_spectra",
)

## Summary of detected ions

In [14]:
# The summary uses the last frame of each ppm-tolerance list, i.e. the one
# loaded last in the corresponding loading cell.
mods_df, unmods_df = dfs_dia_mod_ppm[-1], dfs_dia_unmod_ppm[-1]

In [None]:
# Number of non-null spectrum_id entries for every combination of
# amino_acid, mod_name, type and theoretical_mz in the modified dataset.
mods_df_by_mod = (
    mods_df[["amino_acid", "mod_name", "type", "theoretical_mz", "spectrum_id"]]
    .groupby(["amino_acid", "mod_name", "type", "theoretical_mz"])
    .count()
    .rename(columns={"spectrum_id": "count"})
)
mods_df_by_mod

In [16]:
# Persist the grouped counts of the modified dataset (index columns included).
mods_df_by_mod.to_csv(path_or_buf="../../data/plots/detection_mod.csv")

In [None]:
# Horizontal bar chart of the counts in mods_df_by_mod with a table of the
# group keys and counts attached to the left side of the axes.
fig, ax = plt.subplots(figsize=(6, 16))

# One bar per row of mods_df_by_mod, in index order.
y_pos = np.arange(len(mods_df_by_mod))
ax.barh(y_pos, mods_df_by_mod["count"], align='center')
ax.set_yticks([])  # no y ticks; the attached table carries the row labels
ax.set_xticks(np.arange(0, 35000, 10000))  # ticks at 0, 10000, 20000, 30000
plt.xticks(fontsize=17)
ax.invert_yaxis()  # first row of the dataframe at the top
ax.set_xlim(0, 35000)  # fixed range; NOTE(review): counts above 35000 would be clipped
ax.margins(y=0)  # no vertical padding around the bars
ax.grid(True)
for spine in ax.spines.values():
    spine.set_edgecolor('grey')

# One table row per group: the index fields as strings, followed by the count.
table = ax.table(cellText=[[str(index_field) for index_field in mod.Index] + [mod.count] for mod in mods_df_by_mod.itertuples()], loc="left", colWidths=[0.07, 0.18, 0.08, 0.06, 0.04],)
# based on https://stackoverflow.com/a/55661458
# Every cell gets an equal share of the axes height (1 / number of rows),
# presumably so that each table row lines up with its bar.
cell_height = 1 / len(mods_df_by_mod)
for pos, cell in table.get_celld().items():
    cell.set_height(cell_height)
    cell.set_edgecolor("gray")
# Widen the table by a factor of 5 while keeping the row heights set above.
table.scale(5, 1)
table.set_fontsize(20)

fig.savefig("../../data/plots/DIA_mods_10ppm_grouped_table.png", bbox_inches="tight")

plt.show()

In [None]:
# Number of non-null spectrum_id entries for every combination of
# amino_acid, mod_name, type and theoretical_mz in the unmodified dataset.
unmods_df_by_mod = (
    unmods_df[["amino_acid", "mod_name", "type", "theoretical_mz", "spectrum_id"]]
    .groupby(["amino_acid", "mod_name", "type", "theoretical_mz"])
    .count()
    .rename(columns={"spectrum_id": "count"})
)
unmods_df_by_mod

In [14]:
# Persist the grouped counts of the unmodified dataset (index columns included).
unmods_df_by_mod.to_csv(path_or_buf="../../data/plots/detection_unmod.csv")

In [None]:
# Horizontal bar chart of the counts in unmods_df_by_mod with a table of the
# counts and group keys attached to the right side of the axes.
# The figure height scales the 16 inches used for the modified-dataset figure
# by the ratio of the two row counts.
fig, ax = plt.subplots(figsize=(6, 16*len(unmods_df_by_mod)/len(mods_df_by_mod)))

# One bar per row of unmods_df_by_mod, in index order.
y_pos = np.arange(len(unmods_df_by_mod))
ax.barh(y_pos, unmods_df_by_mod["count"], align='center')
ax.set_yticks([])  # no y ticks; the attached table carries the row labels
ax.set_xticks(np.arange(0, 35000, 10000))  # ticks at 0, 10000, 20000, 30000
plt.xticks(fontsize=17)
ax.invert_yaxis()  # first row of the dataframe at the top
ax.set_xlim(0, 35000)  # same fixed range as the modified-dataset figure
ax.grid(True)
ax.margins(y=0)  # no vertical padding around the bars
ax.invert_xaxis()  # bars grow from right to left, towards the table side
for spine in ax.spines.values():
    spine.set_edgecolor('grey')

# One table row per group: the count first, then the index fields as strings.
table = ax.table(cellText=[[mod.count] + [str(index_field) for index_field in mod.Index] for mod in unmods_df_by_mod.itertuples()],
                 loc="right", colWidths=[0.04, 0.07, 0.18, 0.08, 0.06])
# based on https://stackoverflow.com/a/55661458
# Every cell gets an equal share of the axes height (1 / number of rows),
# presumably so that each table row lines up with its bar.
cell_height = 1 / len(unmods_df_by_mod)
for pos, cell in table.get_celld().items():
    cell.set_height(cell_height)
    cell.set_edgecolor("gray")
# Widen the table by a factor of 5 while keeping the row heights set above.
table.scale(5, 1)
table.set_fontsize(20)

fig.savefig("../../data/plots/DIA_unmods_10ppm_grouped_table.png", bbox_inches="tight")

plt.show()