# DIA - Label-Free

## DIA-NN (>= V2.0)

In [None]:
# Import required packages:

import msmu as mm
import pandas as pd

In [None]:
# Read DIA data from a DIA-NN output directory:

mdata = mm.read_diann(
    "diann/output/dir/",
)

In [None]:
# (Optional) Attach sample metadata to mdata.obs:

# Index the metadata table by its "tag" column so it matches the sample names in mdata.obs.
meta_df = pd.read_csv("path/to/metadata.csv").set_index("tag")

mdata.obs = mdata.obs.join(meta_df)
mdata.push_obs()  # propagate the updated mdata.obs columns to the modalities

In [None]:
# Filter PSMs by q-value (keep="lt", value=0.01) and, optionally, remove contaminants:

mdata = mm.pp.add_filter(
    mdata,
    modality="psm",
    column="q_value",
    keep="lt",
    value=0.01,
)
# (Optional) uncomment to also exclude entries whose proteins contain "contam_":
# mdata = mm.pp.add_filter(mdata, modality="psm", column="proteins", keep="not contains", value="contam_")
mdata = mm.pp.apply_filter(
    mdata,
    modality="psm",
)

In [None]:
# Apply a log2 transformation to the PSM-level data:

mdata = mm.pp.log2_transform(
    mdata,
    modality="psm",
)

In [None]:
# Normalise the PSM-level data with the median method:

mdata = mm.pp.normalise(
    mdata,
    modality="psm",
    method="median",
)

In [None]:
# Summarise to peptide:
# The result must be bound back to `mdata`, because the following steps
# (peptide filtering, protein inference, protein summarisation) operate on `mdata`.

mdata = mm.pp.to_peptide(mdata)

In [None]:
# Filter peptides by q-value (keep="lt", value=0.01):

mdata = mm.pp.add_filter(
    mdata,
    modality="peptide",
    column="q_value",
    keep="lt",
    value=0.01,
)
mdata = mm.pp.apply_filter(
    mdata,
    modality="peptide",
)

In [None]:
# Infer protein groups from the peptides and their matched proteins:

mdata = mm.pp.infer_protein(
    mdata,
)

In [None]:
# Summarise to protein:
# The top 3 peptides within each protein group can be used to aggregate the
# protein group quantification (otherwise, top_n=None).

mdata = mm.pp.to_protein(
    mdata,
    top_n=3,
    rank_method="total_intensity",
)

In [None]:
# Filter protein groups by q-value (keep="lt", value=0.01):

mdata = mm.pp.add_filter(
    mdata,
    modality="protein",
    column="q_value",
    keep="lt",
    value=0.01,
)
mdata = mm.pp.apply_filter(
    mdata,
    modality="protein",
)

## DIA-NN (< V2.0)

`DIA-NN` versions earlier than `2.0` do not provide decoy features in the final report (`report.tsv`),<br>
so step-wise q-values cannot be calculated.<br>
In addition, the protein groups reported by `DIA-NN` should be used rather than protein groups newly inferred by `msmu`.

In [None]:
# Filter features by q-value and by protein group q-value:
# For the protein group q-value, use Lib.PG.Q.Value (MBR) or Global.PG.Q.Value (no MBR)
# from the DIA-NN search result.
# (The steps before filtering are the same as above, so they are not repeated here.)

mdata = mm.pp.add_filter(
    mdata,
    modality="feature",
    column="q_value",
    keep="lt",
    value=0.01,
)

# Copy the protein group q-value into a var column so a filter can be defined on it.
mdata["feature"].var["pg_q_value"] = mdata["feature"].varm["search_result"]["Lib.PG.Q.Value"]
mdata = mm.pp.add_filter(
    mdata,
    modality="feature",
    column="pg_q_value",
    keep="lt",
    value=0.01,
)

mdata = mm.pp.apply_filter(
    mdata,
    modality="feature",
)

In [None]:
# Apply a log2 transformation to the feature-level data:

mdata = mm.pp.log2_transform(
    mdata,
    modality="feature",
)

In [None]:
# Normalise the feature-level data with the median method (median centering):

mdata = mm.pp.normalise(
    mdata,
    modality="feature",
    method="median",
)

In [None]:
# Summarise the data to peptide level:

mdata = mm.pp.to_peptide(
    mdata,
)

In [None]:
# Add protein_group and peptide_type information from the DIA-NN search result to peptide var:
# - protein_group: Protein.Group
# - peptide_type: Proteotypic
# No peptides are filtered out here; this step only maps information.

# DIA-NN reports the modified peptide sequence in the "Modified.Sequence" column
# (its report has no "Modified.Peptides" column).
# NOTE(review): assumes the peptide var_names use the same modified-sequence strings — confirm.
pg_df = (
    mdata["feature"]
    .varm["search_result"][["Modified.Sequence", "Protein.Group", "Proteotypic"]]
    .drop_duplicates()
    .set_index("Modified.Sequence")
)

# AnnData objects have no .map(); map the peptide identifiers (var_names) through the lookup table.
mdata["peptide"].var["protein_group"] = mdata["peptide"].var_names.map(pg_df["Protein.Group"])
mdata["peptide"].var["peptide_type"] = mdata["peptide"].var_names.map(pg_df["Proteotypic"])

In [None]:
# Summarise to protein:
# The top 3 peptides within each protein group can be used to aggregate the
# protein group quantification (otherwise, top_n=None).

mdata = mm.pp.to_protein(
    mdata,
    top_n=3,
    rank_method="total_intensity",
)