# DDA - TMT

In [None]:
# Import required packages:
import msmu as mm
import pandas as pd

In [None]:
# Load the TMT search results (Sage output is used here).
# To load MaxQuant or FragPipe output instead, use one of the commented-out
# readers below.
mdata = mm.read_sage(
    "sage_output_directory/",
    label="tmt",
)

# MaxQuant output:
# mdata = mm.read_maxquant("path_to_maxquant_directory/", label="tmt")

# FragPipe output:
# mdata = mm.read_fragpipe("path_to_fragpipe_directory/", label="tmt")

In [None]:
# (Optional) Attach per-sample metadata to the observations table.

# Index the metadata by its "tag" column so the rows line up with the sample
# names in mdata.obs.
meta_df = pd.read_csv("path/to/metadata.csv").set_index("tag")

mdata.obs = mdata.obs.join(meta_df)
# Push the updated obs back to the MuData object.
mdata.push_obs()

In [None]:
# (Optional) Drop blank TMT channels from the MuData object.

# Keep every observation whose "condition" value is not "BLANK".
keep_obs = mdata.obs["condition"] != "BLANK"
mdata = mdata[keep_obs, :]

In [None]:
# Filter the feature modality on q-value, and optionally on contaminants.

# Register the q-value filter (keep="lt", value=0.01).
mdata = mm.pp.add_filter(
    mdata,
    modality="feature",
    column="q_value",
    keep="lt",
    value=0.01,
)
# Uncomment to also register a filter on "proteins" entries containing "contam_":
# mdata = mm.pp.add_filter(mdata, modality="feature", column="proteins", keep="not contains", value="contam_")

# Apply the registered filter(s) to the feature modality.
mdata = mm.pp.apply_filter(mdata, modality="feature")

In [None]:
# Log2-transform the feature-level data:

mdata = mm.pp.log2_transform(mdata, modality="feature")

In [None]:
# Normalise the feature-level data using median centering.
# If your data contains fractionated samples, set fraction=True so that
# normalisation is performed within each fraction.

mdata = mm.pp.normalise(
    mdata,
    modality="feature",
    method="median",
    fraction=True,
)

In [None]:
# (Optional) Scale the data using Global Intensity Scaling (GIS) for TMT batch effect correction:
# Make sure to have "POOLED_" channels in each TMT batch for GIS to work

# mdata = mm.pp.scale_feature(mdata=mdata, method="gis", gis_prefix="POOLED_")

In [None]:
# (Optional, but recommended) Compute precursor purity if mzML files are available.
# List the mzML file paths corresponding to each TMT channel/sample, e.g.
# mzml_paths = ["path/to/sample1.mzML", "path/to/sample2.mzML", ...]

mzml_paths = []  # placeholder: replace with your mzML file paths
mdata = mm.pp.compute_precursor_purity(mdata, mzml_paths=mzml_paths)

In [None]:
# Summarise features to the peptide level.
# If isolation purity was calculated, features below the purity threshold can
# be left out of the quantification aggregation.

mdata = mm.pp.to_peptide(
    mdata,
    purity_threshold=0.7,
)

In [None]:
# Filter the peptide modality on q-value.

# Register the q-value filter (keep="lt", value=0.01), then apply it.
mdata = mm.pp.add_filter(
    mdata,
    modality="peptide",
    column="q_value",
    keep="lt",
    value=0.01,
)
mdata = mm.pp.apply_filter(mdata, modality="peptide")

In [None]:
# Infer protein groups from the peptides and their matched proteins:

mdata = mm.pp.infer_protein(mdata)

In [None]:
# Summarise peptides to the protein level.
# The top 3 peptides within each protein group are used for protein group
# quantification aggregation (otherwise, set top_n=None).

mdata = mm.pp.to_protein(
    mdata,
    top_n=3,
    rank_method="total_intensity",
)

In [None]:
# Filter the protein modality (protein groups) on q-value.

# Register the q-value filter (keep="lt", value=0.01), then apply it.
mdata = mm.pp.add_filter(
    mdata,
    modality="protein",
    column="q_value",
    keep="lt",
    value=0.01,
)
mdata = mm.pp.apply_filter(mdata, modality="protein")