Skip to content

Commit

Permalink
Merge pull request #6 from ypriverol/master
Browse files Browse the repository at this point in the history
q-values when Posterior Error probability
  • Loading branch information
jpfeuffer committed May 13, 2022
2 parents a50a6fe + bcce411 commit 9805a94
Show file tree
Hide file tree
Showing 19 changed files with 126,385 additions and 192 deletions.
12 changes: 8 additions & 4 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ jobs:

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
- name: Set up Python 3.7
uses: actions/setup-python@v3
with:
python-version: "3.10"
python-version: "3.7"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand All @@ -34,6 +34,10 @@ jobs:
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
- name: Test peptide file generation
run: |
pytest
python peptide_file_generation.py --mztab data/PXD020192-heart.mzTab.gz --msstats data/PXD020192-heart-msstats.tsv.gz --triqler data/PXD020192-heart-triqler.tsv.gz --sdrf data/PXD020192-heart.sdrf.tsv.gz --output data/PXD020192-Peptide-Intensities.tsv --compress
- name: Test with normalization
run: |
python peptide_normalization.py --log2 --peptides ./data/heart-grouped-Intensities.tsv --contaminants contaminants_ids.tsv --routliers --output data/heart-grouped-Intensities-Norm.tsv --verbose --nmethod qnorm
4 changes: 0 additions & 4 deletions compute_ibaq.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from ibaqpy_commons import remove_contaminants_decoys, PROTEIN_NAME, INTENSITY, CONDITION, IBAQ, IBAQ_LOG, IBAQ_PPB


def print_help_msg(command):
"""
Print the help of the command
Expand All @@ -17,7 +16,6 @@ def print_help_msg(command):
with click.Context(command) as ctx:
click.echo(command.get_help(ctx))


def normalize_ibaq(res: DataFrame) -> DataFrame:
"""
Normalize the ibaq values using the total ibaq of the sample. The resulted
Expand All @@ -35,7 +33,6 @@ def normalize_ibaq(res: DataFrame) -> DataFrame:
res[IBAQ_PPB] = res[IBAQ].apply(lambda x: (x / total_ibaq) * 100000000)
return res


@click.command()
@click.option("-f", "--fasta", help="Protein database to compute IBAQ values")
@click.option("-p", "--peptides", help="Peptide identifications with intensities following the peptide intensity output")
Expand Down Expand Up @@ -109,6 +106,5 @@ def get_average_nr_peptides_unique_bygroup(pdrow: Series) -> Series:

res.to_csv(output, index=False)


if __name__ == '__main__':
ibaq_compute()
Binary file removed data/PXD004682-Peptide-Intensities.tsv.gz
Binary file not shown.
Binary file removed data/PXD004682-out.mztab.gz
Binary file not shown.
Binary file removed data/PXD004682-out_msstats.csv.gz
Binary file not shown.
Binary file removed data/PXD004682-out_triqler.tsv.gz
Binary file not shown.
Binary file removed data/PXD004682.sdrf.tsv.gz
Binary file not shown.
Binary file removed data/PXD008934-Peptide-Intensities.tsv.gz
Binary file not shown.
Binary file removed data/PXD020192-Peptide-Intensities.tsv.gz
Binary file not shown.
Binary file added data/PXD020192-heart-msstats.tsv.gz
Binary file not shown.
Binary file added data/PXD020192-heart-triqler.tsv.gz
Binary file not shown.
Binary file added data/PXD020192-heart.mzTab.gz
Binary file not shown.
Binary file added data/PXD020192-heart.sdrf.tsv.gz
Binary file not shown.
126,184 changes: 126,184 additions & 0 deletions data/heart-grouped-Intensities.tsv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions ibaqpy_commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

PROTEIN_NAME = 'ProteinName'
PEPTIDE_SEQUENCE = 'PeptideSequence'
PEPTIDE_CANONICAL = "PeptideCanonical"
PEPTIDE_CHARGE = 'PrecursorCharge'
FRAGMENT_ION = 'FragmentIon'
PRODUCT_CHARGE = 'ProductCharge'
Expand Down
46 changes: 46 additions & 0 deletions merge_condition_generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import gzip
import os
import re
import shutil

import click
import pandas as pd
from typing_extensions import OrderedDict

from ibaqpy_commons import *

def print_help_msg(command) -> None:
    """
    Show the help text of a click command.

    :param command: click command whose help message should be displayed
    :return: None
    """
    ctx = click.Context(command)
    # The context has to be active while the help text is rendered and echoed.
    with ctx:
        help_text = command.get_help(ctx)
        click.echo(help_text)


@click.command()
@click.option("-i", "--input", help="Folder with all the Intensity files", required=True)
@click.option("-o", "--output", help="Prefix name for the file to group by condition")
@click.option("-p", "--pattern", help="Prefix of the pattern name for all the files in the folder")
def merge_condition_generation(input: str, output: str, pattern: str) -> None:
    """
    Merge the intensity files of a folder and write one file per condition.

    Every file in ``input`` whose name contains ``pattern`` is read as a
    tab-separated table, the tables are concatenated, the condition column is
    lower-cased and one ``<condition>-grouped-Intensities.tsv`` file is written
    into ``output`` for each distinct condition.

    :param input: Input folder containing all the peptide Intensity files
    :param output: Folder where the per-condition files are written; the
        current directory is used when the option is omitted
    :param pattern: Text that a file name must contain to be merged; every
        file is merged when the option is omitted
    :return: None
    :raises click.UsageError: if no file in ``input`` matches ``pattern``
    """
    # click passes None for options that were not given on the command line.
    pattern = pattern or ""
    output = output or os.curdir

    # Sorted so the row order of the merged table does not depend on the
    # arbitrary order returned by os.listdir.
    files = sorted(f for f in os.listdir(input) if pattern in f)
    if not files:
        # pd.concat fails with an opaque "No objects to concatenate" otherwise.
        raise click.UsageError(f"No files containing '{pattern}' were found in '{input}'")

    df_from_each_file = (pd.read_csv(os.path.join(input, f), sep="\t") for f in files)
    concatenated_df = pd.concat(df_from_each_file, ignore_index=True)
    concatenated_df[CONDITION] = concatenated_df[CONDITION].str.lower()
    print(concatenated_df.head())

    # Group by the bare column name: grouping by a one-element list makes
    # recent pandas versions yield 1-tuples as keys, which would leak into the
    # file name as "('condition',)-grouped-Intensities.tsv".
    for condition, group in concatenated_df.groupby(CONDITION):
        group.to_csv(os.path.join(output, f'{condition}-grouped-Intensities.tsv'), index=False, sep='\t')


if __name__ == '__main__':
    merge_condition_generation()
112 changes: 0 additions & 112 deletions peptide_combat_normalization.py

This file was deleted.

11 changes: 10 additions & 1 deletion peptide_file_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,17 @@ def peptide_file_generation(triqler: str, msstats: str, mztab: str, sdrf: str, c
psms_df = mztab_df.spectrum_match_table
psms_df[REFERENCE] = psms_df['spectra_ref'].apply(get_run_mztab, metadata=mztab_df.metadata)

psms_df['psmSearchScore'] = psms_df['opt_global_Posterior_Error_Probability_score'].apply(
# Pick the column holding the PSM quality score: the posterior error
# probability is preferred and the q-value is the fallback. Both membership
# tests must look at the table columns; a bare string literal is always
# truthy, which would make the error branch below unreachable.
if "opt_global_Posterior_Error_Probability_score" in psms_df.columns:
    score_column = "opt_global_Posterior_Error_Probability_score"
elif "opt_global_q-value" in psms_df.columns:
    score_column = "opt_global_q-value"
else:
    raise ValueError('The peptide quality score is not present in the mzTab')

psms_df['psmSearchScore'] = psms_df[score_column].apply(best_probability_error_bestsearch_engine)

psms_df[SCAN] = psms_df['spectra_ref'].apply(get_scan_mztab)
psms_df.rename(columns={'opt_global_cv_MS:1000889_peptidoform_sequence': PEPTIDE_SEQUENCE, 'charge': PEPTIDE_CHARGE,
'retention_time': RT}, inplace=True)
Expand Down
Loading

0 comments on commit 9805a94

Please sign in to comment.