In [1]:
import getpass
import os
import re

import pandas as pd

from bystro.api import auth
from bystro.proteomics.annotation_interface import (
    get_annotation_result_from_query,
    async_get_annotation_result_from_query,
    join_annotation_result_to_fragpipe_dataset
)

from bystro.proteomics.fragpipe_tandem_mass_tag import (
    load_tandem_mass_tag_dataset,
)

In [None]:
# Credentials are read from the environment (falling back to an interactive
# prompt) so that real secrets never have to be typed into, or saved with, the
# notebook. Set BYSTRO_EMAIL / BYSTRO_PASSWORD for unattended "Run All".
BYSTRO_URL = 'https://bystro-dev.emory.edu'
bystro_email = os.environ.get('BYSTRO_EMAIL') or input('Bystro email: ')
bystro_password = os.environ.get('BYSTRO_PASSWORD') or getpass.getpass('Bystro password: ')

# Authenticated handle that is passed to the annotation query as `bystro_api_auth`.
user = auth.login(bystro_email, bystro_password, BYSTRO_URL)

# Identifiers of the annotation job and of the search index queried in this notebook.
# NOTE(review): `index` looks like `job_id` followed by a second id - confirm
# how it is derived before swapping in another job.
job_id = '663ac4bfa0e17a1660ba8130'
index = '663ac4bfa0e17a1660ba8130_657a50d4b2d0278938ba791d'

In [5]:
# Example FragPipe inputs, given as paths relative to the current working
# directory: the experiment annotation table and the gene-level abundance table.
experiment_file = 'tests/example_experiment_annotation_file.tsv'
abundance_file = 'tests/example_abundance_gene_MD.tsv'

# Load both files into a single tandem-mass-tag dataset object; later cells read
# its `annotation_df` and join it against the annotation query result.
tmt_dataset = load_tandem_mass_tag_dataset(
    abundance_file,
    experiment_file,
)

In [6]:
# Run the annotation query with melting by sample and by refSeq.name2 enabled,
# so that the result can afterwards be joined on the FragPipe gene_id.
query_result_df = get_annotation_result_from_query(
    bystro_api_auth=user,
    index_name=index,
    query_string="*",
    fields=["refSeq.name2", "refSeq.spID", "refSeq.name"],
    melt_by_samples=True,
    melt_by_field='refSeq.name2',
)

In [8]:
# Preview the first five rows of the melted query result.
query_result_df.head(n=5)

Unnamed: 0,chrom,pos,vcfPos,inputRef,alt,type,id,locus,sample,dosage,refSeq.name2,refSeq.spID,refSeq.name
0,chr17,2923767,2923767,A,G,SNP,.,chr17:2923767:A:G,1805,2,RAP1GAP2,,NM_001330058
1,chr17,2923767,2923767,A,G,SNP,.,chr17:2923767:A:G,1805,2,RAP1GAP2,Q684P5,NM_015085
2,chr17,2923767,2923767,A,G,SNP,.,chr17:2923767:A:G,1805,2,RAP1GAP2,Q684P5-2,NM_001100398
3,chr17,2923767,2923767,A,G,SNP,.,chr17:2923767:A:G,1847,2,RAP1GAP2,,NM_001330058
4,chr17,2923767,2923767,A,G,SNP,.,chr17:2923767:A:G,1847,2,RAP1GAP2,Q684P5,NM_015085


In [9]:
# The query result labels samples with its own ids, while the TMT dataset is
# indexed by sample names (`annotation_df.index`). For this example the two are
# paired purely by position: the i-th distinct query sample id is renamed to
# the i-th TMT sample name.
sample_ids = query_result_df["sample"].unique()
tmt_sample_names = list(tmt_dataset.annotation_df.index)

# Slicing + zip would silently truncate if the TMT dataset had fewer samples
# than the query result, leaving some ids un-renamed; fail loudly instead.
if len(tmt_sample_names) < len(sample_ids):
    raise ValueError(
        f"Cannot rename {len(sample_ids)} query samples: the TMT dataset "
        f"only provides {len(tmt_sample_names)} sample names."
    )

sample_names = tmt_sample_names[: len(sample_ids)]

# replace the sample ids with the sample names
# NOTE: this overwrites the `sample` column of `query_result_df` in place, so
# any earlier display of that frame no longer reflects its current contents.
replacements = dict(zip(sample_ids, sample_names))
query_result_df["sample"] = query_result_df["sample"].replace(replacements)

# Join the renamed query result to the TMT dataset, using the dataset's
# 'sample_id' and 'gene_name' columns as the FragPipe-side keys.
joined_df = join_annotation_result_to_fragpipe_dataset(
    query_result_df,
    tmt_dataset,
    fragpipe_sample_id_column='sample_id',
    fragpipe_gene_name_column='gene_name',
)
joined_df

Unnamed: 0,chrom,pos,vcfPos,inputRef,alt,type,id,locus,sample,dosage,refSeq.name2,refSeq.spID,refSeq.name,protein_abundance
0,chr17,4620497,4620497,C,T,SNP,.,chr17:4620497:C:T,CPT0088900003,2,ARRB2,H0Y688,NM_001257328,0.169927
1,chr17,4620497,4620497,C,T,SNP,.,chr17:4620497:C:T,CPT0088900003,2,ARRB2,P32121,NM_001257329,0.169927
2,chr17,4620497,4620497,C,T,SNP,.,chr17:4620497:C:T,CPT0088900003,2,ARRB2,P32121-3,NM_001257330,0.169927
3,chr17,4620497,4620497,C,T,SNP,.,chr17:4620497:C:T,CPT0088900003,2,ARRB2,P32121-2,NM_001257331,0.169927
4,chr17,4620497,4620497,C,T,SNP,.,chr17:4620497:C:T,CPT0088900003,2,ARRB2,,NM_001330064,0.169927
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7549,chr17,43507008,43507008,G,C,SNP,.,chr17:43507008:G:C,CPT0088920001,-1,ARHGAP27,,NM_001282290,0.064085
7550,chr17,43507008,43507008,G,C,SNP,.,chr17:43507008:G:C,CPT0088920001,-1,ARHGAP27,,NM_001385384,0.064085
7551,chr17,43507008,43507008,G,C,SNP,.,chr17:43507008:G:C,CPT0088920001,-1,ARHGAP27,,NR_169600,0.064085
7552,chr17,43507008,43507008,G,C,SNP,.,chr17:43507008:G:C,CPT0088920001,-1,ARHGAP27,,NR_169601,0.064085
