In [None]:
import pandas as pd
import numpy as np

# fill_modified_sequence

In [None]:
def _reconstruct_modified_sequence(row):
    '''
    Build the 'Modified sequence' string for a single row, or return None
    when the row's modifications cannot be expressed unambiguously.
    '''
    sequence = row['Sequence']
    if row['Modifications'] == 'Unmodified':
        return f'_{sequence}_'

    modified = None
    if row['Acetyl (Protein N-term)'] == 1:
        modified = f'_(Acetyl (Protein N-term)){sequence}_'

    n_oxidations = row['Oxidation (M)']
    # Oxidation sites are only unambiguous when every methionine is oxidised.
    if n_oxidations > 0 and n_oxidations == sequence.count('M'):
        if modified is None:
            modified = f"_{sequence.replace('M', 'M(Oxidation (M))')}_"
        else:
            # The acetyl tag contains no upper-case 'M', so replacing on the
            # already tagged string only touches the peptide's methionines.
            modified = modified.replace('M', 'M(Oxidation (M))')
    return modified


def fill_modified_sequence(df: pd.DataFrame):
    '''
    Fill missing values of df['Modified sequence'] in place and return df.

    Rules applied to rows whose 'Modified sequence' is NaN:
        Unmodified -> _Sequence_
        Acetyl     -> _(Acetyl (Protein N-term))Sequence_
        Oxidation  -> every 'M' becomes 'M(Oxidation (M))'
                      (only if #Oxidations == number of methionines in the sequence)
    Acetylation and oxidation are combined when both apply. Rows that match
    none of the rules stay NaN; rows that already have a value are untouched.

    Rows are addressed by position rather than by index label, so a
    non-unique index neither raises nor overwrites other rows that share
    the same label.

    Parameters
    ----------
    df : pd.DataFrame
        Needs the columns 'Modified sequence', 'Sequence' and 'Modifications';
        'Acetyl (Protein N-term)' and 'Oxidation (M)' are read for rows that
        are missing a value and are not 'Unmodified'.

    Returns
    -------
    pd.DataFrame
        The same object that was passed in.
    '''
    target = 'Modified sequence'
    missing = df[target].isna().to_numpy()

    fill_mask = np.zeros(len(df), dtype=bool)
    fill_values = []
    # Only rows with a missing value need the (slow) row-wise treatment.
    for position, (_, row) in zip(np.flatnonzero(missing), df[missing].iterrows()):
        value = _reconstruct_modified_sequence(row)
        if value is not None:
            fill_mask[position] = True
            fill_values.append(value)

    if fill_values:
        # An all-NaN column is float64; cast explicitly instead of relying on
        # implicit upcasting when strings are written into it.
        if pd.api.types.is_numeric_dtype(df[target]):
            df[target] = df[target].astype(object)
        df.loc[fill_mask, target] = fill_values
    return df

# MaxQuant Format to AlphaBase Format

In [None]:
def mq_to_ab(df: pd.DataFrame):
    '''
    Convert a MaxQuant-format DataFrame into the AlphaBase PSM format that
    AlphaPeptDeep can read.

    The frame's index is first copied into a new 'Original index' column, and
    that column is registered in the reader's column mapping, so the converted
    table can later be matched back to the rows of the MaxQuant-format frame.

    Notes
    -----
    * `df` is modified in place: the 'Original index' column is added to it and
      the same object is handed to every reader step below.
    * `psm_reader_provider` is not imported in this cell and must already be in
      scope - presumably AlphaBase's PSM reader provider; confirm against the
      notebook's import cell.

    Parameters
    ----------
    df : pd.DataFrame
        Table in MaxQuant format.

    Returns
    -------
    The reader's `psm_df` attribute after all processing steps have run.
    '''
    # Keep the original row labels so the result can be related back to `df`.
    df.loc[:,'Original index'] = df.index
    # Run the AlphaBase MaxQuant reader on the in-memory frame.
    mq_reader = psm_reader_provider.get_reader('maxquant')
    # Map the helper column onto itself so the reader carries it through.
    mq_reader.column_mapping['Original index'] = 'Original index'
    # NOTE(review): the calls below are private reader methods invoked one by
    # one, in this order; presumably they mirror the reader's own import
    # pipeline without the file-loading step - confirm against the installed
    # AlphaBase version, since private methods may change between releases.
    mq_reader._translate_columns(df)
    mq_reader._transform_table(df)
    mq_reader._translate_decoy(df)
    mq_reader._translate_score(df)
    mq_reader._load_modifications(df)
    mq_reader._translate_modifications()
    mq_reader._post_process(df)  
    df_ab = mq_reader.psm_df
    return df_ab

# AlphaBase Format to MaxQuant Format

In [None]:
def ab_to_mq(df_max_quant: pd.DataFrame, df_alpha_base: pd.DataFrame):
    '''
    Merge a DataFrame in AlphaBase format back onto a DataFrame in MaxQuant format.

    The AlphaBase columns are renamed to their MaxQuant names and the two
    tables are inner-joined on the shared PSM columns. Two error columns are
    then added to the merged table:

        'ccs_error' = 'CCS'  - 'ccs_pred'
        'IM_error'  = '1/K0' - 'mobility_pred'

    Neither input frame is modified, so the function can safely be called
    again with the same arguments (e.g. when a notebook cell is re-run).

    Parameters
    ----------
    df_max_quant : pd.DataFrame
        Table in MaxQuant format containing all merge columns.
    df_alpha_base : pd.DataFrame
        Table in AlphaBase format. Must contain 'Original index' as well as
        the prediction columns 'ccs_pred' and 'mobility_pred'.

    Returns
    -------
    pd.DataFrame
        Merged table in MaxQuant format with the AlphaBase columns
        (including 'ccs_pred' and 'mobility_pred') plus 'ccs_error' and
        'IM_error'.
    '''
    mapping_dict = {
        'sequence': 'Sequence',
        'charge': 'Charge',
        'rt': 'Retention time',
        'ccs': 'CCS',
        'mobility': '1/K0',
        'scan_num': 'MS/MS scan number',
        'raw_name': 'Raw file',
        'precursor_mz': 'm/z',
        'score': 'Score',
        'proteins': 'Proteins',
        'genes': 'Gene names',
        'decoy': 'Reverse',
        'intensity': 'Intensity',
        'nAA': 'Length',
    }
    merging_list = [
        'Sequence', 'Charge', 'CCS', 'Score', 'Length', 'Retention time',
        'Proteins', 'Gene names', '1/K0', 'MS/MS scan number', 'Raw file',
        'm/z', 'Intensity',
    ]
    # Rename to MaxQuant column names and move 'Original index' out of the
    # columns so it cannot clash with a same-named column in df_max_quant.
    # Done on a derived frame: the caller's df_alpha_base stays untouched.
    alpha_base_mq = (
        df_alpha_base
        .rename(columns=mapping_dict)
        .set_index('Original index')
    )
    # Merging on columns ignores both indexes; the result gets a fresh one.
    df_merged = pd.merge(df_max_quant, alpha_base_mq, on=merging_list, how='inner')
    df_merged['ccs_error'] = df_merged['CCS'] - df_merged['ccs_pred']
    df_merged['IM_error'] = df_merged['1/K0'] - df_merged['mobility_pred']
    return df_merged

# Percentiles

In [None]:
def percentiles(df: pd.DataFrame, cat: str):
    '''
    Compute the ingredients of the Delta95 metric for column `cat` of `df`.

    Returns a list [lower, upper, delta95] where `lower` and `upper` are the
    2.5th and 97.5th percentiles of the column and `delta95` is the width of
    the interval between them.
    '''
    column_values = df[cat]
    lower_bound, upper_bound = (
        np.percentile(column_values, q) for q in (2.5, 97.5)
    )
    interval_width = upper_bound - lower_bound
    return [lower_bound, upper_bound, interval_width]
