# Imports

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Execute the helper notebook so that the names it defines become available in this session.
# NOTE(review): presumably this is where find_isomers, difference_mass_modification,
# get_peptides_across_many_fractions and plot_scatter come from — confirm against the helper notebook.
%run methods_exploratory_analysis.ipynb

In [None]:
# Load the tab-separated evidence table; every later cell reads from `df`.
df = pd.read_csv(
    filepath_or_buffer="evidence_freshfrozen.txt",
    sep="\t",
)

# Exploratory Analysis

In [3]:
# subset dataset: one frame per experiment ID found in the 'Experiment' column
experiment_ids = df["Experiment"]
df_51 = df[experiment_ids == 'P064051']
df_64 = df[experiment_ids == 'P064064']
df_28 = df[experiment_ids == 'P064428']

## General

In [None]:
# range CCS: print minimum, maximum and their difference (in that order)
ccs_min = df['CCS'].min()
ccs_max = df['CCS'].max()
print(ccs_min)
print(ccs_max)
print(ccs_max - ccs_min)

In [None]:
# range 1/K0: print minimum, maximum and their difference (in that order)
k0_min = df['1/K0'].min()
k0_max = df['1/K0'].max()
print(k0_min)
print(k0_max)
print(k0_max - k0_min)

## Differentiation of Isomers

In [8]:
# Re-execute the helper notebook, then build the isomer candidate table.
# NOTE(review): the same %run already appears in the imports cell; this repeat looks like a
# development-time reload — confirm whether it is still needed.
%run methods_exploratory_analysis.ipynb
# Arguments: the full evidence frame, the 'm/z' column name, and 0.0.
# NOTE(review): 0.0 is presumably a tolerance applied to that column — confirm against find_isomers.
# The result is expected to carry a 'Set' column, which the following cells group by.
df_isomers = find_isomers(df, 'm/z', 0.0)

### RT Range

In [9]:
# Check RT for all isomers: per-'Set' spread (max - min) of 'Retention time'
# NOTE(review): `im_diff_df` / `grouped_diff_df` are reused later for the 1/K0 analysis,
# so this cell must be re-run before the RT filter cell if the notebook is executed out of order.
im_diff_df = df_isomers[['Set', 'Retention time', 'Fraction','Experiment']]
rt_by_set = im_diff_df.groupby('Set')['Retention time']
grouped_diff_df = (rt_by_set.max() - rt_by_set.min()).reset_index()


In [None]:
# Histogram of Sets per RT range
# Figure 4.1 a
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(grouped_diff_df['Retention time'], bins=100)
# Dashed marker at the RT threshold (1.8) that the subsequent filter cell applies
ax.axvline(x=1.8, color='black', linestyle='--')
ax.set_xlabel('RT Range', fontsize=20)
ax.set_ylabel('Number of Sets', fontsize=20)

In [11]:
# Apply Delta95 of RT for 30 min Gradient:
# keep only the sets whose retention-time spread is at most 1.80
sets_within_rt = grouped_diff_df.loc[grouped_diff_df['Retention time'] <= 1.80, 'Set']
isomers_rt = df_isomers[df_isomers['Set'].isin(sets_within_rt)]

### IM Difference

In [12]:
# Check 1/K0 for all isomers: per-'Set' spread (max - min) of '1/K0' among the RT-filtered sets
im_diff_df = isomers_rt[['Set', '1/K0', 'Fraction','Experiment']]
k0_by_set = im_diff_df.groupby('Set')['1/K0']
grouped_diff_df = (k0_by_set.max() - k0_by_set.min()).reset_index()

In [None]:
# Histogram of Sets per IM Range
# Figure 4.1 b
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(grouped_diff_df['1/K0'], bins=100)
ax.set_xlabel('1/K0 Range', fontsize=20)
ax.set_ylabel('Number of Sets', fontsize=20)

## Influences of PTMs on IM

In [None]:
# Calculate Differences of PTMs to Unmodfied Peptides
# Table 4.1
%run methods_exploratory_analysis.ipynb
mod_list = ['Unmodified', 'Acetyl (Protein N-term)', 'Acetyl (Protein N-term),Oxidation (M)', 'Acetyl (Protein N-term),2 Oxidation (M)', 'Acetyl (Protein N-term),3 Oxidation (M)', 'Oxidation (M)',
            '2 Oxidation (M)', '3 Oxidation (M)', '4 Oxidation (M)', '5 Oxidation (M)']
prop_list = ['Mass', 'Retention time', '1/K0']
for prop in prop_list:
    print(f'Difference in {prop}')
    counter = 0
    for mod in mod_list:
        df_diff = difference_mass_modification(df, mod, prop)
        print(f"{mod}: {df_diff['Difference'].mean()}, {df_diff['Difference'].std()}")
        counter +=1
    print()


## Matrix Effect

### Experiment 51

In [None]:
# Select peptides from the P064051 subset (df_51) via the helper notebook's function.
# NOTE(review): 46 is presumably the minimum number of fractions a peptide must be seen in — confirm
# against get_peptides_across_many_fractions.
df_top_ten_51= get_peptides_across_many_fractions(df_51, 46)

In [16]:
# Selected Peptides for Plot (values matched against the 'Modified sequence' column)
include = [
    '_FLISLLEEYFK_',
    '_QTTAAAAATFSEQVGGGSGGAGR_',
    '_YLATASTMDHAR_',
    '_ALFSSITDSEK_',
    '_FFLTGTSIFVK_',
    '_GASDFLSFAVK_',
    '_GDFTFFIDTFK_',
    '_GFVIDDGLITK_',
    '_GHYTEGAELVDSVLDVVR_',
    '_HNDDEQYAWESSAGGSFTVR_',
]

In [None]:
# Scatter Plots: Matrix Effect over different Measurements
# Figure 4.2
%run methods_exploratory_analysis.ipynb
plot_scatter( df_top_ten_51[df_top_ten_51['Modified sequence'].isin(include)],y='Fraction', x= 'Mass', c=['Modified sequence','Charge'])
plot_scatter(df_top_ten_51[df_top_ten_51['Modified sequence'].isin(include)], y='Fraction', x= 'Retention time', c=['Modified sequence','Charge'])
plot_scatter(df_top_ten_51[df_top_ten_51['Modified sequence'].isin(include)], y='Fraction', x= '1/K0', c=['Modified sequence','Charge'])

## Uncertainty of Measurements

In [6]:
# Choose Category for Analysis
# `cat` is used as the column name for the mean, error and percentile cells below.
# Alternative value noted by the author: '1/K0'
cat = 'CCS'

In [7]:
# Calculate mean between same measurements:
# keep (Modified sequence, Charge) groups with more than one row, then attach each
# group's mean of `cat` as a '<cat>_mean' column.
key_cols = ['Modified sequence', 'Charge']
grouped_df = df.groupby(key_cols)
df_filtered = grouped_df.filter(lambda grp: len(grp) > 1)
group_means = (
    df_filtered
    .groupby(key_cols)[cat]
    .mean()
    .reset_index()
)
df_uncertainty = pd.merge(
    left=df_filtered,
    right=group_means,
    on=key_cols,
    suffixes=('', '_mean'),
)

In [8]:
# Calculate Error: group mean minus the individual measurement
mean_col = f'{cat}_mean'
error_col = f'{cat}_error'
df_uncertainty[error_col] = df_uncertainty[mean_col] - df_uncertainty[cat]

In [None]:
# Percentile of Measurement Error
# Table 4.3
%run alpha_pept_deep_methods.ipynb
delta95 = percentiles(df_uncertainty, f'{cat}_error')
print(f"2.5 Percentile: {delta95[0]}, 97.5 Percentile: {delta95[1]}, Delta95: {delta95[2]}")