## 2.3 Predict retention times of modified peptides

### 2.3.1 Effect of modifications on retention time

Let's first make predictions:

In [None]:
!pip install deeplc

In [None]:
# Standard library
import os
import warnings

# Suppress TensorFlow's native (C++) logging. The variable is set before any
# third-party import below so that it is already in place when TensorFlow is
# loaded for the first time (deeplc may import TensorFlow itself - TODO confirm).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

# Third-party
import pandas as pd
import seaborn as sns
import tensorflow as tf
from deeplc import DeepLC
from matplotlib import pyplot as plt

# Also quiet TensorFlow's Python-side logger
tf.get_logger().setLevel('ERROR')

# Load the PSM table and replace missing values with empty strings.
df = pd.read_csv("https://dl.dropboxusercontent.com/s/bok4w3jw2gxohbz/deeplc_input.csv", index_col=0)
df = df.fillna("")

num_total_rows_select = 5000  # number of rows whose scan id is drawn for prediction
num_calib = 250               # number of confident rows whose scan id is drawn for calibration
random_seed = 42              # fixed seed so that Restart & Run All reproduces the same subsets

# Draw scan ids at random and keep every row that belongs to one of them.
# The sample size is capped at what is available so a smaller table does not raise.
pred_scans = df["scan"].sample(
    n=min(num_total_rows_select, len(df)),
    random_state=random_seed,
)
sub_df_pred = df[df["scan"].isin(pred_scans)].copy()

# Calibration subset: scans drawn from rows of the prediction subset with q_value < 0.01.
confident_scans = sub_df_pred.loc[sub_df_pred["q_value"] < 0.01, "scan"]
calib_scans = confident_scans.sample(
    n=min(num_calib, len(confident_scans)),
    random_state=random_seed,
)
sub_df_calib = sub_df_pred[sub_df_pred["scan"].isin(calib_scans)].copy()

# Both frames get the same column renames: "database_peptide" becomes "seq"
# and "rt" becomes "tr" (the names used in the DeepLC calls and plots below).
deeplc_column_names = {"database_peptide": "seq", "rt": "tr"}

sub_df_pred = sub_df_pred.rename(columns=deeplc_column_names)
sub_df_calib = sub_df_calib.rename(columns=deeplc_column_names)

# Set up the DeepLC predictor.
dlc = DeepLC(cnn_model=True, pygam_calibration=False, verbose=False)

# Calibrate on the rows of the calibration subset flagged as best PSM.
calib_best_psms = sub_df_calib[sub_df_calib["best_psm"].eq(1)]
dlc.calibrate_preds(seq_df=calib_best_psms)

# Predict for the full prediction subset and store the result next to the observations.
preds = dlc.make_preds(seq_df=sub_df_pred)
sub_df_pred["preds"] = preds

In the next cells we compare the predicted and observed retention times for peptides carrying specific modifications:

In [None]:
def plot_modification(sub_df_best, modification="carbamidomethyl", rt_range=(1500, 14500)):
    """Scatter observed against predicted retention times for one modification.

    Rows of ``sub_df_best`` whose ``"modifications"`` entry contains
    ``modification`` are selected; their ``"tr"`` values are drawn on the
    x-axis and their ``"preds"`` values on the y-axis. A dotted black diagonal
    is added as the line of perfect agreement.

    Parameters
    ----------
    sub_df_best : pandas.DataFrame
        Table with the columns ``"modifications"``, ``"tr"`` and ``"preds"``.
    modification : str, default "carbamidomethyl"
        Text searched for in the ``"modifications"`` column. It is matched
        literally and case-sensitively; it is also used as the plot title.
    rt_range : tuple of (float, float), default (1500, 14500)
        Start and end coordinate of the diagonal reference line.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Build the row mask once. regex=False keeps names containing characters
    # such as "(" or "+" from being interpreted as a pattern, and na=False
    # treats missing entries as "no match" instead of breaking the indexing.
    has_modification = sub_df_best["modifications"].str.contains(
        modification, regex=False, na=False
    )
    selected = sub_df_best[has_modification]

    # Init plot
    _, ax = plt.subplots(figsize=(7, 7))
    ax.set_aspect('equal')

    # Plot data
    ax.scatter(selected["tr"], selected["preds"], alpha=0.5, s=4)
    ax.plot(list(rt_range), list(rt_range), c="black", linestyle="dotted")

    ax.set_title(modification)
    ax.set_xlabel("Observed retention time (s)")
    ax.set_ylabel("Predicted retention time (s)")

    plt.show()

In [None]:
# Keep only rows flagged as best PSM with a q-value below 0.001.
is_best_psm = sub_df_pred["best_psm"] == 1
is_confident = sub_df_pred["q_value"] < 0.001
sub_df_best = sub_df_pred[is_best_psm & is_confident]

# One observed-vs-predicted plot per modification of interest.
for mod_name in ("carbamidomethyl", "Formyl", "Dehydrated", "Ammonium", "Sulfide"):
    plot_modification(sub_df_best, modification=mod_name)

# 2.4 Playground - design your own peptides and modifications and predict their retention time (optional; for enthusiasts to try later)

## 2.4.1 Make predictions for your own peptide and modification combinations

Provide the data for peptides you want to predict:

In [None]:
# Variants of IIVINTPNNPIGK that differ only in the residue at position 6.
position6_variants = [
    "IIVINKPNNPIGK",  # K on pos 6
    "IIVINTPNNPIGK",  # T on pos 6
    "IIVINAPNNPIGK",  # A on pos 6
    "IIVINWPNNPIGK",  # W on pos 6
]
n_variants = len(position6_variants)

dict_effect_aa = {
    "seq": position6_variants,
    # no modifications on any of the variants
    "modifications": [""] * n_variants,
    # running numbers 0..n-1 in the "tr" column
    "tr": list(range(n_variants)),
}

df_effect_aa = pd.DataFrame(dict_effect_aa)

In [None]:
# Predict retention times for the peptides in df_effect_aa with the DeepLC
# instance `dlc`; this overwrites any earlier value of `preds`.
preds = dlc.make_preds(seq_df=df_effect_aa)

Let's have a look at their predictions:

In [None]:
# One point per peptide, with the peptide sequence as tick label.
ax = plt.gca()
row_positions = df_effect_aa.index
ax.scatter(row_positions, preds)
ax.set_xticks(row_positions)
ax.set_xticklabels(df_effect_aa["seq"])
ax.set_ylabel("Predicted retention time (s)")
plt.show()

Provide the data for peptides+modifications you want to predict:

In [None]:
# Variants of IIVINTPNNPIGK with C, Q or M at position 6, each listed once
# without a modification and once with a modification on that position.
peptide_modification_pairs = [
    ("IIVINCPNNPIGK", ""),
    ("IIVINCPNNPIGK", "6|carbamidomethyl"),
    ("IIVINQPNNPIGK", ""),
    ("IIVINQPNNPIGK", "6|Deamidated"),
    ("IIVINMPNNPIGK", ""),
    ("IIVINMPNNPIGK", "6|Formyl"),
]
sequences, modifications = zip(*peptide_modification_pairs)

dict_effect_aa = {
    "seq": list(sequences),
    "modifications": list(modifications),
    # running numbers 0..n-1 in the "tr" column
    "tr": list(range(len(peptide_modification_pairs))),
}

df_effect_aa = pd.DataFrame(dict_effect_aa)

In [None]:
# Predict retention times for the peptide/modification rows in df_effect_aa
# with the DeepLC instance `dlc`; this overwrites any earlier value of `preds`.
preds = dlc.make_preds(seq_df=df_effect_aa)

In [None]:
# One point per row; tick labels read "<sequence>+<modification>" and are
# rotated so the long labels do not overlap.
ax = plt.gca()
row_positions = df_effect_aa.index
tick_labels = df_effect_aa["seq"] + "+" + df_effect_aa["modifications"]
ax.scatter(row_positions, preds)
ax.set_xticks(row_positions)
ax.set_xticklabels(tick_labels, rotation=90)
ax.set_ylabel("Predicted retention time (s)")
plt.show()

## 2.4.2 Questions - playground retention time prediction

<ol>
  <li>Can you design a peptide that falls in between "IIVINKPNNPIGK" and "IIVINTPNNPIGK" in terms of retention time?</li>
  <li>What effect do certain modifications have? Is this expected?</li>
  <li>Do you expect that modifications always have the same effect?</li>
</ol>