In [1]:
import pandas as pd
import numpy as np
from Bio.SeqUtils.ProtParam import ProteinAnalysis
import panel as pn
import hvplot.pandas
import holoviews as hv
import matplotlib.pyplot as plt
from holoviews import opts
from scipy import optimize
import warnings
# NOTE(review): this silences every warning for the rest of the session, so any
# warnings raised by the curve fits or plotting calls further down are hidden too.
# Consider narrowing the filter to the specific categories that are known noise.
warnings.filterwarnings('ignore')

In [2]:
# Load the aligned evidence table; later cells read its 'Sequence', 'Charge',
# 'm/z' and 'CCS' columns.
evidence_csv = '../output/evidence_aligned.csv'
evidences = pd.read_csv(evidence_csv)

In [3]:
# Calculate physicochemical properties: the GRAVY score of every entry in the
# 'Sequence' column, stored in a new 'gravy' column.
gravy_scores = [ProteinAnalysis(sequence).gravy() for sequence in evidences['Sequence']]
evidences['gravy'] = gravy_scores

In [4]:
# Calculate trend line functions: start with one subset of the evidence table
# per 'Charge' value (2, 3 and 4); each subset is fitted separately.
charge = evidences['Charge']
CCS_fit_charge2 = evidences.loc[charge.eq(2)]
CCS_fit_charge3 = evidences.loc[charge.eq(3)]
CCS_fit_charge4 = evidences.loc[charge.eq(4)]

def trendline_func(x, a, b):
    """Power-law trend line ``a * x**b``, evaluated element-wise with NumPy.

    Parameters
    ----------
    x : scalar or array-like
        Value(s) at which to evaluate the trend line.
    a : scalar
        Multiplicative prefactor.
    b : scalar
        Exponent applied to ``x``.

    Returns
    -------
    The product of ``a`` and ``np.power(x, b)``.
    """
    raised = np.power(x, b)
    return a * raised

def _fit_trendline(subset):
    """Fit ``trendline_func`` to the 'm/z' (x) and 'CCS' (y) columns of ``subset``.

    Returns the ``(parameters, covariance)`` pair from ``optimize.curve_fit``.
    """
    return optimize.curve_fit(trendline_func, subset['m/z'], subset['CCS'])


# One independent fit per charge subset; the parameter arrays are reused when plotting.
params_charge2, params_covariance_charge2 = _fit_trendline(CCS_fit_charge2)
params_charge3, params_covariance_charge3 = _fit_trendline(CCS_fit_charge3)
params_charge4, params_covariance_charge4 = _fit_trendline(CCS_fit_charge4)

In [5]:
# Figure 3a
def visualize_sec_str():
    """Build the Figure 3a overlay: m/z vs. CCS points plus three fitted trend lines.

    Reads the notebook-level ``evidences`` frame (columns 'm/z', 'CCS', 'gravy')
    and the ``params_charge2`` / ``params_charge3`` / ``params_charge4`` fit
    parameters. Returns the rasterized, GRAVY-coloured point layer overlaid with
    one curve per fit, evaluated on the integer grid 300..1799.
    """
    scatter_plot = evidences.hvplot.points(
        x='m/z',
        y='CCS',
        c='gravy',
        xlabel='m/z',
        ylabel='CCS (\u212b\u00B2)',
        clabel='GRAVY',
        width=450,
        height=400,
        cmap=plt.get_cmap('RdYlBu'),
        tools=['hover'],
        colorbar=True,
        rasterize=True,
    )
    scatter_plot.opts(toolbar='above', clim=(-4, 3))

    # NOTE(review): opts.defaults appears to register these as session-wide
    # defaults for every Curve, not only the ones created here - confirm that
    # other figures are meant to inherit the thin black dashed style.
    opts.defaults(opts.Curve(color="black", line_width=0.5, line_dash='dashed'))

    trendline_x = np.arange(300, 1800, 1)
    trendlines = [
        hv.Curve((trendline_x, trendline_func(trendline_x, fit[0], fit[1])))
        for fit in (params_charge2, params_charge3, params_charge4)
    ]

    # Overlay left to right: points first, then the charge 2, 3 and 4 curves.
    figure = scatter_plot
    for trendline in trendlines:
        figure = figure * trendline
    return figure

In [6]:
# Assemble Figure 3: the caption on top, then a row holding a 200-wide spacer
# followed by the plot.
figure3_caption = pn.pane.Markdown(
    """### Figure 3. A global view on peptide cross sections. a, Mass-to-charge vs. collisional cross section distribution of all peptides in this study colored by the GRAVY hydrophobicity index (n = 559,979).""",
    margin=(10, 0, 20, 0),
)
# The plotting function is handed over uncalled; Panel is expected to invoke it
# when the row is rendered.
figure3_row = pn.Row(pn.layout.VSpacer(width=200), visualize_sec_str)
layout = pn.Column(figure3_caption, figure3_row, align='center', width=920)
layout.embed()