In [1]:
# Main imports
import numpy as np
import pandas as pd

# Import matplotlib and such
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline
plt.ion()

from helper_functions import \
    thermodynamic_model, \
    plot_manifold_model, \
    plot_manifold_measurements, \
    get_measurement_subset_df
    

In [2]:
# Conversion parameter
kbt_to_kcal = 1/1.62

# From occlusion results
F_50 = 28.06836

In [3]:
# Load clonal measurements 
data_df = pd.read_excel('../data/results.xlsx', sheet_name='measurements_summary').set_index('name')
data_df.head()

Unnamed: 0_level_0,location,log_t+,dlog_t+,log_t-,dlog_t-,num_t+,num_t-,outlier,spacing,sequence
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
60c-r18,b3C9,-1.564814,0.302713,-1.6408,0.122828,5,6,0.0,,CGCAATGAATCACTCCATTGAGTGTTTTGAGGGTCCCAGGCTTTAC...
61c-oc-2,b5E2,-5.308959,0.114674,-3.378802,0.081788,3,6,,-2.5,CGCAATGAATCACTCCATTGAGTGTTTTGAGGGTCCCCAGGCTTTA...
61c-oc-2,b5E1,-7.081135,0.270409,-3.249277,0.205113,3,6,,-2.5,CGCAATGAATCACTCCATTGAGTGTTTTGAGGGTCCCCAGGCTTTA...
61c-oc0,b5E4,-1.691993,0.146085,1.51402,0.088154,6,6,0.0,0.5,CGCAATGAATCACTCCATTGAGTGTTTTGAGGGTCCCCAGGCTTTA...
61c-oc1,b5E6,-6.037679,0.137889,-4.105176,0.591296,3,5,,1.5,CGCAATGAATCACTCCATTGAGTGTTTTGAGGGTCCCCAGGCTTTA...


In [4]:
# Load c61 meassurements
c61_model_df = pd.read_excel('../data/results.xlsx', sheet_name='c61r18_resamp').set_index('run')
c61_model_df.head()

Unnamed: 0_level_0,log_tsat,log_tbg,log_alphap,b1A1,b1A1_weight,b1B8,b1B8_weight,b1B9,b1B9_weight,b1C1,...,b2E7,b2E7_weight,b2E8,b2E8_weight,b2E9,b2E9_weight,b2F1,b2F1_weight,b2F2,b2F2_weight
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
fit,2.755847,-5.381134,6.400048,-4.245446,1,-10.734998,1,-8.662148,1,-10.257168,...,-5.974972,1,-3.895103,1,-5.299942,1,-11.689338,1,-1.818018,1
samp_00,2.796262,-5.53336,6.287923,-4.284942,2,-10.655064,2,-8.579794,3,-10.158285,...,,0,,0,-5.331684,1,,0,-1.866132,1
samp_01,2.784679,-5.461318,6.240078,,0,-10.602852,4,,0,-10.110816,...,-5.975344,1,-3.927357,1,-5.316813,2,-11.554311,1,,0
samp_02,2.794542,-5.248201,6.428325,-4.291674,1,-10.809958,1,,0,-10.342807,...,,0,-3.94004,1,,0,-11.766457,3,,0
samp_03,2.739887,-5.204617,6.576093,-4.23515,1,,0,-8.832153,2,-10.440185,...,-6.000587,1,-3.879204,3,-5.303102,3,-11.860723,3,,0


In [5]:
# Load spacing results
distance_df = pd.read_excel('../data/results.xlsx', sheet_name='conjoined_resamp').set_index('run')
distance_df.head()

Unnamed: 0_level_0,log_tsat,log_tbg_c60,log_tbg_c61,log_tbg_c62,log_tbg_c63,log_tbg_c64,log_tbg_c65,log_tbg_c66,log_tbg_c71,log_tbg_c72,...,b14A4_log_P,b14A4_weight,b14A5_log_P,b14A5_weight,b14A6_log_P,b14A6_weight,b14A7_log_P,b14A7_weight,b3G7_log_P,b3G7_weight
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
fit,2.723034,-7.044643,-5.531436,-6.737779,-6.917441,-6.923248,-13.245711,-9.849898,-5.606744,-5.892497,...,-5.256954,1,-4.413326,1,-7.415928,1,-9.118636,1,-4.639016,1
samp_00,2.75994,-6.633598,-5.39558,-6.12764,-6.40342,-7.385322,-22.272519,-6.207592,-5.671957,-5.907824,...,,0,,0,,0,-9.24443,1,,0
samp_01,2.745413,-6.255918,-5.401288,-7.239914,-6.110711,-6.71499,-58.948487,-13.746928,-5.724202,-5.890159,...,-5.343646,1,,0,,0,-9.316098,1,-4.719914,1
samp_02,2.711057,-7.697602,-5.821185,-7.411829,-104.954307,-6.065134,-107.546969,-5.185987,-5.714961,-5.982784,...,,0,-4.41557,1,,0,-9.288488,2,,0
samp_03,2.743069,-7.179868,-5.428558,-7.24549,-30.385952,-7.003511,-27.74432,-5.1802,-5.677444,-5.852142,...,,0,-4.470129,1,-7.508317,1,-9.274382,2,-4.697321,2


In [14]:
# Create table of beta values and their uncertainties
params_df = pd.DataFrame(columns=['spacing','sheet'])
sheets = [
    'c60_beta_resamp',
    'c61_beta_resamp',
    'c62_beta_resamp',
    'c63_beta_resamp',
    'c64_beta_resamp',
    'c65_beta_resamp',
    'c71_beta_resamp',
    'c72_beta_resamp',
    'c81_beta_resamp',
    'c82_beta_resamp'
    ]
params_df['sheet'] = sheets
spacings = [-float(sheet[1:3])-.5 for sheet in sheets]
params_df['spacing'] = spacings

for i, sheet in enumerate(sheets):
    print('Processing sheet %s...'%sheet)
    df = pd.read_excel('../data/results.xlsx', sheet_name=sheet)
    log_betaps = df['log_betap'].values[1:]
    log_alphaps = df['log_betap'].values[1:]
    
    # Compute true alphas and betas
    alphaps = np.exp(log_alphaps)
    betaps = np.exp(log_betaps)
    alphas = alphaps*(1+1/F_50) - 1/F_50
    betas = betaps*(1+1/(F_50*alphas)) - 1/(F_50*alphas)
    dGs = -kbt_to_kcal*np.log(alphas)
    
    # Compute alphas
    params_df.loc[i,'alpha'] = np.percentile(alphas,50)
    params_df.loc[i,'dalpha-'] = np.percentile(alphas,50)-np.percentile(alphas,16)
    params_df.loc[i,'dalpha+'] = np.percentile(alphas,84)-np.percentile(alphas,50)
    
    # Compute dGs
    params_df.loc[i,'dG'] = np.percentile(dGs,50)
    params_df.loc[i,'ddG'] = 0.5*(np.percentile(dGs,84)-np.percentile(dGs,16))
    
    # Compute betas
    params_df.loc[i,'beta'] = np.percentile(betas,50)
    params_df.loc[i,'dbeta-'] = np.percentile(betas,50)-np.percentile(betas,16)
    params_df.loc[i,'dbeta+'] = np.percentile(betas,84)-np.percentile(betas,50)
    
params_df.set_index('spacing',drop=True,inplace=True)
params_df

Processing sheet c60_beta_resamp...
Processing sheet c61_beta_resamp...
Processing sheet c62_beta_resamp...
Processing sheet c63_beta_resamp...
Processing sheet c64_beta_resamp...
Processing sheet c65_beta_resamp...
Processing sheet c71_beta_resamp...
Processing sheet c72_beta_resamp...
Processing sheet c81_beta_resamp...
Processing sheet c82_beta_resamp...


Unnamed: 0_level_0,sheet,alpha,dalpha-,dalpha+,dG,ddG,beta,dbeta-,dbeta+
spacing,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
-60.5,c60_beta_resamp,1.20613,0.191459,0.767884,-0.115685,0.205401,1.204917,0.190254,0.752563
-61.5,c61_beta_resamp,0.953294,0.024789,0.024954,0.029526,0.016107,0.953215,0.024899,0.025016
-62.5,c62_beta_resamp,1.038607,0.090053,0.13329,-0.023382,0.065258,1.038557,0.090099,0.132472
-63.5,c63_beta_resamp,0.77111,0.477697,0.399852,0.160599,0.427269,0.768751,0.533961,0.401352
-64.5,c64_beta_resamp,0.971899,0.196979,0.228396,0.017597,0.13505,0.97187,0.1992,0.227274
-65.5,c65_beta_resamp,1.353537,0.530721,0.244941,-0.186865,0.20496,1.35036,0.528857,0.240409
-71.5,c71_beta_resamp,0.93098,0.085486,0.075249,0.044147,0.053717,0.930804,0.086282,0.075423
-72.5,c72_beta_resamp,0.887261,0.079802,0.072332,0.073838,0.053277,0.886768,0.080889,0.072767
-81.5,c81_beta_resamp,0.841901,0.10037,0.09149,0.10623,0.071024,0.840879,0.102449,0.092348
-82.5,c82_beta_resamp,0.830144,0.240043,0.074378,0.114911,0.131825,0.828948,0.248642,0.075227


In [6]:
# Prepare data frame to hold recruitment table
columns = [
    'position', 
    'num_inliers', 
    'num_outliers', 
    'dG', 
    'alpha', 
    'gaston',
    'ushida',
]
gaston_dict = {
    -60.5:'3.85',
    -61.5:'9.05',
    -62.5:'4.22',
    -66.5:'0.78',
    -71.5:'2.50',
    -72.5:'3.49',
    -76.5:'0.54'
}
ushida_dict = {
    -61.5:'20.6',
    -66.5:'0.84',
    -71.5:'16.4',
    -82.5:'6.99'
}
table_df = pd.DataFrame(columns=columns)
table_df.head()

Unnamed: 0,position,num_inliers,num_outliers,dG,alpha,gaston,ushida


In [15]:
### Figure 4: Recruitment data ###
fig, ax = plt.subplots(figsize=[1,1])

samples_dists = [
    ('c60',60.5), 
    ('c61',61.5),
    ('c62',62.5), 
    ('c63',63.5), 
    ('c64',64.5),
    ('c65',65.5),
    ('c66',66.5),
    ('c71',71.5), 
    ('c72',72.5),
    ('c76',76.5),
    ('c81',81.5),
    ('c82',82.5)]


t_sat = np.exp(distance_df.loc['fit','log_tsat'])
t_sats = np.exp(distance_df.loc[:,'log_tsat'].values[1:])
t_sat_50 = np.percentile(t_sats,50)
t_sat_84 = np.percentile(t_sats,84)
t_sat_16 = np.percentile(t_sats,16)

print('t_sat = %.1f^{+%.1f}_{-%.1f}'%(t_sat_50,t_sat_84-t_sat_50,t_sat_50-t_sat_16))


for n, pair in enumerate(samples_dists):

    sample = pair[0]
    dist = pair[1]
    samples_labels_colors = [(sample,None,'k')]
    spacing = -dist
    row = params.loc[spacing,:]
    
    
    # Plot measurements, 
    num_points, num_outliers = plot_manifold_measurements(ax,data_df,samples_labels_colors, 
                                                           markersize=3, xlabel='', ylabel='', 
                                                           fontsize=9, show_legend=False, lim=lim)

    # Write strings
    dG_str = r'$%.2f\pm%.2f$'%(row['dG'],row['ddG'])
    alpha_vals = (row['alpha'], row['dalpha+'], row['dalpha-'])
    if alpha > 100:
        alpha_str = '$%d^{+%d}_{-%d}$'%alpha_vals
    elif alpha > 10:
        alpha_str = '$%.1f^{+%.1f}_{-%.1f}$'%alpha_vals
    else:
        alpha_str = '$%.2f^{+%.2f}_{-%.2f}$'%alpha_vals
    beta_str = '$%.2f^{+%.2f}_{-%.2f}$'%(row['beta'], row['dbeta+'], row['dbeta-'])
    
    # Fill out table
    table_df.loc[n,'position'] = '%.1f'%(-dist)
    table_df.loc[n,'num_inliers'] = '%d'%(num_points-num_outliers)
    table_df.loc[n,'num_outliers'] = '%d'%num_outliers
    table_df.loc[n,'alpha'] = alpha_str
    table_df.loc[n,'dG'] = dG_str    
    table_df.loc[n,'beta'] = beta_str
    table_df.loc[n, 'gaston'] = gaston_dict[-dist] if -dist in gaston_dict.keys() else 'n/a'
    table_df.loc[n, 'ushida'] = ushida_dict[-dist] if -dist in ushida_dict.keys() else 'n/a'

table_df

NameError: name 'textwidth' is not defined

In [None]:
# Write latex table
header = r"""
\begin{tabular}{c c c c c c c}
\toprule position (bp) & $n$ & outliers & $\Delta G_\alpha$ (kcal/mol) & {$\alpha$} & {$t_+ / t_-$ (Gaston)} & {$t_+ / t_-$ (Ushida)} \\ \midrule
"""

footer = r"""\bottomrule 
\end{tabular}
"""


file_str = header
for n, row in table_df.iterrows():
    file_str +=  ' & '.join(row) + ' \\\\\n'
file_str += footer

with open('../tables/recruitment.txt','w') as f:
    f.write(file_str)

In [None]:
! cat ../tables/recruitment.txt