In [None]:
import pickle
from matplotlib import pyplot as plt
import numpy as np
import math
import pandas as pd

In [None]:
# Load the result object stored in bioen_result.pkl. Later cells index it as
# data[float(theta)][field], i.e. one record per theta value.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only open result files that were produced by a trusted run.
with open('./bioen_result.pkl', 'rb') as f:
    data = pickle.load(f)

In [None]:
# Display the whole loaded result object for inspection.
data

In [None]:
# Read one theta value per line. Entries are kept as strings and converted
# with float() at lookup time, where they serve as keys into `data`.
with open('./thetas.dat') as f:
    # Skip blank lines (e.g. a trailing empty line at the end of the file):
    # an empty string would make the later float(theta) lookups raise ValueError.
    thetas = [line.rstrip() for line in f if line.strip()]

thetas

In [None]:
# NOTE(review): this rebinds `thetas` to a single empty string, discarding the
# values read from thetas.dat. float('') raises ValueError, so every later
# data[float(theta)] lookup fails with this value.
# Presumably a placeholder for a hand-picked list of theta strings — confirm or remove.
thetas = ['']

In [None]:
# L-curve plot: collect one chi-square value per theta.

# The records in `data` are keyed by the theta value as a float.
theta_keys = [float(theta) for theta in thetas]

# Leading entry: field 3 of the first theta's record (presumably the value of
# the starting ensemble — TODO confirm). Remaining entries: field 2 of every
# theta's record, in the order of `thetas`.
chi_square = np.array([data[theta_keys[0]][3]] + [data[key][2] for key in theta_keys])

# 121 is the divisor used for the reduced chi-square
# (presumably the number of restraints — TODO confirm).
reduced_chi_square = chi_square / 121

In [None]:
# Display the chi-square array (leading entry followed by one value per theta).
chi_square

In [None]:
# Display the chi-square values after division by 121.
reduced_chi_square

In [None]:
# Relative entropy S_KL for every theta. The leading 0 stands for the
# initial data before reweighting, so S_KL has len(thetas) + 1 entries.
S_KL = [0]

for theta in thetas:
    record = data[float(theta)]
    weights, reference = record[18], record[16]
    # sum_i w_i * ln(w_i / w0_i) with w = field 18 and w0 = field 16 of the record
    S_KL.append(np.sum(np.array(weights) * np.log(np.array(weights / reference))))

S_KL

In [None]:
# Print the relative entropy at position 18 of S_KL
# (position 0 is the starting ensemble, so this is the 18th theta's value).
print(S_KL[18])

In [None]:
%matplotlib inline
plt.figure(figsize = (6,4))
plt.plot(S_KL,reduced_chi_square,'x',color = 'black')
plt.title('L-curve analysis')
plt.ylabel('Reduced $\chi$$^2$')
plt.xlabel('Relative entropy $S_{KL}$')


In [None]:
from kneed import KneeLocator

# Window of L-curve positions (indices into S_KL / reduced_chi_square)
# in which the knee is searched.
start = 9
end = 19

y = list(reduced_chi_square)[start:end]
x = S_KL[start:end]

# The L-curve is convex and decreasing: chi-square drops as S_KL grows.
kn = KneeLocator(x, y, curve='convex', direction='decreasing')
print(kn.knee)

# Label the axes with the quantities that are actually plotted
# (x = relative entropy, y = reduced chi-square).
plt.xlabel('Relative entropy $S_{KL}$')
plt.ylabel(r'Reduced $\chi$$^2$')
plt.plot(x, y, 'bx-')
# kn.knee is None when no knee is detected; only draw the marker line if one was found.
if kn.knee is not None:
    plt.vlines(kn.knee, plt.ylim()[0], plt.ylim()[1], linestyles='dashed')

In [None]:
# Index of the selected theta in `thetas` (used for the w_opt and rates lookups
# further down).
# NOTE(review): S_KL and reduced_chi_square carry one extra leading entry for the
# starting ensemble, so thetas[i] pairs with S_KL[i + 1] — confirm that 18 was
# chosen as a `thetas` index and not as an S_KL position.
optimal_theta_idx = 18

In [None]:
# exp(-x) for a hard-coded value.
# NOTE(review): the constant looks like a relative-entropy value pasted from the
# S_KL output — confirm which entry it is and consider referencing it by index.
np.exp(-0.3267974860657099)

In [None]:
# L-curve with the starting ensemble and the selected ensemble highlighted.
#
# S_KL / reduced_chi_square hold the starting ensemble at position 0 followed by
# one entry per theta, so the theta at thetas[i] sits at position i + 1.
# The weights and rates used downstream are read with thetas[optimal_theta_idx];
# indexing the curves with optimal_theta_idx directly would highlight the
# neighbouring theta instead of the one that is actually used.
# NOTE(review): if 18 was picked as a position on the S_KL curve, set
# optimal_theta_idx = 17 rather than reverting this offset.
selected_pos = optimal_theta_idx + 1

plt.figure(figsize = (4,3))
plt.plot(S_KL,reduced_chi_square,'x',color = 'black')
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
plt.ylabel(r'Reduced $\chi$$^2$')
plt.xlabel('Relative entropy $S_{KL}$')
plt.scatter(S_KL[selected_pos],reduced_chi_square[selected_pos],
            color = 'blue',s=50,label = 'SBM: Optimised Ensemble')
plt.scatter(S_KL[0],reduced_chi_square[0],color = 'green',s=50,label = 'SBM: Starting Ensemble')
plt.legend(loc = 'upper right')
plt.tight_layout()
plt.savefig('lcurve_new.pdf')

In [None]:
# Field 18 of the record for the selected theta: the same field that the S_KL
# calculation uses as the reweighted weights.
w_opt = data[float(thetas[optimal_theta_idx])][18]

In [None]:
# Positions of the ten largest entries of w_opt, in ascending order of weight
# (the last element is the position of the maximum).
idx_top10 = sorted(range(len(w_opt)), key=w_opt.__getitem__)[-10:]


In [None]:
# Display the positions of the ten largest weights (0-based).
idx_top10

In [None]:
# Print the weight at each of the top-ten positions, smallest first.
for idx in idx_top10:
    print(w_opt[idx])

In [None]:
# Largest single weight in w_opt.
np.max(w_opt)

In [None]:
# make frames text file with top 10 frame indices
# writing frames file for making new trajectory file with gmx trajconv
# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open('top10.ndx', 'w') as f:
    f.write('[ frames ]\n')
    for idx in idx_top10:
        # python indexing starts at 0, gromacs indexing starts at 1
        f.write('{}\n'.format(idx+1))

In [None]:
# save w_opt
# Written in NumPy's binary .npy format to w_opt.npy in the working directory.

np.save('w_opt.npy',w_opt)

In [None]:
# Fields 13 and 14 of the record for the selected theta. Both are iterated below
# as dict-of-dicts: {experiment id: {restraint id: value}}.
# Presumably 13 = rates of the starting ensemble, 14 = rates after reweighting — TODO confirm.
rates_init = data[float(thetas[optimal_theta_idx])][13]
rates_opt = data[float(thetas[optimal_theta_idx])][14]

In [None]:
# Dump every experiment in rates_init with its per-restraint values.
# p_id / p_info stay bound to the last experiment once the loop has finished.
for p_id, p_info in rates_init.items():
    print("\n Exp:", p_id)

    for key, value in p_info.items():
        print(key + ':', value)

In [None]:
# Values and restraint IDs of the initial rates as two parallel lists.
#
# The record is taken explicitly from rates_init instead of reading a loop
# variable left behind by a printing cell: with leftover state, running the
# cells in a different order silently yields the values of another dict.
# NOTE(review): only the last experiment in rates_init is used (this matches
# what the leftover loop variable held) — confirm there is a single experiment.
init_info = list(rates_init.values())[-1]

r2_init = [float(value) for value in init_info.values()]
r2_init_id = [str(ID) for ID in init_info.keys()]


In [None]:
# Display the initial rate values (floats).
r2_init

In [None]:
# Display the restraint IDs that belong to r2_init (same order).
r2_init_id

In [None]:
# Quick look at the initial rates, clipped to the 0-100 range on the y axis.
ax = plt.gca()
ax.plot(r2_init)
ax.set_ylim(0, 100)

In [None]:
# Dump every experiment in rates_opt with its per-restraint values.
# p_id / p_info stay bound to the last experiment once the loop has finished.
for p_id, p_info in rates_opt.items():
    print("\n Exp:", p_id)

    for key, value in p_info.items():
        print(key + ':', value)

In [None]:
# Values and restraint IDs of the optimised rates as two parallel lists.
#
# The record is taken explicitly from rates_opt instead of reading a loop
# variable left behind by a printing cell: if the rates_init printing cell was
# the last one executed, that leftover variable would hold the initial rates
# and r2_opt would silently equal r2_init.
# NOTE(review): only the last experiment in rates_opt is used (this matches
# what the leftover loop variable held) — confirm there is a single experiment.
opt_info = list(rates_opt.values())[-1]

r2_opt = [float(value) for value in opt_info.values()]
r2_opt_id = [str(ID) for ID in opt_info.keys()]



In [None]:
# Overlay optimised and initial rates, clipped to the 0-100 range on the y axis.
ax = plt.gca()
ax.plot(r2_opt)
ax.plot(r2_init)
ax.set_ylim(0, 100)

In [None]:
# calculate ratios
# defining constants
# Only R2H, R2MQ and DELTA enter the ratio formula in the next cell;
# K, tau_C and larmor_H are defined here but not referenced by it.
K = 1.23e-44 # m^6 s^-2
tau_C = 5e-9 # tauC for disordered RNC, in seconds. NOTE(review): the value is 5 ns but the earlier comment said 12 ns — confirm
larmor_H = 800.284e6*2*math.pi # s^-1, larmor frequency proton (800.284 MHz converted to angular frequency)
#t = 0.0111 # total evolution time of the transverse proton magnetization during the NMR experiment, 11.1ms, in seconds
R2H = 100.0 # in s^-1
R2MQ = 100.0 # in s^-1
DELTA = 5.6e-3 # delay time in s




In [None]:
# calculate ratios
# For every rate r:
#   ratio = R2H * exp(-2 * DELTA * r) / (R2H + r) * R2MQ / (R2MQ + r)
_r_init = np.array(r2_init)
_r_opt = np.array(r2_opt)

ratios_init = (R2H*np.exp(-2*DELTA*_r_init)/(R2H+_r_init))*(R2MQ/(R2MQ+_r_init))
ratios_opt = (R2H*np.exp(-2*DELTA*_r_opt)/(R2H+_r_opt))*(R2MQ/(R2MQ+_r_opt))



In [None]:
# construct a table
# One row per restraint: ID, initial / optimised rate and the ratios derived from them.
_table_columns = ['ID', 'rate_init','rate_opt','ratios_init','ratios_opt']
_table_rows = list(zip(r2_init_id, r2_init, r2_opt, ratios_init, ratios_opt))

all_data = pd.DataFrame(_table_rows, columns=_table_columns)

In [None]:
# Display the table of back-calculated rates and ratios.
all_data

In [None]:
# load ratios experimental data
# One CSV per label (c657, c699, c744). The cells below read the columns
# 'Residue', 'Ratio_paramagnetic:diamagnetic' and 'Combined_error' from each table.
ratios_c657 = pd.read_csv('./experimental_data/a3a3_c657_ratios_new.csv')
ratios_c699 = pd.read_csv('./experimental_data/a3a3_c699_ratios_new.csv')
ratios_c744 = pd.read_csv('./experimental_data/a3a3_c744_ratios_new.csv')

In [None]:
# Flatten the three experimental tables into parallel lists (IDs, ratios, errors),
# always in the order c657, c699, c744.
_labelled_tables = [
    ('pre_c657_{}', ratios_c657),
    ('pre_c699_{}', ratios_c699),
    ('pre_c744_{}', ratios_c744),
]

# Restraint IDs are built from the label template and the integer residue number.
exp_ids = [
    template.format(int(residue))
    for template, table in _labelled_tables
    for residue in table['Residue']
]

exp_values = [
    value
    for _, table in _labelled_tables
    for value in table['Ratio_paramagnetic:diamagnetic']
]

exp_errors = [
    error
    for _, table in _labelled_tables
    for error in table['Combined_error']
]
    


In [None]:
# Positions in exp_ids whose ID also occurs among the back-calculated restraint IDs.
restraint_idxs = [pos for pos, exp_id in enumerate(exp_ids) if exp_id in r2_init_id]

len(restraint_idxs)

In [None]:
# Keep only the experimental entries that have a back-calculated counterpart.
final_exp_ids = [exp_ids[pos] for pos in restraint_idxs]
final_exp_values = [exp_values[pos] for pos in restraint_idxs]
final_exp_errors = [exp_errors[pos] for pos in restraint_idxs]
    


In [None]:
# Back-calculated restraint ID without an experimental counterpart. If several
# are missing, the last one in r2_init_id order is kept; '' if none is missing.
_unmatched_ids = [rid for rid in r2_init_id if rid not in final_exp_ids]
remove_this_one = _unmatched_ids[-1] if _unmatched_ids else ''

In [None]:
# Table of the experimental ratios and errors that have a matching restraint.
_exp_rows = list(zip(final_exp_ids, final_exp_values, final_exp_errors))
exp_data = pd.DataFrame(_exp_rows, columns=['ID', 'exp_values','exp_errors'])
exp_data

In [None]:
# Join experiment and back-calculation on the restraint ID (rows present in only
# one of the tables are dropped), then order by decreasing experimental ratio.
all_data2 = pd.merge(exp_data, all_data, how='inner', on=['ID'])
final_all_data = all_data2.sort_values(by=['exp_values'], ascending=False)

In [None]:
# calculate reduced chi square for initial and optimised model
def chi_square(predictions,targets,errors,reduced = False):
    """Error-weighted sum of squared deviations between predictions and targets.

    Parameters
    ----------
    predictions, targets : array-like
        Predicted and reference values; both must have the same length.
    errors : array-like
        Uncertainties of the targets; each squared deviation is divided by
        the squared error.
    reduced : bool, optional
        If truthy, divide the sum by the number of targets.

    Returns
    -------
    float
        sum(((predictions - targets) / errors) ** 2), divided by len(targets)
        when ``reduced`` is truthy.

    Raises
    ------
    ValueError
        If predictions and targets differ in length. A mismatch that happens to
        broadcast would otherwise produce a silently wrong value.
    """
    # Accept plain sequences as well as arrays.
    predictions = np.asarray(predictions)
    targets = np.asarray(targets)
    errors = np.asarray(errors)

    if len(predictions) != len(targets):
        raise ValueError("Error: Number of data points in predictions and targets are not the same!")

    total = np.sum(((predictions - targets) ** 2)/(errors**2))

    # Truthiness test so that every value of `reduced` yields a number
    # (comparing against True/False separately returned None for other values).
    if reduced:
        return total/len(targets)
    return total
    
# Reduced chi-square of the initial and the optimised ratios against experiment.
_exp_targets = np.array(final_all_data['exp_values'])
_exp_sigmas = np.array(final_all_data['exp_errors'])

init_chi = chi_square(np.array(final_all_data['ratios_init']), _exp_targets, _exp_sigmas, reduced=True)
opt_chi = chi_square(np.array(final_all_data['ratios_opt']), _exp_targets, _exp_sigmas, reduced=True)

print(init_chi)
print(opt_chi)

In [None]:
# plot
# Experimental ratios (with error bars) against initial and optimised
# back-calculated ratios; rows are ordered by decreasing experimental ratio.

plt.figure(figsize = (12,4))
plt.plot(final_all_data['ID'],final_all_data['exp_values'],'ko',label = 'Experiment',alpha = 0.4)
plt.errorbar(final_all_data['ID'], final_all_data['exp_values'], final_all_data['exp_errors'],color = 'black',
            alpha = 0.4)

# Raw strings: '\c' is not a valid escape sequence in a normal string literal.
initial_string = r'Initial: Reduced $\chi$$^2$ = {:.2f}'.format(init_chi)
optimal_string = r'Optimal: Reduced $\chi$$^2$ = {:.2f}'.format(opt_chi)

plt.scatter(final_all_data['ID'],final_all_data['ratios_init'],color = 'red',alpha = 0.4,
            label = initial_string)
plt.scatter(final_all_data['ID'],final_all_data['ratios_opt'],color = 'blue',alpha = 0.4,label = optimal_string)

# The x axis only enumerates data points; the ID tick labels are hidden.
plt.xticks([])
plt.xlabel('Data points')
plt.ylabel('Iox/Ired')
plt.legend()
#plt.savefig('exp_init_opt_used.png')

In [None]:
# plot
# Same comparison without legend or transparency on the experimental points.
# The merged table has no 'exp_ID' column (its ID column is called 'ID'),
# so the experimental series must be addressed through 'ID' as well.
plt.figure(figsize = (10,4))
plt.plot(final_all_data['ID'],final_all_data['exp_values'],'ko')
plt.errorbar(final_all_data['ID'], final_all_data['exp_values'], final_all_data['exp_errors'],color = 'black')

plt.scatter(final_all_data['ID'],final_all_data['ratios_init'],color = 'red',alpha = 0.4)
plt.scatter(final_all_data['ID'],final_all_data['ratios_opt'],color = 'blue',alpha = 0.4)

In [None]:
# Re-sort by the ID string so that rows sharing a label prefix
# (pre_c657_, pre_c699_, pre_c744_) are contiguous. The sort is lexicographic,
# so residue numbers are ordered as text, not numerically.
final_table = final_all_data.sort_values(by=["ID"])

In [None]:
# Show every row of the table. Note: set_option changes the display limit for
# all later cells of the session, not only for this output.
pd.set_option('display.max_rows', None)
final_table

In [None]:
# Column shortcuts (pandas Series) used by the per-label plots below.
# Note: `exp_errors` rebinds the name of the plain list of experimental errors
# built earlier in the notebook; from here on it is the table column.
IDs = final_table['ID']
exp_ratios = final_table['exp_values']
exp_errors = final_table['exp_errors']
init_ratios = final_table['ratios_init']
opt_ratios = final_table['ratios_opt']

In [None]:
# plot

#c657

# Select the rows of this label by their ID prefix. Hard-coded positional
# windows silently drop or misassign rows whenever the number of restraints
# per label differs from what the window assumes.
c657_rows = IDs.str.startswith('pre_c657_')

plt.figure(figsize = (10,4))
plt.plot(IDs[c657_rows],exp_ratios[c657_rows],'ko',label = 'Experiment',alpha = 0.4)
plt.errorbar(IDs[c657_rows], exp_ratios[c657_rows], exp_errors[c657_rows],color = 'black',
            alpha = 0.4)

# Raw strings: '\c' is not a valid escape sequence in a normal string literal.
# The chi-square values in the legend are those of the full data set.
initial_string = r'Initial: Reduced $\chi$$^2$ = {:.2f}'.format(init_chi)
optimal_string = r'Optimal: Reduced $\chi$$^2$ = {:.2f}'.format(opt_chi)

plt.scatter(IDs[c657_rows],init_ratios[c657_rows],color = 'red',alpha = 0.4,
            label = initial_string)
plt.scatter(IDs[c657_rows],opt_ratios[c657_rows],color = 'blue',alpha = 0.4,label = optimal_string)

plt.xticks([])
plt.xlabel('Data points')
plt.ylabel('Iox/Ired')
plt.legend()
#plt.savefig('c657_exp_init_opt_used.png')

In [None]:
# plot

#c699

# Select the rows of this label by their ID prefix. Hard-coded positional
# windows silently drop or misassign rows whenever the number of restraints
# per label differs from what the window assumes.
c699_rows = IDs.str.startswith('pre_c699_')

plt.figure(figsize = (10,4))
plt.plot(IDs[c699_rows],exp_ratios[c699_rows],'ko',label = 'Experiment',alpha = 0.4)
plt.errorbar(IDs[c699_rows], exp_ratios[c699_rows], exp_errors[c699_rows],color = 'black',
            alpha = 0.4)

# Raw strings: '\c' is not a valid escape sequence in a normal string literal.
# The chi-square values in the legend are those of the full data set.
initial_string = r'Initial: Reduced $\chi$$^2$ = {:.2f}'.format(init_chi)
optimal_string = r'Optimal: Reduced $\chi$$^2$ = {:.2f}'.format(opt_chi)

plt.scatter(IDs[c699_rows],init_ratios[c699_rows],color = 'red',alpha = 0.4,
            label = initial_string)
plt.scatter(IDs[c699_rows],opt_ratios[c699_rows],color = 'blue',alpha = 0.4,label = optimal_string)

plt.xticks([])
plt.xlabel('Data points')
plt.ylabel('Iox/Ired')
plt.legend()
#plt.savefig('c699_exp_init_opt_used.png')

In [None]:
# plot

#c744

# Select the rows of this label by their ID prefix. Hard-coded positional
# windows silently drop or misassign rows whenever the number of restraints
# per label differs from what the window assumes.
c744_rows = IDs.str.startswith('pre_c744_')

plt.figure(figsize = (10,4))
plt.plot(IDs[c744_rows],exp_ratios[c744_rows],'ko',label = 'Experiment',alpha = 0.4)
plt.errorbar(IDs[c744_rows], exp_ratios[c744_rows], exp_errors[c744_rows],color = 'black',
            alpha = 0.4)

# Raw strings: '\c' is not a valid escape sequence in a normal string literal.
# The chi-square values in the legend are those of the full data set.
initial_string = r'Initial: Reduced $\chi$$^2$ = {:.2f}'.format(init_chi)
optimal_string = r'Optimal: Reduced $\chi$$^2$ = {:.2f}'.format(opt_chi)

plt.scatter(IDs[c744_rows],init_ratios[c744_rows],color = 'red',alpha = 0.4,
            label = initial_string)
plt.scatter(IDs[c744_rows],opt_ratios[c744_rows],color = 'blue',alpha = 0.4,label = optimal_string)

plt.xticks([])
plt.xlabel('Data points')
plt.ylabel('Iox/Ired')
plt.legend()
#plt.savefig('c744_exp_init_opt_used.png')

In [None]:
# plot
# Experimental ratios as bars with error bars, overlaid with the initial (red)
# and optimised (blue) back-calculated profiles; rows are ordered by ID.

plt.figure(figsize = (15,6))
plt.bar(final_table['ID'],final_table['exp_values'],color = 'cyan',yerr = final_table['exp_errors'], 
        edgecolor = 'black',linewidth = 0.5,error_kw=dict(ecolor='black',elinewidth=0.5)
       ,label = 'isolated A3A3 Exp. Data')

# Raw strings: '\c' is not a valid escape sequence in a normal string literal.
initial_string = r'Initial: Reduced $\chi$$^2$ = {:.2f}'.format(init_chi)
optimal_string = r'Optimal: Reduced $\chi$$^2$ = {:.2f}'.format(opt_chi)

plt.plot(final_table['ID'],final_table['ratios_init'],'r-',
            label = initial_string)
plt.plot(final_table['ID'],final_table['ratios_opt'],'b-',label = optimal_string)

plt.xticks([])
plt.xlabel('Data points')
plt.ylabel('Iox/Ired')
plt.legend()
#plt.savefig('reweighted_overlayed_pre_profile.png')