In [None]:
import numpy as np
import pandas as pd
import scipy as sp
from pathlib import Path
import safep
import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
import re

from Notebook_Helpers import get_upper_walls, FEPData, TIData, Kd, P_bind

import warnings  # Used only to silence FutureWarning messages (attributed to pandas by the original author) so they do not clutter cell output.
warnings.simplefilter(action='ignore', category=FutureWarning)

# What this Notebook Does:

Each section of this notebook will calculate a component of the free energy of binding of phenol to lysozyme as described in the companion tutorial. Briefly, during a free energy perturbation simulation, NAMD (or other software) will write the difference in internal energy between the simulated state and an adjacent state ($\Delta E = E_{\lambda_{k \pm 1}} - E_{\lambda_k}$, where $\lambda_k$ determines the ensemble being simulated). The functions used by this notebook read, parse, and process those outputs into a standard format that can be quickly analyzed using one of several methods. By default, we use the Bennett Acceptance Ratio (BAR) estimator with automated decorrelation to make the calculations more robust to both outliers and autocorrelation. See Shirts and Chodera (2008) for more details.

One section of the notebook uses [thermodynamic integration (TI)](https://en.wikipedia.org/wiki/Thermodynamic_integration) to calculate the free energy cost of imposing the DBC restraint. This calculation is much more straightforward than FEP calculations. We can analytically determine the derivative of the restraint energy with respect to lambda in each of a series of simulations run at discrete values of lambda. Averaging those derivatives within each simulation and accumulating them over lambda yields the free energy cost.

# How to Use this Notebook:

## User Parameters:
The notebook as-is will read and process the sample outputs provided. 
To use it for your own data, be sure to update the section labeled "User Settings" below. Pay special attention to the *root* and *path* variables.

- root should be the path (relative or absolute) to the parent directory that contains (or will contain) all your data.
- temperature is the temperature **at which your simulations were run** 
- decorrelate: automatic decorrelation of samples (see Shirts and Chodera, 2008). Should be enabled for general use. In this notebook it is not a separate variable; it is switched on together with equilibrium detection by the detectEQ flag.
- detectEQ is the flag for automatic equilibrium detection (and decorrelation). Set to True. This is more robust than manually guessing at the time required for equilibration prior to a FEP run.

## Layout:
The notebook is organized into five sections (separated by horizontal lines):
- Process the Bound Data [(step B)](#bound_fep)
- Process the DBC TI calculation [(step C)](#DBC_TI)
- Process the Unbound Data [(step D)](#unbound_fep)
- Calculate the Volumetric Restraint Contribution [(step E.2)](#volume)
- Calculate the Binding Free Energy [(step E.3)](#total)

## File Structure Data:

```
Repository/Supp-Files
|
|----stepB_alchemy_site
|    |----[sample_]outputs
|         | *.fepout
|
|----stepC_restraint_perturbation
|    |----[sample_]outputs
|         | *.colvars.traj
|
|----stepD_alchemy_bulk
|    |----[sample_]outputs
|         | *.fepout
|
|
```



# Other Important Notes and Credits
- This notebook is specially written for the SAFEP tutorial. 
For more up-to-date and general versions see the SAFEP github.

- This and other SAFEP notebooks make use of pyMBAR and Alchemlyb. 
For more information see Shirts and Chodera (2008), ["Statistically optimal analysis of samples from multiple equilibrium states"](https://doi.org/10.1063%2F1.2978177)



# User Settings:

In [None]:
# Root path to your project; every input and output path below is built from it.
root = '.'

# Temperature at which the simulations were run. Used throughout: it is passed to
# each FEPData/TIData analysis below.
# NOTE(review): presumably in kelvin -- confirm against Notebook_Helpers.
temperature = 300

In [None]:
# Upper wall read from the colvars configuration file. COMradius is used further down
# as the radius of the sphere in the volumetric restraint contribution
# (4/3*pi*COMradius**3), not in the DBC TI calculation itself.
# NOTE(review): presumably this is the wall of the center-of-mass distance restraint --
# confirm that get_upper_walls picks the intended colvar out of this file.
COMfname = f'{root}/stepC_restraint_perturbation/sample_outputs/DBC_restraint_RFEP.colvars'
COMradius = float(get_upper_walls(COMfname))

# Position of the DBC upper wall; passed to TIData as upper_walls for the
# restraint-perturbation (TI) analysis.
DBCfname = f'{root}/stepC_restraint_perturbation/inputs/run.namd'
DBCwidth = float(get_upper_walls(DBCfname))

## Update these paths to point to your output files:

In [None]:
# Directories that hold the simulation outputs for each step. The figures produced
# below are also saved into these directories.
bound_fep_path=Path(f'{root}/stepB_alchemy_site/sample_outputs/')  # Step B: fepout file(s) for the bound ligand
restraint_perturbation_path = Path(f'{root}/stepC_restraint_perturbation/sample_outputs/')  # Step C: colvars trajectory file(s)
bulk_fep_path=Path(f'{root}/stepD_alchemy_bulk/sample_outputs/')  # Step D: fepout file(s) for the ligand in bulk

## Advanced settings:

In [None]:
detectEQ = True  # Flag for automatic equilibrium detection and decorrelation; passed to both FEPData analyses below.

***
<a id='bound_fep'></a>
# Process the Bound FEP Data 
Here we process the FEP data generated by decoupling the ligand from the protein (**Step B**)


In [None]:
# Pattern used to find the fepout file(s) inside bound_fep_path.
# NOTE(review): the original comment says this can be a regex, but '*.fepout' is
# shell-glob syntax (it is not a valid regular expression) -- confirm how FEPData
# interprets the pattern before relying on regex features.
# Caution: if you have multiple fepout files, name them alphanumerically.
filepattern='*.fepout'

# Build and run the analysis of the bound-state ("site") FEP data.
site = FEPData(bound_fep_path, filepattern, temperature, detectEQ, "site")
site.process()
# Last expression of the cell, so the free energy summary is rendered as Markdown.
Markdown(site.pretty_print_dG())

In [None]:
# Make the general and convergence plots for the bound-state FEP data and save
# both as PDFs next to the fepout files.
width = 8
height = 4

# Manually set the extent of the y axes
cumulative_ylim = (0,16)
perwindow_ylim = (0,1.25)

fig, axes = site.general_plot(width, height, cumulative_ylim, perwindow_ylim)
plt.savefig(f'{bound_fep_path}/bound_generalFigures.pdf')
plt.show()

# The convergence plot is drawn at 3/4 of the general plot's width.
# (The handle was previously bound to the misspelled name `fit`, which left `fig`
# referring to the general plot above.)
fig, axes = site.convergence_plot(width*3/4, height)
plt.savefig(f'{bound_fep_path}/bound_convergence.pdf')
plt.show()

***
<a id='DBC_TI'></a>
# Process the DBC TI calculation
Compute the free energy cost of restraining the DBC coordinate (**Step C**)

Note: The error estimate below is just the standard deviation of the samples. <br>
The BAR errors calculated above are standard errors of the mean.

In [None]:
# Pattern used to find the colvars trajectory file(s) inside restraint_perturbation_path.
filepattern = '*.colvars.traj'
# Describe the restraint-perturbation run to the TI analysis.
# NOTE(review): the numeric settings below (eqtime, num_steps, force constants,
# exponent, number of lambdas) presumably have to match the Step C simulation
# inputs -- confirm them before reusing this cell with your own data.
DBC = TIData(restraint_perturbation_path,
             filepattern,
             temperature,
             "DBC",
             eqtime=1000,
             num_steps=300000,
             force_constant=0,
             target_force_constant=200,
             force_exponent=6,
             nLambdas=41,
             upper_walls=DBCwidth)  # DBC upper wall obtained earlier with get_upper_walls

# Read the trajectory data, run the analysis, then render the summary as Markdown
# (last expression of the cell).
DBC.read()
DBC.process()
Markdown(DBC.pretty_print_dG())

In [None]:
# Plot the cumulative and per-window TI results, then save the figure as a PDF
# alongside the restraint-perturbation outputs.
ti_figure_file = f'{restraint_perturbation_path}/TI_general.pdf'

fig, axes = safep.plot_TI(DBC.cumulative, DBC.perWindow, fontsize=20)
plt.savefig(ti_figure_file)
plt.show()

***
<a id='unbound_fep'></a>
# Process the Bulk FEP Data
Ligand decoupling from solvent (**Step D**)

In [None]:
# Pattern used to find the fepout file(s) inside bulk_fep_path.
# NOTE(review): the original comment says this can be a regex, but '*.fepout' is
# shell-glob syntax (it is not a valid regular expression) -- confirm how FEPData
# interprets the pattern before relying on regex features.
# Caution: if you have multiple fepout files, name them alphanumerically.
filepattern='*.fepout'

# Build and run the analysis of the ligand-in-solvent ("bulk") FEP data.
bulk = FEPData(bulk_fep_path, filepattern, temperature, detectEQ, "bulk")
bulk.process()
# Last expression of the cell, so the free energy summary is rendered as Markdown.
Markdown(bulk.pretty_print_dG())

In [None]:
# Make the general and convergence plots for the bulk FEP data and save both as
# PDFs next to the fepout files.
width = 8
height = 4

# Manually set the extent of the y axes
cumulative_ylim = (0,8)
perwindow_ylim = (-0.5,1.)

fig, axes = bulk.general_plot(width, height, cumulative_ylim, perwindow_ylim)
plt.savefig(f'{bulk_fep_path}/bulk_generalFigures.pdf')
plt.show()

# The convergence plot is drawn at 3/4 of the general plot's width.
# (The handle was previously bound to the misspelled name `fit`, which left `fig`
# referring to the general plot above.)
fig, axes = bulk.convergence_plot(width*3/4, height)
plt.savefig(f'{bulk_fep_path}/bulk_convergence.pdf')
plt.show()

***
<a id='volume'></a>
# Calculate the Volumetric Restraint Contribution
**Step E.2**


In [None]:
# Cubic angstroms per particle in a one molar solution.
molar = 1660

# Volume of a sphere of radius COMradius.
# NOTE(review): assumes COMradius is in angstroms, to match `molar` -- confirm.
restraint_volume = 4/3*np.pi*COMradius**3

# Keep dG_V at full precision: it is summed into dG_binding later, and rounding it
# here would add up to 0.05 kcal/mol of avoidable error to the final result.
# Rounding is applied only when the value is formatted for display.
dG_V = -site.RT*np.log(restraint_volume/molar)
error_V = 0  # enters the error propagation as zero

change_mkd_vol = f'\u0394G<sub>V</sub> = {np.round(dG_V, 1)} kcal/mol'
# Last expression of the cell, so the result is rendered as Markdown.
Markdown('<font size=5>{}</font><br/>'.format(change_mkd_vol))

***
<a id='total'></a>
# Calculate the Binding Free Energy
**Step E.3 to End**

In [None]:
# Combine the four contributions: bulk decoupling, volumetric restraint, DBC restraint,
# minus the decoupling free energy in the binding site.
dG_binding = bulk.dG + dG_V + DBC.dG - site.dG

# Propagate the component errors by adding them in quadrature.
component_errors = np.array([bulk.error, error_V, site.error, DBC.error])
error_binding = np.sqrt(np.sum(component_errors**2))

In [None]:
# Round the estimate and its propagated error to one decimal place for display only.
dG_shown = np.round(dG_binding, 1)
error_shown = np.round(error_binding, 1)

changeAndError = f'\u0394G<sub>bind</sub> = {dG_shown}\u00B1{error_shown} kcal/mol'
# Last expression of the cell, so the result is rendered as Markdown.
Markdown(f'<font size=5>{changeAndError}</font><br/>')

# Estimate the titration curve

In [None]:
# A vector of concentrations in micromolar, log-spaced from 10**0 to 10**5.
concentrations = np.logspace(0,5,1000)
K = Kd(dG_binding, site.RT)

# Binding curves obtained by shifting dG_binding by -/+ 1.96 propagated errors;
# the band between them is drawn as the 95% confidence interval.
z95 = 1.96
P_lower = P_bind(Kd(dG_binding - z95*error_binding, site.RT), concentrations)
P_upper = P_bind(Kd(dG_binding + z95*error_binding, site.RT), concentrations)

# Tick label sizes are set through the global rc parameters before the figure is made.
mpl.rc('xtick', labelsize=16)
mpl.rc('ytick', labelsize=16)
fig, ax = plt.subplots(figsize=(10,6.1))

ax.plot(concentrations, P_bind(K, concentrations), label='Binding Curve')
ax.fill_between(concentrations, P_lower, P_upper, alpha=0.25, label='95% Confidence Interval')
ax.set_xscale('log')
ax.set_xlabel(r'Concentration of Phenol ($\mathrm{\mu}$M)', fontsize=20)
ax.set_ylabel('Fraction of Sites Occupied', fontsize=20)
# Mark the dissociation constant with a dashed vertical line spanning the y range 0..1.
ax.vlines(K, 0, 1, linestyles='dashed', color='black', label='Dissociation Constant')
ax.legend(loc='lower right', fontsize=20*0.75)

# The figure is written to the current working directory.
plt.savefig('./titration_curve.pdf')
plt.show()