Skip to content

Commit

Permalink
Merge pull request #286 from choderalab/bar_uncertainty
Browse files Browse the repository at this point in the history
BAR uncertainty clarified
  • Loading branch information
Lnaden committed Feb 22, 2018
2 parents 5de6592 + 072c6bd commit 27bc127
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 39 deletions.
11 changes: 9 additions & 2 deletions devtools/travis-ci/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@ MINICONDA=Miniconda2-latest-Linux-x86_64.sh
MINICONDA_HOME=$HOME/miniconda
MINICONDA_MD5=$(curl -s https://repo.continuum.io/miniconda/ | grep -A3 $MINICONDA | sed -n '4p' | sed -n 's/ *<td>\(.*\)<\/td> */\1/p')
wget -q http://repo.continuum.io/miniconda/$MINICONDA
# Parse version out of file as fall back
# Sed cuts out everything before the first digit, then traps the first digit and everything after
MINICONDA_DL_VER=$(head $MINICONDA | grep VER | sed -n 's/[^0-9]*\([0-9.]*\)/\1/p')
MINICONDA_FILE_VER="Miniconda2-$MINICONDA_DL_VER-Linux-x86_64.sh"
MINICONDA_MD5_VER=$(curl -s https://repo.continuum.io/miniconda/ | grep -A3 $MINICONDA_FILE_VER | sed -n '4p' | sed -n 's/ *<td>\(.*\)<\/td> */\1/p')
if [[ $MINICONDA_MD5 != $(md5sum $MINICONDA | cut -d ' ' -f 1) ]]; then
echo "Miniconda MD5 mismatch"
exit 1
if [[ $MINICONDA_MD5_VER != $(md5sum $MINICONDA | cut -d ' ' -f 1) ]]; then
echo "Miniconda MD5 mismatch"
exit 1
fi
fi
bash $MINICONDA -b -p $MINICONDA_HOME

Expand Down
181 changes: 144 additions & 37 deletions pymbar/bar.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def BARzero(w_F, w_R, DeltaF):
return fzero


def BAR(w_F, w_R, DeltaF=0.0, compute_uncertainty=True, maximum_iterations=500, relative_tolerance=1.0e-11, verbose=False, method='false-position', iterated_solution=True):
def BAR(w_F, w_R, DeltaF=0.0, compute_uncertainty=True, uncertainty_method='BAR',maximum_iterations=500, relative_tolerance=1.0e-12, verbose=False, method='false-position', iterated_solution=True):
"""Compute free energy difference using the Bennett acceptance ratio (BAR) method.
Parameters
Expand All @@ -162,6 +162,9 @@ def BAR(w_F, w_R, DeltaF=0.0, compute_uncertainty=True, maximum_iterations=500,
DeltaF can be set to initialize the free energy difference with a guess
compute_uncertainty : bool, optional, default=True
if False, only the free energy is returned
uncertainty_method: string, optional, default=BAR
There are two possible uncertainty estimates for BAR. One agrees with MBAR for two states exactly;
The other only agrees with MBAR in the limit of good overlap. See below.
maximum_iterations : int, optional, default=500
can be set to limit the maximum number of iterations performed
relative_tolerance : float, optional, default=1E-11
Expand Down Expand Up @@ -198,7 +201,7 @@ def BAR(w_F, w_R, DeltaF=0.0, compute_uncertainty=True, maximum_iterations=500,
>>> from pymbar import testsystems
>>> [w_F, w_R] = testsystems.gaussian_work_example(mu_F=None, DeltaF=1.0, seed=0)
>>> [DeltaF, dDeltaF] = BAR(w_F, w_R)
>>> print('Free energy difference is %.3f +- %.3f kT' % (DeltaF, dDeltaF))
>>> print('Free energy difference is {:.3f} +- {:.3f} kT'.format(DeltaF, dDeltaF))
Free energy difference is 1.088 +- 0.050 kT
Test various other schemes.
Expand Down Expand Up @@ -267,7 +270,7 @@ def BAR(w_F, w_R, DeltaF=0.0, compute_uncertainty=True, maximum_iterations=500,
if FNew == 0:
# Convergence is achieved.
if verbose:
print("Convergence achieved.")
print('Convergence achieved.')
relative_change = 10 ** (-15)
break

Expand All @@ -285,7 +288,7 @@ def BAR(w_F, w_R, DeltaF=0.0, compute_uncertainty=True, maximum_iterations=500,
if (DeltaF == 0.0):
# The free energy difference appears to be zero -- return.
if verbose:
print("The free energy difference appears to be zero.")
print('The free energy difference appears to be zero.')
if compute_uncertainty:
return [0.0, 0.0]
else:
Expand All @@ -294,7 +297,7 @@ def BAR(w_F, w_R, DeltaF=0.0, compute_uncertainty=True, maximum_iterations=500,
if iterated_solution:
relative_change = abs((DeltaF - DeltaF_old) / DeltaF)
if verbose:
print("relative_change = %12.3f" % relative_change)
print("relative_change = {:12.3f}".format(relative_change))

if ((iteration > 0) and (relative_change < relative_tolerance)):
# Convergence is achieved.
Expand All @@ -316,25 +319,127 @@ def BAR(w_F, w_R, DeltaF=0.0, compute_uncertainty=True, maximum_iterations=500,
raise BoundsError(message)

if verbose:
print("iteration %5d : DeltaF = %16.3f" % (iteration, DeltaF))
print("iteration {:5d}: DeltaF = {:16.3f}".format(iteration, DeltaF))

# Report convergence, or warn user if not achieved.
if iterated_solution:
if iteration < maximum_iterations:
if verbose:
print('Converged to tolerance of %e in %d iterations (%d function evaluations)' % (relative_change, iteration, nfunc))
print('Converged to tolerance of {:e} in {:d} iterations ({:d} function evaluations)'.format(relative_change, iteration, nfunc))
else:
message = 'WARNING: Did not converge to within specified tolerance. max_delta = %f, TOLERANCE = %f, MAX_ITS = %d' % (relative_change, relative_tolerance, maximum_iterations)
message = 'WARNING: Did not converge to within specified tolerance. max_delta = {:f}, TOLERANCE = {:f}, MAX_ITS = %d'.format(relative_change, relative_tolerance, maximum_iterations)
raise ConvergenceError(message)

if compute_uncertainty:
# Compute asymptotic variance estimate using Eq. 10a of Bennett, 1976 (except with n_1<f>_1^2 in
# the second denominator, it is an error in the original
# NOTE: The numerical stability of this computation may need to be improved.

'''
Compute asymptotic variance estimate using Eq. 10a of Bennett,
1976 (except with n_1<f>_1^2 in the second denominator, it is
an error in the original NOTE: The 'BAR' and 'MBAR' estimators
do not agree for poor overlap. This is not because of
numerical precision, but because they are fundamentally
different estimators. For poor overlap, 'MBAR' diverges high,
and 'BAR' diverges by being too low. In situations they are
noticeably from each other, they are also pretty different
from the true answer (obtained by calculating the standard
deviation over lots of realizations).
First, we examine the 'BAR' equation. Rederive from Bennett, substituting (8) into (7)
(8) -> W = [q0/n0 exp(-U1) + q1/n1 exp(-U0)]^-1
<(W exp(-U1))^2 >_0 <(W exp(-U0))^2 >_1
(7) -> ----------------------- + ----------------------- - 1/n0 - 1/n1
n_0 [<(W exp(-U1)>_0]^2 n_1 [<(W exp(-U0)>_1]^2
Const cancels out of top and bottom. Wexp(-U0) = [q0/n0 exp(-(U1-U0)) + q1/n1]^-1
= n1/q1 [n1/n0 q0/q1 exp(-(U1-U0)) + 1]^-1
= n1/q1 [exp (M+(F1-F0)-(U1-U0)+1)^-1]
= n1/q1 f(x)
Wexp(-U1) = [q0/n0 + q1/n1 exp(-(U0-U1))]^-1
= n0/q0 [1 + n0/n1 q1/q0 exp(-(U0-U1))]^-1
= n0/q0 [1 + exp(-M+[F0-F1)-(U0-U1))]^-1
= n0/q0 f(-x)
<(W exp(-U1))^2 >_0 <(W exp(-U0))^2 >_1
(7) -> ----------------------- + ----------------------- - 1/n0 - 1/n1
n_0 [<(W exp(-U1)>_0]^2 n_1 [<(W exp(-U0)>_1]^2
<[n0/q0 f(-x)]^2>_0 <[n1/q1 f(x)]^2>_1
----------------------- + ------------------------ -1/n0 -1/n1
n_0 <n0/q0 f(-x)>_0^2 n_1 <n1/q1 f(x)>_1^2
1 <[f(-x)]^2>_0 1 <[f(x)]^2>_1
- [----------------------- - 1] + - [------------------------ - 1]
n0 <f(-x)>_0^2 n1 n_1<f(x)>_1^2
where f = the fermi function, 1/(1+exp(-x))
This formula the 'BAR' equation works for works for free
energies (F0-F1) that don't satisfy the BAR equation. The
'MBAR' equation, detailed below, only works for free energies
that satisfy the equation.
Now, let's look at the MBAR version of the uncertainty. This
is written (from Shirts and Chodera, JPC, 129, 124105, Equation E9) as
[ n0<f(x)f(-x)>_0 + n1<f(x)f(-x)_1 ]^-1 - n0^-1 - n1^-1
we note the f(-x) + f(x) = 1, and change this to:
[ n0<(1-f(-x)f(-x)>_0 + n1<f(x)(1-f(x))_1 ]^-1 - n0^-1 - n1^-1
[ n0<f(-x)-f(-x)^2)>_0 + n1<f(x)-f(x)^2)_1 ]^-1 - n0^-1 - n1^-1
1 1 1
-------------------------------------------------------------------- - --- - ---
n0 <f(-x)>_0 - n0 <[f(-x)]^2>_0 + n1 <f(x)>_1 + n1 <[f(x)]^2>_1 n0 n1
Removing the factor of - (T_F + T_R)/(T_F*T_R)) from both, we compare:
<[f(-x)]^2>_0 <[f(x)]^2>_1
[------------------] + [---------------]
n0 <f(-x)>_0^2 n1 <f(x)>_1^2
1
--------------------------------------------------------------------
n0 <f(-x)>_0 - n0 <[f(-x)]^2>_0 + n1 <f(x)>_1 + n1 <[f(x)]^2>_1
denote: <f(-x)>_0 = afF
<f(-x)^2>_0 = afF2
<f(x)>_1 = afR
<f(x)^2>_1 = afF2
Then we can look at both of these as:
variance_BAR = (afF2/afF**2)/T_F + (afR2/afR**2)/T_R
variance_MBAR = 1/(afF*T_F - afF2*T_F + afR*T_R - afR2*T_R)
Rearranging:
variance_BAR = (afF2/afF**2)/T_F + (afR2/afR**2)/T_R
variance_MBAR = 1/(afF*T_F + afR*T_R - (afF2*T_F + afR2*T_R))
# check the steps below? Not quite sure.
variance_BAR = (afF2/afF**2) + (afR2/afR**2) = (afF2 + afR2)/afR**2
variance_MBAR = 1/(afF + afR - (afF2 + afR2)) = 1/(2*afR-(afF2+afR2))
Definitely not the same. Now, the reason that they both work
for high overlap is still not clear. We will determine the
difference at some point.
see https://github.com/choderalab/pymbar/issues/281 for more information.
Now implement the two computations.
'''

# Determine number of forward and reverse work values provided.
T_F = float(w_F.size) # number of forward work values
T_R = float(w_R.size) # number of reverse work values

# Compute log ratio of forward and reverse counts.
M = np.log(T_F / T_R)

Expand All @@ -343,40 +448,42 @@ def BAR(w_F, w_R, DeltaF=0.0, compute_uncertainty=True, maximum_iterations=500,
else:
C = M - DeltaF_initial

#fF = 1 / (1 + np.exp(w_F + C)), but we need to handle overflows
# In theory, overflow handling should not be needed now, because we use numlogexp or a custom routine?

# fF = 1 / (1 + np.exp(w_F + C)), but we need to handle overflows
exp_arg_F = (w_F + C)
# use boolean logic to zero out the ones that are less than 0, but not if greater than zero.
max_arg_F = np.choose(np.less(0.0, exp_arg_F), (0.0, exp_arg_F))
log_fF = - max_arg_F - np.log(np.exp(-max_arg_F) + np.exp(exp_arg_F - max_arg_F))
sum_fF = np.exp(logsumexp(log_fF))
max_arg_F = np.max(exp_arg_F)
log_fF = - np.log(np.exp(-max_arg_F) + np.exp(exp_arg_F - max_arg_F))
afF = np.exp(logsumexp(log_fF)-max_arg_F)/T_F

#fF = 1 / (1 + np.exp(w_F - C)), but we need to handle overflows
# fR = 1 / (1 + np.exp(w_R - C)), but we need to handle overflows
exp_arg_R = (w_R - C)
# use boolean logic to zero out the ones that are less than 0, but not if greater than zero.
max_arg_R = np.choose(np.less(0.0, exp_arg_R), (0.0, exp_arg_R))
log_fR = - max_arg_R - np.log(np.exp(-max_arg_R) + np.exp(exp_arg_R - max_arg_R))
sum_fR = np.exp(logsumexp(log_fR))

# compute averages of f_f
afF2 = (sum_fF/T_F) ** 2
afR2 = (sum_fR/T_R) ** 2

#var(x) = <x^2> - <x>^2
vfF = np.exp(logsumexp(2*log_fF))/T_F - afF2
vfR = np.exp(logsumexp(2*log_fR))/T_R - afR2

# an alternate formula for the variance that works for guesses
# for the free energy that don't satisfy the BAR equation.

variance = (vfF/T_F) / afF2 + (vfR/T_R) / afR2
max_arg_R = np.max(exp_arg_R)
log_fR = - np.log(np.exp(-max_arg_R) + np.exp(exp_arg_R - max_arg_R))
afR = np.exp(logsumexp(log_fR)-max_arg_R)/T_R

afF2 = np.exp(logsumexp(2*log_fF)-2*max_arg_F)/T_F
afR2 = np.exp(logsumexp(2*log_fR)-2*max_arg_R)/T_R

nrat = (T_F + T_R)/(T_F * T_R) # same for both methods

if uncertainty_method == 'BAR':
variance = (afF2/afF**2)/T_F + (afR2/afR**2)/T_R - nrat
dDeltaF = np.sqrt(variance)
elif uncertainty_method == 'MBAR':
# OR equivalently
vartemp = ((afF - afF2)*T_F + (afR - afR2)*T_R)
dDeltaF = np.sqrt(1.0/vartemp - nrat)
else:
message = 'ERROR: BAR uncertainty method {:s} is not defined'.format(uncertainty_method)
raise ParameterError(message)

dDeltaF = np.sqrt(variance)
if verbose:
print("DeltaF = %8.3f +- %8.3f" % (DeltaF, dDeltaF))
print("DeltaF = {:8.3f} +- {:8.3f}".format(DeltaF, dDeltaF))
return (DeltaF, dDeltaF)
else:
if verbose:
print("DeltaF = %8.3f" % (DeltaF))
print("DeltaF = {:8.3f}".format(DeltaF))
return DeltaF

#=============================================================================================
Expand Down

0 comments on commit 27bc127

Please sign in to comment.