Skip to content

Commit

Permalink
Merge pull request #152 from kyleabeauchamp/cleaner
Browse files Browse the repository at this point in the history
Switched to int64, moved logsumexp to utils.
  • Loading branch information
mrshirts committed Jan 10, 2015
2 parents 90ee48c + 62ed5dc commit 10de035
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 113 deletions.
6 changes: 3 additions & 3 deletions pymbar/bar.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
#=============================================================================================
import numpy as np
import numpy.linalg
from pymbar.utils import _logsum, ParameterError, ConvergenceError, BoundsError
from pymbar.utils import ParameterError, ConvergenceError, BoundsError, logsumexp
from pymbar.exp import EXP


Expand Down Expand Up @@ -111,7 +111,7 @@ def BARzero(w_F, w_R, DeltaF):
# give up; if there's overflow, return zero
print("The input data results in overflow in BAR")
return np.nan
log_numer = _logsum(log_f_F) - np.log(T_F)
log_numer = logsumexp(log_f_F) - np.log(T_F)

# Compute log_denominator.
# log_denom = log < f(-W) exp[-W] >_R
Expand All @@ -123,7 +123,7 @@ def BARzero(w_F, w_R, DeltaF):
except:
print("The input data results in overflow in BAR")
return np.nan
log_denom = _logsum(log_f_R) - np.log(T_R)
log_denom = logsumexp(log_f_R) - np.log(T_R)

# This function must be zeroed to find a root
fzero = DeltaF - (log_denom - log_numer)
Expand Down
4 changes: 2 additions & 2 deletions pymbar/exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
# IMPORTS
#=============================================================================================
import numpy as np
from pymbar.utils import _logsum
from pymbar.utils import logsumexp

#=============================================================================================
# One-sided exponential averaging (EXP).
Expand Down Expand Up @@ -96,7 +96,7 @@ def EXP(w_F, compute_uncertainty=True, is_timeseries=False):
T = float(np.size(w_F)) # number of work measurements

# Estimate free energy difference by exponential averaging using DeltaF = - log < exp(-w_F) >
DeltaF = - (_logsum(- w_F) - np.log(T))
DeltaF = - (logsumexp(- w_F) - np.log(T))

if compute_uncertainty:
# Compute x_i = np.exp(-w_F_i - max_arg)
Expand Down
26 changes: 13 additions & 13 deletions pymbar/mbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
import numpy as np
import numpy.linalg as linalg
from pymbar import mbar_solvers
from pymbar.utils import _logsum, kln_to_kn, kn_to_n, ParameterError
from pymbar.utils import kln_to_kn, kn_to_n, ParameterError, logsumexp

DEFAULT_SOLVER_PROTOCOL = mbar_solvers.DEFAULT_SOLVER_PROTOCOL
DEFAULT_SUBSAMPLING_PROTOCOL = mbar_solvers.DEFAULT_SUBSAMPLING_PROTOCOL
Expand Down Expand Up @@ -155,7 +155,7 @@ def __init__(self, u_kn, N_k, maximum_iterations=10000, relative_tolerance=1.0e-

# Store local copies of necessary data.
# N_k[k] is the number of samples from state k, some of which might be zero.
self.N_k = np.array(N_k, dtype=np.int32)
self.N_k = np.array(N_k, dtype=np.int64)
self.N = np.sum(self.N_k)

# Get dimensions of reduced potential energy matrix, and convert to KxN form if needed.
Expand Down Expand Up @@ -184,7 +184,7 @@ def __init__(self, u_kn, N_k, maximum_iterations=10000, relative_tolerance=1.0e-
if x_kindices is not None:
self.x_kindices = x_kindices
else:
self.x_kindices = np.arange(N, dtype=np.int32)
self.x_kindices = np.arange(N, dtype=np.int64)
Nsum = 0
for k in range(K):
self.x_kindices[Nsum:Nsum+N_k[k]] = k
Expand Down Expand Up @@ -223,7 +223,7 @@ def __init__(self, u_kn, N_k, maximum_iterations=10000, relative_tolerance=1.0e-

# Determine list of k indices for which N_k != 0
self.states_with_samples = np.where(self.N_k != 0)[0]
self.states_with_samples = self.states_with_samples.astype(np.int32)
self.states_with_samples = self.states_with_samples.astype(np.int64)

# Number of states with samples.
self.K_nonzero = self.states_with_samples.size
Expand Down Expand Up @@ -611,7 +611,7 @@ def computeExpectationsInner(self, A_n, u_ln, state_map,
msize = K + NL + S # augmented size; all of the states needed to calculate
# the observables, and the observables themselves.
Log_W_nk = np.zeros([N, msize], np.float64) # log weight matrix
N_k = np.zeros([msize], np.int32) # counts
N_k = np.zeros([msize], np.int64) # counts
f_k = np.zeros([msize], np.float64) # free energies

# <A> = A(x_n) exp[f_{k} - q_{k}(x_n)] / \sum_{k'=1}^K N_{k'} exp[f_{k'} - q_{k'}(x_n)]
Expand All @@ -625,7 +625,7 @@ def computeExpectationsInner(self, A_n, u_ln, state_map,
for l in L_list:
la = K+l #l, augmented
Log_W_nk[:, la] = self._computeUnnormalizedLogWeights(u_ln[l,:])
f_k[la] = -_logsum(Log_W_nk[:, la])
f_k[la] = -logsumexp(Log_W_nk[:, la])
Log_W_nk[:, la] += f_k[la]

# Compute the remaining rows/columns of W_nk, and calculate
Expand All @@ -635,7 +635,7 @@ def computeExpectationsInner(self, A_n, u_ln, state_map,
l = K + state_map[0,s]
i = state_map[1,s]
Log_W_nk[:, sa] = np.log(A_n[i, :]) + Log_W_nk[:, l]
f_k[sa] = -_logsum(Log_W_nk[:, sa])
f_k[sa] = -logsumexp(Log_W_nk[:, sa])
Log_W_nk[:, sa] += f_k[sa] # normalize this row

# Compute estimates of A_i[s]
Expand Down Expand Up @@ -1252,7 +1252,7 @@ def computePMF(self, u_n, bin_n, nbins, uncertainties='from-lowest', pmf_referen
>>> x_n_sorted = np.sort(x_n) # unroll to n-indices
>>> bins = np.append(x_n_sorted[0::(N_tot/nbins)], x_n_sorted.max()+0.1)
>>> bin_widths = bins[1:] - bins[0:-1]
>>> bin_n = np.zeros(x_n.shape, np.int32)
>>> bin_n = np.zeros(x_n.shape, np.int64)
>>> bin_n = np.digitize(x_n, bins) - 1
>>> # Compute PMF for these unequally-sized bins.
>>> [f_i, df_i] = mbar.computePMF(u_n, bin_n, nbins)
Expand Down Expand Up @@ -1291,10 +1291,10 @@ def computePMF(self, u_n, bin_n, nbins, uncertainties='from-lowest', pmf_referen
raise "WARNING: bin %d has no samples -- all bins must have at least one sample." % i

# Compute dimensionless free energy of occupying state i.
f_i[i] = - _logsum(log_w_n[indices])
f_i[i] = - logsumexp(log_w_n[indices])

# Compute uncertainties by forming matrix of W_nk.
N_k = np.zeros([self.K + nbins], np.int32)
N_k = np.zeros([self.K + nbins], np.int64)
N_k[0:K] = self.N_k
W_nk = np.zeros([self.N, self.K + nbins], np.float64)
W_nk[:, 0:K] = np.exp(self.Log_W_nk)
Expand Down Expand Up @@ -1351,7 +1351,7 @@ def computePMF(self, u_n, bin_n, nbins, uncertainties='from-lowest', pmf_referen
# Determine uncertainties from normalization that \sum_i p_i = 1.

# Compute bin probabilities p_i
p_i = np.exp(-f_i - _logsum(-f_i))
p_i = np.exp(-f_i - logsumexp(-f_i))

# todo -- eliminate triple loop over nbins!
# Compute uncertainties in bin probabilities.
Expand Down Expand Up @@ -1442,7 +1442,7 @@ def _computeAsymptoticCovarianceMatrix(self, W, N_k, method=None):
REQUIRED ARGUMENTS
W (np.array of np.float of dimension [N,K]) - matrix of normalized weights (see Eq. 9 of [1]) - W[n,k] is the weight of snapshot n (n = 1..N) in state k
Note that sum(W(:,k)) = 1 for any k = 1..K, and sum(N_k(:) .* W(n,:)) = 1 for any n.
N_k (np.array of np.int32 of dimension [K]) - N_k[k] is the number of samples from state K
N_k (np.array of np.int64 of dimension [K]) - N_k[k] is the number of samples from state K
RETURN VALUES
Theta (KxK np float64 array) - asymptotic covariance matrix (see Eq. 8 of [1])
Expand Down Expand Up @@ -1642,4 +1642,4 @@ def _computeUnnormalizedLogWeights(self, u_n):
REFERENCE
'log weights' here refers to \log [ \sum_{k=1}^K N_k exp[f_k - (u_k(x_n) - u(x_n)] ]
"""
return -1. * mbar_solvers.logsumexp(self.f_k + u_n[:, np.newaxis] - self.u_kn.T, b=self.N_k, axis=1)
return -1. * logsumexp(self.f_k + u_n[:, np.newaxis] - self.u_kn.T, b=self.N_k, axis=1)
71 changes: 1 addition & 70 deletions pymbar/mbar_solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,83 +2,14 @@
import numpy as np
import math
import scipy.optimize
from pymbar.utils import ensure_type


try: # numexpr used in logsumexp when available.
import numexpr
HAVE_NUMEXPR = True
except ImportError:
HAVE_NUMEXPR = False
from pymbar.utils import ensure_type, logsumexp

# Below are the recommended default protocols (ordered sequence of minimization algorithms / NLE solvers) for solving the MBAR equations.
# Note: we use tuples instead of lists to avoid accidental mutability.
DEFAULT_SUBSAMPLING_PROTOCOL = (dict(method="L-BFGS-B"), ) # First use BFGS on subsampled data.
DEFAULT_SOLVER_PROTOCOL = (dict(method="hybr"), ) # Then do fmin hybrid on full dataset.



def logsumexp(a, axis=None, b=None, use_numexpr=True):
"""Compute the log of the sum of exponentials of input elements.
Parameters
----------
a : array_like
Input array.
axis : None or int, optional, default=None
Axis or axes over which the sum is taken. By default `axis` is None,
and all elements are summed.
b : array-like, optional
Scaling factor for exp(`a`) must be of the same shape as `a` or
broadcastable to `a`.
use_numexpr : bool, optional, default=True
If True, use the numexpr library to speed up the calculation, which
can give a 2-4X speedup when working with large arrays.
Returns
-------
res : ndarray
The result, ``log(sum(exp(a)))`` calculated in a numerically
more stable way. If `b` is given then ``log(sum(b*exp(a)))``
is returned.
See Also
--------
numpy.logaddexp, numpy.logaddexp2, scipy.misc.logsumexp
Notes
-----
This is based on scipy.misc.logsumexp but with optional numexpr
support for improved performance.
"""

a = np.asarray(a)

a_max = np.amax(a, axis=axis, keepdims=True)

if a_max.ndim > 0:
a_max[~np.isfinite(a_max)] = 0
elif not np.isfinite(a_max):
a_max = 0

if b is not None:
b = np.asarray(b)
if use_numexpr and HAVE_NUMEXPR:
out = np.log(numexpr.evaluate("b * exp(a - a_max)").sum(axis))
else:
out = np.log(np.sum(b * np.exp(a - a_max), axis=axis))
else:
if use_numexpr and HAVE_NUMEXPR:
out = np.log(numexpr.evaluate("exp(a - a_max)").sum(axis))
else:
out = np.log(np.sum(np.exp(a - a_max), axis=axis))

a_max = np.squeeze(a_max, axis=axis)
out += a_max

return out


def validate_inputs(u_kn, N_k, f_k):
"""Check types and return inputs for MBAR calculations.
Expand Down
21 changes: 0 additions & 21 deletions pymbar/tests/test_mbar_solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,6 @@
import scipy.misc
from nose import SkipTest

def test_logsumexp():
a = np.random.normal(size=(200, 500, 5))

for axis in range(a.ndim):
ans_ne = pymbar.mbar_solvers.logsumexp(a, axis=axis)
ans_no_ne = pymbar.mbar_solvers.logsumexp(a, axis=axis, use_numexpr=False)
ans_scipy = scipy.misc.logsumexp(a, axis=axis)
eq(ans_ne, ans_no_ne)
eq(ans_ne, ans_scipy)

def test_logsumexp_b():
a = np.random.normal(size=(200, 500, 5))
b = np.random.normal(size=(200, 500, 5)) ** 2.

for axis in range(a.ndim):
ans_ne = pymbar.mbar_solvers.logsumexp(a, b=b, axis=axis)
ans_no_ne = pymbar.mbar_solvers.logsumexp(a, b=b, axis=axis, use_numexpr=False)
ans_scipy = scipy.misc.logsumexp(a, b=b, axis=axis)
eq(ans_ne, ans_no_ne)
eq(ans_ne, ans_scipy)


def load_oscillators(n_states, n_samples):
name = "%dx%d oscillators" % (n_states, n_samples)
Expand Down
32 changes: 32 additions & 0 deletions pymbar/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import numpy as np
import pymbar
from pymbar.utils_for_testing import eq
import scipy.misc
from nose import SkipTest

def test_logsumexp():
a = np.random.normal(size=(200, 500, 5))

for axis in range(a.ndim):
ans_ne = pymbar.utils.logsumexp(a, axis=axis)
ans_no_ne = pymbar.utils.logsumexp(a, axis=axis, use_numexpr=False)
ans_scipy = scipy.misc.logsumexp(a, axis=axis)
eq(ans_ne, ans_no_ne)
eq(ans_ne, ans_scipy)

def test_logsumexp_b():
a = np.random.normal(size=(200, 500, 5))
b = np.random.normal(size=(200, 500, 5)) ** 2.

for axis in range(a.ndim):
ans_ne = pymbar.utils.logsumexp(a, b=b, axis=axis)
ans_no_ne = pymbar.utils.logsumexp(a, b=b, axis=axis, use_numexpr=False)
ans_scipy = scipy.misc.logsumexp(a, b=b, axis=axis)
eq(ans_ne, ans_no_ne)
eq(ans_ne, ans_scipy)

def test_logsum():
u = np.random.normal(size=(200))
y1 = pymbar.utils.logsumexp(u)
y2 = pymbar.utils._logsum(u)
eq(y1, y2, decimal=12)

0 comments on commit 10de035

Please sign in to comment.