Skip to content

Commit

Permalink
Merge pull request #143 from mrshirts/master
Browse files Browse the repository at this point in the history
Adding effective sample functionality
  • Loading branch information
kyleabeauchamp committed Jan 5, 2015
2 parents 84649b9 + 1421f88 commit 9b6690f
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 2 deletions.
35 changes: 35 additions & 0 deletions examples/harmonic-oscillators.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,41 @@ def GetAnalytical(beta,K,O,observables):
print "Overlap scalar output"
print O

print "============================================"
print " Testing computeEffectiveSampleNumber "
print "============================================"

N_eff = mbar.computeEffectiveSampleNumber(verbose = True)
print "Effective Sample number"
print N_eff
print "Compare stanadrd estimate of <x> with the MBAR estimate of <x>"
print "We should have that with MBAR, err_MBAR = sqrt(N_k/N_eff)*err_standard,"
print "so standard (scaled) results should be very close to MBAR results."
print "No standard estimate exists for states that are not sampled."
A_kn = x_kn
(val_mbar, err_mbar) = mbar.computeExpectations(A_kn)
err_standard = numpy.zeros([K],dtype = numpy.float64)
err_scaled = numpy.zeros([K],dtype = numpy.float64)

for k in range(K):
if N_k[k] != 0:
# use position
err_standard[k] = numpy.std(A_kn[k,0:N_k[k]])/numpy.sqrt(N_k[k]-1)
err_scaled[k] = numpy.std(A_kn[k,0:N_k[k]])/numpy.sqrt(N_eff[k]-1)

print " ",
for k in range(K):
print " %d " %(k),
print ""
print "MBAR :",
print err_mbar
print "standard :",
print err_standard
print "sqrt N_k/N_eff :",
print numpy.sqrt(N_k/N_eff)
print "Standard (scaled):",
print err_standard * numpy.sqrt(N_k/N_eff)

print "============================================"
print " Testing computePMF "
print "============================================"
Expand Down
57 changes: 57 additions & 0 deletions pymbar/mbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,63 @@ def getWeights(self):

return np.exp(self.Log_W_nk)

#=========================================================================
def computeEffectiveSampleNumber(self, verbose = False):
"""
Compute the effective sample number of each state;
essentially, an estimate of how many samples are contributing to the average
at given state. See pymbar/examples for a demonstration.
It also counts the efficiency of the sampling, which is simply the ratio
of the effective number of samples at a given state to the total number
of samples collected.
Returns
-------
N_eff : np.ndarray, float, shape=(K)
estimated number of states contributing to estimates at each
state i. An estimate to how many samples collected just at state
i would result in similar statistical efficiency as the MBAR
simulation. Valid for both sampled states (in which the weight
will be greater than N_k[i], and unsampled states.
Parameters
----------
verbose : print out information about the effective number of samples
Notes
-----
# using Kish (1965) formula (Kish, Leslie (1965). Survey Sampling. New York: Wiley)
# As the weights become more concentrated in fewer observations, the effective sample size shrinks.
# from http://healthcare-economist.com/2013/08/22/effective-sample-size/
# effective # of samples contributing to averages carried out at state i
# = (\sum_{n=1}^N w_in)^2 / \sum_{n=1}^N w_in^2
# = (\sum_{n=1}^N w_in^2)^-1
#
# the effective sample number is most useful to diagnose when there are only a few samples
# contributing to the averages.
Examples
--------
>>> from pymbar import testsystems
>>> [x_kn, u_kln, N_k, s_n] = testsystems.HarmonicOscillatorsTestCase().sample()
>>> mbar = MBAR(u_kln, N_k)
>>> N_eff = mbar.computeEffectiveSampleNumber()
"""

N_eff = np.zeros(self.K)
for k in range(self.K):
w = np.exp(self.Log_W_nk[:,k])
N_eff[k] = 1/np.sum(w**2)

if verbose:
print "Effective number of sample in state %d is %10.3f" % (k,N_eff[k])
print "Efficiency for state %d is %d/%d = %10.4f" % (k,N_eff[k],len(w),N_eff[k]/len(w))

return N_eff

#=========================================================================
def computeOverlap(self, output='scalar'):
"""Compute estimate of overlap matrix between the states.
Expand Down
17 changes: 15 additions & 2 deletions pymbar/tests/test_mbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,22 @@ def test_mbar_computeEntropyAndEnthalpy():
z = convert_to_differences(s_ij,ds_ij,sa)
eq(z / z_scale_factor, np.zeros(np.shape(z)), decimal=0)

def test_mbar_computeOverlap():
def test_mbar_computeEffectiveSampleNumber():
""" testing computeEffectiveSampleNumber """

""" testing computeOverlap """
for system_generator in system_generators:
name, test = system_generator()
x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
eq(N_k, N_k_output)
mbar = MBAR(u_kn, N_k)

# one mathematical effective sample numbers should be between N_k and sum_k N_k
N_eff = mbar.computeEffectiveSampleNumber()
sumN = np.sum(N_k)
for k in range(len(N_eff)):
eq(np.bool(N_eff[k] > N_k[k] and N_eff[k] < sumN),True)

def test_mbar_computeOverlap():

# tests with identical states, which gives analytical results.

Expand Down

0 comments on commit 9b6690f

Please sign in to comment.