Skip to content

Commit

Permalink
Merge pull request #159 from smcantab/stefano/time_series_fix
Browse files Browse the repository at this point in the history
Stefano/time series fix
  • Loading branch information
kyleabeauchamp committed Jan 14, 2015
2 parents c7a7bbc + afd617f commit 1678cad
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 14 deletions.
5 changes: 3 additions & 2 deletions devtools/conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ requirements:
- scipy
- setuptools
- numexpr
- six
- six
run:
- python
- numpy
Expand All @@ -21,7 +21,8 @@ requirements:
test:
requires:
- nose
- setuptools
- setuptools
- statsmodels
imports:
- pymbar
commands:
Expand Down
25 changes: 25 additions & 0 deletions pymbar/tests/test_timeseries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from pymbar import timeseries
from pymbar import testsystems
import numpy as np
from scipy import stats
from pymbar.utils_for_testing import eq, skipif
from six.moves import xrange

Expand Down Expand Up @@ -102,6 +104,29 @@ def test_detectEquil():
def test_detectEquil_binary():
x = np.random.normal(size=10000)
(t, g, Neff_max) = timeseries.detectEquilibration_binary_search(x)

@skipif(not HAVE_STATSMODELS, "Skipping FFT based tests because statsmodels not installed.")
def test_compare_detectEquil(show_hist=False):
"""
compare detectEquilibration implementations (with and without binary search + fft)
"""
t_res = []
N=100
for _ in xrange(100):
A_t = testsystems.correlated_timeseries_example(N=N, tau=5.0) + 2.0
B_t = testsystems.correlated_timeseries_example(N=N, tau=5.0) + 1.0
C_t = testsystems.correlated_timeseries_example(N=N*2, tau=5.0)
D_t = np.concatenate([A_t, B_t, C_t, np.zeros(20)]) #concatenate and add flat region to one end (common in MC data)
bs_de = timeseries.detectEquilibration_binary_search(D_t, bs_nodes=10)
std_de = timeseries.detectEquilibration(D_t, fast=False, nskip=1)
t_res.append(bs_de[0]-std_de[0])
t_res_mode = float(stats.mode(t_res)[0][0])
eq(t_res_mode,0.,decimal=1)
if show_hist:
import matplotlib.pyplot as plt
plt.hist(t_res)
plt.show()



def test_detectEquil_constant_trailing():
Expand Down
26 changes: 14 additions & 12 deletions pymbar/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -833,13 +833,16 @@ def statisticalInefficiency_fft(A_n, mintime=3):
return g #, g_t, C_t


def detectEquilibration_binary_search(A_t):
def detectEquilibration_binary_search(A_t, bs_nodes=10):
"""Automatically detect equilibrated region of a dataset using a heuristic that maximizes number of effectively uncorrelated samples.
Parameters
----------
A_t : np.ndarray
timeseries
bs_nodes : int > 4
number of geometrically distributed binary search nodes
Returns
-------
Expand All @@ -858,6 +861,7 @@ def detectEquilibration_binary_search(A_t):
likely be found if the N_eff[t] varies smoothly.
"""
assert bs_nodes > 4, "Number of nodes for binary search must be > 4"
T = A_t.size

# Special case if timeseries is constant.
Expand All @@ -866,36 +870,34 @@ def detectEquilibration_binary_search(A_t):

start = 1
end = T - 1
n_grid = min(5, T)
n_grid = min(bs_nodes, T)

while True:
time_grid = np.unique((10 ** np.linspace(np.log10(start), np.log10(end), n_grid)).round().astype('int'))

g_t = np.ones(len(time_grid))
Neff_t = np.ones(len(time_grid))
time_grid = np.unique((10 ** np.linspace(np.log10(start), np.log10(end), n_grid)).round().astype('int'))
g_t = np.ones(time_grid.size)
Neff_t = np.ones(time_grid.size)

for k, t in enumerate(time_grid):
g_t[k] = statisticalInefficiency_fft(A_t[t:])
Neff_t[k] = (T - t + 1) / g_t[k]
if t < T-1:
g_t[k] = statisticalInefficiency_fft(A_t[t:])
Neff_t[k] = (T - t + 1) / g_t[k]

Neff_max = Neff_t.max()
k = Neff_t.argmax()
t = time_grid[k]
g = g_t[k]

if (time_grid.max() - time_grid.min() <= n_grid - 1.):
if (end - start < 4):
break

if k == 0:
start = time_grid[0]
end = time_grid[1]
elif k == len(time_grid) - 1:
elif k == time_grid.size - 1:
start = time_grid[-2]
end = time_grid[-1]
else:
start = time_grid[k - 1]
end = time_grid[k + 1]



return (t, g, Neff_max)

0 comments on commit 1678cad

Please sign in to comment.