Support pymbar 4 (#1173)

* switch to using pymbar 3 & 4 support from openmmtools * fix typo on import * fix yaml * switch to using pymbar 4 * missed a pymbar import * missed another one * bump ci * missed a import * go back to how it was * Update devtools/conda-envs/test_env.yaml --------- Co-authored-by: Iván Pulido <2949729+ijpulidos@users.noreply.github.com>
choderalab · Jul 19, 2023 · 2d19c20 · 2d19c20
1 parent a46516c
commit 2d19c20
Show file tree

Hide file tree

Showing 9 changed files with 31 additions and 32 deletions.
diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml
@@ -34,7 +34,7 @@ dependencies:
   - pdbfixer
   - pip
   - progressbar2
-  - pymbar <4
+  - pymbar >=4
   - pytest
   - pytest-attrib
   - pytest-cov

diff --git a/perses/analysis/analysis.py b/perses/analysis/analysis.py
@@ -15,7 +15,7 @@
 
 import numpy as np
 import itertools
-import pymbar
+from openmmtools.multistate.pymbar import _pymbar_bar
 from perses import storage
 import seaborn as sns
 
@@ -203,7 +203,7 @@ def get_free_energies(self, environment):
                 resampled_w_F = np.random.choice(w_F, len(w_F), replace=True)
                 resampled_w_R = np.random.choice(w_R, len(w_R), replace=True)
 
-                [df, ddf] = pymbar.BAR(resampled_w_F, resampled_w_R)
+                [df, ddf] = _pymbar_bar(resampled_w_F, resampled_w_R)
                 bootstrapped_bar[i] = df
 
             free_energies[state_pair] = [np.mean(bootstrapped_bar), np.std(bootstrapped_bar)]

diff --git a/perses/analysis/fah_analysis.py b/perses/analysis/fah_analysis.py
@@ -2,7 +2,7 @@
 import numpy as np
 import pandas as pd
 import seaborn as sns
-from pymbar import BAR
+from openmmtools.multistate.pymbar import _pymbar_bar
 import matplotlib.pyplot as plt
 import seaborn
 from simtk.openmm import unit
@@ -160,7 +160,7 @@ def _bootstrap_BAR(run, phase, gen_id, n_bootstrap):
                 f"less than {min_num_work_values} reverse work values (got {len(r_works)})"
             )
 
-        fe, err = bootstrap_uncorrelated(BAR, n_iters=n_bootstrap)(
+        fe, err = bootstrap_uncorrelated(_pymbar_bar, n_iters=n_bootstrap)(
             f_works.values, r_works.values
         )
 
@@ -227,7 +227,7 @@ def _process_phase(i, phase):
             axes[i].set_title(phase)
 
             # TODO add bootstrapping here
-            d[f"{phase}_fes"] = BAR(np.asarray(all_forward), np.asarray(all_reverse))
+            d[f"{phase}_fes"] = _pymbar_bar(np.asarray(all_forward), np.asarray(all_reverse))
 
         for i, phase in enumerate(projects.keys()):
             try:

diff --git a/perses/app/relative_hydration.py b/perses/app/relative_hydration.py
@@ -108,11 +108,11 @@ def check_alchemical_hybrid_elimination_bar(topology_proposal, old_positions, ne
         bar.update(i)
     del reverse_context, reverse_integrator
 
-    from pymbar import BAR
-    [df, ddf] = BAR(w_f, w_r)
+    from openmmtools.multistate.pymbar import _pymbar_bar
+    [df, ddf] = _pymbar_bar(w_f, w_r)
     print("df = %12.6f +- %12.5f kT" % (df, ddf))
 
 
 if __name__=="__main__":
     topology_proposal, old_positions, new_positions = generate_solvated_hybrid_test_topology()
-    check_alchemical_hybrid_elimination_bar(topology_proposal, old_positions, new_positions, ncmc_nsteps=100, n_iterations=500)
+    check_alchemical_hybrid_elimination_bar(topology_proposal, old_positions, new_positions, ncmc_nsteps=100, n_iterations=500)
diff --git a/perses/app/relative_setup.py b/perses/app/relative_setup.py
@@ -9,7 +9,7 @@
 
 from openmmtools.states import ThermodynamicState, CompoundThermodynamicState, SamplerState
 
-import pymbar
+from openmmtools.multistate.pymbar import _pymbar_bar, _pymbar_exp, detect_equilibration
 from cloudpathlib import AnyPath
 import simtk.openmm as openmm
 import simtk.openmm.app as app
@@ -1661,11 +1661,11 @@ def compute_sMC_free_energy(self):
         """
         for _direction, works in self._nonequilibrium_cumulative_work.items():
             if works is not None:
-                self.dg_EXP[_direction] = pymbar.EXP(works[:,-1])
+                self.dg_EXP[_direction] = _pymbar_exp(works[:,-1])
 
         if all(work is not None for work in self._nonequilibrium_cumulative_work.values()):
             #then we can compute a BAR estimate
-            self.dg_BAR = pymbar.BAR(self._nonequilibrium_cumulative_work['forward'][:,-1], self._nonequilibrium_cumulative_work['reverse'][:,-1])
+            self.dg_BAR = _pymbar_bar(self._nonequilibrium_cumulative_work['forward'][:,-1], self._nonequilibrium_cumulative_work['reverse'][:,-1])
 
 
     def attempt_resample(self, observable = 'ESS', resampling_method = 'multinomial', resample_observable_threshold = 0.5):
@@ -2096,7 +2096,7 @@ def _adjust_for_correlation(self, timeseries_array: np.array):
         Neff_max : float
             Max number of uncorrelated samples
         """
-        [t0, g, Neff_max] = pymbar.timeseries.detectEquilibration(timeseries_array)
+        [t0, g, Neff_max] = detect_equilibration(timeseries_array)
 
         return timeseries_array[t0:], g, Neff_max
 
@@ -2147,11 +2147,11 @@ def _endpoint_perturbations(self, direction = None, num_subsamples = 100):
             self._nonalchemical_reduced_potential_differences[start_lambda] = np.array([nonalchemical_reduced_potential_differences[i] for i in nonalch_full_uncorrelated_indices])
 
             #now to bootstrap results
-            alchemical_exp_results = np.array([pymbar.EXP(np.random.choice(self._alchemical_reduced_potential_differences[_direction], size = (len(self._alchemical_reduced_potential_differences[_direction]))), compute_uncertainty=False) for _ in range(num_subsamples)])
+            alchemical_exp_results = np.array([_pymbar_exp(np.random.choice(self._alchemical_reduced_potential_differences[_direction], size = (len(self._alchemical_reduced_potential_differences[_direction]))), compute_uncertainty=False) for _ in range(num_subsamples)])
             self._EXP[_direction] = (np.average(alchemical_exp_results), np.std(alchemical_exp_results)/np.sqrt(num_subsamples))
             _logger.debug(f"alchemical exp result for {_direction}: {self._EXP[_direction]}")
 
-            nonalchemical_exp_results = np.array([pymbar.EXP(np.random.choice(self._nonalchemical_reduced_potential_differences[start_lambda], size = (len(self._nonalchemical_reduced_potential_differences[start_lambda]))), compute_uncertainty=False) for _ in range(num_subsamples)])
+            nonalchemical_exp_results = np.array([_pymbar_exp(np.random.choice(self._nonalchemical_reduced_potential_differences[start_lambda], size = (len(self._nonalchemical_reduced_potential_differences[start_lambda]))), compute_uncertainty=False) for _ in range(num_subsamples)])
             self._EXP[start_lambda] = (np.average(nonalchemical_exp_results), np.std(nonalchemical_exp_results)/np.sqrt(num_subsamples))
             _logger.debug(f"nonalchemical exp result for {start_lambda}: {self._EXP[start_lambda]}")
 
@@ -2167,7 +2167,7 @@ def _alchemical_free_energy(self, num_subsamples = 100):
         work_subsamples = {'forward': [np.random.choice(self._nonequilibrium_cum_work['forward'], size = (len(self._nonequilibrium_cum_work['forward']))) for _ in range(num_subsamples)],
                            'reverse': [np.random.choice(self._nonequilibrium_cum_work['reverse'], size = (len(self._nonequilibrium_cum_work['reverse']))) for _ in range(num_subsamples)]}
 
-        bar_estimates = np.array([pymbar.BAR(forward_sample, reverse_sample, compute_uncertainty=False) for forward_sample, reverse_sample in zip(work_subsamples['forward'], work_subsamples['reverse'])])
+        bar_estimates = np.array([_pymbar_bar(forward_sample, reverse_sample, compute_uncertainty=False) for forward_sample, reverse_sample in zip(work_subsamples['forward'], work_subsamples['reverse'])])
         df, ddf = np.average(bar_estimates), np.std(bar_estimates) / np.sqrt(num_subsamples)
         self._BAR = [df, ddf]
         return (df, ddf)

diff --git a/perses/dispersed/smc.py b/perses/dispersed/smc.py
@@ -10,7 +10,7 @@
 from perses.annihilation.lambda_protocol import LambdaProtocol
 from perses.annihilation.lambda_protocol import RelativeAlchemicalState
 import random
-import pymbar
+from openmmtools.multistate.pymbar import _pymbar_bar, _pymbar_exp
 from perses.dispersed.parallel import Parallelism
 # Instantiate logger
 _logger = logging.getLogger("sMC")
@@ -726,10 +726,10 @@ def compute_sMC_free_energy(self, cumulative_work_dict):
         self.dg_EXP = {}
         for _direction, _lst in cumulative_work_dict.items():
             self.cumulative_work[_direction] = _lst
-            self.dg_EXP[_direction] = np.array([pymbar.EXP(_lst[:,i]) for i in range(_lst.shape[1])])
+            self.dg_EXP[_direction] = np.array([_pymbar_exp(_lst[:,i]) for i in range(_lst.shape[1])])
             _logger.debug(f"cumulative_work for {_direction}: {self.cumulative_work[_direction]}")
         if len(list(self.cumulative_work.keys())) == 2:
-            self.dg_BAR = pymbar.BAR(self.cumulative_work['forward'][:, -1], self.cumulative_work['reverse'][:, -1])
+            self.dg_BAR = _pymbar_bar(self.cumulative_work['forward'][:, -1], self.cumulative_work['reverse'][:, -1])
 
     def minimize_sampler_states(self):
         """

diff --git a/perses/dispersed/utils.py b/perses/dispersed/utils.py
@@ -306,10 +306,10 @@ def compute_timeseries(reduced_potentials):
         uncorrelated indices
 
     """
-    from pymbar import timeseries
-    t0, g, Neff_max = timeseries.detectEquilibration(reduced_potentials) #computing indices of uncorrelated timeseries
+    from openmmtools.multistate.pymbar import detect_equilibration, subsample_correlated_data
+    t0, g, Neff_max = detect_equilibration(reduced_potentials) #computing indices of uncorrelated timeseries
     A_t_equil = reduced_potentials[t0:]
-    uncorrelated_indices = timeseries.subsampleCorrelatedData(A_t_equil, g=g)
+    uncorrelated_indices = subsample_correlated_data(A_t_equil, g=g)
     A_t = A_t_equil[uncorrelated_indices]
     full_uncorrelated_indices = [i+t0 for i in uncorrelated_indices]
 

diff --git a/perses/tests/test_relative.py b/perses/tests/test_relative.py
@@ -19,9 +19,8 @@
 from openmmtools.integrators import LangevinIntegrator
 from unittest import skipIf
 
-import pymbar.timeseries as timeseries
+from openmmtools.multistate.pymbar import _pymbar_exp, detect_equilibration
 import pytest
-import pymbar
 
 running_on_github_actions = os.environ.get('GITHUB_ACTIONS', None) == 'true'
 
@@ -100,9 +99,9 @@ def calculate_cross_variance(all_results):
         non_b = all_results[2]
         hybrid_b = all_results[3]
     print('CROSS VALIDATION')
-    [df, ddf] = pymbar.EXP(non_a - hybrid_b)
+    [df, ddf] = _pymbar_exp(non_a - hybrid_b)
     print('df: {}, ddf: {}'.format(df, ddf))
-    [df, ddf] = pymbar.EXP(non_b - hybrid_a)
+    [df, ddf] = _pymbar_exp(non_b - hybrid_a)
     print('df: {}, ddf: {}'.format(df, ddf))
     return
 
@@ -395,9 +394,9 @@ def run_endpoint_perturbation(lambda_thermodynamic_state, nonalchemical_thermody
         md.Trajectory(nonalchemical_trajectory / unit.nanometers, nonalchemical_mdtraj_topology).save(f'nonalchemical{lambda_index}.pdb')
 
     # Analyze data and return results
-    [t0, g, Neff_max] = timeseries.detectEquilibration(w)
+    [t0, g, Neff_max] = detect_equilibration(w)
     w_burned_in = w[t0:]
-    [df, ddf] = pymbar.EXP(w_burned_in)
+    [df, ddf] = _pymbar_exp(w_burned_in)
     ddf_corrected = ddf * np.sqrt(g)
     results = [df, ddf_corrected, t0, Neff_max]
 

diff --git a/perses/tests/utils.py b/perses/tests/utils.py
@@ -504,7 +504,7 @@ def validate_rjmc_work_variance(top_prop, positions, geometry_method = 0, num_it
     md_steps: int
         number of md_steps to run in each num_iteration
     compute_timeseries = bool (default False)
-        whether to use pymbar detectEquilibration and subsampleCorrelated data from the MD run (the potential energy is the data)
+        whether to use pymbar detect_equilibration and subsample_correlated data from the MD run (the potential energy is the data)
     prespecified_conformers = None or unit.Quantity(np.array([num_iterations, system.getNumParticles(), 3]), unit = unit.nanometers)
         whether to input a unit.Quantity of conformers and bypass the conformer_generation/pymbar stage; None will default conduct this phase
 
@@ -562,9 +562,9 @@ def validate_rjmc_work_variance(top_prop, positions, geometry_method = 0, num_it
 
         if compute_timeseries:
             print(f"computing production and data correlation")
-            from pymbar import timeseries
-            t0, g, Neff = timeseries.detectEquilibration(rps)
-            series = timeseries.subsampleCorrelatedData(np.arange(t0, num_iterations), g = g)
+            from openmmtools.multistate.pymbar import detect_equilibration
+            t0, g, Neff = detect_equilibration(rps)
+            series = subsample_correlated_data(np.arange(t0, num_iterations), g = g)
             print(f"production starts at index {t0} of {num_iterations}")
             print(f"the number of effective samples is {Neff}")
             indices = t0 + series