Merge pull request #290 from choderalab/py3_examples
Examples converted to py3
mrshirts committed Feb 24, 2018
2 parents 24fc03f + d4be4e0 commit fe9ea53
Showing 19 changed files with 847 additions and 799 deletions.
@@ -1,23 +1,23 @@
MBAR analysis of single-molecule DNA hairpin single-molecule equilibrium experiments under constant force load.
##MBAR analysis of single-molecule DNA hairpin equilibrium experiments under constant force load.

DESCRIPTION
#DESCRIPTION

REFERENCES
#REFERENCES

[1] Woodside MT, Behnke-Parks WM, Larizadeh K, Travers K, Herschlag D, and Block SM. Nanomechanical measurements of the sequence-dependent folding landscapes of single nucleic acid hairpins. PNAS 103(16):6190-6195, 2006.
[2] Greenleaf WJ, Woodside MT, Abbondanzieri EA, and Block SM. Passive all-optical force clamp for high-resolution laser trapping. PRL 95:208102, 2005.


MANIFEST
#MANIFEST

original-data/ - original data in Excel spreadsheet form, provided by Michael T. Woodside <Michael.Woodside@nrc-cnrc.gc.ca>, corresponding author of [1]. Data has been compressed with bzip2 as 20R55_4T_data.xls.bz2
processed-data/ - external biasing forces (in pN) and extension trajectories (in nm, 0.1 ms time resolution) extracted from the Excel files in original-data/
extract-data.py - Python script to extract data from Excel datafiles (slow!)
force-bias-optical-trap.py - Python script to compute potentials of mean force (PMFs) using MBAR
plots/ - directory containing plots
output/ - directory containing PMFs
`original-data/` - original data in Excel spreadsheet form, provided by Michael T. Woodside <Michael.Woodside@nrc-cnrc.gc.ca>, corresponding author of [1]. Data has been compressed with bzip2 as 20R55_4T_data.xls.bz2
`processed-data/` - external biasing forces (in pN) and extension trajectories (in nm, 0.1 ms time resolution) extracted from the Excel files in original-data/
`extract-data.py` - Python script to extract data from Excel datafiles (run as `extract-data.py` in place)
`force-bias-optical-trap.py` - Python script to compute potentials of mean force (PMFs) using MBAR
`plots/` - directory containing plots
`output/` - directory containing PMFs

CORRESPONDENCE
#USAGE

Correspondence with the corresponding author, Michael T. Woodside, describing the dataset appears below.

@@ -50,10 +50,5 @@ Cheers,
Michael Woodside
National Institute for Nanotechnology, NRC
and Dept. of Physics, University of Alberta
11421 Saskatchewan Dr
Edmonton AB, T6G 2M9
Canada
tel: (780) 641-1695
fax: (780) 641-1601
---------------------------------------------------------------

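The README's manifest implies a two-step workflow: run `extract-data.py` once to unpack the Excel data, then `force-bias-optical-trap.py` to compute the PMFs. A minimal driver sketch, assuming the scripts are run from `examples/constant-force-optical-trap/` (the directory layout comes from the manifest; everything else here is illustrative):

```python
# Minimal driver sketch: run the two example scripts in order.
# Assumes the current working directory is examples/constant-force-optical-trap/.
import subprocess
import sys

# Step 1: unpack original-data/ into processed-data/ (slow, per the manifest).
subprocess.run([sys.executable, "extract-data.py"], check=True)

# Step 2: MBAR analysis; PMFs go to output/ and figures to plots/.
subprocess.run([sys.executable, "force-bias-optical-trap.py"], check=True)
```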
24 changes: 12 additions & 12 deletions examples/constant-force-optical-trap/extract-data.py
@@ -9,17 +9,17 @@
# IMPORTS
#=============================================================================================

from __future__ import print_function
from numpy import * # array routines
import numpy
import commands
import os
import os.path
import sys

try:
import xlrd # pure Python Excel spreadsheet (.xls) file reader
except ImportError:
print "Can't run, requires the xlrd .xls file reader module, at http://www.lexicon.net/sjmachin/xlrd.htm"
print("Can't run, requires the xlrd .xls file reader module, at http://www.lexicon.net/sjmachin/xlrd.htm")
sys.exit()

#=============================================================================================
@@ -40,33 +40,33 @@

# process all datasets
for dataset in datasets:
print "Extracting data from dataset '%s'..." % dataset
print("Extracting data from dataset '%s'..." % dataset)

# Open Excel spreadsheet file.
workbook_filename = os.path.join(original_data_directory, dataset + '_data.xls')
workbook = xlrd.open_workbook(workbook_filename)

# DEBUG
print "Workbook contains %d worksheets:" % workbook.nsheets
print workbook.sheet_names()
print("Workbook contains %d worksheets:" % workbook.nsheets)
print(workbook.sheet_names())

# Get the first worksheet.
worksheet = workbook.sheet_by_index(0)

# DEBUG
print "First worksheet (named '%s') has %d columns and %d rows." % (worksheet.name, worksheet.ncols, worksheet.nrows)
print("First worksheet (named '%s') has %d columns and %d rows." % (worksheet.name, worksheet.ncols, worksheet.nrows))

# Extract biasing forces.
K = worksheet.ncols - 1
biasing_force_k = zeros([K], float64)
for k in range(K):
biasing_force_k[k] = worksheet.cell_value(rowx = 1, colx = 1 + k)
print "%d biasing forces (in pN):" % K
print biasing_force_k
print("%d biasing forces (in pN):" % K)
print(biasing_force_k)

# Write biasing forces.
filename = os.path.join(processed_data_directory, dataset + '.forces')
print "Writing biasing forces to '%s'..." % filename
print("Writing biasing forces to '%s'..." % filename)
outfile = open(filename, 'w')
for k in range(K):
if (k > 0): outfile.write(' ')
@@ -77,14 +77,14 @@
T_max = worksheet.nrows - 3
x_kt = zeros([K, T_max])
for k in range(K):
print "Reading trajectory %d / %d..." % (k+1, K+1)
print("Reading trajectory %d / %d..." % (k+1, K+1))
for t in range(T_max):
x_kt[k,t] = worksheet.cell_value(colx = 1 + k, rowx = 3 + t)
print "Read %d trajectories of %d samples each." % (K, T_max)
print("Read %d trajectories of %d samples each." % (K, T_max))

# Write trajectories.
filename = os.path.join(processed_data_directory, dataset + '.trajectories')
print "Writing trajectories to '%s'..." % filename
print("Writing trajectories to '%s'..." % filename)
outfile = open(filename, 'w')
for t in range(T_max):
for k in range(K):
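The conversion pattern in this file is mechanical: `print` statements become `print()` calls (made available under Python 2 as well via `from __future__ import print_function`), and the removed Python 2 `commands` module is replaced elsewhere in this commit by `subprocess.getoutput`, its documented Python 3 drop-in. A self-contained sketch of both idioms (the temporary file is hypothetical, used only to give `wc -l` something to count):

```python
from __future__ import print_function  # no-op on Python 3; enables print() on Python 2
import os
import subprocess
import tempfile

# A hypothetical scratch file so `wc -l` has something to count.
with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
    f.write('a\nb\nc\n')
    filename = f.name

# Python 2:  commands.getoutput('wc -l %s' % filename)
# Python 3:  subprocess.getoutput is the documented drop-in replacement.
n_lines = int(subprocess.getoutput('wc -l %s' % filename).split()[0])

# Python 2:  print "counted %d lines" % n_lines
print("counted %d lines" % n_lines)
os.remove(filename)
```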
40 changes: 20 additions & 20 deletions examples/constant-force-optical-trap/force-bias-optical-trap.py
@@ -7,13 +7,13 @@
#=============================================================================================
# IMPORTS
#=============================================================================================

from __future__ import print_function
from numpy import * # array routines
import numpy
from math import * # additional mathematical functions
import pymbar # multistate Bennett acceptance ratio analysis (provided by pymbar)
from pymbar import timeseries # timeseries analysis (provided by pymbar)
import commands
import subprocess
import os
import os.path
import sys
@@ -116,20 +116,20 @@ def construct_nonuniform_bins(x_n, nbins):
for k in range(K):
biasing_force_k[k] = float(elements[k])
infile.close()
print "biasing forces (in pN) = "
print biasing_force_k
print("biasing forces (in pN) = ")
print(biasing_force_k)

# Determine maximum number of snapshots in all trajectories.
filename = os.path.join(directory, prefix + '.trajectories')
T_max = int(commands.getoutput('wc -l %s' % filename).split()[0]) + 1
T_max = int(subprocess.getoutput('wc -l %s' % filename).split()[0]) + 1

# Allocate storage for original (correlated) trajectories
T_k = zeros([K], int32) # T_k[k] is the number of snapshots from umbrella simulation k
x_kt = zeros([K,T_max], float64) # x_kt[k,t] is the position of snapshot t from trajectory k (in nm)

# Read the trajectories.
filename = os.path.join(directory, prefix + '.trajectories')
print "Reading %s..." % filename
print("Reading %s..." % filename)
infile = open(filename, 'r')
lines = infile.readlines()
infile.close()
@@ -149,7 +149,7 @@ def construct_nonuniform_bins(x_n, nbins):
all_data_indices = where(mask_kt)

# Construct equal-frequency extension bins
print "binning data..."
print("binning data...")
bin_kt = zeros([K, T_max], int32)
(bin_left_boundary_i, bin_center_i, bin_width_i, bin_assignments) = construct_nonuniform_bins(x_kt[all_data_indices], nbins)
bin_kt[all_data_indices] = bin_assignments
@@ -162,7 +162,7 @@ def construct_nonuniform_bins(x_n, nbins):
g = timeseries.statisticalInefficiency(x_kt[k,0:T_k[k]], x_kt[k,0:T_k[k]])
# store statistical inefficiency
g_k[k] = g
print "timeseries %d : g = %.1f, %.0f uncorrelated samples (of %d total samples)" % (k+1, g, floor(T_k[k] / g), T_k[k])
print("timeseries %d : g = %.1f, %.0f uncorrelated samples (of %d total samples)" % (k+1, g, floor(T_k[k] / g), T_k[k]))
N_max = max(N_max, ceil(T_k[k] / g) + 1)

# Subsample trajectory position data.
@@ -181,8 +181,8 @@ def construct_nonuniform_bins(x_n, nbins):
x0_k = zeros([K], float64) # x position corresponding to zero of potential
for k in range(K):
x0_k[k] = x_kn[k,0:N_k[k]].mean()
print "x0_k = "
print x0_k
print("x0_k = ")
print(x0_k)

# Compute bias energies in units of kT.
u_kln = zeros([K,K,N_max], float64) # u_kln[k,l,n] is the reduced (dimensionless) relative potential energy of snapshot n from umbrella simulation k evaluated at umbrella l
@@ -199,7 +199,7 @@ def construct_nonuniform_bins(x_n, nbins):
start_time = time.time()

# Initialize MBAR.
print "Running MBAR..."
print("Running MBAR...")
mbar = pymbar.MBAR(u_kln, N_k, verbose = True, method = 'adaptive', relative_tolerance = 1.0e-10)

# Compute unbiased energies (all biasing forces are zero).
@@ -209,15 +209,15 @@ def construct_nonuniform_bins(x_n, nbins):
u_kn[k,0:N_k[k]] = 0.0 + pN_nm_to_kT * biasing_force_k[k] * (x_kn[k,0:N_k[k]] - x0_k[k])

# Compute PMF in unbiased potential (in units of kT).
print "Computing PMF..."
print("Computing PMF...")
(f_i, df_i) = mbar.computePMF(u_kn, bin_kn, nbins)
# compute estimate of PMF including Jacobian term
pmf_i = f_i + numpy.log(bin_width_i)
# Write out unbiased estimate of PMF
print "Unbiased PMF (in units of kT)"
print "%8s %8s %8s %8s %8s" % ('bin', 'f', 'df', 'pmf', 'width')
print("Unbiased PMF (in units of kT)")
print("%8s %8s %8s %8s %8s" % ('bin', 'f', 'df', 'pmf', 'width'))
for i in range(nbins):
print "%8.3f %8.3f %8.3f %8.3f %8.3f" % (bin_center_i[i], f_i[i], df_i[i], pmf_i[i], bin_width_i[i])
print("%8.3f %8.3f %8.3f %8.3f %8.3f" % (bin_center_i[i], f_i[i], df_i[i], pmf_i[i], bin_width_i[i]))

filename = os.path.join(output_directory, 'pmf-unbiased.out')
outfile = open(filename, 'w')
@@ -229,7 +229,7 @@ def construct_nonuniform_bins(x_n, nbins):
# DEBUG
stop_time = time.time()
elapsed_time = stop_time - start_time
print "analysis took %f seconds" % elapsed_time
print("analysis took %f seconds" % elapsed_time)

# compute observed and expected histograms at each state
for l in range(0,K):
@@ -256,9 +256,9 @@ def construct_nonuniform_bins(x_n, nbins):
N_i_expected = float(N) * p_i
dN_i_expected = numpy.sqrt(float(N) * p_i * (1.0 - p_i)) # only approximate, since correlations df_i df_j are neglected
# plot
print "state %d (%f pN)" % (l, biasing_force_k[l])
print("state %d (%f pN)" % (l, biasing_force_k[l]))
for bin_index in range(nbins):
print "%8.3f %10f %10f +- %10f" % (bin_center_i[bin_index], N_i_expected[bin_index], N_i_observed[bin_index], dN_i_observed[bin_index])
print("%8.3f %10f %10f +- %10f" % (bin_center_i[bin_index], N_i_expected[bin_index], N_i_observed[bin_index], dN_i_observed[bin_index]))

# Write out observed bin counts
filename = os.path.join(output_directory, 'counts-observed-%d.out' % l)
@@ -314,8 +314,8 @@ def construct_nonuniform_bins(x_n, nbins):
outfile = open(gnuplot_input_filename, 'w')
outfile.write(gnuplot_input)
outfile.close()
output = commands.getoutput('gnuplot < %(gnuplot_input_filename)s' % vars())
output = commands.getoutput('epstopdf %(filename)s' % vars())
output = subprocess.getoutput('gnuplot < %(gnuplot_input_filename)s' % vars())
output = subprocess.getoutput('epstopdf %(filename)s' % vars())



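For orientation, the analysis above reduces to three steps: build the reduced bias energies `u_kln`, run MBAR, then histogram into bins for `computePMF`. The sketch below condenses that pipeline onto synthetic Gaussian "extension" data; the `pymbar` calls mirror the ones visible in this diff (pymbar's API at the time of this commit), while the data, constants, and the `u_kln` formula (extrapolated from the visible `u_kn` line, since the script's own `u_kln` construction is elided above) are illustrative assumptions:

```python
# Condensed sketch of the MBAR/PMF pipeline in force-bias-optical-trap.py,
# run on synthetic data. Everything marked "hypothetical" is invented for
# illustration; the pymbar calls mirror the ones visible in the diff above.
import numpy
import pymbar

K, N = 3, 1000                                    # states, samples per state (hypothetical)
pN_nm_to_kT = 1.0 / 4.1                           # 1 pN*nm in kT at ~300 K (approximate)
biasing_force_k = numpy.array([5.0, 10.0, 15.0])  # biasing forces in pN (hypothetical)

# Stand-in extension trajectories (nm): one Gaussian cloud per biasing force.
rng = numpy.random.RandomState(0)
x_kn = 520.0 + biasing_force_k[:, None] + 2.0 * rng.randn(K, N)
x0_k = x_kn.mean(axis=1)            # zero of each biasing potential, as in the script
N_k = numpy.full(K, N, dtype=numpy.int32)

# u_kln[k,l,n]: reduced energy of snapshot n from state k evaluated under bias l
# (extrapolated from the visible u_kn line; the script's u_kln code is elided).
u_kln = numpy.zeros([K, K, N])
for l in range(K):
    u_kln[:, l, :] = pN_nm_to_kT * biasing_force_k[l] * (x_kn - x0_k[l])

mbar = pymbar.MBAR(u_kln, N_k, verbose=True, method='adaptive',
                   relative_tolerance=1.0e-10)

# Unbiased reduced energies, then an equal-width binning (the script uses
# equal-frequency bins instead); every bin is assumed to be occupied.
u_kn = pN_nm_to_kT * biasing_force_k[:, None] * (x_kn - x0_k[:, None])
nbins = 20
edges = numpy.linspace(x_kn.min(), x_kn.max() + 1e-8, nbins + 1)
bin_kn = (numpy.digitize(x_kn, edges) - 1).astype(numpy.int32)
(f_i, df_i) = mbar.computePMF(u_kn, bin_kn, nbins)
print("PMF (kT):", f_i)
```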
Binary file modified examples/harmonic-oscillators/QQMBARobserve.pdf
Binary file modified examples/harmonic-oscillators/QQdf.pdf
Binary file modified examples/harmonic-oscillators/QQstandardobserve.pdf
59 changes: 31 additions & 28 deletions examples/harmonic-oscillators/harmonic-oscillators-distributions.py
@@ -24,8 +24,10 @@
#=============================================================================================
# IMPORTS
#=============================================================================================
from __future__ import print_function
import numpy
from pymbar import testsystems, MBAR, confidenceintervals
from pymbar.utils import ParameterError, DataError
#=============================================================================================
# PARAMETERS
#=============================================================================================
@@ -41,7 +43,7 @@
try:
import matplotlib.pyplot as plt
except:
print "Can't import matplotlib, will not produce graphs."
print("Can't import matplotlib, will not produce graphs.")
generateplots = False

observe = 'position^2' # the observable, one of 'mean square displacement','position', or 'potential energy'
@@ -55,7 +57,8 @@

# Determine number of simulations.
K = numpy.size(N_k)
if numpy.shape(K_k) != numpy.shape(N_k): raise "K_k and N_k must have same dimensions."
if numpy.shape(K_k) != numpy.shape(N_k):
raise DataError("K_k and N_k must have same dimensions.")

# Determine maximum number of samples to be drawn for any state.
N_max = numpy.max(N_k)
@@ -64,12 +67,12 @@
# For a harmonic oscillator with spring constant K,
# x ~ Normal(x_0, sigma^2), where sigma = 1/sqrt(beta K)
sigma_k = (beta * K_k)**-0.5
print "Gaussian widths:"
print sigma_k
print("Gaussian widths:")
print(sigma_k)

# Compute the absolute dimensionless free energies of each oscillator analytically.
# f = - ln(sqrt((2 pi)/(beta K)) )
print 'Computing dimensionless free energies analytically...'
print('Computing dimensionless free energies analytically...')
f_k_analytical = - numpy.log(numpy.sqrt(2 * numpy.pi) * sigma_k )

# Compute true free energy differences.
@@ -88,17 +91,17 @@
elif observe == 'position^2':
A_k_analytical = (1+ beta*K_k*O_k**2)/(beta*K_k) # observable is the position^2
else:
raise "Observable %s not known." % observe
raise ParameterError("Observable %s not known." % observe)

# DEBUG info
print "This script will perform %d replicates of an experiment where samples are drawn from %d harmonic oscillators." % (nreplicates, K)
print "The harmonic oscillators have equilibrium positions"
print O_k
print "and spring constants"
print K_k
print "and the following number of samples will be drawn from each (can be zero if no samples drawn):"
print N_k
print ""
print("This script will perform %d replicates of an experiment where samples are drawn from %d harmonic oscillators." % (nreplicates, K))
print("The harmonic oscillators have equilibrium positions")
print(O_k)
print("and spring constants")
print(K_k)
print("and the following number of samples will be drawn from each (can be zero if no samples drawn):")
print(N_k)
print("")

# Conduct a number of replicates of the same experiment
replicates_observable = [] # storage for one hash for each replicate
@@ -107,7 +110,7 @@
replicates_fdf = [] # storage for one hash for final observable

for replicate_index in range(0,nreplicates):
print "Performing replicate %d / %d" % (replicate_index+1, nreplicates)
print("Performing replicate %d / %d" % (replicate_index+1, nreplicates))

# Initialize a hash to store data for this replicate.
replicate_df = { }
@@ -177,8 +180,8 @@
As_k_estimated[k] = numpy.average(A_kn[k,k,0:N_k[k]])
dAs_k_estimated[k] = numpy.sqrt(numpy.var(A_kn[k,k,0:N_k[k]])/(N_k[k]-1))

print A_k_estimated
print dA_k_estimated
print(A_k_estimated)
print(dA_k_estimated)

# need to additionally transform to get the square root
if observe == 'RMS displacement':
@@ -212,29 +215,29 @@


# compute the probability distribution of all states
print "Free energies"
print("Free energies")
# compute anderson/darling statistics

D = confidenceintervals.AndersonDarling(replicates_df,K)
print "Anderson-Darling Metrics (see README.md)"
print D
print("Anderson-Darling Metrics (see README.md)")
print(D)
if (generateplots):
confidenceintervals.QQPlot(replicates_df,K,title='Q-Q plots of free energy differences',filename="QQdf.pdf")
(alpha_fij,Pobs_fij,Plow_fij,Phigh_fij,dPobs_fij,Pnorm_fij) = confidenceintervals.generateConfidenceIntervals(replicates_df,K)

print "Standard ensemble averaged observables"
print("Standard ensemble averaged observables")
(alpha_Ai,Pobs_Ai,Plow_Ai,Phigh_Ai,dPobs_Ai,Pnorm_Ai) = confidenceintervals.generateConfidenceIntervals(replicates_standobservable,numpy.sum(ifzero))
D = confidenceintervals.AndersonDarling(replicates_standobservable,numpy.sum(ifzero))
print "Anderson-Darling Metrics (see README.md)"
print D
print("Anderson-Darling Metrics (see README.md)")
print(D)
if (generateplots):
confidenceintervals.QQPlot(replicates_standobservable,numpy.sum(ifzero),title='Q-Q plots of ensemble averaged observables \n with standard error estimates', filename="QQstandardobserve.pdf")

print "MBAR ensemble averaged observables"
print("MBAR ensemble averaged observables")
(alpha_Ai,Pobs_Ai,Plow_Ai,Phigh_Ai,dPobs_Ai,Pnorm_Ai) = confidenceintervals.generateConfidenceIntervals(replicates_observable,K)
D = confidenceintervals.AndersonDarling(replicates_observable,K)
print "Anderson-Darling Metrics (see README.md)"
print D
print("Anderson-Darling Metrics (see README.md)")
print(D)
if (generateplots):
confidenceintervals.QQPlot(replicates_observable,K,title='Q-Q plots of ensemble averaged observables using MBAR', filename="QQMBARobserve.pdf")

@@ -264,8 +267,8 @@
replicate['error'] = replicate_ij['error'][0,k]
replicates_fdf.append(replicate)
# compute the distribution of the end states only
print ""
print " ==== State %d alone with MBAR ===== " %(k)
print("")
print(" ==== State %d alone with MBAR ===== " %(k))
(alpha_f,Pobs_f,Plow_f,Phigh_f,dPobs_f,Pnorm_f) = confidenceintervals.generateConfidenceIntervals(replicates_fdf,K);
label = 'State %d' % k
if (generateplots):
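The `raise "..."` statements replaced in this file are an old Python 2 idiom that modern Pythons reject: a plain string is not an exception, so Python 3 fails with `TypeError: exceptions must derive from BaseException`. The commit switches to pymbar's exception classes; a standalone equivalent, with an illustrative stand-in class rather than the real `pymbar.utils.DataError`:

```python
import numpy

class DataError(Exception):
    """Illustrative stand-in; the script imports the real class from pymbar.utils."""

K_k = numpy.array([25.0, 100.0, 400.0])  # spring constants (hypothetical)
N_k = numpy.array([1000, 1000])          # sample counts, deliberately mismatched

# Python 2 allowed `raise "K_k and N_k must have same dimensions."`;
# Python 3 rejects it: TypeError: exceptions must derive from BaseException.
if numpy.shape(K_k) != numpy.shape(N_k):
    raise DataError("K_k and N_k must have same dimensions.")
```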
