Merge pull request #290 from choderalab/py3_examples
Examples converted to py3
mrshirts committed Feb 24, 2018
2 parents 24fc03f + d4be4e0 commit fe9ea53
Showing 19 changed files with 847 additions and 799 deletions.
@@ -1,23 +1,23 @@
MBAR analysis of single-molecule DNA hairpin single-molecule equilibrium experiments under constant force load.
##MBAR analysis of single-molecule DNA hairpin equilibrium experiments under constant force load.

DESCRIPTION
#DESCRIPTION

REFERENCES
#REFERENCES

[1] Woodside MT, Behnke-Parks WM, Larizadeh K, Travers K, Herschlag D, and Block SM. Nanomechanical measurements of the sequence-dependent folding landscapes of single nucleic acid hairpins. PNAS 103(16):6190-6195, 2006.
[2] Greenleaf WJ, Woodside MT, Abbondanzieri EA, and Block SM. Passive all-optical force clamp for high-resolution laser trapping. PRL 95:208102, 2005.


MANIFEST
#MANIFEST

original-data/ - original data in Excel spreadsheet form, provided by Michael T. Woodside <Michael.Woodside@nrc-cnrc.gc.ca>, corresponding author of [1]. Data has been compressed with bzip2 as 20R55_4T_data.xls.bz2
processed-data/ - external biasing forces (in pN) and extension trajectories (in nm, 0.1 ms time resolution) extracted from the Excel files in original-data/
extract-data.py - Python script to extract data from Excel datafiles (slow!)
force-bias-optical-trap.py - Python script to compute potentials of mean force (PMFs) using MBAR
plots/ - directory containing plots
output/ - directory containing PMFs
`original-data/` - original data in Excel spreadsheet form, provided by Michael T. Woodside <Michael.Woodside@nrc-cnrc.gc.ca>, corresponding author of [1]. Data has been compressed with bzip2 as 20R55_4T_data.xls.bz2
`processed-data/` - external biasing forces (in pN) and extension trajectories (in nm, 0.1 ms time resolution) extracted from the Excel files in original-data/
`extract-data.py` - Python script to extract data from Excel datafiles (run as `extract-data.py` in place)
`force-bias-optical-trap.py` - Python script to compute potentials of mean force (PMFs) using MBAR
`plots/` - directory containing plots
`output/` - directory containing PMFs

CORRESPONDENCE
#USAGE

Correspondence with the corresponding author, Michael T. Woodside, describing the dataset appears below.

@@ -50,10 +50,5 @@ Cheers,
Michael Woodside
National Institute for Nanotechnology, NRC
and Dept. of Physics, University of Alberta
11421 Saskatchewan Dr
Edmonton AB, T6G 2M9
Canada
tel: (780) 641-1695
fax: (780) 641-1601
---------------------------------------------------------------

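The README's manifest implies a two-step workflow: run `extract-data.py` once to unpack the Excel data, then `force-bias-optical-trap.py` to compute the PMFs. A minimal driver sketch, assuming the scripts are run from `examples/constant-force-optical-trap/` (the directory layout comes from the manifest; everything else here is illustrative):

```python
# Minimal driver sketch: run the two example scripts in order.
# Assumes the current working directory is examples/constant-force-optical-trap/.
import subprocess
import sys

# Step 1: unpack original-data/ into processed-data/ (slow, per the manifest).
subprocess.run([sys.executable, "extract-data.py"], check=True)

# Step 2: MBAR analysis; PMFs go to output/ and figures to plots/.
subprocess.run([sys.executable, "force-bias-optical-trap.py"], check=True)
```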
24 changes: 12 additions & 12 deletions examples/constant-force-optical-trap/extract-data.py
@@ -9,17 +9,17 @@
# IMPORTS
#=============================================================================================

from __future__ import print_function
from numpy import * # array routines
import numpy
import commands
import os
import os.path
import sys

try:
import xlrd # pure Python Excel spreadsheet (.xls) file reader
except ImportError:
print "Can't run, requires the xlrd .xls file reader module, at http://www.lexicon.net/sjmachin/xlrd.htm"
print("Can't run, requires the xlrd .xls file reader module, at http://www.lexicon.net/sjmachin/xlrd.htm")
sys.exit()

#=============================================================================================
@@ -40,33 +40,33 @@

# process all datasets
for dataset in datasets:
print "Extracting data from dataset '%s'..." % dataset
print("Extracting data from dataset '%s'..." % dataset)

# Open Excel spreadsheet file.
workbook_filename = os.path.join(original_data_directory, dataset + '_data.xls')
workbook = xlrd.open_workbook(workbook_filename)

# DEBUG
print "Workbook contains %d worksheets:" % workbook.nsheets
print workbook.sheet_names()
print("Workbook contains %d worksheets:" % workbook.nsheets)
print(workbook.sheet_names())

# Get the first worksheet.
worksheet = workbook.sheet_by_index(0)

# DEBUG
print "First worksheet (named '%s') has %d columns and %d rows." % (worksheet.name, worksheet.ncols, worksheet.nrows)
print("First worksheet (named '%s') has %d columns and %d rows." % (worksheet.name, worksheet.ncols, worksheet.nrows))

# Extract biasing forces.
K = worksheet.ncols - 1
biasing_force_k = zeros([K], float64)
for k in range(K):
biasing_force_k[k] = worksheet.cell_value(rowx = 1, colx = 1 + k)
print "%d biasing forces (in pN):" % K
print biasing_force_k
print("%d biasing forces (in pN):" % K)
print(biasing_force_k)

# Write biasing forces.
filename = os.path.join(processed_data_directory, dataset + '.forces')
print "Writing biasing forces to '%s'..." % filename
print("Writing biasing forces to '%s'..." % filename)
outfile = open(filename, 'w')
for k in range(K):
if (k > 0): outfile.write(' ')
@@ -77,14 +77,14 @@
T_max = worksheet.nrows - 3
x_kt = zeros([K, T_max])
for k in range(K):
print "Reading trajectory %d / %d..." % (k+1, K+1)
print("Reading trajectory %d / %d..." % (k+1, K+1))
for t in range(T_max):
x_kt[k,t] = worksheet.cell_value(colx = 1 + k, rowx = 3 + t)
print "Read %d trajectories of %d samples each." % (K, T_max)
print("Read %d trajectories of %d samples each." % (K, T_max))

# Write trajectories.
filename = os.path.join(processed_data_directory, dataset + '.trajectories')
print "Writing trajectories to '%s'..." % filename
print("Writing trajectories to '%s'..." % filename)
outfile = open(filename, 'w')
for t in range(T_max):
for k in range(K):
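The conversion pattern in this file is mechanical: `print` statements become `print()` calls (made available under Python 2 as well via `from __future__ import print_function`), and the removed Python 2 `commands` module is replaced elsewhere in this commit by `subprocess.getoutput`, its documented Python 3 drop-in. A self-contained sketch of both idioms (the temporary file is hypothetical, used only to give `wc -l` something to count):

```python
from __future__ import print_function  # no-op on Python 3; enables print() on Python 2
import os
import subprocess
import tempfile

# A hypothetical scratch file so `wc -l` has something to count.
with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
    f.write('a\nb\nc\n')
    filename = f.name

# Python 2:  commands.getoutput('wc -l %s' % filename)
# Python 3:  subprocess.getoutput is the documented drop-in replacement.
n_lines = int(subprocess.getoutput('wc -l %s' % filename).split()[0])

# Python 2:  print "counted %d lines" % n_lines
print("counted %d lines" % n_lines)
os.remove(filename)
```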
40 changes: 20 additions & 20 deletions examples/constant-force-optical-trap/force-bias-optical-trap.py
@@ -7,13 +7,13 @@
#=============================================================================================
# IMPORTS
#=============================================================================================

from __future__ import print_function
from numpy import * # array routines
import numpy
from math import * # additional mathematical functions
import pymbar # multistate Bennett acceptance ratio analysis (provided by pymbar)
from pymbar import timeseries # timeseries analysis (provided by pymbar)
import commands
import subprocess
import os
import os.path
import sys
@@ -116,20 +116,20 @@ def construct_nonuniform_bins(x_n, nbins):
for k in range(K):
biasing_force_k[k] = float(elements[k])
infile.close()
print "biasing forces (in pN) = "
print biasing_force_k
print("biasing forces (in pN) = ")
print(biasing_force_k)

# Determine maximum number of snapshots in all trajectories.
filename = os.path.join(directory, prefix + '.trajectories')
T_max = int(commands.getoutput('wc -l %s' % filename).split()[0]) + 1
T_max = int(subprocess.getoutput('wc -l %s' % filename).split()[0]) + 1

# Allocate storage for original (correlated) trajectories
T_k = zeros([K], int32) # T_k[k] is the number of snapshots from umbrella simulation k
x_kt = zeros([K,T_max], float64) # x_kt[k,t] is the position of snapshot t from trajectory k (in nm)

# Read the trajectories.
filename = os.path.join(directory, prefix + '.trajectories')
print "Reading %s..." % filename
print("Reading %s..." % filename)
infile = open(filename, 'r')
lines = infile.readlines()
infile.close()
@@ -149,7 +149,7 @@ def construct_nonuniform_bins(x_n, nbins):
all_data_indices = where(mask_kt)

# Construct equal-frequency extension bins
print "binning data..."
print("binning data...")
bin_kt = zeros([K, T_max], int32)
(bin_left_boundary_i, bin_center_i, bin_width_i, bin_assignments) = construct_nonuniform_bins(x_kt[all_data_indices], nbins)
bin_kt[all_data_indices] = bin_assignments
@@ -162,7 +162,7 @@ def construct_nonuniform_bins(x_n, nbins):
g = timeseries.statisticalInefficiency(x_kt[k,0:T_k[k]], x_kt[k,0:T_k[k]])
# store statistical inefficiency
g_k[k] = g
print "timeseries %d : g = %.1f, %.0f uncorrelated samples (of %d total samples)" % (k+1, g, floor(T_k[k] / g), T_k[k])
print("timeseries %d : g = %.1f, %.0f uncorrelated samples (of %d total samples)" % (k+1, g, floor(T_k[k] / g), T_k[k]))
N_max = max(N_max, ceil(T_k[k] / g) + 1)

# Subsample trajectory position data.
@@ -181,8 +181,8 @@ def construct_nonuniform_bins(x_n, nbins):
x0_k = zeros([K], float64) # x position corresponding to zero of potential
for k in range(K):
x0_k[k] = x_kn[k,0:N_k[k]].mean()
print "x0_k = "
print x0_k
print("x0_k = ")
print(x0_k)

# Compute bias energies in units of kT.
u_kln = zeros([K,K,N_max], float64) # u_kln[k,l,n] is the reduced (dimensionless) relative potential energy of snapshot n from umbrella simulation k evaluated at umbrella l
@@ -199,7 +199,7 @@ def construct_nonuniform_bins(x_n, nbins):
start_time = time.time()

# Initialize MBAR.
print "Running MBAR..."
print("Running MBAR...")
mbar = pymbar.MBAR(u_kln, N_k, verbose = True, method = 'adaptive', relative_tolerance = 1.0e-10)

# Compute unbiased energies (all biasing forces are zero).
@@ -209,15 +209,15 @@ def construct_nonuniform_bins(x_n, nbins):
u_kn[k,0:N_k[k]] = 0.0 + pN_nm_to_kT * biasing_force_k[k] * (x_kn[k,0:N_k[k]] - x0_k[k])

# Compute PMF in unbiased potential (in units of kT).
print "Computing PMF..."
print("Computing PMF...")
(f_i, df_i) = mbar.computePMF(u_kn, bin_kn, nbins)
# compute estimate of PMF including Jacobian term
pmf_i = f_i + numpy.log(bin_width_i)
# Write out unbiased estimate of PMF
print "Unbiased PMF (in units of kT)"
print "%8s %8s %8s %8s %8s" % ('bin', 'f', 'df', 'pmf', 'width')
print("Unbiased PMF (in units of kT)")
print("%8s %8s %8s %8s %8s" % ('bin', 'f', 'df', 'pmf', 'width'))
for i in range(nbins):
print "%8.3f %8.3f %8.3f %8.3f %8.3f" % (bin_center_i[i], f_i[i], df_i[i], pmf_i[i], bin_width_i[i])
print("%8.3f %8.3f %8.3f %8.3f %8.3f" % (bin_center_i[i], f_i[i], df_i[i], pmf_i[i], bin_width_i[i]))

filename = os.path.join(output_directory, 'pmf-unbiased.out')
outfile = open(filename, 'w')
@@ -229,7 +229,7 @@ def construct_nonuniform_bins(x_n, nbins):
# DEBUG
stop_time = time.time()
elapsed_time = stop_time - start_time
print "analysis took %f seconds" % elapsed_time
print("analysis took %f seconds" % elapsed_time)

# compute observed and expected histograms at each state
for l in range(0,K):
@@ -256,9 +256,9 @@ def construct_nonuniform_bins(x_n, nbins):
N_i_expected = float(N) * p_i
dN_i_expected = numpy.sqrt(float(N) * p_i * (1.0 - p_i)) # only approximate, since correlations df_i df_j are neglected
# plot
print "state %d (%f pN)" % (l, biasing_force_k[l])
print("state %d (%f pN)" % (l, biasing_force_k[l]))
for bin_index in range(nbins):
print "%8.3f %10f %10f +- %10f" % (bin_center_i[bin_index], N_i_expected[bin_index], N_i_observed[bin_index], dN_i_observed[bin_index])
print("%8.3f %10f %10f +- %10f" % (bin_center_i[bin_index], N_i_expected[bin_index], N_i_observed[bin_index], dN_i_observed[bin_index]))

# Write out observed bin counts
filename = os.path.join(output_directory, 'counts-observed-%d.out' % l)
@@ -314,8 +314,8 @@ def construct_nonuniform_bins(x_n, nbins):
outfile = open(gnuplot_input_filename, 'w')
outfile.write(gnuplot_input)
outfile.close()
output = commands.getoutput('gnuplot < %(gnuplot_input_filename)s' % vars())
output = commands.getoutput('epstopdf %(filename)s' % vars())
output = subprocess.getoutput('gnuplot < %(gnuplot_input_filename)s' % vars())
output = subprocess.getoutput('epstopdf %(filename)s' % vars())



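For orientation, the analysis above reduces to three steps: build the reduced bias energies `u_kln`, run MBAR, then histogram into bins for `computePMF`. The sketch below condenses that pipeline onto synthetic Gaussian "extension" data; the `pymbar` calls mirror the ones visible in this diff (pymbar's API at the time of this commit), while the data, constants, and the `u_kln` formula (extrapolated from the visible `u_kn` line, since the script's own `u_kln` construction is elided above) are illustrative assumptions:

```python
# Condensed sketch of the MBAR/PMF pipeline in force-bias-optical-trap.py,
# run on synthetic data. Everything marked "hypothetical" is invented for
# illustration; the pymbar calls mirror the ones visible in the diff above.
import numpy
import pymbar

K, N = 3, 1000                                    # states, samples per state (hypothetical)
pN_nm_to_kT = 1.0 / 4.1                           # 1 pN*nm in kT at ~300 K (approximate)
biasing_force_k = numpy.array([5.0, 10.0, 15.0])  # biasing forces in pN (hypothetical)

# Stand-in extension trajectories (nm): one Gaussian cloud per biasing force.
rng = numpy.random.RandomState(0)
x_kn = 520.0 + biasing_force_k[:, None] + 2.0 * rng.randn(K, N)
x0_k = x_kn.mean(axis=1)            # zero of each biasing potential, as in the script
N_k = numpy.full(K, N, dtype=numpy.int32)

# u_kln[k,l,n]: reduced energy of snapshot n from state k evaluated under bias l
# (extrapolated from the visible u_kn line; the script's u_kln code is elided).
u_kln = numpy.zeros([K, K, N])
for l in range(K):
    u_kln[:, l, :] = pN_nm_to_kT * biasing_force_k[l] * (x_kn - x0_k[l])

mbar = pymbar.MBAR(u_kln, N_k, verbose=True, method='adaptive',
                   relative_tolerance=1.0e-10)

# Unbiased reduced energies, then an equal-width binning (the script uses
# equal-frequency bins instead); every bin is assumed to be occupied.
u_kn = pN_nm_to_kT * biasing_force_k[:, None] * (x_kn - x0_k[:, None])
nbins = 20
edges = numpy.linspace(x_kn.min(), x_kn.max() + 1e-8, nbins + 1)
bin_kn = (numpy.digitize(x_kn, edges) - 1).astype(numpy.int32)
(f_i, df_i) = mbar.computePMF(u_kn, bin_kn, nbins)
print("PMF (kT):", f_i)
```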
Binary file modified examples/harmonic-oscillators/QQMBARobserve.pdf
Binary file modified examples/harmonic-oscillators/QQdf.pdf
Binary file modified examples/harmonic-oscillators/QQstandardobserve.pdf
59 changes: 31 additions & 28 deletions examples/harmonic-oscillators/harmonic-oscillators-distributions.py
@@ -24,8 +24,10 @@
#=============================================================================================
# IMPORTS
#=============================================================================================
from __future__ import print_function
import numpy
from pymbar import testsystems, MBAR, confidenceintervals
from pymbar.utils import ParameterError, DataError
#=============================================================================================
# PARAMETERS
#=============================================================================================
@@ -41,7 +43,7 @@
try:
import matplotlib.pyplot as plt
except:
print "Can't import matplotlib, will not produce graphs."
print("Can't import matplotlib, will not produce graphs.")
generateplots = False

observe = 'position^2' # the observable, one of 'mean square displacement','position', or 'potential energy'
@@ -55,7 +57,8 @@

# Determine number of simulations.
K = numpy.size(N_k)
if numpy.shape(K_k) != numpy.shape(N_k): raise "K_k and N_k must have same dimensions."
if numpy.shape(K_k) != numpy.shape(N_k):
raise DataError("K_k and N_k must have same dimensions.")

# Determine maximum number of samples to be drawn for any state.
N_max = numpy.max(N_k)
@@ -64,12 +67,12 @@
# For a harmonic oscillator with spring constant K,
# x ~ Normal(x_0, sigma^2), where sigma = 1/sqrt(beta K)
sigma_k = (beta * K_k)**-0.5
print "Gaussian widths:"
print sigma_k
print("Gaussian widths:")
print(sigma_k)

# Compute the absolute dimensionless free energies of each oscillator analytically.
# f = - ln(sqrt((2 pi)/(beta K)) )
print 'Computing dimensionless free energies analytically...'
print('Computing dimensionless free energies analytically...')
f_k_analytical = - numpy.log(numpy.sqrt(2 * numpy.pi) * sigma_k )

# Compute true free energy differences.
@@ -88,17 +91,17 @@
elif observe == 'position^2':
A_k_analytical = (1+ beta*K_k*O_k**2)/(beta*K_k) # observable is the position^2
else:
raise "Observable %s not known." % observe
raise ParameterError("Observable %s not known." % observe)

# DEBUG info
print "This script will perform %d replicates of an experiment where samples are drawn from %d harmonic oscillators." % (nreplicates, K)
print "The harmonic oscillators have equilibrium positions"
print O_k
print "and spring constants"
print K_k
print "and the following number of samples will be drawn from each (can be zero if no samples drawn):"
print N_k
print ""
print("This script will perform %d replicates of an experiment where samples are drawn from %d harmonic oscillators." % (nreplicates, K))
print("The harmonic oscillators have equilibrium positions")
print(O_k)
print("and spring constants")
print(K_k)
print("and the following number of samples will be drawn from each (can be zero if no samples drawn):")
print(N_k)
print("")

# Conduct a number of replicates of the same experiment
replicates_observable = [] # storage for one hash for each replicate
@@ -107,7 +110,7 @@
replicates_fdf = [] # storage for one hash for final observable

for replicate_index in range(0,nreplicates):
print "Performing replicate %d / %d" % (replicate_index+1, nreplicates)
print("Performing replicate %d / %d" % (replicate_index+1, nreplicates))

# Initialize a hash to store data for this replicate.
replicate_df = { }
@@ -177,8 +180,8 @@
As_k_estimated[k] = numpy.average(A_kn[k,k,0:N_k[k]])
dAs_k_estimated[k] = numpy.sqrt(numpy.var(A_kn[k,k,0:N_k[k]])/(N_k[k]-1))

print A_k_estimated
print dA_k_estimated
print(A_k_estimated)
print(dA_k_estimated)

# need to additionally transform to get the square root
if observe == 'RMS displacement':
@@ -212,29 +215,29 @@


# compute the probability distribution of all states
print "Free energies"
print("Free energies")
# compute anderson/darling statistics

D = confidenceintervals.AndersonDarling(replicates_df,K)
print "Anderson-Darling Metrics (see README.md)"
print D
print("Anderson-Darling Metrics (see README.md)")
print(D)
if (generateplots):
confidenceintervals.QQPlot(replicates_df,K,title='Q-Q plots of free energy differences',filename="QQdf.pdf")
(alpha_fij,Pobs_fij,Plow_fij,Phigh_fij,dPobs_fij,Pnorm_fij) = confidenceintervals.generateConfidenceIntervals(replicates_df,K)

print "Standard ensemble averaged observables"
print("Standard ensemble averaged observables")
(alpha_Ai,Pobs_Ai,Plow_Ai,Phigh_Ai,dPobs_Ai,Pnorm_Ai) = confidenceintervals.generateConfidenceIntervals(replicates_standobservable,numpy.sum(ifzero))
D = confidenceintervals.AndersonDarling(replicates_standobservable,numpy.sum(ifzero))
print "Anderson-Darling Metrics (see README.md)"
print D
print("Anderson-Darling Metrics (see README.md)")
print(D)
if (generateplots):
confidenceintervals.QQPlot(replicates_standobservable,numpy.sum(ifzero),title='Q-Q plots of ensemble averaged observables \n with standard error estimates', filename="QQstandardobserve.pdf")

print "MBAR ensemble averaged observables"
print("MBAR ensemble averaged observables")
(alpha_Ai,Pobs_Ai,Plow_Ai,Phigh_Ai,dPobs_Ai,Pnorm_Ai) = confidenceintervals.generateConfidenceIntervals(replicates_observable,K)
D = confidenceintervals.AndersonDarling(replicates_observable,K)
print "Anderson-Darling Metrics (see README.md)"
print D
print("Anderson-Darling Metrics (see README.md)")
print(D)
if (generateplots):
confidenceintervals.QQPlot(replicates_observable,K,title='Q-Q plots of ensemble averaged observables using MBAR', filename="QQMBARobserve.pdf")

@@ -264,8 +267,8 @@
replicate['error'] = replicate_ij['error'][0,k]
replicates_fdf.append(replicate)
# compute the distribution of the end states only
print ""
print " ==== State %d alone with MBAR ===== " %(k)
print("")
print(" ==== State %d alone with MBAR ===== " %(k))
(alpha_f,Pobs_f,Plow_f,Phigh_f,dPobs_f,Pnorm_f) = confidenceintervals.generateConfidenceIntervals(replicates_fdf,K);
label = 'State %d' % k
if (generateplots):
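The `raise "..."` statements replaced in this file are an old Python 2 idiom that modern Pythons reject: a plain string is not an exception, so Python 3 fails with `TypeError: exceptions must derive from BaseException`. The commit switches to pymbar's exception classes; a standalone equivalent, with an illustrative stand-in class rather than the real `pymbar.utils.DataError`:

```python
import numpy

class DataError(Exception):
    """Illustrative stand-in; the script imports the real class from pymbar.utils."""

K_k = numpy.array([25.0, 100.0, 400.0])  # spring constants (hypothetical)
N_k = numpy.array([1000, 1000])          # sample counts, deliberately mismatched

# Python 2 allowed `raise "K_k and N_k must have same dimensions."`;
# Python 3 rejects it: TypeError: exceptions must derive from BaseException.
if numpy.shape(K_k) != numpy.shape(N_k):
    raise DataError("K_k and N_k must have same dimensions.")
```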
