Merge pull request #37 from jdherman/problem-dict-and-cleanup

Problem dict and cleanup
SALib · Jan 30, 2015 · 742c1e9
2 parents f345eca + 75d5a2e
commit 742c1e9
Show file tree

Hide file tree

Showing 22 changed files with 203 additions and 256 deletions.
diff --git a/SALib/analyze/delta.py b/SALib/analyze/delta.py
@@ -10,16 +10,10 @@
 # Where each entry is a list of size D (the number of parameters)
 # Containing the indices in the same order as the parameter file
 
-def analyze(pfile, input_file, output_file, column=0, calc_second_order=True, num_resamples=10,
-            delim=' ', conf_level=0.95, print_to_console=False):
+def analyze(problem, X, Y, calc_second_order=True, num_resamples=10,
+            conf_level=0.95, print_to_console=False):
 
-    param_file = read_param_file(pfile)
-    Y = np.loadtxt(output_file, delimiter=delim, usecols=(column,))
-    X = np.loadtxt(input_file, delimiter=delim, ndmin=2)
-    if len(X.shape) == 1:
-        X = X.reshape((len(X), 1))
-
-    D = param_file['num_vars']
+    D = problem['num_vars']
     N = Y.size
 
     if conf_level < 0 or conf_level > 1:
@@ -42,7 +36,7 @@ def analyze(pfile, input_file, output_file, column=0, calc_second_order=True, nu
         S['S1_conf'][i] = sobol_first_conf(
             Y, X[:, i], m, num_resamples, conf_level)
         if print_to_console:
-            print("%s %f %f %f %f" % (param_file['names'][i], S['delta'][
+            print("%s %f %f %f %f" % (problem['names'][i], S['delta'][
                   i], S['delta_conf'][i], S['S1'][i], S['S1_conf'][i]))
 
     return S
@@ -105,6 +99,10 @@ def sobol_first_conf(Y, X, m, num_resamples, conf_level):
                         help='Number of bootstrap resamples for Sobol confidence intervals')
     args = parser.parse_args()
 
-    args = parser.parse_args()
-    analyze(args.paramfile, args.model_input_file, args.model_output_file, args.column,
-            delim=args.delimiter, num_resamples=args.resamples, print_to_console=True)
+    problem = read_param_file(args.paramfile)
+    Y = np.loadtxt(args.model_output_file, delimiter=args.delimiter, usecols=(args.column,))
+    X = np.loadtxt(args.model_input_file, delimiter=args.delimiter, ndmin=2)
+    if len(X.shape) == 1:
+        X = X.reshape((len(X), 1))
+
+    analyze(problem, X, Y, num_resamples=args.resamples, print_to_console=True)
diff --git a/SALib/analyze/dgsm.py b/SALib/analyze/dgsm.py
@@ -11,15 +11,10 @@
 # Containing the indices in the same order as the parameter file
 
 
-def analyze(pfile, input_file, output_file, column=0, num_resamples=1000,
-            delim=' ', conf_level=0.95, print_to_console=False):
+def analyze(problem, X, Y, num_resamples=1000,
+            conf_level=0.95, print_to_console=False):
 
-    pf = read_param_file(pfile)
-    Y = np.loadtxt(output_file, delimiter=delim, usecols=(column,))
-    D = pf['num_vars']
-    X = np.loadtxt(input_file, delimiter=delim, ndmin=2)
-    if len(X.shape) == 1:
-        X = X.reshape((len(X), 1))
+    D = problem['num_vars']
 
     if Y.size % (D + 1) == 0:
         N = int(Y.size / (D + 1))
@@ -51,11 +46,11 @@ def analyze(pfile, input_file, output_file, column=0, num_resamples=1000,
         S['vi'][j], S['vi_std'][j] = calc_vi(
             base, perturbed[:, j], X_perturbed[:, j] - X_base[:, j])
         S['dgsm'][j], S['dgsm_conf'][j] = calc_dgsm(base, perturbed[:, j], X_perturbed[
-                                                    :, j] - X_base[:, j], pf['bounds'][j], num_resamples, conf_level)
+                                                    :, j] - X_base[:, j], problem['bounds'][j], num_resamples, conf_level)
 
         if print_to_console:
             print("%s %f %f %f %f" % (
-                pf['names'][j], S['vi'][j], S['vi_std'][j], S['dgsm'][j], S['dgsm_conf'][j]))
+                problem['names'][j], S['vi'][j], S['vi_std'][j], S['dgsm'][j], S['dgsm_conf'][j]))
 
     return S
 
@@ -90,6 +85,11 @@ def calc_dgsm(base, perturbed, x_delta, bounds, num_resamples, conf_level):
     parser.add_argument('-r', '--resamples', type=int, required=False, default=1000,
                         help='Number of bootstrap resamples for Sobol confidence intervals')
     args = parser.parse_args()
+    problem = read_param_file(args.paramfile)
+
+    Y = np.loadtxt(args.model_output_file, delimiter=args.delimiter, usecols=(args.column,))
+    X = np.loadtxt(args.model_input_file, delimiter=args.delimiter, ndmin=2)
+    if len(X.shape) == 1:
+        X = X.reshape((len(X), 1))
 
-    analyze(args.paramfile, args.model_input_file, args.model_output_file, args.column,
-            num_resamples=args.resamples, delim=args.delimiter, print_to_console=True)
+    analyze(problem, X, Y, num_resamples=args.resamples, print_to_console=True)
diff --git a/SALib/analyze/fast.py b/SALib/analyze/fast.py
@@ -12,11 +12,9 @@
 # Containing the indices in the same order as the parameter file
 
 
-def analyze(pfile, output_file, column=0, M=4, delim=' ', print_to_console=False):
+def analyze(problem, Y, M=4, print_to_console=False):
 
-    param_file = read_param_file(pfile)
-    Y = np.loadtxt(output_file, delimiter=delim, usecols=(column,))
-    D = param_file['num_vars']
+    D = problem['num_vars']
 
     if Y.size % (D) == 0:
         N = int(Y.size / D)
@@ -47,7 +45,7 @@ def analyze(pfile, output_file, column=0, M=4, delim=' ', print_to_console=False
         Si['ST'][i] = compute_total_order(Y[l], N, omega[0])
         if print_to_console:
             print("%s %f %f" %
-                  (param_file['names'][i], Si['S1'][i], Si['ST'][i]))
+                  (problem['names'][i], Si['S1'][i], Si['ST'][i]))
     return Si
 
 
@@ -70,5 +68,7 @@ def compute_total_order(outputs, N, omega):
 
     parser = common_args.create()
     args = parser.parse_args()
-    analyze(args.paramfile, args.model_output_file, args.column,
-            delim=args.delimiter, print_to_console=True)
+    problem = read_param_file(args.paramfile)
+    Y = np.loadtxt(args.model_output_file, delimiter=args.delimiter, usecols=(args.column,))
+
+    analyze(problem, Y, print_to_console=True)
diff --git a/SALib/analyze/morris.py b/SALib/analyze/morris.py
@@ -1,7 +1,6 @@
 from __future__ import division
 from __future__ import print_function
 from ..util import read_param_file
-from sys import exit
 import numpy as np
 from scipy.stats import norm
 from . import common_args
@@ -11,12 +10,7 @@
 # Where each entry is a list of size num_vars (the number of parameters)
 # Containing the indices in the same order as the parameter file
 
-
-def analyze(pfile,
-            input_file,
-            output_file,
-            column=0,
-            delim=' ',
+def analyze(problem, X, Y,
             num_resamples=1000,
             conf_level=0.95,
             print_to_console=False,
@@ -28,18 +22,12 @@ def analyze(pfile,
 
     delta = grid_jump / (num_levels - 1)
 
-    param_file = read_param_file(pfile)
-    Y = np.loadtxt(output_file, delimiter=delim, usecols=(column,))
-    X = np.loadtxt(input_file, delimiter=delim, ndmin=2)
-    if len(X.shape) == 1:
-        X = X.reshape((len(X), 1))
+    num_vars = problem['num_vars']
 
-    num_vars = param_file['num_vars']
-
-    if (param_file['groups'] is None) & (Y.size % (num_vars + 1) == 0):
+    if (problem['groups'] is None) & (Y.size % (num_vars + 1) == 0):
         num_trajectories = int(Y.size / (num_vars + 1))
-    elif param_file['groups'] is not None:
-        groups, unique_group_names = param_file['groups']
+    elif problem['groups'] is not None:
+        groups, unique_group_names = problem['groups']
         number_of_groups = len(unique_group_names)
         num_trajectories = int(Y.size / (number_of_groups + 1))
     else:
@@ -56,7 +44,7 @@ def analyze(pfile,
     Si['mu'] = np.average(ee, 1)
     Si['mu_star'] = np.average(np.abs(ee), 1)
     Si['sigma'] = np.std(ee, 1)
-    Si['names'] = param_file['names']
+    Si['names'] = problem['names']
 
     for j in range(num_vars):
         Si['mu_star_conf'][j] = compute_mu_star_confidence(
@@ -66,7 +54,7 @@ def analyze(pfile,
         if print_to_console:
             print("Parameter Mu Sigma Mu_Star Mu_Star_Conf")
             for j in range(num_vars):
-                print("%s %f %f %f %f" % (param_file['names'][j], Si['mu'][j], Si[
+                print("%s %f %f %f %f" % (problem['names'][j], Si['mu'][j], Si[
                     'sigma'][j], Si['mu_star'][j], Si['mu_star_conf'][j]))
         return Si
     elif groups is not None:
@@ -86,10 +74,9 @@ def analyze(pfile,
                                     Si_grouped['mu_star'][j],
                                     Si_grouped['mu_star_conf'][j]))
 
-
         return Si_grouped
     else:
-        raise RuntimeError("Could determine which parameters should be returned")
+        raise RuntimeError("Could not determine which parameters should be returned")
 
 
 def compute_grouped_mu_star(mu_star_ungrouped, group_matrix):
@@ -100,6 +87,7 @@ def compute_grouped_mu_star(mu_star_ungrouped, group_matrix):
     return mu_star_grouped.T
 
 
+# This function is not being used right now, in favor of the vectorized version below
 def compute_elementary_effect(X, Y, j1, j2):
     # The elementary effect is (change in output)/(change in input)
     # Each parameter has one EE per trajectory, because it is only changed
@@ -201,6 +189,13 @@ def compute_mu_star_confidence(ee, num_trajectories, num_resamples, conf_level):
     parser.add_argument('--grid-jump', type=int, required=False,
                         default=2, help='Grid jump size (Morris only)')
     args = parser.parse_args()
-    analyze(args.paramfile, args.model_input_file, args.model_output_file, args.column,
-            delim=args.delimiter, num_resamples=args.resamples, print_to_console=True,
+
+    problem = read_param_file(args.paramfile)
+
+    Y = np.loadtxt(args.model_output_file, delimiter=args.delimiter, usecols=(args.column,))
+    X = np.loadtxt(args.model_input_file, delimiter=args.delimiter, ndmin=2)
+    if len(X.shape) == 1:
+        X = X.reshape((len(X), 1))
+
+    analyze(problem, X, Y, num_resamples=args.resamples, print_to_console=True,
             num_levels=args.levels, grid_jump=args.grid_jump)
diff --git a/SALib/analyze/sobol.py b/SALib/analyze/sobol.py
@@ -11,12 +11,10 @@
 # Containing the indices in the same order as the parameter file
 
 
-def analyze(pfile, output_file, column=0, calc_second_order=True, num_resamples=1000,
-            delim=' ', conf_level=0.95, print_to_console=False):
+def analyze(problem, Y, calc_second_order=True, num_resamples=1000,
+            conf_level=0.95, print_to_console=False):
 
-    param_file = read_param_file(pfile)
-    Y = np.loadtxt(output_file, delimiter=delim, usecols=(column,))
-    D = param_file['num_vars']
+    D = problem['num_vars']
 
     if calc_second_order and Y.size % (2 * D + 2) == 0:
         N = int(Y.size / (2 * D + 2))
@@ -58,7 +56,7 @@ def analyze(pfile, output_file, column=0, calc_second_order=True, num_resamples=
             A, AB[:, j], B, num_resamples, conf_level)
 
         if print_to_console:
-            print("%s %f %f %f %f" % (param_file['names'][j], S['S1'][
+            print("%s %f %f %f %f" % (problem['names'][j], S['S1'][
                   j], S['S1_conf'][j], S['ST'][j], S['ST_conf'][j]))
 
     # Second order (+conf.)
@@ -78,7 +76,7 @@ def analyze(pfile, output_file, column=0, calc_second_order=True, num_resamples=
                     A, AB[:, j], AB[:, k], BA[:, j], B, num_resamples, conf_level)
 
                 if print_to_console:
-                    print("%s %s %f %f" % (param_file['names'][j], param_file[
+                    print("%s %s %f %f" % (problem['names'][j], problem[
                           'names'][k], S['S2'][j, k], S['S2_conf'][j, k]))
 
     return S
@@ -140,5 +138,8 @@ def second_order_confidence(A, ABj, ABk, BAj, B, num_resamples, conf_level):
                         help='Number of bootstrap resamples for Sobol confidence intervals')
     args = parser.parse_args()
 
-    analyze(args.paramfile, args.model_output_file, args.column, (args.max_order == 2),
-            num_resamples=args.resamples, delim=args.delimiter, print_to_console=True)
+    problem = read_param_file(args.paramfile)
+    Y = np.loadtxt(args.model_output_file, delimiter=args.delimiter, usecols=(args.column,))
+
+    analyze(problem, Y, (args.max_order == 2),
+            num_resamples=args.resamples, print_to_console=True)
diff --git a/SALib/analyze/sobol_rbf.py b/SALib/analyze/sobol_rbf.py
@@ -18,15 +18,10 @@
 # containing the indices in the same order as the parameter file
 
 
-def analyze(pfile, input_file, output_file, N_rbf=10000, column=0, n_folds=10,
-            delim=' ', print_to_console=False, training_sample=None):
+def analyze(problem, X, y, N_rbf=10000, n_folds=10,
+            print_to_console=False, training_sample=None):
 
-    param_file = read_param_file(pfile)
-    y = np.loadtxt(output_file, delimiter=delim, usecols=(column,))
-    X = np.loadtxt(input_file, delimiter=delim, ndmin=2)
-    if len(X.shape) == 1:
-        X = X.reshape((len(X), 1))
-    D = param_file['num_vars']
+    D = problem['num_vars']
     mms = MinMaxScaler()
     X = mms.fit_transform(X)
 
@@ -46,16 +41,13 @@ def analyze(pfile, input_file, output_file, N_rbf=10000, column=0, n_folds=10,
         ix = np.random.randint(y.size, size=training_sample)
         reg.fit(X[ix, :], y[ix])
 
-    X_rbf = saltelli.sample(N_rbf, pfile)
+    X_rbf = saltelli.sample(problem, N_rbf)
     X_rbf = mms.transform(X_rbf)
     y_rbf = reg.predict(X_rbf)
 
-    np.savetxt("y_rbf.txt", y_rbf, delimiter=' ')
-
     # not using the bootstrap intervals here. For large enough N, they will go to zero.
     # (this doesn't mean the indices are accurate -- check the metamodel R^2)
-    S = sobol.analyze(
-        pfile, "y_rbf.txt", print_to_console=False, num_resamples=2)
+    S = sobol.analyze(problem, y_rbf, print_to_console=False, num_resamples=2)
     S.pop("S1_conf", None)
     S.pop("ST_conf", None)
     S.pop("S2_conf", None)
@@ -68,13 +60,13 @@ def analyze(pfile, input_file, output_file, N_rbf=10000, column=0, n_folds=10,
         print("\nParameter S1 ST")
         for j in range(D):
             print("%s %f %f" %
-                  (param_file['names'][j], S['S1'][j], S['ST'][j]))
+                  (problem['names'][j], S['S1'][j], S['ST'][j]))
 
         print("\nParameter_1 Parameter_2 S2")
         for j in range(D):
             for k in range(j + 1, D):
                 print(
-                    "%s %s %f" % (param_file['names'][j], param_file['names'][k], S['S2'][j, k]))
+                    "%s %s %f" % (problem['names'][j], problem['names'][k], S['S2'][j, k]))
 
     return S
 
@@ -90,6 +82,11 @@ def analyze(pfile, input_file, output_file, N_rbf=10000, column=0, n_folds=10,
                         help='Subsample size to train SVR. Default uses all points in dataset.')
 
     args = parser.parse_args()
-    analyze(args.paramfile, args.model_input_file, args.model_output_file, args.N_rbf, args.column,
-            delim=args.delimiter, n_folds=args.n_folds, print_to_console=True,
+    problem = read_param_file(args.paramfile)
+    y = np.loadtxt(args.model_output_file, delimiter=args.delimiter, usecols=(args.column,))
+    X = np.loadtxt(args.model_input_file, delimiter=args.delimiter, ndmin=2)
+    if len(X.shape) == 1:
+        X = X.reshape((len(X), 1))
+
+    analyze(problem, X, y, args.N_rbf, n_folds=args.n_folds, print_to_console=True,
             training_sample=args.training_sample)
diff --git a/SALib/sample/fast_sampler.py b/SALib/sample/fast_sampler.py
@@ -6,11 +6,9 @@
 
 # Generate N x D matrix of extended FAST samples (Saltelli 1999)
 
+def sample(problem, N, M=4):
 
-def sample(N, param_file, M=4):
-
-    pf = read_param_file(param_file)
-    D = pf['num_vars']
+    D = problem['num_vars']
 
     omega = np.empty([D])
     omega[0] = math.floor((N - 1) / (2 * M))
@@ -42,7 +40,7 @@ def sample(N, param_file, M=4):
             g = 0.5 + (1 / math.pi) * np.arcsin(np.sin(omega2[j] * s + phi))
             X[l, j] = g
 
-    scale_samples(X, pf['bounds'])
+    scale_samples(X, problem['bounds'])
     return X
 
 if __name__ == "__main__":
@@ -53,6 +51,8 @@ def sample(N, param_file, M=4):
     args = parser.parse_args()
 
     np.random.seed(args.seed)
-    param_values = sample(args.samples, args.paramfile, M=args.M)
+    problem = read_param_file(args.paramfile)
+
+    param_values = sample(problem, N=args.samples, M=args.M)
     np.savetxt(args.output, param_values, delimiter=args.delimiter,
                fmt='%.' + str(args.precision) + 'e')