In [None]:
import numpy as np
from scipy.spatial.distance import cdist
from SALib.sample.morris.strategy import Strategy
from SALib.sample.morris import generate_trajectory, generate_trajectory_2, _sample_oat, \
generate_p_star, generate_x_star, generate_x_star_2, compute_b_star, compute_b_star_2, compute_delta

In [None]:
sample = _sample_oat({'num_vars': 10}, 5)

In [None]:
sample.shape

In [None]:
sample[:11, 4]

## `sample` matrix

The way that this array is constructed is... unintuituve. Maybe it is like this for backwards compatibility. `sample` is a two-dimensional array which includes three dimensions: trajectory, parameter, and parameter value. Personally, I would prefer this to be a three dimensional array (order doesn't really matter).

Note: My (older, unmaintained) [py-elem-effects](https://bitbucket.org/cmutel/py-elem-effects/) library doesn't store the coordinates of every single point at each point on the trajectory, but rather the starting and ending values for each parameter, and the order the parameters change in. This means you reduce memory usage substantially.

The rows are parameter values for the first trajectory, follow by for the second trajectory, etc. Columns are the parameters.

## Trajectory generation performance

The function `_sample_oat` includes this line:

    sample = np.array([generate_trajectory(group_membership, num_levels)
                       for n in range(N)])

These types of loops should almost always be avoided, as there are numpy operators that are much faster. Let's see if we can make trajectory generation faster.

In [None]:
m = np.asmatrix(np.identity(1000, dtype=int))

In [None]:
m.shape

In [None]:
%timeit generate_trajectory(m, 4)

In [None]:
type(generate_trajectory(m, 4))

In [None]:
%timeit generate_trajectory_2(m, 4)

In [None]:
type(generate_trajectory_2(m, 4))

In [None]:
import numpy.random as rd
rd.seed(5)

b1 = generate_trajectory(m, 4)
b2 = generate_trajectory_2(m, 4)

In [None]:
b1

In [None]:
b2

In [None]:
np.all(b1==b2)

# Memory

In [None]:
import numpy as np
import numpy.random as rd
import sys
import timeit

lower = 0
upper = 1
levels = 4
num_parameters = 5000

In [None]:
#pa = np.tile(np.arange(levels,dtype=np.int8), num_parameters).reshape((num_parameters, levels))
pa = np.tile(np.linspace(lower, upper, levels,dtype=np.float16), num_parameters).reshape((num_parameters, levels))
starting_indices = np.random.randint(0, high=levels - 1,size=num_parameters) #at which level each parameter starts
start = pa[np.arange(num_parameters), starting_indices] #base vector x0
end = pa[np.arange(num_parameters), starting_indices - int(levels / 2)] #end of trajectory
order = np.random.permutation(np.arange(num_parameters)).reshape((1, -1)) #which parameter changes when
indices = np.arange(num_parameters + 1).reshape((-1, 1))
start_dense = np.tile(start, num_parameters + 1).reshape((-1, num_parameters))
end_dense = np.tile(end, num_parameters + 1).reshape((-1, num_parameters))
del pa, starting_indices, start, end
mask = indices > order
change = end_dense[mask]
del end_dense
start_dense[mask] = change
#start_dense = np.array(start_dense/3,dtype=np.float16)
#start_dense[mask] = end_dense[mask]


In [None]:
print(sys.getsizeof(mask))
print(sys.getsizeof(change))
print(sys.getsizeof(start_dense))
#print(sys.getsizeof(end_dense))

In [None]:
sys.getsizeof(start_dense/3)

# Time to generate distance between two trajectories

In [1]:
import numpy as np
import numpy.random as rd
import sys
import timeit
from scipy.spatial.distance import cdist, pdist
import time

lower = 0
upper = 1
levels = 4
n_params = 5000

In [2]:
def generate_trajectory_2(num_parameters, levels=4, lower=0, upper=1):
    pa = np.tile(np.linspace(lower, upper, levels,dtype=np.float16), num_parameters).reshape((num_parameters, levels))
    starting_indices = np.random.randint(0, high=levels - 1,size=num_parameters) #at which level each parameter starts
    start = pa[np.arange(num_parameters), starting_indices] #base vector x0
    end = pa[np.arange(num_parameters), starting_indices - int(levels / 2)] #end of trajectory
    order = np.random.permutation(np.arange(num_parameters)).reshape((1, -1)) #which parameter changes when
    indices = np.arange(num_parameters + 1).reshape((-1, 1))
    start_dense = np.tile(start, num_parameters + 1).reshape((-1, num_parameters))
    end_dense = np.tile(end, num_parameters + 1).reshape((-1, num_parameters))
    del pa, starting_indices, start, end
    mask = indices > order
    change = end_dense[mask]
    del end_dense
    start_dense[mask] = change
    del change, mask
    return start_dense

In [3]:
samples1 = generate_trajectory_2(n_params, levels=4, lower=0, upper=1)
samples2 = generate_trajectory_2(n_params, levels=4, lower=0, upper=1)

In [4]:
t1 = time.time()
distance = np.array(np.sum(pdist(samples1, samples2)), dtype=np.float64)
t2 = time.time()
t = t2-t1
print('Time to compute distance: ' + str(t/60) + ' min')

  if(metric in _METRICS['minkowski'].aka or
  metric in _METRICS['wminkowski'].aka or
  metric in ['test_minkowski', 'test_wminkowski'] or


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
distance

In [None]:
samples1

In [None]:
samples2