Skip to content

Commit

Permalink
Create row iterator methods for Sampler object
Browse files Browse the repository at this point in the history
  • Loading branch information
aeslaughter committed Nov 25, 2019
1 parent 11f7a2e commit ce50b62
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 23 deletions.
33 changes: 31 additions & 2 deletions framework/include/samplers/Sampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,23 @@ class Sampler : public MooseObject, public SetupInterface, public DistributionIn
DenseMatrix<Real> getLocalSamples();
///@}

/**
* Return the "next" local row. This is designed to be used within a loop using the
* getLocalRowBegin/End methods as such:
*
* for (dof_id_type i = getLocalRowBegin(); i < getLocalRowEnd(); ++i)
* std::vector<Real> row = getNextLocalRow();
*
* Calls to getNextLocalRow() will continue to return the next row of data until the last local
* row has been reached, it will then start again at the beginning if called again. Also, calls
* to getNextLocalRow() can be partial, followed by call(s) to getSamples or getLocalSamples.
* Continued calls to getNextLocalRow() will still continue to give the next row as if the
* other get calls were not made. However, when this occurs calls to restore and advance the
* generators are made after each call to getSamples or getLocalSamples, so this generally
* should be avoided.
*/
std::vector<Real> getNextLocalRow();

/**
* Return the number of samples.
* @return The total number of rows that exist in all DenseMatrix values from the
Expand Down Expand Up @@ -126,15 +143,21 @@ class Sampler : public MooseObject, public SetupInterface, public DistributionIn
virtual void computeLocalSampleMatrix(DenseMatrix<Real> & matrix);
///@}

private:
/**
* Method for advancing the random number generator(s) by the supplied number of calls to rand().
*
* TODO: This should be updated if the random number generator is updated to a type that
* supports native advancing.
*/
void advanceGenerators(dof_id_type count);
virtual void advanceGenerators(dof_id_type count);

/**
* Method for manually setting the local row index for iteration of sample rows using
* getNextLocalRow method.
*/
void setNextLocalRowIndex(dof_id_type index = 0);

private:
/**
* Function called by MOOSE to setup the Sampler for use. The primary purpose is to partition
* the DenseMatrix rows for parallel distribution. A separate method is required so that the
Expand Down Expand Up @@ -182,6 +205,12 @@ class Sampler : public MooseObject, public SetupInterface, public DistributionIn
/// Total number of columns in the sample matrix
dof_id_type _n_cols;

/// Iterator index for getNextLocalRow method
dof_id_type _next_local_row;

/// Flag for restoring state during getNextLocalRow iteration
bool _next_local_row_requires_state_restore;

/// Flag to indicate if the init method for this class was called
bool _initialized = false;
};
38 changes: 35 additions & 3 deletions framework/src/samplers/Sampler.C
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ Sampler::Sampler(const InputParameters & parameters)
DistributionInterface(this),
_seed(getParam<unsigned int>("seed")),
_n_rows(0),
_n_cols(0)
_n_cols(0),
_next_local_row_requires_state_restore(true)
{
setNumberOfRandomSeeds(1);
}
Expand All @@ -54,14 +55,16 @@ Sampler::init()
if (_n_rows == 0)
mooseError("The number of columns cannot be zero.");

// TODO: When Sampler is updated to a threaded GeneralUserObject, this partitioning must
// also include threads
// TODO: If Sampler is updated to be threaded, this partitioning must also include threads
MooseUtils::linearPartitionItems(
_n_rows, n_processors(), processor_id(), _n_local_rows, _local_row_begin, _local_row_end);

// See FEProblemBase::execute
execute();

// Set the next row iterator index
_next_local_row = _local_row_begin;

_initialized = true;
}

Expand Down Expand Up @@ -96,6 +99,7 @@ Sampler::execute()
DenseMatrix<Real>
Sampler::getSamples()
{
_next_local_row_requires_state_restore = true;
_generator.restoreState();
sampleSetUp();
DenseMatrix<Real> output(_n_rows, _n_cols);
Expand All @@ -107,6 +111,7 @@ Sampler::getSamples()
DenseMatrix<Real>
Sampler::getLocalSamples()
{
_next_local_row_requires_state_restore = true;
_generator.restoreState();
sampleSetUp();
DenseMatrix<Real> output(_n_local_rows, _n_cols);
Expand All @@ -115,6 +120,33 @@ Sampler::getLocalSamples()
return output;
}

std::vector<Real>
Sampler::getNextLocalRow()
{
if (_next_local_row_requires_state_restore)
{
_generator.restoreState();
sampleSetUp();
advanceGenerators(_next_local_row * _n_cols);
_next_local_row_requires_state_restore = false;
}

std::vector<Real> output(_n_cols);
for (dof_id_type j = 0; j < _n_cols; ++j)
output[j] = computeSample(_next_local_row, j);
_next_local_row++;

if (_next_local_row == _local_row_end)
{
advanceGenerators((_n_rows - _local_row_end) * _n_cols);
sampleTearDown();
_next_local_row = _local_row_begin;
_next_local_row_requires_state_restore = true;
}

return output;
}

void
Sampler::computeSampleMatrix(DenseMatrix<Real> & matrix)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,18 @@ various methods on the object as discussed previously.
[samplers/Sampler.md] objects in [MOOSE] are designed to generate an arbitrary set of data sampled from
any number of Distribution objects.

The sampler operates by returning a matrix (`libMesh::DenseMatrix<Real>`) from one of two
methods:
The sampler operates by returning a vector (`std::vector<Real>`) or matrix
(`libMesh::DenseMatrix<Real>`) from one of three methods:

- +`getNextLocalRow`+<br>
This method returns a single row from the complete sample matrix and is the preferred method for
accessing sample data, since the memory footprint is limited to a single row rather than
potentially large matrices as in the other methods. This method should be used as follows:

```c++
for (dof_id_type i = getLocalRowBegin(); i < getLocalRowEnd(); ++i)
std::vector<Real> row = getNextLocalRow();
```

- +`getLocalSamples`+<br>
This method returns a subset of rows from the sample matrix for the current processor. This matrix
Expand Down
93 changes: 91 additions & 2 deletions test/src/userobjects/SamplerTester.C
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ validParams<SamplerTester>()
InputParameters params = validParams<GeneralUserObject>();
params.addRequiredParam<SamplerName>("sampler", "The sampler to test.");

MooseEnum test_type(
"mpi thread base_global_vs_local rand_global_vs_local getSamples getLocalSamples");
MooseEnum test_type("mpi thread base_global_vs_local rand_global_vs_local rand_global_vs_next getSamples "
"getLocalSamples getNextLocalRow");
params.addParam<MooseEnum>("test_type", test_type, "The type of test to perform.");
return params;
}
Expand All @@ -41,6 +41,41 @@ SamplerTester::execute()
if (_test_type == "getLocalSamples")
_samples = _sampler.getLocalSamples();

if (_test_type == "getNextLocalRow")
for (dof_id_type i = _sampler.getLocalRowBegin(); i < _sampler.getLocalRowEnd(); ++i)
std::vector<Real> row = _sampler.getNextLocalRow();

if (_test_type == "rand_global_vs_next")
{
mooseAssert(n_processors() == 1, "This test only works on one processor.");

// Get the full set of samples
DenseMatrix<Real> global = _sampler.getSamples();

// Iterate through some
for (dof_id_type i = _sampler.getLocalRowBegin(); i < 7; ++i)
{
std::vector<Real> row = _sampler.getNextLocalRow();
for (unsigned int j = 0; j < 8; j++)
{
assertEqual(row[j], global(i, j));
}
}

// Get the samples again
DenseMatrix<Real> local = _sampler.getLocalSamples();

// Continue iteration
for (dof_id_type i = 7; i < _sampler.getLocalRowEnd(); ++i)
{
std::vector<Real> row = _sampler.getNextLocalRow();
for (unsigned int j = 0; j < 8; j++)
{
assertEqual(row[j], global(i, j));
}
}
}

if (_test_type == "rand_global_vs_local")
{
DenseMatrix<Real> global = _sampler.getSamples();
Expand All @@ -52,6 +87,15 @@ SamplerTester::execute()
assertEqual(local.m(), 14);
assertEqual(local.n(), 8);
assertEqual(global, local);

for (dof_id_type i = _sampler.getLocalRowBegin(); i < _sampler.getLocalRowEnd(); ++i)
{
std::vector<Real> row = _sampler.getNextLocalRow();
for (unsigned int j = 0; j < 8; j++)
{
assertEqual(row[j], global(i, j));
}
}
}

else if (n_processors() == 2)
Expand All @@ -72,6 +116,15 @@ SamplerTester::execute()
assertEqual(local(5, i), global(5, i));
assertEqual(local(6, i), global(6, i));
}

for (dof_id_type i = _sampler.getLocalRowBegin(); i < _sampler.getLocalRowEnd(); ++i)
{
std::vector<Real> row = _sampler.getNextLocalRow();
for (unsigned int j = 0; j < 8; j++)
{
assertEqual(row[j], global(i, j));
}
}
}

else if (processor_id() == 1)
Expand All @@ -88,6 +141,15 @@ SamplerTester::execute()
assertEqual(local(5, i), global(12, i));
assertEqual(local(6, i), global(13, i));
}

for (dof_id_type i = _sampler.getLocalRowBegin(); i < _sampler.getLocalRowEnd(); ++i)
{
std::vector<Real> row = _sampler.getNextLocalRow();
for (unsigned int j = 0; j < 8; j++)
{
assertEqual(row[j], global(i, j));
}
}
}
}

Expand All @@ -107,6 +169,15 @@ SamplerTester::execute()
assertEqual(local(3, i), global(3, i));
assertEqual(local(4, i), global(4, i));
}

for (dof_id_type i = _sampler.getLocalRowBegin(); i < _sampler.getLocalRowEnd(); ++i)
{
std::vector<Real> row = _sampler.getNextLocalRow();
for (unsigned int j = 0; j < 8; j++)
{
assertEqual(row[j], global(i, j));
}
}
}

else if (processor_id() == 1)
Expand All @@ -121,6 +192,15 @@ SamplerTester::execute()
assertEqual(local(3, i), global(8, i));
assertEqual(local(4, i), global(9, i));
}

for (dof_id_type i = _sampler.getLocalRowBegin(); i < _sampler.getLocalRowEnd(); ++i)
{
std::vector<Real> row = _sampler.getNextLocalRow();
for (unsigned int j = 0; j < 8; j++)
{
assertEqual(row[j], global(i, j));
}
}
}

else if (processor_id() == 2)
Expand All @@ -134,6 +214,15 @@ SamplerTester::execute()
assertEqual(local(2, i), global(12, i));
assertEqual(local(3, i), global(13, i));
}

for (dof_id_type i = _sampler.getLocalRowBegin(); i < _sampler.getLocalRowEnd(); ++i)
{
std::vector<Real> row = _sampler.getNextLocalRow();
for (unsigned int j = 0; j < 8; j++)
{
assertEqual(row[j], global(i, j));
}
}
}
}
}
Expand Down
14 changes: 12 additions & 2 deletions test/tests/samplers/base/tests
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,24 @@
input = threads.i
min_threads = 2
allow_test_objects = true
detail = "with threads and"
detail = "with threads,"
[]
[mpi]
type = RunApp
input = mpi.i
min_parallel = 2
allow_test_objects = true
detail = " MPI."
detail = " MPI, and"
[]

[iter]
type = RunApp
input = global_vs_local.i
allow_test_objects = true
cli_args = 'Samplers/sample/use_rand=true UserObjects/test/test_type=rand_global_vs_next'
max_parallel = 1 # by design, see SamplerTester

detail = "that operates with a row-based iteration scheme."
[]
[]

Expand Down
9 changes: 5 additions & 4 deletions test/tests/samplers/distribute/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,14 @@ def execute(infile, outfile, n_samples, processors, test_type):
if __name__ == '__main__':

# Used for docs
#rows = 1e8
#procs = [1,2,4,8,16]
rows = 1e8
procs = [1,2,4,8,16]

# Used for testing
rows = 1e7
procs = [1,2,4]
#rows = 1e7
#procs = [1,2,4]

execute('distribute.i', 'distribute_none', 1, procs, 'getSamples')
execute('distribute.i', 'distribute_off', rows, procs, 'getSamples')
execute('distribute.i', 'distribute_on', rows, procs, 'getLocalSamples')
execute('distribute.i', 'distribute_row', rows, procs, 'getNextLocalRow')
9 changes: 5 additions & 4 deletions test/tests/samplers/distribute/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,23 @@ def execute(name, ylabel, yscale=1):
fig = plt.figure(figsize=[6,3], dpi=600, tight_layout=True)
ax = fig.subplots()

add_plot(ax, 'distribute_none', name, 'Baseline w/o sample data', yscale)
add_plot(ax, 'distribute_none', name, 'Baseline w/o sample data', yscale, marker='s')
add_plot(ax, 'distribute_off', name, 'Non-distributed', yscale)
add_plot(ax, 'distribute_on', name, 'Distributed', yscale)
add_plot(ax, 'distribute_row', name, 'Distributed (Next)', yscale)

ax.set_xlabel('Num. Processors', fontsize=14)
ax.set_ylabel(ylabel, fontsize=14)
ax.grid(True, color=[0.7]*3)
ax.legend()

fig.savefig('memory_{}.svg'.format(name))
fig.savefig('memory_{}.pdf'.format(name))

def add_plot(ax, prefix, name, label, yscale):
def add_plot(ax, prefix, name, label, yscale, marker='o'):
"""Show matplotlib plot of memory data"""
dirname = os.path.abspath(os.path.dirname(__file__))
data = pandas.read_csv(os.path.join(dirname, '{}.csv'.format(prefix)))
ax.plot(data['n_procs'], data[name]/yscale, label=label, marker='o')
ax.plot(data['n_procs'], data[name]/yscale, label=label, marker=marker)

if __name__ == '__main__':
execute('total', 'Memory (GiB)', 1024**3)
Expand Down

0 comments on commit ce50b62

Please sign in to comment.