Skip to content

Commit

Permalink
Add capability to store Monte Carlo sample data in parallel
Browse files Browse the repository at this point in the history
ref #13879
  • Loading branch information
jiangwen84 committed Aug 29, 2019
1 parent ae2ae53 commit 87612f6
Show file tree
Hide file tree
Showing 27 changed files with 318 additions and 96 deletions.
43 changes: 39 additions & 4 deletions framework/include/samplers/Sampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class Sampler : public MooseObject, public SetupInterface, public DistributionIn
/**
* Return the sampled distribution data.
*/
std::vector<DenseMatrix<Real>> getSamples();
std::vector<DenseMatrix<Real>> & getSamples();

/**
* Return the sample names, by default 'sample_0, sample_1, etc.' is used.
Expand Down Expand Up @@ -137,6 +137,21 @@ class Sampler : public MooseObject, public SetupInterface, public DistributionIn
*/
dof_id_type getLocalRowEnd();

/**
* Return true if the sample data is distributed among all processors.
*/
bool isSampleDataDistributed() { return _is_distributed_sample_data; };

/**
* Return the number of samples.
*/
dof_id_type getNumberOfSamples() { return _num_samples; };

/**
* Return the number of matrices.
*/
dof_id_type getNumberOfMatrices() { return _num_matrices; };

protected:
/**
* Get the next random number from the generator.
Expand All @@ -152,7 +167,7 @@ class Sampler : public MooseObject, public SetupInterface, public DistributionIn
*
* @return The list of samples for the Sampler.
*/
virtual std::vector<DenseMatrix<Real>> sample() = 0;
virtual std::vector<DenseMatrix<Real>> & sample() = 0;

/**
* Set the number of seeds required by the sampler. The Sampler will generate
Expand All @@ -162,11 +177,16 @@ class Sampler : public MooseObject, public SetupInterface, public DistributionIn
*/
void setNumberOfRequiedRandomSeeds(const std::size_t & number);

/**
* Set the number of matrices for the sampler.
* @param number The number of matrices.
*/
void setNumberOfMatrices(const dof_id_type number) { _num_matrices = number; };

/**
* Reinitialize the offsets and row counts.
* @param data Samples, as returned from getSamples(), to use for re-computing size information
*/
void reinit(const std::vector<DenseMatrix<Real>> & data);
void reinit();

/// Map used to store the perturbed parameters and their corresponding distributions
std::vector<Distribution const *> _distributions;
Expand All @@ -177,6 +197,21 @@ class Sampler : public MooseObject, public SetupInterface, public DistributionIn
/// Sample names
std::vector<std::string> _sample_names;

/// Sampler data is distributed among all processors
bool _is_distributed_sample_data;

/// Number of samples
dof_id_type _num_samples;

/// Number of matrices
dof_id_type _num_matrices;

/// Compute the sample data once
bool _compute_sample_once;

/// Sampler data
std::vector<DenseMatrix<Real>> _sample_data;

private:
/// Random number generator, don't give users access we want to control it via the interface
/// from this class.
Expand Down
37 changes: 23 additions & 14 deletions framework/src/samplers/Sampler.C
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ validParams<Sampler>()
params.addRequiredParam<std::vector<DistributionName>>(
"distributions", "The names of distributions that you want to sample.");
params.addParam<unsigned int>("seed", 0, "Random number generator initial seed");
params.addParam<bool>("sample_data_distributed",
false,
"Return true if sample data is distributed across all processors.");
params.addRequiredParam<dof_id_type>(
"n_samples", "Number of samples to perform for each distribution within each matrix.");
params.addParam<bool>("compute_sample_once", false, "Compute sample data only once.");
params.registerBase("Sampler");
return params;
}
Expand All @@ -39,6 +45,9 @@ Sampler::Sampler(const InputParameters & parameters)
SetupInterface(this),
DistributionInterface(this),
_distribution_names(getParam<std::vector<DistributionName>>("distributions")),
_is_distributed_sample_data(getParam<bool>("sample_data_distributed")),
_num_samples(getParam<dof_id_type>("n_samples")),
_compute_sample_once(getParam<bool>("compute_sample_once")),
_seed(getParam<unsigned int>("seed")),
_total_rows(0)
{
Expand All @@ -51,23 +60,23 @@ void
Sampler::execute()
{
// Get the samples then save the state so that subsequent calls to getSamples returns the same
// random numbers until this execute command is called again.
std::vector<DenseMatrix<Real>> data = getSamples();
_generator.saveState();
reinit(data);
reinit();
}

void
Sampler::reinit(const std::vector<DenseMatrix<Real>> & data)
Sampler::reinit()
{
// Update offsets and total number of rows
_total_rows = 0;
_offsets.clear();
_offsets.reserve(data.size() + 1);
unsigned int num_matrices = getNumberOfMatrices();
unsigned int num_samples = getNumberOfSamples();
_offsets.reserve(num_matrices + 1);
_offsets.push_back(_total_rows);
for (const DenseMatrix<Real> & mat : data)
for (unsigned int i = 0; i < num_matrices; i++)
{
_total_rows += mat.m();
_total_rows += num_samples;
_offsets.push_back(_total_rows);
}

Expand All @@ -76,12 +85,12 @@ Sampler::reinit(const std::vector<DenseMatrix<Real>> & data)
_total_rows, n_processors(), processor_id(), _local_rows, _local_row_begin, _local_row_end);
}

std::vector<DenseMatrix<Real>>
std::vector<DenseMatrix<Real>> &
Sampler::getSamples()
{
_generator.restoreState();
sampleSetUp();
std::vector<DenseMatrix<Real>> output = sample();
std::vector<DenseMatrix<Real>> & output = sample();
sampleTearDown();

if (_sample_names.empty())
Expand Down Expand Up @@ -137,7 +146,7 @@ Sampler::Location
Sampler::getLocation(dof_id_type global_index)
{
if (_offsets.empty())
reinit(getSamples());
reinit();

mooseAssert(_offsets.size() > 1,
"The getSamples method returned an empty vector, if you are seeing this you have "
Expand All @@ -157,7 +166,7 @@ dof_id_type
Sampler::getTotalNumberOfRows()
{
if (_total_rows == 0)
reinit(getSamples());
reinit();
return _total_rows;
}

Expand All @@ -168,7 +177,7 @@ dof_id_type
Sampler::getLocalNumerOfRows()
{
if (_total_rows == 0)
reinit(getSamples());
reinit();
return _local_rows;
}

Expand All @@ -179,7 +188,7 @@ dof_id_type
Sampler::getLocalRowBegin()
{
if (_total_rows == 0)
reinit(getSamples());
reinit();
return _local_row_begin;
}

Expand All @@ -190,6 +199,6 @@ dof_id_type
Sampler::getLocalRowEnd()
{
if (_total_rows == 0)
reinit(getSamples());
reinit();
return _local_row_end;
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,8 @@ class MonteCarloSampler : public Sampler
MonteCarloSampler(const InputParameters & parameters);

protected:
virtual std::vector<DenseMatrix<Real>> sample() override;
virtual std::vector<DenseMatrix<Real>> & sample() override;

/// Number of matrices
const dof_id_type _num_matrices;

/// Number of monte carlo samples to create for each distribution
const dof_id_type _num_samples;
};
6 changes: 1 addition & 5 deletions modules/stochastic_tools/include/samplers/SobolSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,13 @@ class SobolSampler : public Sampler
SobolSampler(const InputParameters & parameters);

protected:
virtual std::vector<DenseMatrix<Real>> sample() override;
virtual std::vector<DenseMatrix<Real>> & sample() override;
virtual void sampleSetUp() override;
virtual void sampleTearDown() override;

/// Number of Monte Carlo samples to create for each Sobol matrix
const std::size_t _num_samples;

///@{
/// Sobol Monte Carlo matrices, these are sized and cleared to avoid keeping large matrices around
DenseMatrix<Real> _a_matrix;
DenseMatrix<Real> _b_matrix;
///@}
};

Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class SamplerData : public GeneralVectorPostprocessor, SamplerInterface
SamplerData(const InputParameters & parameters);
void virtual initialize() override;
void virtual execute() override;
void virtual finalize() override;

protected:
/// Storage for declared vectors
Expand All @@ -38,4 +39,3 @@ class SamplerData : public GeneralVectorPostprocessor, SamplerInterface
/// Whether to output the number of rows and columns in the first two rows of output
const bool & _output_col_row_sizes;
};

Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void
MultiAppCommandLineControl::execute()
{
std::vector<std::string> cli_args;
std::vector<DenseMatrix<Real>> samples = _sampler.getSamples();
std::vector<DenseMatrix<Real>> & samples = _sampler.getSamples();

for (const DenseMatrix<Real> & matrix : samples)
{
Expand Down
47 changes: 34 additions & 13 deletions modules/stochastic_tools/src/multiapps/SamplerFullSolveMultiApp.C
Original file line number Diff line number Diff line change
Expand Up @@ -146,25 +146,46 @@ SamplerFullSolveMultiApp::getCommandLineArgsParamHelper(unsigned int local_app)
// Since we only store param_names in cli_args, we need to find the values for each param from
// sampler data and combine them to get full command line option strings.

std::vector<DenseMatrix<Real>> samples = _sampler.getSamples();
std::vector<DenseMatrix<Real>> & samples = _sampler.getSamples();
std::ostringstream oss;
Sampler::Location loc =
_sampler.getLocation(_local_batch_app_index + _sampler.getLocalRowBegin());
DenseMatrix<Real> & matrix = samples[loc.sample()];

for (unsigned int i = 0; i < samples.size(); ++i)
const std::vector<std::string> & cli_args_name = MooseUtils::split(_cli_args[loc.sample()], ";");

unsigned int local_i = 0;
if (_sampler.isSampleDataDistributed())
{
DenseMatrix<Real> matrix = samples[i];
const std::vector<std::string> & cli_args_name = MooseUtils::split(_cli_args[i], ";");
for (unsigned int i = 0; i < loc.sample(); ++i)
local_i += samples[i].m();

for (unsigned int col = 0; col < matrix.n(); ++col)
{
if (col > 0)
oss << ";";
local_i = _local_batch_app_index - local_i;
}

if (_mode == StochasticTools::MultiAppMode::BATCH_RESET)
oss << cli_args_name[col] << "="
<< Moose::stringify(matrix(_local_batch_app_index + _sampler.getLocalRowBegin(), col));
for (unsigned int col = 0; col < matrix.n(); ++col)
{
if (col > 0)
oss << ";";

if (_mode == StochasticTools::MultiAppMode::BATCH_RESET)
{
if (_sampler.isSampleDataDistributed())
oss << cli_args_name[col] << "=" << Moose::stringify(matrix(local_i, col));
else
oss << cli_args_name[col] << "="
<< Moose::stringify(matrix(local_app + _first_local_app, col));
{
oss << cli_args_name[col] << "=" << Moose::stringify(matrix(loc.row(), col));
if (loc.row() >= matrix.m())
oss.str("");
}
}
else
{
oss << cli_args_name[col] << "="
<< Moose::stringify(matrix(local_app + _first_local_app, col));

if ((local_app + _first_local_app) >= matrix.m())
oss.str("");
}
}

Expand Down
72 changes: 60 additions & 12 deletions modules/stochastic_tools/src/samplers/MonteCarloSampler.C
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,79 @@ validParams<MonteCarloSampler>()
{
InputParameters params = validParams<Sampler>();
params.addClassDescription("Monte Carlo Sampler.");
params.addRequiredParam<dof_id_type>(
"n_samples",
"Number of Monte Carlo samples to perform for each distribution within each matrix.");
params.addParam<dof_id_type>(
"n_matrices", 1, "Number of matrices to create, each matrix will contain 'n_samples'.");
return params;
}

MonteCarloSampler::MonteCarloSampler(const InputParameters & parameters)
: Sampler(parameters),
_num_matrices(getParam<dof_id_type>("n_matrices")),
_num_samples(getParam<dof_id_type>("n_samples"))
: Sampler(parameters), _num_matrices(getParam<dof_id_type>("n_matrices"))
{
setNumberOfMatrices(_num_matrices);
if (_is_distributed_sample_data && n_processors() > _num_samples * _num_matrices)
mooseError("In MonteCarloSampler, the number of MPI processes cannot be larger than the number "
"of samples x matrices(",
_num_samples * _num_matrices,
") when sample data is distributed across all processors.");
}

std::vector<DenseMatrix<Real>>
std::vector<DenseMatrix<Real>> &
MonteCarloSampler::sample()
{
if (_compute_sample_once && _sample_data.size() != 0)
return _sample_data;

std::vector<DenseMatrix<Real>> output(_num_matrices);
for (MooseIndex(_num_matrices) m = 0; m < _num_matrices; ++m)

if (_is_distributed_sample_data)
{
output[m].resize(_num_samples, _distributions.size());
for (MooseIndex(_num_samples) i = 0; i < _num_samples; ++i)
dof_id_type total_rows, local_rows, local_row_begin, local_row_end;
total_rows = 0;
for (unsigned int i = 0; i < _num_matrices; i++)
total_rows += _num_samples;

MooseUtils::linearPartitionItems(
total_rows, n_processors(), processor_id(), local_rows, local_row_begin, local_row_end);

std::vector<unsigned int> matrix_local_rows(_num_matrices, 0);
for (MooseIndex(total_rows) m = 0; m < total_rows; ++m)
{
Sampler::Location loc = getLocation(m);
if (m >= local_row_begin && m < local_row_end)
matrix_local_rows[loc.sample()]++;
}

for (MooseIndex(_num_matrices) m = 0; m < _num_matrices; ++m)
output[m].resize(matrix_local_rows[m], _distributions.size());

std::vector<unsigned int> local_i(_num_matrices, 0);
for (MooseIndex(total_rows) m = 0; m < total_rows; ++m)
{
Sampler::Location loc = getLocation(m);

for (MooseIndex(_distributions) j = 0; j < _distributions.size(); ++j)
output[m](i, j) = _distributions[j]->quantile(rand());
{
Real quant = _distributions[j]->quantile(rand());
if (m >= local_row_begin && m < local_row_end)
output[loc.sample()](local_i[loc.sample()], j) = quant;
}

if (m >= local_row_begin && m < local_row_end)
local_i[loc.sample()]++;
}
}
else
{
for (MooseIndex(_num_matrices) m = 0; m < _num_matrices; ++m)
{
output[m].resize(_num_samples, _distributions.size());
for (MooseIndex(_num_samples) i = 0; i < _num_samples; ++i)
for (MooseIndex(_distributions) j = 0; j < _distributions.size(); ++j)
output[m](i, j) = _distributions[j]->quantile(rand());
}
}
return output;

_sample_data = output;

return _sample_data;
}

0 comments on commit 87612f6

Please sign in to comment.