Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
2012 lines (1821 sloc) 66.3 KB
/**
* \file distributions.h
*
* <!-- Created on: Jul 31, 2015
* Author: asaparov -->
*/
#ifndef DISTRIBUTIONS_H_
#define DISTRIBUTIONS_H_
#include <core/random.h>
#include "multiset.h"
#include "sparse_vector.h"
#include "log.h"
/* TODO: add documentation */
#define CATEGORICAL_MAX_THRESHOLD 16.0
#define CATEGORICAL_MIN_THRESHOLD 1.0e-5
/* forward declarations */
#if !defined(DOXYGEN_IGNORE)
template<typename V> struct dirichlet;
#endif
/**
* Returns the natural logarithm of the
* [rising factorial](https://en.wikipedia.org/wiki/Falling_and_rising_factorials):
* \f[ \log a^{(n)} = \log \left\{ \prod_{i=0}^{n-1} a^i \right\}, \f]
* where `base` is \f$ a \f$ and `exponent` is \f$ n \f$.
*/
inline double log_rising_factorial(double base, unsigned int exponent) {
return lgamma(base + exponent) - lgamma(base);
}
/* \tparam V satisfies [is_arithmetic](http://en.cppreference.com/w/cpp/types/is_arithmetic). */
template<typename V>
struct array_categorical {
V* log_probabilities;
V* probabilities; /* unnormalized */
V maximum; /* of log_probabilities */
array_categorical(unsigned int length) : log_probabilities(NULL), probabilities(NULL)
{
if (!initialize(length))
exit(EXIT_FAILURE);
}
~array_categorical() { free(); }
inline bool resize(unsigned int new_length) {
return core::resize(log_probabilities, new_length)
&& core::resize(probabilities, new_length);
}
void place(unsigned int i, const V& log_probability) {
log_probabilities[i] = log_probability;
}
void renormalize(unsigned int length) {
maximum = max(log_probabilities, length);
for (unsigned int i = 0; i < length; i++)
probabilities[i] = exp(log_probabilities[i] - maximum);
}
static inline void move(const array_categorical<V>& src, array_categorical<V>& dst) {
dst.log_probabilities = src.log_probabilities;
dst.probabilities = src.probabilities;
dst.maximum = src.maximum;
}
static inline void swap(array_categorical<V>& first, array_categorical<V>& second) {
core::swap(first.log_probabilities, second.log_probabilities);
core::swap(first.probabilities, second.probabilities);
core::swap(first.maximum, second.maximum);
}
/* NOTE: this function assumes that the type V has constant size */
static inline long unsigned int size_of(const array_categorical<V>& categorical, unsigned int length) {
return sizeof(V) * (2 * length + 1);
}
static inline void free(array_categorical<V>& categorical) {
categorical.free();
}
private:
inline bool initialize(unsigned int length) {
log_probabilities = (V*) malloc(sizeof(V) * length);
if (log_probabilities == NULL) {
fprintf(stderr, "array_categorical ERROR: Out of memory.\n");
return false;
}
probabilities = (V*) malloc(sizeof(V) * length);
if (probabilities == NULL) {
fprintf(stderr, "array_categorical ERROR: Out of memory.\n");
core::free(probabilities);
return false;
}
return true;
}
inline void free() {
core::free(log_probabilities);
core::free(probabilities);
}
template<typename A>
friend bool init(array_categorical<A>&, unsigned int);
};
template<typename V>
bool init(array_categorical<V>& categorical, unsigned int length) {
return categorical.initialize(length);
}
/**
* A structure representing a symmetric Dirichlet distribution, which is a
* regular [Dirichlet distribution](https://en.wikipedia.org/wiki/Dirichlet_distribution),
* where all the elements of the concentration parameter vector *pi* have the
* same value.
*
* The following example constructs a Dirichlet distribution with dimension 3,
* and concentration parameter [0.1, 0.1, 0.1]. It proceeds to generate two
* samples from this distribution. The expected output is
* `[0.004223, 0.995777, 0.000000] [0.964576, 0.035400, 0.000024]`.
*
* ```{.cpp}
* #include <math/distributions.h>
* using namespace core;
*
* template<typename V>
* struct custom_vector {
* V* elements;
*
* custom_vector(unsigned int length) {
* elements = (V*) malloc(sizeof(V) * length);
* }
*
* ~custom_vector() { free(elements); }
*
* inline V get(unsigned int index) const {
* return elements[index];
* }
*
* void set(unsigned int index, const V& value) {
* elements[index] = value;
* }
* };
*
* int main() {
* set_seed(100);
* symmetric_dirichlet<double> dir(3, 0.1);
*
* custom_vector<double> output(3);
* for (unsigned int i = 0; i < 2; i++) {
* sample(dir, output);
* print(output.elements, 3, stdout); print(' ', stdout);
* }
* }
* ```
*
* \tparam V satisfies [is_arithmetic](http://en.cppreference.com/w/cpp/types/is_arithmetic).
*/
template<typename V>
struct symmetric_dirichlet {
/**
* The type of the probabilities.
*/
typedef V value_type;
/**
* The value of each element of the concentration parameter.
*/
V pi;
/**
* The number of dimensions of the Dirichlet distribution.
*/
unsigned int atom_count;
/**
* The negative logarithm of `symmetric_dirichlet::atom_count`.
*/
V log_prob;
/**
* The sum of the elements in the concentration parameter vector. In the
* case of a symmetric Dirichlet distribution, this simply
* `symmetric_dirichlet::pi * symmetric_dirichlet::atom_count`.
*/
V total;
/**
* Constructs a symmetric Dirichlet distribution by copying the fields from
* the given symmetric Dirichlet distribution.
*/
symmetric_dirichlet(const symmetric_dirichlet<V>& prior) :
pi(prior.pi), atom_count(prior.atom_count), log_prob(-log(atom_count)), total(prior.pi * atom_count) { }
/**
* Constructs a symmetric Dirichlet distribution with the given dimension
* and concentration parameter.
*/
symmetric_dirichlet(unsigned int atom_count, const V& prior) :
pi(prior), atom_count(atom_count), log_prob(-log(atom_count)), total(prior * atom_count) { }
/**
* Checks if symmetric_dirichlet::atom_count is smaller than the given
* `new_atom_count`. If so, symmetric_dirichlet::atom_count is set to
* `new_atom_count` and symmetric_dirichlet::total is updated accordingly.
*/
inline void ensure_atom_count(unsigned int new_atom_count) {
if (new_atom_count > atom_count)
atom_count = new_atom_count;
total = pi * atom_count;
}
/**
* Returns the sum of the elements in the concentration parameter vector
* (symmetric_dirichlet::total).
*/
inline V sum() const {
return total;
}
/**
* Returns the maximum of the elements in the concentration parameter
* vector. In the case of the symmetric Dirichlet distribution, this is
* simply symmetric_dirichlet::pi.
*/
inline V max() const {
return pi;
}
/**
* Returns the element at the given `index` in the concentration parameter
* vector. In the case of the symmetric Dirichlet distribution, this
* function always returns symmetric_dirichlet::pi.
*/
inline V get_at_index(unsigned int index) const {
return pi;
}
/**
* Returns the element at the given `atom` in the concentration parameter
* vector. The atom is a non-empty element drawn from a categorical
* distribution with a Dirichlet prior. In the case of the symmetric
* Dirichlet distribution, this function always returns
* symmetric_dirichlet::pi.
*/
template<typename K>
inline V get_for_atom(const K& atom) const {
return pi;
}
template<typename K>
inline void add(sparse_vector<K, V>& vector) const {
vector.unspecified = pi;
}
inline void print(FILE* out) const {
fprintf(out, "pi: %lf\n", pi);
fprintf(out, "atom count: %u\n", atom_count);
}
/**
* Returns the parameters that may be used to construct this distribution,
* using either the constructor or init.
*/
inline const symmetric_dirichlet<V>& get_parameters() const {
return *this;
}
/**
* Samples from this symmetric Dirichlet distribution and puts the result in `dst`.
* \tparam Destination a vector type that implements the public member
* function `set(unsigned int, const V&)`.
*/
template<typename Destination>
inline void sample(Destination& dst) const {
std::gamma_distribution<V> distribution(pi, 1.0);
V sum = 0.0;
for (unsigned int i = 0; i < atom_count; i++) {
V value = distribution(engine);
dst.set(i, value);
sum += value;
}
for (unsigned int i = 0; i < atom_count; i++)
dst.set(i, dst.get(i) / sum);
}
/**
* Returns the log probability of a single observation `atom`, drawn from a
* Dirichlet-categorical distribution, where the Dirichlet is represented
* by this object. In the case of the symmetric Dirichlet distribution,
* this function always returns symmetric_dirichlet::log_prob.
*/
V log_probability(unsigned int item) const {
return log_prob;
}
template<typename Metric>
static inline long unsigned int size_of(const symmetric_dirichlet<V>& distribution, const Metric& metric) {
return core::size_of(distribution.atom_count) + core::size_of(distribution.log_prob)
+ core::size_of(distribution.pi) + core::size_of(distribution.total);
}
/**
* Frees the given symmetric Dirichlet distribution. Since
* symmetric_dirichlet does not allocate additional memory, this function
* is a no-op.
*/
static inline void free(symmetric_dirichlet<V>& distribution) { }
private:
bool initialize(unsigned int length, const V& pi_src) {
pi = pi_src;
atom_count = length;
log_prob = -log(atom_count);
total = pi * atom_count;
return true;
}
bool initialize(unsigned int length, const V* pi_src) {
fprintf(stderr, "symmetric_dirichlet_prior.initialize ERROR: "
"Dense parameterization for the prior is not supported"
" with a symmetric Dirichlet prior.");
return false;
}
template<typename A>
friend bool init(
symmetric_dirichlet<A>& prior,
const symmetric_dirichlet<A>& src);
};
/**
* Initializes the given symmetric_dirichlet `distribution` by copying its
* fields from the given symmetric_dirichlet `src`.
*/
template<typename V>
inline bool init(symmetric_dirichlet<V>& distribution,
const symmetric_dirichlet<V>& src)
{
return distribution.initialize(src.atom_count, src.pi);
}
/**
* Since a symmetric Dirichlet is a special case of a Dirichlet distribution,
* this function prints an error and exits.
*/
template<typename V>
inline bool init(symmetric_dirichlet<V>& distribution, const dirichlet<V>& src) {
fprintf(stderr, "init ERROR: Unsupported initialization of "
"symmetric_dirichlet_prior with a dirichlet_prior argument.\n");
exit(EXIT_FAILURE);
}
/**
* Reads a symmetric_dirichlet `distribution` from `in`.
*/
template<typename V>
inline bool read(symmetric_dirichlet<V>& distribution, FILE* in) {
if (!read(distribution.pi, in)) return false;
if (!read(distribution.atom_count, in)) return false;
distribution.log_prob = -log(distribution.atom_count);
return true;
}
/**
* Writes the symmetric_dirichlet `distribution` to `out`.
*/
template<typename V>
inline bool write(const symmetric_dirichlet<V>& distribution, FILE* out) {
if (!write(distribution.pi, out)) return false;
return write(distribution.atom_count, out);
}
/**
* Samples from the given symmetric Dirichlet `distribution` and puts the result in `dst`.
* \tparam Destination a vector type that implements the public member function
* `set(unsigned int, const V&)`.
*/
template<typename V, typename Destination>
inline bool sample(const symmetric_dirichlet<V>& distribution, Destination& dst) {
distribution.sample(dst);
return true;
}
/**
* A structure representing a finite [Dirichlet distribution](https://en.wikipedia.org/wiki/Dirichlet_distribution),
* which is a generalization of the symmetric Dirichlet distribution (see struct symmetric_dirichlet).
*
* The following example constructs a Dirichlet distribution with dimension 3,
* and concentration parameter [10, 1, 2]. It proceeds to generate two samples
* from this distribution. The expected output is
* `[0.491806, 0.023958, 0.484236] [0.804916, 0.019964, 0.175120]`.
*
* ```{.cpp}
* #include <math/distributions.h>
* using namespace core;
*
* template<typename V>
* struct custom_vector {
* V* elements;
*
* custom_vector(unsigned int length) {
* elements = (V*) malloc(sizeof(V) * length);
* }
*
* ~custom_vector() { free(elements); }
*
* inline V get(unsigned int index) const {
* return elements[index];
* }
*
* void set(unsigned int index, const V& value) {
* elements[index] = value;
* }
* };
*
* int main() {
* set_seed(100);
* double alpha[] = {10.0, 1.0, 2.0};
* dirichlet<double> dir(3, alpha);
*
* custom_vector<double> output(3);
* for (unsigned int i = 0; i < 2; i++) {
* sample(dir, output);
* print(output.elements, 3, stdout); print(' ', stdout);
* }
* }
* ```
*
* \tparam V satisfies [is_arithmetic](http://en.cppreference.com/w/cpp/types/is_arithmetic).
*/
template<typename V>
struct dirichlet {
/**
* The type of the probabilities.
*/
typedef V value_type;
/**
* The concentration parameter.
*/
V* pi;
/**
* The sum of all elements in the concentration parameter dirichlet::pi.
*/
V pi_sum;
/**
* The number of dimensions of this Dirichlet distribution.
*/
unsigned int atom_count;
/**
* Constructs a Dirichlet distribution by copying the fields from the given
* symmetric Dirichlet distribution.
*/
dirichlet(const symmetric_dirichlet<V>& prior) {
if (!initialize(prior.atom_count, prior.prior))
exit(EXIT_FAILURE);
}
/**
* Constructs a Dirichlet distribution by copying the fields from the given
* Dirichlet distribution.
*/
dirichlet(const dirichlet<V>& prior) {
if (!initialize(prior.atom_count, prior.pi))
exit(EXIT_FAILURE);
}
/**
* Constructs a Dirichlet distribution with the given dimension
* `atom_count` and symmetric concentration parameter `prior`.
*/
dirichlet(unsigned int atom_count, const V& prior) {
if (!initialize(atom_count, prior))
exit(EXIT_FAILURE);
}
/**
* Constructs a Dirichlet distribution with the given dimension
* `atom_count` and concentration parameter vector `prior`.
*/
dirichlet(unsigned int atom_count, const V* prior) {
if (!initialize(atom_count, prior))
exit(EXIT_FAILURE);
}
~dirichlet() { free(); }
/**
* Checks if dirichlet::atom_count is smaller than the given
* `new_atom_count`. If so, dirichlet::atom_count is set to
* `new_atom_count`.
*/
inline void ensure_atom_count(unsigned int new_atom_count) {
if (new_atom_count <= atom_count)
return;
fprintf(stderr, "dirichlet.ensure_atom_count ERROR: This is not implemented.\n");
}
/**
* Returns the sum of the elements in the concentration parameter vector
* (dirichlet::pi_sum).
*/
inline V sum() const {
return pi_sum;
}
/**
* Returns the element at the given `index` in the concentration parameter
* vector. This function does not perform any bounds checking.
*/
inline V get_at_index(unsigned int index) const {
return pi[index];
}
/**
* Returns the element at the given `atom` in the concentration parameter
* vector. The atom is a non-zero unsigned integer drawn from a categorical
* distribution with a Dirichlet prior. Thus, the atom `n` corresponds to
* the index `n - 1`. This function does not perform any bounds checking.
*/
inline V get_for_atom(unsigned int atom) const {
return pi[atom - 1];
}
template<typename K>
void add(sparse_vector<K, V>& vector) const {
for (unsigned int i = 0; i < vector.length; i++)
vector.set(i + 1, pi[i]);
}
void print(FILE* out) const {
if (atom_count == 0) {
fprintf(out, "pi: []\n");
return;
}
fprintf(out, "pi: [%lf", pi[0]);
for (unsigned int i = 1; i < atom_count; i++)
fprintf(out, ", %lf", pi[i]);
fprintf(out, "]\n");
fprintf(out, "atom count: %u\n", atom_count);
}
/**
* Returns the parameters that may be used to construct this distribution,
* using either the constructor or init.
*/
inline const dirichlet<V>& get_parameters() const {
return *this;
}
/**
* Samples from this Dirichlet distribution and puts the result in `dst`.
* \tparam Destination a vector type that implements the public member
* function `set(unsigned int, const V&)`.
*/
template<typename Destination>
inline void sample(Destination& dst) const {
V sum = 0.0;
for (unsigned int i = 0; i < atom_count; i++) {
std::gamma_distribution<V> distribution(pi[i], 1.0);
V value = distribution(engine);
dst.set(i, value);
sum += value;
}
for (unsigned int i = 0; i < atom_count; i++)
dst.set(i, dst.get(i) / sum);
}
/**
* Returns the log probability of a single observation `atom`, drawn from a
* Dirichlet-categorical distribution, where the Dirichlet is represented
* by this object.
*/
V log_probability(unsigned int atom) const {
return log(pi[atom - 1]) - log(pi_sum);
}
/* NOTE: this function assumes that the type V has constant size */
template<typename Metric>
static inline long unsigned int size_of(const dirichlet<V>& distribution, const Metric& metric) {
return core::size_of(distribution.atom_count) + core::size_of(distribution.pi_sum) + sizeof(V) * distribution.atom_count;
}
/**
* Frees dirichlet::pi in the given Dirichlet distribution.
*/
static inline void free(dirichlet<V>& distribution) {
distribution.free();
}
private:
bool initialize(unsigned int length, const V& pi_src) {
atom_count = length;
pi = (V*) malloc(sizeof(V) * atom_count);
if (pi == NULL) {
fprintf(stderr, "dirichlet_prior.initialize ERROR: Out of memory.\n");
return false;
}
for (unsigned int i = 0; i < atom_count; i++)
pi[i] = pi_src;
pi_sum = pi_src * atom_count;
return true;
}
bool initialize(unsigned int length, const V* pi_src) {
atom_count = length;
pi = (V*) malloc(sizeof(V) * atom_count);
if (pi == NULL) {
fprintf(stderr, "dirichlet_prior.initialize ERROR: Out of memory.\n");
return false;
}
pi_sum = 0.0;
for (unsigned int i = 0; i < atom_count; i++) {
pi[i] = pi_src[i];
pi_sum += pi_src[i];
}
return true;
}
inline void free() {
core::free(pi);
}
template<typename K>
friend bool init(dirichlet<K>&, const symmetric_dirichlet<K>&);
template<typename K>
friend bool init(dirichlet<K>&, const dirichlet<K>&);
};
/**
* Initializes the given Dirichlet `distribution` by copying its fields from
* the given symmetric_dirichlet distribution `src`.
*/
template<typename V>
inline bool init(dirichlet<V>& distribution,
const symmetric_dirichlet<V>& src)
{
return distribution.initialize(src.atom_count, src.pi);
}
/**
* Initializes the given Dirichlet `distribution` by copying its fields from
* the given Dirichlet distribution `src`.
*/
template<typename V>
inline bool init(dirichlet<V>& distribution, const dirichlet<V>& src)
{
return distribution.initialize(src.atom_count, src.pi);
}
/**
* Reads a dirichlet `distribution` from `in`.
*/
template<typename V>
inline bool read(dirichlet<V>& distribution, FILE* in) {
if (!read(distribution.atom_count, in))
return false;
distribution.pi = (V*) malloc(sizeof(V) * distribution.atom_count);
if (distribution.pi == NULL) return false;
if (!read(distribution.pi, in, distribution.atom_count)) {
free(distribution.pi);
return false;
}
distribution.pi_sum = 0.0;
for (unsigned int i = 0; i < distribution.atom_count; i++)
distribution.pi_sum += distribution.pi[i];
return true;
}
/**
* Writes the given dirichlet `distribution` to `out`.
*/
template<typename V>
inline bool write(const dirichlet<V>& distribution, FILE* out) {
if (!write(distribution.atom_count, out)) return false;
return write(distribution.pi, out, distribution.atom_count);
}
/**
* Samples from the given Dirichlet `distribution` and puts the result in `dst`.
* \tparam Destination a vector type that implements the public member function
* `set(unsigned int, const V&)`.
*/
template<typename V, typename Destination>
inline bool sample(const dirichlet<V>& distribution, Destination& dst) {
distribution.sample(dst);
return true;
}
/**
* <!-- Some useful type traits for Dirichlet distribution structures. -->
*/
/**
* A type trait that is [true_type](http://en.cppreference.com/w/cpp/types/integral_constant)
* if and only if `T` is either symmetric_dirichlet or dirichlet.
*/
template<typename T>
struct is_dirichlet : std::false_type { };
template<typename V>
struct is_dirichlet<symmetric_dirichlet<V>> : std::true_type { };
template<typename V>
struct is_dirichlet<dirichlet<V>> : std::true_type { };
/**
* This struct represents a categorical distribution, where the probabilities
* are represented as a sum of uniform and a non-uniform component. The
* non-uniform component is stored as a core::hash_map from atoms to
* probabilities. The dense_categorical struct instead stores all probabilities
* contiguously as a single native array. dense_categorical should be used if
* the dimension is small or if the probabilities cannot be easily represented
* sparsely as a sum of uniform and non-uniform components. Unlike
* dense_categorical, the observations do not necessarily have type `unsigned
* int`, and can have generic type `K`.
*
* The following is an example where a sparse_categorical distribution is
* constructed over the domain <code>{'a', 'b', 'c', 'd', 'e'}</code>. The probability of
* each event is specified and 10 samples are drawn. The expected output is
* `d, b, d, d, b, c, a, b, e, e,`.
* ```{.cpp}
* #include <math/distributions.h>
* using namespace core;
*
* int main() {
* set_seed(100);
* sparse_categorical<char, double> categorical(5);
* categorical.set('a', 0.1);
* categorical.set('b', 0.4);
* categorical.set('c', 0.1);
* categorical.set('d', 0.2);
* categorical.set('e', 0.2);
*
* for (unsigned int i = 0; i < 10; i++) {
* char c;
* sample(categorical, c);
* printf("%c, ", c);
* }
* }
* ```
*
* \tparam K the generic type of the observations. `K` must satisfy either:
* 1. [is_fundamental](http://en.cppreference.com/w/cpp/types/is_fundamental),
* 2. [is_enum](http://en.cppreference.com/w/cpp/types/is_enum),
* 3. [is_pointer](http://en.cppreference.com/w/cpp/types/is_pointer),
* 4. implements the public static method `unsigned int hash(const T&)`,
* the public static method `void is_empty(const T&)`, implements the
* operators `==`, satisfies [CopyAssignable](https://en.cppreference.com/w/cpp/named_req/CopyAssignable),
* and core::is_moveable. **NOTE:** The first argument to the `==`
* operator may be empty.
* \tparam V satisfies [is_arithmetic](http://en.cppreference.com/w/cpp/types/is_arithmetic).
*/
template<typename K, typename V>
struct sparse_categorical
{
/**
* The type of the probabilities.
*/
typedef V value_type;
/**
* A hash_map that encodes the non-uniform component of the categorical
* distribution. It maps from atoms to pairs, where the first entry in the
* pair contains the probability and the second entry contains the log
* probability.
*/
hash_map<K, pair<V, V>> probabilities;
/**
* The number of dimensions of the categorical distribution.
*/
unsigned int atom_count;
/**
* Stores the probability of every atom in the uniform component of the
* categorical distribution (i.e. every atom that is not a key in
* sparse_categorical::probabilities).
*/
V prob;
/**
* Stores the total probability mass in the non-uniform component of the
* categorical distribution (i.e. the sum of the probabilities in
* sparse_categorical::probabilities).
*/
V dense_prob;
/**
* The natural logarithm of sparse_categorical::prob.
*/
V log_prob;
/**
* Initializes this categorical distribution with the given dimension
* `atom_count`, setting all probabilities to zero.
*/
sparse_categorical(unsigned int atom_count) :
probabilities(16), atom_count(atom_count),
prob(1.0 / atom_count), dense_prob(0.0), log_prob(-log(atom_count)) { }
/**
* Initializes this categorical distribution by copying its fields from the
* given sparse_categorical distribution `src`.
*/
sparse_categorical(const sparse_categorical<K, V>& src) : probabilities(src.probabilities.table.capacity),
atom_count(src.atom_count), prob(src.prob), dense_prob(src.dense_prob), log_prob(src.log_prob)
{
if (!initialize(src))
exit(EXIT_FAILURE);
}
~sparse_categorical() { free(); }
/**
* Sets the `probability` of the given observation `key`. This function
* will make the key part of the non-uniform component of the categorical
* distribution.
*/
bool set(const K& key, const V& probability) {
if (!probabilities.check_size())
return false;
bool contains; unsigned int index;
pair<V, V>& value = probabilities.get(key, contains, index);
if (!contains) {
probabilities.table.keys[index] = key;
probabilities.table.size++;
value.key = probability;
value.value = log(probability);
} else dense_prob -= value.key;
dense_prob += probability;
if (probabilities.table.size >= atom_count) {
prob = 0.0;
log_prob = -std::numeric_limits<V>::infinity();
} else {
prob = (1.0 - dense_prob) / (atom_count - probabilities.table.size);
log_prob = log(prob);
}
return true;
}
/**
* Computes the conditional probability of observing the given `item` drawn
* from this constant distribution, *conditioned* on a collection of
* observations `conditioned`. This function assumes all the elements in
* `conditioned` are identical, and that `item` and `conditioned` have
* non-zero probability according to `prior`.
* \returns `true` if `item` is equivalent to the first element in `conditioned`.
* \returns `false` otherwise.
* \tparam PriorDistribution the type of the prior distribution. This function does not use `prior`.
*/
template<typename PriorDistribution>
static inline V conditional(const PriorDistribution& prior,
const K& item, const array_multiset<K>& conditioned)
{
for (unsigned int i = 0; i < conditioned.counts.size; i++) {
if (conditioned.counts.keys[i] == item) {
return (prior.get_for_atom(item) + conditioned.counts.values[i])
/ (prior.sum() + conditioned.total());
}
}
return (prior.get_for_atom(item)) / (prior.sum() + conditioned.total());
}
/**
* Returns the log probability of observing the given `item`, drawn from a
* categorical distribution, which is itself drawn from the given `prior`
* distribution, *conditioned* on the set of observations `conditioned`. It
* is assumed the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional(const PriorDistribution& prior,
const K& item, const array_multiset<K>& conditioned)
{
for (unsigned int i = 0; i < conditioned.counts.size; i++) {
if (conditioned.counts.keys[i] == item) {
return log((prior.get_for_atom(item) + conditioned.counts.values[i]))
- log(prior.sum() + conditioned.total());
}
}
return log(prior.get_for_atom(item)) - log(prior.sum() + conditioned.total());
}
/**
* Returns the probability of observing the given collection of `items`,
* each drawn independently and identically from a categorical
* distribution, which is itself drawn from the given `prior` distribution,
* *conditioned* on the set of observations `conditioned`. It is assumed
* the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional(const PriorDistribution& prior,
const array_multiset<K>& items, const array_multiset<K>& conditioned)
{
V log_probability = 0.0;
for (unsigned int i = 0; i < items.counts.size; i++) {
bool contains;
unsigned int count = conditioned.counts.get(items.counts.keys[i], contains);
if (contains) {
log_probability += log_rising_factorial(
prior.get_for_atom(items.counts.keys[i]) + count, items.counts.values[i]);
} else {
log_probability += log_rising_factorial(
prior.get_for_atom(items.counts.keys[i]), items.counts.values[i]);
}
}
return log_probability;
}
/**
* Returns the probability of the given observation `key`.
*/
inline V probability(const K& observation) const {
bool contains;
const pair<V, V>& entry = probabilities.get(observation, contains);
if (contains) return entry.key;
else return prob;
}
/**
* Returns the log probability of the given observation `key`.
*/
inline V log_probability(const K& observation) const {
bool contains;
const pair<V, V>& entry = probabilities.get(observation, contains);
if (contains) return entry.value;
else return log_prob;
}
/**
* Frees the given sparse_categorical `distribution` by releasing the
* memory resources associated with sparse_categorical::probabilities,
* along with all of its elements.
*/
static inline void free(sparse_categorical<K, V>& distribution) {
distribution.free();
core::free(distribution.probabilities);
}
private:
inline bool initialize(const sparse_categorical<K, V>& src) {
for (unsigned int i = 0; i < src.probabilities.table.capacity; i++) {
if (!is_empty(src.probabilities.table.keys[i])) {
if (!init(probabilities.table.keys[i], src.probabilities.table.keys[i])) {
set_empty(probabilities.table.keys[i]);
return false;
}
probabilities.values[i] = src.probabilities.values[i];
probabilities.table.size++;
}
}
return true;
}
inline void free() {
for (auto entry : probabilities)
core::free(entry.key);
}
template<typename A, typename B>
friend bool init(sparse_categorical<A, B>&, const sparse_categorical<A, B>&);
};
/**
* Initializes the given sparse_categorical `distribution` by copying its
* fields from the given sparse_categorical distribution `src`.
*/
template<typename K, typename V>
inline bool init(sparse_categorical<K, V>& distribution, const sparse_categorical<K, V>& src) {
distribution.atom_count = src.atom_count;
distribution.prob = src.prob;
distribution.log_prob = src.log_prob;
distribution.dense_prob = src.dense_prob;
if (!hash_map_init(distribution.probabilities, src.probabilities.table.capacity)) {
fprintf(stderr, "init ERROR: Unable to initialize hash_map in sparse_categorical.\n");
return false;
} else if (!distribution.initialize(src)) {
core::free(distribution.probabilities);
return false;
}
return true;
}
/**
* Draws a sample from the given sparse_categorical `distribution` and stores
* it in `output`. It is possible that the observation will be sampled from the
* uniform component of the sparse categorical distribution, in which case this
* function will print an error and return `false`.
* \tparam K satisfies core::is_copyable.
*/
template<typename K, typename V>
inline bool sample(const sparse_categorical<K, V>& distribution, K& output)
{
V random = sample_uniform<V>();
if (random < distribution.dense_prob
|| distribution.probabilities.table.size >= distribution.atom_count)
{
V aggregator = 0.0;
const K* last = NULL;
for (const auto& entry : distribution.probabilities) {
last = &entry.key;
aggregator += entry.value.key;
if (random < aggregator)
return copy(entry.key, output);
}
return copy(*last, output);
} else {
fprintf(stderr, "sample ERROR: We sampled an object "
"not belonging to the hash_map of known objects.");
return false;
}
}
/**
* This struct represents a categorical distribution, where the probabilities
* are stored contiguously in a native array of type `V`. The
* sparse_categorical struct instead stores only the non-uniform probabilities
* in a core::hash_map. sparse_categorical should be used if the dimension is
* large and the probabilities can be represented sparsely as a sum of uniform
* and non-uniform components.
* **NOTE:** This distribution assumes the observations have type
* `unsigned int` and take values in `{1, ..., dense_categorical::atom_count}`.
*
* In the following example, we define a categorical distribution over the
* ASCII characters (encoded as positive integers from 1 through 256).
* ASCII characters (encoded as positive integers from 1 through 256). Positive
* probability is only assigned to the characters <code>{'a', 'b', 'c', 'd', 'e'}</code>,
* and we draw 10 samples from the distribution. The expected output of the
* example is `a, c, b, b, d, e, b, e, b, b, `.
* ```{.cpp}
* #include <math/distributions.h>
* using namespace core;
*
* template<typename V>
* inline void set_probability(
* dense_categorical<V>& categorical,
* unsigned int atom, const V& probability)
* {
* categorical.phi[atom - 1] = probability;
* }
*
* int main() {
* set_seed(100);
* dense_categorical<double> categorical(256);
* set_probability(categorical, 'a', 0.1);
* set_probability(categorical, 'b', 0.4);
* set_probability(categorical, 'c', 0.1);
* set_probability(categorical, 'd', 0.2);
* set_probability(categorical, 'e', 0.2);
*
* for (unsigned int i = 0; i < 10; i++) {
* unsigned int output;
* sample(categorical, output);
* printf("%c, ", output);
* }
* }
* ```
*
* \tparam V satisfies [is_arithmetic](http://en.cppreference.com/w/cpp/types/is_arithmetic).
*/
template<typename V>
struct dense_categorical
{
/**
* The type of the probabilities.
*/
typedef V value_type;
/**
* The native array containing the probabilities of each event.
*/
V* phi;
/**
* The number of dimensions of the categorical distribution.
*/
unsigned int atom_count;
/**
* Initializes this categorical distribution with the given dimension
* `atom_count`, setting all probabilities to zero.
*/
dense_categorical(unsigned int atom_count) : atom_count(atom_count) {
phi = (V*) calloc(atom_count, sizeof(V));
if (phi == NULL) {
fprintf(stderr, "dense_categorical ERROR: Insufficient memory for phi.\n");
exit(EXIT_FAILURE);
}
}
/**
* Initializes this categorical distribution by copying the fields from the
* given dense_categorical distribution `src`.
*/
dense_categorical(const dense_categorical& src) : atom_count(src.atom_count) {
phi = (V*) malloc(atom_count * sizeof(V));
if (phi == NULL) {
fprintf(stderr, "dense_categorical ERROR: Insufficient memory for phi.\n");
exit(EXIT_FAILURE);
}
memcpy(phi, src.phi, sizeof(V) * atom_count);
}
~dense_categorical() { free(); }
/**
* Returns the element at the given `index` of the probability vector
* dense_categorical::phi. This is equivalent to the probability of
* observing `index + 1` drawn from this distribution.
*/
inline V get(unsigned int index) const {
return phi[index];
}
/**
* Returns the element at the index `atom - 1` of the probability vector
* dense_categorical::phi. This is equivalent to the probability of
* observing `atom` drawn from this distribution.
*/
inline V get_for_atom(unsigned int atom) const {
return phi[atom - 1];
}
/**
* It is assumed that the probabilities in dense_categorical::phi sum to 1,
* and so this function always returns 1.0.
*/
inline V sum() const {
return 1.0;
}
void ensure_atom_count(unsigned int new_atom_count) {
if (new_atom_count <= atom_count)
return;
fprintf(stderr, "dense_categorical.set_atom_count ERROR: This is not implemented.\n");
}
/**
* Returns the probability of observing `item` drawn from this distribution.
*/
inline V probability(unsigned int item) const {
#if !defined(NDEBUG)
if (item == 0) {
fprintf(stderr, "dense_categorical.conditional ERROR: Given item is zero.\n");
return std::numeric_limits<V>::signaling_NaN();
}
#endif
return phi[item - 1];
}
/**
* Returns the joint probability of observing the given collection of
* `items`, drawn independently and identically from this distribution.
*/
inline V probability(const array_multiset<unsigned int>& items) const {
V value = 1.0;
for (unsigned int i = 0; i < items.counts.size; i++)
value *= pow(probability(items.counts.keys[i]), items.counts.values[i]);
return value;
}
/**
* Returns the log probability of observing `item` drawn from this distribution.
*/
inline V log_probability(unsigned int item) const {
return log(phi[item - 1]);
}
/**
* Returns the joint log probability of observing the given collection of
* `items`, drawn independently and identically from this distribution.
*/
inline V log_probability(const array_multiset<unsigned int>& items) const {
V value = 0.0;
for (const auto& entry : items.counts)
value = log_probability(entry.key) * entry.value;
return value;
}
/**
* Returns the probability of observing the given `item`, drawn from a
* categorical distribution, which is itself drawn from the given `prior`
* distribution. It is assumed the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V probability(const PriorDistribution& prior, unsigned int item) {
return prior.get_for_atom(item) / prior.sum();
}
/**
* Returns the log probability of observing the given `item`, drawn from a
* categorical distribution, which is itself drawn from the given `prior`
* distribution. It is assumed the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_probability(const PriorDistribution& prior, unsigned int item) {
return prior.log_probability(item);
}
/**
* Returns the probability of observing the given collection of `items`,
* each drawn independently and identically from a categorical
* distribution, which is itself drawn from the given `prior` distribution.
* It is assumed the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_probability(const PriorDistribution& prior, const array_multiset<unsigned int>& items)
{
V log_probability = 0.0;
for (unsigned int i = 0; i < items.counts.size; i++)
log_probability += log_rising_factorial(
prior.get_for_atom(items.counts.keys[i]), items.counts.values[i]);
return log_probability - log_rising_factorial(prior.sum(), items.total());
}
/**
* Returns the probability of observing the given `item`, drawn from a
* categorical distribution, which is itself drawn from the given `prior`
* distribution, *conditioned* on the set of observations `conditioned`. It
* is assumed the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V conditional(const PriorDistribution& prior,
unsigned int item, const array_multiset<unsigned int>& conditioned)
{
return sparse_categorical<unsigned int, V>::conditional(prior, item, conditioned);
}
/**
* Returns the log probability of observing the given `item`, drawn from a
* categorical distribution, which is itself drawn from the given `prior`
* distribution, *conditioned* on the set of observations `conditioned`. It
* is assumed the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional(const PriorDistribution& prior,
unsigned int item, const array_multiset<unsigned int>& conditioned)
{
return sparse_categorical<unsigned int, V>::log_conditional(prior, item, conditioned);
}
/**
* Returns the log probability of observing the given `item`, drawn from a
* categorical distribution, which is itself drawn from the given `prior`
* distribution, *conditioned* on the set of observations `conditioned`. It
* is assumed the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional(const PriorDistribution& prior,
unsigned int item, const hash_multiset<unsigned int>& conditioned)
{
bool contains;
unsigned int count = conditioned.counts.get(item, contains);
if (contains) {
return fasterlog(prior.get_for_atom(item) + count);
} else {
return fasterlog(prior.get_for_atom(item));
}
}
/**
* Returns the log probability of observing the given item `holdout`, drawn
* from a categorical distribution, which is itself drawn from the given
* `prior` distribution, *conditioned* on the set of observations
* `conditioned` \ { `holdout` } (the set of conditioned observations with
* `holdout` being subtracted). It is assumed the given `prior` is a
* Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional_without(const PriorDistribution& prior,
unsigned int holdout, const array_multiset<unsigned int>& conditioned)
{
for (unsigned int i = 0; i < conditioned.counts.size; i++) {
if (conditioned.counts.keys[i] == holdout) {
return log(prior.get_for_atom(holdout) + conditioned.counts.values[i] - 1)
- log(prior.sum() + conditioned.total() - 1);
}
}
return log(prior.get_for_atom(holdout)) - log(prior.sum() + conditioned.total());
}
/**
* Returns the log probability of observing the given item `holdout`, drawn
* from a categorical distribution, which is itself drawn from the given
* `prior` distribution, *conditioned* on the set of observations
* `conditioned` \ { `holdout` } (the set of conditioned observations with
* `holdout` being subtracted). It is assumed the given `prior` is a
* Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional_without(const PriorDistribution& prior,
unsigned int holdout, const hash_multiset<unsigned int>& conditioned)
{
bool contains;
unsigned int count = conditioned.counts.get(holdout, contains);
if (contains) {
return log(prior.get_for_atom(holdout) + count - 1);
} else {
return log(prior.get_for_atom(holdout));
}
}
/**
* Returns the log probability of observing the given collection of
* `items`, each drawn independently and identically from a categorical
* distribution, which is itself drawn from the given `prior` distribution,
* *conditioned* on the set of observations `conditioned`. It is assumed
* the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional(const PriorDistribution& prior,
const array_multiset<unsigned int>& items, const array_multiset<unsigned int>& conditioned)
{
return sparse_categorical<unsigned int, V>::log_conditional(prior, items, conditioned);
}
/**
* Returns the log probability of observing the given collection of
* `items`, each drawn independently and identically from a categorical
* distribution, which is itself drawn from the given `prior` distribution,
* *conditioned* on the set of observations `conditioned`. It is assumed
* the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional(const PriorDistribution& prior,
const array_multiset<unsigned int>& items, const hash_multiset<unsigned int>& conditioned)
{
V log_probability = 0.0;
for (unsigned int i = 0; i < items.counts.size; i++) {
bool contains;
unsigned int count = conditioned.counts.get(items.counts.keys[i], contains);
if (contains) {
log_probability += log_rising_factorial(
prior.get_for_atom(items.counts.keys[i]) + count, items.counts.values[i]);
} else {
log_probability += log_rising_factorial(
prior.get_for_atom(items.counts.keys[i]), items.counts.values[i]);
}
}
return log_probability;
}
/**
* Returns the log probability of observing the given collection of items
* `holdout`, each drawn independently and identically from a categorical
* distribution, which is itself drawn from the given `prior` distribution,
* *conditioned* on the set of observations `conditioned` \ `holdout` (the
* set of conditioned observations with `holdout` being subtracted). It is
* assumed the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional_without(const PriorDistribution& prior,
const array_multiset<unsigned int>& holdout, const array_multiset<unsigned int>& conditioned)
{
V log_probability = 0.0;
unsigned int i = 0, j = 0;
while (i < holdout.counts.size && j < conditioned.counts.size) {
if (holdout.counts.keys[i] == conditioned.counts.keys[j]) {
#if !defined(NDEBUG)
if (conditioned.counts.values[j] < holdout.counts.values[i])
fprintf(stderr, "dense_categorical.conditional_without WARNING:"
" 'holdout' is not a subset of 'conditioned'.\n");
#endif
log_probability += log_rising_factorial(prior.get_for_atom(holdout.counts.keys[i])
+ conditioned.counts.values[j] - holdout.counts.values[i], holdout.counts.values[i]);
i++; j++;
} else {
j++;
}
}
return log_probability - log_rising_factorial(
prior.sum() + conditioned.total() - holdout.total(), holdout.total());
}
/**
* Returns the log probability of observing the given collection of items
* `holdout`, each drawn independently and identically from a categorical
* distribution, which is itself drawn from the given `prior` distribution,
* *conditioned* on the set of observations `conditioned` \ `holdout` (the
* set of conditioned observations with `holdout` being subtracted). It is
* assumed the given `prior` is a Dirichlet.
* \tparam PriorDistribution a distribution type with public member
* functions `V get_for_atom(unsigned int)` and `V sum()`.
*/
template<typename PriorDistribution>
static inline V log_conditional_without(const PriorDistribution& prior,
const array_multiset<unsigned int>& holdout, const hash_multiset<unsigned int>& conditioned)
{
V log_probability = 0.0;
for (unsigned int i = 0; i < holdout.counts.size; i++) {
bool contains;
unsigned int count = conditioned.counts.get(holdout.counts.keys[i], contains);
if (contains) {
#if !defined(NDEBUG)
if (count < holdout.counts.values[i])
fprintf(stderr, "dense_categorical.conditional_without WARNING:"
" 'holdout' is not a subset of 'conditioned'.\n");
#endif
log_probability += log_rising_factorial(prior.get_for_atom(holdout.counts.keys[i])
+ count - holdout.counts.values[i], holdout.counts.values[i]);
}
}
return log_probability;
}
/**
* Moves the dense_categorical distribution from `src` into `dst`. This
* function simply copies the pointers, and does not initialize a new
* probability array.
*/
static inline void move(const dense_categorical<V>& src, dense_categorical<V>& dst) {
dst.phi = src.phi;
dst.atom_count = src.atom_count;
}
/**
* Copies the dense_categorical distribution from `src` into `dst`. This
* function initializes a new probability array in `dst` and copies into it
* the contents from the array in `src`.
*/
static inline bool copy(const dense_categorical<V>& src, dense_categorical<V>& dst) {
dst.phi = (V*) malloc(sizeof(V) * src.atom_count);
if (dst.phi == NULL) {
fprintf(stderr, "dense_categorical.copy ERROR: Out of memory.\n");
return false;
}
memcpy(dst.phi, src.phi, src.atom_count * sizeof(V));
dst.atom_count = src.atom_count;
return true;
}
/* NOTE: this function assumes that the type V has constant size */
template<typename Metric>
static inline long unsigned int size_of(const dense_categorical<V>& distribution, const Metric& metric) {
return sizeof(V) * distribution.atom_count
+ core::size_of(distribution.atom_count);
}
/**
* Frees the given categorical distribution, by releasing the resources
* associated with the native array dense_categorical::phi.
*/
static inline void free(dense_categorical<V>& distribution) {
distribution.free();
}
/**
* Returns the parameters that may be used to construct this distribution,
* using either the constructor or init. This particular function simply
* returns dense_categorical::atom_count.
*/
inline unsigned int get_parameters() const {
return atom_count;
}
private:
inline void free() {
core::free(phi);
}
};
/**
* Initializes the given categorical `distribution` with the given dimension
* `atom_count`, setting all probabilities to zero.
*/
template<typename V>
inline bool init(dense_categorical<V>& distribution, unsigned int atom_count) {
distribution.atom_count = atom_count;
distribution.phi = (V*) calloc(atom_count, sizeof(V));
if (distribution.phi == NULL) {
fprintf(stderr, "init ERROR: Insufficient memory for phi in the dense_categorical.\n");
return false;
}
return true;
}
/**
* Initializes the given categorical `distribution`, copying its fields from
* the given dense_categorical distribution `src`.
*/
template<typename V>
inline bool init(dense_categorical<V>& distribution, const dense_categorical<V>& src) {
distribution.atom_count = src.atom_count;
distribution.phi = (V*) malloc(src.atom_count * sizeof(V));
if (distribution.phi == NULL) {
fprintf(stderr, "init ERROR: Insufficient memory for phi in the dense_categorical.\n");
return false;
}
memcpy(distribution.phi, src.phi, sizeof(V) * src.atom_count);
return true;
}
/**
* Reads a dense_categorical `distribution` from `stream`.
* \tparam Stream satisfies is_readable.
*/
template<typename V, typename Stream>
bool read(dense_categorical<V>& distribution, Stream& stream) {
if (!read(distribution.atom_count, stream))
return false;
distribution.phi = (V*) malloc(distribution.atom_count * sizeof(V));
if (distribution.phi == NULL) {
fprintf(stderr, "read ERROR: Insufficient memory for phi in the dense_categorical.\n");
return false;
}
return read(distribution.phi, stream, distribution.atom_count);
}
/**
* Writes the given dense_categorical `distribution` to `stream`.
* \tparam Stream satisfies is_writeable.
*/
template<typename V, typename Stream>
bool write(const dense_categorical<V>& distribution, Stream& stream) {
return write(distribution.atom_count, stream)
&& write(distribution.phi, stream, distribution.atom_count);
}
/**
* Draws a sample from the given dense_categorical `distribution` and writes it to `output`.
*/
template<typename V>
inline bool sample(const dense_categorical<V>& distribution, unsigned int& output) {
output = sample_categorical(distribution.phi, 1.0, distribution.atom_count) + 1;
return true;
}
/**
* A struct that represents the [degenerate/constant distribution](https://en.wikipedia.org/wiki/Degenerate_distribution).
* \tparam K the type of the observation, which must implement the operator `==`.
*/
template<typename K>
struct constant
{
/**
* Computes the conditional probability of observing the given `item` drawn
* from this constant distribution, *conditioned* on a collection of
* observations `conditioned`. This function assumes all the elements in
* `conditioned` are identical, and that `item` and `conditioned` have
* non-zero probability according to `prior`.
* \returns `true` if `item` is equivalent to the first element in `conditioned`.
* \returns `false` otherwise.
* \tparam PriorDistribution the type of the prior distribution. This function does not use `prior`.
*/
template<typename PriorDistribution>
static inline bool conditional(const PriorDistribution& prior,
const K& item, const array_multiset<K>& conditioned)
{
return item == conditioned.counts.keys[0];
}
/**
* Computes the conditional probability of observing the given collection
* of `items`, each drawn independently and identically from this constant
* distribution, *conditioned* on a collection of observations
* `conditioned`. This function assumes all the elements in `items` are
* identical, all the elements in `conditioned` are identical, and that
* `items` and `conditioned` have non-zero probability according to `prior`.
* \returns `true` if the first element in `items` is equivalent to the first element in `conditioned`.
* \returns `false` otherwise.
* \tparam PriorDistribution the type of the prior distribution. This function does not use `prior`.
*/
template<typename PriorDistribution>
static inline bool conditional(const PriorDistribution& prior,
const array_multiset<K>& items, const array_multiset<K>& conditioned)
{
return items.counts.keys[0] == conditioned.counts.keys[0];
}
/**
* Computes the conditional log probability of observing the given `item`
* drawn from this constant distribution, *conditioned* on a collection of
* observations `conditioned`. This function assumes all the elements in
* `conditioned` are identical, and that `item` and `conditioned` have
* non-zero probability according to `prior`.
* \returns `0` if `item` is equivalent to the first element in `conditioned`.
* \returns `-inf` otherwise.
* \tparam PriorDistribution the type of the prior distribution. This function does not use `prior`.
*/
template<typename PriorDistribution>
static inline double log_conditional(const PriorDistribution& prior,
const K& item, const array_multiset<K>& conditioned)
{
if (item == conditioned.counts.keys[0])
return 0.0;
else return -std::numeric_limits<double>::infinity();
}
/**
* Computes the conditional probability of observing the given collection
* of `items`, each drawn independently and identically from this constant
* distribution, *conditioned* on a collection of observations
* `conditioned`. This function assumes all the elements in `items` are
* identical, all the elements in `conditioned` are identical, and that
* `items` and `conditioned` have non-zero probability according to `prior`.
* \returns `0` if the first element in `items` is equivalent to the first element in `conditioned`.
* \returns `-inf` otherwise.
* \tparam PriorDistribution the type of the prior distribution. This function does not use `prior`.
*/
template<typename PriorDistribution>
static inline double log_conditional(const PriorDistribution& prior,
const array_multiset<K>& items, const array_multiset<K>& conditioned)
{
if (items.counts.keys[0] == conditioned.counts.keys[0])
return 0.0;
else return -std::numeric_limits<double>::infinity();
}
/**
* Returns the probability of observing the given `item`, drawn from a
* constant distribution, which is itself drawn from the given `prior`
* distribution.
* \tparam PriorDistribution a distribution type that contains the typedef
* `value_type` implements the public member function `value_type probability(const K&)`.
*/
template<typename PriorDistribution>
static inline typename PriorDistribution::value_type probability(
const PriorDistribution& prior, const K& item)
{
return prior.probability(item);
}
/**
* Returns the probability of observing the given collection of `items`,
* each drawn independently and identically from a constant distribution,
* which is itself drawn from the given `prior` distribution.
* \tparam PriorDistribution a distribution type that contains the typedef
* `value_type` implements the public member function `value_type
* probability(const array_multiset<K>&)`.
*/
template<typename PriorDistribution>
static inline typename PriorDistribution::value_type probability(
const PriorDistribution& prior, const array_multiset<K>& items)
{
return prior.probability(items);
}
/**
* Returns the log probability of observing the given `item`, drawn from a
* constant distribution, which is itself drawn from the given `prior`
* distribution.
* \tparam PriorDistribution a distribution type that contains the typedef
* `value_type` implements the public member function `value_type log_probability(const K&)`.
*/
template<typename PriorDistribution>
static inline typename PriorDistribution::value_type log_probability(
const PriorDistribution& prior, const K& item)
{
return prior.log_probability(item);
}
/**
* Returns the log probability of observing the given collection of
* `items`, each drawn independently and identically from a constant
* distribution, which is itself drawn from the given `prior` distribution.
* \tparam PriorDistribution a distribution type that contains the typedef
* `value_type` implements the public member function `value_type
* log_probability(const array_multiset<K>&)`.
*/
template<typename PriorDistribution>
static inline typename PriorDistribution::value_type log_probability(
const PriorDistribution& prior, const array_multiset<K>& items)
{
return prior.log_probability(items);
}
/**
* Samples an observation from a constant distribution, which is itself
* drawn from a `prior` distribution, *conditioned* on the fact that the
* given set of `observations` was sampled from the constant distribution.
* The sample is written to `sample`. This function assumes the
* elements in `observations` are identical, and have non-zero probability
* according to `prior`.
* \tparam K satisfies core::is_copyable.
* \tparam PriorDistribution the type of the prior distribution. This function does not use `prior`.
*/
template<typename PriorDistribution>
static inline bool sample(const PriorDistribution& prior,
const array_multiset<K>& observations, K& sample)
{
return copy(observations.counts.keys[0], sample);
}
};
/**
* This struct represents a discrete uniform distribution, where probabilities
* have type `V`.
*/
template<typename V>
struct uniform_distribution {
/**
* The type of the probabilities.
*/
typedef V value_type;
/**
* The probability of each event.
*/
V prob;
/**
* The log probability of each event.
*/
V log_prob;
/**
* Constructs a uniform distribution with the given dimension/number of
* distinct events `count`.
*/
uniform_distribution(unsigned int count) :
prob(1.0 / count), log_prob(-log((V) count)) { }
/**
* Returns the probability of the given `observation`, which is
* equivalently uniform_distribution::prob.
*/
template<typename K>
inline V probability(const K& observation) const {
return prob;
}
/**
* Returns the log probability of the given `observation`, which is
* equivalently uniform_distribution::log_prob.
*/
template<typename K>
inline V log_probability(const K& observation) const {
return log_prob;
}
static inline void free(const uniform_distribution<V>& distribution) { }
};
template<typename V>
inline bool init(uniform_distribution<V>& distribution, unsigned int count) {
distribution.prob = 1.0 / count;
distribution.log_prob = -log((V) count);
return true;
}
/**
* This struct represents a distribution over sequences of events, where each
* event is drawn independently and identically from `ElementDistribution`,
* until a special "stop event" is drawn with probability
* sequence_distribution::end_probability. We assume the first event is not the
* stop event.
*
* The following example constructs a sequence_distribution where
* the ElementDistribution is a sparse_categorical distribution over the
* characters <code>{'a', 'b', 'c', 'd', 'e'}</code>. Five samples are drawn from the
* sequence distribution. The expected output is
* <code>'ddbc', 'eecdecdd', 'bbbb', 'dd', 'babababc', </code>.
*
* ```{.cpp}
* #include <math/distributions.h>
* using namespace core;
*
* int main() {
* set_seed(100);
* sparse_categorical<char, double> categorical(5);
* categorical.set('a', 0.1);
* categorical.set('b', 0.4);
* categorical.set('c', 0.1);
* categorical.set('d', 0.2);
* categorical.set('e', 0.2);
*
* sequence_distribution<sparse_categorical<char, double>> seq(categorical, 0.2);
*
* for (unsigned int i = 0; i < 5; i++) {
* string s;
* sample(seq, s);
* print("'", stdout);
* print(s, stdout);
* print("', ", stdout);
* }
* }
* ```
*
* \tparam ElementDistribution a distribution type that defines a public
* typedef `value_type` that indicates the type of the probabilities. Some
* operations in this class also require the public member functions
* `value_type probability(const T&)`, `value_type log_probability(const T&)`.
*/
template<typename ElementDistribution>
struct sequence_distribution
{
/**
* The type of the probabilities.
*/
typedef typename ElementDistribution::value_type V;
/**
* After the first event in the sequence, this is the probability that no
* further events are drawn, and the sequence ends.
*/
V end_probability;
/**
* The natural logarithm of sequence_distribution::end_probability.
*/
V log_end_probability;
/**
* The natural logarithm of (1 - sequence_distribution::end_probability).
*/
V log_not_end_probability;
/**
* The distribution from which each element in the sequence is drawn
* independently and identically.
*/
ElementDistribution element_distribution;
/**
* Constructs a sequence_distribution with the given
* sequence_distribution::element_distribution and
* sequence_distribution::end_probability.
* \tparam ElementDistribution a distribution type that can be constructed
* using a single parameter with type ElementDistribution&.
*/
sequence_distribution(ElementDistribution& element_distribution, V end_probability) :
end_probability(end_probability),
log_end_probability(log(end_probability)),
log_not_end_probability(log(1.0 - end_probability)),
element_distribution(element_distribution)
{ }
/**
* Returns the probability of the given observation `sequence` under this
* sequence distribution.
* \tparam SequenceType a sequence type that contains the integral field
* `length` that indicates the number of elements in the sequence, and
* implements the operator `[]` that returns an element at a
* particular index of the sequence.
*/
template<typename SequenceType>
inline V probability(const SequenceType& sequence) const {
if (sequence.length == 0) return 0.0;
V product = element_distribution.probability(sequence[0]);
for (unsigned int i = 1; i < sequence.length; i++)
product *= element_distribution.probability(sequence[i]) * (1.0 - end_probability);
return product * end_probability;
}
/**
* Returns the log probability of the given observation `sequence` under
* this sequence distribution.
* \tparam SequenceType a sequence type that contains the integral field
* `length` that indicates the number of elements in the sequence, and
* implements the operator `[]` that returns an element at a
* particular index of the sequence.
*/
template<typename SequenceType>
inline V log_probability(const SequenceType& sequence) const {
if (sequence.length == 0) return -std::numeric_limits<V>::infinity();
V sum = element_distribution.log_probability(sequence[0]);
for (unsigned int i = 1; i < sequence.length; i++)
sum += element_distribution.log_probability(sequence[i]) + log_not_end_probability;
return sum + log_end_probability;
}
/**
* Copies the sequence_distribution `src` into `dst`.
* \tparam ElementDistribution satisfies core::is_copyable.
*/
static inline bool copy(
const sequence_distribution<ElementDistribution>& src,
sequence_distribution<ElementDistribution>& dst)
{
dst.end_probability = src.end_probability;
dst.log_end_probability = src.log_end_probability;
dst.log_not_end_probability = src.log_not_end_probability;
return core::copy(src.element_distribution, dst.element_distribution);
}
/**
* Frees the given sequence `distribution`.
* \tparam ElementDistribution the type of the distribution of each element
* in the sequence. The function `core::free` is called with an
* argument of type `ElementDistribution&`.
*/
static inline void free(sequence_distribution<ElementDistribution>& distribution) {
core::free(distribution.element_distribution);
}
};
/**
* Initializes the given sequence_distribution `distribution` with the given
* sequence_distribution `src`.
* \tparam ElementDistribution a distribution type for which the function
* `bool init(ElementDistribution&, const ElementDistribution)` is implemented.
*/
template<typename ElementDistribution>
inline bool init(sequence_distribution<ElementDistribution>& distribution,
const sequence_distribution<ElementDistribution>& src)
{
distribution.end_probability = src.end_probability;
distribution.log_end_probability = src.log_end_probability;
distribution.log_not_end_probability = src.log_not_end_probability;
return init(distribution.element_distribution, src.element_distribution);
}
/**
* Reads the given sequence_distribution `distribution` from `stream`.
* \tparam Stream satisfies is_readable.
*/
template<typename ElementDistribution, typename Stream>
bool read(sequence_distribution<ElementDistribution>& distribution, Stream& stream)
{
if (!read(distribution.end_probability, stream))
return false;
distribution.log_end_probability = log(distribution.end_probability);
distribution.log_not_end_probability = log(1.0 - distribution.end_probability);
return read(distribution.element_distribution, stream);
}
/**
* Writes the given sequence_distribution `distribution` to `stream`.
* \tparam Stream satisfies is_readable.
*/
template<typename ElementDistribution, typename Stream>
bool write(const sequence_distribution<ElementDistribution>& distribution, Stream& stream)
{
return write(distribution.end_probability, stream)
&& write(distribution.element_distribution, stream);
}
/**
* Samples from the given sequence_distribution `distribution` and stores the result in `output`.
* \tparam SequenceType a sequence type for which the function
* `bool init(SequenceType&, unsigned int)` is implemented, which
* initializes the sequence with the given length. The operator `[]` must
* also be implemented, which returns a reference to the specified index
* in the sequence with type `T`, so that the function
* `bool sample(const ElementDistribution&, T&)` can be called. Finally,
* the function `core::free` may be called on the elements of the
* sequence, if the sampling fails.
*/
template<typename ElementDistribution, typename SequenceType>
bool sample(const sequence_distribution<ElementDistribution>& distribution, SequenceType& output)
{
/* first sample the length of the sequence */
unsigned int length = sample_geometric(distribution.end_probability) + 1;
if (!init(output, length)) return false;
for (unsigned int i = 0; i < length; i++) {
if (!sample(distribution.element_distribution, output[i])) {
for (unsigned int j = 0; j < i; j++)
free(output[j]);
return false;
}
}
return true;
}
#endif /* DISTRIBUTIONS_H_ */