uniform_distribution <- R6Class(
"uniform_distribution",
inherit = distribution_node,
public = list(
min = NA,
max = NA,
log_density = NULL,
initialize = function(min, max, dim) {
if (inherits(min, "greta_array") || inherits(max, "greta_array"))
stop("min and max must be fixed, they cannot be another greta array")
good_types <- is.numeric(min) && length(min) == 1 &&
is.numeric(max) && length(max) == 1
if (!good_types) {
stop("min and max must be numeric vectors of length 1",
call. = FALSE)
}
if (!is.finite(min) || !is.finite(max)) {
stop("min and max must be finite scalars",
call. = FALSE)
}
if (min >= max) {
stop("max must be greater than min",
call. = FALSE)
}
# store min and max as numeric scalars (needed in create_target, done in
# initialisation)
self$min <- min
self$max <- max
self$bounds <- c(min, max)
# initialize the rest
super$initialize("uniform", dim)
# add them as children and greta arrays
self$add_parameter(min, "min")
self$add_parameter(max, "max")
# the density is fixed, so calculate it now
self$log_density <- -log(max - min)
},
# default value (ignore any truncation arguments)
create_target = function(...) {
vble(truncation = c(self$min, self$max),
dim = self$dim)
},
tf_distrib = function(parameters, dag) {
tf_ld <- fl(self$log_density)
# weird hack to make TF see a gradient here: adding the flattened x times zero
# keeps x connected to the graph, so a (zero) gradient is defined
log_prob <- function(x) {
tf_ld + tf_flatten(x) * fl(0)
}
list(log_prob = log_prob, cdf = NULL, log_cdf = NULL)
}
)
)
normal_distribution <- R6Class(
"normal_distribution",
inherit = distribution_node,
public = list(
initialize = function(mean, sd, dim, truncation) {
mean <- as.greta_array(mean)
sd <- as.greta_array(sd)
# add the nodes as children and parameters
dim <- check_dims(mean, sd, target_dim = dim)
super$initialize("normal", dim, truncation)
self$add_parameter(mean, "mean")
self$add_parameter(sd, "sd")
},
tf_distrib = function(parameters, dag) {
tfp$distributions$Normal(loc = parameters$mean,
scale = parameters$sd)
}
)
)
lognormal_distribution <- R6Class(
"lognormal_distribution",
inherit = distribution_node,
public = list(
initialize = function(meanlog, sdlog, dim, truncation) {
meanlog <- as.greta_array(meanlog)
sdlog <- as.greta_array(sdlog)
dim <- check_dims(meanlog, sdlog, target_dim = dim)
check_positive(truncation)
self$bounds <- c(0, Inf)
super$initialize("lognormal", dim, truncation)
self$add_parameter(meanlog, "meanlog")
self$add_parameter(sdlog, "sdlog")
},
# Begin Exclude Linting
tf_distrib = function(parameters, dag) {
tfp$distributions$LogNormal(loc = parameters$meanlog,
scale = parameters$sdlog)
}
# End Exclude Linting
)
)
bernoulli_distribution <- R6Class(
"bernoulli_distribution",
inherit = distribution_node,
public = list(
prob_is_logit = FALSE,
prob_is_probit = FALSE,
initialize = function(prob, dim) {
prob <- as.greta_array(prob)
# add the nodes as children and parameters
dim <- check_dims(prob, target_dim = dim)
super$initialize("bernoulli", dim, discrete = TRUE)
if (has_representation(prob, "logit")) {
prob <- representation(prob, "logit")
self$prob_is_logit <- TRUE
} else if (has_representation(prob, "probit")) {
prob <- representation(prob, "probit")
self$prob_is_probit <- TRUE
}
self$add_parameter(prob, "prob")
},
tf_distrib = function(parameters, dag) {
if (self$prob_is_logit) {
tfp$distributions$Bernoulli(logits = parameters$prob)
} else if (self$prob_is_probit) {
# in the probit case, get the log probability of success and compute the
# log prob directly
probit <- parameters$prob
d <- tfp$distributions$Normal(fl(0), fl(1))
lprob <- d$log_cdf(probit)
lprobnot <- d$log_cdf(-probit)
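# P(x = 1) = Phi(probit) and P(x = 0) = Phi(-probit), so the Bernoulli log
# probability is x * log Phi(probit) + (1 - x) * log Phi(-probit)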
log_prob <- function(x) {
x * lprob + (fl(1) - x) * lprobnot
}
list(log_prob = log_prob, cdf = NULL, log_cdf = NULL)
} else {
tfp$distributions$Bernoulli(probs = parameters$prob)
}
},
# no CDF for discrete distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
binomial_distribution <- R6Class(
"binomial_distribution",
inherit = distribution_node,
public = list(
prob_is_logit = FALSE,
prob_is_probit = FALSE,
initialize = function(size, prob, dim) {
size <- as.greta_array(size)
prob <- as.greta_array(prob)
# add the nodes as children and parameters
dim <- check_dims(size, prob, target_dim = dim)
super$initialize("binomial", dim, discrete = TRUE)
if (has_representation(prob, "logit")) {
prob <- representation(prob, "logit")
self$prob_is_logit <- TRUE
} else if (has_representation(prob, "probit")) {
prob <- representation(prob, "probit")
self$prob_is_probit <- TRUE
}
self$add_parameter(prob, "prob")
self$add_parameter(size, "size")
},
tf_distrib = function(parameters, dag) {
if (self$prob_is_logit) {
tfp$distributions$Binomial(total_count = parameters$size,
logits = parameters$prob)
} else if (self$prob_is_probit) {
# in the probit case, get the log probability of success and compute the
# log prob directly
size <- parameters$size
probit <- parameters$prob
d <- tfp$distributions$Normal(fl(0), fl(1))
lprob <- d$log_cdf(probit)
lprobnot <- d$log_cdf(-probit)
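# binomial log pmf with success probability Phi(probit):
# log choose(size, x) + x * log Phi(probit) + (size - x) * log Phi(-probit);
# the log binomial coefficient is computed below via lgamma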
log_prob <- function(x) {
log_choose <- tf$lgamma(size + fl(1)) -
tf$lgamma(x + fl(1)) -
tf$lgamma(size - x + fl(1))
log_choose + x * lprob + (size - x) * lprobnot
}
list(log_prob = log_prob, cdf = NULL, log_cdf = NULL)
} else {
tfp$distributions$Binomial(total_count = parameters$size,
probs = parameters$prob)
}
},
# no CDF for discrete distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
beta_binomial_distribution <- R6Class(
"beta_binomial_distribution",
inherit = distribution_node,
public = list(
initialize = function(size, alpha, beta, dim) {
size <- as.greta_array(size)
alpha <- as.greta_array(alpha)
beta <- as.greta_array(beta)
# add the nodes as children and parameters
dim <- check_dims(size, alpha, beta, target_dim = dim)
super$initialize("beta_binomial", dim, discrete = TRUE)
self$add_parameter(size, "size")
self$add_parameter(alpha, "alpha")
self$add_parameter(beta, "beta")
},
tf_distrib = function(parameters, dag) {
size <- parameters$size
alpha <- parameters$alpha
beta <- parameters$beta
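# beta-binomial log pmf:
# log choose(size, x) + log B(x + alpha, size - x + beta) - log B(alpha, beta)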
log_prob <- function(x) {
tf_lchoose(size, x) +
tf_lbeta(x + alpha, size - x + beta) -
tf_lbeta(alpha, beta)
}
list(log_prob = log_prob, cdf = NULL, log_cdf = NULL)
},
# no CDF for discrete distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
poisson_distribution <- R6Class(
"poisson_distribution",
inherit = distribution_node,
public = list(
lambda_is_log = FALSE,
initialize = function(lambda, dim) {
lambda <- as.greta_array(lambda)
# add the nodes as children and parameters
dim <- check_dims(lambda, target_dim = dim)
super$initialize("poisson", dim, discrete = TRUE)
if (has_representation(lambda, "log")) {
lambda <- representation(lambda, "log")
self$lambda_is_log <- TRUE
}
self$add_parameter(lambda, "lambda")
},
tf_distrib = function(parameters, dag) {
if (self$lambda_is_log) {
log_lambda <- parameters$lambda
} else {
log_lambda <- tf$log(parameters$lambda)
}
tfp$distributions$Poisson(log_rate = log_lambda)
},
# no CDF for discrete distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
negative_binomial_distribution <- R6Class(
"negative_binomial_distribution",
inherit = distribution_node,
public = list(
initialize = function(size, prob, dim) {
size <- as.greta_array(size)
prob <- as.greta_array(prob)
# add the nodes as children and parameters
dim <- check_dims(size, prob, target_dim = dim)
super$initialize("negative_binomial", dim, discrete = TRUE)
self$add_parameter(size, "size")
self$add_parameter(prob, "prob")
},
# Begin Exclude Linting
tf_distrib = function(parameters, dag) {
tfp$distributions$NegativeBinomial(total_count = parameters$size,
probs = fl(1) - parameters$prob)
},
# End Exclude Linting
# no CDF for discrete distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
hypergeometric_distribution <- R6Class(
"hypergeometric_distribution",
inherit = distribution_node,
public = list(
initialize = function(m, n, k, dim) {
m <- as.greta_array(m)
n <- as.greta_array(n)
k <- as.greta_array(k)
# add the nodes as children and parameters
dim <- check_dims(m, n, k, target_dim = dim)
super$initialize("hypergeometric", dim, discrete = TRUE)
self$add_parameter(m, "m")
self$add_parameter(n, "n")
self$add_parameter(k, "k")
},
tf_distrib = function(parameters, dag) {
m <- parameters$m
n <- parameters$n
k <- parameters$k
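# hypergeometric log pmf:
# log choose(m, x) + log choose(n, k - x) - log choose(m + n, k)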
log_prob <- function(x) {
tf_lchoose(m, x) +
tf_lchoose(n, k - x) -
tf_lchoose(m + n, k)
}
list(log_prob = log_prob, cdf = NULL, log_cdf = NULL)
},
# no CDF for discrete distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
gamma_distribution <- R6Class(
"gamma_distribution",
inherit = distribution_node,
public = list(
initialize = function(shape, rate, dim, truncation) {
shape <- as.greta_array(shape)
rate <- as.greta_array(rate)
# add the nodes as children and parameters
dim <- check_dims(shape, rate, target_dim = dim)
check_positive(truncation)
self$bounds <- c(0, Inf)
super$initialize("gamma", dim, truncation)
self$add_parameter(shape, "shape")
self$add_parameter(rate, "rate")
},
tf_distrib = function(parameters, dag) {
tfp$distributions$Gamma(concentration = parameters$shape,
rate = parameters$rate)
}
)
)
inverse_gamma_distribution <- R6Class(
"inverse_gamma_distribution",
inherit = distribution_node,
public = list(
initialize = function(alpha, beta, dim, truncation) {
alpha <- as.greta_array(alpha)
beta <- as.greta_array(beta)
# add the nodes as children and parameters
dim <- check_dims(alpha, beta, target_dim = dim)
check_positive(truncation)
self$bounds <- c(0, Inf)
super$initialize("inverse_gamma", dim, truncation)
self$add_parameter(alpha, "alpha")
self$add_parameter(beta, "beta")
},
# Begin Exclude Linting
tf_distrib = function(parameters, dag) {
tfp$distributions$InverseGamma(concentration = parameters$alpha,
rate = parameters$beta)
}
# End Exclude Linting
)
)
weibull_distribution <- R6Class(
"weibull_distribution",
inherit = distribution_node,
public = list(
initialize = function(shape, scale, dim, truncation) {
shape <- as.greta_array(shape)
scale <- as.greta_array(scale)
# add the nodes as children and parameters
dim <- check_dims(shape, scale, target_dim = dim)
check_positive(truncation)
self$bounds <- c(0, Inf)
super$initialize("weibull", dim, truncation)
self$add_parameter(shape, "shape")
self$add_parameter(scale, "scale")
},
tf_distrib = function(parameters, dag) {
a <- parameters$shape
b <- parameters$scale
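# Weibull(shape = a, scale = b) log density:
# log(a / b) + (a - 1) * log(x / b) - (x / b) ^ a, with CDF 1 - exp(-(x / b) ^ a)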
log_prob <- function(x) {
log(a) - log(b) + (a - fl(1)) * (log(x) - log(b)) - (x / b) ^ a
}
cdf <- function(x) {
fl(1) - exp(fl(-1) * (x / b) ^ a)
}
log_cdf <- function(x) {
log(cdf(x))
}
list(log_prob = log_prob, cdf = cdf, log_cdf = log_cdf)
}
)
)
exponential_distribution <- R6Class(
"exponential_distribution",
inherit = distribution_node,
public = list(
initialize = function(rate, dim, truncation) {
rate <- as.greta_array(rate)
# add the nodes as children and parameters
dim <- check_dims(rate, target_dim = dim)
check_positive(truncation)
self$bounds <- c(0, Inf)
super$initialize("exponential", dim, truncation)
self$add_parameter(rate, "rate")
},
tf_distrib = function(parameters, dag) {
tfp$distributions$Exponential(rate = parameters$rate)
}
)
)
pareto_distribution <- R6Class(
"pareto_distribution",
inherit = distribution_node,
public = list(
initialize = function(a, b, dim, truncation) {
a <- as.greta_array(a)
b <- as.greta_array(b)
# add the nodes as children and parameters
dim <- check_dims(a, b, target_dim = dim)
check_positive(truncation)
self$bounds <- c(0, Inf)
super$initialize("pareto", dim, truncation)
self$add_parameter(a, "a")
self$add_parameter(b, "b")
},
tf_distrib = function(parameters, dag) {
a <- parameters$a
b <- parameters$b
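# Pareto(a, b) log density: log(a) + a * log(b) - (a + 1) * log(x),
# with CDF 1 - (b / x) ^ a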
log_prob <- function(x)
log(a) + a * log(b) - (a + fl(1)) * log(x)
cdf <- function(x)
fl(1) - (b / x) ^ a
log_cdf <- function(x)
log(cdf(x))
list(log_prob = log_prob, cdf = cdf, log_cdf = log_cdf)
}
)
)
student_distribution <- R6Class(
"student_distribution",
inherit = distribution_node,
public = list(
initialize = function(df, mu, sigma, dim, truncation) {
df <- as.greta_array(df)
mu <- as.greta_array(mu)
sigma <- as.greta_array(sigma)
# add the nodes as children and parameters
dim <- check_dims(df, mu, sigma, target_dim = dim)
super$initialize("student", dim, truncation)
self$add_parameter(df, "df")
self$add_parameter(mu, "mu")
self$add_parameter(sigma, "sigma")
},
# Begin Exclude Linting
tf_distrib = function(parameters, dag) {
tfp$distributions$StudentT(df = parameters$df,
loc = parameters$mu,
scale = parameters$sigma)
}
# End Exclude Linting
)
)
laplace_distribution <- R6Class(
"laplace_distribution",
inherit = distribution_node,
public = list(
initialize = function(mu, sigma, dim, truncation) {
mu <- as.greta_array(mu)
sigma <- as.greta_array(sigma)
# add the nodes as children and parameters
dim <- check_dims(mu, sigma, target_dim = dim)
super$initialize("laplace", dim, truncation)
self$add_parameter(mu, "mu")
self$add_parameter(sigma, "sigma")
},
tf_distrib = function(parameters, dag) {
tfp$distributions$Laplace(loc = parameters$mu,
scale = parameters$sigma)
}
)
)
beta_distribution <- R6Class(
"beta_distribution",
inherit = distribution_node,
public = list(
initialize = function(shape1, shape2, dim, truncation) {
shape1 <- as.greta_array(shape1)
shape2 <- as.greta_array(shape2)
# add the nodes as children and parameters
dim <- check_dims(shape1, shape2, target_dim = dim)
check_unit(truncation)
self$bounds <- c(0, 1)
super$initialize("beta", dim, truncation)
self$add_parameter(shape1, "shape1")
self$add_parameter(shape2, "shape2")
},
tf_distrib = function(parameters, dag) {
tfp$distributions$Beta(concentration1 = parameters$shape1,
concentration0 = parameters$shape2)
}
)
)
cauchy_distribution <- R6Class(
"cauchy_distribution",
inherit = distribution_node,
public = list(
initialize = function(location, scale, dim, truncation) {
location <- as.greta_array(location)
scale <- as.greta_array(scale)
# add the nodes as children and parameters
dim <- check_dims(location, scale, target_dim = dim)
super$initialize("cauchy", dim, truncation)
self$add_parameter(location, "location")
self$add_parameter(scale, "scale")
},
tf_distrib = function(parameters, dag) {
loc <- parameters$location
s <- parameters$scale
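# Cauchy log density: -log(pi * s * (1 + ((x - loc) / s) ^ 2)),
# with CDF atan((x - loc) / s) / pi + 1 / 2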
log_prob <- function(x)
tf$negative(tf$log(fl(pi) * s * (fl(1) + tf$square( (x - loc) / s ))))
cdf <- function(x)
fl(1 / pi) * tf$atan( (x - loc) / s ) + fl(0.5)
log_cdf <- function(x)
tf$log(cdf(x))
list(log_prob = log_prob, cdf = cdf, log_cdf = log_cdf)
}
)
)
chi_squared_distribution <- R6Class(
"chi_squared_distribution",
inherit = distribution_node,
public = list(
initialize = function(df, dim, truncation) {
df <- as.greta_array(df)
# add the nodes as children and parameters
dim <- check_dims(df, target_dim = dim)
check_positive(truncation)
self$bounds <- c(0, Inf)
super$initialize("chi_squared", dim, truncation)
self$add_parameter(df, "df")
},
tf_distrib = function(parameters, dag) {
tfp$distributions$Chi2(df = parameters$df)
}
)
)
logistic_distribution <- R6Class(
"logistic_distribution",
inherit = distribution_node,
public = list(
initialize = function(location, scale, dim, truncation) {
location <- as.greta_array(location)
scale <- as.greta_array(scale)
# add the nodes as children and parameters
dim <- check_dims(location, scale, target_dim = dim)
super$initialize("logistic", dim, truncation)
self$add_parameter(location, "location")
self$add_parameter(scale, "scale")
},
tf_distrib = function(parameters, dag) {
tfp$distributions$Logistic(loc = parameters$location,
scale = parameters$scale)
},
# log_cdf in tf$contrib$distributions has the wrong sign :/
tf_log_cdf_function = function(x, parameters) {
tf$log(self$tf_cdf_function(x, parameters))
}
)
)
f_distribution <- R6Class(
"f_distribution",
inherit = distribution_node,
public = list(
initialize = function(df1, df2, dim, truncation) {
df1 <- as.greta_array(df1)
df2 <- as.greta_array(df2)
# add the nodes as children and parameters
dim <- check_dims(df1, df2, target_dim = dim)
check_positive(truncation)
self$bounds <- c(0, Inf)
super$initialize("d", dim, truncation)
self$add_parameter(df1, "df1")
self$add_parameter(df2, "df2")
},
tf_distrib = function(parameters, dag) {
df1 <- parameters$df1
df2 <- parameters$df2
tf_lbeta <- function(a, b)
tf$lgamma(a) + tf$lgamma(b) - tf$lgamma(a + b)
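# F(df1, df2) log density:
# 0.5 * (df1 * log(df1 * x) + df2 * log(df2) - (df1 + df2) * log(df1 * x + df2)) -
# log(x) - log B(df1 / 2, df2 / 2); the CDF is the regularised incomplete beta
# function evaluated at df1 * x / (df1 * x + df2)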
log_prob <- function(x) {
df1_x <- df1 * x
la <- df1 * log(df1_x) + df2 * log(df2)
lb <- (df1 + df2) * log(df1_x + df2)
lnumerator <- fl(0.5) * (la - lb)
lnumerator - log(x) - tf_lbeta(df1 / fl(2), df2 / fl(2))
}
cdf <- function(x) {
df1_x <- df1 * x
ratio <- df1_x / (df1_x + df2)
tf$betainc(df1 / fl(2), df2 / fl(2), ratio)
}
log_cdf <- function(x)
log(cdf(x))
list(log_prob = log_prob, cdf = cdf, log_cdf = log_cdf)
}
)
)
dirichlet_distribution <- R6Class(
"dirichlet_distribution",
inherit = distribution_node,
public = list(
initialize = function(alpha, n_realisations, dimension) {
# coerce to greta arrays
alpha <- as.greta_array(alpha)
dim <- check_multivariate_dims(vectors = list(alpha),
n_realisations = n_realisations,
dimension = dimension)
# coerce the parameter arguments to nodes and add as children and
# parameters
self$bounds <- c(0, Inf)
super$initialize("dirichlet", dim,
truncation = c(0, Inf))
self$add_parameter(alpha, "alpha")
},
create_target = function(truncation) {
# handle simplex via a greta array
free_greta_array <- variable(lower = 0, upper = 1, dim = self$dim)
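# dividing each row by its row sum (below) maps the unit-interval variables
# onto the unit simplex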
sums <- rowSums(free_greta_array)
simplex_greta_array <- sweep(free_greta_array, 1, sums, "/")
# return the node for the simplex
target_node <- get_node(simplex_greta_array)
target_node
},
tf_distrib = function(parameters, dag) {
alpha <- parameters$alpha
tfp$distributions$Dirichlet(concentration = alpha)
},
# no CDF for multivariate distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
dirichlet_multinomial_distribution <- R6Class(
"dirichlet_multinomial_distribution",
inherit = distribution_node,
public = list(
initialize = function(size, alpha, n_realisations, dimension) {
# coerce to greta arrays
size <- as.greta_array(size)
alpha <- as.greta_array(alpha)
dim <- check_multivariate_dims(scalars = list(size),
vectors = list(alpha),
n_realisations = n_realisations,
dimension = dimension)
# need to handle size as a vector!
# coerce the parameter arguments to nodes and add as children and
# parameters
super$initialize("dirichlet_multinomial",
dim = dim,
discrete = TRUE)
self$add_parameter(size, "size")
self$add_parameter(alpha, "alpha")
},
# Begin Exclude Linting
tf_distrib = function(parameters, dag) {
parameters <- match_batches(parameters)
parameters$size <- tf_flatten(parameters$size)
distrib <- tfp$distributions$DirichletMultinomial
distrib(total_count = parameters$size,
concentration = parameters$alpha)
},
# End Exclude Linting
# no CDF for multivariate distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
multinomial_distribution <- R6Class(
"multinomial_distribution",
inherit = distribution_node,
public = list(
initialize = function(size, prob, n_realisations, dimension) {
# coerce to greta arrays
size <- as.greta_array(size)
prob <- as.greta_array(prob)
dim <- check_multivariate_dims(scalars = list(size),
vectors = list(prob),
n_realisations = n_realisations,
dimension = dimension)
# need to make sure size is a column vector!
# coerce the parameter arguments to nodes and add as children and
# parameters
super$initialize("multinomial",
dim = dim,
discrete = TRUE)
self$add_parameter(size, "size")
self$add_parameter(prob, "prob")
},
tf_distrib = function(parameters, dag) {
parameters <- match_batches(parameters)
parameters$size <- tf_flatten(parameters$size)
# scale probs to get absolute density correct
parameters$prob <- parameters$prob / tf_sum(parameters$prob)
tfp$distributions$Multinomial(total_count = parameters$size,
probs = parameters$prob)
},
# no CDF for multivariate distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
categorical_distribution <- R6Class(
"categorical_distribution",
inherit = distribution_node,
public = list(
initialize = function(prob, n_realisations, dimension) {
# coerce to greta arrays
prob <- as.greta_array(prob)
dim <- check_multivariate_dims(vectors = list(prob),
n_realisations = n_realisations,
dimension = dimension)
# coerce the parameter arguments to nodes and add as children and
# parameters
super$initialize("categorical", dim = dim, discrete = TRUE)
self$add_parameter(prob, "prob")
},
tf_distrib = function(parameters, dag) {
# scale probs to get absolute density correct
probs <- parameters$prob
probs <- probs / tf_sum(probs)
tfp$distributions$Multinomial(total_count = fl(1),
probs = probs)
},
# no CDF for multivariate distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
multivariate_normal_distribution <- R6Class(
"multivariate_normal_distribution",
inherit = distribution_node,
public = list(
Sigma_is_cholesky = FALSE,
initialize = function(mean, Sigma, n_realisations, dimension) {
# coerce to greta arrays
mean <- as.greta_array(mean)
Sigma <- as.greta_array(Sigma)
# check dim is a positive scalar integer
dim <- check_multivariate_dims(vectors = list(mean),
squares = list(Sigma),
n_realisations = n_realisations,
dimension = dimension)
# check dimensions of Sigma
if (nrow(Sigma) != ncol(Sigma) |
length(dim(Sigma)) != 2) {
stop("Sigma must be a square 2D greta array, ",
"but has dimensions ",
paste(dim(Sigma), collapse = " x "),
call. = FALSE)
}
# compare possible dimensions
dim_mean <- ncol(mean)
dim_Sigma <- nrow(Sigma)
if (dim_mean != dim_Sigma) {
stop("mean and Sigma have different dimensions, ",
dim_mean, " vs ", dim_Sigma,
call. = FALSE)
}
# coerce the parameter arguments to nodes and add as children and
# parameters
super$initialize("multivariate_normal", dim)
if (has_representation(Sigma, "cholesky")) {
Sigma <- representation(Sigma, "cholesky")
self$Sigma_is_cholesky <- TRUE
}
self$add_parameter(mean, "mean")
self$add_parameter(Sigma, "Sigma")
},
tf_distrib = function(parameters, dag) {
# if Sigma is a cholesky factor, transpose it to the tensorflow expectation,
# otherwise decompose it
if (self$Sigma_is_cholesky) {
L <- tf_transpose(parameters$Sigma)
} else {
L <- tf$cholesky(parameters$Sigma)
}
# add an extra dimension for the observation batch size (otherwise tfp
# will try to use the n_chains batch dimension)
L <- tf$expand_dims(L, 1L)
mu <- parameters$mean
# Begin Exclude Linting
tfp$distributions$MultivariateNormalTriL(loc = mu,
scale_tril = L)
# End Exclude Linting
},
# no CDF for multivariate distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
wishart_distribution <- R6Class(
"wishart_distribution",
inherit = distribution_node,
public = list(
# set when defining the distribution
Sigma_is_cholesky = FALSE,
# set when defining the graph
target_is_cholesky = FALSE,
initialize = function(df, Sigma) {
# add the nodes as children and parameters
df <- as.greta_array(df)
Sigma <- as.greta_array(Sigma)
# check dimensions of Sigma
if (nrow(Sigma) != ncol(Sigma) |
length(dim(Sigma)) != 2) {
stop("Sigma must be a square 2D greta array, but has dimensions ",
paste(dim(Sigma), collapse = " x "),
call. = FALSE)
}
dim <- nrow(Sigma)
# initialize with a cholesky factor
super$initialize("wishart", dim(Sigma))
# set parameters
if (has_representation(Sigma, "cholesky")) {
Sigma <- representation(Sigma, "cholesky")
self$Sigma_is_cholesky <- TRUE
}
self$add_parameter(df, "df")
self$add_parameter(Sigma, "Sigma")
# make the initial value PD (no idea whether this does anything)
self$value(unknowns(dims = c(dim, dim), data = diag(dim)))
},
# create a variable, and transform to a symmetric matrix (with cholesky
# factor representation)
create_target = function(truncation) {
# create a flat variable greta array
k <- self$dim[1]
free_greta_array <- vble(truncation = c(-Inf, Inf),
dim = k + k * (k - 1) / 2)
free_greta_array$constraint <- "covariance_matrix"
# reshape to a cholesky factor and then to a symmetric matrix (which
# retains the cholesky representation)
chol_greta_array <- flat_to_chol(free_greta_array, self$dim)
matrix_greta_array <- chol_to_symmetric(chol_greta_array)
# return the node for the symmetric matrix
target_node <- get_node(matrix_greta_array)
target_node
},
# get a cholesky factor for the target if possible
get_tf_target_node = function() {
target <- self$target
if (has_representation(target, "cholesky")) {
chol <- representation(target, "cholesky")
target <- get_node(chol)
self$target_is_cholesky <- TRUE
}
target
},
# if the target is changed, make sure target_is_cholesky is reset to FALSE
# (it can be re-set when the graph is defined)
reset_target_flags = function() {
self$target_is_cholesky <- FALSE
},
tf_distrib = function(parameters, dag) {
# this is messy, we want to use the tfp wishart, but can't define the
# density without expanding the dimension of x
log_prob <- function(x) {
# reshape the dimensions
df <- tf_flatten(parameters$df)
Sigma <- tf$expand_dims(parameters$Sigma, 1L)
x <- tf$expand_dims(x, 1L)
# get the cholesky factor of Sigma in tf orientation
if (self$Sigma_is_cholesky) {
Sigma_chol <- tf$matrix_transpose(Sigma)
} else {
Sigma_chol <- tf$cholesky(Sigma)
}
# get the cholesky factor of the target in tf_orientation
if (self$target_is_cholesky) {
x_chol <- tf$matrix_transpose(x)
} else {
x_chol <- tf$cholesky(x)
}
# use the density for choleskied x, with choleskied Sigma
distrib <- tfp$distributions$Wishart(df = df,
scale_tril = Sigma_chol,
input_output_cholesky = TRUE)
distrib$log_prob(x_chol)
}
list(log_prob = log_prob, cdf = NULL, log_cdf = NULL)
},
# no CDF for multivariate distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
lkj_correlation_distribution <- R6Class(
"lkj_correlation_distribution",
inherit = distribution_node,
public = list(
# set when defining the graph
target_is_cholesky = FALSE,
initialize = function(eta, dimension = 2) {
dimension <- check_dimension(target = dimension)
if (!inherits(eta, "greta_array")) {
if (!is.numeric(eta) || !length(eta) == 1 || eta <= 0) {
stop("eta must be a positive scalar value, or a scalar greta array",
call. = FALSE)
}
}
# add the nodes as children and parameters
eta <- as.greta_array(eta)
if (!is_scalar(eta)) {
stop("eta must be a scalar, but had dimensions: ",
capture.output(dput(dim(eta))),
call. = FALSE)
}
dim <- c(dimension, dimension)
super$initialize("lkj_correlation", dim)
self$add_parameter(eta, "eta")
# make the initial value PD
self$value(unknowns(dims = dim, data = diag(dimension)))
},
# default (cholesky factor, ignores truncation)
create_target = function(truncation) {
# handle reshaping via a greta array
k <- self$dim[1]
free_greta_array <- vble(truncation = c(-Inf, Inf),
dim = k * (k - 1) / 2)
free_greta_array$constraint <- "correlation_matrix"
# reshape to a cholesky factor and then to a symmetric correlation matrix
# (which retains the cholesky representation)
chol_greta_array <- flat_to_chol(free_greta_array,
self$dim,
correl = TRUE)
matrix_greta_array <- chol_to_symmetric(chol_greta_array)
# return the node for the symmetric matrix
target_node <- get_node(matrix_greta_array)
target_node
},
# get a cholesky factor for the target if possible
get_tf_target_node = function() {
target <- self$target
if (has_representation(target, "cholesky")) {
chol <- representation(target, "cholesky")
target <- get_node(chol)
self$target_is_cholesky <- TRUE
}
target
},
# if the target is changed, make sure target_is_cholesky is reset to FALSE
# (it can be re-set when the graph is defined)
reset_target_flags = function() {
self$target_is_cholesky <- FALSE
},
tf_distrib = function(parameters, dag) {
eta <- parameters$eta
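# LKJ(eta) log density on a correlation matrix R: (eta - 1) * log(det(R)) plus
# a normalising constant; det(R) is the squared product of the diagonal of its
# cholesky factor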
log_prob <- function(x) {
n <- self$dim[1]
# normalising constant
k <- 1:n
a <- fl(1 - n) * tf$lgamma(eta + fl(0.5 * (n - 1)))
b <- tf_sum(fl(0.5 * k * log(pi)) +
tf$lgamma(eta + fl(0.5 * (n - 1 - k))))
norm <- a + b
# get the cholesky factor of the target in tf_orientation
if (self$target_is_cholesky) {
x_chol <- tf$matrix_transpose(x)
} else {
x_chol <- tf$cholesky(x)
}
diags <- tf$matrix_diag_part(x_chol)
det <- tf$square(tf_prod(diags))
(eta - fl(1)) * tf$log(det) + norm
}
list(log_prob = log_prob, cdf = NULL, log_cdf = NULL)
},
# no CDF for multivariate distributions
tf_cdf_function = NULL,
tf_log_cdf_function = NULL
)
)
# module for export via .internals
distribution_classes_module <- module(uniform_distribution,
normal_distribution,
lognormal_distribution,
bernoulli_distribution,
binomial_distribution,
beta_binomial_distribution,
negative_binomial_distribution,
hypergeometric_distribution,
poisson_distribution,
gamma_distribution,
inverse_gamma_distribution,
weibull_distribution,
exponential_distribution,
pareto_distribution,
student_distribution,
laplace_distribution,
beta_distribution,
cauchy_distribution,
chi_squared_distribution,
logistic_distribution,
f_distribution,
multivariate_normal_distribution,
wishart_distribution,
lkj_correlation_distribution,
multinomial_distribution,
categorical_distribution,
dirichlet_distribution,
dirichlet_multinomial_distribution)
# export constructors
#' @name distributions
#' @title probability distributions
#' @description These functions can be used to define random variables in a
#' greta model. They return a variable greta array that follows the specified
#' distribution. This variable greta array can be used to represent a
#' parameter with prior distribution, combined into a mixture distribution
#' using \code{\link{mixture}}, or used with \code{\link{distribution}} to
#' define a distribution over a data greta array.
#'
#' @param truncation a length-two vector giving values between which to truncate
#' the distribution, similarly to the \code{lower} and \code{upper} arguments
#' to \code{\link{variable}}
#'
#' @param min,max scalar values giving optional limits to \code{uniform}
#' variables. Like \code{lower} and \code{upper}, these must be specified as
#' numerics; they cannot be greta arrays (though see details for a
#' workaround). Unlike \code{lower} and \code{upper}, they must be finite.
#' \code{min} must always be less than \code{max}.
#'
#' @param mean,meanlog,location,mu unconstrained parameters
#'
#' @param
#' sd,sdlog,sigma,lambda,shape,rate,df,scale,shape1,shape2,alpha,beta,df1,df2,a,b,eta
#' positive parameters, \code{alpha} must be a vector for \code{dirichlet}
#' and \code{dirichlet_multinomial}.
#'
#' @param size,m,n,k positive integer parameter
#'
#' @param prob probability parameter (\code{0 < prob < 1}), must be a vector for
#' \code{multinomial} and \code{categorical}
#'
#' @param Sigma positive definite variance-covariance matrix parameter
#'
#' @param dim the dimensions of the greta array to be returned, either a scalar
#' or a vector of positive integers. See details.
#'
#' @param dimension the dimension of a multivariate distribution
#'
#' @param n_realisations the number of independent realisations of a multivariate
#' distribution
#'
#' @details The discrete probability distributions (\code{bernoulli},
#' \code{binomial}, \code{negative_binomial}, \code{poisson},
#' \code{multinomial}, \code{categorical}, \code{dirichlet_multinomial}) can
#' be used when they have fixed values (e.g. defined as a likelihood using
#' \code{\link{distribution}}), but not as unknown variables.
#'
#' For univariate distributions \code{dim} gives the dimensions of the greta
#' array to create. Each element of the greta array will be (independently)
#' distributed according to the distribution. \code{dim} can also be left at
#' its default of \code{NULL}, in which case the dimension will be detected
#' from the dimensions of the parameters (provided they are compatible with
#' one another).
#'
#' For multivariate distributions (\code{multivariate_normal()},
#' \code{multinomial()}, \code{categorical()}, \code{dirichlet()}, and
#' \code{dirichlet_multinomial()}) each row of the output and parameters
#' corresponds to an independent realisation. If a single realisation or
#' parameter value is specified, it must therefore be a row vector (see
#' example). \code{n_realisations} gives the number of rows/realisations, and
#' \code{dimension} gives the dimension of the distribution. E.g. a bivariate
#' normal distribution would be produced with \code{multivariate_normal(...,
#' dimension = 2)}. The dimension can usually be detected from the parameters.
#'
#' \code{multinomial()} does not check that observed values sum to
#' \code{size}, and \code{categorical()} does not check that only one of the
#' observed entries is 1. It's the user's responsibility to check their data
#' matches the distribution!
#'
#' The parameters of \code{uniform} must be fixed, not greta arrays. This
#' ensures these values can always be transformed to a continuous scale to run
#' the samplers efficiently. However, a hierarchical \code{uniform} parameter
#' can always be created by defining a \code{uniform} variable constrained
#' between 0 and 1, and then transforming it to the required scale. See below
#' for an example.
#'
#' Wherever possible, the parameterisations and argument names of greta
#' distributions match commonly used R functions for distributions, such as
#' those in the \code{stats} or \code{extraDistr} packages. The following
#' table states the distribution function to which greta's implementation
#' corresponds:
#'
#' \tabular{ll}{ greta \tab reference\cr \code{uniform} \tab
#' \link[stats:dunif]{stats::dunif}\cr \code{normal} \tab
#' \link[stats:dnorm]{stats::dnorm}\cr \code{lognormal} \tab
#' \link[stats:dlnorm]{stats::dlnorm}\cr \code{bernoulli} \tab
#' \link[extraDistr:dbern]{extraDistr::dbern}\cr \code{binomial} \tab
#' \link[stats:dbinom]{stats::dbinom}\cr \code{beta_binomial} \tab
#' \link[extraDistr:dbbinom]{extraDistr::dbbinom}\cr \code{negative_binomial}
#' \tab \link[stats:dnbinom]{stats::dnbinom}\cr \code{hypergeometric} \tab
#' \link[stats:dhyper]{stats::dhyper}\cr \code{poisson} \tab
#' \link[stats:dpois]{stats::dpois}\cr \code{gamma} \tab
#' \link[stats:dgamma]{stats::dgamma}\cr \code{inverse_gamma} \tab
#' \link[extraDistr:dinvgamma]{extraDistr::dinvgamma}\cr \code{weibull} \tab
#' \link[stats:dweibull]{stats::dweibull}\cr \code{exponential} \tab
#' \link[stats:dexp]{stats::dexp}\cr \code{pareto} \tab
#' \link[extraDistr:dpareto]{extraDistr::dpareto}\cr \code{student} \tab
#' \link[extraDistr:dlst]{extraDistr::dlst}\cr \code{laplace} \tab
#' \link[extraDistr:dlaplace]{extraDistr::dlaplace}\cr \code{beta} \tab
#' \link[stats:dbeta]{stats::dbeta}\cr \code{cauchy} \tab
#' \link[stats:dcauchy]{stats::dcauchy}\cr \code{chi_squared} \tab
#' \link[stats:dchisq]{stats::dchisq}\cr \code{logistic} \tab
#' \link[stats:dlogis]{stats::dlogis}\cr \code{f} \tab
#' \link[stats:df]{stats::df}\cr \code{multivariate_normal} \tab
#' \link[mvtnorm:dmvnorm]{mvtnorm::dmvnorm}\cr \code{multinomial} \tab
#' \link[stats:dmultinom]{stats::dmultinom}\cr \code{categorical} \tab
#' {\link[stats:dmultinom]{stats::dmultinom} (size = 1)}\cr \code{dirichlet}
#' \tab \link[extraDistr:ddirichlet]{extraDistr::ddirichlet}\cr
#' \code{dirichlet_multinomial} \tab
#' \link[extraDistr:ddirmnom]{extraDistr::ddirmnom}\cr \code{wishart} \tab
#' \link[stats:rWishart]{stats::rWishart}\cr \code{lkj_correlation} \tab
#' \href{https://rdrr.io/github/rmcelreath/rethinking/man/dlkjcorr.html}{rethinking::dlkjcorr}
#' }
#'
#' @examples
#' \dontrun{
#'
#' # a uniform parameter constrained to be between 0 and 1
#' phi <- uniform(min = 0, max = 1)
#'
#' # a length-three variable, with each element following a standard normal
#' # distribution
#' alpha <- normal(0, 1, dim = 3)
#'
#' # a length-three variable of lognormals
#' sigma <- lognormal(0, 3, dim = 3)
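#'
#' # a normal variable truncated to be positive, using the truncation argument
#' tau <- normal(0, 1, truncation = c(0, Inf))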
#'
#' # a hierarchical uniform, constrained between alpha and alpha + sigma
#' eta <- alpha + uniform(0, 1, dim = 3) * sigma
#'
#' # a hierarchical distribution
#' mu <- normal(0, 1)
#' sigma <- lognormal(0, 1)
#' theta <- normal(mu, sigma)
#'
#' # a vector of 3 variables drawn from the same hierarchical distribution
#' thetas <- normal(mu, sigma, dim = 3)
#'
#' # a matrix of 12 variables drawn from the same hierarchical distribution
#' thetas <- normal(mu, sigma, dim = c(3, 4))
#'
#' # a multivariate normal variable, with correlation between two elements
#' # note that the parameter must be a row vector
#' Sig <- diag(4)
#' Sig[3, 4] <- Sig[4, 3] <- 0.6
#' theta <- multivariate_normal(t(rep(mu, 4)), Sig)
#'
#' # 10 independent replicates of that
#' theta <- multivariate_normal(t(rep(mu, 4)), Sig, n_realisations = 10)
#'
#' # 10 multivariate normal replicates, each with a different mean vector,
#' # but the same covariance matrix
#' means <- matrix(rnorm(40), 10, 4)
#' theta <- multivariate_normal(means, Sig, n_realisations = 10)
#' dim(theta)
#'
#' # a Wishart variable with the same covariance parameter
#' theta <- wishart(df = 5, Sigma = Sig)
#'
#' }
NULL
#' @rdname distributions
#' @export
uniform <- function(min, max, dim = NULL)
distrib("uniform", min, max, dim)
#' @rdname distributions
#' @export
normal <- function(mean, sd, dim = NULL, truncation = c(-Inf, Inf))
distrib("normal", mean, sd, dim, truncation)
#' @rdname distributions
#' @export
lognormal <- function(meanlog, sdlog, dim = NULL, truncation = c(0, Inf))
distrib("lognormal", meanlog, sdlog, dim, truncation)
#' @rdname distributions
#' @export
bernoulli <- function(prob, dim = NULL)
distrib("bernoulli", prob, dim)
#' @rdname distributions
#' @export
binomial <- function(size, prob, dim = NULL) {
check_in_family("binomial", size)
distrib("binomial", size, prob, dim)
}
#' @rdname distributions
#' @export
beta_binomial <- function(size, alpha, beta, dim = NULL)
distrib("beta_binomial", size, alpha, beta, dim)
#' @rdname distributions
#' @export
negative_binomial <- function(size, prob, dim = NULL)
distrib("negative_binomial", size, prob, dim)
#' @rdname distributions
#' @export
hypergeometric <- function(m, n, k, dim = NULL)
distrib("hypergeometric", m, n, k, dim)
#' @rdname distributions
#' @export
poisson <- function(lambda, dim = NULL) {
check_in_family("poisson", lambda)
distrib("poisson", lambda, dim)
}
#' @rdname distributions
#' @export
gamma <- function(shape, rate, dim = NULL, truncation = c(0, Inf))
distrib("gamma", shape, rate, dim, truncation)
#' @rdname distributions
#' @export
inverse_gamma <- function(alpha, beta, dim = NULL, truncation = c(0, Inf))
distrib("inverse_gamma", alpha, beta, dim, truncation)
#' @rdname distributions
#' @export
weibull <- function(shape, scale, dim = NULL, truncation = c(0, Inf))
distrib("weibull", shape, scale, dim, truncation)
#' @rdname distributions
#' @export
exponential <- function(rate, dim = NULL, truncation = c(0, Inf))
distrib("exponential", rate, dim, truncation)
#' @rdname distributions
#' @export
pareto <- function(a, b, dim = NULL, truncation = c(0, Inf))
distrib("pareto", a, b, dim, truncation)
#' @rdname distributions
#' @export
student <- function(df, mu, sigma, dim = NULL, truncation = c(-Inf, Inf))
distrib("student", df, mu, sigma, dim, truncation)
#' @rdname distributions
#' @export
laplace <- function(mu, sigma, dim = NULL, truncation = c(-Inf, Inf))
distrib("laplace", mu, sigma, dim, truncation)
#' @rdname distributions
#' @export
beta <- function(shape1, shape2, dim = NULL, truncation = c(0, 1))
distrib("beta", shape1, shape2, dim, truncation)
#' @rdname distributions
#' @export
cauchy <- function(location, scale, dim = NULL, truncation = c(-Inf, Inf))
distrib("cauchy", location, scale, dim, truncation)
#' @rdname distributions
#' @export
chi_squared <- function(df, dim = NULL, truncation = c(0, Inf))
distrib("chi_squared", df, dim, truncation)
#' @rdname distributions
#' @export
logistic <- function(location, scale, dim = NULL, truncation = c(-Inf, Inf))
distrib("logistic", location, scale, dim, truncation)
#' @rdname distributions
#' @export
f <- function(df1, df2, dim = NULL, truncation = c(0, Inf))
distrib("f", df1, df2, dim, truncation)
#' @rdname distributions
#' @export
multivariate_normal <- function(mean, Sigma,
n_realisations = NULL, dimension = NULL) {
distrib("multivariate_normal", mean, Sigma,
n_realisations, dimension)
}
#' @rdname distributions
#' @export
wishart <- function(df, Sigma)
distrib("wishart", df, Sigma)
#' @rdname distributions
#' @export
lkj_correlation <- function(eta, dimension = 2)
distrib("lkj_correlation", eta, dimension)
#' @rdname distributions
#' @export
multinomial <- function(size, prob, n_realisations = NULL, dimension = NULL)
distrib("multinomial", size, prob, n_realisations, dimension)
#' @rdname distributions
#' @export
categorical <- function(prob, n_realisations = NULL, dimension = NULL)
distrib("categorical", prob, n_realisations, dimension)
#' @rdname distributions
#' @export
dirichlet <- function(alpha, n_realisations = NULL, dimension = NULL)
distrib("dirichlet", alpha, n_realisations, dimension)
#' @rdname distributions
#' @export
dirichlet_multinomial <- function(size, alpha,
n_realisations = NULL, dimension = NULL) {
distrib("dirichlet_multinomial",
size, alpha, n_realisations, dimension)
}