Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
marco-bee authored and cran-robot committed Jun 23, 2023
0 parents commit 176c001
Show file tree
Hide file tree
Showing 36 changed files with 2,358 additions and 0 deletions.
22 changes: 22 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,22 @@
Package: FitDynMix
Title: Estimation of Dynamic Mixtures
Version: 0.1.0
Authors@R:
person("Marco", "Bee", , "marco.bee@unitn.it", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-9579-3650"))
Description: Estimation of a dynamic lognormal - Generalized Pareto mixture via the Approximate Maximum Likelihood and the Cross-Entropy methods. See Bee, M. (2023) <doi:10.1016/j.csda.2023.107764>.
License: MIT + file LICENSE
Encoding: UTF-8
RoxygenNote: 7.2.3
Depends: R (>= 2.10)
LazyData: true
RdMacros: Rdpack
Imports: evir, MASS, pracma, Rdpack, parallel, ks, stats, graphics
URL: https://github.com/marco-bee/FitDynMix
BugReports: https://github.com/marco-bee/FitDynMix/issues
NeedsCompilation: no
Packaged: 2023-06-22 18:54:31 UTC; marco.bee
Author: Marco Bee [aut, cre] (<https://orcid.org/0000-0002-9579-3650>)
Maintainer: Marco Bee <marco.bee@unitn.it>
Repository: CRAN
Date/Publication: 2023-06-23 19:00:02 UTC
2 changes: 2 additions & 0 deletions LICENSE
@@ -0,0 +1,2 @@
YEAR: 2023
COPYRIGHT HOLDER: FitDynMix authors
35 changes: 35 additions & 0 deletions MD5
@@ -0,0 +1,35 @@
12bdaa640feee6b4c865894a6d4e8564 *DESCRIPTION
ebd6bc7e962609ced9444ae67560ca5a *LICENSE
6b10bd7694a1e521cce47ae66d347f42 *NAMESPACE
6e762f1fc30d62acf7ea14d96d4b06c5 *NEWS.md
fe8dd4ef52fa9046e886c3dd2ca8b536 *R/AMLEfit.R
29295b1aa66d2fd64030ffe1314decb1 *R/AMLEmode.R
0369d3c66ccfbe0036389944d6156974 *R/CENoisyFit.R
efd64e2dc1c49c2120eec8c83acb15ed *R/CENoisyFitBoot.R
9ec10375c60a327d017bf1beea0f9c80 *R/MLEBoot.R
b3e03fb655f379e5bef5e277648e35ec *R/MLEfit.R
33c5fb1c61e24ce7d2f3e12d43d348f4 *R/cvm_stat_M.R
a32c55a62c8958f08dfb7190539209e9 *R/data.R
13de619cf097bb10cd65bcd66fd16b2a *R/ddyn.R
c573299277bee5afceb26a9bef1e5ea2 *R/dynloglik.R
fe68078cd147dff23c99e3f953c19c80 *R/dynloglikMC.R
5179359fe64d9753c435360bc1af95c4 *R/nConst_MC.R
363e8c20ade55568d422d5e1ff747dff *R/rDynMix.R
a331570487722e8c0a6835985bfb8f76 *README.md
ba072085a418dca954752902ef4e4e09 *build/partial.rdb
dbc3cc2e17c34649f2f38cc59bfdb232 *data/Metro2019.rda
0232fcafa921db54d054c8757e2f13b9 *inst/REFERENCES.bib
9dd48a4b72091561c453e74eb4f362de *man/AMLEfit.Rd
0db1e1973d92385271ed4981156b41c1 *man/AMLEmode.Rd
d20180765ad237316d876b7743af743b *man/CENoisyFit.Rd
c82f4c2deecd86eed7fe0568ff30b1cf *man/CENoisyFitBoot.Rd
234a8be3d1ceccfd59aaa13b590aa0fd *man/MLEBoot.Rd
9c1ceeebfc6389e661bacaf10e628560 *man/MLEfit.Rd
b8fd4493c2245b7dc29e375666c92e12 *man/Metro2019.Rd
5e56f67e0abc3de5a28fff5bf490b214 *man/cvm_stat_M.Rd
a2f647090e291ddde65ac06f3c584b7f *man/ddyn.Rd
c14039c6c27632cb780230fa74f4dbd2 *man/dynloglik.Rd
2e86050866e82a2a30f6819f840b1142 *man/dynloglikMC.Rd
502a5f8dd6ba90060101596366cb7d58 *man/figures/README-pressure-1.png
721497271701f2695b0c91772788b2dd *man/nConst_MC.Rd
1eb8034ff81f6dd9e27f359b62357a39 *man/rDynMix.Rd
17 changes: 17 additions & 0 deletions NAMESPACE
@@ -0,0 +1,17 @@
# Generated by roxygen2: do not edit by hand

export(AMLEfit)
export(AMLEmode)
export(CENoisyFit)
export(CENoisyFitBoot)
export(MLEBoot)
export(MLEfit)
export(cvm_stat_M)
export(ddyn)
export(dynloglik)
export(dynloglikMC)
export(nConst_MC)
export(rDynMix)
import(graphics)
import(stats)
importFrom(Rdpack,reprompt)
3 changes: 3 additions & 0 deletions NEWS.md
@@ -0,0 +1,3 @@
# FitDynMix 0.1.0

The first release of FitDynMix provides facilities for estimation of a lognormal-GPD dynamic mixture.
147 changes: 147 additions & 0 deletions R/AMLEfit.R
@@ -0,0 +1,147 @@
#' Estimating a dynamic mixture via AMLE
#'
#' This function fits a dynamic mixture via Approximate Maximum Likelihood.
#' Currently only implemented for the lognormal - generalized Pareto case.
#' The bootstrap estimation of the standard errors of the MLEs (used for finding
#' the supports of the uniform priors) is carried out via parallel computing.
#' @param yObs numerical vector: observed sample.
#' @param epsilon non-negative scalar: scale parameter of the Markov kernel.
#' @param k non-negative integer: number of samples generated in the AMLE
#' approach, such that k*epsilon = ABC sample size.
#' @param bootreps positive integer: number of bootstrap replications.
#' @param intTol non-negative scalar: threshold for stopping the computation of the integral in the normalization
#' constant: if the integral on the interval from n-1 to n is smaller than intTol, the approximation procedure stops.
#' @return A list with the following elements:
#'
#' AMLEpars a list of four 6-dimensional vectors: approximate maximum likelihood estimates computed via sample mean,
#' maxima of the marginal kernel densities, maximum of the multivariate kernel densities,
#' maximum of the product of the marginal kernel densities.
#'
#' ABCsam ((k x epsilon) x 6) matrix: ABC sample.
#'
#' MLEpars (7 x 1) vector: maximum likelihood estimates and
#' maximized log-likelihood.
#'
#' MLEboot (bootreps x 6) matrix: maximum likelihood estimates obtained in
#' each bootstrap replication.
#' @details
#' For the lognormal and GPD parameters, the support of the uniform
#' prior is set equal to the 99% confidence interval of the bootstrap
#' distribution after discarding the outliers.
#' For the Cauchy parameters, the support is given by the range of the
#' bootstrap distribution after discarding the outliers.
#' Be aware that computing times are large when k and/or bootreps are large.
#' @keywords dynamic mixture; approximate maximum likelihood.
#' @seealso{\code{\link{AMLEmode}}.}
#' @export
#' @import stats graphics
#' @examples
#' \donttest{k <- 5000
#' epsilon <- .02
#' bootreps <- 2
#' res = AMLEfit(Metro2019, epsilon, k, bootreps)}
#' @references{
#' \insertRef{bee22b}{FitDynMix}
#' }
#'
#'
#' @importFrom Rdpack reprompt

AMLEfit <- function(yObs,epsilon,k,bootreps,intTol=1e-4)
{
# ---- Starting values for the full MLE ----
# Split the sample at the median: the lower half initializes the lognormal
# parameters, the upper half the GPD parameters.
n = length(yObs)
mediana = median(yObs)
y1 = yObs[yObs<mediana]
y2 = yObs[yObs>=mediana]
mu0 = MASS::fitdistr(y1,'lognormal')$estimate[1]
sigma0 = MASS::fitdistr(y1,'lognormal')$estimate[2]
# GPD fitted to the upper half, using the median as threshold
xi0 = evir::gpd(y2,mediana)$par.ests['xi']
beta0 = evir::gpd(y2,mediana)$par.ests['beta']
# Starting values for the mixing-weight parameters (the Cauchy parameters
# referred to in Details): location = sample median, scale on log grid
muc0 = quantile(yObs,.5)
tau0 = log(sd(yObs)/2)
x0Lik = as.numeric(c(muc0,tau0,mu0,sigma0,xi0,beta0))
# Maximize the dynamic-mixture log-likelihood (fnscale = -1 makes optim
# maximize); box constraints keep sigma, xi, beta in valid ranges
res <- optim(x0Lik,dynloglik, gr=NULL,yObs,intTol,method='L-BFGS-B',lower=c(-Inf,.01,-Inf,.05,10^-10,.1),upper=c(Inf,Inf,Inf,10,Inf,10),control=list(fnscale=-1))
estMLE <- c(res$par,res$value) # muc, tau, mu, sigma, xi, beta
nreps.list <- sapply(1:bootreps, list)

# ---- Parallel bootstrap of the MLEs ----
# Respect the CRAN check core limit: use only 2 workers when
# _R_CHECK_LIMIT_CORES_ is set, otherwise all detected cores.
chk <- Sys.getenv("_R_CHECK_LIMIT_CORES_", "")
if (nzchar(chk) && chk == "TRUE") {
n.cores <- 2L
} else {
n.cores <- parallel::detectCores()
}
clust <- parallel::makeCluster(n.cores)
MLEboot = matrix(0,bootreps,6)
# One MLEBoot replication per list element; each returns 6 estimates
temp <- parallel::parLapply(clust,nreps.list, MLEBoot,yObs,intTol)
parallel::stopCluster(cl=clust)
for (i in 1:bootreps)
{
MLEboot[i,] = as.vector(unlist(temp[[i]]))
}
varcov = cov(MLEboot)
stddev = sqrt(diag(varcov))
# ---- Supports of the uniform priors ----
# Columns 1:2 (muc, tau): full bootstrap range; columns 3:6 (mu, sigma,
# xi, beta): 99% bootstrap confidence interval (see Details).
CI99a = apply(MLEboot[,1:2],2,range)
CI99b = apply(MLEboot[,3:6],2,quantile,c(.005,.995))
CI99 = cbind(CI99a,CI99b)
# Discard boxplot outliers from the bootstrap distributions of muc and tau
boxCA1 = boxplot(MLEboot[,1],plot = FALSE)
boxCA2 = boxplot(MLEboot[,2],plot = FALSE)
CA1 = MLEboot[,1]
CA2 = MLEboot[,2]
outl1 = boxCA1$out
outl2 = boxCA2$out
nout1 = length(outl1)
nout2 = length(outl2)
if (nout1 > 0)
{
indiciCA1 = rep(0,nout1)
# NOTE(review): which(CA1==outl1[i]) may return more than one index if an
# outlier value is duplicated in the bootstrap sample, which would break
# the scalar assignment below — confirm duplicates cannot occur here.
for (i in 1:nout1)
{
indiciCA1[i] = which(CA1==outl1[i])
}
CA1noout = CA1[-indiciCA1]
}
if (nout1 == 0)
CA1noout = CA1
if (nout2 > 0)
{
indiciCA2 = rep(0,nout2)
for (i in 1:nout2)
{
indiciCA2[i] = which(CA2==outl2[i])
}
CA2noout = CA2[-indiciCA2]
}
if (nout2 == 0)
CA2noout = CA2
# Replace the (muc, tau) supports with the outlier-free bootstrap ranges
CI99[,1] = c(min(CA1noout),max(CA1noout))
CI99[,2] = c(min(CA2noout),max(CA2noout)) # muc, tau, mu, sigma, xi, beta
# NOTE(review): pmin(0,abs(x)) is always 0 because abs(x) >= 0, so this
# forces the lower bound of the tau support to 0 — confirm this is intended.
CI99[1,2] = pmin(0,abs(CI99[1,2]))
# Force a non-negative lower bound for the xi support
CI99[1,5] = abs(CI99[1,5])

# ABC algorithm

# Draw k candidate parameter vectors uniformly over the prior supports.
# The order of the runif calls fixes the RNG stream — do not reorder.
distCVM <- rep(0,k)
xiV <- runif(k,CI99[1,5],CI99[2,5])
betaV <- runif(k,CI99[1,6],CI99[2,6])
muV <- runif(k,CI99[1,3],CI99[2,3])
sigmaV <- runif(k,CI99[1,4],CI99[2,4])
CA1V <- runif(k,CI99[1,1],CI99[2,1])
CA2V <- runif(k,CI99[1,2],CI99[2,2])

# For each candidate, simulate a sample of size n from the dynamic mixture
# and measure its distance from the observed data via cvm_stat_M
# (presumably a Cramer-von Mises statistic — see R/cvm_stat_M.R)
for (j in 1:k)
{
dataSim <- rDynMix(n,c(CA1V[j],CA2V[j],muV[j],sigmaV[j],xiV[j],betaV[j]))
distCVM[j] = cvm_stat_M(yObs,dataSim,power = 2)
}

# Select only epsilon values

# Jitter breaks ties so that the fraction epsilon of candidates with the
# smallest distances is retained as the ABC sample.
# Note the column order of the ABC sample: xi, beta, mu, sigma, CA1 (muc),
# CA2 (tau) — it differs from the muc,tau,mu,sigma,xi,beta order of MLEboot.
distCVM_jit <- jitter(distCVM)
thresh <- quantile(distCVM_jit,epsilon)[1]
indici <- which(distCVM_jit<thresh)
Matr <- cbind(xiV,betaV,muV,sigmaV,CA1V,CA2V)
ABCsam = Matr[indici,]
# Approximate the posterior mode(s) of the ABC sample in four ways
estAMLE = AMLEmode(ABCsam)
out <- list(AMLEpars=estAMLE,ABCsam=ABCsam,MLEpars=estMLE,MLEboot=MLEboot)
return(out)
}
56 changes: 56 additions & 0 deletions R/AMLEmode.R
@@ -0,0 +1,56 @@
#' Approximating the mode of a multivariate empirical distribution
#'
#' This function approximates the mode of a multivariate empirical distribution by means of:
#' 1. the sample mean
#' 2. the product of the maxima of the univariate kernel densities estimated using the marginals
#' 3. the maximum of the multivariate kernel density
#' 4. the maximum of the product of the univariate kernel densities
#' Typically used in connection with AMLEfit (see AMLEfit for examples).
#' @param ABCsam (m x k) matrix: ABC sample, where m is the ABC sample size and k is the
#' number of parameters.
#' @return A list containing the 4 approximate modes.
#' @details The bandwidth is estimated via smoothed cross-validation
#' @keywords dynamic mixture; approximate maximum likelihood.
#' @export

AMLEmode <- function(ABCsam)
{
  n_obs <- nrow(ABCsam)  # ABC sample size
  n_par <- ncol(ABCsam)  # number of parameters

  # Marginal kernel density of each column, evaluated at the sample points.
  # Bandwidths are selected by smoothed cross-validation (ks::hscv).
  # Computed once and reused for approximations 2 and 4 below.
  marg_dens <- vapply(
    seq_len(n_par),
    function(j) {
      col_j <- ABCsam[, j]
      ks::kde(col_j, ks::hscv(col_j), eval.points = col_j)$estimate
    },
    numeric(n_obs)
  )

  # 1. sample mean
  mode_mean <- colMeans(ABCsam)

  # 2. coordinate-wise maximizers of the marginal kernel densities
  mode_marg <- vapply(
    seq_len(n_par),
    function(j) ABCsam[which.max(marg_dens[, j]), j],
    numeric(1)
  )

  # 3. maximizer of the multivariate kernel density (unbinned estimate)
  mv_fit <- ks::kde(ABCsam, ks::Hscv(ABCsam), eval.points = ABCsam, binned = FALSE)
  mode_mv <- ABCsam[which.max(mv_fit$estimate), ]

  # 4. maximizer of the product of the marginal kernel densities
  prod_dens <- apply(marg_dens, 1, prod)
  mode_prod <- ABCsam[which.max(prod_dens), ]

  list(
    SampleMean = mode_mean,
    UnivariateKD = mode_marg,
    MultivariateKD = mode_mv,
    ProductUnivariateKD = mode_prod
  )
}

0 comments on commit 176c001

Please sign in to comment.