Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
marco-bee authored and cran-robot committed Jun 23, 2023
0 parents commit 176c001
Show file tree
Hide file tree
Showing 36 changed files with 2,358 additions and 0 deletions.
22 changes: 22 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,22 @@
Package: FitDynMix
Title: Estimation of Dynamic Mixtures
Version: 0.1.0
Authors@R:
person("Marco", "Bee", , "marco.bee@unitn.it", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-9579-3650"))
Description: Estimation of a dynamic lognormal - Generalized Pareto mixture via the Approximate Maximum Likelihood and the Cross-Entropy methods. See Bee, M. (2023) <doi:10.1016/j.csda.2023.107764>.
License: MIT + file LICENSE
Encoding: UTF-8
RoxygenNote: 7.2.3
Depends: R (>= 2.10)
LazyData: true
RdMacros: Rdpack
Imports: evir, MASS, pracma, Rdpack, parallel, ks, stats, graphics
URL: https://github.com/marco-bee/FitDynMix
BugReports: https://github.com/marco-bee/FitDynMix/issues
NeedsCompilation: no
Packaged: 2023-06-22 18:54:31 UTC; marco.bee
Author: Marco Bee [aut, cre] (<https://orcid.org/0000-0002-9579-3650>)
Maintainer: Marco Bee <marco.bee@unitn.it>
Repository: CRAN
Date/Publication: 2023-06-23 19:00:02 UTC
2 changes: 2 additions & 0 deletions LICENSE
@@ -0,0 +1,2 @@
YEAR: 2023
COPYRIGHT HOLDER: FitDynMix authors
35 changes: 35 additions & 0 deletions MD5
@@ -0,0 +1,35 @@
12bdaa640feee6b4c865894a6d4e8564 *DESCRIPTION
ebd6bc7e962609ced9444ae67560ca5a *LICENSE
6b10bd7694a1e521cce47ae66d347f42 *NAMESPACE
6e762f1fc30d62acf7ea14d96d4b06c5 *NEWS.md
fe8dd4ef52fa9046e886c3dd2ca8b536 *R/AMLEfit.R
29295b1aa66d2fd64030ffe1314decb1 *R/AMLEmode.R
0369d3c66ccfbe0036389944d6156974 *R/CENoisyFit.R
efd64e2dc1c49c2120eec8c83acb15ed *R/CENoisyFitBoot.R
9ec10375c60a327d017bf1beea0f9c80 *R/MLEBoot.R
b3e03fb655f379e5bef5e277648e35ec *R/MLEfit.R
33c5fb1c61e24ce7d2f3e12d43d348f4 *R/cvm_stat_M.R
a32c55a62c8958f08dfb7190539209e9 *R/data.R
13de619cf097bb10cd65bcd66fd16b2a *R/ddyn.R
c573299277bee5afceb26a9bef1e5ea2 *R/dynloglik.R
fe68078cd147dff23c99e3f953c19c80 *R/dynloglikMC.R
5179359fe64d9753c435360bc1af95c4 *R/nConst_MC.R
363e8c20ade55568d422d5e1ff747dff *R/rDynMix.R
a331570487722e8c0a6835985bfb8f76 *README.md
ba072085a418dca954752902ef4e4e09 *build/partial.rdb
dbc3cc2e17c34649f2f38cc59bfdb232 *data/Metro2019.rda
0232fcafa921db54d054c8757e2f13b9 *inst/REFERENCES.bib
9dd48a4b72091561c453e74eb4f362de *man/AMLEfit.Rd
0db1e1973d92385271ed4981156b41c1 *man/AMLEmode.Rd
d20180765ad237316d876b7743af743b *man/CENoisyFit.Rd
c82f4c2deecd86eed7fe0568ff30b1cf *man/CENoisyFitBoot.Rd
234a8be3d1ceccfd59aaa13b590aa0fd *man/MLEBoot.Rd
9c1ceeebfc6389e661bacaf10e628560 *man/MLEfit.Rd
b8fd4493c2245b7dc29e375666c92e12 *man/Metro2019.Rd
5e56f67e0abc3de5a28fff5bf490b214 *man/cvm_stat_M.Rd
a2f647090e291ddde65ac06f3c584b7f *man/ddyn.Rd
c14039c6c27632cb780230fa74f4dbd2 *man/dynloglik.Rd
2e86050866e82a2a30f6819f840b1142 *man/dynloglikMC.Rd
502a5f8dd6ba90060101596366cb7d58 *man/figures/README-pressure-1.png
721497271701f2695b0c91772788b2dd *man/nConst_MC.Rd
1eb8034ff81f6dd9e27f359b62357a39 *man/rDynMix.Rd
17 changes: 17 additions & 0 deletions NAMESPACE
@@ -0,0 +1,17 @@
# Generated by roxygen2: do not edit by hand

export(AMLEfit)
export(AMLEmode)
export(CENoisyFit)
export(CENoisyFitBoot)
export(MLEBoot)
export(MLEfit)
export(cvm_stat_M)
export(ddyn)
export(dynloglik)
export(dynloglikMC)
export(nConst_MC)
export(rDynMix)
import(graphics)
import(stats)
importFrom(Rdpack,reprompt)
3 changes: 3 additions & 0 deletions NEWS.md
@@ -0,0 +1,3 @@
# FitDynMix 0.1.0

The first release of FitDynMix provides facilities for estimation of a lognormal-GPD dynamic mixture.
147 changes: 147 additions & 0 deletions R/AMLEfit.R
@@ -0,0 +1,147 @@
#' Estimating a dynamic mixture via AMLE
#'
#' This function fits a dynamic mixture via Approximate Maximum Likelihood.
#' Currently only implemented for the lognormal - generalized Pareto case.
#' The bootstrap estimation of the standard errors of the MLEs (used for finding
#' the supports of the uniform priors) is carried out via parallel computing.
#' @param yObs numerical vector: observed sample.
#' @param epsilon non-negative scalar: scale parameter of the Markov kernel.
#' @param k non-negative integer: number of samples generated in the AMLE
#' approach, such that k*epsilon = ABC sample size.
#' @param bootreps positive integer: number of bootstrap replications.
#' @param intTol non-negative scalar: threshold for stopping the computation of the integral in the normalization
#' constant: if the integral on the interval from n-1 to n is smaller than intTol, the approximation procedure stops.
#' @return A list with the following elements:
#'
#' AMLEpars a list of four 6-dimensional vectors: approximate maximum likelihood estimates computed via sample mean,
#' maxima of the marginal kernel densities, maximum of the multivariate kernel densities,
#' maximum of the product of the marginal kernel densities.
#'
#' ABCsam ((k x epsilon) x 6) matrix: ABC sample.
#'
#' MLEpars (7 x 1) vector: maximum likelihood estimates and
#' maximized log-likelihood.
#'
#' MLEboot (bootreps x 6) matrix: maximum likelihood estimates obtained in
#' each bootstrap replication.
#' @details
#' For the lognormal and GPD parameters, the support of the uniform
#' prior is set equal to the 99% confidence interval of the bootstrap
#' distribution after discarding the outliers.
#' For the Cauchy parameters, the support is given by the range of the
#' bootstrap distribution after discarding the outliers.
#' Be aware that computing times are large when k and/or bootreps are large.
#' @keywords dynamic mixture; approximate maximum likelihood.
#' @seealso{\code{\link{AMLEmode}}.}
#' @export
#' @import stats graphics
#' @examples
#' \donttest{k <- 5000
#' epsilon <- .02
#' bootreps <- 2
#' res = AMLEfit(Metro2019, epsilon, k, bootreps)}
#' @references{
#' \insertRef{bee22b}{FitDynMix}
#' }
#'
#'
#' @importFrom Rdpack reprompt

AMLEfit <- function(yObs,epsilon,k,bootreps,intTol=1e-4)
{
# ---- Starting values for the full MLE ----
# Split the sample at the median: the lower half initializes the lognormal
# parameters, the upper half the GPD parameters.
n = length(yObs)
mediana = median(yObs)
y1 = yObs[yObs<mediana]
y2 = yObs[yObs>=mediana]
mu0 = MASS::fitdistr(y1,'lognormal')$estimate[1]
sigma0 = MASS::fitdistr(y1,'lognormal')$estimate[2]
# GPD fitted to the upper half, using the median as threshold
xi0 = evir::gpd(y2,mediana)$par.ests['xi']
beta0 = evir::gpd(y2,mediana)$par.ests['beta']
# Starting values for the mixing-weight parameters (the Cauchy parameters
# referred to in Details): location = sample median, scale on log grid
muc0 = quantile(yObs,.5)
tau0 = log(sd(yObs)/2)
x0Lik = as.numeric(c(muc0,tau0,mu0,sigma0,xi0,beta0))
# Maximize the dynamic-mixture log-likelihood (fnscale = -1 makes optim
# maximize); box constraints keep sigma, xi, beta in valid ranges
res <- optim(x0Lik,dynloglik, gr=NULL,yObs,intTol,method='L-BFGS-B',lower=c(-Inf,.01,-Inf,.05,10^-10,.1),upper=c(Inf,Inf,Inf,10,Inf,10),control=list(fnscale=-1))
estMLE <- c(res$par,res$value) # muc, tau, mu, sigma, xi, beta
nreps.list <- sapply(1:bootreps, list)

# ---- Parallel bootstrap of the MLEs ----
# Respect the CRAN check core limit: use only 2 workers when
# _R_CHECK_LIMIT_CORES_ is set, otherwise all detected cores.
chk <- Sys.getenv("_R_CHECK_LIMIT_CORES_", "")
if (nzchar(chk) && chk == "TRUE") {
n.cores <- 2L
} else {
n.cores <- parallel::detectCores()
}
clust <- parallel::makeCluster(n.cores)
MLEboot = matrix(0,bootreps,6)
# One MLEBoot replication per list element; each returns 6 estimates
temp <- parallel::parLapply(clust,nreps.list, MLEBoot,yObs,intTol)
parallel::stopCluster(cl=clust)
for (i in 1:bootreps)
{
MLEboot[i,] = as.vector(unlist(temp[[i]]))
}
varcov = cov(MLEboot)
stddev = sqrt(diag(varcov))
# ---- Supports of the uniform priors ----
# Columns 1:2 (muc, tau): full bootstrap range; columns 3:6 (mu, sigma,
# xi, beta): 99% bootstrap confidence interval (see Details).
CI99a = apply(MLEboot[,1:2],2,range)
CI99b = apply(MLEboot[,3:6],2,quantile,c(.005,.995))
CI99 = cbind(CI99a,CI99b)
# Discard boxplot outliers from the bootstrap distributions of muc and tau
boxCA1 = boxplot(MLEboot[,1],plot = FALSE)
boxCA2 = boxplot(MLEboot[,2],plot = FALSE)
CA1 = MLEboot[,1]
CA2 = MLEboot[,2]
outl1 = boxCA1$out
outl2 = boxCA2$out
nout1 = length(outl1)
nout2 = length(outl2)
if (nout1 > 0)
{
indiciCA1 = rep(0,nout1)
# NOTE(review): which(CA1==outl1[i]) may return more than one index if an
# outlier value is duplicated in the bootstrap sample, which would break
# the scalar assignment below — confirm duplicates cannot occur here.
for (i in 1:nout1)
{
indiciCA1[i] = which(CA1==outl1[i])
}
CA1noout = CA1[-indiciCA1]
}
if (nout1 == 0)
CA1noout = CA1
if (nout2 > 0)
{
indiciCA2 = rep(0,nout2)
for (i in 1:nout2)
{
indiciCA2[i] = which(CA2==outl2[i])
}
CA2noout = CA2[-indiciCA2]
}
if (nout2 == 0)
CA2noout = CA2
# Replace the (muc, tau) supports with the outlier-free bootstrap ranges
CI99[,1] = c(min(CA1noout),max(CA1noout))
CI99[,2] = c(min(CA2noout),max(CA2noout)) # muc, tau, mu, sigma, xi, beta
# NOTE(review): pmin(0,abs(x)) is always 0 because abs(x) >= 0, so this
# forces the lower bound of the tau support to 0 — confirm this is intended.
CI99[1,2] = pmin(0,abs(CI99[1,2]))
# Force a non-negative lower bound for the xi support
CI99[1,5] = abs(CI99[1,5])

# ABC algorithm

# Draw k candidate parameter vectors uniformly over the prior supports.
# The order of the runif calls fixes the RNG stream — do not reorder.
distCVM <- rep(0,k)
xiV <- runif(k,CI99[1,5],CI99[2,5])
betaV <- runif(k,CI99[1,6],CI99[2,6])
muV <- runif(k,CI99[1,3],CI99[2,3])
sigmaV <- runif(k,CI99[1,4],CI99[2,4])
CA1V <- runif(k,CI99[1,1],CI99[2,1])
CA2V <- runif(k,CI99[1,2],CI99[2,2])

# For each candidate, simulate a sample of size n from the dynamic mixture
# and measure its distance from the observed data via cvm_stat_M
# (presumably a Cramer-von Mises statistic — see R/cvm_stat_M.R)
for (j in 1:k)
{
dataSim <- rDynMix(n,c(CA1V[j],CA2V[j],muV[j],sigmaV[j],xiV[j],betaV[j]))
distCVM[j] = cvm_stat_M(yObs,dataSim,power = 2)
}

# Select only epsilon values

# Jitter breaks ties so that the fraction epsilon of candidates with the
# smallest distances is retained as the ABC sample.
# Note the column order of the ABC sample: xi, beta, mu, sigma, CA1 (muc),
# CA2 (tau) — it differs from the muc,tau,mu,sigma,xi,beta order of MLEboot.
distCVM_jit <- jitter(distCVM)
thresh <- quantile(distCVM_jit,epsilon)[1]
indici <- which(distCVM_jit<thresh)
Matr <- cbind(xiV,betaV,muV,sigmaV,CA1V,CA2V)
ABCsam = Matr[indici,]
# Approximate the posterior mode(s) of the ABC sample in four ways
estAMLE = AMLEmode(ABCsam)
out <- list(AMLEpars=estAMLE,ABCsam=ABCsam,MLEpars=estMLE,MLEboot=MLEboot)
return(out)
}
56 changes: 56 additions & 0 deletions R/AMLEmode.R
@@ -0,0 +1,56 @@
#' Approximating the mode of a multivariate empirical distribution
#'
#' This function approximates the mode of a multivariate empirical distribution by means of:
#' 1. the sample mean
#' 2. the product of the maxima of the univariate kernel densities estimated using the marginals
#' 3. the maximum of the multivariate kernel density
#' 4. the maximum of the product of the univariate kernel densities
#' Typically used in connection with AMLEfit (see AMLEfit for examples).
#' @param ABCsam (m x k) matrix: ABC sample, where m is the ABC sample size and k is the
#' number of parameters.
#' @return A list containing the 4 approximate modes.
#' @details The bandwidth is estimated via smoothed cross-validation
#' @keywords dynamic mixture; approximate maximum likelihood.
#' @export

AMLEmode <- function(ABCsam)
{
  n_obs <- nrow(ABCsam)  # ABC sample size
  n_par <- ncol(ABCsam)  # number of parameters

  # Marginal kernel density of each column, evaluated at the sample points.
  # Bandwidths are selected by smoothed cross-validation (ks::hscv).
  # Computed once and reused for approximations 2 and 4 below.
  marg_dens <- vapply(
    seq_len(n_par),
    function(j) {
      col_j <- ABCsam[, j]
      ks::kde(col_j, ks::hscv(col_j), eval.points = col_j)$estimate
    },
    numeric(n_obs)
  )

  # 1. sample mean
  mode_mean <- colMeans(ABCsam)

  # 2. coordinate-wise maximizers of the marginal kernel densities
  mode_marg <- vapply(
    seq_len(n_par),
    function(j) ABCsam[which.max(marg_dens[, j]), j],
    numeric(1)
  )

  # 3. maximizer of the multivariate kernel density (unbinned estimate)
  mv_fit <- ks::kde(ABCsam, ks::Hscv(ABCsam), eval.points = ABCsam, binned = FALSE)
  mode_mv <- ABCsam[which.max(mv_fit$estimate), ]

  # 4. maximizer of the product of the marginal kernel densities
  prod_dens <- apply(marg_dens, 1, prod)
  mode_prod <- ABCsam[which.max(prod_dens), ]

  list(
    SampleMean = mode_mean,
    UnivariateKD = mode_marg,
    MultivariateKD = mode_mv,
    ProductUnivariateKD = mode_prod
  )
}

0 comments on commit 176c001

Please sign in to comment.