Skip to content

Commit

Permalink
version 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianozambom authored and cran-robot committed Feb 9, 2024
0 parents commit b92c22c
Show file tree
Hide file tree
Showing 40 changed files with 2,125 additions and 0 deletions.
17 changes: 17 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Package: VLMCX
Type: Package
Title: Variable Length Markov Chain with Exogenous Covariates
Version: 1.0
Date: 2024-02-01
Imports: graphics, nnet, berryFunctions, stats, utils
Authors@R: c(person("Adriano Zanin Zambom", "Developer", role = c("aut", "cre","cph"), email = "adriano.zambom@gmail.com"), person("Seonjin Kim", "Developer", role = "aut"), person("Nancy Lopes Garcia", "Developer", role = "aut") )
Description: Models categorical time series through a Markov Chain when a) covariates are predictors for transitioning into the next state/symbol and b) when the dependence in the past states has variable length. The probability of transitioning to the next state in the Markov Chain is defined by a multinomial regression whose parameters depend on the past states of the chain and, moreover, the number of states in the past needed to predict the next state also depends on the observed states themselves. See Zambom, Kim, and Garcia (2022) <doi:10.1111/jtsa.12615>.
License: GPL (>= 2)
NeedsCompilation: no
Packaged: 2024-02-07 07:14:39 UTC; adrianozambom
Author: Adriano Zanin Zambom Developer [aut, cre, cph],
Seonjin Kim Developer [aut],
Nancy Lopes Garcia Developer [aut]
Maintainer: Adriano Zanin Zambom Developer <adriano.zambom@gmail.com>
Repository: CRAN
Date/Publication: 2024-02-08 21:10:07 UTC
39 changes: 39 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
ce371c91a15cff1a483ab501eda17100 *DESCRIPTION
bc5271f5892328c6f087e62ee26ae2d8 *NAMESPACE
fe3793a96dd4e7233334aa4bc4715105 *R/AIC.VLMCX.R
85c9eb537b286d70e0379b23d63797a3 *R/BIC.VLMCX.R
19b915cc4bf729206df37263de63cfd5 *R/LogLik.VLMCX.R
45dd9fc728cf2ff9f063de2127aad313 *R/VLMCX.R
b9f0fdb8c69b565712ed354655f36491 *R/VLMCX_depth.R
17c137fcd6e79289eee4b0c9297435bb *R/VLMCX_sorttree.R
e66b8dd17cfb8884d7e057874e287b83 *R/add.children.R
02252c9a05b31b62cf17f30d84f156ef *R/backtrack.beta.R
2eefce3349bef7f7c917169f3f13bf61 *R/coef.VLMCX.R
8a80405240dfc7be645cf241ebb9aed6 *R/context.algorithm.R
ebdb8f7fe3dc2939a5418dd59083cff3 *R/count.param.R
4ecbd3fef1d0aa7bc690ce02356b1c97 *R/draw.VLMCX.R
16c8c9d9336a64a6c52ae0876a4e91ef *R/draw_node.R
68e577cd657584ac770c138c17b14eed *R/estimate.R
48ab00f60426b942482469e88c58e475 *R/find.underlying.context.in.tree.R
5dc24e4e390bdea0fefc11dbbe4f9f06 *R/get.alphabet.from.tree.R
8c6dd5f6ad7d50426758ae5133163240 *R/maximum.context.R
d6a3e01733beffa0e95a9b9d673bd5d8 *R/predict.VLMCX.R
54e4667d0708d97facd74770d6a418f3 *R/print.VLMCX.R
bec409f3d1c0d91a9b2e44585749bf0d *R/remove.last.beta.R
58f482d192c0080f13f1bb3125d309fa *R/simulate.R
7d715790025ea5afaaebacf9a7454775 *R/update.node.R
75d0ee772ee3f412fceff936f1958562 *R/update.node.from.tree.R
9163a8fe12dcc96f4519e6251f2241c2 *R/vecIn.R
0ada4f8b6e171b389128e83e38a3e604 *R/which.children.had.betas.cut.R
c155ae58f8465af80569ff3f3d4e0cf4 *R/write.node.R
b24d663575fd6ed3f37b08da1073f7a5 *man/AIC.Rd
bb096039031b048844ccea4310627e21 *man/BIC.Rd
a1fde268b909a0519969f81f07eba5af *man/LogLik.Rd
02897eace442cb0eda98239af0266a2f *man/VLMCX.Rd
a21a1a8329ab8f6efcec1e614e5292c6 *man/coef.Rd
8ed09fb1756bad7f8cbc75a652924981 *man/context.algorithm.Rd
beb56ce8f56ee7efb9a64168ff79f95b *man/draw.Rd
ba2f67f7805663694a40ffd47ac64cab *man/estimate.Rd
ed064759044c16784240c97678bccd0d *man/maximum.context.Rd
03abad3ed125ba668da74d3181b0e626 *man/predict.Rd
c1456412cf8a86da1ca1c99259b8ef41 *man/simulate.Rd
28 changes: 28 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
export(maximum.context)
export(context.algorithm)
export(AIC)
export(BIC)
export(estimate)
export(coef)
export(draw)
export(LogLik)
export(VLMCX)
export(predict)
export(simulate)
S3method(maximum.context, default)
S3method(context.algorithm, default)
S3method(AIC, VLMCX)
S3method(BIC, VLMCX)
S3method(estimate, default)
S3method(coef, VLMCX)
S3method(draw, VLMCX)
S3method(LogLik, VLMCX)
S3method(VLMCX, default)
S3method(simulate, default)
S3method(predict, VLMCX)
S3method(print, VLMCX)
importFrom("berryFunctions", "circle")
importFrom("graphics", "lines", "text", "par")
importFrom("nnet", "multinom")
importFrom("stats", "pchisq", "rnorm")
importFrom("utils", "combn")
14 changes: 14 additions & 0 deletions R/AIC.VLMCX.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@


AIC <- function(fit)UseMethod("AIC")

## computes the AIC of the model for the data in a VLMCX object using the count.param() and LogLik.VLMCX() functions
AIC.VLMCX <- function(fit)
{
k = count.param(fit$tree)

result = -2*LogLik.VLMCX(fit) + 2*k

return(result)
}

14 changes: 14 additions & 0 deletions R/BIC.VLMCX.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@


BIC <- function(fit)UseMethod("BIC")

## computes the BIC of the model for the data in a VLMCX object using the count.param() and LogLik.VLMCX() functions

BIC.VLMCX <- function(fit)
{
k = count.param(fit$tree)

result = -2*LogLik.VLMCX(fit) + log(length(fit$y))*k

return(result)
}
83 changes: 83 additions & 0 deletions R/LogLik.VLMCX.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
LogLik <- function(fit)UseMethod("LogLik")

#######################################################################################################################
# computes the log-likelihood of the data based on the tree structure and estimated parameters in the fitted VLMCX model
#######################################################################################################################
LogLik.VLMCX <- function(fit)
{
## estimate the parameters based on the tree structure
fit$tree = estimate(fit$tree, fit$y, fit$X)
alphabet = sort(unique(fit$y))


y = fit$y
X = fit$X
d = ifelse(is.null(dim(X)), 1, dim(X)[2]) ## number of covariates

logLikel = log(1/length(alphabet)) ## for the first data point - since there is no past
for (i in 2:length(fit$y))
{
node = find.underlying.context.in.tree(fit$y[1:(i-1)], fit$tree)

if (is.null(node)) ## context does not exist in tree: set equal probabilities
prob = rep(1/length(alphabet), length(alphabet))
else
if (is.null(node$alpha)) ## there are no estimated parameters: it is not a leaf, not final node (nor partially final node)
prob = rep(1/length(alphabet), length(alphabet))
else ## there is alpha and maybe beta
{
if (is.null(node$beta)) ## check if beta was dropped
{
alpha_u = 0
prob = rep(0,(length(alphabet)-1))
for (ind_alphabet in 1:(length(alphabet)-1))
{
alpha_u = node$alpha[ind_alphabet]
prob[ind_alphabet] = exp(as.numeric(alpha_u)) ## probabilities are computed only with alpha
}

}
else ## beta exists
{
alpha_u = 0
beta_v = 0
prob = rep(0,(length(alphabet)-1))
for (ind_alphabet in 1:(length(alphabet)-1))
{
alpha_u = node$alpha[ind_alphabet]
if (d ==1) ## there is only one covariate through time: X is a vector
{
beta_v = as.numeric(node$beta[,1,ind_alphabet])
prob[ind_alphabet] = exp(as.numeric(alpha_u + beta_v%*%X[(i-1):(i-length(beta_v))]))
}
else ## there is a vector with several covariates observed through time: X is a matrix
{
dimension = length(node$beta[,1,ind_alphabet]) ## number of steps into the past
beta_v = 0
for (ind in 1:dimension)
beta_v = beta_v + as.numeric(node$beta[ind,,ind_alphabet]%*%X[(i-ind),])
prob[ind_alphabet] = exp(as.numeric(alpha_u + beta_v)) ## multinomial regression
}
}
}
prob = c(1,prob)/(1 + sum(prob))
}
prob_this_observation = prob[which(alphabet == y[i])] ## get the proba corresponding to the observed data point

if (is.nan(prob_this_observation)) ## this happens when exp(alpha+betaX) is too large then we have Inf/(1+Inf): should be 1
prob_this_observation = 1
if (prob_this_observation != 0)
logLikel = logLikel + log(prob_this_observation)

}
return(logLikel)
}









26 changes: 26 additions & 0 deletions R/VLMCX.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
VLMCX <- function(y, X, alpha.level = 0.05, max.depth = 5, n.min = 5, trace = FALSE)UseMethod("VLMCX")

###############################################################################################################
###############################################################################################################
## Main function
## Starts obtaining the maximum context tree and calls the context algorithm to sequentially prune
## as in Zambom et al 2022
###############################################################################################################
###############################################################################################################
VLMCX.default <- function(y, X, alpha.level = 0.05, max.depth = 5, n.min = 5, trace = FALSE)
{
max_context = maximum.context(y, X, max.depth = max.depth, n.min = n.min)
r = max_context$tree.depth

## main call to the pruning algorithm
result = context.algorithm(max_context, max_context$tree, alpha.level = alpha.level, max.depth = max.depth, n.min = n.min, trace = trace)

if (trace == TRUE)
draw(result, title = "Estimated Context Tree Structure", print.coef = trace)

result$tree = estimate(result$tree, y, X)

return(result)
}


19 changes: 19 additions & 0 deletions R/VLMCX_depth.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
VLMCX_depth <- function(VLMCXtree)UseMethod("VLMCX_depth")


## recursive function that returns the maximum depth of the tree
VLMCX_depth.default <- function(VLMCXtree)
{

if (length(VLMCXtree$child) == 0)
result = 0
else
{
depth.children = 0
for (ind in 1:length(VLMCXtree$child))
depth.children[ind] = VLMCX_depth(VLMCXtree$child[[ind]])
result = 1 + max(depth.children)

}
return(result)
}
30 changes: 30 additions & 0 deletions R/VLMCX_sorttree.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
VLMCX_sorttree <- function(VLMCXtree)UseMethod("VLMCX_sorttree")

##### recursive function that
##### sorts the children of each node to be in alphabetical order (corresponding to the order() function)
VLMCX_sorttree.default <- function(VLMCXtree)
{
node = VLMCXtree

n.children = length(node$child)
if (n.children <= 1)
ordered_tree = node
else
{
ordered_tree = node

children.names = NULL
for (ind in 1:n.children)
children.names[ind] = node$child[[ind]]$context[length(node$child[[ind]]$context)]

children.names.order = order(children.names) ## get the children names in alphabetical order

for (ind in 1:n.children)
{
ordered_tree$child[[ind]] = node$child[[children.names.order[ind]]] ## assign the ordered children names
ordered_tree$child[[ind]] = VLMCX_sorttree(ordered_tree$child[[ind]]) ## recursive call for the child nodes
}
}

return(ordered_tree)
}
53 changes: 53 additions & 0 deletions R/add.children.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
add.children <- function(node, y, d, max.depth = 5, n.min = 5)UseMethod("add.children")

#######################################################################
## add.children is a recursive function that adds nodes (children) down
## the rooted tree as long as max.depth has not been reached and
## there is a minimum number of observations of the context in the data
## - called by "maximum.context()"
#######################################################################
add.children.default <- function(node, y, d, max.depth = 5, n.min = 5)
{

alphabet = sort(unique(y))

this.node = node

index.child = 0
for (index in 1:length(alphabet))
{
if (length(this.node$context) == 1)
{
if (this.node$context == "x") ## when we are at the root we can possibly add nodes that correspond to the alphabet
possible.child = alphabet[index]
else
possible.child = c(this.node$context, alphabet[index]) ## the child is context wu, where w is the parent and u is the alphabet
}
else
possible.child = c(this.node$context, alphabet[index])

if (length(possible.child) <= max.depth) ## do not add if max.depth has been reached
{
n.occur = vecIn(y,rev(possible.child))
if ((length(n.occur)!=0))
if (length(n.occur) >= n.min*(1+d*length(rev(possible.child))*(length(alphabet)-1))) ## check if there is a minimum number of observations of the context in the data
{
index.child = index.child + 1

new_node = NULL
new_node$context = possible.child
new_node$alpha = rep(0,length(alphabet)-1)
new_node$beta = array(0,c(length(new_node$context), d, length(alphabet)-1)) ## steps, d, alphabet
new_node$child = list()

### RECURSION: call the same function to add the child node (new_node) to the current one
new_node = add.children(new_node, y, d = d, max.depth = max.depth, n.min = n.min)

this.node$child[[index.child]] = new_node
}
}
}

return(this.node)

}
49 changes: 49 additions & 0 deletions R/backtrack.beta.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@


backtrack.beta <- function(result_fit, node, alpha.level = 0.05, trace = TRUE)UseMethod("backtrack.beta")

##########################################################################################################
##########################################################################################################
##########################################################################################################
## find the children whose betas were cut - only return those without grandchildren (nodes with grandchild cant be prunned)

backtrack.beta.default <- function(result_fit, node, alpha.level = 0.05, trace = FALSE)
{
X = result_fit$X
y = result_fit$y
alphabet = sort(unique(y))
d = ifelse(is.null(dim(X)), 1, dim(X)[2])

need.to.backtrack.beta = TRUE
while ((need.to.backtrack.beta == TRUE) && (!is.null(node$beta)))
{
result_fit_H0 = result_fit

if (dim(node$beta)[1] == 1)
node$beta = NULL
else
node$beta = array(0,c(dim(node$beta)[1]-1, d, length(alphabet)-1)) ## steps, d, alphabet
result_fit_H0$tree = update.node(result_fit_H0$tree, node)
result_fit_H0$tree = estimate(result_fit_H0$tree, y, X)
p.value = 1-pchisq(-2*(LogLik(result_fit_H0) - LogLik(result_fit)), d*(length(alphabet)-1))
if (p.value > alpha.level)
{
if (trace == TRUE)
if (is.null(node$beta))
cat("\n pruning beta parameters sequentially for node ", node$context, ": now beta depth 0")
else
cat("\n pruning beta parameters sequentially for node ", node$context, ": now beta depth ", dim(node$beta)[1])
result_fit = result_fit_H0
}
else
need.to.backtrack.beta = FALSE
}
return(result_fit)
}







0 comments on commit b92c22c

Please sign in to comment.