-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit b92c22c
Showing
40 changed files
with
2,125 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
Package: VLMCX | ||
Type: Package | ||
Title: Variable Length Markov Chain with Exogenous Covariates | ||
Version: 1.0 | ||
Date: 2024-02-01 | ||
Imports: graphics, nnet, berryFunctions, stats, utils | ||
Authors@R: c(person("Adriano Zanin Zambom", "Developer", role = c("aut", "cre","cph"), email = "adriano.zambom@gmail.com"), person("Seonjin Kim", "Developer", role = "aut"), person("Nancy Lopes Garcia", "Developer", role = "aut") ) | ||
Description: Models categorical time series through a Markov Chain when a) covariates are predictors for transitioning into the next state/symbol and b) when the dependence in the past states has variable length. The probability of transitioning to the next state in the Markov Chain is defined by a multinomial regression whose parameters depend on the past states of the chain and, moreover, the number of states in the past needed to predict the next state also depends on the observed states themselves. See Zambom, Kim, and Garcia (2022) <doi:10.1111/jtsa.12615>. | ||
License: GPL (>= 2) | ||
NeedsCompilation: no | ||
Packaged: 2024-02-07 07:14:39 UTC; adrianozambom | ||
Author: Adriano Zanin Zambom Developer [aut, cre, cph], | ||
Seonjin Kim Developer [aut], | ||
Nancy Lopes Garcia Developer [aut] | ||
Maintainer: Adriano Zanin Zambom Developer <adriano.zambom@gmail.com> | ||
Repository: CRAN | ||
Date/Publication: 2024-02-08 21:10:07 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
ce371c91a15cff1a483ab501eda17100 *DESCRIPTION | ||
bc5271f5892328c6f087e62ee26ae2d8 *NAMESPACE | ||
fe3793a96dd4e7233334aa4bc4715105 *R/AIC.VLMCX.R | ||
85c9eb537b286d70e0379b23d63797a3 *R/BIC.VLMCX.R | ||
19b915cc4bf729206df37263de63cfd5 *R/LogLik.VLMCX.R | ||
45dd9fc728cf2ff9f063de2127aad313 *R/VLMCX.R | ||
b9f0fdb8c69b565712ed354655f36491 *R/VLMCX_depth.R | ||
17c137fcd6e79289eee4b0c9297435bb *R/VLMCX_sorttree.R | ||
e66b8dd17cfb8884d7e057874e287b83 *R/add.children.R | ||
02252c9a05b31b62cf17f30d84f156ef *R/backtrack.beta.R | ||
2eefce3349bef7f7c917169f3f13bf61 *R/coef.VLMCX.R | ||
8a80405240dfc7be645cf241ebb9aed6 *R/context.algorithm.R | ||
ebdb8f7fe3dc2939a5418dd59083cff3 *R/count.param.R | ||
4ecbd3fef1d0aa7bc690ce02356b1c97 *R/draw.VLMCX.R | ||
16c8c9d9336a64a6c52ae0876a4e91ef *R/draw_node.R | ||
68e577cd657584ac770c138c17b14eed *R/estimate.R | ||
48ab00f60426b942482469e88c58e475 *R/find.underlying.context.in.tree.R | ||
5dc24e4e390bdea0fefc11dbbe4f9f06 *R/get.alphabet.from.tree.R | ||
8c6dd5f6ad7d50426758ae5133163240 *R/maximum.context.R | ||
d6a3e01733beffa0e95a9b9d673bd5d8 *R/predict.VLMCX.R | ||
54e4667d0708d97facd74770d6a418f3 *R/print.VLMCX.R | ||
bec409f3d1c0d91a9b2e44585749bf0d *R/remove.last.beta.R | ||
58f482d192c0080f13f1bb3125d309fa *R/simulate.R | ||
7d715790025ea5afaaebacf9a7454775 *R/update.node.R | ||
75d0ee772ee3f412fceff936f1958562 *R/update.node.from.tree.R | ||
9163a8fe12dcc96f4519e6251f2241c2 *R/vecIn.R | ||
0ada4f8b6e171b389128e83e38a3e604 *R/which.children.had.betas.cut.R | ||
c155ae58f8465af80569ff3f3d4e0cf4 *R/write.node.R | ||
b24d663575fd6ed3f37b08da1073f7a5 *man/AIC.Rd | ||
bb096039031b048844ccea4310627e21 *man/BIC.Rd | ||
a1fde268b909a0519969f81f07eba5af *man/LogLik.Rd | ||
02897eace442cb0eda98239af0266a2f *man/VLMCX.Rd | ||
a21a1a8329ab8f6efcec1e614e5292c6 *man/coef.Rd | ||
8ed09fb1756bad7f8cbc75a652924981 *man/context.algorithm.Rd | ||
beb56ce8f56ee7efb9a64168ff79f95b *man/draw.Rd | ||
ba2f67f7805663694a40ffd47ac64cab *man/estimate.Rd | ||
ed064759044c16784240c97678bccd0d *man/maximum.context.Rd | ||
03abad3ed125ba668da74d3181b0e626 *man/predict.Rd | ||
c1456412cf8a86da1ca1c99259b8ef41 *man/simulate.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
export(maximum.context) | ||
export(context.algorithm) | ||
export(AIC) | ||
export(BIC) | ||
export(estimate) | ||
export(coef) | ||
export(draw) | ||
export(LogLik) | ||
export(VLMCX) | ||
export(predict) | ||
export(simulate) | ||
S3method(maximum.context, default) | ||
S3method(context.algorithm, default) | ||
S3method(AIC, VLMCX) | ||
S3method(BIC, VLMCX) | ||
S3method(estimate, default) | ||
S3method(coef, VLMCX) | ||
S3method(draw, VLMCX) | ||
S3method(LogLik, VLMCX) | ||
S3method(VLMCX, default) | ||
S3method(simulate, default) | ||
S3method(predict, VLMCX) | ||
S3method(print, VLMCX) | ||
importFrom("berryFunctions", "circle") | ||
importFrom("graphics", "lines", "text", "par") | ||
importFrom("nnet", "multinom") | ||
importFrom("stats", "pchisq", "rnorm") | ||
importFrom("utils", "combn") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
|
||
AIC <- function(fit)UseMethod("AIC") | ||
|
||
## computes the AIC of the model for the data in a VLMCX object using the count.param() and LogLik.VLMCX() functions | ||
AIC.VLMCX <- function(fit) | ||
{ | ||
k = count.param(fit$tree) | ||
|
||
result = -2*LogLik.VLMCX(fit) + 2*k | ||
|
||
return(result) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
|
||
BIC <- function(fit)UseMethod("BIC") | ||
|
||
## computes the BIC of the model for the data in a VLMCX object using the count.param() and LogLik.VLMCX() functions | ||
|
||
BIC.VLMCX <- function(fit) | ||
{ | ||
k = count.param(fit$tree) | ||
|
||
result = -2*LogLik.VLMCX(fit) + log(length(fit$y))*k | ||
|
||
return(result) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
LogLik <- function(fit)UseMethod("LogLik") | ||
|
||
####################################################################################################################### | ||
# computes the log-likelihood of the data based on the tree structure and estimated parameters in the fitted VLMCX model | ||
####################################################################################################################### | ||
LogLik.VLMCX <- function(fit) | ||
{ | ||
## estimate the parameters based on the tree structure | ||
fit$tree = estimate(fit$tree, fit$y, fit$X) | ||
alphabet = sort(unique(fit$y)) | ||
|
||
|
||
y = fit$y | ||
X = fit$X | ||
d = ifelse(is.null(dim(X)), 1, dim(X)[2]) ## number of covariates | ||
|
||
logLikel = log(1/length(alphabet)) ## for the first data point - since there is no past | ||
for (i in 2:length(fit$y)) | ||
{ | ||
node = find.underlying.context.in.tree(fit$y[1:(i-1)], fit$tree) | ||
|
||
if (is.null(node)) ## context does not exist in tree: set equal probabilities | ||
prob = rep(1/length(alphabet), length(alphabet)) | ||
else | ||
if (is.null(node$alpha)) ## there are no estimated parameters: it is not a leaf, not final node (nor partially final node) | ||
prob = rep(1/length(alphabet), length(alphabet)) | ||
else ## there is alpha and maybe beta | ||
{ | ||
if (is.null(node$beta)) ## check if beta was dropped | ||
{ | ||
alpha_u = 0 | ||
prob = rep(0,(length(alphabet)-1)) | ||
for (ind_alphabet in 1:(length(alphabet)-1)) | ||
{ | ||
alpha_u = node$alpha[ind_alphabet] | ||
prob[ind_alphabet] = exp(as.numeric(alpha_u)) ## probabilities are computed only with alpha | ||
} | ||
|
||
} | ||
else ## beta exists | ||
{ | ||
alpha_u = 0 | ||
beta_v = 0 | ||
prob = rep(0,(length(alphabet)-1)) | ||
for (ind_alphabet in 1:(length(alphabet)-1)) | ||
{ | ||
alpha_u = node$alpha[ind_alphabet] | ||
if (d ==1) ## there is only one covariate through time: X is a vector | ||
{ | ||
beta_v = as.numeric(node$beta[,1,ind_alphabet]) | ||
prob[ind_alphabet] = exp(as.numeric(alpha_u + beta_v%*%X[(i-1):(i-length(beta_v))])) | ||
} | ||
else ## there is a vector with several covariates observed through time: X is a matrix | ||
{ | ||
dimension = length(node$beta[,1,ind_alphabet]) ## number of steps into the past | ||
beta_v = 0 | ||
for (ind in 1:dimension) | ||
beta_v = beta_v + as.numeric(node$beta[ind,,ind_alphabet]%*%X[(i-ind),]) | ||
prob[ind_alphabet] = exp(as.numeric(alpha_u + beta_v)) ## multinomial regression | ||
} | ||
} | ||
} | ||
prob = c(1,prob)/(1 + sum(prob)) | ||
} | ||
prob_this_observation = prob[which(alphabet == y[i])] ## get the proba corresponding to the observed data point | ||
|
||
if (is.nan(prob_this_observation)) ## this happens when exp(alpha+betaX) is too large then we have Inf/(1+Inf): should be 1 | ||
prob_this_observation = 1 | ||
if (prob_this_observation != 0) | ||
logLikel = logLikel + log(prob_this_observation) | ||
|
||
} | ||
return(logLikel) | ||
} | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
VLMCX <- function(y, X, alpha.level = 0.05, max.depth = 5, n.min = 5, trace = FALSE)UseMethod("VLMCX") | ||
|
||
############################################################################################################### | ||
############################################################################################################### | ||
## Main function | ||
## Starts obtaining the maximum context tree and calls the context algorithm to sequentially prune | ||
## as in Zambom et al 2022 | ||
############################################################################################################### | ||
############################################################################################################### | ||
VLMCX.default <- function(y, X, alpha.level = 0.05, max.depth = 5, n.min = 5, trace = FALSE) | ||
{ | ||
max_context = maximum.context(y, X, max.depth = max.depth, n.min = n.min) | ||
r = max_context$tree.depth | ||
|
||
## main call to the pruning algorithm | ||
result = context.algorithm(max_context, max_context$tree, alpha.level = alpha.level, max.depth = max.depth, n.min = n.min, trace = trace) | ||
|
||
if (trace == TRUE) | ||
draw(result, title = "Estimated Context Tree Structure", print.coef = trace) | ||
|
||
result$tree = estimate(result$tree, y, X) | ||
|
||
return(result) | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
VLMCX_depth <- function(VLMCXtree)UseMethod("VLMCX_depth") | ||
|
||
|
||
## recursive function that returns the maximum depth of the tree | ||
VLMCX_depth.default <- function(VLMCXtree) | ||
{ | ||
|
||
if (length(VLMCXtree$child) == 0) | ||
result = 0 | ||
else | ||
{ | ||
depth.children = 0 | ||
for (ind in 1:length(VLMCXtree$child)) | ||
depth.children[ind] = VLMCX_depth(VLMCXtree$child[[ind]]) | ||
result = 1 + max(depth.children) | ||
|
||
} | ||
return(result) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
VLMCX_sorttree <- function(VLMCXtree)UseMethod("VLMCX_sorttree") | ||
|
||
##### recursive function that | ||
##### sorts the children of each node to be in alphabetical order (corresponding to the order() function) | ||
VLMCX_sorttree.default <- function(VLMCXtree) | ||
{ | ||
node = VLMCXtree | ||
|
||
n.children = length(node$child) | ||
if (n.children <= 1) | ||
ordered_tree = node | ||
else | ||
{ | ||
ordered_tree = node | ||
|
||
children.names = NULL | ||
for (ind in 1:n.children) | ||
children.names[ind] = node$child[[ind]]$context[length(node$child[[ind]]$context)] | ||
|
||
children.names.order = order(children.names) ## get the children names in alphabetical order | ||
|
||
for (ind in 1:n.children) | ||
{ | ||
ordered_tree$child[[ind]] = node$child[[children.names.order[ind]]] ## assign the ordered children names | ||
ordered_tree$child[[ind]] = VLMCX_sorttree(ordered_tree$child[[ind]]) ## recursive call for the child nodes | ||
} | ||
} | ||
|
||
return(ordered_tree) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
add.children <- function(node, y, d, max.depth = 5, n.min = 5)UseMethod("add.children") | ||
|
||
####################################################################### | ||
## add.children is a recursive function that adds nodes (children) down | ||
## the rooted tree as long as max.depth has not been reached and | ||
## there is a minimum number of observations of the context in the data | ||
## - called by "maximum.context()" | ||
####################################################################### | ||
add.children.default <- function(node, y, d, max.depth = 5, n.min = 5) | ||
{ | ||
|
||
alphabet = sort(unique(y)) | ||
|
||
this.node = node | ||
|
||
index.child = 0 | ||
for (index in 1:length(alphabet)) | ||
{ | ||
if (length(this.node$context) == 1) | ||
{ | ||
if (this.node$context == "x") ## when we are at the root we can possibly add nodes that correspond to the alphabet | ||
possible.child = alphabet[index] | ||
else | ||
possible.child = c(this.node$context, alphabet[index]) ## the child is context wu, where w is the parent and u is the alphabet | ||
} | ||
else | ||
possible.child = c(this.node$context, alphabet[index]) | ||
|
||
if (length(possible.child) <= max.depth) ## do not add if max.depth has been reached | ||
{ | ||
n.occur = vecIn(y,rev(possible.child)) | ||
if ((length(n.occur)!=0)) | ||
if (length(n.occur) >= n.min*(1+d*length(rev(possible.child))*(length(alphabet)-1))) ## check if there is a minimum number of observations of the context in the data | ||
{ | ||
index.child = index.child + 1 | ||
|
||
new_node = NULL | ||
new_node$context = possible.child | ||
new_node$alpha = rep(0,length(alphabet)-1) | ||
new_node$beta = array(0,c(length(new_node$context), d, length(alphabet)-1)) ## steps, d, alphabet | ||
new_node$child = list() | ||
|
||
### RECURSION: call the same function to add the child node (new_node) to the current one | ||
new_node = add.children(new_node, y, d = d, max.depth = max.depth, n.min = n.min) | ||
|
||
this.node$child[[index.child]] = new_node | ||
} | ||
} | ||
} | ||
|
||
return(this.node) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
|
||
|
||
backtrack.beta <- function(result_fit, node, alpha.level = 0.05, trace = TRUE)UseMethod("backtrack.beta") | ||
|
||
########################################################################################################## | ||
########################################################################################################## | ||
########################################################################################################## | ||
## find the children whose betas were cut - only return those without grandchildren (nodes with grandchild cant be prunned) | ||
|
||
backtrack.beta.default <- function(result_fit, node, alpha.level = 0.05, trace = FALSE) | ||
{ | ||
X = result_fit$X | ||
y = result_fit$y | ||
alphabet = sort(unique(y)) | ||
d = ifelse(is.null(dim(X)), 1, dim(X)[2]) | ||
|
||
need.to.backtrack.beta = TRUE | ||
while ((need.to.backtrack.beta == TRUE) && (!is.null(node$beta))) | ||
{ | ||
result_fit_H0 = result_fit | ||
|
||
if (dim(node$beta)[1] == 1) | ||
node$beta = NULL | ||
else | ||
node$beta = array(0,c(dim(node$beta)[1]-1, d, length(alphabet)-1)) ## steps, d, alphabet | ||
result_fit_H0$tree = update.node(result_fit_H0$tree, node) | ||
result_fit_H0$tree = estimate(result_fit_H0$tree, y, X) | ||
p.value = 1-pchisq(-2*(LogLik(result_fit_H0) - LogLik(result_fit)), d*(length(alphabet)-1)) | ||
if (p.value > alpha.level) | ||
{ | ||
if (trace == TRUE) | ||
if (is.null(node$beta)) | ||
cat("\n pruning beta parameters sequentially for node ", node$context, ": now beta depth 0") | ||
else | ||
cat("\n pruning beta parameters sequentially for node ", node$context, ": now beta depth ", dim(node$beta)[1]) | ||
result_fit = result_fit_H0 | ||
} | ||
else | ||
need.to.backtrack.beta = FALSE | ||
} | ||
return(result_fit) | ||
} | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
Oops, something went wrong.