Skip to content

Commit

Permalink
Merge branch 'gujyjean-multinom-model-selection' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
itsrainingdata committed Apr 10, 2017
2 parents 4305d2f + e635d12 commit 1f4dd64
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 9 deletions.
47 changes: 47 additions & 0 deletions R/sparsebnUtils-inference.R
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,50 @@ gaussian_profile_loglikelihood <- function(dat, coefs){

-pll ### Need to take negative output loglikelihood (vs NLL)
}

# a function that calculate log-likelihood of a single graph
# parents is an edgeList object
# dat is a data.frame
multinom_loglikelihood <- function(parents,
data
) {
# check parents
if(!sparsebnUtils::is.edgeList(parents)) stop("parents should be a edgeList object!")

# check data
if(!is.data.frame(data)) stop("dat should be a data.frame object!")

# data <- as.data.frame(data)
n_levels <- unlist(sparsebnUtils::auto_count_levels(data))

node <- ncol(data)
# check that the number of node and the what has been input in parents are consistent
if (length(parents) != ncol(data)) {stop(sprintf("Incompatible graph and data! Data has %d columns but graph has %d nodes.", ncol(data), length(parents)))}

# factorize each observation
for (i in 1:node){
data[,i] <- factor(data[,i])
}

# subtract dependent and independent variables for each regression
loglikelihood <- 0
loglikelihood_path <- rep(0, node)
for (i in 1:node){
x_ind <- parents[[i]] # if inputs are only edgeList object
if (length(x_ind)!=0) { # do nothing if a node has no parents
sub_dat <- cbind(data[, c(i, x_ind)])
fu <- stats::as.formula(paste0(colnames(sub_dat)[1], "~", paste(colnames(sub_dat)[c(-1)], collapse = "+")))
fit <- nnet::multinom(fu, data = sub_dat, trace = FALSE)
loglikelihood_path[i] <- as.numeric(stats::logLik(fit))
loglikelihood <- loglikelihood + as.numeric(stats::logLik(fit))
}
else {
sub_dat <- data[, i, drop = FALSE]
fu <- stats::as.formula(paste0(colnames(sub_dat)[1], "~ 1"))
fit <- nnet::multinom(fu, data = sub_dat, trace = FALSE)
# loglikelihood_path[i] <- as.numeric(stats::logLik(fit))
loglikelihood <- loglikelihood + as.numeric(stats::logLik(fit))
}
}
return(loglikelihood)
}
26 changes: 17 additions & 9 deletions R/sparsebnUtils-selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,24 @@ select.parameter <- function(x,
### Check args
stopifnot(is.sparsebnPath(x))
stopifnot(is.sparsebnData(data))
type <- match.arg(type, c("profile", "full"))

### Estimate unpenalized parameters
params <- estimate.parameters(x, data)
if (data$type == "continuous") {
type <- match.arg(type, c("profile", "full"))

### Estimate unpenalized parameters
params <- estimate.parameters(x, data)

### Compute (profile / full) log-likelihood + number of edges
obj <- switch(type,
profile = unlist(lapply(params, function(x) gaussian_profile_loglikelihood(data$data, x$coefs))),
full = unlist(lapply(params, function(x) gaussian_loglikelihood(data$data, x$coefs, x$vars)))
)
}
else {
edge_path <- lapply(x, function(x){x$edges})
obj <- sapply(edge_path, function(x) multinom_loglikelihood(x, data$data))
}

### Compute (profile / full) log-likelihood + number of edges
obj <- switch(type,
profile = unlist(lapply(params, function(x) gaussian_profile_loglikelihood(data$data, x$coefs))),
full = unlist(lapply(params, function(x) gaussian_loglikelihood(data$data, x$coefs, x$vars)))
)
nedges <- num.edges(x)

### Compute lagged differences, difference ratios, and threshold
Expand All @@ -120,7 +128,7 @@ select.parameter <- function(x,
dr[dnedge == 0] <- NA
threshold <- alpha * max(dr, na.rm = TRUE)

max(which(dr >= threshold))
max(which(dr >= threshold))+1
}

### Deprecated
Expand Down
66 changes: 66 additions & 0 deletions tests/testthat/test-multinom_loglikelihood.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
context("multinom_loglikelihood")

# set up input variable
data <- matrix(c(1, 1, 0, 0, 1, 1,
1, 1, 0, 1, 1, 1,
0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 1,
0, 0, 0, 1, 1, 0,
0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 0, 0,
1, 0, 1, 1, 0, 1,
0, 0, 0, 0, 1, 0,
1, 1, 1, 1, 0, 1,
1, 1, 0, 1, 1, 1,
0, 0, 1, 0, 0, 1,
1, 1, 0, 1, 0, 0,
1, 0, 1, 1, 0, 1,
1, 1, 1, 1, 1, 0,
1, 0, 1, 1, 1, 1,
0, 0, 1, 0, 0, 0,
1, 1, 0, 1, 1, 1,
1, 1, 1, 0, 0, 0,
1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 1, 0,
0, 0, 1, 0, 0, 0,
1, 0, 0, 0, 1, 1,
0, 0, 1, 0, 0, 0,
1, 0, 1, 1, 0, 1,
0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 0,
0, 0, 1, 0, 0, 0,
0, 0, 1, 0, 0, 0), byrow = TRUE, ncol = 6)
dat <- as.data.frame(data)
edge_NULL <- lapply(1:6, function(x){integer(0)})
edge <- lapply(1:6, function(x){integer(0)})
edge[[1]] <-4; edge[[2]] <-1; edge[[4]] <-5; edge[[5]] <- 3; edge[[6]] <- 1
parent_NULL <- as.edgeList(edge_NULL)
parent <- as.edgeList(edge)

# test
test_that("multinom_loglikelihood run as expected", {
# multinom can run with NULL graph
expect_error(multinom_loglikelihood(parents = parent_NULL, data = dat), NA)
# multinom can run with regular graph
expect_error(multinom_loglikelihood(parents = parent, data = dat), NA)
})

test_that("Check input parent", {
### Throw error if parent is not edgeList object
parent_list <- as.list(parent)
expect_error(multinom_loglikelihood(parents = parent_list, data = dat))

})

test_that("Check input data", {
### Throw error if data is not data.frame
expect_error(multinom_loglikelihood(parents = parent_list, data = data))

### Throw error if data is not compatible with parent
parent_short <- parent[1:5]
expect_error(multinom_loglikelihood(parents = parent_short, data = data))

})


0 comments on commit 1f4dd64

Please sign in to comment.