From 3aee94b8cfaae4660c4ba9fcc9ac28e382dbbefd Mon Sep 17 00:00:00 2001
From: Ian Flint <ian.flint@unimelb.edu.au>
Date: Fri, 17 Nov 2023 23:16:56 +1100
Subject: [PATCH] Improved handling of NA values on covariates

---
 R/gibbsm.R                               | 751 ++++++++++++-----------
 src/compute_vcov.cpp                     |   9 +-
 src/prepare_gibbsm_data.cpp              |  57 +-
 src/utility/im_wrapper.hpp               |  22 +-
 src/utility/subset_to_nonNA.hpp          |  31 +
 tests/testthat/test-compute_papangelou.R |   8 +-
 tests/testthat/test-gibbsm.R             |  34 +
 7 files changed, 490 insertions(+), 422 deletions(-)
 create mode 100644 src/utility/subset_to_nonNA.hpp

diff --git a/R/gibbsm.R b/R/gibbsm.R
index c667832..0700745 100644
--- a/R/gibbsm.R
+++ b/R/gibbsm.R
@@ -1,366 +1,3 @@
-fit_gibbs <- function(gibbsm_data,
-                      fitting_package,
-                      which_lambda,
-                      estimate_alpha,
-                      estimate_gamma,
-                      types_names,
-                      covariates_names,
-                      use_regularization,
-                      nthreads,
-                      refit_glmnet,
-                      debug,
-                      ...) {
-  # Extract a few useful quantities used in most regressions
-  regressors <- gibbsm_data$regressors
-  if(any(is.na(regressors))) {
-    max_indices <- 10
-    na_row_indices <- which(rowSums(is.na(regressors)) > 0)[seq_len(max_indices)]
-    na_row_indices <- na_row_indices[!is.na(na_row_indices)]
-    warning(paste0("Some NA values detected in regression matrix; indices: "),
-            na_row_indices)
-    print(regressors[na_row_indices, ])
-  }
-
-  number_types <- length(gibbsm_data$shift)
-  nregressors <- ncol(regressors)
-
-  # Penalty factor used in penalized regressions
-  pfactor <- rep(1, nregressors)
-  pfactor[startsWith(colnames(regressors), "beta0")] <- 0
-
-  if(fitting_package == "glmnet") {
-    # Avoid a bug in glmnet: if intercept = FALSE, and there's a column of ones, it gets ignored by glmnet
-    # even though its penalty factor is zero.
-    if(all(1 == regressors[, startsWith(colnames(regressors), "beta0")])) {
-      regressors[1, startsWith(colnames(regressors), "beta0")] <- 1.00001
-    }
-
-    # We don't use an offset explicitely because the call to glmnet above returns nonsensical results or hangs.
-    # Instead, we'll use a shift for all the beta0 regressors according to -log(rho).
-    arguments <- list(x = Matrix(regressors, sparse = TRUE),
-                      y = Matrix(gibbsm_data$response, sparse = TRUE),
-                      intercept = FALSE,
-                      family = "binomial")
-    user_supplied <- list(...)
-
-    if(use_regularization) {
-      # Add penalty factor
-      arguments <- append(arguments, list(penalty.factor = pfactor))
-
-      # Call glmnet
-      arguments <- append(arguments, user_supplied)
-      if(debug) {
-        tm <- Sys.time()
-        message("Calling glmnet...")
-      }
-      fit <- do.call(glmnet, arguments)
-      if(debug) {
-        message("Finished call to glmnet.")
-        print(Sys.time() - tm)
-      }
-      if(refit_glmnet > 0) {
-        lambda_seq <- fit$lambda
-        N <- (1 + refit_glmnet) * length(lambda_seq)
-        lambda_new <- vector(mode = 'numeric', length = N)
-        lambda_new[1] <- lambda_seq[1]
-        by <- (lambda_seq[1] - lambda_seq[2]) / lambda_seq[1]
-        for(i in 2:N) {
-          lambda_new[i] <- (1 - by) * lambda_new[i - 1]
-        }
-
-        arguments$lambda <- lambda_new
-        arguments$nlambda <- length(lambda_new)
-
-        if(debug) {
-          tm <- Sys.time()
-          message("Calling glmnet again...")
-        }
-        fit <- do.call(glmnet, arguments)
-        if(debug) {
-          message("Finished call to glmnet.")
-          print(Sys.time() - tm)
-        }
-      }
-
-      # Compute AIC or BIC for the vector of lambdas
-      # Reference: https://stackoverflow.com/questions/40920051/r-getting-aic-bic-likelihood-from-glmnet
-      tLL <- fit$nulldev - deviance(fit)
-      k <- fit$df
-      n <- fit$nobs
-      aic <- -tLL + 2 * k + 2 * k * (k + 1) / (n - k - 1)
-      bic <- log(n) * k - tLL
-
-      # Choose lambda that minimizes AIC or BIC.
-      if(which_lambda == "AIC") {
-        coef <- coefficients(fit)[, which.min(aic)]
-        aic <- min(aic)
-        bic <- bic[which.min(aic)]
-      } else if(which_lambda == "BIC") {
-        coef <- coefficients(fit)[, which.min(bic)]
-        aic <- aic[which.min(bic)]
-        bic <- min(bic)
-      } else if(which_lambda == "smallest") {
-        coef <- coefficients(fit)
-        coef <- coef[, which.min(fit$lambda)]
-        aic <- aic[length(aic)]
-        bic <- bic[length(bic)]
-      } else {
-        stop("Unrecognised option for which_lambda, should be one of 'AIC', 'BIC' or 'smallest'.")
-      }
-    } else {
-      # Call glmnet with a decreasing sequence of lambdas
-      if(!("lambda" %in% names(user_supplied))) {
-        arguments <- append(arguments, list(lambda = rev(0:99)))
-      }
-      arguments <- append(arguments, user_supplied)
-      if(debug) {
-        tm <- Sys.time()
-        message("Calling glmnet...")
-      }
-      fit <- do.call(glmnet, arguments)
-      if(debug) {
-        message("Finished call to glmnet.")
-        print(Sys.time() - tm)
-      }
-
-      # Compute AIC or BIC for the vector of lambdas
-      # Reference: https://stackoverflow.com/questions/40920051/r-getting-aic-bic-likelihood-from-glmnet
-      tLL <- fit$nulldev - deviance(fit)
-      k <- fit$df
-      n <- fit$nobs
-      aic <- -tLL + 2 * k + 2 * k * (k + 1) / (n - k - 1)
-      bic <- log(n) * k - tLL
-
-      # Choose AIC / BIC corresponding to lambda = 0
-      aic <- aic[length(aic)]
-      bic <- bic[length(bic)]
-
-      # Extract the corresponding coefficients
-      coef <- coefficients(fit, s = 0)
-      coef <- setNames(as.vector(coef), nm = rownames(coef))
-    }
-
-    # Even with intercept = FALSE, glmnet still returns an intercept 0-valued coefficient
-    coef <- coef[-which(names(coef) == "(Intercept)")]
-
-    # We didn't actually use an offset so we have to shift back the intercept coefficients
-    shift <- gibbsm_data$shift
-    for(i in seq_len(number_types)) {
-      coef[match(paste0("beta0_", i), names(coef))] <- coef[match(paste0("beta0_", i), names(coef))] - shift[i]
-    }
-
-    # Clean up
-    fit_algorithm <- "glmnet"
-  # } else if(fitting_package == "oem") {
-  #
-  #   arguments <- list(x = regressors,
-  #                     y = gibbsm_data$response,
-  #                     intercept = FALSE,
-  #                     family = "binomial",
-  #                     ncores = nthreads)
-  #   user_supplied <- list(...)
-  #
-  #   if(!("compute.loss" %in% names(user_supplied))) {
-  #     arguments <- append(arguments, list(compute.loss = TRUE))
-  #   }
-  #
-  #   if(use_regularization) {
-  #     # Add penalty factor
-  #     arguments <- append(arguments, list(penalty.factor = pfactor))
-  #
-  #     # Add penalty type
-  #     if(!("penalty" %in% names(user_supplied))) {
-  #       arguments <- append(arguments, list(penalty = "lasso"))
-  #     }
-  #
-  #     # Call oem
-  #     arguments <- append(arguments, user_supplied)
-  #     if(debug) {
-  #       tm <- Sys.time()
-  #       message("Calling oem...")
-  #     }
-  #     fit <- do.call(oem, arguments)
-  #     if(debug) {
-  #       message("Finished call to oem.")
-  #       print(Sys.time() - tm)
-  #     }
-  #
-  #     # Extract coefficients
-  #     coef <- fit$beta$lasso
-  #
-  #     # Compute AIC/BIC for vector of lambdas
-  #     # Reference: https://stackoverflow.com/questions/40920051/r-getting-aic-bic-likelihood-from-glmnet
-  #     k <- sapply(seq_len(ncol(coef)), function(col) sum(coef[, col] != 0.))
-  #     n <- fit$nobs
-  #     LL <- 2 * logLik(fit)
-  #     aic <- -LL + 2 * k + 2 * k * (k + 1) / (n - k - 1)
-  #     bic <- -LL + log(n) * k
-  #
-  #     # Choose lambda that minimizes AIC or BIC
-  #     if(which_lambda == "AIC") {
-  #       coef <- coef[, which.min(aic)]
-  #       aic <- min(aic)
-  #       bic <- bic[which.min(aic)]
-  #     } else if(which_lambda == "BIC") {
-  #       coef <- coef[, which.min(bic)]
-  #       aic <- aic[which.min(bic)]
-  #       bic <- min(bic)
-  #     } else if(which_lambda == "smallest") {
-  #       coef <- coef[, ncol(coef)]
-  #       aic <- aic[length(aic)]
-  #       bic <- bic[length(bic)]
-  #     } else {
-  #       stop("Unrecognised option for which_lambda, should be one of 'AIC', 'BIC' or 'smallest'.")
-  #     }
-  #
-  #     # Put coef in correct format
-  #     coef <- setNames(as.vector(coef), nm = names(coef))
-  #   } else {
-  #     # Use 'ols' penalty for non-regularized regression
-  #     if(!("penalty" %in% names(user_supplied))) {
-  #       arguments <- append(arguments, list(penalty = "ols"))
-  #     }
-  #
-  #     # Call oem
-  #     arguments <- append(arguments, user_supplied)
-  #     if(debug) {
-  #       tm <- Sys.time()
-  #       message("Calling oem...")
-  #     }
-  #     fit <- do.call(oem, arguments)
-  #     if(debug) {
-  #       message("Finished call to oem.")
-  #       print(Sys.time() - tm)
-  #     }
-  #
-  #     # Extract AIC/BIC using oem functions
-  #     aic <- AIC(fit)
-  #     bic <- BIC(fit)
-  #
-  #     # Extract coefficients
-  #     coef <- fit$beta$ols
-  #     coef <- setNames(as.vector(coef), nm = rownames(coef))
-  #   }
-  #
-  #   # Remove intercept
-  #   coef <- coef[-which(names(coef) == "(Intercept)")]
-  #
-  #   # We didn't actually use an offset so we have to shift back the intercept coefficients
-  #   shift <- gibbsm_data$shift
-  #   for(i in seq_len(number_types)) {
-  #     coef[match(paste0("beta0_", i), names(coef))] <- coef[match(paste0("beta0_", i), names(coef))] - shift[i]
-  #   }
-  #
-  #   # Clean up
-  #   fit_algorithm <- "oem"
-  # } else if(fitting_package == "h2o") {
-  #   # Initialization
-  #   h2o.init(nthreads = nthreads)
-  #
-  #   # Put everything into a unique data.frame
-  #   regressors <- as.data.frame(regressors)
-  #   regressors$response <- gibbsm_data$response
-  #   regressors$offset <- gibbsm_data$shift[gibbsm_data$type]
-  #
-  #   arguments <- list(training_frame = as.h2o(regressors),
-  #                     y = 'response',
-  #                     intercept = FALSE,
-  #                     family = "binomial",
-  #                     offset_column = 'offset',
-  #                     standardize = FALSE,
-  #                     link = 'logit')
-  #   user_supplied <- list(...)
-  #
-  #   if(use_regularization) {
-  #     stop("Not implemented yet")
-  #   } else {
-  #     # Non-regularized corresponds to lambda = 0
-  #     if(!("lambda" %in% names(user_supplied))) {
-  #       arguments <- append(arguments, list(lambda = 0))
-  #     }
-  #
-  #     # Call h2o
-  #     arguments <- append(arguments, user_supplied)
-  #     if(debug) {
-  #       tm <- Sys.time()
-  #       message("Calling h2o")
-  #     }
-  #     fit <- do.call(h2o.glm, arguments)
-  #     if(debug) {
-  #       message("Finished call to h2o")
-  #       print(Sys.time() - tm)
-  #     }
-  #
-  #     # Compute AIC/BIC
-  #     warning("AIC/BIC not implemented yet for h2o fitting package")
-  #     aic <- NA
-  #     bic <- NA
-  #
-  #     # Extract coefs
-  #     coef <- h2o.coef(fit)
-  #   }
-  #
-  #   # Remove intercept from coefficients
-  #   coef <- coef[-which(names(coef) == "Intercept")]
-  #
-  #   # Shift intercepts back
-  #   shift <- gibbsm_data$shift
-  #   for(i in seq_len(number_types)) {
-  #     coef[match(paste0("beta0_", i), names(coef))] <- coef[match(paste0("beta0_", i), names(coef))] - shift[i]
-  #   }
-  #
-  #   # Clean up
-  #   fit_algorithm <- "h2o"
-  } else if(fitting_package == "glm") {
-    if(use_regularization) {
-      warning("Cannot have regularization with glm, doing a regular glm fit.")
-    }
-
-    # Formula for the GLM regression
-    fmla <- as.formula(paste0("response ~ 0 + offset(offset) + ", paste0(colnames(regressors), collapse = ' + ')))
-
-    # Put all the required vectors into a data.frame to send to glm
-    regressors <- as.data.frame(regressors)
-    regressors$offset <- gibbsm_data$shift[gibbsm_data$type]
-    regressors$response <- gibbsm_data$response
-
-    # Call GLM
-    if(debug) {
-      tm <- Sys.time()
-      message("Calling glm...")
-    }
-    fit <- glm(fmla, family = binomial(), data = regressors, ...)
-    if(debug) {
-      message("Finished call to glm.")
-      print(Sys.time() - tm)
-    }
-
-    # Compute AIC/BIC
-    aic <- AIC(fit)
-    bic <- BIC(fit)
-
-    # Clean up
-    coef <- coefficients(fit)
-    fit_algorithm <- "glm"
-  } else {
-    stop("Supplied fitting package not recognized.")
-  }
-
-  # Clean up
-  list(fit = fit,
-       coefficients = format_coefficient_vector(coefficient_vector = coef,
-                                                number_types = number_types,
-                                                types_names = types_names,
-                                                covariates_names = covariates_names,
-                                                estimate_alpha = estimate_alpha,
-                                                estimate_gamma = estimate_gamma),
-       coefficients_vector = coef,
-       aic = aic,
-       bic = bic,
-       fit_algorithm = fit_algorithm)
-}
-
-
 #' Fit a multivariate Gibbs model to a dataset.
 #'
 #' @param configuration_list A single configuration or a list of configurations assumed to be drawn from the multivariate Gibbs.
@@ -389,7 +26,7 @@ fit_gibbs <- function(gibbsm_data,
 #' @importFrom glmnet glmnet
 #' @importFrom Matrix Matrix
 #' @importFrom stats AIC as.formula BIC binomial coefficients deviance glm lm logLik setNames
-#' @importFrom spatstat.geom as.im as.owin
+#' @importFrom spatstat.geom as.im as.owin inside.owin
 #' @md
 #' @export
 gibbsm <- function(configuration_list,
@@ -479,12 +116,17 @@ gibbsm <- function(configuration_list,
   # Subset configurations to the window.
   configuration_list <- lapply(configuration_list, function(configuration) {
     configuration <- as.data.frame(configuration)
-    configuration <- configuration[inside.owin(x = configuration$x,
-                                               y = configuration$y,
-                                               w = window), ]
+    configuration <- configuration[spatstat.geom::inside.owin(x = configuration$x,
+                                                              y = configuration$y,
+                                                              w = window), ]
     configuration <- as.Configuration(configuration)
   })
 
+  # Subset configurations to points with non-NA covariate values
+  configuration_list <- lapply(configuration_list, function(configuration) {
+    remove_NA_on_covariates(configuration, covariates = covariates)
+  })
+
   # Set types names and covariates names
   types_names <- levels(types(configuration_list[[1]]))
   covariates_names <- names(covariates)
@@ -813,3 +455,378 @@ gibbsm <- function(configuration_list,
   class(ret) <- "gibbsm"
   ret
 }
+
+# Return a subset of the configuration that does not give any NA values on the covariates
+remove_NA_on_covariates <- function(configuration, covariates) {
+  keep <- rep(TRUE, length(configuration$x))
+  for(cov in covariates) {
+    keep <- keep & spatstat.geom::inside.owin(configuration$x, configuration$y,
+                                              w = as.owin(cov))
+  }
+  Configuration(x = configuration$x[keep],
+                y = configuration$y[keep],
+                marks = configuration$marks[keep],
+                types = configuration$types[keep])
+}
+
+fit_gibbs <- function(gibbsm_data,
+                      fitting_package,
+                      which_lambda,
+                      estimate_alpha,
+                      estimate_gamma,
+                      types_names,
+                      covariates_names,
+                      use_regularization,
+                      nthreads,
+                      refit_glmnet,
+                      debug,
+                      ...) {
+  # Extract a few useful quantities used in most regressions
+  regressors <- gibbsm_data$regressors
+  if(any(is.na(regressors))) {
+    max_indices <- 10
+    na_row_indices <- which(rowSums(is.na(regressors)) > 0)[seq_len(max_indices)]
+    na_row_indices <- na_row_indices[!is.na(na_row_indices)]
+    warning(paste0("Some NA values detected in regression matrix; indices: "),
+            na_row_indices)
+    print(regressors[na_row_indices, ])
+  }
+
+  number_types <- length(gibbsm_data$shift)
+  nregressors <- ncol(regressors)
+
+  # Penalty factor used in penalized regressions
+  pfactor <- rep(1, nregressors)
+  pfactor[startsWith(colnames(regressors), "beta0")] <- 0
+
+  if(fitting_package == "glmnet") {
+    # Avoid a bug in glmnet: if intercept = FALSE, and there's a column of ones, it gets ignored by glmnet
+    # even though its penalty factor is zero.
+    if(all(1 == regressors[, startsWith(colnames(regressors), "beta0")])) {
+      regressors[1, startsWith(colnames(regressors), "beta0")] <- 1.00001
+    }
+
+    # We don't use an offset explicitely because the call to glmnet above returns nonsensical results or hangs.
+    # Instead, we'll use a shift for all the beta0 regressors according to -log(rho).
+    arguments <- list(x = Matrix(regressors, sparse = TRUE),
+                      y = Matrix(gibbsm_data$response, sparse = TRUE),
+                      intercept = FALSE,
+                      family = "binomial")
+    user_supplied <- list(...)
+
+    if(use_regularization) {
+      # Add penalty factor
+      arguments <- append(arguments, list(penalty.factor = pfactor))
+
+      # Call glmnet
+      arguments <- append(arguments, user_supplied)
+      if(debug) {
+        tm <- Sys.time()
+        message("Calling glmnet...")
+      }
+      fit <- do.call(glmnet, arguments)
+      if(debug) {
+        message("Finished call to glmnet.")
+        print(Sys.time() - tm)
+      }
+      if(refit_glmnet > 0) {
+        lambda_seq <- fit$lambda
+        N <- (1 + refit_glmnet) * length(lambda_seq)
+        lambda_new <- vector(mode = 'numeric', length = N)
+        lambda_new[1] <- lambda_seq[1]
+        by <- (lambda_seq[1] - lambda_seq[2]) / lambda_seq[1]
+        for(i in 2:N) {
+          lambda_new[i] <- (1 - by) * lambda_new[i - 1]
+        }
+
+        arguments$lambda <- lambda_new
+        arguments$nlambda <- length(lambda_new)
+
+        if(debug) {
+          tm <- Sys.time()
+          message("Calling glmnet again...")
+        }
+        fit <- do.call(glmnet, arguments)
+        if(debug) {
+          message("Finished call to glmnet.")
+          print(Sys.time() - tm)
+        }
+      }
+
+      # Compute AIC or BIC for the vector of lambdas
+      # Reference: https://stackoverflow.com/questions/40920051/r-getting-aic-bic-likelihood-from-glmnet
+      tLL <- fit$nulldev - deviance(fit)
+      k <- fit$df
+      n <- fit$nobs
+      aic <- -tLL + 2 * k + 2 * k * (k + 1) / (n - k - 1)
+      bic <- log(n) * k - tLL
+
+      # Choose lambda that minimizes AIC or BIC.
+      if(which_lambda == "AIC") {
+        coef <- coefficients(fit)[, which.min(aic)]
+        aic <- min(aic)
+        bic <- bic[which.min(aic)]
+      } else if(which_lambda == "BIC") {
+        coef <- coefficients(fit)[, which.min(bic)]
+        aic <- aic[which.min(bic)]
+        bic <- min(bic)
+      } else if(which_lambda == "smallest") {
+        coef <- coefficients(fit)
+        coef <- coef[, which.min(fit$lambda)]
+        aic <- aic[length(aic)]
+        bic <- bic[length(bic)]
+      } else {
+        stop("Unrecognised option for which_lambda, should be one of 'AIC', 'BIC' or 'smallest'.")
+      }
+    } else {
+      # Call glmnet with a decreasing sequence of lambdas
+      if(!("lambda" %in% names(user_supplied))) {
+        arguments <- append(arguments, list(lambda = rev(0:99)))
+      }
+      arguments <- append(arguments, user_supplied)
+      if(debug) {
+        tm <- Sys.time()
+        message("Calling glmnet...")
+      }
+      fit <- do.call(glmnet, arguments)
+      if(debug) {
+        message("Finished call to glmnet.")
+        print(Sys.time() - tm)
+      }
+
+      # Compute AIC or BIC for the vector of lambdas
+      # Reference: https://stackoverflow.com/questions/40920051/r-getting-aic-bic-likelihood-from-glmnet
+      tLL <- fit$nulldev - deviance(fit)
+      k <- fit$df
+      n <- fit$nobs
+      aic <- -tLL + 2 * k + 2 * k * (k + 1) / (n - k - 1)
+      bic <- log(n) * k - tLL
+
+      # Choose AIC / BIC corresponding to lambda = 0
+      aic <- aic[length(aic)]
+      bic <- bic[length(bic)]
+
+      # Extract the corresponding coefficients
+      coef <- coefficients(fit, s = 0)
+      coef <- setNames(as.vector(coef), nm = rownames(coef))
+    }
+
+    # Even with intercept = FALSE, glmnet still returns an intercept 0-valued coefficient
+    coef <- coef[-which(names(coef) == "(Intercept)")]
+
+    # We didn't actually use an offset so we have to shift back the intercept coefficients
+    shift <- gibbsm_data$shift
+    for(i in seq_len(number_types)) {
+      coef[match(paste0("beta0_", i), names(coef))] <- coef[match(paste0("beta0_", i), names(coef))] - shift[i]
+    }
+
+    # Clean up
+    fit_algorithm <- "glmnet"
+    # } else if(fitting_package == "oem") {
+    #
+    #   arguments <- list(x = regressors,
+    #                     y = gibbsm_data$response,
+    #                     intercept = FALSE,
+    #                     family = "binomial",
+    #                     ncores = nthreads)
+    #   user_supplied <- list(...)
+    #
+    #   if(!("compute.loss" %in% names(user_supplied))) {
+    #     arguments <- append(arguments, list(compute.loss = TRUE))
+    #   }
+    #
+    #   if(use_regularization) {
+    #     # Add penalty factor
+    #     arguments <- append(arguments, list(penalty.factor = pfactor))
+    #
+    #     # Add penalty type
+    #     if(!("penalty" %in% names(user_supplied))) {
+    #       arguments <- append(arguments, list(penalty = "lasso"))
+    #     }
+    #
+    #     # Call oem
+    #     arguments <- append(arguments, user_supplied)
+    #     if(debug) {
+    #       tm <- Sys.time()
+    #       message("Calling oem...")
+    #     }
+    #     fit <- do.call(oem, arguments)
+    #     if(debug) {
+    #       message("Finished call to oem.")
+    #       print(Sys.time() - tm)
+    #     }
+    #
+    #     # Extract coefficients
+    #     coef <- fit$beta$lasso
+    #
+    #     # Compute AIC/BIC for vector of lambdas
+    #     # Reference: https://stackoverflow.com/questions/40920051/r-getting-aic-bic-likelihood-from-glmnet
+    #     k <- sapply(seq_len(ncol(coef)), function(col) sum(coef[, col] != 0.))
+    #     n <- fit$nobs
+    #     LL <- 2 * logLik(fit)
+    #     aic <- -LL + 2 * k + 2 * k * (k + 1) / (n - k - 1)
+    #     bic <- -LL + log(n) * k
+    #
+    #     # Choose lambda that minimizes AIC or BIC
+    #     if(which_lambda == "AIC") {
+    #       coef <- coef[, which.min(aic)]
+    #       aic <- min(aic)
+    #       bic <- bic[which.min(aic)]
+    #     } else if(which_lambda == "BIC") {
+    #       coef <- coef[, which.min(bic)]
+    #       aic <- aic[which.min(bic)]
+    #       bic <- min(bic)
+    #     } else if(which_lambda == "smallest") {
+    #       coef <- coef[, ncol(coef)]
+    #       aic <- aic[length(aic)]
+    #       bic <- bic[length(bic)]
+    #     } else {
+    #       stop("Unrecognised option for which_lambda, should be one of 'AIC', 'BIC' or 'smallest'.")
+    #     }
+    #
+    #     # Put coef in correct format
+    #     coef <- setNames(as.vector(coef), nm = names(coef))
+    #   } else {
+    #     # Use 'ols' penalty for non-regularized regression
+    #     if(!("penalty" %in% names(user_supplied))) {
+    #       arguments <- append(arguments, list(penalty = "ols"))
+    #     }
+    #
+    #     # Call oem
+    #     arguments <- append(arguments, user_supplied)
+    #     if(debug) {
+    #       tm <- Sys.time()
+    #       message("Calling oem...")
+    #     }
+    #     fit <- do.call(oem, arguments)
+    #     if(debug) {
+    #       message("Finished call to oem.")
+    #       print(Sys.time() - tm)
+    #     }
+    #
+    #     # Extract AIC/BIC using oem functions
+    #     aic <- AIC(fit)
+    #     bic <- BIC(fit)
+    #
+    #     # Extract coefficients
+    #     coef <- fit$beta$ols
+    #     coef <- setNames(as.vector(coef), nm = rownames(coef))
+    #   }
+    #
+    #   # Remove intercept
+    #   coef <- coef[-which(names(coef) == "(Intercept)")]
+    #
+    #   # We didn't actually use an offset so we have to shift back the intercept coefficients
+    #   shift <- gibbsm_data$shift
+    #   for(i in seq_len(number_types)) {
+    #     coef[match(paste0("beta0_", i), names(coef))] <- coef[match(paste0("beta0_", i), names(coef))] - shift[i]
+    #   }
+    #
+    #   # Clean up
+    #   fit_algorithm <- "oem"
+    # } else if(fitting_package == "h2o") {
+    #   # Initialization
+    #   h2o.init(nthreads = nthreads)
+    #
+    #   # Put everything into a unique data.frame
+    #   regressors <- as.data.frame(regressors)
+    #   regressors$response <- gibbsm_data$response
+    #   regressors$offset <- gibbsm_data$shift[gibbsm_data$type]
+    #
+    #   arguments <- list(training_frame = as.h2o(regressors),
+    #                     y = 'response',
+    #                     intercept = FALSE,
+    #                     family = "binomial",
+    #                     offset_column = 'offset',
+    #                     standardize = FALSE,
+    #                     link = 'logit')
+    #   user_supplied <- list(...)
+    #
+    #   if(use_regularization) {
+    #     stop("Not implemented yet")
+    #   } else {
+    #     # Non-regularized corresponds to lambda = 0
+    #     if(!("lambda" %in% names(user_supplied))) {
+    #       arguments <- append(arguments, list(lambda = 0))
+    #     }
+    #
+    #     # Call h2o
+    #     arguments <- append(arguments, user_supplied)
+    #     if(debug) {
+    #       tm <- Sys.time()
+    #       message("Calling h2o")
+    #     }
+    #     fit <- do.call(h2o.glm, arguments)
+    #     if(debug) {
+    #       message("Finished call to h2o")
+    #       print(Sys.time() - tm)
+    #     }
+    #
+    #     # Compute AIC/BIC
+    #     warning("AIC/BIC not implemented yet for h2o fitting package")
+    #     aic <- NA
+    #     bic <- NA
+    #
+    #     # Extract coefs
+    #     coef <- h2o.coef(fit)
+    #   }
+    #
+    #   # Remove intercept from coefficients
+    #   coef <- coef[-which(names(coef) == "Intercept")]
+    #
+    #   # Shift intercepts back
+    #   shift <- gibbsm_data$shift
+    #   for(i in seq_len(number_types)) {
+    #     coef[match(paste0("beta0_", i), names(coef))] <- coef[match(paste0("beta0_", i), names(coef))] - shift[i]
+    #   }
+    #
+    #   # Clean up
+    #   fit_algorithm <- "h2o"
+  } else if(fitting_package == "glm") {
+    if(use_regularization) {
+      warning("Cannot have regularization with glm, doing a regular glm fit.")
+    }
+
+    # Formula for the GLM regression
+    fmla <- as.formula(paste0("response ~ 0 + offset(offset) + ", paste0(colnames(regressors), collapse = ' + ')))
+
+    # Put all the required vectors into a data.frame to send to glm
+    regressors <- as.data.frame(regressors)
+    regressors$offset <- gibbsm_data$shift[gibbsm_data$type]
+    regressors$response <- gibbsm_data$response
+
+    # Call GLM
+    if(debug) {
+      tm <- Sys.time()
+      message("Calling glm...")
+    }
+    fit <- glm(fmla, family = binomial(), data = regressors, ...)
+    if(debug) {
+      message("Finished call to glm.")
+      print(Sys.time() - tm)
+    }
+
+    # Compute AIC/BIC
+    aic <- AIC(fit)
+    bic <- BIC(fit)
+
+    # Clean up
+    coef <- coefficients(fit)
+    fit_algorithm <- "glm"
+  } else {
+    stop("Supplied fitting package not recognized.")
+  }
+
+  # Clean up
+  list(fit = fit,
+       coefficients = format_coefficient_vector(coefficient_vector = coef,
+                                                number_types = number_types,
+                                                types_names = types_names,
+                                                covariates_names = covariates_names,
+                                                estimate_alpha = estimate_alpha,
+                                                estimate_gamma = estimate_gamma),
+       coefficients_vector = coef,
+       aic = aic,
+       bic = bic,
+       fit_algorithm = fit_algorithm)
+}
diff --git a/src/compute_vcov.cpp b/src/compute_vcov.cpp
index b744fe5..1684b77 100644
--- a/src/compute_vcov.cpp
+++ b/src/compute_vcov.cpp
@@ -21,6 +21,7 @@
 #include "utility/im_wrapper.hpp"
 #include "utility/is_symmetric_matrix.hpp"
 #include "utility/lightweight_matrix.hpp"
+#include "utility/subset_to_nonNA.hpp"
 #include "utility/timer.hpp"
 #include "utility/window.hpp"
 #include "utility/window_concrete.hpp"
@@ -244,11 +245,11 @@ inline auto make_G2_stratified(const Configuration& configuration,
   for(decltype(rho.size()) i(0); i < rho.size(); ++i) {
     delta[i] = static_cast<computation_t>(1.) / std::sqrt(static_cast<computation_t>(rho[i]));
   }
-  const auto other_stratified(ppjsdm::rstratpp_single<Configuration>(window, delta, delta));
+  const auto other_stratified(subset_to_nonNA(ppjsdm::rstratpp_single<Configuration>(window, delta, delta), covariates));
 
   if(ppjsdm::size(dummy) != ppjsdm::size(other_stratified)) {
     Rcpp::Rcout << "Size dummy: " << ppjsdm::size(dummy) << " and size new stratified: " << ppjsdm::size(other_stratified) << ".\n";
-    Rcpp::stop("The dummy points and the independent draw of a stratified binomial point process should have the same number of points.");
+    Rcpp::stop("The dummy points and the independent draw of a stratified binomial point process should have the same number of points. This could be due to NA values on the covariates, causing some of the dummy points to be removed on the draws. To solve this, either subset the window to locations where the covariates are non-NA, or avoid dummy points distributed as a stratified point process.");
   }
 
   ppjsdm::Lightweight_square_matrix<computation_t> G2(number_parameters);
@@ -1321,8 +1322,8 @@ Rcpp::NumericMatrix compute_G2_cpp(SEXP configuration,
                                   cpp_window.volume(),
                                   nthreads);
   } else if(dummy_distribution == std::string("stratified")) {
-    G2 = detail::make_G2_stratified(wrapped_configuration,
-                                    wrapped_dummy,
+    G2 = detail::make_G2_stratified(vector_configuration,
+                                    vector_dummy,
                                     cpp_window,
                                     Rcpp::as<std::vector<double>>(theta),
                                     Rcpp::as<std::vector<double>>(rho),
diff --git a/src/prepare_gibbsm_data.cpp b/src/prepare_gibbsm_data.cpp
index 3caad87..c91409d 100644
--- a/src/prepare_gibbsm_data.cpp
+++ b/src/prepare_gibbsm_data.cpp
@@ -19,6 +19,7 @@
 #include "utility/im_wrapper.hpp"
 #include "utility/is_symmetric_matrix.hpp"
 #include "utility/lightweight_matrix.hpp"
+#include "utility/subset_to_nonNA.hpp"
 #include "utility/timer.hpp"
 #include "utility/window.hpp"
 
@@ -26,7 +27,6 @@
 #include <cmath> // std::log
 #include <iterator> // std::next
 #include <string> // std::string, std::to_string
-#include <sstream> // std::stringstream
 #include <type_traits> // std::remove_cv_t
 #include <vector> // std::vector
 
@@ -45,16 +45,15 @@ inline auto convert_list_of_boolean_matrices_to_vector(Rcpp::List boolean_matric
 
 } // detail
 
-template<typename computation_t, typename Vector>
+template<typename Configuration, typename Vector>
 auto make_dummy_points(const ppjsdm::Window& window,
                        const Vector& rho_times_volume,
                        std::string dummy_distribution) {
-  using dummy_configuration_t = std::vector<ppjsdm::Marked_point>;
-  dummy_configuration_t D;
+  Configuration D;
   if(dummy_distribution == std::string("binomial")) {
-    D = ppjsdm::rbinomialpp_single<dummy_configuration_t>(window, rho_times_volume);
+    D = ppjsdm::rbinomialpp_single<Configuration>(window, rho_times_volume);
   } else if(dummy_distribution == std::string("stratified")) {
-    D = ppjsdm::rstratpp_single<dummy_configuration_t>(window, rho_times_volume);
+    D = ppjsdm::rstratpp_single<Configuration>(window, rho_times_volume);
   } else {
     Rcpp::stop("Unexpected dummy_distribution parameter.");
   }
@@ -140,17 +139,7 @@ Rcpp::List prepare_gibbsm_data_helper(const std::vector<Configuration>& configur
   }
 
   // Remove points in D which generate NA values for one of the regressors.
-  Dummy D_no_NA_covariates(D);
-  D_no_NA_covariates.erase(std::remove_if(D_no_NA_covariates.begin(), D_no_NA_covariates.end(),
-      [&covariates](const auto& point) {
-        for(std::remove_cv_t<decltype(covariates.size())> covariate_index(0); covariate_index < covariates.size(); ++covariate_index) {
-          const auto covariate(covariates[covariate_index](point));
-          if(R_IsNA(covariate)) {
-            return true;
-          }
-        }
-        return false;
-      }), D_no_NA_covariates.end());
+  Dummy D_no_NA_covariates(ppjsdm::subset_to_nonNA(D, covariates));
 
   // Make shift vector
   Rcpp::NumericVector shift(Rcpp::no_init(number_types));
@@ -191,29 +180,13 @@ Rcpp::List prepare_gibbsm_data_helper(const std::vector<Configuration>& configur
   }
 
   // Precompute how many of the points in the configuration we'll have to drop due to NA values on the covariates.
-  size_t total_configuration_length(0), total_removed_points(0);
+  size_t total_configuration_length(0);
   for(size_t i(0); i < configuration_list.size(); ++i) {
     total_configuration_length += ppjsdm::size(configuration_list[i]);
-    for(size_t point_index(0); point_index < ppjsdm::size(configuration_list[i]); ++point_index) {
-      for(std::remove_cv_t<decltype(covariates.size())> k(0); k < covariates.size(); ++k) {
-        const auto covariate(covariates[k](configuration_list[i][point_index]));
-        if(R_IsNA(covariate)) {
-          ++total_removed_points;
-          break;
-        }
-      }
-    }
-  }
-
-  // Send a warning if we had to drop any
-  if(total_removed_points > 0) {
-    std::stringstream ss;
-    ss << "Some of the covariates had an NA value on " << total_removed_points << " of the configuration points; these have been removed.";
-    Rcpp::warning(ss.str());
   }
 
   // Compute a few parameters necessary for the construction of regressors
-  const auto total_points(total_configuration_length - total_removed_points + D_no_NA_covariates.size() * configuration_list.size());
+  const auto total_points(total_configuration_length + D_no_NA_covariates.size() * configuration_list.size());
   const auto number_parameters_struct(ppjsdm::get_number_parameters(number_types,
                                                                     covariates.size(),
                                                                     estimate_alpha,
@@ -239,18 +212,13 @@ Rcpp::List prepare_gibbsm_data_helper(const std::vector<Configuration>& configur
                                  D_no_NA_covariates[point_index - ppjsdm::size(configuration_list[configuration_index])]);
       // Check if the points generates any NA values, move on if so
       std::vector<decltype(covariates[0](current_point))> covariates_values(covariates.size());
-      bool found_na_point(false);
       for(size_t covariate_index(0); covariate_index < static_cast<size_t>(covariates.size()); ++covariate_index) {
         const auto covariate(covariates[covariate_index](current_point));
         if(R_IsNA(covariate)) {
-          found_na_point = true;
-          break;
+          Rcpp::warning("Some of the covariates had an NA value on points in the configuration. This should not happen, as the configurations have been subset to non-NA points.");
         }
         covariates_values[covariate_index] = covariate;
       }
-      if(found_na_point) {
-        continue;
-      }
 
       // fill response
       if(point_index < ppjsdm::size(configuration_list[configuration_index])) {
@@ -334,6 +302,7 @@ Rcpp::List prepare_gibbsm_data(Rcpp::List configuration_list,
                                bool debug,
                                std::string dummy_distribution,
                                Rcpp::CharacterVector type_names) {
+  // TODO: A lot of code duplication between this function and the one below
   using computation_t = double;
 
   // Construct std::vector of configurations.
@@ -378,9 +347,9 @@ Rcpp::List prepare_gibbsm_data(Rcpp::List configuration_list,
                                                                    dummy_factor));
 
   // Sample the dummy points D.
-  auto D(make_dummy_points<computation_t>(cpp_window,
-                                          rho_times_volume,
-                                          dummy_distribution));
+  auto D(make_dummy_points<std::vector<ppjsdm::Marked_point>>(cpp_window,
+                                                              rho_times_volume,
+                                                              dummy_distribution));
 
   return prepare_gibbsm_data_helper(vector_configurations,
                                     D,
diff --git a/src/utility/im_wrapper.hpp b/src/utility/im_wrapper.hpp
index 4f8cbde..9867c60 100644
--- a/src/utility/im_wrapper.hpp
+++ b/src/utility/im_wrapper.hpp
@@ -52,11 +52,27 @@ class Im_wrapper {
   }
 
   double operator()(double x, double y) const {
-    const R_xlen_t col(std::floor((x - x_min()) / xstep_));
-    const R_xlen_t row(std::floor((y - y_min()) / ystep_));
-    if(col < 0 || col >= number_col_ || row < 0 || row >= number_row_) {
+    const double index_x((x - x_min()) / xstep_);
+    const double index_y((y - y_min()) / ystep_);
+    if((index_x < 0) || (index_y < 0)) {
       return NA_REAL;
+    } else if(index_x >= static_cast<double>(number_col_)) {
+      if(index_x == static_cast<double>(number_col_)) { // x is on rhs boundary
+        const R_xlen_t row(std::floor(index_y));
+        return get_matrix(row, number_col_ - 1);
+      } else {
+        return NA_REAL;
+      }
+    } else if(index_y >= static_cast<double>(number_row_)) {
+      if(index_y == static_cast<double>(number_row_)) { // y is on top boundary
+        const R_xlen_t col(std::floor(index_x));
+        return get_matrix(number_row_ - 1, col);
+      } else {
+        return NA_REAL;
+      }
     } else {
+      const R_xlen_t row(std::floor(index_y));
+      const R_xlen_t col(std::floor(index_x));
       return get_matrix(row, col);
     }
   }
diff --git a/src/utility/subset_to_nonNA.hpp b/src/utility/subset_to_nonNA.hpp
new file mode 100644
index 0000000..5ae101b
--- /dev/null
+++ b/src/utility/subset_to_nonNA.hpp
@@ -0,0 +1,31 @@
+#ifndef INCLUDE_PPJSDM_SUBSET_TO_NONNA
+#define INCLUDE_PPJSDM_SUBSET_TO_NONNA
+
+#include <Rinternals.h>
+
+#include <algorithm> // std::remove_if
+#include <type_traits> // std::remove_cv_t
+
+namespace ppjsdm {
+
+template<typename Configuration, typename ImList>
+inline auto subset_to_nonNA(const Configuration& configuration,
+                            const ImList& covariates) {
+  Configuration copy(configuration);
+  copy.erase(std::remove_if(copy.begin(), copy.end(),
+                            [&covariates](const auto& point) {
+                              for(std::remove_cv_t<decltype(covariates.size())> covariate_index(0); covariate_index < covariates.size(); ++covariate_index) {
+                                const auto covariate(covariates[covariate_index](point));
+                                if(R_IsNA(covariate)) {
+                                  return true;
+                                }
+                              }
+                              return false;
+                            }), copy.end());
+
+  return copy;
+}
+
+} // namespace ppjsdm
+
+#endif // INCLUDE_PPJSDM_SUBSET_TO_NONNA
diff --git a/tests/testthat/test-compute_papangelou.R b/tests/testthat/test-compute_papangelou.R
index bea8a28..5d49891 100644
--- a/tests/testthat/test-compute_papangelou.R
+++ b/tests/testthat/test-compute_papangelou.R
@@ -35,7 +35,7 @@ test_that("Default values", {
 
 test_that("Subsetting types from a fit object", {
   npoints <- 5
-  nconditional <- 10
+  nconditional <- 100
 
   types <- c("a", "b", "c", "d")
 
@@ -486,7 +486,7 @@ test_that("Basic test of plot_papangelou", {
 
 test_that("Subsetting types for plot_papangelou", {
   npoints <- 5
-  nconditional <- 10
+  nconditional <- 100
 
   types <- c("a", "b", "c", "d")
 
@@ -510,7 +510,7 @@ test_that("Subsetting types for plot_papangelou", {
 
 test_that("Subsetting types for plot_papangelou with covariates", {
   npoints <- 5
-  nconditional <- 10
+  nconditional <- 100
 
   covariates <- list(x = function(x, y) x, y = function(x, y) y)
 
@@ -536,7 +536,7 @@ test_that("Subsetting types for plot_papangelou with covariates", {
 
 test_that("Subsetting types for plot_papangelou with covariates", {
   npoints <- 5
-  nconditional <- 10
+  nconditional <- 100
 
   covariates <- list(x = function(x, y) x, y = function(x, y) y)
 
diff --git a/tests/testthat/test-gibbsm.R b/tests/testthat/test-gibbsm.R
index e58086f..f47d590 100644
--- a/tests/testthat/test-gibbsm.R
+++ b/tests/testthat/test-gibbsm.R
@@ -377,3 +377,37 @@ test_that("gibbsm works with infinite saturation for Exponential/Square exponent
   expect_equal(as.vector(fit_large$coefficients$gamma), as.vector(fit_infinite$coefficients$gamma))
   expect_equal(as.vector(fit_large$coefficients$beta0), as.vector(fit_infinite$coefficients$beta0))
 })
+
+test_that("gibbsm can handle points on the boundary of a window", {
+  library(spatstat)
+
+  covariates <- as.im(function(x, y) x - 0.5, W = owin())
+
+  set.seed(1)
+
+  configuration <- ppjsdm::rppp(lambda = 100)
+  configuration <- ppjsdm::Configuration(x = c(configuration$x, c(0, 0,   0, 0.5, 0.5, 0.5, 1, 1,   1)),
+                                         y = c(configuration$y, c(0, 0.5, 1, 0,   0.5, 1,   0, 0.5, 1)))
+
+  # Points land exactly on the border: no warning
+  expect_warning(ppjsdm::gibbsm(configuration, covariates = list(covariates)), NA)
+})
+
+# TODO: This test is problematic: the stratified point process G2 computation requires an independent draw of the dummy points
+# WITH THE EXACT SAME NUMBER OF POINTS. But if there are some covariates with NA values, the dummy points with NA values on them are removed.
+# The same is done on the independent draw... but in the end the same cells need not be included.
+# Solving this is not at all straightforward, so commenting the test for now.
+# test_that("gibbsm can handle summary of fit with NA covariate values", {
+#   library(spatstat)
+#
+#   covariates <- list(x = as.im(function(x, y) ifelse(x < 0.5, NA, x - 0.5), W = owin()))
+#
+#   set.seed(1)
+#
+#   configuration <- ppjsdm::rppp(lambda = 100)
+#
+#   expect_warning(fit <- ppjsdm::gibbsm(configuration, dummy_distribution = "binomial", covariates = covariates))
+#   expect_error(summary(fit), NA)
+# })
+
+