In [None]:
library(rsample)   # data splitting 
library(ggplot2)   # plotting
library(earth)     # fit MARS models
library(caret)     # automating the tuning process
library(vip)       # variable importance
library(pdp)       # variable relationships
library(plot3D)
library("AmesHousing")
library("pls")
library("glmnet")
library("kableExtra")

In [None]:
# Load the individuals' run results data set from the spif repository.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
results <- read.table(
  "https://raw.githubusercontent.com/edigley/spif/master/results/farsite_individuals_runtime_jonquera.txt",
  header = TRUE
)

# Extract only the main features: the ten parameter columns p0..p9 plus the two
# measured responses.
ds <- subset(results, select = c(paste0("p", 0:9), "runtime", "maxRSS"))

# Replace the positional names p0..p9 with descriptive ones; the order below
# must match p0..p9. NOTE(review): the names presumably identify the simulator
# input each column holds -- confirm against the data source.
params <- c(
  "p_1h", "p_10h", "p_100h", "p_herb", "p_1000h",
  "p_ws", "p_wd", "p_th", "p_hh", "p_adj"
)
colnames(ds) <- c(params, "runtime", "maxRSS")

In [None]:
# Fit a MARS model for the individuals' runtime, tuned with 10-fold CV.

# Modelling frame: the input parameters plus the runtime response.
dsRuntime <- ds[, c(params, "runtime"), drop = FALSE]

# Tuning grid: every combination of interaction degree 1..5 and 1..20 retained
# terms.
hyper_grid <- expand.grid(
  degree = seq_len(5),
  nprune = floor(seq(1, 20, length.out = 20))
)

# 90/10 train/test split stratified on the response; seeded for
# reproducibility.
set.seed(1984)
dsRuntime_split <- initial_split(dsRuntime, prop = .9, strata = "runtime")
dsRuntime_train <- training(dsRuntime_split)
dsRuntime_test  <- testing(dsRuntime_split)

# Grid search over hyper_grid, selecting the candidate with the best
# cross-validated RMSE.
marsRuntime <- train(
  x = dsRuntime_train[setdiff(names(dsRuntime_train), "runtime")],
  y = dsRuntime_train[["runtime"]],
  method = "earth",
  metric = "RMSE",
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = hyper_grid
)

# Report the selected hyper-parameters, the final model and its summary, then
# plot the tuning results.
marsRuntime$bestTune
marsRuntime$finalModel
summary(marsRuntime)
ggplot(marsRuntime)

In [None]:
# Fit a MARS model for the individuals' memory Resident Set Size (maxRSS),
# tuned with 10-fold CV.

# Modelling frame: the input parameters plus the maxRSS response.
dsMaxRSS <- ds[, c(params, "maxRSS"), drop = FALSE]

# Tuning grid: every combination of interaction degree 1..5 and 1..20 retained
# terms.
hyper_grid <- expand.grid(
  degree = seq_len(5),
  nprune = floor(seq(1, 20, length.out = 20))
)

# 90/10 train/test split stratified on the response; seeded for
# reproducibility.
set.seed(7303)
dsMaxRSS_split <- initial_split(dsMaxRSS, prop = .9, strata = "maxRSS")
dsMaxRSS_train <- training(dsMaxRSS_split)
dsMaxRSS_test  <- testing(dsMaxRSS_split)

# Grid search over hyper_grid, selecting the candidate with the best
# cross-validated RMSE.
marsMaxRSS <- train(
  x = dsMaxRSS_train[setdiff(names(dsMaxRSS_train), "maxRSS")],
  y = dsMaxRSS_train[["maxRSS"]],
  method = "earth",
  metric = "RMSE",
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = hyper_grid
)

# Report the selected hyper-parameters, the final model and its summary, then
# plot the tuning results.
marsMaxRSS$bestTune
marsMaxRSS$finalModel
summary(marsMaxRSS)
ggplot(marsMaxRSS)

In [None]:
# Plot the residuals returned by residuals() for each fitted caret model. Only
# one vector is passed to plot(), so the values are drawn against their index;
# pch="." renders each point as a small dot.
# The residuals() call is kept inline on purpose: plot() builds the axis label
# from the argument expression, so it identifies which model is shown.
plot(residuals(marsRuntime), pch=".")
plot(residuals(marsMaxRSS),  pch=".")

In [None]:
# Partial dependence of the runtime model on p_ws and on p_hh individually
# (10 grid points each), shown side by side.
p1 <- autoplot(partial(marsRuntime, pred.var = "p_ws", grid.resolution = 10))
p2 <- autoplot(partial(marsRuntime, pred.var = "p_hh", grid.resolution = 10))
gridExtra::grid.arrange(p1, p2, ncol = 2)

# Joint partial dependence on p_ws and p_hh over a 25 x 25 grid, drawn as a
# draped surface (levelplot = FALSE) with a colour key; `screen` sets the
# viewing rotation.
plotPartial(
  partial(marsRuntime, pred.var = c("p_ws", "p_hh"), grid.resolution = 25),
  levelplot = FALSE,
  zlab = "runtime_hat",
  drape = TRUE,
  colorkey = TRUE,
  screen = list(z = 100, x = -60)
)

In [None]:
# Partial dependence of the maxRSS model on p_ws and on p_hh individually
# (10 grid points each), shown side by side.
p1 <- partial(marsMaxRSS, pred.var = "p_ws", grid.resolution = 10) %>%
  autoplot()
p2 <- partial(marsMaxRSS, pred.var = "p_hh", grid.resolution = 10) %>%
  autoplot()
gridExtra::grid.arrange(p1, p2, ncol = 2)

# Joint partial dependence on p_ws and p_hh over a 25 x 25 grid, drawn as a
# draped surface (levelplot = FALSE) with a colour key; `screen` sets the
# viewing rotation.
# The z axis is labelled for the maxRSS prediction: this surface comes from
# marsMaxRSS, not from the runtime model.
partial(marsMaxRSS, pred.var = c("p_ws", "p_hh"), grid.resolution = 25) %>%
  plotPartial(
    levelplot = FALSE,
    zlab = "maxRSS_hat",
    drape = TRUE,
    colorkey = TRUE,
    screen = list(z = 100, x = -60)
  )