diff --git a/DESCRIPTION b/DESCRIPTION
index a610da806..36e1a101a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: ranger
 Type: Package
 Title: A Fast Implementation of Random Forests
-Version: 0.11.4
+Version: 0.11.5
 Date: 2019-09-25
 Author: Marvin N. Wright [aut, cre], Stefan Wager [ctb], Philipp Probst [ctb]
 Maintainer: Marvin N. Wright <cran@wrig.de>
diff --git a/NEWS b/NEWS
index a46075005..3f2d244c4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@
+##### Version 0.11.5
+* Add x/y interface
+* Internal changes (seed differences possible, prediction incompatible with older versions)
+
 ##### Version 0.11.4
 * Add "beta" splitrule for bounded outcomes
 
diff --git a/NEWS.md b/NEWS.md
index ee7b103df..b2d62e431 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+##### Version 0.11.5
+* Add x/y interface
+* Internal changes (seed differences possible, prediction incompatible with older versions)
+
 ##### Version 0.11.4
 * Add "beta" splitrule for bounded outcomes
 
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 4312eaaef..9d4618600 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -1,8 +1,8 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-rangerCpp <- function(treetype, dependent_variable_name, input_data, variable_names, mtry, num_trees, verbose, seed, num_threads, write_forest, importance_mode_r, min_node_size, split_select_weights, use_split_select_weights, always_split_variable_names, use_always_split_variable_names, status_variable_name, prediction_mode, loaded_forest, snp_data, sample_with_replacement, probability, unordered_variable_names, use_unordered_variable_names, save_memory, splitrule_r, case_weights, use_case_weights, class_weights, predict_all, keep_inbag, sample_fraction, alpha, minprop, holdout, prediction_type_r, num_random_splits, sparse_data, use_sparse_data, order_snps, oob_error, max_depth, inbag, use_inbag) {
-    .Call(`_ranger_rangerCpp`, treetype, dependent_variable_name, input_data, variable_names, mtry, num_trees, verbose, seed, num_threads, write_forest, importance_mode_r, min_node_size, split_select_weights, use_split_select_weights, always_split_variable_names, use_always_split_variable_names, status_variable_name, prediction_mode, loaded_forest, snp_data, sample_with_replacement, probability, unordered_variable_names, use_unordered_variable_names, save_memory, splitrule_r, case_weights, use_case_weights, class_weights, predict_all, keep_inbag, sample_fraction, alpha, minprop, holdout, prediction_type_r, num_random_splits, sparse_data, use_sparse_data, order_snps, oob_error, max_depth, inbag, use_inbag)
+rangerCpp <- function(treetype, input_x, input_y, variable_names, mtry, num_trees, verbose, seed, num_threads, write_forest, importance_mode_r, min_node_size, split_select_weights, use_split_select_weights, always_split_variable_names, use_always_split_variable_names, prediction_mode, loaded_forest, snp_data, sample_with_replacement, probability, unordered_variable_names, use_unordered_variable_names, save_memory, splitrule_r, case_weights, use_case_weights, class_weights, predict_all, keep_inbag, sample_fraction, alpha, minprop, holdout, prediction_type_r, num_random_splits, sparse_x, use_sparse_data, order_snps, oob_error, max_depth, inbag, use_inbag) {
+    .Call(`_ranger_rangerCpp`, treetype, input_x, input_y, variable_names, mtry, num_trees, verbose, seed, num_threads, write_forest, importance_mode_r, min_node_size, split_select_weights, use_split_select_weights, always_split_variable_names, use_always_split_variable_names, prediction_mode, loaded_forest, snp_data, sample_with_replacement, probability, unordered_variable_names, use_unordered_variable_names, save_memory, splitrule_r, case_weights, use_case_weights, class_weights, predict_all, keep_inbag, sample_fraction, alpha, minprop, holdout, prediction_type_r, num_random_splits, sparse_x, use_sparse_data, order_snps, oob_error, max_depth, inbag, use_inbag)
 }
 
 numSmaller <- function(values, reference) {
diff --git a/R/predict.R b/R/predict.R
index 90e1582bd..8b423eb98 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -81,11 +81,9 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
     snp.data <- data@gtdata@gtps@.Data
     data <- data@phdata[, -1, drop = FALSE]
     gwa.mode <- TRUE
-    variable.names <- c(names(data), snp.names)
   } else {
     snp.data <- as.matrix(0)
     gwa.mode <- FALSE
-    variable.names <- colnames(data)
   }
 
   ## Check forest argument
@@ -94,14 +92,13 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
   } else {
     forest <- object
   }
-  if (is.null(forest$dependent.varID) || is.null(forest$num.trees) ||
+  if (is.null(forest$num.trees) ||
         is.null(forest$child.nodeIDs) || is.null(forest$split.varIDs) ||
         is.null(forest$split.values) || is.null(forest$independent.variable.names) ||
         is.null(forest$treetype)) {
     stop("Error: Invalid forest object.")
   }
-  if (forest$treetype == "Survival" && (is.null(forest$status.varID)  ||
-                                        is.null(forest$chf) || is.null(forest$unique.death.times))) {
+  if (forest$treetype == "Survival" && (is.null(forest$chf) || is.null(forest$unique.death.times))) {
     stop("Error: Invalid forest object.")
   }
   
@@ -109,6 +106,9 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
   if (length(forest$child.nodeIDs) != forest$num.trees || length(forest$child.nodeIDs[[1]]) != 2) {
     stop("Error: Invalid forest object. Is the forest grown in ranger version <0.3.9? Try to predict with the same version the forest was grown.")
   }
+  if (!is.null(forest$dependent.varID)) {
+    stop("Error: Invalid forest object. Is the forest grown in ranger version <0.11.5? Try to predict with the same version the forest was grown.")
+  }
   
   ## Prediction type
   if (type == "response" || type == "se") {
@@ -142,106 +142,53 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
   if (type == "se") {
     predict.all <- TRUE
   }
+  
+  x <- data
+  
+  if (sum(!(forest$independent.variable.names %in% colnames(x))) > 0) {
+    stop("Error: One or more independent variables not found in data.")
+  }
 
-  ## Create final data
-  if (forest$treetype == "Survival") {
-    if (forest$dependent.varID > 0 && forest$status.varID > 1) {
-      if (ncol(data) == length(forest$independent.variable.names)+2) {
-        ## If alternative interface used and same data structure, don't subset data
-        data.used <- data
-      } else if (ncol(data) == length(forest$independent.variable.names)) {
-        data.selected <- data[, forest$independent.variable.names, drop = FALSE]
-        data.used <- cbind(0, 0, data.selected)
-        variable.names <- c("time", "status", forest$independent.variable.names)
-        forest$dependent.varID <- 0
-        forest$status.varID <- 1
-      } else {
-        stop("Invalid prediction data. Include both time and status variable or none.")
-      }
-    } else {
-      ## If formula interface used, subset data
-      data.selected <- data[, forest$independent.variable.names, drop = FALSE]
-
-      ## Arange data as in original data
-      data.used <- cbind(0, 0, data.selected)
-      variable.names <- c("time", "status", forest$independent.variable.names)
-    }
-
-  ## Index of no-recode variables
-  idx.norecode <- c(-(forest$dependent.varID+1), -(forest$status.varID+1))
-
-  } else {
-    ## No survival
-    if (ncol(data) == length(forest$independent.variable.names)+1 && forest$dependent.varID > 0) {
-      ## If alternative interface used and same data structure, don't subset data
-      data.used <- data
-    } else {
-      ## If formula interface used, subset data
-      data.selected <- data[, forest$independent.variable.names, drop = FALSE]
-
-      ## Arange data as in original data
-      if (forest$dependent.varID == 0) {
-        data.used <- cbind(0, data.selected)
-        variable.names <- c("dependent", forest$independent.variable.names)
-      } else if (forest$dependent.varID >= ncol(data)) {
-        data.used <- cbind(data.selected, 0)
-        variable.names <- c(forest$independent.variable.names, "dependent")
-      } else {
-        data.used <- cbind(data.selected[, 1:forest$dependent.varID],
-                           0,
-                           data.selected[, (forest$dependent.varID+1):ncol(data.selected)])
-        variable.names <- c(forest$independent.variable.names[1:forest$dependent.varID],
-                            "dependent",
-                            forest$independent.variable.names[(forest$dependent.varID+1):length(forest$independent.variable.names)])
-      }
-    }
-
-    ## Index of no-recode variables
-    idx.norecode <- -(forest$dependent.varID+1)
+  ## Subset to same column as in training if necessary
+  if (length(colnames(x)) != length(forest$independent.variable.names) || any(colnames(x) != forest$independent.variable.names)) {
+    x <- x[, forest$independent.variable.names, drop = FALSE]
   }
 
   ## Recode characters
-  if (!is.matrix(data.used) && !inherits(data.used, "Matrix")) {
-    char.columns <- sapply(data.used, is.character)
-    data.used[char.columns] <- lapply(data.used[char.columns], factor)
+  if (!is.matrix(x) && !inherits(x, "Matrix")) {
+    char.columns <- sapply(x, is.character)
+    if (length(char.columns) > 0) {
+      x[char.columns] <- lapply(x[char.columns], factor)
+    }
   }
-
+  
   ## Recode factors if forest grown 'order' mode
   if (!is.null(forest$covariate.levels) && !all(sapply(forest$covariate.levels, is.null))) {
-    data.used[, idx.norecode] <- mapply(function(x, y) {
-      if(is.null(y)) {
-        x
+    x <- mapply(function(xx, yy) {
+      if(is.null(yy)) {
+        xx
       } else {
-        new.levels <- setdiff(levels(x), y)
-        factor(x, levels = c(y, new.levels), exclude = NULL)
+        new.levels <- setdiff(levels(xx), yy)
+        factor(xx, levels = c(yy, new.levels), exclude = NULL)
       }
-    }, data.used[, idx.norecode], forest$covariate.levels, SIMPLIFY = !is.data.frame(data.used[, idx.norecode]))
+    }, x, forest$covariate.levels, SIMPLIFY = !is.data.frame(x))
   }
-
-  ## Convert to data matrix
-  if (is.matrix(data.used) || inherits(data.used, "Matrix")) {
-    data.final <- data.used
-  } else {
-    data.final <- data.matrix(data.used)
+  if (is.list(x) && !is.data.frame(x)) {
+    x <- as.data.frame(x)
   }
-  
 
-  ## If gwa mode, add snp variable names
-  if (gwa.mode) {
-    variable.names <- c(variable.names, snp.names)
+  ## Convert to data matrix
+  if (!is.matrix(x) & !inherits(x, "Matrix")) {
+    x <- data.matrix(x)
   }
 
   ## Check missing values
-  if (any(is.na(data.final))) {
-    offending_columns <- colnames(data.final)[colSums(is.na(data.final)) > 0]
+  if (any(is.na(x))) {
+    offending_columns <- colnames(x)[colSums(is.na(x)) > 0]
     stop("Missing data in columns: ",
          paste0(offending_columns, collapse = ", "), ".", call. = FALSE)
   }
 
-  if (sum(!(forest$independent.variable.names %in% variable.names)) > 0) {
-    stop("Error: One or more independent variables not found in data.")
-  }
-
   ## Num threads
   ## Default 0 -> detect from system in C++.
   if (is.null(num.threads)) {
@@ -268,7 +215,6 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
   }
 
   ## Defaults for variables not needed
-  dependent.variable.name <- ""
   mtry <- 0
   importance <- 0
   min.node.size <- 0
@@ -276,7 +222,6 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
   use.split.select.weights <- FALSE
   always.split.variables <- c("0", "0")
   use.always.split.variables <- FALSE
-  status.variable.name <- "status"
   prediction.mode <- TRUE
   write.forest <- FALSE
   replace <- TRUE
@@ -299,27 +244,29 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
   max.depth <- 0
   inbag <- list(c(0,0))
   use.inbag <- FALSE
+  y <- matrix(c(0, 0))
   
   ## Use sparse matrix
-  if ("dgCMatrix" %in% class(data.final)) {
-    sparse.data <- data.final
-    data.final <- matrix(c(0, 0))
+  if ("dgCMatrix" %in% class(x)) {
+    sparse.x <- x
+    x <- matrix(c(0, 0))
     use.sparse.data <- TRUE
   } else {
-    sparse.data <- Matrix(matrix(c(0, 0)))
+    sparse.x <- Matrix(matrix(c(0, 0)))
     use.sparse.data <- FALSE
+    x <- data.matrix(x)
   }
   
   ## Call Ranger
-  result <- rangerCpp(treetype, dependent.variable.name, data.final, variable.names, mtry,
+  result <- rangerCpp(treetype, x, y, forest$independent.variable.names, mtry,
                       num.trees, verbose, seed, num.threads, write.forest, importance,
                       min.node.size, split.select.weights, use.split.select.weights,
                       always.split.variables, use.always.split.variables,
-                      status.variable.name, prediction.mode, forest, snp.data, replace, probability,
+                      prediction.mode, forest, snp.data, replace, probability,
                       unordered.factor.variables, use.unordered.factor.variables, save.memory, splitrule,
                       case.weights, use.case.weights, class.weights, 
                       predict.all, keep.inbag, sample.fraction, alpha, minprop, holdout, 
-                      prediction.type, num.random.splits, sparse.data, use.sparse.data,
+                      prediction.type, num.random.splits, sparse.x, use.sparse.data,
                       order.snps, oob.error, max.depth, inbag, use.inbag)
 
   if (length(result) == 0) {
@@ -327,7 +274,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
   }
 
   ## Prepare results
-  result$num.samples <- nrow(data.final)
+  result$num.samples <- nrow(x)
   result$treetype <- forest$treetype
 
   if (predict.all) {
diff --git a/R/ranger.R b/R/ranger.R
index 6fdbc8811..141cdcafe 100644
--- a/R/ranger.R
+++ b/R/ranger.R
@@ -66,7 +66,8 @@
 ##' This importance measure can be combined with the methods to estimate p-values in \code{\link{importance_pvalues}}.
 ##'
 ##' For a large number of variables and data frames as input data the formula interface can be slow or impossible to use.
-##' Alternatively \code{dependent.variable.name} (and \code{status.variable.name} for survival) can be used.
+##' Alternatively \code{dependent.variable.name} (and \code{status.variable.name} for survival) or \code{x} and \code{y} can be used.
+##' Use \code{x} and \code{y} with a matrix for \code{x} to avoid conversions and save memory.
 ##' Consider setting \code{save.memory = TRUE} if you encounter memory problems for very large datasets, but be aware that this option slows down the tree growing. 
 ##' 
 ##' For GWAS data consider combining \code{ranger} with the \code{GenABEL} package. 
@@ -114,6 +115,8 @@
 ##' @param dependent.variable.name Name of dependent variable, needed if no formula given. For survival forests this is the time variable.
 ##' @param status.variable.name Name of status variable, only applicable to survival data and needed if no formula given. Use 1 for event and 0 for censoring.
 ##' @param classification Only needed if data is a matrix. Set to \code{TRUE} to grow a classification forest.
+##' @param x Predictor data (independent variables), alternative interface to data with formula or dependent.variable.name.
+##' @param y Response vector (dependent variable), alternative interface to data with formula or dependent.variable.name. For survival use a \code{Surv()} object or a matrix with time and status.
 ##' @return Object of class \code{ranger} with elements
 ##'   \item{\code{forest}}{Saved forest (If write.forest set to TRUE). Note that the variable IDs in the \code{split.varIDs} object do not necessarily represent the column number in R.}
 ##'   \item{\code{predictions}}{Predicted classes/values, based on out of bag samples (classification and regression only).}
@@ -161,8 +164,9 @@
 ##' rg.veteran <- ranger(Surv(time, status) ~ ., data = veteran)
 ##' plot(rg.veteran$unique.death.times, rg.veteran$survival[1,])
 ##'
-##' ## Alternative interface
+##' ## Alternative interfaces (same results)
 ##' ranger(dependent.variable.name = "Species", data = iris)
+##' ranger(y = iris[, 5], x = iris[, -5])
 ##' 
 ##' \dontrun{
 ##' ## Use GenABEL interface to read Plink data into R and grow a classification forest
@@ -212,79 +216,93 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
                    num.threads = NULL, save.memory = FALSE,
                    verbose = TRUE, seed = NULL, 
                    dependent.variable.name = NULL, status.variable.name = NULL, 
-                   classification = NULL) {
-  
-  ## GenABEL GWA data
-  if ("gwaa.data" %in% class(data)) {
-    snp.names <- data@gtdata@snpnames
-    snp.data <- data@gtdata@gtps@.Data
-    data <- data@phdata
-    if ("id" %in% names(data)) {
-      data$"id" <- NULL
-    }
-    gwa.mode <- TRUE
-    save.memory <- FALSE
-  } else {
-    snp.data <- as.matrix(0)
-    gwa.mode <- FALSE
+                   classification = NULL, x = NULL, y = NULL) {
+  
+  ## By default not in GWAS mode
+  snp.data <- as.matrix(0)
+  gwa.mode <- FALSE
+  
+  if (is.null(data)) {
+    ## x/y interface
+    if (is.null(x) | is.null(y)) {
+      stop("Error: Either data or x and y is required.")
+    }
+  }  else {
+    ## GenABEL GWA data
+    if ("gwaa.data" %in% class(data)) {
+      snp.names <- data@gtdata@snpnames
+      snp.data <- data@gtdata@gtps@.Data
+      data <- data@phdata
+      if ("id" %in% names(data)) {
+        data$"id" <- NULL
+      }
+      gwa.mode <- TRUE
+      save.memory <- FALSE
+    } 
+    
+    ## Formula interface. Use whole data frame if no formula provided and depvarname given
+    if (is.null(formula)) {
+      if (is.null(dependent.variable.name)) {
+        if (is.null(y) | is.null(x)) {
+          stop("Error: Please give formula, dependent variable name or x/y.")
+        } 
+      } else {
+        if (is.null(status.variable.name)) {
+          y <- data[, dependent.variable.name, drop = TRUE]
+          x <- data[, !(colnames(data) %in% dependent.variable.name), drop = FALSE]
+        } else {
+          y <- survival::Surv(data[, dependent.variable.name], data[, status.variable.name]) 
+          x <- data[, !(colnames(data) %in% c(dependent.variable.name, status.variable.name)), drop = FALSE]
+        }
+      }
+    } else {
+      formula <- formula(formula)
+      if (class(formula) != "formula") {
+        stop("Error: Invalid formula.")
+      }
+      data.selected <- parse.formula(formula, data, env = parent.frame())
+      y <- data.selected[, 1]
+      x <- data.selected[, -1, drop = FALSE]
+    }
   }
   
   ## Sparse matrix data
-  if (inherits(data, "Matrix")) {
-    if (!("dgCMatrix" %in% class(data))) {
+  if (inherits(x, "Matrix")) {
+    if (!("dgCMatrix" %in% class(x))) {
       stop("Error: Currently only sparse data of class 'dgCMatrix' supported.")
-    }
-  
+    } 
     if (!is.null(formula)) {
-      stop("Error: Sparse matrices only supported with alternative interface. Use dependent.variable.name instead of formula.")
-    }
-  }
-    
-  ## Formula interface. Use whole data frame is no formula provided and depvarname given
-  if (is.null(formula)) {
-    if (is.null(dependent.variable.name)) {
-      stop("Error: Please give formula or dependent variable name.")
-    }
-    if (is.null(status.variable.name)) {
-      status.variable.name <- ""
-      response <- data[, dependent.variable.name, drop = TRUE]
-    } else {
-      response <- survival::Surv(data[, dependent.variable.name], data[, status.variable.name]) #data[, c(dependent.variable.name, status.variable.name)]
-    }
-    data.selected <- data
-  } else {
-    formula <- formula(formula)
-    if (class(formula) != "formula") {
-      stop("Error: Invalid formula.")
+      stop("Error: Sparse matrices only supported with alternative interface. Use dependent.variable.name or x/y instead of formula.")
     }
-    data.selected <- parse.formula(formula, data, env = parent.frame())
-    response <- data.selected[, 1]
   }
   
   ## Check missing values
-  if (any(is.na(data.selected))) {
-    offending_columns <- colnames(data.selected)[colSums(is.na(data.selected)) > 0]
+  if (any(is.na(x))) {
+    offending_columns <- colnames(x)[colSums(is.na(x)) > 0]
     stop("Missing data in columns: ",
          paste0(offending_columns, collapse = ", "), ".", call. = FALSE)
   }
+  if (any(is.na(y))) {
+    stop("Missing data in dependent variable.", call. = FALSE)
+  }
   
   ## Check response levels
-  if (is.factor(response)) {
-    if (nlevels(response) != nlevels(droplevels(response))) {
-      dropped_levels <- setdiff(levels(response), levels(droplevels(response)))
+  if (is.factor(y)) {
+    if (nlevels(y) != nlevels(droplevels(y))) {
+      dropped_levels <- setdiff(levels(y), levels(droplevels(y)))
       warning("Dropped unused factor level(s) in dependent variable: ",
               paste0(dropped_levels, collapse = ", "), ".", call. = FALSE)
     }
   }
   
   ## Treetype
-  if (is.factor(response)) {
+  if (is.factor(y)) {
     if (probability) {
       treetype <- 9
     } else {
       treetype <- 1
     }
-  } else if (is.numeric(response) && (is.null(ncol(response)) || ncol(response) == 1)) {
+  } else if (is.numeric(y) && (is.null(ncol(y)) || ncol(y) == 1)) {
     if (!is.null(classification) && classification && !probability) {
       treetype <- 1
     } else if (probability) {
@@ -292,7 +310,7 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
     } else {
       treetype <- 3
     }
-  } else if (class(response) == "Surv" || is.data.frame(response) || is.matrix(response)) {
+  } else if (class(y) == "Surv" || is.data.frame(y) || is.matrix(y)) {
     treetype <- 5
   } else {
     stop("Error: Unsupported type of dependent variable.")
@@ -302,21 +320,8 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
   if (quantreg && treetype != 3) {
     stop("Error: Quantile prediction implemented only for regression outcomes.")
   }
-  
-  ## Dependent and status variable name. For non-survival dummy status variable name.
-  if (!is.null(formula)) {
-    if (treetype == 5) {
-      dependent.variable.name <- dimnames(response)[[2]][1]
-      status.variable.name <- dimnames(response)[[2]][2]
-    } else {
-      dependent.variable.name <- names(data.selected)[1]
-      status.variable.name <- ""
-    }
-    independent.variable.names <- names(data.selected)[-1]
-  } else {
-    independent.variable.names <- colnames(data.selected)[colnames(data.selected) != dependent.variable.name &
-                                                          colnames(data.selected) != status.variable.name]
-  }
+
+  independent.variable.names <- colnames(x)
   
   ## respect.unordered.factors
   if (is.null(respect.unordered.factors)) {
@@ -335,82 +340,66 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
   }
   
   ## Recode characters as factors and recode factors if 'order' mode
-  if (!is.matrix(data.selected) && !inherits(data.selected, "Matrix")) {
-    character.idx <- sapply(data.selected, is.character)
+  if (!is.matrix(x) && !inherits(x, "Matrix") && ncol(x) > 0) {
+    character.idx <- sapply(x, is.character)
     
     if (respect.unordered.factors == "order") {
       ## Recode characters and unordered factors
-      names.selected <- names(data.selected)
-      ordered.idx <- sapply(data.selected, is.ordered)
-      factor.idx <- sapply(data.selected, is.factor)
-      independent.idx <- names.selected %in% independent.variable.names
-      recode.idx <- independent.idx & (character.idx | (factor.idx & !ordered.idx))
+      ordered.idx <- sapply(x, is.ordered)
+      factor.idx <- sapply(x, is.factor)
+      recode.idx <- character.idx | (factor.idx & !ordered.idx)
 
       if (any(recode.idx) & (importance == "impurity_corrected" || importance == "impurity_unbiased")) {
         warning("Corrected impurity importance may not be unbiased for re-ordered factor levels. Consider setting respect.unordered.factors to 'ignore' or 'partition' or manually compute corrected importance.")
       }
       
       ## Numeric response
-      if (is.factor(response)) {
-        num.response <- as.numeric(response)
+      if (is.factor(y)) {
+        num.y <- as.numeric(y)
       } else {
-        num.response <- response
+        num.y <- y
       }
 
       ## Recode each column
-      data.selected[recode.idx] <- lapply(data.selected[recode.idx], function(x) {
-        if (!is.factor(x)) {
-          x <- as.factor(x)
+      x[recode.idx] <- lapply(x[recode.idx], function(xx) {
+        if (!is.factor(xx)) {
+          xx <- as.factor(xx)
         } 
         
-        if (length(levels(x)) == 1) {
+        if (length(levels(xx)) == 1) {
           ## Don't order if only one level
-          levels.ordered <- levels(x)
-        } else if ("Surv" %in% class(response)) {
+          levels.ordered <- levels(xx)
+        } else if ("Surv" %in% class(y)) {
           ## Use median survival if available or largest quantile available in all strata if median not available
-          levels.ordered <- largest.quantile(response ~ x)
+          levels.ordered <- largest.quantile(y ~ xx)
           
           ## Get all levels not in node
-          levels.missing <- setdiff(levels(x), levels.ordered)
+          levels.missing <- setdiff(levels(xx), levels.ordered)
           levels.ordered <- c(levels.missing, levels.ordered)
-        } else if (is.factor(response) & nlevels(response) > 2) {
-          levels.ordered <- pca.order(y = response, x = x)
+        } else if (is.factor(y) & nlevels(y) > 2) {
+          levels.ordered <- pca.order(y = y, x = xx)
         } else {
           ## Order factor levels by mean response
-          means <- sapply(levels(x), function(y) {
-            mean(num.response[x == y])
+          means <- sapply(levels(xx), function(y) {
+            mean(num.y[xx == y])
           })
-          levels.ordered <- as.character(levels(x)[order(means)])
+          levels.ordered <- as.character(levels(xx)[order(means)])
         }
         
         ## Return reordered factor
-        factor(x, levels = levels.ordered, ordered = TRUE, exclude = NULL)
+        factor(xx, levels = levels.ordered, ordered = TRUE, exclude = NULL)
       })
       
       ## Save levels
-      covariate.levels <- lapply(data.selected[independent.idx], levels)
+      covariate.levels <- lapply(x, levels)
     } else {
       ## Recode characters only
-      data.selected[character.idx] <- lapply(data.selected[character.idx], factor)
+      x[character.idx] <- lapply(x[character.idx], factor)
     }
   }
   
-  ## Input data and variable names, create final data matrix
-  if (!is.null(formula) && treetype == 5) {
-    data.final <- data.matrix(cbind(response[, 1], response[, 2],
-                              data.selected[-1]))
-    colnames(data.final) <- c(dependent.variable.name, status.variable.name,
-                              independent.variable.names)
-  } else if (is.matrix(data.selected) || inherits(data.selected, "Matrix")) {
-    data.final <- data.selected
-  } else {
-    data.final <- data.matrix(data.selected)
-  }
-  variable.names <- colnames(data.final)
-  
   ## If gwa mode, add snp variable names
   if (gwa.mode) {
-    variable.names <- c(variable.names, snp.names)
     all.independent.variable.names <- c(independent.variable.names, snp.names)
   } else {
     all.independent.variable.names <- independent.variable.names
@@ -507,14 +496,14 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
     if (sum(sample.fraction) <= 0) {
       stop("Error: Invalid value for sample.fraction. Sum of values must be >0.")
     }
-    if (length(sample.fraction) != nlevels(response)) {
-      stop("Error: Invalid value for sample.fraction. Expecting ", nlevels(response), " values, provided ", length(sample.fraction), ".")
+    if (length(sample.fraction) != nlevels(y)) {
+      stop("Error: Invalid value for sample.fraction. Expecting ", nlevels(y), " values, provided ", length(sample.fraction), ".")
     }
-    if (!replace & any(sample.fraction * length(response) > table(response))) {
-      idx <- which(sample.fraction * length(response) > table(response))[1]
+    if (!replace & any(sample.fraction * length(y) > table(y))) {
+      idx <- which(sample.fraction * length(y) > table(y))[1]
       stop("Error: Not enough samples in class ", names(idx), 
-           "; available: ", table(response)[idx], 
-           ", requested: ", (sample.fraction * length(response))[idx], ".")
+           "; available: ", table(y)[idx], 
+           ", requested: ", (sample.fraction * length(y))[idx], ".")
     }
     if (!is.null(case.weights)) {
       stop("Error: Combination of case.weights and class-wise sampling not supported.")
@@ -557,7 +546,7 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
       sample.fraction <- sample.fraction * mean(case.weights > 0)
     }
     
-    if (!replace && sum(case.weights > 0) < sample.fraction * nrow(data.final)) {
+    if (!replace && sum(case.weights > 0) < sample.fraction * nrow(x)) {
       stop("Error: Fewer non-zero case weights than observations to sample.")
     }
   }
@@ -583,7 +572,7 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
   
   ## Class weights: NULL for no weights (all 1)
   if (is.null(class.weights)) {
-    class.weights <- rep(1, nlevels(response))
+    class.weights <- rep(1, nlevels(y))
   } else {
     if (!(treetype %in% c(1, 9))) {
       stop("Error: Argument class.weights only valid for classification forests.")
@@ -591,12 +580,12 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
     if (!is.numeric(class.weights) || any(class.weights < 0)) {
       stop("Error: Invalid value for class.weights. Please give a vector of non-negative values.")
     }
-    if (length(class.weights) != nlevels(response)) {
+    if (length(class.weights) != nlevels(y)) {
       stop("Error: Number of class weights not equal to number of classes.")
     }
 
     ## Reorder (C++ expects order as appearing in the data)
-    class.weights <- class.weights[unique(as.numeric(response))]
+    class.weights <- class.weights[unique(as.numeric(y))]
   }
   
   ## Split select weights: NULL for no weights
@@ -686,7 +675,7 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
     }
     
     ## Check for 0..1 outcome
-    if (min(response) < 0 || max(response) > 1) {
+    if (min(y) < 0 || max(y) > 1) {
       stop("Error: beta splitrule applicable to regression data with outcome between 0 and 1 only.")
     }
   } else {
@@ -711,16 +700,14 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
 
   ## Unordered factors  
   if (respect.unordered.factors == "partition") {
-    names.selected <- names(data.selected)
-    ordered.idx <- sapply(data.selected, is.ordered)
-    factor.idx <- sapply(data.selected, is.factor)
-    independent.idx <- names.selected != dependent.variable.name & names.selected != status.variable.name
-    unordered.factor.variables <- names.selected[factor.idx & !ordered.idx & independent.idx]
+    ordered.idx <- sapply(x, is.ordered)
+    factor.idx <- sapply(x, is.factor)
+    unordered.factor.variables <- independent.variable.names[factor.idx & !ordered.idx]
     
     if (length(unordered.factor.variables) > 0) {
       use.unordered.factor.variables <- TRUE
       ## Check level count
-      num.levels <- sapply(data.selected[, factor.idx & !ordered.idx & independent.idx, drop = FALSE], nlevels)
+      num.levels <- sapply(x[, factor.idx & !ordered.idx, drop = FALSE], nlevels)
       max.level.count <- .Machine$double.digits
       if (max(num.levels) > max.level.count) {
         stop(paste("Too many levels in unordered categorical variable ", unordered.factor.variables[which.max(num.levels)], 
@@ -754,12 +741,11 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
     if (treetype == 3 && splitrule == "maxstat") {
       warning("Warning: The 'order' mode for unordered factor handling with the 'maxstat' splitrule is experimental.")
     }
-    if (gwa.mode & ((treetype %in% c(1,9) & nlevels(response) > 2) | treetype == 5)) {
+    if (gwa.mode & ((treetype %in% c(1,9) & nlevels(y) > 2) | treetype == 5)) {
       stop("Error: Ordering of SNPs currently only implemented for regression and binary outcomes.")
     }
   }
   
-
   ## Prediction mode always false. Use predict.ranger() method.
   prediction.mode <- FALSE
   predict.all <- FALSE
@@ -769,13 +755,22 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
   loaded.forest <- list()
   
   ## Use sparse matrix
-  if ("dgCMatrix" %in% class(data.final)) {
-    sparse.data <- data.final
-    data.final <- matrix(c(0, 0))
+  if ("dgCMatrix" %in% class(x)) {
+    sparse.x <- x
+    x <- matrix(c(0, 0))
     use.sparse.data <- TRUE
   } else {
-    sparse.data <- Matrix(matrix(c(0, 0)))
+    sparse.x <- Matrix(matrix(c(0, 0)))
     use.sparse.data <- FALSE
+    if (is.data.frame(x)) {
+      x <- data.matrix(x)
+    }
+  }
+  
+  if (treetype == 5) {
+    y.mat <- as.matrix(y)
+  } else {
+    y.mat <- as.matrix(as.numeric(y))
   }
   
   if (respect.unordered.factors == "order"){
@@ -784,20 +779,16 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
     order.snps <- FALSE
   }
   
-  
-  ## Clean up
-  rm("data.selected")
-
   ## Call Ranger
-  result <- rangerCpp(treetype, dependent.variable.name, data.final, variable.names, mtry,
+  result <- rangerCpp(treetype, x, y.mat, independent.variable.names, mtry,
                       num.trees, verbose, seed, num.threads, write.forest, importance.mode,
                       min.node.size, split.select.weights, use.split.select.weights,
                       always.split.variables, use.always.split.variables,
-                      status.variable.name, prediction.mode, loaded.forest, snp.data,
+                      prediction.mode, loaded.forest, snp.data,
                       replace, probability, unordered.factor.variables, use.unordered.factor.variables, 
                       save.memory, splitrule.num, case.weights, use.case.weights, class.weights, 
                       predict.all, keep.inbag, sample.fraction, alpha, minprop, holdout, prediction.type, 
-                      num.random.splits, sparse.data, use.sparse.data, order.snps, oob.error, max.depth, 
+                      num.random.splits, sparse.x, use.sparse.data, order.snps, oob.error, max.depth, 
                       inbag, use.inbag)
   
   if (length(result) == 0) {
@@ -810,12 +801,12 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
   }
 
   ## Set predictions
-  if (treetype == 1 && is.factor(response) && oob.error) {
-    result$predictions <- integer.to.factor(result$predictions,
-                                            levels(response))
-    true.values <- integer.to.factor(unlist(data.final[, dependent.variable.name]),
-                                     levels(response))
-    result$confusion.matrix <- table(true.values, result$predictions, 
+  if (treetype == 1 && oob.error) {
+    if (is.factor(y)) {
+      result$predictions <- integer.to.factor(result$predictions,
+                                              levels(y))
+    } 
+    result$confusion.matrix <- table(y, result$predictions, 
                                      dnn = c("true", "predicted"), useNA = "ifany")
   } else if (treetype == 5 && oob.error) {
     if (is.list(result$predictions)) {
@@ -827,7 +818,7 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
     result$chf <- result$predictions
     result$predictions <- NULL
     result$survival <- exp(-result$chf)
-  } else if (treetype == 9 && !is.matrix(data) && oob.error) {
+  } else if (treetype == 9 && oob.error) {
     if (is.list(result$predictions)) {
       result$predictions <- do.call(rbind, result$predictions)
     } 
@@ -836,9 +827,9 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
     }
     
     ## Set colnames and sort by levels
-    colnames(result$predictions) <- unique(response)
-    if (is.factor(response)) {
-      result$predictions <- result$predictions[, levels(droplevels(response)), drop = FALSE]
+    colnames(result$predictions) <- unique(y)
+    if (is.factor(y)) {
+      result$predictions <- result$predictions[, levels(droplevels(y)), drop = FALSE]
     }
   }
   
@@ -856,24 +847,24 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
     result$treetype <- "Probability estimation"
   }
   if (treetype == 3) {
-    result$r.squared <- 1 - result$prediction.error / var(response)
+    result$r.squared <- 1 - result$prediction.error / var(y)
   }
   result$call <- sys.call()
   result$importance.mode <- importance
-  result$num.samples <- nrow(data.final)
+  result$num.samples <- nrow(x)
   result$replace <- replace
   
   ## Write forest object
   if (write.forest) {
-    if (is.factor(response)) {
-      result$forest$levels <- levels(response)
+    if (is.factor(y)) {
+      result$forest$levels <- levels(y)
     }
     result$forest$independent.variable.names <- independent.variable.names
     result$forest$treetype <- result$treetype
     class(result$forest) <- "ranger.forest"
     
     ## In 'ordered' mode, save covariate levels
-    if (respect.unordered.factors == "order" && !is.matrix(data)) {
+    if (respect.unordered.factors == "order" && ncol(x) > 0) {
       result$forest$covariate.levels <- covariate.levels
     }
   }
@@ -882,14 +873,14 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
   
   ## Prepare quantile prediction
   if (quantreg) {
-    terminal.nodes <- predict(result, data, type = "terminalNodes")$predictions + 1
+    terminal.nodes <- predict(result, x, type = "terminalNodes")$predictions + 1
     n <- result$num.samples
     result$random.node.values <- matrix(nrow = max(terminal.nodes), ncol = num.trees)
     
     ## Select one random obs per node and tree
     for (tree in 1:num.trees){
       idx <- sample(1:n, n)
-      result$random.node.values[terminal.nodes[idx, tree], tree] <- response[idx]
+      result$random.node.values[terminal.nodes[idx, tree], tree] <- y[idx]
     }
     
     ## Prepare out-of-bag quantile regression
@@ -909,7 +900,7 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
           for (j in 1:num.oob) {
             idx <- terminal.nodes[, tree] == oob.nodes[j]
             idx[oob.obs[j]] <- FALSE
-            random.node.values.oob[oob.obs[j], tree] <- save.sample(response[idx], size = 1)
+            random.node.values.oob[oob.obs[j], tree] <- save.sample(y[idx], size = 1)
           }
         }
       }
diff --git a/R/treeInfo.R b/R/treeInfo.R
index e197ed1f2..bcff7a27c 100644
--- a/R/treeInfo.R
+++ b/R/treeInfo.R
@@ -68,14 +68,13 @@ treeInfo <- function(object, tree = 1) {
   if (is.null(forest)) {
     stop("Error: No saved forest in ranger object. Please set write.forest to TRUE when calling ranger.")
   }
-  if (is.null(forest$dependent.varID) || is.null(forest$num.trees) ||
+  if (is.null(forest$num.trees) ||
       is.null(forest$child.nodeIDs) || is.null(forest$split.varIDs) ||
       is.null(forest$split.values) || is.null(forest$independent.variable.names) ||
       is.null(forest$treetype)) {
     stop("Error: Invalid forest object.")
   }
-  if (forest$treetype == "Survival" && (is.null(forest$status.varID)  ||
-                                        is.null(forest$chf) || is.null(forest$unique.death.times))) {
+  if (forest$treetype == "Survival" && (is.null(forest$chf) || is.null(forest$unique.death.times))) {
     stop("Error: Invalid forest object.")
   }
   if (length(forest$child.nodeIDs) != forest$num.trees || length(forest$child.nodeIDs[[1]]) != 2) {
@@ -99,25 +98,19 @@ treeInfo <- function(object, tree = 1) {
   result$splitvarID[result$terminal] <- NA
   result$splitvarName[result$terminal] <- NA
   result$splitval[result$terminal] <- NA
-
-  ## Get names of splitting variables 
-  # should be -1 for all >= dependent.varID but +1 change for 1-index
-  # for survival another -1 if >= status.varID
-  independent.varID <- result$splitvarID
-  idx <- !is.na(result$splitvarID) & result$splitvarID < forest$dependent.varID
-  independent.varID[idx] <- result$splitvarID[idx] + 1
-  if (forest$treetype == "Survival") {
-    idx <- !is.na(result$splitvarID) & result$splitvarID >= forest$status.varID
-    independent.varID[idx] <- independent.varID[idx] - 1
-  }
-  result$splitvarName <- forest$independent.variable.names[independent.varID]
+  result$splitvarName <- forest$independent.variable.names[result$splitvarID + 1]
 
   ## Unordered splitting
   idx.unordered <- !result$terminal & !forest$is.ordered[result$splitvarID + 1]
   if (any(idx.unordered)) {
-    result$splitval[idx.unordered] <- sapply(result$splitval[idx.unordered], function(x) {
-      paste(which(as.logical(intToBits(x))), collapse = ",")
-    })
+    if (any(result$splitval[idx.unordered] > (2^31 - 1))) {
+      warning("Unordered splitting levels can only be shown for up to 31 levels.")
+      result$splitval[idx.unordered] <- NA
+    } else {
+      result$splitval[idx.unordered] <- sapply(result$splitval[idx.unordered], function(x) {
+        paste(which(as.logical(intToBits(x))), collapse = ",")
+      })
+    }
   }
   
   ## Prediction
diff --git a/cpp_version/.cproject b/cpp_version/.cproject
index 7c0cd1c8d..b4e456f7f 100644
--- a/cpp_version/.cproject
+++ b/cpp_version/.cproject
@@ -5,12 +5,12 @@
 			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.1266040536" moduleId="org.eclipse.cdt.core.settings" name="Default">
 				<externalSettings/>
 				<extensions>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
 					<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
-					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
 				</extensions>
 			</storageModule>
 			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
diff --git a/cpp_version/src/utility/ArgumentHandler.cpp b/cpp_version/src/utility/ArgumentHandler.cpp
index cc2afddc0..15f1a9cb8 100644
--- a/cpp_version/src/utility/ArgumentHandler.cpp
+++ b/cpp_version/src/utility/ArgumentHandler.cpp
@@ -1,13 +1,13 @@
 /*-------------------------------------------------------------------------------
-This file is part of ranger.
+ This file is part of ranger.
 
-Copyright (c) [2014-2018] [Marvin N. Wright]
+ Copyright (c) [2014-2018] [Marvin N. Wright]
 
-This software may be modified and distributed under the terms of the MIT license.
+ This software may be modified and distributed under the terms of the MIT license.
 
-Please note that the C++ core of ranger is distributed under MIT license and the
-R package "ranger" under GPL3 license.
-#-------------------------------------------------------------------------------*/
+ Please note that the C++ core of ranger is distributed under MIT license and the
+ R package "ranger" under GPL3 license.
+ #-------------------------------------------------------------------------------*/
 
 #include <fstream>
 #include <iostream>
@@ -36,46 +36,24 @@ int ArgumentHandler::processArguments() {
   char const *short_options = "A:C:D:F:HM:NOP:Q:R:S:U:XZa:b:c:d:f:hil::m:o:pr:s:t:uvwy:z:";
 
   // long options: longname, no/optional/required argument?, flag(not used!), shortname
-    const struct option long_options[] = {
-
-      { "alwayssplitvars",      required_argument,  0, 'A'},
-      { "caseweights",          required_argument,  0, 'C'},
-      { "depvarname",           required_argument,  0, 'D'},
-      { "fraction",             required_argument,  0, 'F'},
-      { "holdout",              no_argument,        0, 'H'},
-      { "memmode",              required_argument,  0, 'M'},
-      { "savemem",              no_argument,        0, 'N'},
-      { "skipoob",              no_argument,        0, 'O'},
-      { "predict",              required_argument,  0, 'P'},
-      { "predictiontype",       required_argument,  0, 'Q'},
-      { "randomsplits",         required_argument,  0, 'R'},
-      { "splitweights",         required_argument,  0, 'S'},
-      { "nthreads",             required_argument,  0, 'U'},
-      { "predall",              no_argument,        0, 'X'},
-      { "version",              no_argument,        0, 'Z'},
-
-      { "alpha",                required_argument,  0, 'a'},
-      { "minprop",              required_argument,  0, 'b'},
-      { "catvars",              required_argument,  0, 'c'},
-      { "maxdepth",             required_argument,  0, 'd'},
-      { "file",                 required_argument,  0, 'f'},
-      { "help",                 no_argument,        0, 'h'},
-      { "impmeasure",           required_argument,  0, 'i'},
-      { "targetpartitionsize",  required_argument,  0, 'l'},
-      { "mtry",                 required_argument,  0, 'm'},
-      { "outprefix",            required_argument,  0, 'o'},
-      { "probability",          no_argument,        0, 'p'},
-      { "splitrule",            required_argument,  0, 'r'},
-      { "statusvarname",        required_argument,  0, 's'},
-      { "ntree",                required_argument,  0, 't'},
-      { "noreplace",            no_argument,        0, 'u'},
-      { "verbose",              no_argument,        0, 'v'},
-      { "write",                no_argument,        0, 'w'},
-      { "treetype",             required_argument,  0, 'y'},
-      { "seed",                 required_argument,  0, 'z'},
-
-      { 0, 0, 0, 0}
-    };
+  const struct option long_options[] = {
+
+  { "alwayssplitvars", required_argument, 0, 'A' }, { "caseweights", required_argument, 0, 'C' }, { "depvarname",
+      required_argument, 0, 'D' }, { "fraction", required_argument, 0, 'F' }, { "holdout", no_argument, 0, 'H' }, {
+      "memmode", required_argument, 0, 'M' }, { "savemem", no_argument, 0, 'N' }, { "skipoob", no_argument, 0, 'O' }, {
+      "predict", required_argument, 0, 'P' }, { "predictiontype", required_argument, 0, 'Q' }, { "randomsplits",
+      required_argument, 0, 'R' }, { "splitweights", required_argument, 0, 'S' }, { "nthreads", required_argument, 0,
+      'U' }, { "predall", no_argument, 0, 'X' }, { "version", no_argument, 0, 'Z' },
+
+  { "alpha", required_argument, 0, 'a' }, { "minprop", required_argument, 0, 'b' }, { "catvars", required_argument, 0,
+      'c' }, { "maxdepth", required_argument, 0, 'd' }, { "file", required_argument, 0, 'f' }, { "help", no_argument, 0,
+      'h' }, { "impmeasure", required_argument, 0, 'i' }, { "targetpartitionsize", required_argument, 0, 'l' }, {
+      "mtry", required_argument, 0, 'm' }, { "outprefix", required_argument, 0, 'o' }, { "probability", no_argument, 0,
+      'p' }, { "splitrule", required_argument, 0, 'r' }, { "statusvarname", required_argument, 0, 's' }, { "ntree",
+      required_argument, 0, 't' }, { "noreplace", no_argument, 0, 'u' }, { "verbose", no_argument, 0, 'v' }, { "write",
+      no_argument, 0, 'w' }, { "treetype", required_argument, 0, 'y' }, { "seed", required_argument, 0, 'z' },
+
+  { 0, 0, 0, 0 } };
 
   while (1) {
     int option_index = 0;
@@ -142,22 +120,22 @@ int ArgumentHandler::processArguments() {
       break;
 
     case 'Q':
-          try {
-            switch (std::stoi(optarg)) {
-            case 1:
-              predictiontype = RESPONSE;
-              break;
-            case 2:
-              predictiontype = TERMINALNODES;
-              break;
-            default:
-              throw std::runtime_error("");
-              break;
-            }
-          } catch (...) {
-            throw std::runtime_error("Illegal prediction type selected. See '--help' for details.");
-          }
+      try {
+        switch (std::stoi(optarg)) {
+        case 1:
+          predictiontype = RESPONSE;
+          break;
+        case 2:
+          predictiontype = TERMINALNODES;
           break;
+        default:
+          throw std::runtime_error("");
+          break;
+        }
+      } catch (...) {
+        throw std::runtime_error("Illegal prediction type selected. See '--help' for details.");
+      }
+      break;
 
     case 'R':
       try {
@@ -424,7 +402,7 @@ void ArgumentHandler::checkArguments() {
     throw std::runtime_error("Please specify a dependent variable name with '--depvarname'. See '--help' for details.");
   }
 
-  if (treetype == TREE_SURVIVAL && statusvarname.empty()) {
+  if (predict.empty() && treetype == TREE_SURVIVAL && statusvarname.empty()) {
     throw std::runtime_error("Please specify a status variable name with '--statusvarname'. See '--help' for details.");
   }
   if (treetype != TREE_SURVIVAL && !statusvarname.empty()) {
@@ -448,8 +426,8 @@ void ArgumentHandler::checkArguments() {
       throw std::runtime_error("Could not read from input file: " + predict + ".");
     }
 
-    // Do not read dependent_varID, num_variables, num_trees and is_ordered_variable
-    infile.seekg(2 * sizeof(size_t));
+    // Do not read num_variables, num_trees and is_ordered_variable
+    infile.seekg(sizeof(size_t));
     size_t length;
     infile.read((char*) &length, sizeof(length));
     infile.seekg(4 * sizeof(size_t) + length * sizeof(bool));
@@ -469,7 +447,8 @@ void ArgumentHandler::checkArguments() {
 
   // Check splitrule
   if (((splitrule == AUC || splitrule == AUC_IGNORE_TIES) && treetype != TREE_SURVIVAL)
-      || (splitrule == MAXSTAT && (treetype != TREE_SURVIVAL && treetype != TREE_REGRESSION))) {
+      || (splitrule == MAXSTAT && (treetype != TREE_SURVIVAL && treetype != TREE_REGRESSION))
+      || (splitrule == BETA && treetype != TREE_REGRESSION)) {
     throw std::runtime_error("Illegal splitrule selected. See '--help' for details.");
   }
 
@@ -503,64 +482,119 @@ void ArgumentHandler::displayHelp() {
   std::cout << "    " << "--help                        Print this help." << std::endl;
   std::cout << "    " << "--version                     Print version and citation information." << std::endl;
   std::cout << "    " << "--verbose                     Turn on verbose mode." << std::endl;
-  std::cout << "    " << "--file FILE                   Filename of input data. Only numerical values are supported." << std::endl;
+  std::cout << "    " << "--file FILE                   Filename of input data. Only numerical values are supported."
+      << std::endl;
   std::cout << "    " << "--treetype TYPE               Set tree type to:" << std::endl;
   std::cout << "    " << "                              TYPE = 1: Classification." << std::endl;
   std::cout << "    " << "                              TYPE = 3: Regression." << std::endl;
   std::cout << "    " << "                              TYPE = 5: Survival." << std::endl;
   std::cout << "    " << "                              (Default: 1)" << std::endl;
-  std::cout << "    " << "--probability                 Grow a Classification forest with probability estimation for the classes." << std::endl;
+  std::cout << "    "
+      << "--probability                 Grow a Classification forest with probability estimation for the classes."
+      << std::endl;
   std::cout << "    " << "                              Use in combination with --treetype 1." << std::endl;
-  std::cout << "    " << "--depvarname NAME             Name of dependent variable. For survival trees this is the time variable." << std::endl;
-  std::cout << "    " << "--statusvarname NAME          Name of status variable, only applicable for survival trees." << std::endl;
+  std::cout << "    "
+      << "--depvarname NAME             Name of dependent variable. For survival trees this is the time variable."
+      << std::endl;
+  std::cout << "    " << "--statusvarname NAME          Name of status variable, only applicable for survival trees."
+      << std::endl;
   std::cout << "    " << "                              Coding is 1 for event and 0 for censored." << std::endl;
   std::cout << "    " << "--ntree N                     Set number of trees to N." << std::endl;
   std::cout << "    " << "                              (Default: 500)" << std::endl;
-  std::cout << "    " << "--mtry N                      Number of variables to possibly split at in each node." << std::endl;
-  std::cout << "    " << "                              (Default: sqrt(p) with p = number of independent variables)" << std::endl;
+  std::cout << "    " << "--mtry N                      Number of variables to possibly split at in each node."
+      << std::endl;
+  std::cout << "    " << "                              (Default: sqrt(p) with p = number of independent variables)"
+      << std::endl;
   std::cout << "    " << "--targetpartitionsize N       Set minimal node size to N." << std::endl;
-  std::cout << "    " << "                              For Classification and Regression growing is stopped if a node reaches a size smaller than N." << std::endl;
-  std::cout << "    " << "                              For Survival growing is stopped if one child would reach a size smaller than N." << std::endl;
-  std::cout << "    " << "                              This means nodes with size smaller N can occur for Classification and Regression." << std::endl;
-  std::cout << "    " << "                              (Default: 1 for Classification, 5 for Regression, and 3 for Survival)" << std::endl;
+  std::cout << "    "
+      << "                              For Classification and Regression growing is stopped if a node reaches a size smaller than N."
+      << std::endl;
+  std::cout << "    "
+      << "                              For Survival growing is stopped if one child would reach a size smaller than N."
+      << std::endl;
+  std::cout << "    "
+      << "                              This means nodes with size smaller N can occur for Classification and Regression."
+      << std::endl;
+  std::cout << "    "
+      << "                              (Default: 1 for Classification, 5 for Regression, and 3 for Survival)"
+      << std::endl;
   std::cout << "    " << "--maxdepth N                  Set maximal tree depth to N." << std::endl;
-  std::cout << "    " << "                              Set to 0 for unlimited depth. A value of 1 corresponds to tree stumps (1 split)." << std::endl;
-  std::cout << "    " << "--catvars V1,V2,..            Comma separated list of names of (unordered) categorical variables. " << std::endl;
-  std::cout << "    " << "                              Categorical variables must contain only positive integer values." << std::endl;
+  std::cout << "    "
+      << "                              Set to 0 for unlimited depth. A value of 1 corresponds to tree stumps (1 split)."
+      << std::endl;
+  std::cout << "    "
+      << "--catvars V1,V2,..            Comma separated list of names of (unordered) categorical variables. "
+      << std::endl;
+  std::cout << "    "
+      << "                              Categorical variables must contain only positive integer values." << std::endl;
   std::cout << "    " << "--write                       Save forest to file <outprefix>.forest." << std::endl;
-  std::cout << "    " << "--predict FILE                Load forest from FILE and predict with new data. The new data is expected in the exact same " << std::endl;
-  std::cout << "    " << "                              shape as the training data. If the outcome of your new dataset is unknown, add a dummy column." << std::endl;
-  std::cout << "    " << "--predall                     Return a matrix with individual predictions for each tree instead of aggregated " << std::endl;
-  std::cout << "    " << "                              predictions for all trees (classification and regression only)." << std::endl;
+  std::cout << "    "
+      << "--predict FILE                Load forest from FILE and predict with new data. The new data is expected in the exact same "
+      << std::endl;
+  std::cout << "    "
+      << "                              shape as the training data. If the outcome of your new dataset is unknown, add a dummy column."
+      << std::endl;
+  std::cout << "    "
+      << "--predall                     Return a matrix with individual predictions for each tree instead of aggregated "
+      << std::endl;
+  std::cout << "    " << "                              predictions for all trees (classification and regression only)."
+      << std::endl;
   std::cout << "    " << "--predictiontype TYPE         Set type of prediction to:" << std::endl;
   std::cout << "    " << "                              TYPE = 1: Return predicted classes or values." << std::endl;
-  std::cout << "    " << "                              TYPE = 2: Return terminal node IDs per tree for new observations." << std::endl;
+  std::cout << "    "
+      << "                              TYPE = 2: Return terminal node IDs per tree for new observations." << std::endl;
   std::cout << "    " << "                              (Default: 1)" << std::endl;
   std::cout << "    " << "--impmeasure TYPE             Set importance mode to:" << std::endl;
   std::cout << "    " << "                              TYPE = 0: none." << std::endl;
-  std::cout << "    " << "                              TYPE = 1: Node impurity: Gini for Classification, variance for Regression, sum of test statistic for Survival." << std::endl;
-  std::cout << "    " << "                              TYPE = 2: Permutation importance, scaled by standard errors." << std::endl;
+  std::cout << "    "
+      << "                              TYPE = 1: Node impurity: Gini for Classification, variance for Regression, sum of test statistic for Survival."
+      << std::endl;
+  std::cout << "    " << "                              TYPE = 2: Permutation importance, scaled by standard errors."
+      << std::endl;
   std::cout << "    " << "                              TYPE = 3: Permutation importance, no scaling." << std::endl;
-  std::cout << "    " << "                              TYPE = 5: Corrected node impurity: Bias-corrected version of node impurity importance." << std::endl;
+  std::cout << "    "
+      << "                              TYPE = 5: Corrected node impurity: Bias-corrected version of node impurity importance."
+      << std::endl;
   std::cout << "    " << "                              (Default: 0)" << std::endl;
   std::cout << "    " << "--noreplace                   Sample without replacement." << std::endl;
-  std::cout << "    " << "--fraction X                  Fraction of observations to sample. Default is 1 for sampling with replacement " << std::endl;
+  std::cout << "    "
+      << "--fraction X                  Fraction of observations to sample. Default is 1 for sampling with replacement "
+      << std::endl;
   std::cout << "    " << "                              and 0.632 for sampling without replacement." << std::endl;
   std::cout << "    " << "--splitrule RULE              Splitting rule:" << std::endl;
-  std::cout << "    " << "                              RULE = 1: Gini for Classification, variance for Regression, logrank for Survival." << std::endl;
-  std::cout << "    " << "                              RULE = 2: AUC for Survival, not available for Classification and Regression." << std::endl;
-  std::cout << "    " << "                              RULE = 3: AUC (ignore ties) for Survival, not available for Classification and Regression." << std::endl;
-  std::cout << "    " << "                              RULE = 4: MAXSTAT for Survival and Regression, not available for Classification." << std::endl;
+  std::cout << "    "
+      << "                              RULE = 1: Gini for Classification, variance for Regression, logrank for Survival."
+      << std::endl;
+  std::cout << "    "
+      << "                              RULE = 2: AUC for Survival, not available for Classification and Regression."
+      << std::endl;
+  std::cout << "    "
+      << "                              RULE = 3: AUC (ignore ties) for Survival, not available for Classification and Regression."
+      << std::endl;
+  std::cout << "    "
+      << "                              RULE = 4: MAXSTAT for Survival and Regression, not available for Classification."
+      << std::endl;
   std::cout << "    " << "                              RULE = 5: ExtraTrees for all tree types." << std::endl;
+  std::cout << "    " << "                              RULE = 6: BETA for regression." << std::endl;
   std::cout << "    " << "                              (Default: 1)" << std::endl;
-  std::cout << "    " << "--randomsplits N              Number of random splits to consider for each splitting variable (ExtraTrees splitrule only)." << std::endl;
-  std::cout << "    " << "--alpha VAL                   Significance threshold to allow splitting (MAXSTAT splitrule only)." << std::endl;
-  std::cout << "    " << "--minprop VAL                 Lower quantile of covariate distribtuion to be considered for splitting (MAXSTAT splitrule only)." << std::endl;
+  std::cout << "    "
+      << "--randomsplits N              Number of random splits to consider for each splitting variable (ExtraTrees splitrule only)."
+      << std::endl;
+  std::cout << "    "
+      << "--alpha VAL                   Significance threshold to allow splitting (MAXSTAT splitrule only)."
+      << std::endl;
+  std::cout << "    "
+      << "--minprop VAL                 Lower quantile of covariate distribtuion to be considered for splitting (MAXSTAT splitrule only)."
+      << std::endl;
   std::cout << "    " << "--caseweights FILE            Filename of case weights file." << std::endl;
-  std::cout << "    " << "--holdout                     Hold-out mode. Hold-out all samples with case weight 0 and use these for variable " << std::endl;
+  std::cout << "    "
+      << "--holdout                     Hold-out mode. Hold-out all samples with case weight 0 and use these for variable "
+      << std::endl;
   std::cout << "    " << "                              importance and prediction error." << std::endl;
   std::cout << "    " << "--splitweights FILE           Filename of split select weights file." << std::endl;
-  std::cout << "    " << "--alwayssplitvars V1,V2,..    Comma separated list of variable names to be always considered for splitting." << std::endl;
+  std::cout << "    "
+      << "--alwayssplitvars V1,V2,..    Comma separated list of variable names to be always considered for splitting."
+      << std::endl;
   std::cout << "    " << "--skipoob                     Skip computation of OOB error." << std::endl;
   std::cout << "    " << "--nthreads N                  Set number of parallel threads to N." << std::endl;
   std::cout << "    " << "                              (Default: Number of CPUs available)" << std::endl;
@@ -582,11 +616,15 @@ void ArgumentHandler::displayVersion() {
   std::cout << "Ranger version: " << RANGER_VERSION << std::endl;
   std::cout << std::endl;
   std::cout << "Please cite Ranger: " << std::endl;
-  std::cout << "Wright, M. N. & Ziegler, A. (2017). ranger: A Fast Implementation of Random Forests for High Dimensional Data in C++ and R. Journal of Statistical Software 77:1-17." << std::endl;
+  std::cout
+      << "Wright, M. N. & Ziegler, A. (2017). ranger: A Fast Implementation of Random Forests for High Dimensional Data in C++ and R. Journal of Statistical Software 77:1-17."
+      << std::endl;
   std::cout << std::endl;
   std::cout << "BibTeX:" << std::endl;
   std::cout << "@Article{," << std::endl;
-  std::cout << "    title = {{ranger}: A Fast Implementation of Random Forests for High Dimensional Data in {C++} and {R}," << std::endl;
+  std::cout
+      << "    title = {{ranger}: A Fast Implementation of Random Forests for High Dimensional Data in {C++} and {R},"
+      << std::endl;
   std::cout << "    author = {Wright, Marvin N. and Ziegler, Andreas}," << std::endl;
   std::cout << "    journal = {Journal of Statistical Software}," << std::endl;
   std::cout << "    year = {2017}," << std::endl;
diff --git a/cpp_version/src/utility/DataChar.cpp b/cpp_version/src/utility/DataChar.cpp
deleted file mode 120000
index 364b3291d..000000000
--- a/cpp_version/src/utility/DataChar.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../../src/DataChar.cpp
\ No newline at end of file
diff --git a/cpp_version/src/utility/DataFloat.cpp b/cpp_version/src/utility/DataFloat.cpp
deleted file mode 120000
index 7b07157f7..000000000
--- a/cpp_version/src/utility/DataFloat.cpp
+++ /dev/null
@@ -1 +0,0 @@
-../../../src/DataFloat.cpp
\ No newline at end of file
diff --git a/cpp_version/src/version.h b/cpp_version/src/version.h
index 2ebfb79b4..5c6aeecbb 100644
--- a/cpp_version/src/version.h
+++ b/cpp_version/src/version.h
@@ -1,3 +1,3 @@
 #ifndef RANGER_VERSION
-#define RANGER_VERSION "0.11.4"
+#define RANGER_VERSION "0.11.5"
 #endif
diff --git a/cpp_version/test/test_maxstat.R b/cpp_version/test/test_maxstat.R
deleted file mode 100644
index c9a1d4066..000000000
--- a/cpp_version/test/test_maxstat.R
+++ /dev/null
@@ -1,105 +0,0 @@
-library(ranger)
-library(maxstatRF)
-library(survival)
-library(reshape2)
-library(ggplot2)
-
-# Compare survival curves -------------------------------------------------
-idx <- sample(nrow(veteran), 2/3*nrow(veteran), replace = FALSE)
-train_data <- veteran[idx, ]
-test_data <- veteran[-idx, ]
-
-num_trees <- 50
-mtry <- 3
-minprop <- 0.1
-alpha <- 0.5
-min_node_size <- 5
-replace <- TRUE
-pmethod <- "minLau"
-
-rf_maxstatRF <- maxstatRF(Surv(time, status) ~., train_data, 
-                          num_trees = num_trees, mtry = mtry, minprop = minprop, 
-                          alpha = alpha, min_node_size = min_node_size, replace = replace, 
-                          pmethod = pmethod)
-
-rf_ranger <- ranger(Surv(time, status) ~., train_data, splitrule = "maxstat",
-                    write.forest = TRUE,
-                    num.trees = num_trees, mtry = mtry, minprop = minprop, 
-                    alpha = alpha, min.node.size = min_node_size, replace = replace)
-
-pred_maxstatRF <- rf_maxstatRF$predict(test_data)
-pred_ranger <- predict(rf_ranger, test_data)$survival
-
-colnames(pred_maxstatRF) <- rf_maxstatRF$timepoints
-colnames(pred_ranger) <- timepoints(rf_ranger)
-df <- rbind(data.frame(Package = "maxstatRF", melt(pred_maxstatRF, value.name = "Survival", varnames = c("ID", "Time"))), 
-            data.frame(Package = "ranger", melt(pred_ranger, value.name = "Survival", varnames = c("ID", "Time"))))
-
-ggplot(df, aes(x = Time, y = Survival, color = Package)) + 
-  geom_line() + 
-  facet_wrap(~ID)
-
-# Compare Brier score -------------------------------------------------
-library(ranger)
-library(maxstatRF)
-library(survival)
-library(prodlim)
-library(pec)
-
-## Pec function for maxstatRF
-predictSurvProb.ForestSurvival <- function(object, newdata, times, ...) {
-  pred <- object$predict(newdata)
-  pos <- sapply(times, function(x) {
-    max(c(0, which(x >= object$timepoints)))
-  })    
-  p <- cbind(1, pred)[, pos+1, drop = FALSE]
-  return(p)
-}
-
-## Pec function for ranger
-predictSurvProb.ranger <- function(object, newdata, times, ...) {
-  pred <- predict(object, newdata)
-  pos <- sapply(times, function(x) {
-    max(c(0, which(x >= pred$unique.death.times)))
-  })    
-  p <- cbind(1, predictions(pred))[, pos+1, drop = FALSE]
-  return(p)
-}
-
-## Parameters
-num_trees <- 50
-mtry <- 3
-minprop <- 0.1
-alpha <- 0.5
-min_node_size <- 5
-replace <- TRUE
-pmethod <- "minLau"
-formula <- Surv(time, status) ~.
-
-compare_packages <- function() {
-  ## Data
-  idx <- sample(nrow(veteran), 2/3*nrow(veteran), replace = FALSE)
-  train_data <- veteran[idx, ]
-  test_data <- veteran[-idx, ]
-  time <- sort(unique(train_data$time))
-  
-  ## Run
-  rf_maxstatRF <- maxstatRF(formula, train_data, 
-                            num_trees = num_trees, mtry = mtry, minprop = minprop, 
-                            alpha = alpha, min_node_size = min_node_size, replace = replace, 
-                            pmethod = pmethod)
-  
-  rf_ranger <- ranger(formula, train_data, splitrule = "maxstat",
-                      write.forest = TRUE,
-                      num.trees = num_trees, mtry = mtry, minprop = minprop, 
-                      alpha = alpha, min.node.size = min_node_size, replace = replace)
-  
-  ## Compare
-  fit.pec <- pec(list(maxstatRF = rf_maxstatRF, ranger = rf_ranger), 
-                 formula = formula, data = test_data, 
-                 times = time, cens.model = "marginal", reference = FALSE)
-  crps(fit.pec)[, 1]
-}
-
-res <- replicate(100, compare_packages())
-boxplot(t(res))
diff --git a/cpp_version/test/testthat.R b/cpp_version/test/testthat.R
new file mode 100644
index 000000000..5aea37d66
--- /dev/null
+++ b/cpp_version/test/testthat.R
@@ -0,0 +1,28 @@
+
+library(ranger)
+library(data.table)
+library(testthat)
+library(survival)
+
+# Function to call C++ version from R
+ranger_cpp <- function(data, ...) {
+  if (is.data.frame(data) && any(sapply(data, is.numeric))) {
+    idx_numeric <- sapply(data, is.numeric)
+    data[, !idx_numeric] <- lapply(data[, !idx_numeric, drop = FALSE], as.numeric)
+  }
+  fwrite(data, "temp_data.csv")
+  ret <- system2("../../build/ranger", 
+                 args = c("--verbose", "--file temp_data.csv", paste0("--", names(list(...)), " ", list(...))), 
+                 stdout = TRUE, stderr = TRUE)
+  if (length(ret) == 1 &&  nchar(ret) >= 5 && substr(ret, 1, 5) == "Error") {
+    stop(ret)
+  }
+  unlink("temp_data.csv")
+  ret
+}
+
+test_dir("testthat")
+
+
+
+
diff --git a/cpp_version/test/testthat/.gitignore b/cpp_version/test/testthat/.gitignore
new file mode 100644
index 000000000..b1873686f
--- /dev/null
+++ b/cpp_version/test/testthat/.gitignore
@@ -0,0 +1 @@
+ranger_out.*
\ No newline at end of file
diff --git a/cpp_version/test/testthat/test_arguments.R b/cpp_version/test/testthat/test_arguments.R
new file mode 100644
index 000000000..23a1786d2
--- /dev/null
+++ b/cpp_version/test/testthat/test_arguments.R
@@ -0,0 +1,11 @@
+
+context("ranger_cpp_arguments")
+
+test_that("Error if sample fraction is 0 or >1", {
+  expect_warning(
+    expect_error(ranger_cpp(data = iris, depvarname = "Species", ntree = 5, fraction = 0), 
+      "Error: Illegal argument for option 'fraction'\\. Please give a value in \\(0,1\\]\\. See '--help' for details\\. Ranger will EXIT now\\."))
+  expect_warning(
+    expect_error(ranger_cpp(data = iris, depvarname = "Species", ntree = 5, fraction = 1.1), 
+      "Error: Illegal argument for option 'fraction'\\. Please give a value in \\(0,1\\]\\. See '--help' for details\\. Ranger will EXIT now\\."))
+})
diff --git a/cpp_version/test/testthat/test_classification.R b/cpp_version/test/testthat/test_classification.R
new file mode 100644
index 000000000..b48ede461
--- /dev/null
+++ b/cpp_version/test/testthat/test_classification.R
@@ -0,0 +1,49 @@
+
+context("ranger_cpp_classification")
+
+test_that("Prediction is equal to R version", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Species", ntree = 5, write = "", seed = 10)
+  pred <- ranger_cpp(data = iris, predict = "ranger_out.forest", seed = 20)
+  preds_cpp <- as.data.frame(fread("ranger_out.prediction"))[, 1]
+  
+  # R version
+  rf <- ranger(Species ~ ., iris, num.trees = 5, seed = 10)
+  preds_r <- as.numeric(predict(rf, iris, seed = 20)$predictions)
+  
+  expect_equal(preds_cpp, preds_r)
+})
+
+test_that("Predictions are positive numbers", {
+  rf <- ranger_cpp(data = iris, depvarname = "Species", ntree = 5, write = "", seed = 10)
+  pred <- ranger_cpp(data = iris, predict = "ranger_out.forest")
+  preds_cpp <- as.data.frame(fread("ranger_out.prediction"))[, 1]
+  expect_is(preds_cpp, "integer")
+  expect_true(all(preds_cpp > 0))
+})
+
+test_that("Same result with default splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Species", ntree = 5, seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Species ~ ., iris, num.trees = 5, seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
+
+test_that("Same result with extratrees splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Species", ntree = 5, splitrule = 5, seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Species ~ ., iris, num.trees = 5, splitrule = "extratrees", seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
diff --git a/cpp_version/test/testthat/test_probability.R b/cpp_version/test/testthat/test_probability.R
new file mode 100644
index 000000000..a2a2c0b3a
--- /dev/null
+++ b/cpp_version/test/testthat/test_probability.R
@@ -0,0 +1,53 @@
+
+context("ranger_cpp_probability")
+
+test_that("Prediction is equal to R version", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Species", probability = "", ntree = 5, write = "", seed = 10)
+  pred <- ranger_cpp(data = iris, predict = "ranger_out.forest", probability = "", seed = 20)
+  preds_cpp <- as.matrix(fread("ranger_out.prediction"))
+  colnames(preds_cpp) <- NULL
+  
+  # R version
+  rf <- ranger(Species ~ ., iris, probability = TRUE, num.trees = 5, seed = 10)
+  preds_r <- predict(rf, iris, seed = 20)$predictions
+  colnames(preds_r) <- NULL
+  
+  expect_equal(round(preds_cpp, 4), round(preds_r, 4))
+})
+
+test_that("Predictions are probabilites", {
+  rf <- ranger_cpp(data = iris, depvarname = "Species", probability = "", ntree = 5, write = "", seed = 10)
+  pred <- ranger_cpp(data = iris, predict = "ranger_out.forest", probability = "")
+  preds_cpp <- as.matrix(fread("ranger_out.prediction"))
+  expect_is(preds_cpp, "matrix")
+  expect_equal(dim(preds_cpp), c(150, 3))
+  expect_true(all(preds_cpp >= 0))
+  expect_true(all(preds_cpp <= 1))
+})
+
+test_that("Same result with default splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Species", probability = "", ntree = 5, seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Species ~ ., iris, probability = TRUE, num.trees = 5, seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
+
+test_that("Same result with extratrees splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Species", probability = "", ntree = 5, splitrule = 5, seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Species ~ ., iris, probability = TRUE, num.trees = 5, splitrule = "extratrees", seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
diff --git a/cpp_version/test/testthat/test_regression.R b/cpp_version/test/testthat/test_regression.R
new file mode 100644
index 000000000..015955bc0
--- /dev/null
+++ b/cpp_version/test/testthat/test_regression.R
@@ -0,0 +1,98 @@
+
+context("ranger_cpp_regression")
+
+test_that("Prediction is equal to R version", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Sepal.Length", treetype = 3, ntree = 5, write = "", seed = 10)
+  pred <- ranger_cpp(data = iris, treetype = 3, predict = "ranger_out.forest", seed = 20)
+  preds_cpp <- as.data.frame(fread("ranger_out.prediction"))[, 1]
+  
+  # R version
+  rf <- ranger(Sepal.Length ~ ., iris, num.trees = 5, seed = 10)
+  preds_r <- as.numeric(predict(rf, iris, seed = 20)$predictions)
+  
+  expect_equal(round(preds_cpp, 4), round(preds_r, 4))
+})
+
+test_that("Predictions are in range of original data", {
+  rf <- ranger_cpp(data = iris, depvarname = "Sepal.Width", treetype = 3, ntree = 5, write = "", seed = 10)
+  pred <- ranger_cpp(data = iris, predict = "ranger_out.forest", treetype = 3)
+  preds_cpp <- as.data.frame(fread("ranger_out.prediction"))[, 1]
+  expect_is(preds_cpp, "numeric")
+  expect_true(all(preds_cpp >= min(iris$Sepal.Width)))
+  expect_true(all(preds_cpp <= max(iris$Sepal.Width)))
+})
+
+test_that("Same result with default splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Petal.Length", treetype = 3, ntree = 5, seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Petal.Length ~ ., iris, num.trees = 5, seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
+
+test_that("Same result with extratrees splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Sepal.Length", treetype = 3, ntree = 5, splitrule = 5, catvars = "Species", seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Sepal.Length ~ ., iris, num.trees = 5, splitrule = "extratrees", seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
+
+test_that("Same result with maxstat splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Sepal.Length", treetype = 3, ntree = 5, splitrule = 4, seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Sepal.Length ~ ., iris, num.trees = 5, splitrule = "maxstat", seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
+
+test_that("Same result with beta splitting", {
+  # Generate data with 0..1 outcome
+  n <- 100
+  p <- 4
+  beta <- c(0, 1, 2, 3)
+  x <- round(replicate(p, runif(n)), 3)
+  y <- as.vector(x %*% beta)
+  y <- round((y-min(y))/(max(y)-min(y)), 3)
+  dat <- data.frame(y = y, x)
+  
+  # C++ version
+  rf <- ranger_cpp(data = dat, depvarname = "y", treetype = 3, ntree = 5, splitrule = 6, seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(y ~ ., dat, num.trees = 5, splitrule = "beta", seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
+
+test_that("Same result with unordered splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = iris, depvarname = "Sepal.Length", treetype = 3, ntree = 5, catvars = "Species", seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Sepal.Length ~ ., iris, num.trees = 5, respect.unordered.factors = "partition", seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
diff --git a/cpp_version/test/testthat/test_survival.R b/cpp_version/test/testthat/test_survival.R
new file mode 100644
index 000000000..0beb0e20d
--- /dev/null
+++ b/cpp_version/test/testthat/test_survival.R
@@ -0,0 +1,76 @@
+
+context("ranger_cpp_regression")
+
+test_that("Prediction is equal to R version", {
+  # C++ version
+  rf <- ranger_cpp(data = veteran, depvarname = "time", statusvarname = "status", treetype = 5, ntree = 5, write = "", seed = 10)
+  pred <- ranger_cpp(data = veteran, treetype = 5, predict = "ranger_out.forest", seed = 20)
+  preds_cpp <- as.matrix(fread("ranger_out.prediction", skip = 4))
+  dimnames(preds_cpp) <- NULL
+  
+  # R version
+  rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5, seed = 10)
+  preds_r <- predict(rf, veteran, seed = 20)$chf
+  
+  expect_equal(round(preds_cpp, 2), round(preds_r, 2))
+})
+
+test_that("Predictions are positive numerics", {
+  rf <- ranger_cpp(data = veteran, depvarname = "time", statusvarname = "status", treetype = 5, ntree = 5, write = "", seed = 10)
+  pred <- ranger_cpp(data = veteran, predict = "ranger_out.forest", treetype = 5)
+  preds_cpp <- as.matrix(fread("ranger_out.prediction", skip = 4))
+  expect_is(preds_cpp, "matrix")
+  expect_true(all(preds_cpp >= 0))
+})
+
+test_that("Same result with default splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = veteran, depvarname = "time", statusvarname = "status", treetype = 5, ntree = 5, seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5, seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
+
+test_that("Same result with extratrees splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = veteran, depvarname = "time", statusvarname = "status", treetype = 5, ntree = 5, splitrule = 5,  catvars = "celltype", seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5, splitrule = "extratrees", seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
+
+test_that("Same result with maxstat splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = veteran, depvarname = "time", statusvarname = "status", treetype = 5, ntree = 5, splitrule = 4, seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5, splitrule = "maxstat", seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
+
+test_that("Same result with unordered splitting", {
+  # C++ version
+  rf <- ranger_cpp(data = veteran, depvarname = "time", statusvarname = "status", treetype = 5, ntree = 5, catvars = "celltype", seed = 10)
+  err_cpp <- grep("Overall OOB prediction error", rf, value = TRUE)
+  err_cpp <- as.numeric(gsub("[^0-9.]", "", err_cpp))
+  
+  # R version
+  rf <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5, respect.unordered.factors = "partition", seed = 10)
+  err_r <- rf$prediction.error
+  
+  expect_equal(round(err_cpp, 4), round(err_r, 4))
+})
diff --git a/man/ranger.Rd b/man/ranger.Rd
index f1a5bd91a..199866408 100644
--- a/man/ranger.Rd
+++ b/man/ranger.Rd
@@ -16,7 +16,8 @@ ranger(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
   inbag = NULL, holdout = FALSE, quantreg = FALSE,
   oob.error = TRUE, num.threads = NULL, save.memory = FALSE,
   verbose = TRUE, seed = NULL, dependent.variable.name = NULL,
-  status.variable.name = NULL, classification = NULL)
+  status.variable.name = NULL, classification = NULL, x = NULL,
+  y = NULL)
 }
 \arguments{
 \item{formula}{Object of class \code{formula} or \code{character} describing the model to fit. Interaction terms supported only for numerical variables.}
@@ -84,6 +85,10 @@ ranger(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
 \item{status.variable.name}{Name of status variable, only applicable to survival data and needed if no formula given. Use 1 for event and 0 for censoring.}
 
 \item{classification}{Only needed if data is a matrix. Set to \code{TRUE} to grow a classification forest.}
+
+\item{x}{Predictor data (independent variables), alternative interface to data with formula or dependent.variable.name.}
+
+\item{y}{Response vector (dependent variable), alternative interface to data with formula or dependent.variable.name. For survival use a \code{Surv()} object or a matrix with time and status.}
 }
 \value{
 Object of class \code{ranger} with elements
@@ -148,7 +153,8 @@ See Nembrini et al. (2018) for details.
 This importance measure can be combined with the methods to estimate p-values in \code{\link{importance_pvalues}}.
 
 For a large number of variables and data frames as input data the formula interface can be slow or impossible to use.
-Alternatively \code{dependent.variable.name} (and \code{status.variable.name} for survival) can be used.
+Alternatively \code{dependent.variable.name} (and \code{status.variable.name} for survival) or \code{x} and \code{y} can be used.
+Use \code{x} and \code{y} with a matrix for \code{x} to avoid conversions and save memory.
 Consider setting \code{save.memory = TRUE} if you encounter memory problems for very large datasets, but be aware that this option slows down the tree growing.
 
 For GWAS data consider combining \code{ranger} with the \code{GenABEL} package. 
@@ -190,8 +196,9 @@ require(survival)
 rg.veteran <- ranger(Surv(time, status) ~ ., data = veteran)
 plot(rg.veteran$unique.death.times, rg.veteran$survival[1,])
 
-## Alternative interface
+## Alternative interfaces (same results)
 ranger(dependent.variable.name = "Species", data = iris)
+ranger(y = iris[, 5], x = iris[, -5])
 
 \dontrun{
 ## Use GenABEL interface to read Plink data into R and grow a classification forest
diff --git a/src/Data.cpp b/src/Data.cpp
index f3ca090d6..ca85ba166 100644
--- a/src/Data.cpp
+++ b/src/Data.cpp
@@ -33,14 +33,16 @@ size_t Data::getVariableID(const std::string& variable_name) const {
   return (std::distance(variable_names.cbegin(), it));
 }
 
+// #nocov start (cannot be tested anymore because GenABEL not on CRAN)
 void Data::addSnpData(unsigned char* snp_data, size_t num_cols_snp) {
   num_cols = num_cols_no_snp + num_cols_snp;
   num_rows_rounded = roundToNextMultiple(num_rows, 4);
   this->snp_data = snp_data;
 }
+// #nocov end
 
 // #nocov start
-bool Data::loadFromFile(std::string filename) {
+bool Data::loadFromFile(std::string filename, std::vector<std::string>& dependent_variable_names) {
 
   bool result;
 
@@ -67,11 +69,11 @@ bool Data::loadFromFile(std::string filename) {
 
   // Find out if comma, semicolon or whitespace seperated and call appropriate method
   if (header_line.find(",") != std::string::npos) {
-    result = loadFromFileOther(input_file, header_line, ',');
+    result = loadFromFileOther(input_file, header_line, dependent_variable_names, ',');
   } else if (header_line.find(";") != std::string::npos) {
-    result = loadFromFileOther(input_file, header_line, ';');
+    result = loadFromFileOther(input_file, header_line, dependent_variable_names, ';');
   } else {
-    result = loadFromFileWhitespace(input_file, header_line);
+    result = loadFromFileWhitespace(input_file, header_line, dependent_variable_names);
   }
 
   externalData = false;
@@ -79,19 +81,36 @@ bool Data::loadFromFile(std::string filename) {
   return result;
 }
 
-bool Data::loadFromFileWhitespace(std::ifstream& input_file, std::string header_line) {
+bool Data::loadFromFileWhitespace(std::ifstream& input_file, std::string header_line,
+    std::vector<std::string>& dependent_variable_names) {
+
+  size_t num_dependent_variables = dependent_variable_names.size();
+  std::vector<size_t> dependent_varIDs;
+  dependent_varIDs.resize(num_dependent_variables);
 
   // Read header
   std::string header_token;
   std::stringstream header_line_stream(header_line);
+  size_t col = 0;
   while (header_line_stream >> header_token) {
-    variable_names.push_back(header_token);
+    bool is_dependent_var = false;
+    for (size_t i = 0; i < dependent_variable_names.size(); ++i) {
+      if (header_token == dependent_variable_names[i]) {
+        dependent_varIDs[i] = col;
+        is_dependent_var = true;
+      }
+    }
+    if (!is_dependent_var) {
+      variable_names.push_back(header_token);
+    }
+    ++col;
   }
+
   num_cols = variable_names.size();
   num_cols_no_snp = num_cols;
 
   // Read body
-  reserveMemory();
+  reserveMemory(num_dependent_variables);
   bool error = false;
   std::string line;
   size_t row = 0;
@@ -100,13 +119,26 @@ bool Data::loadFromFileWhitespace(std::ifstream& input_file, std::string header_
     std::stringstream line_stream(line);
     size_t column = 0;
     while (readFromStream(line_stream, token)) {
-      set(column, row, token, error);
+      size_t column_x = column;
+      bool is_dependent_var = false;
+      for (size_t i = 0; i < dependent_varIDs.size(); ++i) {
+        if (column == dependent_varIDs[i]) {
+          set_y(i, row, token, error);
+          is_dependent_var = true;
+          break;
+        } else if (column > dependent_varIDs[i]) {
+          --column_x;
+        }
+      }
+      if (!is_dependent_var) {
+        set_x(column_x, row, token, error);
+      }
       ++column;
     }
-    if (column > num_cols) {
+    if (column > (num_cols + num_dependent_variables)) {
       throw std::runtime_error(
           std::string("Could not open input file. Too many columns in row ") + std::to_string(row) + std::string("."));
-    } else if (column < num_cols) {
+    } else if (column < (num_cols + num_dependent_variables)) {
       throw std::runtime_error(
           std::string("Could not open input file. Too few columns in row ") + std::to_string(row)
               + std::string(". Are all values numeric?"));
@@ -117,19 +149,36 @@ bool Data::loadFromFileWhitespace(std::ifstream& input_file, std::string header_
   return error;
 }
 
-bool Data::loadFromFileOther(std::ifstream& input_file, std::string header_line, char seperator) {
+bool Data::loadFromFileOther(std::ifstream& input_file, std::string header_line,
+    std::vector<std::string>& dependent_variable_names, char seperator) {
+
+  size_t num_dependent_variables = dependent_variable_names.size();
+  std::vector<size_t> dependent_varIDs;
+  dependent_varIDs.resize(num_dependent_variables);
 
   // Read header
   std::string header_token;
   std::stringstream header_line_stream(header_line);
+  size_t col = 0;
   while (getline(header_line_stream, header_token, seperator)) {
-    variable_names.push_back(header_token);
+    bool is_dependent_var = false;
+    for (size_t i = 0; i < dependent_variable_names.size(); ++i) {
+      if (header_token == dependent_variable_names[i]) {
+        dependent_varIDs[i] = col;
+        is_dependent_var = true;
+      }
+    }
+    if (!is_dependent_var) {
+      variable_names.push_back(header_token);
+    }
+    ++col;
   }
+
   num_cols = variable_names.size();
   num_cols_no_snp = num_cols;
 
   // Read body
-  reserveMemory();
+  reserveMemory(num_dependent_variables);
   bool error = false;
   std::string line;
   size_t row = 0;
@@ -141,7 +190,21 @@ bool Data::loadFromFileOther(std::ifstream& input_file, std::string header_line,
     while (getline(line_stream, token_string, seperator)) {
       std::stringstream token_stream(token_string);
       readFromStream(token_stream, token);
-      set(column, row, token, error);
+
+      size_t column_x = column;
+      bool is_dependent_var = false;
+      for (size_t i = 0; i < dependent_varIDs.size(); ++i) {
+        if (column == dependent_varIDs[i]) {
+          set_y(i, row, token, error);
+          is_dependent_var = true;
+          break;
+        } else if (column > dependent_varIDs[i]) {
+          --column_x;
+        }
+      }
+      if (!is_dependent_var) {
+        set_x(column_x, row, token, error);
+      }
       ++column;
     }
     ++row;
@@ -151,14 +214,15 @@ bool Data::loadFromFileOther(std::ifstream& input_file, std::string header_line,
 }
 // #nocov end
 
-void Data::getAllValues(std::vector<double>& all_values, std::vector<size_t>& sampleIDs, size_t varID, size_t start, size_t end) const {
+void Data::getAllValues(std::vector<double>& all_values, std::vector<size_t>& sampleIDs, size_t varID, size_t start,
+    size_t end) const {
 
   // All values for varID (no duplicates) for given sampleIDs
   if (getUnpermutedVarID(varID) < num_cols_no_snp) {
 
-    all_values.reserve(end-start);
+    all_values.reserve(end - start);
     for (size_t pos = start; pos < end; ++pos) {
-      all_values.push_back(get(sampleIDs[pos], varID));
+      all_values.push_back(get_x(sampleIDs[pos], varID));
     }
     std::sort(all_values.begin(), all_values.end());
     all_values.erase(std::unique(all_values.begin(), all_values.end()), all_values.end());
@@ -168,13 +232,14 @@ void Data::getAllValues(std::vector<double>& all_values, std::vector<size_t>& sa
   }
 }
 
-void Data::getMinMaxValues(double& min, double&max, std::vector<size_t>& sampleIDs, size_t varID, size_t start, size_t end) const {
+void Data::getMinMaxValues(double& min, double&max, std::vector<size_t>& sampleIDs, size_t varID, size_t start,
+    size_t end) const {
   if (sampleIDs.size() > 0) {
-    min = get(sampleIDs[start], varID);
+    min = get_x(sampleIDs[start], varID);
     max = min;
   }
   for (size_t pos = start; pos < end; ++pos) {
-    double value = get(sampleIDs[pos], varID);
+    double value = get_x(sampleIDs[pos], varID);
     if (value < min) {
       min = value;
     }
@@ -195,14 +260,15 @@ void Data::sort() {
     // Get all unique values
     std::vector<double> unique_values(num_rows);
     for (size_t row = 0; row < num_rows; ++row) {
-      unique_values[row] = get(row, col);
+      unique_values[row] = get_x(row, col);
     }
     std::sort(unique_values.begin(), unique_values.end());
     unique_values.erase(unique(unique_values.begin(), unique_values.end()), unique_values.end());
 
     // Get index of unique value
     for (size_t row = 0; row < num_rows; ++row) {
-      size_t idx = std::lower_bound(unique_values.begin(), unique_values.end(), get(row, col)) - unique_values.begin();
+      size_t idx = std::lower_bound(unique_values.begin(), unique_values.end(), get_x(row, col))
+          - unique_values.begin();
       index_data[col * num_rows + row] = idx;
     }
 
@@ -215,13 +281,13 @@ void Data::sort() {
 }
 
 // TODO: Implement ordering for multiclass and survival
-void Data::orderSnpLevels(std::string dependent_variable_name, bool corrected_importance) {
+// #nocov start (cannot be tested anymore because GenABEL not on CRAN)
+void Data::orderSnpLevels(bool corrected_importance) {
   // Stop if now SNP data
   if (snp_data == 0) {
     return;
   }
 
-  size_t dependent_varID = getVariableID(dependent_variable_name);
   size_t num_snps;
   if (corrected_importance) {
     num_snps = 2 * (num_cols - num_cols_no_snp);
@@ -256,7 +322,7 @@ void Data::orderSnpLevels(std::string dependent_variable_name, bool corrected_im
         value = 0;
       }
 
-      means[value] += get(row, dependent_varID);
+      means[value] += get_y(row, 0);
       ++counts[value];
     }
 
@@ -270,6 +336,7 @@ void Data::orderSnpLevels(std::string dependent_variable_name, bool corrected_im
 
   order_snps = true;
 }
+// #nocov end
 
 } // namespace ranger
 
diff --git a/src/Data.h b/src/Data.h
index b4519e229..7c0ebc993 100644
--- a/src/Data.h
+++ b/src/Data.h
@@ -31,22 +31,29 @@ class Data {
 
   virtual ~Data() = default;
 
-  virtual double get(size_t row, size_t col) const = 0;
+  virtual double get_x(size_t row, size_t col) const = 0;
+  virtual double get_y(size_t row, size_t col) const = 0;
 
   size_t getVariableID(const std::string& variable_name) const;
 
-  virtual void reserveMemory() = 0;
-  virtual void set(size_t col, size_t row, double value, bool& error) = 0;
+  virtual void reserveMemory(size_t y_cols) = 0;
+
+  virtual void set_x(size_t col, size_t row, double value, bool& error) = 0;
+  virtual void set_y(size_t col, size_t row, double value, bool& error) = 0;
 
   void addSnpData(unsigned char* snp_data, size_t num_cols_snp);
 
-  bool loadFromFile(std::string filename);
-  bool loadFromFileWhitespace(std::ifstream& input_file, std::string header_line);
-  bool loadFromFileOther(std::ifstream& input_file, std::string header_line, char seperator);
+  bool loadFromFile(std::string filename, std::vector<std::string>& dependent_variable_names);
+  bool loadFromFileWhitespace(std::ifstream& input_file, std::string header_line,
+      std::vector<std::string>& dependent_variable_names);
+  bool loadFromFileOther(std::ifstream& input_file, std::string header_line,
+      std::vector<std::string>& dependent_variable_names, char seperator);
 
-  void getAllValues(std::vector<double>& all_values, std::vector<size_t>& sampleIDs, size_t varID, size_t start, size_t end) const;
+  void getAllValues(std::vector<double>& all_values, std::vector<size_t>& sampleIDs, size_t varID, size_t start,
+      size_t end) const;
 
-  void getMinMaxValues(double& min, double&max, std::vector<size_t>& sampleIDs, size_t varID, size_t start, size_t end) const;
+  void getMinMaxValues(double& min, double&max, std::vector<size_t>& sampleIDs, size_t varID, size_t start,
+      size_t end) const;
 
   size_t getIndex(size_t row, size_t col) const {
     // Use permuted data for corrected impurity importance
@@ -63,6 +70,7 @@ class Data {
     }
   }
 
+  // #nocov start (cannot be tested anymore because GenABEL not on CRAN)
   size_t getSnp(size_t row, size_t col, size_t col_permuted) const {
     // Get data out of snp storage. -1 because of GenABEL coding.
     size_t idx = (col - num_cols_no_snp) * num_rows_rounded + row;
@@ -76,13 +84,14 @@ class Data {
     // Order SNPs
     if (order_snps) {
       if (col_permuted >= num_cols) {
-        result = snp_order[col_permuted + no_split_variables.size() - 2 * num_cols_no_snp][result];
+        result = snp_order[col_permuted - 2 * num_cols_no_snp][result];
       } else {
         result = snp_order[col - num_cols_no_snp][result];
       }
     }
     return result;
   }
+  // #nocov end
 
   double getUniqueDataValue(size_t varID, size_t index) const {
     // Use permuted data for corrected impurity importance
@@ -114,7 +123,7 @@ class Data {
 
   void sort();
 
-  void orderSnpLevels(std::string dependent_variable_name, bool corrected_importance);
+  void orderSnpLevels(bool corrected_importance);
 
   const std::vector<std::string>& getVariableNames() const {
     return variable_names;
@@ -136,15 +145,6 @@ class Data {
     }
   }
 
-  const std::vector<size_t>& getNoSplitVariables() const noexcept {
-    return no_split_variables;
-  }
-
-  void addNoSplitVariable(size_t varID) {
-    no_split_variables.push_back(varID);
-    std::sort(no_split_variables.begin(), no_split_variables.end());
-  }
-
   std::vector<bool>& getIsOrderedVariable() noexcept {
     return is_ordered_variable;
   }
@@ -182,16 +182,11 @@ class Data {
   size_t getUnpermutedVarID(size_t varID) const {
     if (varID >= num_cols) {
       varID -= num_cols;
-
-      for (auto& skip : no_split_variables) {
-        if (varID >= skip) {
-          ++varID;
-        }
-      }
     }
     return varID;
   }
 
+  // #nocov start (cannot be tested anymore because GenABEL not on CRAN)
   const std::vector<std::vector<size_t>>& getSnpOrder() const {
     return snp_order;
   }
@@ -200,6 +195,7 @@ class Data {
     this->snp_order = snp_order;
     order_snps = true;
   }
+  // #nocov end
 
 protected:
   std::vector<std::string> variable_names;
@@ -216,9 +212,6 @@ class Data {
   std::vector<std::vector<double>> unique_data_values;
   size_t max_num_unique_values;
 
-  // Variable to not split at (only dependent_varID for non-survival trees)
-  std::vector<size_t> no_split_variables;
-
   // For each varID true if ordered
   std::vector<bool> is_ordered_variable;
 
diff --git a/src/DataChar.cpp b/src/DataChar.cpp
deleted file mode 100644
index 24169c880..000000000
--- a/src/DataChar.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-/*-------------------------------------------------------------------------------
- This file is part of ranger.
-
- Copyright (c) [2014-2018] [Marvin N. Wright]
-
- This software may be modified and distributed under the terms of the MIT license.
-
- Please note that the C++ core of ranger is distributed under MIT license and the
- R package "ranger" under GPL3 license.
- #-------------------------------------------------------------------------------*/
-
-// Ignore in coverage report (not used in R package)
-// #nocov start
-#include <limits.h>
-#include <math.h>
-#include <iostream>
-
-#include "DataChar.h"
-
-namespace ranger {
-
-DataChar::DataChar(double* data_double, std::vector<std::string> variable_names, size_t num_rows, size_t num_cols,
-    bool& error) {
-  this->variable_names = variable_names;
-  this->num_rows = num_rows;
-  this->num_cols = num_cols;
-  this->num_cols_no_snp = num_cols;
-
-  reserveMemory();
-
-  // Save data and report errors
-  for (size_t i = 0; i < num_cols; ++i) {
-    for (size_t j = 0; j < num_rows; ++j) {
-      double value = data_double[i * num_rows + j];
-      if (value > CHAR_MAX || value < CHAR_MIN) {
-        error = true;
-      }
-      if (floor(value) != ceil(value)) {
-        error = true;
-      }
-      data[i * num_rows + j] = value;
-    }
-  }
-}
-
-} // namespace ranger
-
-// #nocov end
diff --git a/src/DataChar.h b/src/DataChar.h
index b0f4e0dae..2302c2d48 100644
--- a/src/DataChar.h
+++ b/src/DataChar.h
@@ -15,9 +15,10 @@
 #define DATACHAR_H_
 
 #include <vector>
-#include <limits.h>
+#include <utility>
 
 #include "globals.h"
+#include "utility.h"
 #include "Data.h"
 
 namespace ranger {
@@ -25,14 +26,13 @@ namespace ranger {
 class DataChar: public Data {
 public:
   DataChar() = default;
-  DataChar(double* data_double, std::vector<std::string> variable_names, size_t num_rows, size_t num_cols, bool& error);
 
   DataChar(const DataChar&) = delete;
   DataChar& operator=(const DataChar&) = delete;
 
   virtual ~DataChar() override = default;
 
-  double get(size_t row, size_t col) const override {
+  double get_x(size_t row, size_t col) const override {
     // Use permuted data for corrected impurity importance
     size_t col_permuted = col;
     if (col >= num_cols) {
@@ -41,28 +41,32 @@ class DataChar: public Data {
     }
 
     if (col < num_cols_no_snp) {
-      return data[col * num_rows + row];
+      return x[col * num_rows + row];
     } else {
       return getSnp(row, col, col_permuted);
     }
   }
 
-  void reserveMemory() override {
-    data.resize(num_cols * num_rows);
+  double get_y(size_t row, size_t col) const override {
+    return y[col * num_rows + row];
   }
 
-  void set(size_t col, size_t row, double value, bool& error) override {
-    if (value > CHAR_MAX || value < CHAR_MIN) {
-      error = true;
-    }
-    if (floor(value) != ceil(value)) {
-      error = true;
-    }
-    data[col * num_rows + row] = value;
+  void reserveMemory(size_t y_cols) override {
+    x.resize(num_cols * num_rows);
+    y.resize(y_cols * num_rows);
+  }
+
+  void set_x(size_t col, size_t row, double value, bool& error) override {
+    x[col * num_rows + row] = value;
+  }
+
+  void set_y(size_t col, size_t row, double value, bool& error) override {
+    y[col * num_rows + row] = value;
   }
 
 private:
-  std::vector<char> data;
+  std::vector<char> x;
+  std::vector<char> y;
 };
 
 } // namespace ranger
diff --git a/src/DataDouble.h b/src/DataDouble.h
index aa8713053..aa2e60bdb 100644
--- a/src/DataDouble.h
+++ b/src/DataDouble.h
@@ -9,6 +9,8 @@
  R package "ranger" under GPL3 license.
  #-------------------------------------------------------------------------------*/
 
+// Ignore in coverage report (not used in R package)
+// #nocov start
 #ifndef DATADOUBLE_H_
 #define DATADOUBLE_H_
 
@@ -24,20 +26,13 @@ namespace ranger {
 class DataDouble: public Data {
 public:
   DataDouble() = default;
-  DataDouble(std::vector<double> data, std::vector<std::string> variable_names, size_t num_rows, size_t num_cols) :
-      data { std::move(data) } {
-    this->variable_names = variable_names;
-    this->num_rows = num_rows;
-    this->num_cols = num_cols;
-    this->num_cols_no_snp = num_cols;
-  }
 
   DataDouble(const DataDouble&) = delete;
   DataDouble& operator=(const DataDouble&) = delete;
 
   virtual ~DataDouble() override = default;
 
-  double get(size_t row, size_t col) const override {
+  double get_x(size_t row, size_t col) const override {
     // Use permuted data for corrected impurity importance
     size_t col_permuted = col;
     if (col >= num_cols) {
@@ -46,24 +41,35 @@ class DataDouble: public Data {
     }
 
     if (col < num_cols_no_snp) {
-      return data[col * num_rows + row];
+      return x[col * num_rows + row];
     } else {
       return getSnp(row, col, col_permuted);
     }
   }
 
-  void reserveMemory() override {
-    data.resize(num_cols * num_rows);
+  double get_y(size_t row, size_t col) const override {
+    return y[col * num_rows + row];
+  }
+
+  void reserveMemory(size_t y_cols) override {
+    x.resize(num_cols * num_rows);
+    y.resize(y_cols * num_rows);
+  }
+
+  void set_x(size_t col, size_t row, double value, bool& error) override {
+    x[col * num_rows + row] = value;
   }
 
-  void set(size_t col, size_t row, double value, bool& error) override {
-    data[col * num_rows + row] = value;
+  void set_y(size_t col, size_t row, double value, bool& error) override {
+    y[col * num_rows + row] = value;
   }
 
 private:
-  std::vector<double> data;
+  std::vector<double> x;
+  std::vector<double> y;
 };
 
 } // namespace ranger
 
 #endif /* DATADOUBLE_H_ */
+// #nocov end
diff --git a/src/DataFloat.cpp b/src/DataFloat.cpp
deleted file mode 100644
index a8027abbb..000000000
--- a/src/DataFloat.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/*-------------------------------------------------------------------------------
- This file is part of ranger.
-
- Copyright (c) [2014-2018] [Marvin N. Wright]
-
- This software may be modified and distributed under the terms of the MIT license.
-
- Please note that the C++ core of ranger is distributed under MIT license and the
- R package "ranger" under GPL3 license.
- #-------------------------------------------------------------------------------*/
-
-// Ignore in coverage report (not used in R package)
-// #nocov start
-#include <iostream>
-
-#include "DataFloat.h"
-
-namespace ranger {
-
-DataFloat::DataFloat(double* data_double, std::vector<std::string> variable_names, size_t num_rows, size_t num_cols) {
-  this->variable_names = variable_names;
-  this->num_rows = num_rows;
-  this->num_cols = num_cols;
-  this->num_cols_no_snp = num_cols;
-
-  reserveMemory();
-  for (size_t i = 0; i < num_cols; ++i) {
-    for (size_t j = 0; j < num_rows; ++j) {
-      data[i * num_rows + j] = (float) data_double[i * num_rows + j];
-    }
-  }
-}
-
-// #nocov end
-
-}// namespace ranger
diff --git a/src/DataFloat.h b/src/DataFloat.h
index a3c4da4f2..67989bb72 100644
--- a/src/DataFloat.h
+++ b/src/DataFloat.h
@@ -15,8 +15,10 @@
 #define DATAFLOAT_H_
 
 #include <vector>
+#include <utility>
 
 #include "globals.h"
+#include "utility.h"
 #include "Data.h"
 
 namespace ranger {
@@ -24,14 +26,13 @@ namespace ranger {
 class DataFloat: public Data {
 public:
   DataFloat() = default;
-  DataFloat(double* data_double, std::vector<std::string> variable_names, size_t num_rows, size_t num_cols);
 
   DataFloat(const DataFloat&) = delete;
   DataFloat& operator=(const DataFloat&) = delete;
 
   virtual ~DataFloat() override = default;
 
-  double get(size_t row, size_t col) const override {
+  double get_x(size_t row, size_t col) const override {
     // Use permuted data for corrected impurity importance
     size_t col_permuted = col;
     if (col >= num_cols) {
@@ -40,22 +41,32 @@ class DataFloat: public Data {
     }
 
     if (col < num_cols_no_snp) {
-      return data[col * num_rows + row];
+      return x[col * num_rows + row];
     } else {
       return getSnp(row, col, col_permuted);
     }
   }
 
-  void reserveMemory() override {
-    data.resize(num_cols * num_rows);
+  double get_y(size_t row, size_t col) const override {
+    return y[col * num_rows + row];
   }
 
-  void set(size_t col, size_t row, double value, bool& error) override {
-    data[col * num_rows + row] = value;
+  void reserveMemory(size_t y_cols) override {
+    x.resize(num_cols * num_rows);
+    y.resize(y_cols * num_rows);
+  }
+
+  void set_x(size_t col, size_t row, double value, bool& error) override {
+    x[col * num_rows + row] = value;
+  }
+
+  void set_y(size_t col, size_t row, double value, bool& error) override {
+    y[col * num_rows + row] = value;
   }
 
 private:
-  std::vector<float> data;
+  std::vector<float> x;
+  std::vector<float> y;
 };
 
 } // namespace ranger
diff --git a/src/DataRcpp.h b/src/DataRcpp.h
index 5b3c4c714..a4b8db08f 100644
--- a/src/DataRcpp.h
+++ b/src/DataRcpp.h
@@ -39,8 +39,9 @@ namespace ranger {
 class DataRcpp: public Data {
 public:
   DataRcpp() = default;
-  DataRcpp(Rcpp::NumericMatrix& data, std::vector<std::string> variable_names, size_t num_rows, size_t num_cols) {
-      this->data = data;
+  DataRcpp(Rcpp::NumericMatrix& x, Rcpp::NumericMatrix& y, std::vector<std::string> variable_names, size_t num_rows, size_t num_cols) {
+      this->x = x;
+      this->y = y;
       this->variable_names = variable_names;
       this->num_rows = num_rows;
       this->num_cols = num_cols;
@@ -52,7 +53,7 @@ class DataRcpp: public Data {
   
   virtual ~DataRcpp() override = default;
   
-  double get(size_t row, size_t col) const override {
+  double get_x(size_t row, size_t col) const override {
     // Use permuted data for corrected impurity importance
     size_t col_permuted = col;
     if (col >= num_cols) {
@@ -61,22 +62,33 @@ class DataRcpp: public Data {
     }
     
     if (col < num_cols_no_snp) {
-      return data[col * num_rows + row];
+      return x[col * num_rows + row];
     } else {
       return getSnp(row, col, col_permuted);
     }
   }
   
-  void reserveMemory() override {
+  double get_y(size_t row, size_t col) const override {
+    return y[col * num_rows + row];
+  }
+  
+  // #nocov start 
+  void reserveMemory(size_t y_cols) override {
     // Not needed
   }
   
-  void set(size_t col, size_t row, double value, bool& error) override {
-    data[col * num_rows + row] = value;
+  void set_x(size_t col, size_t row, double value, bool& error) override {
+    x[col * num_rows + row] = value;
+  }
+  
+  void set_y(size_t col, size_t row, double value, bool& error) override {
+    y[col * num_rows + row] = value;
   }
+  // #nocov end 
   
 private:
-  Rcpp::NumericMatrix data;
+  Rcpp::NumericMatrix x;
+  Rcpp::NumericMatrix y;
 };
 
 } // namespace ranger
diff --git a/src/DataSparse.cpp b/src/DataSparse.cpp
index 3b3d0add1..779a54d6b 100644
--- a/src/DataSparse.cpp
+++ b/src/DataSparse.cpp
@@ -30,10 +30,11 @@
 
 namespace ranger {
 
-DataSparse::DataSparse(Eigen::SparseMatrix<double>& data, std::vector<std::string> variable_names, size_t num_rows,
+DataSparse::DataSparse(Eigen::SparseMatrix<double>& x, Rcpp::NumericMatrix& y, std::vector<std::string> variable_names, size_t num_rows,
     size_t num_cols) :
-    data { } {
-  this->data.swap(data);
+    x { }{
+  this->x.swap(x);
+  this->y = y;
   this->variable_names = variable_names;
   this->num_rows = num_rows;
   this->num_cols = num_cols;
diff --git a/src/DataSparse.h b/src/DataSparse.h
index a56648f0e..af5032c54 100644
--- a/src/DataSparse.h
+++ b/src/DataSparse.h
@@ -40,7 +40,7 @@ class DataSparse: public Data {
 public:
   DataSparse() = default;
 
-  DataSparse(Eigen::SparseMatrix<double>& data, std::vector<std::string> variable_names, size_t num_rows,
+  DataSparse(Eigen::SparseMatrix<double>& x, Rcpp::NumericMatrix& y, std::vector<std::string> variable_names, size_t num_rows,
       size_t num_cols);
 
   DataSparse(const DataSparse&) = delete;
@@ -48,26 +48,36 @@ class DataSparse: public Data {
 
   virtual ~DataSparse() override = default;
 
-  double get(size_t row, size_t col) const override {
+  double get_x(size_t row, size_t col) const override {
     // Use permuted data for corrected impurity importance
-    size_t col_permuted = col;
     if (col >= num_cols) {
       col = getUnpermutedVarID(col);
       row = getPermutedSampleID(row);
     }
-    return data.coeff(row, col);
+    return x.coeff(row, col);
+  }
+  
+  double get_y(size_t row, size_t col) const override {
+    return y[col * num_rows + row];
   }
 
-  void reserveMemory() override {
-    data.resize(num_rows, num_cols);
+  // #nocov start 
+  void reserveMemory(size_t y_cols) override {
+    // Not needed
   }
 
-  void set(size_t col, size_t row, double value, bool& error) override {
-    data.coeffRef(row, col) = value;
+  void set_x(size_t col, size_t row, double value, bool& error) override {
+    x.coeffRef(row, col) = value;
+  }
+  
+  void set_y(size_t col, size_t row, double value, bool& error) override {
+    y[col * num_rows + row] = value;
   }
+  // #nocov end 
 
 private:
-  Eigen::SparseMatrix<double> data;
+  Eigen::SparseMatrix<double> x;
+  Rcpp::NumericMatrix y;
 };
 
 } // namespace ranger
diff --git a/src/Forest.cpp b/src/Forest.cpp
index 12b13a7ad..89341738b 100644
--- a/src/Forest.cpp
+++ b/src/Forest.cpp
@@ -29,40 +29,15 @@
 namespace ranger {
 
 Forest::Forest() :
-    verbose_out(0), num_trees(DEFAULT_NUM_TREE), mtry(0), min_node_size(0), num_variables(0), num_independent_variables(
-        0), seed(0), dependent_varID(0), num_samples(0), prediction_mode(false), memory_mode(MEM_DOUBLE), sample_with_replacement(
-        true), memory_saving_splitting(false), splitrule(DEFAULT_SPLITRULE), predict_all(false), keep_inbag(false), sample_fraction(
-        { 1 }), holdout(false), prediction_type(DEFAULT_PREDICTIONTYPE), num_random_splits(DEFAULT_NUM_RANDOM_SPLITS), max_depth(
+    verbose_out(0), num_trees(DEFAULT_NUM_TREE), mtry(0), min_node_size(0), num_independent_variables(0), seed(0), num_samples(
+        0), prediction_mode(false), memory_mode(MEM_DOUBLE), sample_with_replacement(true), memory_saving_splitting(
+        false), splitrule(DEFAULT_SPLITRULE), predict_all(false), keep_inbag(false), sample_fraction( { 1 }), holdout(
+        false), prediction_type(DEFAULT_PREDICTIONTYPE), num_random_splits(DEFAULT_NUM_RANDOM_SPLITS), max_depth(
         DEFAULT_MAXDEPTH), alpha(DEFAULT_ALPHA), minprop(DEFAULT_MINPROP), num_threads(DEFAULT_NUM_THREADS), data { }, overall_prediction_error(
     NAN), importance_mode(DEFAULT_IMPORTANCE_MODE), progress(0) {
 }
 
 // #nocov start
-std::unique_ptr<Data> load_data_from_file(const std::string& data_path, const MemoryMode memory_mode,
-    std::ostream* verbose_out = nullptr) {
-  std::unique_ptr<Data> result { };
-  switch (memory_mode) {
-  case MEM_DOUBLE:
-    result = make_unique<DataDouble>();
-    break;
-  case MEM_FLOAT:
-    result = make_unique<DataFloat>();
-    break;
-  case MEM_CHAR:
-    result = make_unique<DataChar>();
-    break;
-  }
-
-  if (verbose_out)
-    *verbose_out << "Loading input file: " << data_path << "." << std::endl;
-  bool found_rounding_error = result->loadFromFile(data_path);
-  if (found_rounding_error && verbose_out) {
-    *verbose_out << "Warning: Rounding or Integer overflow occurred. Use FLOAT or DOUBLE precision to avoid this."
-        << std::endl;
-  }
-  return result;
-}
-
 void Forest::initCpp(std::string dependent_variable_name, MemoryMode memory_mode, std::string input_file, uint mtry,
     std::string output_prefix, uint num_trees, std::ostream* verbose_out, uint seed, uint num_threads,
     std::string load_forest_filename, ImportanceMode importance_mode, uint min_node_size,
@@ -74,6 +49,14 @@ void Forest::initCpp(std::string dependent_variable_name, MemoryMode memory_mode
 
   this->verbose_out = verbose_out;
 
+  if (!dependent_variable_name.empty()) {
+    if (status_variable_name.empty()) {
+      this->dependent_variable_names = {dependent_variable_name};
+    } else {
+      this->dependent_variable_names = {dependent_variable_name, status_variable_name};
+    }
+  }
+
   // Set prediction mode
   bool prediction_mode = false;
   if (!load_forest_filename.empty()) {
@@ -90,12 +73,15 @@ void Forest::initCpp(std::string dependent_variable_name, MemoryMode memory_mode
   }
   std::vector<double> sample_fraction_vector = { sample_fraction };
 
+  if (prediction_mode) {
+    loadDependentVariableNamesFromFile(load_forest_filename);
+  }
+
   // Call other init function
-  init(dependent_variable_name, memory_mode, load_data_from_file(input_file, memory_mode, verbose_out), mtry,
-      output_prefix, num_trees, seed, num_threads, importance_mode, min_node_size, status_variable_name,
-      prediction_mode, sample_with_replacement, unordered_variable_names, memory_saving_splitting, splitrule,
-      predict_all, sample_fraction_vector, alpha, minprop, holdout, prediction_type, num_random_splits, false,
-      max_depth);
+  init(memory_mode, loadDataFromFile(input_file), mtry, output_prefix, num_trees, seed, num_threads, importance_mode,
+      min_node_size, prediction_mode, sample_with_replacement, unordered_variable_names, memory_saving_splitting,
+      splitrule, predict_all, sample_fraction_vector, alpha, minprop, holdout, prediction_type, num_random_splits,
+      false, max_depth);
 
   if (prediction_mode) {
     loadFromFile(load_forest_filename);
@@ -111,7 +97,7 @@ void Forest::initCpp(std::string dependent_variable_name, MemoryMode memory_mode
     std::vector<std::vector<double>> split_select_weights;
     split_select_weights.resize(1);
     loadDoubleVectorFromFile(split_select_weights[0], split_select_weights_file);
-    if (split_select_weights[0].size() != num_variables - 1) {
+    if (split_select_weights[0].size() != num_independent_variables) {
       throw std::runtime_error("Number of split select weights is not equal to number of independent variables.");
     }
     setSplitWeightVector(split_select_weights);
@@ -146,22 +132,21 @@ void Forest::initCpp(std::string dependent_variable_name, MemoryMode memory_mode
 }
 // #nocov end
 
-void Forest::initR(std::string dependent_variable_name, std::unique_ptr<Data> input_data, uint mtry, uint num_trees,
-    std::ostream* verbose_out, uint seed, uint num_threads, ImportanceMode importance_mode, uint min_node_size,
+void Forest::initR(std::unique_ptr<Data> input_data, uint mtry, uint num_trees, std::ostream* verbose_out, uint seed,
+    uint num_threads, ImportanceMode importance_mode, uint min_node_size,
     std::vector<std::vector<double>>& split_select_weights, const std::vector<std::string>& always_split_variable_names,
-    std::string status_variable_name, bool prediction_mode, bool sample_with_replacement,
-    const std::vector<std::string>& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule,
-    std::vector<double>& case_weights, std::vector<std::vector<size_t>>& manual_inbag, bool predict_all,
-    bool keep_inbag, std::vector<double>& sample_fraction, double alpha, double minprop, bool holdout,
-    PredictionType prediction_type, uint num_random_splits, bool order_snps, uint max_depth) {
+    bool prediction_mode, bool sample_with_replacement, const std::vector<std::string>& unordered_variable_names,
+    bool memory_saving_splitting, SplitRule splitrule, std::vector<double>& case_weights,
+    std::vector<std::vector<size_t>>& manual_inbag, bool predict_all, bool keep_inbag,
+    std::vector<double>& sample_fraction, double alpha, double minprop, bool holdout, PredictionType prediction_type,
+    uint num_random_splits, bool order_snps, uint max_depth) {
 
   this->verbose_out = verbose_out;
 
   // Call other init function
-  init(dependent_variable_name, MEM_DOUBLE, std::move(input_data), mtry, "", num_trees, seed, num_threads,
-      importance_mode, min_node_size, status_variable_name, prediction_mode, sample_with_replacement,
-      unordered_variable_names, memory_saving_splitting, splitrule, predict_all, sample_fraction, alpha, minprop,
-      holdout, prediction_type, num_random_splits, order_snps, max_depth);
+  init(MEM_DOUBLE, std::move(input_data), mtry, "", num_trees, seed, num_threads, importance_mode, min_node_size,
+      prediction_mode, sample_with_replacement, unordered_variable_names, memory_saving_splitting, splitrule,
+      predict_all, sample_fraction, alpha, minprop, holdout, prediction_type, num_random_splits, order_snps, max_depth);
 
   // Set variables to be always considered for splitting
   if (!always_split_variable_names.empty()) {
@@ -190,12 +175,12 @@ void Forest::initR(std::string dependent_variable_name, std::unique_ptr<Data> in
   this->keep_inbag = keep_inbag;
 }
 
-void Forest::init(std::string dependent_variable_name, MemoryMode memory_mode, std::unique_ptr<Data> input_data,
-    uint mtry, std::string output_prefix, uint num_trees, uint seed, uint num_threads, ImportanceMode importance_mode,
-    uint min_node_size, std::string status_variable_name, bool prediction_mode, bool sample_with_replacement,
-    const std::vector<std::string>& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule,
-    bool predict_all, std::vector<double>& sample_fraction, double alpha, double minprop, bool holdout,
-    PredictionType prediction_type, uint num_random_splits, bool order_snps, uint max_depth) {
+void Forest::init(MemoryMode memory_mode, std::unique_ptr<Data> input_data, uint mtry, std::string output_prefix,
+    uint num_trees, uint seed, uint num_threads, ImportanceMode importance_mode, uint min_node_size,
+    bool prediction_mode, bool sample_with_replacement, const std::vector<std::string>& unordered_variable_names,
+    bool memory_saving_splitting, SplitRule splitrule, bool predict_all, std::vector<double>& sample_fraction,
+    double alpha, double minprop, bool holdout, PredictionType prediction_type, uint num_random_splits, bool order_snps,
+    uint max_depth) {
 
   // Initialize data with memmode
   this->data = std::move(input_data);
@@ -242,23 +227,14 @@ void Forest::init(std::string dependent_variable_name, MemoryMode memory_mode, s
 
   // Set number of samples and variables
   num_samples = data->getNumRows();
-  num_variables = data->getNumCols();
-
-  // Convert dependent variable name to ID
-  if (!prediction_mode && !dependent_variable_name.empty()) {
-    dependent_varID = data->getVariableID(dependent_variable_name);
-  }
+  num_independent_variables = data->getNumCols();
 
   // Set unordered factor variables
   if (!prediction_mode) {
     data->setIsOrderedVariable(unordered_variable_names);
   }
 
-  data->addNoSplitVariable(dependent_varID);
-
-  initInternal(status_variable_name);
-
-  num_independent_variables = num_variables - data->getNoSplitVariables().size();
+  initInternal();
 
   // Init split select weights
   split_select_weights.push_back(std::vector<double>());
@@ -267,7 +243,7 @@ void Forest::init(std::string dependent_variable_name, MemoryMode memory_mode, s
   manual_inbag.push_back(std::vector<size_t>());
 
   // Check if mtry is in valid range
-  if (this->mtry > num_variables - 1) {
+  if (this->mtry > num_independent_variables) {
     throw std::runtime_error("mtry can not be larger than number of variables in data.");
   }
 
@@ -283,7 +259,7 @@ void Forest::init(std::string dependent_variable_name, MemoryMode memory_mode, s
 
   // Order SNP levels if in "order" splitting
   if (!prediction_mode && order_snps) {
-    data->orderSnpLevels(dependent_variable_name, (importance_mode == IMP_GINI_CORRECTED));
+    data->orderSnpLevels((importance_mode == IMP_GINI_CORRECTED));
   }
 }
 
@@ -325,8 +301,9 @@ void Forest::writeOutput() {
     *verbose_out << std::endl;
   writeOutputInternal();
   if (verbose_out) {
-    *verbose_out << "Dependent variable name:           " << data->getVariableNames()[dependent_varID] << std::endl;
-    *verbose_out << "Dependent variable ID:             " << dependent_varID << std::endl;
+    if (dependent_variable_names.size() >= 1) {
+      *verbose_out << "Dependent variable name:           " << dependent_variable_names[0] << std::endl;
+    }
     *verbose_out << "Number of trees:                   " << num_trees << std::endl;
     *verbose_out << "Sample size:                       " << num_samples << std::endl;
     *verbose_out << "Number of independent variables:   " << num_independent_variables << std::endl;
@@ -375,13 +352,7 @@ void Forest::writeImportanceFile() {
 
   // Write importance to file
   for (size_t i = 0; i < variable_importance.size(); ++i) {
-    size_t varID = i;
-    for (auto& skip : data->getNoSplitVariables()) {
-      if (varID >= skip) {
-        ++varID;
-      }
-    }
-    std::string variable_name = data->getVariableNames()[varID];
+    std::string variable_name = data->getVariableNames()[i];
     importance_file << variable_name << ": " << variable_importance[i] << std::endl;
   }
 
@@ -400,8 +371,18 @@ void Forest::saveToFile() {
     throw std::runtime_error("Could not write to output file: " + filename + ".");
   }
 
-  // Write dependent_varID
-  outfile.write((char*) &dependent_varID, sizeof(dependent_varID));
+  // Write dependent variable names
+  uint num_dependent_variables = dependent_variable_names.size();
+  if (num_dependent_variables >= 1) {
+    outfile.write((char*) &num_dependent_variables, sizeof(num_dependent_variables));
+    for (auto& var_name : dependent_variable_names) {
+      size_t length = var_name.size();
+      outfile.write((char*) &length, sizeof(length));
+      outfile.write((char*) var_name.c_str(), length * sizeof(char));
+    }
+  } else {
+    throw std::runtime_error("Missing dependent variable name.");
+  }
 
   // Write num_trees
   outfile.write((char*) &num_trees, sizeof(num_trees));
@@ -457,10 +438,10 @@ void Forest::grow() {
       tree_manual_inbag = &manual_inbag[0];
     }
 
-    trees[i]->init(data.get(), mtry, dependent_varID, num_samples, tree_seed, &deterministic_varIDs,
-        &split_select_varIDs, tree_split_select_weights, importance_mode, min_node_size, sample_with_replacement,
-        memory_saving_splitting, splitrule, &case_weights, tree_manual_inbag, keep_inbag, &sample_fraction, alpha,
-        minprop, holdout, num_random_splits, max_depth);
+    trees[i]->init(data.get(), mtry, num_samples, tree_seed, &deterministic_varIDs, &split_select_varIDs,
+        tree_split_select_weights, importance_mode, min_node_size, sample_with_replacement, memory_saving_splitting,
+        splitrule, &case_weights, tree_manual_inbag, keep_inbag, &sample_fraction, alpha, minprop, holdout,
+        num_random_splits, max_depth);
   }
 
 // Init variable importance
@@ -468,6 +449,7 @@ void Forest::grow() {
 
 // Grow trees in multiple threads
 #ifdef OLD_WIN_R_BUILD
+  // #nocov start
   progress = 0;
   clock_t start_time = clock();
   clock_t lap_time = clock();
@@ -476,6 +458,7 @@ void Forest::grow() {
     progress++;
     showProgress("Growing trees..", start_time, lap_time);
   }
+  // #nocov end
 #else
   progress = 0;
 #ifdef R_BUILD
@@ -531,6 +514,7 @@ void Forest::predict() {
 
 // Predict trees in multiple threads and join the threads with the main thread
 #ifdef OLD_WIN_R_BUILD
+  // #nocov start
   progress = 0;
   clock_t start_time = clock();
   clock_t lap_time = clock();
@@ -545,6 +529,7 @@ void Forest::predict() {
   for (size_t sample_idx = 0; sample_idx < data->getNumRows(); ++sample_idx) {
     predictInternal(sample_idx);
   }
+  // #nocov end
 #else
   progress = 0;
 #ifdef R_BUILD
@@ -588,6 +573,7 @@ void Forest::computePredictionError() {
 
 // Predict trees in multiple threads
 #ifdef OLD_WIN_R_BUILD
+  // #nocov start
   progress = 0;
   clock_t start_time = clock();
   clock_t lap_time = clock();
@@ -596,6 +582,7 @@ void Forest::computePredictionError() {
     progress++;
     showProgress("Predicting..", start_time, lap_time);
   }
+  // #nocov end
 #else
   std::vector<std::thread> threads;
   threads.reserve(num_threads);
@@ -623,6 +610,7 @@ void Forest::computePermutationImportance() {
 
 // Compute tree permutation importance in multiple threads
 #ifdef OLD_WIN_R_BUILD
+  // #nocov start
   progress = 0;
   clock_t start_time = clock();
   clock_t lap_time = clock();
@@ -640,6 +628,7 @@ void Forest::computePermutationImportance() {
     progress++;
     showProgress("Computing permutation importance..", start_time, lap_time);
   }
+  // #nocov end
 #else
   progress = 0;
 #ifdef R_BUILD
@@ -812,28 +801,85 @@ void Forest::loadFromFile(std::string filename) {
   if (verbose_out)
     *verbose_out << "Loading forest from file " << filename << "." << std::endl;
 
-// Open file for reading
+  // Open file for reading
   std::ifstream infile;
   infile.open(filename, std::ios::binary);
   if (!infile.good()) {
     throw std::runtime_error("Could not read from input file: " + filename + ".");
   }
 
-// Read dependent_varID and num_trees
-  infile.read((char*) &dependent_varID, sizeof(dependent_varID));
+  // Skip dependent variable names (already read)
+  uint num_dependent_variables;
+  infile.read((char*) &num_dependent_variables, sizeof(num_dependent_variables));
+  for (size_t i = 0; i < num_dependent_variables; ++i) {
+    size_t length;
+    infile.read((char*) &length, sizeof(size_t));
+    infile.ignore(length);
+  }
+
+  // Read num_trees
   infile.read((char*) &num_trees, sizeof(num_trees));
 
-// Read is_ordered_variable
+  // Read is_ordered_variable
   readVector1D(data->getIsOrderedVariable(), infile);
 
-// Read tree data. This is different for tree types -> virtual function
+  // Read tree data. This is different for tree types -> virtual function
   loadFromFileInternal(infile);
 
   infile.close();
 
-// Create thread ranges
+  // Create thread ranges
   equalSplit(thread_ranges, 0, num_trees - 1, num_threads);
 }
+
+void Forest::loadDependentVariableNamesFromFile(std::string filename) {
+
+  // Open file for reading
+  std::ifstream infile;
+  infile.open(filename, std::ios::binary);
+  if (!infile.good()) {
+    throw std::runtime_error("Could not read from input file: " + filename + ".");
+  }
+
+  // Read dependent variable names
+  uint num_dependent_variables = 0;
+  infile.read((char*) &num_dependent_variables, sizeof(num_dependent_variables));
+  for (size_t i = 0; i < num_dependent_variables; ++i) {
+    size_t length;
+    infile.read((char*) &length, sizeof(size_t));
+    char* temp = new char[length + 1];
+    infile.read((char*) temp, length * sizeof(char));
+    temp[length] = '\0';
+    dependent_variable_names.push_back(temp);
+    delete[] temp;
+  }
+
+  infile.close();
+}
+
+std::unique_ptr<Data> Forest::loadDataFromFile(const std::string& data_path) {
+  std::unique_ptr<Data> result { };
+  switch (memory_mode) {
+  case MEM_DOUBLE:
+    result = make_unique<DataDouble>();
+    break;
+  case MEM_FLOAT:
+    result = make_unique<DataFloat>();
+    break;
+  case MEM_CHAR:
+    result = make_unique<DataChar>();
+    break;
+  }
+
+  if (verbose_out)
+    *verbose_out << "Loading input file: " << data_path << "." << std::endl;
+  bool found_rounding_error = result->loadFromFile(data_path, dependent_variable_names);
+  if (found_rounding_error && verbose_out) {
+    *verbose_out << "Warning: Rounding or Integer overflow occurred. Use FLOAT or DOUBLE precision to avoid this."
+        << std::endl;
+  }
+  return result;
+}
 // #nocov end
 
 void Forest::setSplitWeightVector(std::vector<std::vector<double>>& split_select_weights) {
@@ -870,17 +916,10 @@ void Forest::setSplitWeightVector(std::vector<std::vector<double>>& split_select
       double weight = split_select_weights[i][j];
 
       if (i == 0) {
-        size_t varID = j;
-        for (auto& skip : data->getNoSplitVariables()) {
-          if (varID >= skip) {
-            ++varID;
-          }
-        }
-
         if (weight == 1) {
-          deterministic_varIDs.push_back(varID);
+          deterministic_varIDs.push_back(j);
         } else if (weight < 1 && weight > 0) {
-          this->split_select_varIDs[j] = varID;
+          this->split_select_varIDs[j] = j;
           this->split_select_weights[i][j] = weight;
         } else if (weight == 0) {
           ++num_zero_weights;
@@ -903,24 +942,19 @@ void Forest::setSplitWeightVector(std::vector<std::vector<double>>& split_select
       std::copy_n(sw->begin(), num_independent_variables, sw->begin() + num_independent_variables);
 
       for (size_t k = 0; k < num_independent_variables; ++k) {
-        split_select_varIDs[num_independent_variables + k] = num_variables + k;
+        split_select_varIDs[num_independent_variables + k] = num_independent_variables + k;
       }
 
       size_t num_deterministic_varIDs = deterministic_varIDs.size();
       for (size_t k = 0; k < num_deterministic_varIDs; ++k) {
-        size_t varID = deterministic_varIDs[k];
-        for (auto& skip : data->getNoSplitVariables()) {
-          if (varID >= skip) {
-            --varID;
-          }
-        }
-        deterministic_varIDs.push_back(varID + num_variables);
+        deterministic_varIDs.push_back(k + num_independent_variables);
       }
     }
   }
 
   if (num_weights - deterministic_varIDs.size() - num_zero_weights < mtry) {
-    throw std::runtime_error("Too many zeros or ones in split select weights. Need at least mtry variables to split at.");
+    throw std::runtime_error(
+        "Too many zeros or ones in split select weights. Need at least mtry variables to split at.");
   }
 }
 
@@ -942,18 +976,13 @@ void Forest::setAlwaysSplitVariables(const std::vector<std::string>& always_spli
   if (importance_mode == IMP_GINI_CORRECTED) {
     size_t num_deterministic_varIDs = deterministic_varIDs.size();
     for (size_t k = 0; k < num_deterministic_varIDs; ++k) {
-      size_t varID = deterministic_varIDs[k];
-      for (auto& skip : data->getNoSplitVariables()) {
-        if (varID >= skip) {
-          --varID;
-        }
-      }
-      deterministic_varIDs.push_back(varID + num_variables);
+      deterministic_varIDs.push_back(k + num_independent_variables);
     }
   }
 }
 
 #ifdef OLD_WIN_R_BUILD
+// #nocov start
 void Forest::showProgress(std::string operation, clock_t start_time, clock_t& lap_time) {
 
 // Check for user interrupt
@@ -973,6 +1002,7 @@ void Forest::showProgress(std::string operation, clock_t start_time, clock_t& la
     lap_time = clock();
   }
 }
+// #nocov end
 #else
 void Forest::showProgress(std::string operation, size_t max_progress) {
   using std::chrono::steady_clock;
diff --git a/src/Forest.h b/src/Forest.h
index b4f38f32e..b73c5bdbc 100644
--- a/src/Forest.h
+++ b/src/Forest.h
@@ -48,22 +48,21 @@ class Forest {
       const std::vector<std::string>& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule,
       std::string case_weights_file, bool predict_all, double sample_fraction, double alpha, double minprop,
       bool holdout, PredictionType prediction_type, uint num_random_splits, uint max_depth);
-  void initR(std::string dependent_variable_name, std::unique_ptr<Data> input_data, uint mtry, uint num_trees,
-      std::ostream* verbose_out, uint seed, uint num_threads, ImportanceMode importance_mode, uint min_node_size,
+  void initR(std::unique_ptr<Data> input_data, uint mtry, uint num_trees, std::ostream* verbose_out, uint seed,
+      uint num_threads, ImportanceMode importance_mode, uint min_node_size,
       std::vector<std::vector<double>>& split_select_weights,
-      const std::vector<std::string>& always_split_variable_names, std::string status_variable_name,
-      bool prediction_mode, bool sample_with_replacement, const std::vector<std::string>& unordered_variable_names,
-      bool memory_saving_splitting, SplitRule splitrule, std::vector<double>& case_weights,
-      std::vector<std::vector<size_t>>& manual_inbag, bool predict_all, bool keep_inbag,
-      std::vector<double>& sample_fraction, double alpha, double minprop, bool holdout, PredictionType prediction_type,
-      uint num_random_splits, bool order_snps, uint max_depth);
-  void init(std::string dependent_variable_name, MemoryMode memory_mode, std::unique_ptr<Data> input_data, uint mtry,
-      std::string output_prefix, uint num_trees, uint seed, uint num_threads, ImportanceMode importance_mode,
-      uint min_node_size, std::string status_variable_name, bool prediction_mode, bool sample_with_replacement,
+      const std::vector<std::string>& always_split_variable_names, bool prediction_mode, bool sample_with_replacement,
       const std::vector<std::string>& unordered_variable_names, bool memory_saving_splitting, SplitRule splitrule,
-      bool predict_all, std::vector<double>& sample_fraction, double alpha, double minprop, bool holdout,
+      std::vector<double>& case_weights, std::vector<std::vector<size_t>>& manual_inbag, bool predict_all,
+      bool keep_inbag, std::vector<double>& sample_fraction, double alpha, double minprop, bool holdout,
       PredictionType prediction_type, uint num_random_splits, bool order_snps, uint max_depth);
-  virtual void initInternal(std::string status_variable_name) = 0;
+  void init(MemoryMode memory_mode, std::unique_ptr<Data> input_data, uint mtry, std::string output_prefix,
+      uint num_trees, uint seed, uint num_threads, ImportanceMode importance_mode, uint min_node_size,
+      bool prediction_mode, bool sample_with_replacement, const std::vector<std::string>& unordered_variable_names,
+      bool memory_saving_splitting, SplitRule splitrule, bool predict_all, std::vector<double>& sample_fraction,
+      double alpha, double minprop, bool holdout, PredictionType prediction_type, uint num_random_splits,
+      bool order_snps, uint max_depth);
+  virtual void initInternal() = 0;
 
   // Grow or predict
   void run(bool verbose, bool compute_oob_error);
@@ -109,9 +108,6 @@ class Forest {
   const std::vector<std::vector<std::vector<double>>>& getPredictions() const {
     return predictions;
   }
-  size_t getDependentVarId() const {
-    return dependent_varID;
-  }
   size_t getNumTrees() const {
     return num_trees;
   }
@@ -165,6 +161,10 @@ class Forest {
   // Load forest from file
   void loadFromFile(std::string filename);
   virtual void loadFromFileInternal(std::ifstream& infile) = 0;
+  void loadDependentVariableNamesFromFile(std::string filename);
+
+  // Load data from file
+  std::unique_ptr<Data> loadDataFromFile(const std::string& data_path);
 
   // Set split select weights and variables to be always considered for splitting
   void setSplitWeightVector(std::vector<std::vector<double>>& split_select_weights);
@@ -180,13 +180,12 @@ class Forest {
   // Verbose output stream, cout if verbose==true, logfile if not
   std::ostream* verbose_out;
 
+  std::vector<std::string> dependent_variable_names; // time,status for survival
   size_t num_trees;
   uint mtry;
   uint min_node_size;
-  size_t num_variables;
   size_t num_independent_variables;
   uint seed;
-  size_t dependent_varID;
   size_t num_samples;
   bool prediction_mode;
   MemoryMode memory_mode;
diff --git a/src/ForestClassification.cpp b/src/ForestClassification.cpp
index e33144854..1ca6d92b5 100644
--- a/src/ForestClassification.cpp
+++ b/src/ForestClassification.cpp
@@ -24,12 +24,11 @@
 
 namespace ranger {
 
-void ForestClassification::loadForest(size_t dependent_varID, size_t num_trees,
+void ForestClassification::loadForest(size_t num_trees,
     std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
     std::vector<std::vector<size_t>>& forest_split_varIDs, std::vector<std::vector<double>>& forest_split_values,
     std::vector<double>& class_values, std::vector<bool>& is_ordered_variable) {
 
-  this->dependent_varID = dependent_varID;
   this->num_trees = num_trees;
   this->class_values = class_values;
   data->setIsOrderedVariable(is_ordered_variable);
@@ -46,11 +45,11 @@ void ForestClassification::loadForest(size_t dependent_varID, size_t num_trees,
   equalSplit(thread_ranges, 0, num_trees - 1, num_threads);
 }
 
-void ForestClassification::initInternal(std::string status_variable_name) {
+void ForestClassification::initInternal() {
 
   // If mtry not set, use floored square root of number of independent variables.
   if (mtry == 0) {
-    unsigned long temp = sqrt((double) (num_variables - 1));
+    unsigned long temp = sqrt((double) num_independent_variables);
     mtry = std::max((unsigned long) 1, temp);
   }
 
@@ -62,7 +61,7 @@ void ForestClassification::initInternal(std::string status_variable_name) {
   // Create class_values and response_classIDs
   if (!prediction_mode) {
     for (size_t i = 0; i < num_samples; ++i) {
-      double value = data->get(i, dependent_varID);
+      double value = data->get_y(i, 0);
 
       // If classID is already in class_values, use ID. Else create a new one.
       uint classID = find(class_values.begin(), class_values.end(), value) - class_values.begin();
@@ -168,7 +167,7 @@ void ForestClassification::computePredictionErrorInternal() {
     double predicted_value = predictions[0][0][i];
     if (!std::isnan(predicted_value)) {
       ++num_predictions;
-      double real_value = data->get(i, dependent_varID);
+      double real_value = data->get_y(i, 0);
       if (predicted_value != real_value) {
         ++num_missclassifications;
       }
@@ -268,7 +267,7 @@ void ForestClassification::writePredictionFile() {
 void ForestClassification::saveToFileInternal(std::ofstream& outfile) {
 
   // Write num_variables
-  outfile.write((char*) &num_variables, sizeof(num_variables));
+  outfile.write((char*) &num_independent_variables, sizeof(num_independent_variables));
 
   // Write treetype
   TreeType treetype = TREE_CLASSIFICATION;
@@ -304,13 +303,9 @@ void ForestClassification::loadFromFileInternal(std::ifstream& infile) {
     std::vector<double> split_values;
     readVector1D(split_values, infile);
 
-    // If dependent variable not in test data, change variable IDs accordingly
-    if (num_variables_saved > num_variables) {
-      for (auto& varID : split_varIDs) {
-        if (varID >= dependent_varID) {
-          --varID;
-        }
-      }
+    // If dependent variable not in test data, throw error
+    if (num_variables_saved != num_independent_variables) {
+      throw std::runtime_error("Number of independent variables in data does not match with the loaded forest.");
     }
 
     // Create tree
diff --git a/src/ForestClassification.h b/src/ForestClassification.h
index fc23c70ea..5abb8f280 100644
--- a/src/ForestClassification.h
+++ b/src/ForestClassification.h
@@ -31,8 +31,7 @@ class ForestClassification: public Forest {
 
   virtual ~ForestClassification() override = default;
 
-  void loadForest(size_t dependent_varID, size_t num_trees,
-      std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
+  void loadForest(size_t num_trees, std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
       std::vector<std::vector<size_t>>& forest_split_varIDs, std::vector<std::vector<double>>& forest_split_values,
       std::vector<double>& class_values, std::vector<bool>& is_ordered_variable);
 
@@ -45,7 +44,7 @@ class ForestClassification: public Forest {
   }
 
 protected:
-  void initInternal(std::string status_variable_name) override;
+  void initInternal() override;
   void growInternal() override;
   void allocatePredictMemory() override;
   void predictInternal(size_t sample_idx) override;
diff --git a/src/ForestProbability.cpp b/src/ForestProbability.cpp
index a5f7c8f55..56654990f 100644
--- a/src/ForestProbability.cpp
+++ b/src/ForestProbability.cpp
@@ -18,13 +18,12 @@
 
 namespace ranger {
 
-void ForestProbability::loadForest(size_t dependent_varID, size_t num_trees,
+void ForestProbability::loadForest(size_t num_trees,
     std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
     std::vector<std::vector<size_t>>& forest_split_varIDs, std::vector<std::vector<double>>& forest_split_values,
     std::vector<double>& class_values, std::vector<std::vector<std::vector<double>>>& forest_terminal_class_counts,
     std::vector<bool>& is_ordered_variable) {
 
-  this->dependent_varID = dependent_varID;
   this->num_trees = num_trees;
   this->class_values = class_values;
   data->setIsOrderedVariable(is_ordered_variable);
@@ -51,11 +50,11 @@ std::vector<std::vector<std::vector<double>>> ForestProbability::getTerminalClas
   return result;
 }
 
-void ForestProbability::initInternal(std::string status_variable_name) {
+void ForestProbability::initInternal() {
 
   // If mtry not set, use floored square root of number of independent variables.
   if (mtry == 0) {
-    unsigned long temp = sqrt((double) (num_variables - 1));
+    unsigned long temp = sqrt((double) num_independent_variables);
     mtry = std::max((unsigned long) 1, temp);
   }
 
@@ -67,7 +66,7 @@ void ForestProbability::initInternal(std::string status_variable_name) {
   // Create class_values and response_classIDs
   if (!prediction_mode) {
     for (size_t i = 0; i < num_samples; ++i) {
-      double value = data->get(i, dependent_varID);
+      double value = data->get_y(i, 0);
 
       // If classID is already in class_values, use ID. Else create a new one.
       uint classID = find(class_values.begin(), class_values.end(), value) - class_values.begin();
@@ -262,7 +261,7 @@ void ForestProbability::writePredictionFile() {
 void ForestProbability::saveToFileInternal(std::ofstream& outfile) {
 
 // Write num_variables
-  outfile.write((char*) &num_variables, sizeof(num_variables));
+  outfile.write((char*) &num_independent_variables, sizeof(num_independent_variables));
 
 // Write treetype
   TreeType treetype = TREE_PROBABILITY;
@@ -311,13 +310,9 @@ void ForestProbability::loadFromFileInternal(std::ifstream& infile) {
       terminal_class_counts[terminal_nodes[j]] = terminal_class_counts_vector[j];
     }
 
-    // If dependent variable not in test data, change variable IDs accordingly
-    if (num_variables_saved > num_variables) {
-      for (auto& varID : split_varIDs) {
-        if (varID >= dependent_varID) {
-          --varID;
-        }
-      }
+    // If dependent variable not in test data, throw error
+    if (num_variables_saved != num_independent_variables) {
+      throw std::runtime_error("Number of independent variables in data does not match with the loaded forest.");
     }
 
     // Create tree
diff --git a/src/ForestProbability.h b/src/ForestProbability.h
index aa42c893f..47f0ac865 100644
--- a/src/ForestProbability.h
+++ b/src/ForestProbability.h
@@ -31,8 +31,7 @@ class ForestProbability: public Forest {
 
   virtual ~ForestProbability() override = default;
 
-  void loadForest(size_t dependent_varID, size_t num_trees,
-      std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
+  void loadForest(size_t num_trees, std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
       std::vector<std::vector<size_t>>& forest_split_varIDs, std::vector<std::vector<double>>& forest_split_values,
       std::vector<double>& class_values, std::vector<std::vector<std::vector<double>>>& forest_terminal_class_counts,
       std::vector<bool>& is_ordered_variable);
@@ -48,7 +47,7 @@ class ForestProbability: public Forest {
   }
 
 protected:
-  void initInternal(std::string status_variable_name) override;
+  void initInternal() override;
   void growInternal() override;
   void allocatePredictMemory() override;
   void predictInternal(size_t sample_idx) override;
diff --git a/src/ForestRegression.cpp b/src/ForestRegression.cpp
index dbc9e28af..f15c5693c 100644
--- a/src/ForestRegression.cpp
+++ b/src/ForestRegression.cpp
@@ -20,12 +20,11 @@
 
 namespace ranger {
 
-void ForestRegression::loadForest(size_t dependent_varID, size_t num_trees,
+void ForestRegression::loadForest(size_t num_trees,
     std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
     std::vector<std::vector<size_t>>& forest_split_varIDs, std::vector<std::vector<double>>& forest_split_values,
     std::vector<bool>& is_ordered_variable) {
 
-  this->dependent_varID = dependent_varID;
   this->num_trees = num_trees;
   data->setIsOrderedVariable(is_ordered_variable);
 
@@ -40,11 +39,11 @@ void ForestRegression::loadForest(size_t dependent_varID, size_t num_trees,
   equalSplit(thread_ranges, 0, num_trees - 1, num_threads);
 }
 
-void ForestRegression::initInternal(std::string status_variable_name) {
+void ForestRegression::initInternal() {
 
   // If mtry not set, use floored square root of number of independent variables
   if (mtry == 0) {
-    unsigned long temp = sqrt((double) (num_variables - 1));
+    unsigned long temp = sqrt((double) num_independent_variables);
     mtry = std::max((unsigned long) 1, temp);
   }
 
@@ -122,7 +121,7 @@ void ForestRegression::computePredictionErrorInternal() {
       ++num_predictions;
       predictions[0][0][i] /= (double) samples_oob_count[i];
       double predicted_value = predictions[0][0][i];
-      double real_value = data->get(i, dependent_varID);
+      double real_value = data->get_y(i, 0);
       overall_prediction_error += (predicted_value - real_value) * (predicted_value - real_value);
     } else {
       predictions[0][0][i] = NAN;
@@ -196,7 +195,7 @@ void ForestRegression::writePredictionFile() {
 void ForestRegression::saveToFileInternal(std::ofstream& outfile) {
 
 // Write num_variables
-  outfile.write((char*) &num_variables, sizeof(num_variables));
+  outfile.write((char*) &num_independent_variables, sizeof(num_independent_variables));
 
 // Write treetype
   TreeType treetype = TREE_REGRESSION;
@@ -226,13 +225,9 @@ void ForestRegression::loadFromFileInternal(std::ifstream& infile) {
     std::vector<double> split_values;
     readVector1D(split_values, infile);
 
-    // If dependent variable not in test data, change variable IDs accordingly
-    if (num_variables_saved > num_variables) {
-      for (auto& varID : split_varIDs) {
-        if (varID >= dependent_varID) {
-          --varID;
-        }
-      }
+    // If dependent variable not in test data, throw error
+    if (num_variables_saved != num_independent_variables) {
+      throw std::runtime_error("Number of independent variables in data does not match with the loaded forest.");
     }
 
     // Create tree
diff --git a/src/ForestRegression.h b/src/ForestRegression.h
index bd59d22dc..62689d38e 100644
--- a/src/ForestRegression.h
+++ b/src/ForestRegression.h
@@ -29,13 +29,12 @@ class ForestRegression: public Forest {
 
   virtual ~ForestRegression() override = default;
 
-  void loadForest(size_t dependent_varID, size_t num_trees,
-      std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
+  void loadForest(size_t num_trees, std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
       std::vector<std::vector<size_t>>& forest_split_varIDs, std::vector<std::vector<double>>& forest_split_values,
       std::vector<bool>& is_ordered_variable);
 
 private:
-  void initInternal(std::string status_variable_name) override;
+  void initInternal() override;
   void growInternal() override;
   void allocatePredictMemory() override;
   void predictInternal(size_t sample_idx) override;
diff --git a/src/ForestSurvival.cpp b/src/ForestSurvival.cpp
index 83c357ea9..cae6af865 100644
--- a/src/ForestSurvival.cpp
+++ b/src/ForestSurvival.cpp
@@ -21,14 +21,11 @@
 
 namespace ranger {
 
-void ForestSurvival::loadForest(size_t dependent_varID, size_t num_trees,
-    std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
+void ForestSurvival::loadForest(size_t num_trees, std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
     std::vector<std::vector<size_t>>& forest_split_varIDs, std::vector<std::vector<double>>& forest_split_values,
-    size_t status_varID, std::vector<std::vector<std::vector<double>> >& forest_chf,
-    std::vector<double>& unique_timepoints, std::vector<bool>& is_ordered_variable) {
+    std::vector<std::vector<std::vector<double>> >& forest_chf, std::vector<double>& unique_timepoints,
+    std::vector<bool>& is_ordered_variable) {
 
-  this->dependent_varID = dependent_varID;
-  this->status_varID = status_varID;
   this->num_trees = num_trees;
   this->unique_timepoints = unique_timepoints;
   data->setIsOrderedVariable(is_ordered_variable);
@@ -55,18 +52,11 @@ std::vector<std::vector<std::vector<double>>> ForestSurvival::getChf() const {
   return result;
 }
 
-void ForestSurvival::initInternal(std::string status_variable_name) {
-
-  // Convert status variable name to ID
-  if (!prediction_mode && !status_variable_name.empty()) {
-    status_varID = data->getVariableID(status_variable_name);
-  }
-
-  data->addNoSplitVariable(status_varID);
+void ForestSurvival::initInternal() {
 
   // If mtry not set, use floored square root of number of independent variables.
   if (mtry == 0) {
-    unsigned long temp = ceil(sqrt((double) (num_variables - 2)));
+    unsigned long temp = ceil(sqrt((double) num_independent_variables));
     mtry = std::max((unsigned long) 1, temp);
   }
 
@@ -78,7 +68,7 @@ void ForestSurvival::initInternal(std::string status_variable_name) {
   // Create unique timepoints
   std::set<double> unique_timepoint_set;
   for (size_t i = 0; i < num_samples; ++i) {
-    unique_timepoint_set.insert(data->get(i, dependent_varID));
+    unique_timepoint_set.insert(data->get_y(i, 0));
   }
   unique_timepoints.reserve(unique_timepoint_set.size());
   for (auto& t : unique_timepoint_set) {
@@ -88,7 +78,7 @@ void ForestSurvival::initInternal(std::string status_variable_name) {
   // Create response_timepointIDs
   if (!prediction_mode) {
     for (size_t i = 0; i < num_samples; ++i) {
-      double value = data->get(i, dependent_varID);
+      double value = data->get_y(i, 0);
 
       // If timepoint is already in unique_timepoints, use ID. Else create a new one.
       uint timepointID = find(unique_timepoints.begin(), unique_timepoints.end(), value) - unique_timepoints.begin();
@@ -105,7 +95,7 @@ void ForestSurvival::initInternal(std::string status_variable_name) {
 void ForestSurvival::growInternal() {
   trees.reserve(num_trees);
   for (size_t i = 0; i < num_trees; ++i) {
-    trees.push_back(make_unique<TreeSurvival>(&unique_timepoints, status_varID, &response_timepointIDs));
+    trees.push_back(make_unique<TreeSurvival>(&unique_timepoints, &response_timepointIDs));
   }
 }
 
@@ -187,15 +177,16 @@ void ForestSurvival::computePredictionErrorInternal() {
   }
 
   // Use all samples which are OOB at least once
-  overall_prediction_error = 1 - computeConcordanceIndex(*data, sum_chf, dependent_varID, status_varID, oob_sampleIDs);
+  overall_prediction_error = 1 - computeConcordanceIndex(*data, sum_chf, oob_sampleIDs);
 }
 
 // #nocov start
 void ForestSurvival::writeOutputInternal() {
   if (verbose_out) {
     *verbose_out << "Tree type:                         " << "Survival" << std::endl;
-    *verbose_out << "Status variable name:              " << data->getVariableNames()[status_varID] << std::endl;
-    *verbose_out << "Status variable ID:                " << status_varID << std::endl;
+    if (dependent_variable_names.size() >= 2) {
+      *verbose_out << "Status variable name:              " << dependent_variable_names[1] << std::endl;
+    }
   }
 }
 
@@ -265,15 +256,12 @@ void ForestSurvival::writePredictionFile() {
 void ForestSurvival::saveToFileInternal(std::ofstream& outfile) {
 
   // Write num_variables
-  outfile.write((char*) &num_variables, sizeof(num_variables));
+  outfile.write((char*) &num_independent_variables, sizeof(num_independent_variables));
 
   // Write treetype
   TreeType treetype = TREE_SURVIVAL;
   outfile.write((char*) &treetype, sizeof(treetype));
 
-  // Write status_varID
-  outfile.write((char*) &status_varID, sizeof(status_varID));
-
   // Write unique timepoints
   saveVector1D(unique_timepoints, outfile);
 }
@@ -291,9 +279,6 @@ void ForestSurvival::loadFromFileInternal(std::ifstream& infile) {
     throw std::runtime_error("Wrong treetype. Loaded file is not a survival forest.");
   }
 
-  // Read status_varID
-  infile.read((char*) &status_varID, sizeof(status_varID));
-
   // Read unique timepoints
   unique_timepoints.clear();
   readVector1D(unique_timepoints, infile);
@@ -324,20 +309,9 @@ void ForestSurvival::loadFromFileInternal(std::ifstream& infile) {
       chf[terminal_nodes[j]] = chf_vector[j];
     }
 
-    // If dependent variable not in test data, change variable IDs accordingly
-    if (num_variables_saved > num_variables) {
-      for (auto& varID : split_varIDs) {
-        if (varID >= dependent_varID) {
-          --varID;
-        }
-      }
-    }
-    if (num_variables_saved > num_variables + 1) {
-      for (auto& varID : split_varIDs) {
-        if (varID >= status_varID) {
-          --varID;
-        }
-      }
+    // If dependent variable not in test data, throw error
+    if (num_variables_saved != num_independent_variables) {
+      throw std::runtime_error("Number of independent variables in data does not match with the loaded forest.");
     }
 
     // Create tree
diff --git a/src/ForestSurvival.h b/src/ForestSurvival.h
index 4fc69171f..15b8a87fe 100644
--- a/src/ForestSurvival.h
+++ b/src/ForestSurvival.h
@@ -30,23 +30,19 @@ class ForestSurvival: public Forest {
 
   virtual ~ForestSurvival() override = default;
 
-  void loadForest(size_t dependent_varID, size_t num_trees,
-      std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
+  void loadForest(size_t num_trees, std::vector<std::vector<std::vector<size_t>> >& forest_child_nodeIDs,
       std::vector<std::vector<size_t>>& forest_split_varIDs, std::vector<std::vector<double>>& forest_split_values,
-      size_t status_varID, std::vector<std::vector<std::vector<double>> >& forest_chf,
-      std::vector<double>& unique_timepoints, std::vector<bool>& is_ordered_variable);
+      std::vector<std::vector<std::vector<double>> >& forest_chf, std::vector<double>& unique_timepoints,
+      std::vector<bool>& is_ordered_variable);
 
   std::vector<std::vector<std::vector<double>>> getChf() const;
 
-  size_t getStatusVarId() const {
-    return status_varID;
-  }
   const std::vector<double>& getUniqueTimepoints() const {
     return unique_timepoints;
   }
 
 private:
-  void initInternal(std::string status_variable_name) override;
+  void initInternal() override;
   void growInternal() override;
   void allocatePredictMemory() override;
   void predictInternal(size_t sample_idx) override;
@@ -57,7 +53,6 @@ class ForestSurvival: public Forest {
   void saveToFileInternal(std::ofstream& outfile) override;
   void loadFromFileInternal(std::ifstream& infile) override;
 
-  size_t status_varID;
   std::vector<double> unique_timepoints;
   std::vector<size_t> response_timepointIDs;
 
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 614e7330a..ca7b8046f 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -8,14 +8,14 @@
 using namespace Rcpp;
 
 // rangerCpp
-Rcpp::List rangerCpp(uint treetype, std::string dependent_variable_name, Rcpp::NumericMatrix& input_data, std::vector<std::string> variable_names, uint mtry, uint num_trees, bool verbose, uint seed, uint num_threads, bool write_forest, uint importance_mode_r, uint min_node_size, std::vector<std::vector<double>>& split_select_weights, bool use_split_select_weights, std::vector<std::string>& always_split_variable_names, bool use_always_split_variable_names, std::string status_variable_name, bool prediction_mode, Rcpp::List loaded_forest, Rcpp::RawMatrix snp_data, bool sample_with_replacement, bool probability, std::vector<std::string>& unordered_variable_names, bool use_unordered_variable_names, bool save_memory, uint splitrule_r, std::vector<double>& case_weights, bool use_case_weights, std::vector<double>& class_weights, bool predict_all, bool keep_inbag, std::vector<double>& sample_fraction, double alpha, double minprop, bool holdout, uint prediction_type_r, uint num_random_splits, Eigen::SparseMatrix<double>& sparse_data, bool use_sparse_data, bool order_snps, bool oob_error, uint max_depth, std::vector<std::vector<size_t>>& inbag, bool use_inbag);
-RcppExport SEXP _ranger_rangerCpp(SEXP treetypeSEXP, SEXP dependent_variable_nameSEXP, SEXP input_dataSEXP, SEXP variable_namesSEXP, SEXP mtrySEXP, SEXP num_treesSEXP, SEXP verboseSEXP, SEXP seedSEXP, SEXP num_threadsSEXP, SEXP write_forestSEXP, SEXP importance_mode_rSEXP, SEXP min_node_sizeSEXP, SEXP split_select_weightsSEXP, SEXP use_split_select_weightsSEXP, SEXP always_split_variable_namesSEXP, SEXP use_always_split_variable_namesSEXP, SEXP status_variable_nameSEXP, SEXP prediction_modeSEXP, SEXP loaded_forestSEXP, SEXP snp_dataSEXP, SEXP sample_with_replacementSEXP, SEXP probabilitySEXP, SEXP unordered_variable_namesSEXP, SEXP use_unordered_variable_namesSEXP, SEXP save_memorySEXP, SEXP splitrule_rSEXP, SEXP case_weightsSEXP, SEXP use_case_weightsSEXP, SEXP class_weightsSEXP, SEXP predict_allSEXP, SEXP keep_inbagSEXP, SEXP sample_fractionSEXP, SEXP alphaSEXP, SEXP minpropSEXP, SEXP holdoutSEXP, SEXP prediction_type_rSEXP, SEXP num_random_splitsSEXP, SEXP sparse_dataSEXP, SEXP use_sparse_dataSEXP, SEXP order_snpsSEXP, SEXP oob_errorSEXP, SEXP max_depthSEXP, SEXP inbagSEXP, SEXP use_inbagSEXP) {
+Rcpp::List rangerCpp(uint treetype, Rcpp::NumericMatrix& input_x, Rcpp::NumericMatrix& input_y, std::vector<std::string> variable_names, uint mtry, uint num_trees, bool verbose, uint seed, uint num_threads, bool write_forest, uint importance_mode_r, uint min_node_size, std::vector<std::vector<double>>& split_select_weights, bool use_split_select_weights, std::vector<std::string>& always_split_variable_names, bool use_always_split_variable_names, bool prediction_mode, Rcpp::List loaded_forest, Rcpp::RawMatrix snp_data, bool sample_with_replacement, bool probability, std::vector<std::string>& unordered_variable_names, bool use_unordered_variable_names, bool save_memory, uint splitrule_r, std::vector<double>& case_weights, bool use_case_weights, std::vector<double>& class_weights, bool predict_all, bool keep_inbag, std::vector<double>& sample_fraction, double alpha, double minprop, bool holdout, uint prediction_type_r, uint num_random_splits, Eigen::SparseMatrix<double>& sparse_x, bool use_sparse_data, bool order_snps, bool oob_error, uint max_depth, std::vector<std::vector<size_t>>& inbag, bool use_inbag);
+RcppExport SEXP _ranger_rangerCpp(SEXP treetypeSEXP, SEXP input_xSEXP, SEXP input_ySEXP, SEXP variable_namesSEXP, SEXP mtrySEXP, SEXP num_treesSEXP, SEXP verboseSEXP, SEXP seedSEXP, SEXP num_threadsSEXP, SEXP write_forestSEXP, SEXP importance_mode_rSEXP, SEXP min_node_sizeSEXP, SEXP split_select_weightsSEXP, SEXP use_split_select_weightsSEXP, SEXP always_split_variable_namesSEXP, SEXP use_always_split_variable_namesSEXP, SEXP prediction_modeSEXP, SEXP loaded_forestSEXP, SEXP snp_dataSEXP, SEXP sample_with_replacementSEXP, SEXP probabilitySEXP, SEXP unordered_variable_namesSEXP, SEXP use_unordered_variable_namesSEXP, SEXP save_memorySEXP, SEXP splitrule_rSEXP, SEXP case_weightsSEXP, SEXP use_case_weightsSEXP, SEXP class_weightsSEXP, SEXP predict_allSEXP, SEXP keep_inbagSEXP, SEXP sample_fractionSEXP, SEXP alphaSEXP, SEXP minpropSEXP, SEXP holdoutSEXP, SEXP prediction_type_rSEXP, SEXP num_random_splitsSEXP, SEXP sparse_xSEXP, SEXP use_sparse_dataSEXP, SEXP order_snpsSEXP, SEXP oob_errorSEXP, SEXP max_depthSEXP, SEXP inbagSEXP, SEXP use_inbagSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< uint >::type treetype(treetypeSEXP);
-    Rcpp::traits::input_parameter< std::string >::type dependent_variable_name(dependent_variable_nameSEXP);
-    Rcpp::traits::input_parameter< Rcpp::NumericMatrix& >::type input_data(input_dataSEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericMatrix& >::type input_x(input_xSEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericMatrix& >::type input_y(input_ySEXP);
     Rcpp::traits::input_parameter< std::vector<std::string> >::type variable_names(variable_namesSEXP);
     Rcpp::traits::input_parameter< uint >::type mtry(mtrySEXP);
     Rcpp::traits::input_parameter< uint >::type num_trees(num_treesSEXP);
@@ -29,7 +29,6 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< bool >::type use_split_select_weights(use_split_select_weightsSEXP);
     Rcpp::traits::input_parameter< std::vector<std::string>& >::type always_split_variable_names(always_split_variable_namesSEXP);
     Rcpp::traits::input_parameter< bool >::type use_always_split_variable_names(use_always_split_variable_namesSEXP);
-    Rcpp::traits::input_parameter< std::string >::type status_variable_name(status_variable_nameSEXP);
     Rcpp::traits::input_parameter< bool >::type prediction_mode(prediction_modeSEXP);
     Rcpp::traits::input_parameter< Rcpp::List >::type loaded_forest(loaded_forestSEXP);
     Rcpp::traits::input_parameter< Rcpp::RawMatrix >::type snp_data(snp_dataSEXP);
@@ -50,14 +49,14 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< bool >::type holdout(holdoutSEXP);
     Rcpp::traits::input_parameter< uint >::type prediction_type_r(prediction_type_rSEXP);
     Rcpp::traits::input_parameter< uint >::type num_random_splits(num_random_splitsSEXP);
-    Rcpp::traits::input_parameter< Eigen::SparseMatrix<double>& >::type sparse_data(sparse_dataSEXP);
+    Rcpp::traits::input_parameter< Eigen::SparseMatrix<double>& >::type sparse_x(sparse_xSEXP);
     Rcpp::traits::input_parameter< bool >::type use_sparse_data(use_sparse_dataSEXP);
     Rcpp::traits::input_parameter< bool >::type order_snps(order_snpsSEXP);
     Rcpp::traits::input_parameter< bool >::type oob_error(oob_errorSEXP);
     Rcpp::traits::input_parameter< uint >::type max_depth(max_depthSEXP);
     Rcpp::traits::input_parameter< std::vector<std::vector<size_t>>& >::type inbag(inbagSEXP);
     Rcpp::traits::input_parameter< bool >::type use_inbag(use_inbagSEXP);
-    rcpp_result_gen = Rcpp::wrap(rangerCpp(treetype, dependent_variable_name, input_data, variable_names, mtry, num_trees, verbose, seed, num_threads, write_forest, importance_mode_r, min_node_size, split_select_weights, use_split_select_weights, always_split_variable_names, use_always_split_variable_names, status_variable_name, prediction_mode, loaded_forest, snp_data, sample_with_replacement, probability, unordered_variable_names, use_unordered_variable_names, save_memory, splitrule_r, case_weights, use_case_weights, class_weights, predict_all, keep_inbag, sample_fraction, alpha, minprop, holdout, prediction_type_r, num_random_splits, sparse_data, use_sparse_data, order_snps, oob_error, max_depth, inbag, use_inbag));
+    rcpp_result_gen = Rcpp::wrap(rangerCpp(treetype, input_x, input_y, variable_names, mtry, num_trees, verbose, seed, num_threads, write_forest, importance_mode_r, min_node_size, split_select_weights, use_split_select_weights, always_split_variable_names, use_always_split_variable_names, prediction_mode, loaded_forest, snp_data, sample_with_replacement, probability, unordered_variable_names, use_unordered_variable_names, save_memory, splitrule_r, case_weights, use_case_weights, class_weights, predict_all, keep_inbag, sample_fraction, alpha, minprop, holdout, prediction_type_r, num_random_splits, sparse_x, use_sparse_data, order_snps, oob_error, max_depth, inbag, use_inbag));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -75,7 +74,7 @@ END_RCPP
 }
 
 static const R_CallMethodDef CallEntries[] = {
-    {"_ranger_rangerCpp", (DL_FUNC) &_ranger_rangerCpp, 44},
+    {"_ranger_rangerCpp", (DL_FUNC) &_ranger_rangerCpp, 43},
     {"_ranger_numSmaller", (DL_FUNC) &_ranger_numSmaller, 2},
     {NULL, NULL, 0}
 };
diff --git a/src/Tree.cpp b/src/Tree.cpp
index c3dbf9399..1116c61d5 100644
--- a/src/Tree.cpp
+++ b/src/Tree.cpp
@@ -17,7 +17,7 @@
 namespace ranger {
 
 Tree::Tree() :
-    dependent_varID(0), mtry(0), num_samples(0), num_samples_oob(0), min_node_size(0), deterministic_varIDs(0), split_select_varIDs(
+    mtry(0), num_samples(0), num_samples_oob(0), min_node_size(0), deterministic_varIDs(0), split_select_varIDs(
         0), split_select_weights(0), case_weights(0), manual_inbag(0), oob_sampleIDs(0), holdout(false), keep_inbag(
         false), data(0), variable_importance(0), importance_mode(DEFAULT_IMPORTANCE_MODE), sample_with_replacement(
         true), sample_fraction(0), memory_saving_splitting(false), splitrule(DEFAULT_SPLITRULE), alpha(DEFAULT_ALPHA), minprop(
@@ -27,7 +27,7 @@ Tree::Tree() :
 
 Tree::Tree(std::vector<std::vector<size_t>>& child_nodeIDs, std::vector<size_t>& split_varIDs,
     std::vector<double>& split_values) :
-    dependent_varID(0), mtry(0), num_samples(0), num_samples_oob(0), min_node_size(0), deterministic_varIDs(0), split_select_varIDs(
+    mtry(0), num_samples(0), num_samples_oob(0), min_node_size(0), deterministic_varIDs(0), split_select_varIDs(
         0), split_select_weights(0), case_weights(0), manual_inbag(0), split_varIDs(split_varIDs), split_values(
         split_values), child_nodeIDs(child_nodeIDs), oob_sampleIDs(0), holdout(false), keep_inbag(false), data(0), variable_importance(
         0), importance_mode(DEFAULT_IMPORTANCE_MODE), sample_with_replacement(true), sample_fraction(0), memory_saving_splitting(
@@ -35,7 +35,7 @@ Tree::Tree(std::vector<std::vector<size_t>>& child_nodeIDs, std::vector<size_t>&
         DEFAULT_NUM_RANDOM_SPLITS), max_depth(DEFAULT_MAXDEPTH), depth(0), last_left_nodeID(0) {
 }
 
-void Tree::init(const Data* data, uint mtry, size_t dependent_varID, size_t num_samples, uint seed,
+void Tree::init(const Data* data, uint mtry, size_t num_samples, uint seed,
     std::vector<size_t>* deterministic_varIDs, std::vector<size_t>* split_select_varIDs,
     std::vector<double>* split_select_weights, ImportanceMode importance_mode, uint min_node_size,
     bool sample_with_replacement, bool memory_saving_splitting, SplitRule splitrule, std::vector<double>* case_weights,
@@ -44,7 +44,6 @@ void Tree::init(const Data* data, uint mtry, size_t dependent_varID, size_t num_
 
   this->data = data;
   this->mtry = mtry;
-  this->dependent_varID = dependent_varID;
   this->num_samples = num_samples;
   this->memory_saving_splitting = memory_saving_splitting;
 
@@ -164,7 +163,7 @@ void Tree::predict(const Data* prediction_data, bool oob_prediction) {
       // Move to child
       size_t split_varID = split_varIDs[nodeID];
 
-      double value = prediction_data->get(sample_idx, split_varID);
+      double value = prediction_data->get_x(sample_idx, split_varID);
       if (prediction_data->isOrderedVariable(split_varID)) {
         if (value <= split_values[nodeID]) {
           // Move to left child
@@ -194,7 +193,7 @@ void Tree::predict(const Data* prediction_data, bool oob_prediction) {
 
 void Tree::computePermutationImportance(std::vector<double>& forest_importance, std::vector<double>& forest_variance) {
 
-  size_t num_independent_variables = data->getNumCols() - data->getNoSplitVariables().size();
+  size_t num_independent_variables = data->getNumCols();
 
 // Compute normal prediction accuracy for each tree. Predictions already computed..
   double accuracy_normal = computePredictionAccuracyInternal();
@@ -208,16 +207,8 @@ void Tree::computePermutationImportance(std::vector<double>& forest_importance,
 // Randomly permute for all independent variables
   for (size_t i = 0; i < num_independent_variables; ++i) {
 
-    // Skip no split variables
-    size_t varID = i;
-    for (auto& skip : data->getNoSplitVariables()) {
-      if (varID >= skip) {
-        ++varID;
-      }
-    }
-
     // Permute and compute prediction accuracy again for this permutation and save difference
-    permuteAndPredictOobSamples(varID, permutations);
+    permuteAndPredictOobSamples(i, permutations);
     double accuracy_permuted = computePredictionAccuracyInternal();
     double accuracy_difference = accuracy_normal - accuracy_permuted;
     forest_importance[i] += accuracy_difference;
@@ -231,6 +222,7 @@ void Tree::computePermutationImportance(std::vector<double>& forest_importance,
   }
 }
 
+// #nocov start
 void Tree::appendToFile(std::ofstream& file) {
 
 // Save general fields
@@ -241,6 +233,7 @@ void Tree::appendToFile(std::ofstream& file) {
 // Call special functions for subclasses to save special fields.
   appendToFileInternal(file);
 }
+// #nocov end
 
 void Tree::createPossibleSplitVarSubset(std::vector<size_t>& result) {
 
@@ -248,20 +241,15 @@ void Tree::createPossibleSplitVarSubset(std::vector<size_t>& result) {
 
   // For corrected Gini importance add dummy variables
   if (importance_mode == IMP_GINI_CORRECTED) {
-    num_vars += data->getNumCols() - data->getNoSplitVariables().size();
+    num_vars += data->getNumCols();
   }
 
   // Randomly add non-deterministic variables (according to weights if needed)
   if (split_select_weights->empty()) {
     if (deterministic_varIDs->empty()) {
-      drawWithoutReplacementSkip(result, random_number_generator, num_vars, data->getNoSplitVariables(), mtry);
+      drawWithoutReplacement(result, random_number_generator, num_vars, mtry);
     } else {
-      std::vector<size_t> skip;
-      std::copy(data->getNoSplitVariables().begin(), data->getNoSplitVariables().end(),
-          std::inserter(skip, skip.end()));
-      std::copy(deterministic_varIDs->begin(), deterministic_varIDs->end(), std::inserter(skip, skip.end()));
-      std::sort(skip.begin(), skip.end());
-      drawWithoutReplacementSkip(result, random_number_generator, num_vars, skip, mtry);
+      drawWithoutReplacementSkip(result, random_number_generator, num_vars, (*deterministic_varIDs), mtry);
     }
   } else {
     drawWithoutReplacementWeighted(result, random_number_generator, *split_select_varIDs, mtry, *split_select_weights);
@@ -307,7 +295,7 @@ bool Tree::splitNode(size_t nodeID) {
     size_t pos = start_pos[nodeID];
     while (pos < start_pos[right_child_nodeID]) {
       size_t sampleID = sampleIDs[pos];
-      if (data->get(sampleID, split_varID) <= split_value) {
+      if (data->get_x(sampleID, split_varID) <= split_value) {
         // If going to left, do nothing
         ++pos;
       } else {
@@ -321,7 +309,7 @@ bool Tree::splitNode(size_t nodeID) {
     size_t pos = start_pos[nodeID];
     while (pos < start_pos[right_child_nodeID]) {
       size_t sampleID = sampleIDs[pos];
-      double level = data->get(sampleID, split_varID);
+      double level = data->get_x(sampleID, split_varID);
       size_t factorID = floor(level) - 1;
       size_t splitID = floor(split_value);
 
@@ -370,7 +358,7 @@ size_t Tree::dropDownSamplePermuted(size_t permuted_varID, size_t sampleID, size
     }
 
     // Move to child
-    double value = data->get(sampleID_final, split_varID);
+    double value = data->get_x(sampleID_final, split_varID);
     if (data->isOrderedVariable(split_varID)) {
       if (value <= split_values[nodeID]) {
         // Move to left child
diff --git a/src/Tree.h b/src/Tree.h
index 0d0a9f7d0..7aea85ca9 100644
--- a/src/Tree.h
+++ b/src/Tree.h
@@ -35,7 +35,7 @@ class Tree {
   Tree(const Tree&) = delete;
   Tree& operator=(const Tree&) = delete;
 
-  void init(const Data* data, uint mtry, size_t dependent_varID, size_t num_samples, uint seed,
+  void init(const Data* data, uint mtry, size_t num_samples, uint seed,
       std::vector<size_t>* deterministic_varIDs, std::vector<size_t>* split_select_varIDs,
       std::vector<double>* split_select_weights, ImportanceMode importance_mode, uint min_node_size,
       bool sample_with_replacement, bool memory_saving_splitting, SplitRule splitrule,
@@ -102,7 +102,6 @@ class Tree {
 
   virtual void cleanUpInternal() = 0;
 
-  size_t dependent_varID;
   uint mtry;
 
   // Number of samples (all samples, not only inbag for this tree)
diff --git a/src/TreeClassification.cpp b/src/TreeClassification.cpp
index d9f7d7c1b..bdb9bf929 100644
--- a/src/TreeClassification.cpp
+++ b/src/TreeClassification.cpp
@@ -88,7 +88,7 @@ bool TreeClassification::splitNodeInternal(size_t nodeID, std::vector<size_t>& p
   double pure_value = 0;
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, dependent_varID);
+    double value = data->get_y(sampleID, 0);
     if (pos != start_pos[nodeID] && value != pure_value) {
       pure = false;
       break;
@@ -127,7 +127,7 @@ double TreeClassification::computePredictionAccuracyInternal() {
   for (size_t i = 0; i < num_predictions; ++i) {
     size_t terminal_nodeID = prediction_terminal_nodeIDs[i];
     double predicted_value = split_values[terminal_nodeID];
-    double real_value = data->get(oob_sampleIDs[i], dependent_varID);
+    double real_value = data->get_y(oob_sampleIDs[i], 0);
     if (predicted_value != real_value) {
       ++num_missclassifications;
     }
@@ -229,7 +229,7 @@ void TreeClassification::findBestSplitValueSmallQ(size_t nodeID, size_t varID, s
   // Count samples in right child per class and possbile split
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, varID);
+    double value = data->get_x(sampleID, varID);
     uint sample_classID = (*response_classIDs)[sampleID];
 
     // Count samples until split_value reached
@@ -392,7 +392,7 @@ void TreeClassification::findBestSplitValueUnordered(size_t nodeID, size_t varID
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
       uint sample_classID = (*response_classIDs)[sampleID];
-      double value = data->get(sampleID, varID);
+      double value = data->get_x(sampleID, varID);
       size_t factorID = floor(value) - 1;
 
       // If in right child, count
@@ -518,7 +518,7 @@ void TreeClassification::findBestSplitValueExtraTrees(size_t nodeID, size_t varI
   // Count samples in right child per class and possbile split
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, varID);
+    double value = data->get_x(sampleID, varID);
     uint sample_classID = (*response_classIDs)[sampleID];
 
     // Count samples until split_value reached
@@ -632,7 +632,7 @@ void TreeClassification::findBestSplitValueExtraTreesUnordered(size_t nodeID, si
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
       uint sample_classID = (*response_classIDs)[sampleID];
-      double value = data->get(sampleID, varID);
+      double value = data->get_x(sampleID, varID);
       size_t factorID = floor(value) - 1;
 
       // If in right child, count
@@ -686,11 +686,6 @@ void TreeClassification::addGiniImportance(size_t nodeID, size_t varID, double d
 
   // No variable importance for no split variables
   size_t tempvarID = data->getUnpermutedVarID(varID);
-  for (auto& skip : data->getNoSplitVariables()) {
-    if (tempvarID >= skip) {
-      --tempvarID;
-    }
-  }
 
   // Subtract if corrected importance and permuted variable, else add
   if (importance_mode == IMP_GINI_CORRECTED && varID >= data->getNumCols()) {
diff --git a/src/TreeProbability.cpp b/src/TreeProbability.cpp
index 81da10aed..dbc390fe1 100644
--- a/src/TreeProbability.cpp
+++ b/src/TreeProbability.cpp
@@ -92,7 +92,7 @@ bool TreeProbability::splitNodeInternal(size_t nodeID, std::vector<size_t>& poss
   double pure_value = 0;
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, dependent_varID);
+    double value = data->get_y(sampleID, 0);
     if (pos != start_pos[nodeID] && value != pure_value) {
       pure = false;
       break;
@@ -233,7 +233,7 @@ void TreeProbability::findBestSplitValueSmallQ(size_t nodeID, size_t varID, size
   // Count samples in right child per class and possbile split
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, varID);
+    double value = data->get_x(sampleID, varID);
     uint sample_classID = (*response_classIDs)[sampleID];
 
     // Count samples until split_value reached
@@ -396,7 +396,7 @@ void TreeProbability::findBestSplitValueUnordered(size_t nodeID, size_t varID, s
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
       uint sample_classID = (*response_classIDs)[sampleID];
-      double value = data->get(sampleID, varID);
+      double value = data->get_x(sampleID, varID);
       size_t factorID = floor(value) - 1;
 
       // If in right child, count
@@ -522,7 +522,7 @@ void TreeProbability::findBestSplitValueExtraTrees(size_t nodeID, size_t varID,
   // Count samples in right child per class and possbile split
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, varID);
+    double value = data->get_x(sampleID, varID);
     uint sample_classID = (*response_classIDs)[sampleID];
 
     // Count samples until split_value reached
@@ -636,7 +636,7 @@ void TreeProbability::findBestSplitValueExtraTreesUnordered(size_t nodeID, size_
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
       uint sample_classID = (*response_classIDs)[sampleID];
-      double value = data->get(sampleID, varID);
+      double value = data->get_x(sampleID, varID);
       size_t factorID = floor(value) - 1;
 
       // If in right child, count
@@ -690,11 +690,6 @@ void TreeProbability::addImpurityImportance(size_t nodeID, size_t varID, double
 
   // No variable importance for no split variables
   size_t tempvarID = data->getUnpermutedVarID(varID);
-  for (auto& skip : data->getNoSplitVariables()) {
-    if (tempvarID >= skip) {
-      --tempvarID;
-    }
-  }
 
   // Subtract if corrected importance and permuted variable, else add
   if (importance_mode == IMP_GINI_CORRECTED && varID >= data->getNumCols()) {
diff --git a/src/TreeRegression.cpp b/src/TreeRegression.cpp
index 4545e52e9..53bac9322 100644
--- a/src/TreeRegression.cpp
+++ b/src/TreeRegression.cpp
@@ -48,7 +48,7 @@ double TreeRegression::estimate(size_t nodeID) {
   size_t num_samples_in_node = end_pos[nodeID] - start_pos[nodeID];
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    sum_responses_in_node += data->get(sampleID, dependent_varID);
+    sum_responses_in_node += data->get_y(sampleID, 0);
   }
   return (sum_responses_in_node / (double) num_samples_in_node);
 }
@@ -72,7 +72,7 @@ bool TreeRegression::splitNodeInternal(size_t nodeID, std::vector<size_t>& possi
   double pure_value = 0;
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, dependent_varID);
+    double value = data->get_y(sampleID, 0);
     if (pos != start_pos[nodeID] && value != pure_value) {
       pure = false;
       break;
@@ -115,7 +115,7 @@ double TreeRegression::computePredictionAccuracyInternal() {
   for (size_t i = 0; i < num_predictions; ++i) {
     size_t terminal_nodeID = prediction_terminal_nodeIDs[i];
     double predicted_value = split_values[terminal_nodeID];
-    double real_value = data->get(oob_sampleIDs[i], dependent_varID);
+    double real_value = data->get_y(oob_sampleIDs[i], 0);
     if (predicted_value != real_value) {
       sum_of_squares += (predicted_value - real_value) * (predicted_value - real_value);
     }
@@ -134,7 +134,7 @@ bool TreeRegression::findBestSplit(size_t nodeID, std::vector<size_t>& possible_
   double sum_node = 0;
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    sum_node += data->get(sampleID, dependent_varID);
+    sum_node += data->get_y(sampleID, 0);
   }
 
   // For all possible split variables
@@ -212,8 +212,8 @@ void TreeRegression::findBestSplitValueSmallQ(size_t nodeID, size_t varID, doubl
   // Sum in right child and possbile split
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, varID);
-    double response = data->get(sampleID, dependent_varID);
+    double value = data->get_x(sampleID, varID);
+    double response = data->get_y(sampleID, 0);
 
     // Count samples until split_value reached
     for (size_t i = 0; i < num_splits; ++i) {
@@ -265,7 +265,7 @@ void TreeRegression::findBestSplitValueLargeQ(size_t nodeID, size_t varID, doubl
     size_t sampleID = sampleIDs[pos];
     size_t index = data->getIndex(sampleID, varID);
 
-    sums[index] += data->get(sampleID, dependent_varID);
+    sums[index] += data->get_y(sampleID, 0);
     ++counter[index];
   }
 
@@ -350,8 +350,8 @@ void TreeRegression::findBestSplitValueUnordered(size_t nodeID, size_t varID, do
     // Sum in right child
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
-      double response = data->get(sampleID, dependent_varID);
-      double value = data->get(sampleID, varID);
+      double response = data->get_y(sampleID, 0);
+      double value = data->get_x(sampleID, varID);
       size_t factorID = floor(value) - 1;
 
       // If in right child, count
@@ -385,7 +385,7 @@ bool TreeRegression::findBestSplitMaxstat(size_t nodeID, std::vector<size_t>& po
   response.reserve(num_samples_node);
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    response.push_back(data->get(sampleID, dependent_varID));
+    response.push_back(data->get_y(sampleID, 0));
   }
   std::vector<double> ranks = rank(response);
 
@@ -407,7 +407,7 @@ bool TreeRegression::findBestSplitMaxstat(size_t nodeID, std::vector<size_t>& po
     x.reserve(num_samples_node);
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
-      x.push_back(data->get(sampleID, varID));
+      x.push_back(data->get_x(sampleID, varID));
     }
 
     // Order by x
@@ -490,7 +490,7 @@ bool TreeRegression::findBestSplitExtraTrees(size_t nodeID, std::vector<size_t>&
   double sum_node = 0;
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    sum_node += data->get(sampleID, dependent_varID);
+    sum_node += data->get_y(sampleID, 0);
   }
 
   // For all possible split variables
@@ -567,8 +567,8 @@ void TreeRegression::findBestSplitValueExtraTrees(size_t nodeID, size_t varID, d
   // Sum in right child and possbile split
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, varID);
-    double response = data->get(sampleID, dependent_varID);
+    double value = data->get_x(sampleID, varID);
+    double response = data->get_y(sampleID, 0);
 
     // Count samples until split_value reached
     for (size_t i = 0; i < num_splits; ++i) {
@@ -669,8 +669,8 @@ void TreeRegression::findBestSplitValueExtraTreesUnordered(size_t nodeID, size_t
     // Sum in right child
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
-      double response = data->get(sampleID, dependent_varID);
-      double value = data->get(sampleID, varID);
+      double response = data->get_y(sampleID, 0);
+      double value = data->get_x(sampleID, varID);
       size_t factorID = floor(value) - 1;
 
       // If in right child, count
@@ -706,7 +706,7 @@ bool TreeRegression::findBestSplitBeta(size_t nodeID, std::vector<size_t>& possi
   double sum_node = 0;
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    sum_node += data->get(sampleID, dependent_varID);
+    sum_node += data->get_y(sampleID, 0);
   }
 
   // For all possible split variables find best split value
@@ -766,8 +766,8 @@ void TreeRegression::findBestSplitValueBeta(size_t nodeID, size_t varID, double
   // Sum in right child and possbile split
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, varID);
-    double response = data->get(sampleID, dependent_varID);
+    double value = data->get_x(sampleID, varID);
+    double response = data->get_y(sampleID, 0);
 
     // Count samples until split_value reached
     for (size_t i = 0; i < num_splits; ++i) {
@@ -800,8 +800,8 @@ void TreeRegression::findBestSplitValueBeta(size_t nodeID, size_t varID, double
     double var_left = 0;
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
-      double value = data->get(sampleID, varID);
-      double response = data->get(sampleID, dependent_varID);
+      double value = data->get_x(sampleID, varID);
+      double response = data->get_y(sampleID, 0);
 
       if (value > possible_split_values[i]) {
         var_right += (response - mean_right) * (response - mean_right);
@@ -826,8 +826,8 @@ void TreeRegression::findBestSplitValueBeta(size_t nodeID, size_t varID, double
     double beta_loglik_left = 0;
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
-      double value = data->get(sampleID, varID);
-      double response = data->get(sampleID, dependent_varID);
+      double value = data->get_x(sampleID, varID);
+      double response = data->get_y(sampleID, 0);
 
       if (value > possible_split_values[i]) {
         beta_loglik_right += betaLogLik(response, mean_right, phi_right);
@@ -867,18 +867,13 @@ void TreeRegression::addImpurityImportance(size_t nodeID, size_t varID, double d
     double sum_node = 0;
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
-      sum_node += data->get(sampleID, dependent_varID);
+      sum_node += data->get_y(sampleID, 0);
     }
     best_decrease = decrease - sum_node * sum_node / (double) num_samples_node;
   }
 
   // No variable importance for no split variables
   size_t tempvarID = data->getUnpermutedVarID(varID);
-  for (auto& skip : data->getNoSplitVariables()) {
-    if (tempvarID >= skip) {
-      --tempvarID;
-    }
-  }
 
   // Subtract if corrected importance and permuted variable, else add
   if (importance_mode == IMP_GINI_CORRECTED && varID >= data->getNumCols()) {
diff --git a/src/TreeSurvival.cpp b/src/TreeSurvival.cpp
index 762a2351c..f9525a160 100644
--- a/src/TreeSurvival.cpp
+++ b/src/TreeSurvival.cpp
@@ -21,17 +21,16 @@
 
 namespace ranger {
 
-TreeSurvival::TreeSurvival(std::vector<double>* unique_timepoints, size_t status_varID,
-    std::vector<size_t>* response_timepointIDs) :
-    status_varID(status_varID), unique_timepoints(unique_timepoints), response_timepointIDs(response_timepointIDs), num_deaths(
-        0), num_samples_at_risk(0) {
+TreeSurvival::TreeSurvival(std::vector<double>* unique_timepoints, std::vector<size_t>* response_timepointIDs) :
+    unique_timepoints(unique_timepoints), response_timepointIDs(response_timepointIDs), num_deaths(0), num_samples_at_risk(
+        0) {
   this->num_timepoints = unique_timepoints->size();
 }
 
 TreeSurvival::TreeSurvival(std::vector<std::vector<size_t>>& child_nodeIDs, std::vector<size_t>& split_varIDs,
     std::vector<double>& split_values, std::vector<std::vector<double>> chf, std::vector<double>* unique_timepoints,
     std::vector<size_t>* response_timepointIDs) :
-    Tree(child_nodeIDs, split_varIDs, split_values), status_varID(0), unique_timepoints(unique_timepoints), response_timepointIDs(
+    Tree(child_nodeIDs, split_varIDs, split_values), unique_timepoints(unique_timepoints), response_timepointIDs(
         response_timepointIDs), chf(chf), num_deaths(0), num_samples_at_risk(0) {
   this->num_timepoints = unique_timepoints->size();
 }
@@ -84,7 +83,7 @@ double TreeSurvival::computePredictionAccuracyInternal() {
   }
 
   // Return concordance index
-  return computeConcordanceIndex(*data, sum_chf, dependent_varID, status_varID, oob_sampleIDs);
+  return computeConcordanceIndex(*data, sum_chf, oob_sampleIDs);
 }
 
 bool TreeSurvival::splitNodeInternal(size_t nodeID, std::vector<size_t>& possible_split_varIDs) {
@@ -169,11 +168,10 @@ bool TreeSurvival::findBestSplitMaxstat(size_t nodeID, std::vector<size_t>& poss
   status.reserve(num_samples_node);
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    time.push_back(data->get(sampleID, dependent_varID));
-    status.push_back(data->get(sampleID, status_varID));
+    time.push_back(data->get_y(sampleID, 0));
+    status.push_back(data->get_y(sampleID, 1));
   }
   std::vector<double> scores = logrankScores(time, status);
-  //std::vector<double> scores = logrankScoresData(data, dependent_varID, status_varID, sampleIDs[nodeID]);
 
   // Save split stats
   std::vector<double> pvalues;
@@ -193,7 +191,7 @@ bool TreeSurvival::findBestSplitMaxstat(size_t nodeID, std::vector<size_t>& poss
     x.reserve(num_samples_node);
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
-      x.push_back(data->get(sampleID, varID));
+      x.push_back(data->get_x(sampleID, varID));
     }
 
     // Order by x
@@ -286,7 +284,7 @@ void TreeSurvival::computeDeathCounts(size_t nodeID) {
 
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double survival_time = data->get(sampleID, dependent_varID);
+    double survival_time = data->get_y(sampleID, 0);
 
     size_t t = 0;
     while (t < num_timepoints && (*unique_timepoints)[t] < survival_time) {
@@ -297,7 +295,7 @@ void TreeSurvival::computeDeathCounts(size_t nodeID) {
     // Now t is the survival time, add to at risk and to death if death
     if (t < num_timepoints) {
       ++num_samples_at_risk[t];
-      if (data->get(sampleID, status_varID) == 1) {
+      if (data->get_y(sampleID, 1) == 1) {
         ++num_deaths[t];
       }
     }
@@ -311,7 +309,7 @@ void TreeSurvival::computeChildDeathCounts(size_t nodeID, size_t varID, std::vec
   // Count deaths in right child per timepoint and possbile split
   for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
     size_t sampleID = sampleIDs[pos];
-    double value = data->get(sampleID, varID);
+    double value = data->get_x(sampleID, varID);
     size_t survival_timeID = (*response_timepointIDs)[sampleID];
 
     // Count deaths until split_value reached
@@ -320,7 +318,7 @@ void TreeSurvival::computeChildDeathCounts(size_t nodeID, size_t varID, std::vec
       if (value > possible_split_values[i]) {
         ++num_samples_right_child[i];
         ++delta_samples_at_risk_right_child[i * num_timepoints + survival_timeID];
-        if (data->get(sampleID, status_varID) == 1) {
+        if (data->get_y(sampleID, 1) == 1) {
           ++num_deaths_right_child[i * num_timepoints + survival_timeID];
         }
       } else {
@@ -448,7 +446,7 @@ void TreeSurvival::findBestSplitValueLogRankUnordered(size_t nodeID, size_t varI
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
       size_t survival_timeID = (*response_timepointIDs)[sampleID];
-      double value = data->get(sampleID, varID);
+      double value = data->get_x(sampleID, varID);
       size_t factorID = floor(value) - 1;
 
       // If in right child, count
@@ -456,7 +454,7 @@ void TreeSurvival::findBestSplitValueLogRankUnordered(size_t nodeID, size_t varI
       if ((splitID & (1ULL << factorID))) {
         ++num_samples_right_child;
         ++delta_samples_at_risk_right_child[survival_timeID];
-        if (data->get(sampleID, status_varID) == 1) {
+        if (data->get_y(sampleID, 1) == 1) {
           ++num_deaths_right_child[survival_timeID];
         }
       }
@@ -526,9 +524,9 @@ void TreeSurvival::findBestSplitValueAUC(size_t nodeID, size_t varID, double& be
   // For all pairs
   for (size_t k = start_pos[nodeID]; k < end_pos[nodeID]; ++k) {
     size_t sample_k = sampleIDs[k];
-    double time_k = data->get(sample_k, dependent_varID);
-    double status_k = data->get(sample_k, status_varID);
-    double value_k = data->get(sample_k, varID);
+    double time_k = data->get_y(sample_k, 0);
+    double status_k = data->get_y(sample_k, 1);
+    double value_k = data->get_x(sample_k, varID);
 
     // Count samples in left node
     for (size_t i = 0; i < num_splits; ++i) {
@@ -540,9 +538,9 @@ void TreeSurvival::findBestSplitValueAUC(size_t nodeID, size_t varID, double& be
 
     for (size_t l = k + 1; l < end_pos[nodeID]; ++l) {
       size_t sample_l = sampleIDs[l];
-      double time_l = data->get(sample_l, dependent_varID);
-      double status_l = data->get(sample_l, status_varID);
-      double value_l = data->get(sample_l, varID);
+      double time_l = data->get_y(sample_l, 0);
+      double status_l = data->get_y(sample_l, 1);
+      double value_l = data->get_x(sample_l, varID);
 
       // Compute split
       computeAucSplit(time_k, time_l, status_k, status_l, value_k, value_l, num_splits, possible_split_values,
@@ -838,7 +836,7 @@ void TreeSurvival::findBestSplitValueExtraTreesUnordered(size_t nodeID, size_t v
     for (size_t pos = start_pos[nodeID]; pos < end_pos[nodeID]; ++pos) {
       size_t sampleID = sampleIDs[pos];
       size_t survival_timeID = (*response_timepointIDs)[sampleID];
-      double value = data->get(sampleID, varID);
+      double value = data->get_x(sampleID, varID);
       size_t factorID = floor(value) - 1;
 
       // If in right child, count
@@ -846,7 +844,7 @@ void TreeSurvival::findBestSplitValueExtraTreesUnordered(size_t nodeID, size_t v
       if ((splitID & (1ULL << factorID))) {
         ++num_samples_right_child;
         ++delta_samples_at_risk_right_child[survival_timeID];
-        if (data->get(sampleID, status_varID) == 1) {
+        if (data->get_y(sampleID, 1) == 1) {
           ++num_deaths_right_child[survival_timeID];
         }
       }
@@ -896,11 +894,6 @@ void TreeSurvival::addImpurityImportance(size_t nodeID, size_t varID, double dec
 
   // No variable importance for no split variables
   size_t tempvarID = data->getUnpermutedVarID(varID);
-  for (auto& skip : data->getNoSplitVariables()) {
-    if (tempvarID >= skip) {
-      --tempvarID;
-    }
-  }
 
   // Subtract if corrected importance and permuted variable, else add
   if (importance_mode == IMP_GINI_CORRECTED && varID >= data->getNumCols()) {
diff --git a/src/TreeSurvival.h b/src/TreeSurvival.h
index 613b476ff..5e0ed6781 100644
--- a/src/TreeSurvival.h
+++ b/src/TreeSurvival.h
@@ -21,7 +21,7 @@ namespace ranger {
 
 class TreeSurvival: public Tree {
 public:
-  TreeSurvival(std::vector<double>* unique_timepoints, size_t status_varID, std::vector<size_t>* response_timepointIDs);
+  TreeSurvival(std::vector<double>* unique_timepoints, std::vector<size_t>* response_timepointIDs);
 
   // Create from loaded forest
   TreeSurvival(std::vector<std::vector<size_t>>& child_nodeIDs, std::vector<size_t>& split_varIDs,
@@ -97,8 +97,6 @@ class TreeSurvival: public Tree {
     num_samples_at_risk.shrink_to_fit();
   }
 
-  size_t status_varID;
-
   // Unique time points for all individuals (not only this bootstrap), sorted
   const std::vector<double>* unique_timepoints;
   size_t num_timepoints;
diff --git a/src/rangerCpp.cpp b/src/rangerCpp.cpp
index ef3e916fb..6dc59843a 100644
--- a/src/rangerCpp.cpp
+++ b/src/rangerCpp.cpp
@@ -48,19 +48,20 @@ using namespace ranger;
 
 // [[Rcpp::depends(RcppEigen)]]
 // [[Rcpp::export]]
-Rcpp::List rangerCpp(uint treetype, std::string dependent_variable_name, Rcpp::NumericMatrix& input_data,
+Rcpp::List rangerCpp(uint treetype, Rcpp::NumericMatrix& input_x, Rcpp::NumericMatrix& input_y,
     std::vector<std::string> variable_names, uint mtry, uint num_trees, bool verbose, uint seed, uint num_threads,
     bool write_forest, uint importance_mode_r, uint min_node_size,
     std::vector<std::vector<double>>& split_select_weights, bool use_split_select_weights,
     std::vector<std::string>& always_split_variable_names, bool use_always_split_variable_names,
-    std::string status_variable_name, bool prediction_mode, Rcpp::List loaded_forest, Rcpp::RawMatrix snp_data,
+    bool prediction_mode, Rcpp::List loaded_forest, Rcpp::RawMatrix snp_data,
     bool sample_with_replacement, bool probability, std::vector<std::string>& unordered_variable_names,
     bool use_unordered_variable_names, bool save_memory, uint splitrule_r, std::vector<double>& case_weights,
     bool use_case_weights, std::vector<double>& class_weights, bool predict_all, bool keep_inbag,
     std::vector<double>& sample_fraction, double alpha, double minprop, bool holdout, uint prediction_type_r,
-    uint num_random_splits, Eigen::SparseMatrix<double>& sparse_data, bool use_sparse_data, bool order_snps, 
-    bool oob_error, uint max_depth, std::vector<std::vector<size_t>>& inbag, bool use_inbag) {
-
+    uint num_random_splits, Eigen::SparseMatrix<double>& sparse_x, 
+    bool use_sparse_data, bool order_snps, bool oob_error, uint max_depth, 
+    std::vector<std::vector<size_t>>& inbag, bool use_inbag) {
+  
   Rcpp::List result;
 
   try {
@@ -94,19 +95,18 @@ Rcpp::List rangerCpp(uint treetype, std::string dependent_variable_name, Rcpp::N
     size_t num_rows;
     size_t num_cols;
     if (use_sparse_data) {
-      num_rows = sparse_data.rows();
-      num_cols = sparse_data.cols();
+      num_rows = sparse_x.rows();
+      num_cols = sparse_x.cols();
     } else {
-      num_rows = input_data.nrow();
-      num_cols = input_data.ncol();
+      num_rows = input_x.nrow();
+      num_cols = input_x.ncol();
     }
 
     // Initialize data 
     if (use_sparse_data) {
-      data = make_unique<DataSparse>(sparse_data, variable_names, num_rows, num_cols);
+      data = make_unique<DataSparse>(sparse_x, input_y, variable_names, num_rows, num_cols);
     } else {
-      data = make_unique<DataRcpp>(input_data, variable_names, num_rows,
-          num_cols);
+      data = make_unique<DataRcpp>(input_x, input_y, variable_names, num_rows, num_cols);
     }
 
     // If there is snp data, add it
@@ -144,16 +144,14 @@ Rcpp::List rangerCpp(uint treetype, std::string dependent_variable_name, Rcpp::N
     PredictionType prediction_type = (PredictionType) prediction_type_r;
 
     // Init Ranger
-    forest->initR(dependent_variable_name, std::move(data), mtry, num_trees, verbose_out, seed, num_threads,
-        importance_mode, min_node_size, split_select_weights, always_split_variable_names, status_variable_name,
+    forest->initR(std::move(data), mtry, num_trees, verbose_out, seed, num_threads,
+        importance_mode, min_node_size, split_select_weights, always_split_variable_names,
         prediction_mode, sample_with_replacement, unordered_variable_names, save_memory, splitrule, case_weights,
         inbag, predict_all, keep_inbag, sample_fraction, alpha, minprop, holdout, prediction_type, num_random_splits, 
         order_snps, max_depth);
 
     // Load forest object if in prediction mode
     if (prediction_mode) {
-      size_t dependent_varID = loaded_forest["dependent.varID"];
-      //size_t num_trees = loaded_forest["num.trees"];
       std::vector<std::vector<std::vector<size_t>> > child_nodeIDs = loaded_forest["child.nodeIDs"];
       std::vector<std::vector<size_t>> split_varIDs = loaded_forest["split.varIDs"];
       std::vector<std::vector<double>> split_values = loaded_forest["split.values"];
@@ -162,23 +160,22 @@ Rcpp::List rangerCpp(uint treetype, std::string dependent_variable_name, Rcpp::N
       if (treetype == TREE_CLASSIFICATION) {
         std::vector<double> class_values = loaded_forest["class.values"];
         auto& temp = dynamic_cast<ForestClassification&>(*forest);
-        temp.loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values, class_values,
+        temp.loadForest(num_trees, child_nodeIDs, split_varIDs, split_values, class_values,
             is_ordered);
       } else if (treetype == TREE_REGRESSION) {
         auto& temp = dynamic_cast<ForestRegression&>(*forest);
-        temp.loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values, is_ordered);
+        temp.loadForest(num_trees, child_nodeIDs, split_varIDs, split_values, is_ordered);
       } else if (treetype == TREE_SURVIVAL) {
-        size_t status_varID = loaded_forest["status.varID"];
         std::vector<std::vector<std::vector<double>> > chf = loaded_forest["chf"];
         std::vector<double> unique_timepoints = loaded_forest["unique.death.times"];
         auto& temp = dynamic_cast<ForestSurvival&>(*forest);
-        temp.loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values, status_varID, chf,
+        temp.loadForest(num_trees, child_nodeIDs, split_varIDs, split_values, chf,
             unique_timepoints, is_ordered);
       } else if (treetype == TREE_PROBABILITY) {
         std::vector<double> class_values = loaded_forest["class.values"];
         std::vector<std::vector<std::vector<double>>> terminal_class_counts = loaded_forest["terminal.class.counts"];
         auto& temp = dynamic_cast<ForestProbability&>(*forest);
-        temp.loadForest(dependent_varID, num_trees, child_nodeIDs, split_varIDs, split_values, class_values,
+        temp.loadForest(num_trees, child_nodeIDs, split_varIDs, split_values, class_values,
             terminal_class_counts, is_ordered);
       }
     } else {
@@ -238,7 +235,6 @@ Rcpp::List rangerCpp(uint treetype, std::string dependent_variable_name, Rcpp::N
     // Save forest if needed
     if (write_forest) {
       Rcpp::List forest_object;
-      forest_object.push_back(forest->getDependentVarId(), "dependent.varID");
       forest_object.push_back(forest->getNumTrees(), "num.trees");
       forest_object.push_back(forest->getChildNodeIDs(), "child.nodeIDs");
       forest_object.push_back(forest->getSplitVarIDs(), "split.varIDs");
@@ -260,7 +256,6 @@ Rcpp::List rangerCpp(uint treetype, std::string dependent_variable_name, Rcpp::N
         forest_object.push_back(temp.getTerminalClassCounts(), "terminal.class.counts");
       } else if (treetype == TREE_SURVIVAL) {
         auto& temp = dynamic_cast<ForestSurvival&>(*forest);
-        forest_object.push_back(temp.getStatusVarId(), "status.varID");
         forest_object.push_back(temp.getChf(), "chf");
         forest_object.push_back(temp.getUniqueTimepoints(), "unique.death.times");
       }
diff --git a/src/utility.cpp b/src/utility.cpp
index 3725e5263..416ca7ede 100644
--- a/src/utility.cpp
+++ b/src/utility.cpp
@@ -81,6 +81,16 @@ void loadDoubleVectorFromFile(std::vector<double>& result, std::string filename)
   }
 } // #nocov end
 
+void drawWithoutReplacement(std::vector<size_t>& result, std::mt19937_64& random_number_generator, size_t max,
+    size_t num_samples) {
+  if (num_samples < max / 10) {
+    drawWithoutReplacementSimple(result, random_number_generator, max, num_samples);
+  } else {
+    //drawWithoutReplacementKnuth(result, random_number_generator, max, skip, num_samples);
+    drawWithoutReplacementFisherYates(result, random_number_generator, max, num_samples);
+  }
+}
+
 void drawWithoutReplacementSkip(std::vector<size_t>& result, std::mt19937_64& random_number_generator, size_t max,
     const std::vector<size_t>& skip, size_t num_samples) {
   if (num_samples < max / 10) {
@@ -91,6 +101,26 @@ void drawWithoutReplacementSkip(std::vector<size_t>& result, std::mt19937_64& ra
   }
 }
 
+void drawWithoutReplacementSimple(std::vector<size_t>& result, std::mt19937_64& random_number_generator, size_t max,
+    size_t num_samples) {
+
+  result.reserve(num_samples);
+
+  // Set all to not selected
+  std::vector<bool> temp;
+  temp.resize(max, false);
+
+  std::uniform_int_distribution<size_t> unif_dist(0, max - 1);
+  for (size_t i = 0; i < num_samples; ++i) {
+    size_t draw;
+    do {
+      draw = unif_dist(random_number_generator);
+    } while (temp[draw]);
+    temp[draw] = true;
+    result.push_back(draw);
+  }
+}
+
 void drawWithoutReplacementSimple(std::vector<size_t>& result, std::mt19937_64& random_number_generator, size_t max,
     const std::vector<size_t>& skip, size_t num_samples) {
 
@@ -116,6 +146,23 @@ void drawWithoutReplacementSimple(std::vector<size_t>& result, std::mt19937_64&
   }
 }
 
+void drawWithoutReplacementFisherYates(std::vector<size_t>& result, std::mt19937_64& random_number_generator,
+    size_t max, size_t num_samples) {
+
+  // Create indices
+  result.resize(max);
+  std::iota(result.begin(), result.end(), 0);
+
+  // Draw without replacement using Fisher Yates algorithm
+  std::uniform_real_distribution<double> distribution(0.0, 1.0);
+  for (size_t i = 0; i < num_samples; ++i) {
+    size_t j = i + distribution(random_number_generator) * (max - i);
+    std::swap(result[i], result[j]);
+  }
+
+  result.resize(num_samples);
+}
+
 void drawWithoutReplacementFisherYates(std::vector<size_t>& result, std::mt19937_64& random_number_generator,
     size_t max, const std::vector<size_t>& skip, size_t num_samples) {
 
@@ -203,8 +250,8 @@ double mostFrequentValue(const std::unordered_map<double, size_t>& class_count,
   }
 }
 
-double computeConcordanceIndex(const Data& data, const std::vector<double>& sum_chf, size_t dependent_varID,
-    size_t status_varID, const std::vector<size_t>& sample_IDs) {
+double computeConcordanceIndex(const Data& data, const std::vector<double>& sum_chf,
+    const std::vector<size_t>& sample_IDs) {
 
   // Compute concordance index
   double concordance = 0;
@@ -214,16 +261,16 @@ double computeConcordanceIndex(const Data& data, const std::vector<double>& sum_
     if (!sample_IDs.empty()) {
       sample_i = sample_IDs[i];
     }
-    double time_i = data.get(sample_i, dependent_varID);
-    double status_i = data.get(sample_i, status_varID);
+    double time_i = data.get_y(sample_i, 0);
+    double status_i = data.get_y(sample_i, 1);
 
     for (size_t j = i + 1; j < sum_chf.size(); ++j) {
       size_t sample_j = j;
       if (!sample_IDs.empty()) {
         sample_j = sample_IDs[j];
       }
-      double time_j = data.get(sample_j, dependent_varID);
-      double status_j = data.get(sample_j, status_varID);
+      double time_j = data.get_y(sample_j, 0);
+      double status_j = data.get_y(sample_j, 1);
 
       if (time_i < time_j && status_i == 0) {
         continue;
@@ -295,6 +342,7 @@ std::string beautifyTime(uint seconds) { // #nocov start
   return result;
 } // #nocov end
 
+// #nocov start
 size_t roundToNextMultiple(size_t value, uint multiple) {
 
   if (multiple == 0) {
@@ -308,6 +356,7 @@ size_t roundToNextMultiple(size_t value, uint multiple) {
 
   return value + multiple - remainder;
 }
+// #nocov end
 
 void splitString(std::vector<std::string>& result, const std::string& input, char split_char) { // #nocov start
 
@@ -579,12 +628,14 @@ std::vector<size_t> numSamplesLeftOfCutpoint(std::vector<double>& x, const std::
   return num_samples_left;
 }
 
+// #nocov start
 std::stringstream& readFromStream(std::stringstream& in, double& token) {
   if (!(in >> token) && (std::fpclassify(token) == FP_SUBNORMAL)) {
     in.clear();
   }
   return in;
 }
+// #nocov end
 
 double betaLogLik(double y, double mean, double phi) {
 
diff --git a/src/utility.h b/src/utility.h
index b151e53ce..04709e305 100644
--- a/src/utility.h
+++ b/src/utility.h
@@ -148,6 +148,16 @@ inline void readVector2D(std::vector<std::vector<T>>& result, std::ifstream& fil
 void loadDoubleVectorFromFile(std::vector<double>& result, std::string filename);
 // #nocov end
 
+/**
+ * Draw random numbers in a range without replacements.
+ * @param result Vector to add results to. Will not be cleaned before filling.
+ * @param random_number_generator Random number generator
+ * @param range_length Length of range. Interval to draw from: 0..max-1
+ * @param num_samples Number of samples to draw
+ */
+void drawWithoutReplacement(std::vector<size_t>& result, std::mt19937_64& random_number_generator, size_t range_length,
+    size_t num_samples);
+
 /**
  * Draw random numbers in a range without replacement and skip values.
  * @param result Vector to add results to. Will not be cleaned before filling.
@@ -164,6 +174,16 @@ void drawWithoutReplacementSkip(std::vector<size_t>& result, std::mt19937_64& ra
  * @param result Vector to add results to. Will not be cleaned before filling.
  * @param random_number_generator Random number generator
  * @param range_length Length of range. Interval to draw from: 0..max-1
+ * @param num_samples Number of samples to draw
+ */
+void drawWithoutReplacementSimple(std::vector<size_t>& result, std::mt19937_64& random_number_generator, size_t max,
+    size_t num_samples);
+
+/**
+ * Simple algorithm for sampling without replacement (skip values), faster for smaller num_samples
+ * @param result Vector to add results to. Will not be cleaned before filling.
+ * @param random_number_generator Random number generator
+ * @param range_length Length of range. Interval to draw from: 0..max-1
  * @param skip Values to skip
  * @param num_samples Number of samples to draw
  */
@@ -175,6 +195,16 @@ void drawWithoutReplacementSimple(std::vector<size_t>& result, std::mt19937_64&
  * @param result Vector to add results to. Will not be cleaned before filling.
  * @param random_number_generator Random number generator
  * @param max Length of range. Interval to draw from: 0..max-1
+ * @param num_samples Number of samples to draw
+ */
+void drawWithoutReplacementFisherYates(std::vector<size_t>& result, std::mt19937_64& random_number_generator,
+    size_t max, size_t num_samples);
+
+/**
+ * Fisher Yates algorithm for sampling without replacement (skip values).
+ * @param result Vector to add results to. Will not be cleaned before filling.
+ * @param random_number_generator Random number generator
+ * @param max Length of range. Interval to draw from: 0..max-1
  * @param skip Values to skip
  * @param num_samples Number of samples to draw
  */
@@ -273,13 +303,11 @@ double mostFrequentValue(const std::unordered_map<double, size_t>& class_count,
  * Compute concordance index for given data and summed cumulative hazard function/estimate
  * @param data Reference to Data object
  * @param sum_chf Summed chf over timepoints for each sample
- * @param dependent_varID ID of dependent variable
- * @param status_varID ID of status variable
  * @param sample_IDs IDs of samples, for example OOB samples
  * @return concordance index
  */
-double computeConcordanceIndex(const Data& data, const std::vector<double>& sum_chf, size_t dependent_varID,
-    size_t status_varID, const std::vector<size_t>& sample_IDs);
+double computeConcordanceIndex(const Data& data, const std::vector<double>& sum_chf,
+    const std::vector<size_t>& sample_IDs);
 
 /**
  * Convert a unsigned integer to string
diff --git a/tests/testthat/test_interface.R b/tests/testthat/test_interface.R
index 0df46ba17..940dc4b83 100644
--- a/tests/testthat/test_interface.R
+++ b/tests/testthat/test_interface.R
@@ -69,6 +69,52 @@ test_that("Working if dependent variable is matrix with one column", {
   expect_silent(ranger(data = iris2, dependent.variable = "Sepal.Width"))
 })
 
+test_that("Same result with x/y interface, classification", {
+  set.seed(300)
+  rf_formula <- ranger(Species ~ ., iris, num.trees = 5)
+  
+  set.seed(300)
+  rf_xy <- ranger(y = iris[, 5], x = iris[, -5], num.trees = 5)
+  
+  expect_equal(rf_formula$prediction.error, rf_xy$prediction.error)
+  expect_equal(rf_formula$predictions, rf_xy$predictions)
+})
+
+test_that("Same result with x/y interface, regression", {
+  set.seed(300)
+  rf_formula <- ranger(Sepal.Length ~ ., iris, num.trees = 5)
+  
+  set.seed(300)
+  rf_xy <- ranger(y = iris[, 1], x = iris[, -1], num.trees = 5)
+  
+  expect_equal(rf_formula$prediction.error, rf_xy$prediction.error)
+  expect_equal(rf_formula$predictions, rf_xy$predictions)
+})
+
+test_that("Same result with x/y interface, survival", {
+  set.seed(300)
+  rf_formula <- ranger(Surv(time, status) ~ ., veteran, num.trees = 5)
+  
+  set.seed(300)
+  rf_xy <- ranger(y = veteran[, c(3, 4)], x = veteran[, c(-3, -4)], num.trees = 5)
+  
+  expect_equal(rf_formula$prediction.error, rf_xy$prediction.error)
+  expect_equal(rf_formula$predictions, rf_xy$predictions)
+})
+
+test_that("Column order does not change prediction", {
+  dat <- iris[, c(sample(1:4), 5)]
+  rf <- ranger(dependent.variable.name = "Species", data = iris)
+  
+  set.seed(42)
+  pred1 <- predict(rf, iris)$predictions
+  
+  set.seed(42)
+  pred2 <- predict(rf, dat)$predictions
+  
+  expect_equal(pred1, pred2)
+})
+
 # Tibbles
 # This is failing on Rdevel. Possible without suggesting tibble package?
 # if (requireNamespace("tibble", quietly = TRUE)) {
diff --git a/tests/testthat/test_print.R b/tests/testthat/test_print.R
index f41b6114c..fd9f3512b 100644
--- a/tests/testthat/test_print.R
+++ b/tests/testthat/test_print.R
@@ -19,4 +19,4 @@ expect_that(print(predict(rf, iris)), prints_text("Ranger prediction"))
 expect_that(str(rf), prints_text("List of 14"))
 
 ## Test str forest function
-expect_that(str(rf$forest), prints_text("List of 10"))
+expect_that(str(rf$forest), prints_text("List of 9"))
diff --git a/tests/testthat/test_ranger.R b/tests/testthat/test_ranger.R
index 0b4a6136b..80a6fa429 100644
--- a/tests/testthat/test_ranger.R
+++ b/tests/testthat/test_ranger.R
@@ -157,9 +157,12 @@ test_that("OOB error is correct for 1 tree, regression", {
 
 test_that("Missing value columns detected in training", {
   dat <- iris
-  dat[4, 5] <- NA
   dat[25, 1] <- NA
-  expect_error(ranger(Species ~ ., dat, num.trees = 5), "Missing data in columns: Species, Sepal.Length")
+  expect_error(ranger(Species ~ ., dat, num.trees = 5), "Missing data in columns: Sepal.Length")
+  
+  dat <- iris
+  dat[4, 5] <- NA
+  expect_error(ranger(Species ~ ., dat, num.trees = 5), "Missing data in dependent variable.")
 })
 
 test_that("No error if missing value in irrelevant column, training", {
@@ -178,84 +181,70 @@ test_that("No error if missing value in irrelevant column, prediction", {
 test_that("Split points are at (A+B)/2 for numeric features, regression variance splitting", {
   dat <- data.frame(y = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
   rf <- ranger(y ~ x, dat, num.trees = 10)
-  split_points <- mapply(function(varID, value) {
-    value[varID > 0]
-    }, 
-    rf$forest$split.varIDs, 
-    rf$forest$split.values
-  )
+  split_points <- sapply(1:rf$num.trees, function(i) {
+    res <- treeInfo(rf, i)$splitval
+    res[!is.na(res)]
+  })
   expect_equal(split_points, rep(0.5, rf$num.trees))
 })
 
 test_that("Split points are at (A+B)/2 for numeric features, regression maxstat splitting", {
   dat <- data.frame(y = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
   rf <- ranger(y ~ x, dat, num.trees = 10, splitrule = "maxstat", alpha = 1)
-  split_points <- mapply(function(varID, value) {
-    value[varID > 0]
-    }, 
-    rf$forest$split.varIDs, 
-    rf$forest$split.values
-  )
+  split_points <- sapply(1:rf$num.trees, function(i) {
+    res <- treeInfo(rf, i)$splitval
+    res[!is.na(res)]
+  })
   expect_equal(split_points, rep(0.5, rf$num.trees))
 })
 
 test_that("Split points are at (A+B)/2 for numeric features, classification", {
   dat <- data.frame(y = factor(rbinom(100, 1, .5)), x = rbinom(100, 1, .5))
   rf <- ranger(y ~ x, dat, num.trees = 10)
-  split_points <- mapply(function(varID, value) {
-    value[varID > 0]
-  }, 
-  rf$forest$split.varIDs, 
-  rf$forest$split.values
-  )
+  split_points <- sapply(1:rf$num.trees, function(i) {
+    res <- treeInfo(rf, i)$splitval
+    res[!is.na(res)]
+  })
   expect_equal(split_points, rep(0.5, rf$num.trees))
 })
 
 test_that("Split points are at (A+B)/2 for numeric features, probability", {
   dat <- data.frame(y = factor(rbinom(100, 1, .5)), x = rbinom(100, 1, .5))
   rf <- ranger(y ~ x, dat, num.trees = 10, probability = TRUE)
-  split_points <- mapply(function(varID, value) {
-    value[varID > 0]
-  }, 
-  rf$forest$split.varIDs, 
-  rf$forest$split.values
-  )
+  split_points <- sapply(1:rf$num.trees, function(i) {
+    res <- treeInfo(rf, i)$splitval
+    res[!is.na(res)]
+  })
   expect_equal(split_points, rep(0.5, rf$num.trees))
 })
 
 test_that("Split points are at (A+B)/2 for numeric features, survival logrank splitting", {
   dat <- data.frame(time = runif(100, 1, 10), status = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
   rf <- ranger(Surv(time, status) ~ x, dat, num.trees = 10, splitrule = "logrank")
-  split_points <- mapply(function(varID, value) {
-    value[varID > 0]
-  }, 
-  rf$forest$split.varIDs, 
-  rf$forest$split.values
-  )
+  split_points <- sapply(1:rf$num.trees, function(i) {
+    res <- treeInfo(rf, i)$splitval
+    res[!is.na(res)]
+  })
   expect_equal(split_points, rep(0.5, rf$num.trees))
 })
 
 test_that("Split points are at (A+B)/2 for numeric features, survival C-index splitting", {
   dat <- data.frame(time = runif(100, 1, 10), status = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
   rf <- ranger(Surv(time, status) ~ x, dat, num.trees = 10, splitrule = "C")
-  split_points <- mapply(function(varID, value) {
-    value[varID > 0]
-  }, 
-  rf$forest$split.varIDs, 
-  rf$forest$split.values
-  )
+  split_points <- sapply(1:rf$num.trees, function(i) {
+    res <- treeInfo(rf, i)$splitval
+    res[!is.na(res)]
+  })
   expect_equal(split_points, rep(0.5, rf$num.trees))
 })
 
 test_that("Split points are at (A+B)/2 for numeric features, survival maxstat splitting", {
   dat <- data.frame(time = runif(100, 1, 10), status = rbinom(100, 1, .5), x = rbinom(100, 1, .5))
   rf <- ranger(Surv(time, status) ~ x, dat, num.trees = 10, splitrule = "maxstat", alpha = 1)
-  split_points <- mapply(function(varID, value) {
-    value[varID > 0]
-  }, 
-  rf$forest$split.varIDs, 
-  rf$forest$split.values
-  )
+  split_points <- sapply(1:rf$num.trees, function(i) {
+    res <- treeInfo(rf, i)$splitval
+    res[!is.na(res)]
+  })
   expect_equal(split_points, rep(0.5, rf$num.trees))
 })
 
diff --git a/tests/testthat/test_treeInfo.R b/tests/testthat/test_treeInfo.R
index a49b56278..1c8a069c5 100644
--- a/tests/testthat/test_treeInfo.R
+++ b/tests/testthat/test_treeInfo.R
@@ -234,4 +234,20 @@ test_that("Spitting value is numeric for order splitting", {
   expect_is(ti.order$splitval[!ti.order$terminal & ti.order$splitvarName == "Species"], "numeric")
 })
 
+test_that("treeInfo works for 31 unordered factor levels but not for 32", {
+  n <- 31
+  dt <- data.frame(x = factor(1:n, ordered = FALSE),  
+                   y = rbinom(n, 1, 0.5))
+  rf <- ranger(y ~ ., data = dt, num.trees = 10, splitrule = "extratrees")
+  expect_silent(treeInfo(rf))
+  
+  n <- 32
+  dt <- data.frame(x = factor(1:n, ordered = FALSE),  
+                   y = rbinom(n, 1, 0.5))
+  rf <- ranger(y ~ ., data = dt, num.trees = 10, splitrule = "extratrees")
+  expect_warning(treeInfo(rf), "Unordered splitting levels can only be shown for up to 31 levels.")
+})
+
+
+
 
diff --git a/tests/testthat/test_unordered.R b/tests/testthat/test_unordered.R
index e7f17bf54..b7441c2cc 100644
--- a/tests/testthat/test_unordered.R
+++ b/tests/testthat/test_unordered.R
@@ -337,7 +337,7 @@ test_that("Partition splitting working for large number of levels", {
   rf <- ranger(y ~ ., data = dt, num.trees = 10, splitrule = "extratrees")
   
   max_split <- max(sapply(1:rf$num.trees, function(i) {
-    max(log2(rf$forest$split.values[[i]][rf$forest$split.varIDs[[i]] > 0]))
+    max(log2(rf$forest$split.values[[i]]), na.rm = TRUE)
   }))
 
   expect_lte(max_split, n)