# Simulation
With R kernel

#### Further info
Models usable with train() from caret: <br>
https://topepo.github.io/caret/train-models-by-tag.html#Model_Tree <br>

## Packages

In [None]:
# Set the library path
#.libPaths("/user/emma.foessing01/u11969/new_R_libs")
Sys.setenv("PKG_CXXFLAGS"="-std=c++14")

print(R.version.string)

# List of required packages
list_of_packages <- c(
  "synthpop", "jsonlite", "codetools", "insight", "party", "haven", "dplyr", "rpart", "rpart.plot",
  "randomForest", "pROC", "caret", "pracma", "here", "Hmisc", "purrr",
  "ranger", "bnlearn", "arulesCBA", "network", "igraph", "xgboost",
  "data.table", "RSNNS"
)

# Function to load packages and handle errors
load_if_installed <- function(p) {
  tryCatch({
    library(p, character.only = TRUE)
  }, error = function(e) {
    message(sprintf("Package '%s' is not installed.", p))
  })
}

# Load all required packages
lapply(list_of_packages, load_if_installed)

## Data

In [3]:
load(file = (paste0(here(), "/cpspop.RData")))
adult <- read.csv(file = paste0(here(),"/adult_preprocessed.csv"))
# delete NAs
adult[adult == "?"] <- NA
adult <- na.omit(adult)

adult$workclass <- as.factor(adult$workclass)
adult$education <- as.factor(adult$education)
adult$marital_status <- as.factor(adult$marital_status)
adult$relationship <- as.factor(adult$relationship)
adult$race <- as.factor(adult$race)
adult$sex <- as.factor(adult$sex)
adult$native_country <- as.factor(adult$native_country)
adult$income <- as.factor(adult$income)

Ein Datensatz zu genieren (m = 1) ist ausreichend, da ich keine Varianzanalyse machen werde. Damit die Ergebnisse nicht von einem zufälligen Prozess abhängen ist es sinnvoll über ein paar runs Mittelwerte zu bilden (50–100)

## Functions

### Evaluation Functions

In [58]:
## Calculate evaluation metrics for continuous targets
evaluation_metrics_cont <- function(predictions, test_set){
    # Residuals
    residuals <- predictions - test_set$income
    
    # Mean Absolute Error (MAE)
    MAE <- mean(abs(residuals))
    
    # Mean Squared Error (MSE) and Root Mean Squared Error (RMSE)
    MSE <- mean(residuals^2)
    RMSE <- sqrt(MSE)
    
    # R-squared: Guarding against zero variance in the target
    SS_res <- sum(residuals^2)
    SS_tot <- sum((test_set$income - mean(test_set$income))^2)
    R_squared <- ifelse(SS_tot == 0, NA, 1 - (SS_res / SS_tot))
    
    # Mean Absolute Percentage Error (MAPE): Handling division by zero
    MAPE <- ifelse(any(test_set$income == 0), NA, mean(abs(residuals / test_set$income)) * 100)
    
    metrics_df <- data.frame(
        MAE = MAE, 
        MSE = MSE, 
        RMSE = RMSE, 
        R_squared = R_squared, 
        MAPE = MAPE
    )
    
    return(metrics_df)
}

In [59]:
## Calculate evaluation metrics for factored targets
evaluation_metrics_factor <- function(predictions, test_set) {
    # Ensure test_set is a data frame
    test_set <- as.data.frame(test_set)
    
    # Ensure both predictions and test_set$income are factors with the same levels
    predictions <- as.factor(predictions)
    reference <- as.factor(test_set$income)
    
    # Ensure levels match between predictions and reference
    levels(predictions) <- levels(reference)
    
    # Confusion matrix for the prediction on original data
    cm <- caret::confusionMatrix(predictions, reference, mode = "everything")

    # Saving evaluation metrics
    accuracy <- cm$overall['Accuracy']
    
    if (length(levels(reference)) == 2) {
        # Binary classification
        f1 <- cm$byClass['F1']
        sens <- cm$byClass['Sensitivity']
        spec <- cm$byClass['Specificity']
    } else {
        # Multi-class classification: calculate metrics for each class and take the mean
        f1 <- mean(cm$byClass[,'F1'], na.rm = TRUE)
        sens <- mean(cm$byClass[,'Sensitivity'], na.rm = TRUE)
        spec <- mean(cm$byClass[,'Specificity'], na.rm = TRUE)
    }

    # Create the dataframe
    metrics_df <- data.frame(
        Accuracy = accuracy, 
        F1 = f1, 
        Sensitivity = sens, 
        Specificity = spec
    )
    
    return(metrics_df)
}

# multi-class summary for prediction on conti target, use MSE
multiClassSummary <- function(data, lev = NULL, model = NULL) {
    mse <- mean((data$obs - data$pred)^2)
    c(MSE = mse)
}

### Prediction Functions

##### CART

In [None]:
# just the prediction
cart_pred <- function(data, outer_folds, cp_steps, inner_folds){############adjust##############
    # adjust evaluation metric to fit both numeric and factored targets
    summaryFunctionType <- if (is.numeric(data$income)) defaultSummary else multiClassSummary
    # metric: train() uses per default RSME and Accuracy for numeric and factored targets

    #  set control args
    outer_control <- trainControl(method = "cv", number = outer_folds,
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)
        
    inner_control <- trainControl(method = "cv", number = inner_folds, 
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)

    # Define the grid for hyperparameter tuning
    complexity <- 10^seq(log10(0.0001), log10(0.01), length.out = cp_steps)############adjust##############

    # Create grid
    tunegrid <- expand.grid(cp = complexity)############adjust##############

    # Initialize variables to store results
    outer_results <- list()

    outer_cv_folds = createFolds(data$income, k = outer_folds)
    
    # Outer loop: Cross-validation for model evaluation
    for (i in seq_along(outer_folds)) {
        
        # Split data into outer folds
        outer_test_index = outer_cv_folds[[i]]
        outer_testData = data[outer_test_index,]
        outer_trainData  = data[-outer_test_index,]
        
        # Hyperparameter tuning using inner CV
        # No need for inner loop because "train" does k-fold CV already
        model <- caret::train(income ~ ., 
                        data = outer_trainData, 
                        method = "rpart", ############adjust##############
                        tuneGrid = tunegrid, 
                        trControl = inner_control,
                        control = rpart.control(maxsurrogate = 0, maxcompete = 1) ############adjust##############
                        )#,
                        #metric = metricType)
            

        # Store the best hyperparameters
        best_hyperparameters <- model$bestTune

        # Train the final model on the outer training set with the best hyperparameters
        final_model <- caret::train(income ~ ., 
                             data = outer_trainData, 
                             method = "rpart",############adjust##############
                             trControl = outer_control, 
                             tuneGrid = best_hyperparameters)

        # Testing the final model on the outer test set
        predictions <- predict(final_model, newdata = outer_testData)
        
        if (is.numeric(data$income)) {
            eval <- evaluation_metrics_cont(predictions, outer_testData) # postResample is a useful caret function
        } else if (is.factor(data$income)) {
            eval <- evaluation_metrics_factor(predictions, outer_testData)
        } else {
            stop("The predicted target has to be numeric or factor.")
        }

        # Store the evaluation metrics for this outer fold
        outer_results[[i]] <- eval
    }

    # Average the evaluation metrics over the outer folds
    eval_avg_outer_folds <- do.call(rbind, outer_results) %>%
                            dplyr::summarise(across(everything(), mean, na.rm = TRUE))

    

    # Return the average evaluation metrics
    return(eval_avg_outer_folds)
}

##### RF

In [None]:
rf_pred <- function(data, outer_folds, mtry_steps, ntree_steps, inner_folds) {
    # Adjust evaluation metric to fit both numeric and factored targets
    summaryFunctionType <- if (is.numeric(data$income)) defaultSummary else multiClassSummary
    # Metric: train() uses per default RSME and Accuracy for numeric and factored targets

    # Set control args
    outer_control <- trainControl(method = "cv", number = outer_folds,
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)
        
    inner_control <- trainControl(method = "cv", number = inner_folds, 
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)

    # Define the parameter grid for tuning
    splitrule_value <- if (is.numeric(data$income)) "variance" else "gini"
    
    tunegrid <- expand.grid(mtry = seq(2, ncol(data) - 1, length.out = mtry_steps),
                            splitrule = splitrule_value,
                            min.node.size = 5)
    # You can incorporate ntree_steps into the grid if needed

    # Initialize variables to store results
    outer_cv_folds <- createFolds(data$income, k = outer_folds)

    # Initialize variables to store results
    outer_results <- list()

    outer_cv_folds <- createFolds(data$income, k = outer_folds)
    
    # Outer loop: Cross-validation for model evaluation
    for (i in seq_along(outer_cv_folds)) {
        # Split data into outer folds
        outer_test_index <- outer_cv_folds[[i]]
        outer_testData <- data[outer_test_index, ]
        outer_trainData <- data[-outer_test_index, ]
        
        # Hyperparameter tuning using inner CV
        model <- caret::train(income ~ ., 
                              data = outer_trainData, 
                              method = "ranger",  
                              tuneGrid = tunegrid, 
                              trControl = inner_control)

        # Store the best hyperparameters
        best_hyperparameters <- model$bestTune

        # Train the final model on the outer training set with the best hyperparameters
        final_model <- caret::train(income ~ ., 
                                    data = outer_trainData, 
                                    method = "ranger", 
                                    trControl = outer_control, 
                                    tuneGrid = best_hyperparameters)

        # Testing the final model on the outer test set
        predictions <- predict(final_model, newdata = outer_testData)
        
        if (is.numeric(data$income)) {
            eval <- evaluation_metrics_cont(predictions, outer_testData)
        } else if (is.factor(data$income)) {
            eval <- evaluation_metrics_factor(predictions, outer_testData)
        } else {
            stop("The predicted target has to be numeric or factor.")
        }

        # Store the evaluation metrics for this outer fold
        outer_results[[i]] <- eval
    }

    # Average the evaluation metrics over the outer folds
    eval_avg_outer_folds <- do.call(rbind, outer_results) %>%
                            dplyr::summarise(across(everything(), mean, na.rm = TRUE))

    # Return the average evaluation metrics
    return(eval_avg_outer_folds)
}

##### XGB

In [None]:
xgb_pred <- function(data, outer_folds, inner_folds, nrounds_steps, max_depth_steps, eta_steps, gamma_steps, colsample_bytree_steps, min_child_weight_steps, subsample_steps) { ############adjust##############

    # Adjust evaluation metric to fit both numeric and factored targets
    summaryFunctionType <- if (is.numeric(data$income)) defaultSummary else multiClassSummary
    
    # Set control args
    outer_control <- caret::trainControl(method = "cv", number = outer_folds,
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)
        
    inner_control <- caret::trainControl(method = "cv", number = inner_folds, 
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)

    # Define the parameter grid for tuning
    tunegrid <- expand.grid(
        nrounds = seq(50, 150, length.out = nrounds_steps),
        max_depth = round(seq(3, 9, length.out = max_depth_steps)),
        eta = seq(0.01, 0.3, length.out = eta_steps),
        gamma = seq(0, 0.2, length.out = gamma_steps),
        colsample_bytree = seq(0.5, 1, length.out = colsample_bytree_steps),
        min_child_weight = seq(1, 10, length.out = min_child_weight_steps),
        subsample = seq(0.5, 1, length.out = subsample_steps)
    )
    # alternatives for smaller grid:
        #gamma = 0,  # Default value
        #colsample_bytree = 0.8,  # Default value
        #min_child_weight = 1,  # Default value
        #subsample = 0.8  # Default value


    # Initialize variables to store results
    outer_results <- list()

    outer_cv_folds <- createFolds(data$income, k = outer_folds)
    
    # Outer loop: Cross-validation for model evaluation
    for (i in seq_along(outer_cv_folds)) {
        
        # Split data into outer folds
        outer_test_index <- outer_cv_folds[[i]]
        outer_testData <- data[outer_test_index,]
        outer_trainData <- data[-outer_test_index,]
        
        # Convert data for xgboost
        train_X <- convert_to_numeric_matrix(outer_trainData[, !colnames(outer_trainData) %in% 'income'])
        train_y <- outer_trainData$income
        
        val_X <- convert_to_numeric_matrix(outer_testData[, !colnames(outer_testData) %in% 'income'])
        val_y <- outer_testData$income
        
        train_dmatrix <- xgb.DMatrix(data = train_X, label = train_y)
        val_dmatrix <- xgb.DMatrix(data = val_X, label = val_y)

        # Hyperparameter tuning using inner CV
        model <- caret::train(x = train_X, 
                              y = train_y,
                              method = "xgbTree", ############adjust##############
                              tuneGrid = tunegrid, 
                              trControl = inner_control,
                              verbose = FALSE)
        
        # Store the best hyperparameters
        best_hyperparameters <- model$bestTune

        # Train the final model on the outer training set with the best hyperparameters
        final_model <- caret::train(x = train_X, 
                                    y = train_y, 
                                    method = "xgbTree", ############adjust##############
                                    tuneGrid = best_hyperparameters,
                                    trControl = outer_control,
                                    verbose = FALSE)

        # Testing the final model on the outer test set
        predictions <- predict(model, newdata = val_X, iteration_range = c(1, model$bestTune$nrounds))############adjust##############

        
        if (is.numeric(data$income)) {
            eval <- postResample(predictions, val_y)
        } else if (is.factor(data$income)) {
            eval <- confusionMatrix(predictions, val_y)
        } else {
            stop("The predicted target has to be numeric or factor.")
        }

        # Store the evaluation metrics for this outer fold
        outer_results[[i]] <- eval
    }

    # Average the evaluation metrics over the outer folds
    eval_avg_outer_fold <- mean(unlist(outer_results)) # Calculate the mean performance over all outer folds

    # Return the average evaluation metrics
    return(eval_avg_outer_fold)
}


##### BN

this does not have an implemented function in the caret::train() function, so the model needs to be created first

In [None]:
discretize_df = function(df, breaks = 5) {
  for (var in colnames(df)) {
    # Check if the variable is not a factor
    if (!is.factor(df[[var]])) {

      # Count the frequency of each unique value
      freq_table <- table(df[[var]])

      # Calculate the proportion of zeros, ensuring NA is handled
      zero_proportion <- ifelse(!is.na(freq_table[as.character(0)]), 
                                freq_table[as.character(0)] / sum(freq_table), 
                                0)

      # Determine the number of breaks based on zero proportion
      if (zero_proportion > 4/5) {
        new_breaks = 1
      } else if (zero_proportion > 1/4) {
        new_breaks = breaks - 2
      } else if (zero_proportion > 1/5) {
        new_breaks = breaks - 1
      } else {
        new_breaks = breaks
      }
      
      # Separate zeros and non-zeros
      zero_portion = (df[[var]] == 0)
      non_zero_values = df[[var]][!zero_portion]

      # Discretize non-zero values
      if (length(non_zero_values) > 0) {
        # Calculate breaks for non-zero values
        range_values = range(non_zero_values, na.rm = TRUE)
        breaks_values = seq(range_values[1], range_values[2], length.out = new_breaks + 1)
        
        # Ensure correct number of labels are created
        labels = sapply(1:(length(breaks_values)-1), function(i) 
                        paste("(", breaks_values[i], "-", breaks_values[i+1], "]", sep=""))

        # Use cut to apply these breaks and labels
        discretized_non_zeros = cut(non_zero_values, breaks = breaks_values, labels = labels, include.lowest = TRUE)
        # Combine zero and discretized non-zeros into the original dataframe
        df[[var]] <- factor(ifelse(zero_portion, "0", as.character(discretized_non_zeros)))
      } else {
        # If all values are zero or the number of breaks is zero or negative
        df[[var]] <- factor("0")
      }
    }
  }
  return(df)
}

In [None]:
inner_cv <- function(data, target_var, folds, algorithms) {
  # Create inner folds
  inner_folds <- createFolds(data[[target_var]], k = folds)
  
  best_model <- NULL
  best_performance <- -Inf
  best_algorithm <- NULL
  
  for (algorithm in algorithms) {
    cat("Trying algorithm:", algorithm, "\n")
    fold_results <- c()
    
    for (i in seq_along(inner_folds)) {
      inner_test_index <- inner_folds[[i]]
      inner_trainData <- data[-inner_test_index, ]
      inner_testData <- data[inner_test_index, ]
      
      # Fit Bayesian Network model using bnlearn algorithm
      bn_model <- do.call(get(algorithm, envir = asNamespace("bnlearn")), list(inner_trainData))
      
      # Fit the model to the training data
      fitted_bn_model <- bnlearn::bn.fit(bn_model, inner_trainData)
      
      # Use Bayesian Likelihood Weighting for prediction
      predictions <- predict(fitted_bn_model, node = target_var, data = inner_testData, method = "bayes-lw")
      
      # Handle missing levels in prediction
      predictions <- factor(predictions, levels = levels(inner_trainData[[target_var]]))
      
      # Calculate the accuracy
      accuracy <- mean(predictions == inner_testData[[target_var]], na.rm = TRUE)
      fold_results[i] <- accuracy
    }
    
    # Average performance for this algorithm
    avg_performance <- mean(fold_results, na.rm = TRUE)
    
    if (!is.na(avg_performance) && avg_performance > best_performance) {
      best_performance <- avg_performance
      best_model <- fitted_bn_model
      best_algorithm <- algorithm
    }
  }
  
  cat("Best algorithm selected:", best_algorithm, "with accuracy:", best_performance, "\n")
  return(best_model)
}

In [None]:
bn_pred <- function(data, outer_folds, inner_folds) {
  # Discretize the data
  data <- discretize_df(data)
  
  algorithms = c("hc", "tabu")  # You can add more algorithms here
  data$income <- factor(data$income, levels = unique(data$income))

  outer_results <- list()
  outer_cv_folds <- createFolds(data$income, k = outer_folds)
  
  for (i in seq_along(outer_cv_folds)) {
    outer_test_index <- outer_cv_folds[[i]]
    outer_testData <- data[outer_test_index, ]
    outer_trainData <- data[-outer_test_index, ]
    
    # Get the best fitted model from inner CV
    best_model <- inner_cv(outer_trainData, "income", inner_folds, algorithms)

    # Perform prediction using 'bayes-lw' method
    predictions <- predict(best_model, node = "income", data = outer_testData, method = "bayes-lw")
    
    # Evaluate the accuracy of the predictions (direct comparison without numeric threshold)
    accuracy <- mean(predictions == outer_testData$income, na.rm = TRUE)
    outer_results[[i]] <- accuracy
  }
  
  # Calculate average accuracy over all outer folds
  eval_avg_outer_folds <- mean(unlist(outer_results))
  
  return(eval_avg_outer_folds)
}

##### MLP

In [None]:
mlp_pred <- function(data, outer_folds, size_steps, decay_steps, inner_folds){
    # adjust evaluation metric to fit both numeric and factored targets
    summaryFunctionType <- if (is.numeric(data$income)) defaultSummary else multiClassSummary
    # metric: train() uses per default RSME and Accuracy for numeric and factored targets

    #  set control args
    outer_control <- caret::trainControl(method = "cv", number = outer_folds,
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)
        
    inner_control <- caret::trainControl(method = "cv", number = inner_folds, 
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)

    # Define the grid for hyperparameter tuning
    size_values <- seq(1, 10, length.out = size_steps)
    decay_values <- 10^seq(log10(0.0001), log10(0.01), length.out = decay_steps)

    # Create grid
    tunegrid <- expand.grid(size = size_values, decay = decay_values)

    # Initialize variables to store results
    outer_results <- list()

    outer_cv_folds = createFolds(data$income, k = outer_folds)
    
    # Outer loop: Cross-validation for model evaluation
    for (i in seq_along(outer_folds)) {
        
        # Split data into outer folds
        outer_test_index = outer_cv_folds[[i]]
        outer_testData = data[outer_test_index,]
        outer_trainData  = data[-outer_test_index,]
        
        # Hyperparameter tuning using inner CV
        # No need for inner loop because "train" does k-fold CV already
        mlp_model <- caret::train(income ~ ., 
                           data = outer_trainData, 
                           method = "nnet", 
                           tuneGrid = tunegrid, 
                           trControl = inner_control)#,
                           #metric = metricType)
            

        # Store the best hyperparameters
        best_hyperparameters <- mlp_model$bestTune
        print("best HP")
        print(mlp_model$bestTune)

        # Train the final model on the outer training set with the best hyperparameters
        final_model <- caret::train(income ~ ., 
                             data = outer_trainData, 
                             method = "nnet", 
                             trControl = outer_control, 
                             tuneGrid = best_hyperparameters)

        # Testing the final model on the outer test set
        predictions <- predict(final_model, newdata = outer_testData)
        
        if (is.numeric(data$income)) {
            eval <- postResample(predictions, outer_testData$income) # postResample is a useful caret function
        } else if (is.factor(data$income)) {
            eval <- confusionMatrix(predictions, outer_testData$income)
        } else {
            stop("The predicted target has to be numeric or factor.")
        }

        # Store the evaluation metrics for this outer fold
        outer_results[[i]] <- eval
    }

    # Average the evaluation metrics over the outer folds
    eval_avg_outer_fold <- mean(unlist(outer_results)) # Calculate the mean performance over all outer folds

    # Return the average evaluation metrics
    return(eval_avg_outer_fold)
}

## For original data

In [None]:
simulation <- function(data, nrun = 10, outer_folds = 5, inner_folds = 5, cp_steps = 10, #CART params
                                                                mtry_steps = 10, ntree_steps= 10, # RF params
                                                                nrounds_steps = 10, max_depth_steps = 10, eta_steps = 10, gamma_steps = 10, colsample_bytree_steps = 10, min_child_weight_steps = 10, subsample_steps = 10, # XGB params
                                                                size_steps = 10, decay_steps = 10 # MLP params
                                                                ){
    
    # create empty list to store evaluation dataframes
    eval_list <- list()

    # set inital seed
    s <- 1234
    for (i in 1:nrun){
        # vary seed with each run
        s <- s + 1

        # create synthetic data
        # data <- gen_data()

        # prediction model with nested CV and grid search
        CART_eval <- cart_pred(data, outer_folds, cp_steps, inner_folds)
        RF_eval <- rf_pred(data, outer_folds, mtry_steps, ntree_steps, inner_folds)
        #XGB_eval <- xgb_pred(data, outer_folds, inner_folds, nrounds_steps, max_depth_steps = 10, eta_steps = 10, gamma_steps = 10, colsample_bytree_steps = 10, min_child_weight_steps = 10, subsample_steps = 10)
        BN_eval <- bn_pred(data, outer_folds, inner_folds)
        #MLP_eval <- mlp_pred(data, outer_folds, size_steps, decay_steps, inner_folds)

        # bind results 
        eval <- rbind(CART_eval = CART_eval, RF_eval = RF_eval, BN_eval = BN_eval) #XGB_eval = Boost_eval, , MLP_eval = MLP_eval

        # ich glaube wenn es so verschachtelt ist und ich eine Liste und noch eine Liste habe, müsste ich es anders machen
        # am besten wäre ein Dataframe und für jede Model-Art eine Zeile
        eval_list[[i]] <- eval
        print(c("run", i, "completed"))
        }

    # average over all runs
    sum_df <- Reduce(function(x, y) Map(`+`, x, y), eval_list)
    eval_avg <- lapply(sum_df, function(col) col / length(eval_list))

    # Convert the list back to a dataframe
    # Store row names
    rownames <- row.names(eval_list[[1]])

    # Convert the list back to a dataframe
    eval_avg <- as.data.frame(eval_avg)

    # Set back the row names
    row.names(eval_avg) <- rownames
    
    # returns
    results <- list(eval_avg = eval_avg)
    return(results)
}

## For synthetic data

In [None]:
adult_res <- simulation(data = adult, nrun = 1, outer_folds = 2, inner_folds = 2)

In [None]:
cps_res <- simulation(data = cpspop, nrun = 1, outer_folds = 2, inner_folds = 2)

In [63]:
#Saving the data:
saveRDS(cps_res, file = (paste0(here(), "/results/cps_orig_res.RData")))
saveRDS(adult_res, file = (paste0(here(), "/results/adult_orig_res.RData")))

"kann komprimierte Datei '/Users/emmafoessing/Documents/Master/MA/Code/Master-Thesis/simulation/cps_CART_res.RData' nicht "offnen. Grund evtl. 'No such file or directory'"


ERROR: Error in gzfile(file, mode): kann Verbindung nicht "offnen
