# Evaluation script

## Load packages

In [1]:
Sys.setenv("PKG_CXXFLAGS"="-std=c++14")

# List of required packages
list_of_packages <- c("here", "dplyr")

# Function to load packages and handle errors
load_if_installed <- function(p) {
  tryCatch({
    library(p, character.only = TRUE)
  }, error = function(e) {
    message(sprintf("Package '%s' is not installed.", p))
  })
}

# Load all required packages
lapply(list_of_packages, load_if_installed)

here() starts at /home/uni08/hpc/emma.foessing01/u11969/Master-Thesis




Attaching package: ‘dplyr’




The following objects are masked from ‘package:stats’:

    filter, lag




The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




## Load results

In [2]:
datanames <- c("cps", "adult")
modelnames <- c("orig", "cart", "rf", "bn", "xgb", "svm")

loaded_data <- list()

# Loop through all combinations of dataname and modelname
for (dataname in datanames) {
  for (modelname in modelnames) {
    # Construct the file path
    file_path <- paste0(here(), "/results/", dataname, "_", modelname, "_res.RData")
    
    # Check if the file exists
    if (file.exists(file_path)) {
      # Load the file if it exists
      load(file = file_path)
      
      # Assign the loaded data to the list
      loaded_data[[paste0(dataname, "_", modelname)]] <- get(ls()[1])
      
      cat("Loaded data for:", dataname, modelname, "\n")
    } else {
      # Print message if the file does not exist
      cat("Data does not exist for:", dataname, modelname, "\n")
    }
  }
}

# Print all loaded dataframes
cat("\n--- List of loaded dataframes ---\n")
for (name in names(loaded_data)) {
  cat("Data for:", name, "\n")
  print(loaded_data[[name]])
}

Loaded data for: cps orig 
Loaded data for: cps cart 
Data does not exist for: cps rf 
Loaded data for: cps bn 
Loaded data for: cps xgb 
Data does not exist for: cps svm 
Loaded data for: adult orig 
Loaded data for: adult cart 
Data does not exist for: adult rf 
Loaded data for: adult bn 
Loaded data for: adult xgb 
Data does not exist for: adult svm 



--- List of loaded dataframes ---


Data for: cps_orig 
$eval_regression_avg
          MAE        MSE     RMSE R_squared     MAPE
CART 27081.35 1728972408 41580.91 0.2699372 5346.312
RF   25191.51 1611471960 40143.12 0.3343683 4432.881
XGB  24894.57 1588800820 39859.04 0.3437388 4358.569
SVM  25695.09 1847248426 42977.06 0.2371105 3634.983

$eval_bn_avg
   Accuracy        F1 Sensitivity Specificity
1 0.9603824 0.9797909         0.2         0.8

Data for: cps_cart 
$eval_regression_avg
          MAE        MSE     RMSE R_squared     MAPE
CART 27243.17 1843154279 42931.97 0.2630100 3661.599
RF   25103.50 1607652084 40089.80 0.3436767 3891.644
XGB  25009.23 1606703556 40083.65 0.3438721 3933.308
SVM  25719.93 1886206959 43430.48 0.2296043 3540.986

$eval_bn_avg
   Accuracy        F1 Sensitivity Specificity
1 0.9601506 0.9796702         0.2         0.8

Data for: cps_bn 
$eval_regression_avg
      Accuracy        F1 Sensitivity Specificity
CART 0.9602298 0.9797115   0.2000000   0.8000000
RF   0.9601910 0.7353345   0.2001093 

In [3]:
# Define the names of the datasets and models
datanames <- c("cps", "adult")
modelnames <- c("orig", "cart", "rf", "bn", "xgb", "svm")

# Initialize empty lists for storing regression and classification tables
regression_results <- list()
classification_results <- list()

# Define performance metrics for regression and classification
regression_metrics <- c("MAE", "MSE", "RMSE", "R_squared", "MAPE")
classification_metrics <- c("Accuracy", "F1", "Sensitivity", "Specificity")

# Initialize tables as data frames with the correct structure
regression_table <- data.frame(matrix(ncol = length(modelnames), nrow = length(modelnames)))
colnames(regression_table) <- modelnames
rownames(regression_table) <- modelnames

classification_table <- data.frame(matrix(ncol = length(modelnames), nrow = length(modelnames)))
colnames(classification_table) <- modelnames
rownames(classification_table) <- modelnames

# Iterate over all loaded data and extract relevant metrics
for (dataname in datanames) {
  for (modelname in modelnames) {
    df_name <- paste0(dataname, "_", modelname)
    
    if (!is.null(loaded_data[[df_name]])) {
      data <- loaded_data[[df_name]]
      
      # For the 'cps' dataset (regression task)
      if (dataname == "cps") {
        if (!is.null(data$eval_regression_avg)) {
          # Extract regression metrics
          regression_metrics_values <- data$eval_regression_avg
          # Add them to the regression table
          regression_table[modelname, "CART"] <- paste(regression_metrics_values["CART", ], collapse = ", ")
          regression_table[modelname, "RF"] <- paste(regression_metrics_values["RF", ], collapse = ", ")
          regression_table[modelname, "XGB"] <- paste(regression_metrics_values["XGB", ], collapse = ", ")
          regression_table[modelname, "SVM"] <- paste(regression_metrics_values["SVM", ], collapse = ", ")
        }
      }
      
      # For the 'adult' dataset (classification task)
      if (dataname == "adult") {
        if (!is.null(data$eval_classification_avg)) {
          # Extract classification metrics
          classification_metrics_values <- data$eval_classification_avg
          # Add them to the classification table
          classification_table[modelname, "CART"] <- paste(classification_metrics_values["CART", ], collapse = ", ")
          classification_table[modelname, "RF"] <- paste(classification_metrics_values["RF", ], collapse = ", ")
          classification_table[modelname, "XGB"] <- paste(classification_metrics_values["XGB", ], collapse = ", ")
          classification_table[modelname, "SVM"] <- paste(classification_metrics_values["SVM", ], collapse = ", ")
          classification_table[modelname, "BN"] <- paste(classification_metrics_values["BN", ], collapse = ", ")
        }
      }
    }
  }
}

# Print the tables
cat("\n--- Regression Results (CPS Dataset) ---\n")
print(regression_table)

cat("\n--- Classification Results (Adult Dataset) ---\n")
print(classification_table)


--- Regression Results (CPS Dataset) ---


     orig cart rf bn xgb svm
orig   NA   NA NA NA  NA  NA
cart   NA   NA NA NA  NA  NA
rf     NA   NA NA NA  NA  NA
bn     NA   NA NA NA  NA  NA
xgb    NA   NA NA NA  NA  NA
svm    NA   NA NA NA  NA  NA
                                                                                         CART
orig 27081.3492082474, 1728972407.68385, 41580.913983267, 0.269937192505549, 5346.31238234134
cart  27243.1708662494, 1843154279.18516, 42931.972691517, 0.26301003814581, 3661.59947812589
rf                                                                                       <NA>
bn                                             0.960229801351297, 0.979711461063755, 0.2, 0.8
xgb                  27456.8922085511, 1695125999.6769, 41171.907894545, 0.3186540495244, NaN
svm                                                                                      <NA>
                                                                                            RF
orig 25191.5051777678, 1611471960.02314, 401


--- Classification Results (Adult Dataset) ---


     orig cart rf bn xgb svm
orig   NA   NA NA NA  NA  NA
cart   NA   NA NA NA  NA  NA
rf     NA   NA NA NA  NA  NA
bn     NA   NA NA NA  NA  NA
xgb    NA   NA NA NA  NA  NA
svm    NA   NA NA NA  NA  NA
                                                                           CART
orig 0.853988462303561, 0.906265962880981, 0.939789882581443, 0.595098561534363
cart 0.847015915119363, 0.904444352400282, 0.961938325991189, 0.497319034852547
rf                                                                         <NA>
bn      0.83024998342285, 0.892400806993948, 0.936485532815808, 0.5086782376502
xgb   0.830990584803077, 0.89368978604496, 0.938096488923912, 0.496858781753619
svm                                                                        <NA>
                                                                             RF
orig 0.859757310523175, 0.909757145807047, 0.941202436655778, 0.614011720831113
cart    0.858862194363123, 0.9100211895487, 0.948284212573016, 0.586785055002