# Baysian Networks prediction function

In [8]:
# directory
directory <- "/Users/emmafoessing/Documents/Master/MA/Code/Master-Thesis"

### Packages

In [9]:
list_of_packages <- c ("synthpop", "insight", "party", "haven", "dplyr", "rpart", "rpart.plot", "randomForest", "pROC", "caret", "pracma", "here", "Hmisc", "purrr", "randomForest", "ranger", "bnlearn", "arulesCBA", "network", "igraph")

install_if_missing <- function(p){
  if(!requireNamespace(p, quietly = TRUE)){
    install.packages(p)
  }
  library(p, character.only=TRUE)
}


lapply(list_of_packages, install_if_missing)

### Data

In [10]:
load(paste0(directory, "/cpspop.RData"))
adult <- read.csv(paste0(directory, "/adult_preprocessed.csv"))
# delete NAs
adult[adult == "?"] <- NA
adult <- na.omit(adult)

adult$workclass <- as.factor(adult$workclass)
adult$education <- as.factor(adult$education)
adult$marital_status <- as.factor(adult$marital_status)
adult$relationship <- as.factor(adult$relationship)
adult$race <- as.factor(adult$race)
adult$sex <- as.factor(adult$sex)
adult$native_country <- as.factor(adult$native_country)
adult$income <- as.factor(adult$income)

adult_with_cont <- adult
cps_with_cont <- cpspop

### Helper functions

In [11]:
evaluation_metrics_factor <- function(predictions, test_set) {
    # Ensure test_set is a data frame
    test_set <- as.data.frame(test_set)
    
    # Ensure both predictions and test_set$income are factors with the same levels
    predictions <- as.factor(predictions)
    reference <- as.factor(test_set$income)
    
    # Ensure levels match between predictions and reference
    levels(predictions) <- levels(reference)
    
    # Confusion matrix for the prediction on original data
    cm <- confusionMatrix(predictions, reference, mode = "everything")

    # Saving evaluation metrics
    accuracy <- cm$overall['Accuracy']
    
    if (length(levels(reference)) == 2) {
        # Binary classification
        f1 <- cm$byClass['F1']
        sens <- cm$byClass['Sensitivity']
        spec <- cm$byClass['Specificity']
    } else {
        # Multi-class classification: calculate metrics for each class and take the mean
        f1 <- mean(cm$byClass[,'F1'], na.rm = TRUE)
        sens <- mean(cm$byClass[,'Sensitivity'], na.rm = TRUE)
        spec <- mean(cm$byClass[,'Specificity'], na.rm = TRUE)
    }

    # Create the dataframe
    metrics_df <- data.frame(
        Accuracy = accuracy, 
        F1 = f1, 
        Sensitivity = sens, 
        Specificity = spec
    )
    
    return(metrics_df)
}

Ich diskretisiere alle meine continuous cars (in Intervalle einteilen) --> das geht mit der discretize function <br>
Ich will ca. 5 Kategorien maximal pro Variable haben. Wenn eine der Ausprägungen in der Varibale mindestens 1/5 aller Ausprägungnen aus macht, dann soll diese eine eigene Kategorie werden und die restlichen als Interval kategorisiert werden. Wenn es mehrere Werte gibt, die mindestens 1/5 aller Ausprägungen ausmachen, dann sollen auch diese alle jeweils eine eigene Kategorie werden und der Rest kann in Intervalle eingeteilt werden. Am Ende soll die Variable des Datensatzes überschrieben werden mit der kategorialen Variable und der Datensatz nur aus 'factor' Variablen bestehen.
2/5 sind =0 --> 3 weitere Kategorien mit Intervallen
3/5 sind =0 --> 2 weitere Kategorien mit Intervallen

In [12]:
discretize_df = function(df, breaks = 5) {
  for (var in colnames(df)) {
    # Check if the variable is not a factor
    if (!is.factor(df[[var]])) {

      # Count the frequency of each unique value
      freq_table <- table(df[[var]])

      # Calculate the proportion of zeros, ensuring NA is handled
      zero_proportion <- ifelse(!is.na(freq_table[as.character(0)]), 
                                freq_table[as.character(0)] / sum(freq_table), 
                                0)

      # Determine the number of breaks based on zero proportion
      if (zero_proportion > 4/5) {
        new_breaks = 1
      } else if (zero_proportion > 1/4) {
        new_breaks = breaks - 2
      } else if (zero_proportion > 1/5) {
        new_breaks = breaks - 1
      } else {
        new_breaks = breaks
      }
      
      # Separate zeros and non-zeros
      zero_portion = (df[[var]] == 0)
      non_zero_values = df[[var]][!zero_portion]

      # Discretize non-zero values
      if (length(non_zero_values) > 0) {
        # Calculate breaks for non-zero values
        range_values = range(non_zero_values, na.rm = TRUE)
        breaks_values = seq(range_values[1], range_values[2], length.out = new_breaks + 1)
        
        # Ensure correct number of labels are created
        labels = sapply(1:(length(breaks_values)-1), function(i) 
                        paste("(", breaks_values[i], "-", breaks_values[i+1], "]", sep=""))

        # Use cut to apply these breaks and labels
        discretized_non_zeros = cut(non_zero_values, breaks = breaks_values, labels = labels, include.lowest = TRUE)
        # Combine zero and discretized non-zeros into the original dataframe
        df[[var]] <- factor(ifelse(zero_portion, "0", as.character(discretized_non_zeros)))
      } else {
        # If all values are zero or the number of breaks is zero or negative
        df[[var]] <- factor("0")
      }
    }
  }
  return(df)
}

#### Look at the levels created

In [13]:
print_levels <- function(data) {
  factor_vars <- sapply(data, is.factor)
  for (var in names(data)[factor_vars]) {
    cat("Levels of", var, ":\n")
    print(levels(data[[var]]))
    cat("\n")
  }
}

In [14]:
# cps
discretized_data <- discretize_df(cpspop)
print_levels(discretized_data)

# adult
discretized_data <- discretize_df(adult)
print_levels(discretized_data)

Levels of tax :
[1] "(1-33333]"     "(33333-66665]" "(66665-99997]" "0"            

Levels of income :
[1] "(1-153749.2]"        "(153749.2-307497.4]" "(307497.4-461245.6]"
[4] "(461245.6-614993.8]" "(614993.8-768742]"  

Levels of csp :
[1] "(1-23917]" "0"        

Levels of age :
[1] "(15-30]" "(30-45]" "(45-60]" "(60-75]" "(75-90]"

Levels of educ :
 [1] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45"
[16] "46"

Levels of marital :
[1] "1" "2" "3" "4" "5" "6" "7"

Levels of race :
[1] "1" "2" "3" "4"

Levels of sex :
[1] "1" "2"

Levels of ss :
[1] "(16671.3333333333-33335.6666666667]" "(33335.6666666667-50000]"           
[3] "(7-16671.3333333333]"                "0"                                  

Levels of age :
[1] "(17-31.6]"   "(31.6-46.2]" "(46.2-60.8]" "(60.8-75.4]" "(75.4-90]"  

Levels of workclass :
[1] "0" "1" "2" "3" "4" "5" "6"

Levels of fnlwgt :
[1] "(1190517.8-1484705]"  "(13769-307956.2]"     "(307956.2-602143.4]" 
[4] "(602143.4-8963

In [15]:
cpdag_to_dag <- function(cpdag) {
  # Convert bnlearn object to adjacency matrix
  adj_matrix <- amat(cpdag)
  
  # Convert adjacency matrix to igraph object
  ig <- graph_from_adjacency_matrix(adj_matrix, mode = "directed")
  
  # Check if it's already a DAG
  if (igraph::is_dag(ig)) {
    return(cpdag)
  }
  
  # Convert CPDAG to DAG by randomly orienting undirected edges
  directed_arcs <- directed.arcs(cpdag)
  undirected_arcs <- undirected.arcs(cpdag)
  
  while (nrow(undirected_arcs) > 0) {
    arc <- undirected_arcs[1, , drop = FALSE]
    cpdag <- set.arc(cpdag, from = arc[1, 1], to = arc[1, 2])
    undirected_arcs <- undirected.arcs(cpdag)
  }
  return(cpdag)
}

In [16]:
train_bn <- function(data, algorithm, score = NULL) {
  if (algorithm == "hc") {
    bn <- bnlearn::hc(data, score = score)
  } else if (algorithm == "tabu") {
    bn <- bnlearn::tabu(data, score = score)
  } else if (algorithm == "gs") {
    bn <- bnlearn::gs(data)
    bn <- bnlearn::cpdag(bn) # Convert to a completed partially directed acyclic graph
    bn <- cpdag_to_dag(bn) # Convert CPDAG to DAG
  } else if (algorithm == "iamb") {
    bn <- bnlearn::iamb(data)
    bn <- bnlearn::cpdag(bn) # Convert to a completed partially directed acyclic graph
    bn <- cpdag_to_dag(bn) # Convert CPDAG to DAG
  } else {
    stop("Unsupported algorithm")
  }

  cat(algorithm, "\n")

  bn.fit(bn, data)
}

In [17]:
# Define a function to evaluate the model using cross-validation
evaluate_bn <- function(data, bn_fitted, target_var) {
  predictions <- predict(bn_fitted, data = data, node = target_var)
  actual <- data[[target_var]]
  accuracy <- mean(predictions == actual)
  return(accuracy)
}

# Just BN prediction on original data

In [18]:
# Define discretize_df function
discretize_df <- function(df, breaks = 5) {
  for (var in colnames(df)) {
    if (!is.factor(df[[var]])) {
      freq_table <- table(df[[var]])
      zero_proportion <- ifelse(!is.na(freq_table[as.character(0)]), 
                                freq_table[as.character(0)] / sum(freq_table), 
                                0)
      if (zero_proportion > 4/5) {
        new_breaks <- 1
      } else if (zero_proportion > 1/4) {
        new_breaks <- breaks - 2
      } else if (zero_proportion > 1/5) {
        new_breaks <- breaks - 1
      } else {
        new_breaks <- breaks
      }
      zero_portion <- (df[[var]] == 0)
      non_zero_values <- df[[var]][!zero_portion]
      if (length(non_zero_values) > 0) {
        range_values <- range(non_zero_values, na.rm = TRUE)
        breaks_values <- seq(range_values[1], range_values[2], length.out = new_breaks + 1)
        labels <- sapply(1:(length(breaks_values) - 1), function(i) 
                         paste("(", breaks_values[i], "-", breaks_values[i + 1], "]", sep = ""))
        discretized_non_zeros <- cut(non_zero_values, breaks = breaks_values, labels = labels, include.lowest = TRUE)
        df[[var]] <- factor(ifelse(zero_portion, "0", as.character(discretized_non_zeros)))
      } else {
        df[[var]] <- factor("0")
      }
    }
  }
  return(df)
}

# Define cpdag_to_dag function
cpdag_to_dag <- function(cpdag) {
  adj_matrix <- amat(cpdag)
  ig <- graph_from_adjacency_matrix(adj_matrix, mode = "directed")
  if (igraph::is_dag(ig)) {
    return(cpdag)
  }
  directed_arcs <- directed.arcs(cpdag)
  undirected_arcs <- undirected.arcs(cpdag)
  while (nrow(undirected_arcs) > 0) {
    arc <- undirected_arcs[1, , drop = FALSE]
    cpdag <- set.arc(cpdag, from = arc[1, 1], to = arc[1, 2])
    undirected_arcs <- undirected.arcs(cpdag)
  }
  return(cpdag)
}

# Define train_bn function
train_bn <- function(data, algorithm, score = NULL) {
  if (any(is.na(data))) {
    stop("The data contains missing values.")
  }
  
  if (algorithm %in% c("hc", "tabu") && !is.null(score)) {
    bn <- bnlearn::hc(data, score = score)
  } else if (algorithm == "tabu" && !is.null(score)) {
    bn <- bnlearn::tabu(data, score = score)
  } else if (algorithm == "gs") {
    bn <- bnlearn::gs(data)
    bn <- bnlearn::cpdag(bn)
    bn <- cpdag_to_dag(bn)
  } else if (algorithm == "iamb") {
    bn <- bnlearn::iamb(data)
    bn <- bnlearn::cpdag(bn)
    bn <- cpdag_to_dag(bn)
  } else {
    stop("Unsupported algorithm or missing score for algorithm.")
  }
  
  bn.fit(bn, data)
}

# Define a function to evaluate the Bayesian network model
evaluate_bn <- function(testData, bn_fitted, target_var) {
  predictions <- predict(bn_fitted, data = testData, node = target_var)
  mean(predictions == testData[[target_var]])
}

custom_model <- list(
  type = c("Classification", "Regression"),
  library = "bnlearn",
  loop = NULL,
  parameters = data.frame(parameter = c("algorithm", "score"),
                          class = c("character", "character"),
                          label = c("Algorithm", "Score")),
  grid = function(x, y, len = NULL, search = "grid") {
    algorithms <- c("hc", "tabu", "gs", "iamb")
    scores <- c("aic", "bic")
    expand.grid(algorithm = algorithms, score = scores)
  },
  fit = function(x, y, wts, param, lev, last, classProbs, ...) {
    data <- as.data.frame(x)
    data$income <- y
    
    print("Fitting model with parameters:")
    print(param)
    
    if (any(is.na(data))) {
      stop("The data contains missing values.")
    }
    
    # Additional debug info
    if (!param$score %in% c("aic", "bic")) {
      stop("Invalid score parameter: ", param$score)
    }
    
    train_bn(data, param$algorithm, param$score)
  },
  predict = function(modelFit, newdata, submodels = NULL) {
    if (any(is.na(newdata))) {
      stop("The new data contains missing values.")
    }
    predict(modelFit, newdata)
  },
  prob = function(modelFit, newdata, submodels = NULL) {
    if (any(is.na(newdata))) {
      stop("The new data contains missing values.")
    }
    predict(modelFit, newdata, type = "prob")
  },
  predictors = function(x, ...) {
    names(x$bn)
  },
  varImp = NULL,
  levels = function(x) x$lev,
  tags = c("Bayesian Network", "Graphical Models"),
  sort = function(x) x
)

In [19]:
bn_pred <- function(data, outer_folds, inner_folds){############adjust##############

    # discretize the data
    data <- discretize_df(data)

    if (any(is.na(data))) {
        stop("Data contains NA values after discretization")
    }

    # adjust evaluation metric to fit both numeric and factored targets
    summaryFunctionType <- if (is.numeric(data$income)) defaultSummary else multiClassSummary
    # metric: train() uses per default RSME and Accuracy for numeric and factored targets

    #  set control args
    outer_control <- trainControl(method = "cv", number = outer_folds,
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)
        
    inner_control <- trainControl(method = "cv", number = inner_folds, 
                                  summaryFunction = summaryFunctionType,
                                  verboseIter = FALSE,
                                  allowParallel = TRUE)

    # Define the grid for hyperparameter tuning
    algorithms <- c("hc", "tabu", "gs", "iamb") ############adjust##############
    scores <- c("aic", "bic") ############adjust##############

    # Create grid
    tunegrid <- expand.grid(algorithm = algorithms, score = scores) ############adjust##############

    # Initialize variables to store results
    outer_results <- list()

    outer_cv_folds = createFolds(data$income, k = outer_folds)
    
    # Outer loop: Cross-validation for model evaluation
    for (i in seq_along(outer_folds)) {
        
        # Split data into outer folds
        outer_test_index = outer_cv_folds[[i]]
        outer_testData = data[outer_test_index,]
        outer_trainData  = data[-outer_test_index,]
        print("outer data folds")
        print(any(is.na(outer_trainData)))
        print(any(is.na(outer_testData)))
        if (any(is.na(outer_trainData))) {
        print(colSums(is.na(outer_trainData)))}

        print("before train")
        # Hyperparameter tuning using inner CV
        # No need for inner loop because "train" does k-fold CV already
        model <- caret::train(income ~ ., 
                        data = outer_trainData, 
                        method = custom_model, ############adjust##############
                        tuneGrid = tunegrid, 
                        trControl = inner_control)#,
                        #metric = metricType)
            

        # Store the best hyperparameters
        best_hyperparameters <- model$bestTune

        # Train the final model on the outer training set with the best hyperparameters
        final_model <- caret::train(income ~ ., 
                             data = outer_trainData, 
                             method = "rpart",############adjust##############
                             trControl = outer_control, 
                             tuneGrid = best_hyperparameters)

        # Testing the final model on the outer test set
        predictions <- predict(final_model, newdata = outer_testData)
        
        if (is.numeric(data$income)) {
            eval <- postResample(predictions, outer_testData$income) # postResample is a useful caret function
        } else if (is.factor(data$income)) {
            eval <- confusionMatrix(predictions, outer_testData$income)
        } else {
            stop("The predicted target has to be numeric or factor.")
        }

        # Store the evaluation metrics for this outer fold
        outer_results[[i]] <- eval
    }

    # Average the evaluation metrics over the outer folds
    eval_avg_outer_fold <- mean(unlist(outer_results)) # Calculate the mean performance over all outer folds

    # Return the average evaluation metrics
    return(eval_avg_outer_fold)
}

In [20]:
apply(cps_with_cont, 2, function(col) {
    if(length(unique(col)) == 1) {
        return(names(cps_with_cont)[which(col == names(cps_with_cont))])
    } else {
        return(NULL)
    }
})

NULL

In [22]:
cps_res <- bn_pred(cps_with_cont, 2, 2)

[1] "outer data folds"
[1] FALSE
[1] FALSE
[1] "before train"
[1] "Fitting model with parameters:"
  algorithm score
1        hc   aic


"model fit failed for Fold1: algorithm=hc, score=aic Error in check.label(score, choices = allowed, labels = score.labels,  : 
  the score must be a single character string.
"


[1] "Fitting model with parameters:"
  algorithm score
2      tabu   aic


"model fit failed for Fold1: algorithm=tabu, score=aic Error in check.label(score, choices = allowed, labels = score.labels,  : 
  the score must be a single character string.
"


old

bn_simulation <- function(data, nrun = 10, kfold = 10, algorithms = c("hc", "tabu", "gs", "iamb"), scores = c("aic", "bic", "k2")) {
 
  # Create empty list to store evaluation dataframes
  eval_list <- list()

  # Discretize the data
  data <- discretize_df(data)

  # Set initial seed
  s <- 1234
  for (i in 1:nrun) {
    # Vary seed with each run
    s <- s + 1
    set.seed(s)

    target_var <- "income"

    # Split the data into training and testing sets
    trainIndex <- createDataPartition(data$income, p = .8, 
                                      list = FALSE, 
                                      times = 1)
    trainData <- data[trainIndex,]
    testData  <- data[-trainIndex,]
    trainData <- as.data.frame(trainData)
    testData <- as.data.frame(testData)

    # Define control for CV
    control <- trainControl(method = "cv", number = kfold)

    # Define the grid of parameters
    grid <- expand.grid(algorithm = algorithms, score = scores)
    
    results <- lapply(algorithms, function(algorithm) {
      if (algorithm %in% c("hc", "tabu")) {
        scores_results <- lapply(scores, function(score) {
          bn_fitted <- train_bn(trainData, algorithm, score)
          accuracy <- evaluate_bn(testData, bn_fitted, target_var)
          return(data.frame(algorithm = algorithm, score = score, accuracy = accuracy))
        })
        scores_results <- do.call(rbind, scores_results)
      } else {
        bn_fitted <- train_bn(trainData, algorithm)
        accuracy <- evaluate_bn(testData, bn_fitted, target_var)
        scores_results <- data.frame(algorithm = algorithm, score = NA, accuracy = accuracy)
      }
      return(scores_results)
    })

    # Combine results into a single data frame
    results <- do.call(rbind, results)

    # Select the best model
    best_model <- results[which.max(results$accuracy),]

    # Train the best model on the entire training dataset
    final_bn <- if (best_model$algorithm %in% c("hc", "tabu")) {
      train_bn(trainData, best_model$algorithm, best_model$score)
    } else {
      train_bn(trainData, best_model$algorithm, "")
    }
    
    # Use the final model for prediction
    levels(testData$income) <- levels(trainData$income)
    predictions <- predict(final_bn, data = testData, node = target_var)
    #predictions <- factor(predictions, levels = levels(trainData$income))


    # Evaluate the final model
    eval <- as.data.frame(evaluation_metrics_factor(predictions, testData)) #$income

    eval_list[[i]] <- eval
    print(c("run", i, "completed"))
  }

  # Outside the nruns loop
  # Average over all runs
  sum_df <- Reduce(function(x, y) Map(`+`, x, y), eval_list)
  eval_avg <- lapply(sum_df, function(col) col / length(eval_list))

  # Convert the list back to a dataframe
  # Store row names
  rownames <- row.names(eval_list[[1]])

  # Convert the list back to a dataframe
  eval_avg <- as.data.frame(eval_avg)

  # Set back the row names
  rownames(eval_avg) <- row.names(eval_list[[1]])

  # Return the average evaluation metrics
  return(eval_avg)
}

In [150]:
cps_res <- bn_simulation(cpspop)

hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- csp is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "1"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "2"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- csp is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "3"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "4"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "5"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- csp is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "6"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "7"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- csp is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "8"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "9"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure marital -> age <- ss is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure income -> educ <- age is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure tax -> marital <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "10"        "completed"


In [126]:
adult_res <- bn_simulation(adult)

hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure education -> workclass <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
tabu 
[1] "run"       "1"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure education -> workclass <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "2"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 
gs 


"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "3"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 
gs 


"vstructure workclass -> education <- income is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "4"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 
gs 


"vstructure workclass -> education <- income is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "5"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure education -> workclass <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "6"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure education -> workclass <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "7"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure education -> workclass <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure workclass -> education <- income is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "8"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 


"vstructure education -> workclass <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


gs 


"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
tabu 
[1] "run"       "9"         "completed"
hc 
hc 
hc 
tabu 
tabu 
tabu 
gs 


"vstructure education -> income <- capital_gain is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> marital_status <- sex is not applicable, because one or both arcs are oriented in the opposite direction."
"vstructure age -> relationship <- sex is not applicable, because one or both arcs are oriented in the opposite direction."


iamb 
hc 
[1] "run"       "10"        "completed"


In [127]:
adult_res

Unnamed: 0_level_0,Accuracy,F1,Sensitivity,Specificity
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>
Accuracy,0.7710662,0.8546128,0.8958057,0.3946036


In [151]:
cps_res

Unnamed: 0_level_0,Accuracy,F1,Sensitivity,Specificity
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>
Accuracy,0.9606435,0.9799267,0.25,0.8


### Save the results

In [1]:
# Bind results
bn_pred_results <- list(cps_res = cps_res, adult_res = adult_res)
# File pth for output
file <- "/user/emma.foessing01/u11969/results/bn_pred_results.RData" 
dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE) # create dir if not there
# Save the results to an RData file 
save(results, file = output_file)

ERROR: Error in eval(expr, envir, enclos): Objekt 'cps_res' nicht gefunden
