Skip to content

Commit

Permalink
updates to network.predictability
Browse files Browse the repository at this point in the history
o a more robust solution for categorical data with categories made to start at 1 but are later converted to their proper value on output

o quadratic weighted kappa is used for `"Weighted"` accuracy when ordinal data are used
  • Loading branch information
AlexChristensen committed Feb 26, 2024
1 parent 1843285 commit 7d00fb4
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 106 deletions.
45 changes: 21 additions & 24 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -2903,12 +2903,12 @@ continuous_accuracy <- function(prediction, observed)

#' @noRd
# Categorical Accuracy (for single variable) ----
# Updated 19.02.2024
# Updated 26.02.2024
categorical_accuracy <- function(prediction, observed)
{

# Get maximum categories
max_category <- max(prediction, observed)
max_category <- max(prediction, observed, na.rm = TRUE)

# Set category sequence
category_sequence <- seq_len(max_category)
Expand All @@ -2928,38 +2928,35 @@ categorical_accuracy <- function(prediction, observed)
# Populate accuracy table
accuracy_table[table_names$observed, table_names$prediction] <- tabled

# Get empirical frequencies
frequency <- table(factor(observed, levels = category_sequence))
# Get frequencies
observed_frequency <- table(factor(observed, levels = category_sequence))
prediction_frequency <- table(factor(prediction, levels = category_sequence))

# Get total values
total_values <- sum(frequency)

# Get maximum possible distance incorrect
max_distance <- pmax(
abs(max_category - category_sequence), # distance from maximum category
abs(min(category_sequence) - category_sequence) # distance from minimum category
)

# Get minimum possible weight accuracy
minimum_weighted <- sum(0.5^max_distance * frequency)
total_values <- sum(observed_frequency)

# Get diagonal of table (correct predictions)
correct <- diag(accuracy_table)

# Standardize tables
observed_standard <- observed_frequency / total_values
prediction_standard <- prediction_frequency / total_values
confusion_matrix <- accuracy_table / total_values

# Compute weights
weights <- outer(
category_sequence, category_sequence,
FUN = function(x, y){(x - y)^2}
)

# Return accuracy by category
return(
c(
correct / frequency,
accuracy = sum(correct, na.rm = TRUE) / total_values,
balanced = sum(
correct / rowSums(accuracy_table, na.rm = TRUE),
na.rm = TRUE
) / max_category,
weighted = (sum(0.5^abs(prediction - observed), na.rm = TRUE) - minimum_weighted) /
# weighted total
(total_values - minimum_weighted)
# normalization: all differences of 0 minus
# all differences of maximum absolute difference
weighted = 1 - (
sum(weights * confusion_matrix) / # observed
sum(weights * tcrossprod(observed_standard, prediction_standard)) # expected
)
)
)

Expand Down
50 changes: 14 additions & 36 deletions R/network.generalizability.R
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@
#' @export
#'
# Perform generalizability analysis ----
# Updated 19.02.2024
# Updated 26.02.2024
network.generalizability <- function(
data,
# generalizability arguments
Expand Down Expand Up @@ -453,54 +453,32 @@ network.generalizability <- function(
rbind, lapply(prediction_results, function(x){x$predictions})
)

# Initialize adjusted predictions
adjusted_predictions <- prediction_matrix

# Get flags
flags <- attr(prediction_results[[1]]$results, "flags")

# Get column sequence
column_sequence <- seq_len(dimensions[2])

# Get ranges
ranges <- nvapply(
column_sequence, function(i){
range(prediction_matrix[,i], data[,i], na.rm = TRUE)
}, LENGTH = 2
)

# Ensure 'prediction_matrix' and 'data' start at 1
for(i in column_sequence){
# Check for categories
if(any(flags$categorical)){

# Check for categories
if(flags$categorical[[i]]){
# Set up for combined
combined <- rbind(data, prediction_matrix)

# Check for lowest category
minimum_value <- ranges[1,i]
# Get original sample size
original_n <- dimensions[1]

# Re-adjust minimum category to 1 for new data
if(minimum_value <= 0){
# Ensure categories start at 1
one_start_list <- ensure_one_start(combined, flags, original_n)

# Value to add
add_value <- abs(minimum_value) + 1

# Add value to 'data' and 'prediction_matrix'
data[,i] <- data[,i] + add_value
prediction_matrix[,i] <- prediction_matrix[,i] + add_value

}

}
# Sort out data
original.data <- one_start_list$original.data
newdata <- one_start_list$newdata

}

# Set up as if at the end of `network.predictability`
metric_summary <- setup_results(
predictions = prediction_matrix,
adjusted_predictions = adjusted_predictions,
newdata = data, flags = flags,
predictions = newdata, newdata = original.data, flags = flags,
betas = NULL, node_names = dimnames(data)[[2]],
dimensions = dimensions, dim_sequence = column_sequence
dimensions = dimensions, dim_sequence = seq_len(dimensions[2])
)

# Attach categories to results
Expand Down
Loading

0 comments on commit 7d00fb4

Please sign in to comment.