From 567989a6690405f99cc39a0385d23e7f4f7e5afc Mon Sep 17 00:00:00 2001
From: gavieira <gabrieldeusdeth@gmail.com>
Date: Tue, 19 Dec 2023 12:45:47 -0300
Subject: [PATCH] Changed labels for matching summary table

---
 R/05-biblioverlap.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/05-biblioverlap.R b/R/05-biblioverlap.R
index 84f804e..ab17ec8 100644
--- a/R/05-biblioverlap.R
+++ b/R/05-biblioverlap.R
@@ -106,16 +106,16 @@ get_matching_summary_df <- function(internal_db_list) {
   #Getting values
   summary <- list()
   summary$total <- nrow(all_data)
-  summary$unique <- nrow(all_data %>% dplyr::distinct(.data$UUID, .keep_all = TRUE))
-  summary$duplicates <- summary$total - summary$unique
+  summary$distinct <- nrow(all_data %>% dplyr::distinct(.data$UUID, .keep_all = TRUE))
+  summary$overlap <- summary$total - summary$distinct
   summary$matched <- nrow(matched_data)
-  summary$unmatched <- summary$unique - summary$matched
+  summary$unmatched <- summary$distinct - summary$matched
   summary$matched_id <- nrow(matched_data %>% dplyr::filter(!is.na(DI)))
   summary$matched_score <- nrow(matched_data %>% dplyr::filter(is.na(DI))) #USES DI column
   summary_df <- data.frame(doc_subset = names(summary), n_docs = unlist(summary), row.names = NULL)
   #Getting dataframe
-  categories <- c('total', 'unique/duplicates', 'unique/duplicates', 'unique', 'unique', 'matched', 'matched')
-  doc_subset_levels <- c('total',  'duplicates', 'unique', 'unmatched', 'matched',  'matched_id', 'matched_score' )
+  categories <- c('total', 'distinct/overlap', 'distinct/overlap', 'distinct', 'distinct', 'matched', 'matched')
+  doc_subset_levels <- c('total',  'overlap', 'distinct', 'unmatched', 'matched',  'matched_id', 'matched_score' )
   final_summary_df <- summary_df %>%
     dplyr::mutate("doc_subset" = factor(.data$doc_subset, levels = doc_subset_levels)) %>%
     dplyr::mutate("category" = factor(categories, levels = unique(categories)), .after = .data$doc_subset) %>%