Skip to content

Commit

Permalink
renaming of package and functions to inspect - more accurate than rep…
Browse files Browse the repository at this point in the history
…orter
  • Loading branch information
alastair rushworth committed Apr 22, 2019
1 parent a149cbf commit 995dca0
Show file tree
Hide file tree
Showing 32 changed files with 466 additions and 466 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,5 +1,5 @@
Package: reporter
Title: Tools for Exploring and Comparing Data Frames
Package: inspectdf
Title: Inspection, Comparison and Visualisation of Data Frames
Version: 0.0.0.9000
Authors@R: person("Alastair", "Rushworth", email = "alastairmrushworth@gmail.com", role = c("aut", "cre"))
Maintainer: Alastair Rushworth <alastairmrushworth@gmail.com>
Expand Down
16 changes: 8 additions & 8 deletions NAMESPACE
@@ -1,12 +1,12 @@
# Generated by roxygen2: do not edit by hand

export(report_cat)
export(report_cor)
export(report_imb)
export(report_mem)
export(report_na)
export(report_num)
export(report_types)
export(inspect_cat)
export(inspect_cor)
export(inspect_imb)
export(inspect_mem)
export(inspect_na)
export(inspect_num)
export(inspect_types)
importFrom(dplyr,anti_join)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
Expand Down Expand Up @@ -78,4 +78,4 @@ importFrom(tidyr,gather)
importFrom(tidyr,replace_na)
importFrom(utils,object.size)
importFrom(utils,tail)
useDynLib(reporter)
useDynLib(inspectdf)
12 changes: 6 additions & 6 deletions R/RcppExports.R
Expand Up @@ -2,26 +2,26 @@
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

count_levels_num <- function(x) {
.Call('_reporter_count_levels_num', PACKAGE = 'reporter', x)
.Call('_inspectdf_count_levels_num', PACKAGE = 'inspectdf', x)
}

count_levels_char <- function(x) {
.Call('_reporter_count_levels_char', PACKAGE = 'reporter', x)
.Call('_inspectdf_count_levels_char', PACKAGE = 'inspectdf', x)
}

na_numeric <- function(x) {
.Call('_reporter_na_numeric', PACKAGE = 'reporter', x)
.Call('_inspectdf_na_numeric', PACKAGE = 'inspectdf', x)
}

na_character <- function(x) {
.Call('_reporter_na_character', PACKAGE = 'reporter', x)
.Call('_inspectdf_na_character', PACKAGE = 'inspectdf', x)
}

na_logical <- function(x) {
.Call('_reporter_na_logical', PACKAGE = 'reporter', x)
.Call('_inspectdf_na_logical', PACKAGE = 'inspectdf', x)
}

na_integer <- function(x) {
.Call('_reporter_na_integer', PACKAGE = 'reporter', x)
.Call('_inspectdf_na_integer', PACKAGE = 'inspectdf', x)
}

18 changes: 9 additions & 9 deletions R/report_cat.R → R/inspect_cat.R
@@ -1,4 +1,4 @@
#' Report and compare the levels within each categorical feature in one or two dataframes.
#' Summarise and compare the levels within each categorical feature in one or two dataframes.
#'
#' @param df1 A dataframe
#' @param df2 An optional second data frame for comparing categorical levels.
Expand Down Expand Up @@ -35,11 +35,11 @@
#' @export
#' @examples
#' data("starwars", package = "dplyr")
#' report_cat(starwars)
#' inspect_cat(starwars)
#' # return a visualisation too
#' report_cat(starwars, show_plot = TRUE)
#' inspect_cat(starwars, show_plot = TRUE)
#' # compare the levels in two data frames
#' report_cat(starwars, starwars[1:20, ])
#' inspect_cat(starwars, starwars[1:20, ])
#' @importFrom tibble as_tibble
#' @importFrom tibble tibble
#' @importFrom dplyr arrange
Expand All @@ -56,7 +56,7 @@
#' @importFrom dplyr ungroup
#' @importFrom magrittr %>%

report_cat <- function(df1, df2 = NULL, show_plot = FALSE){
inspect_cat <- function(df1, df2 = NULL, show_plot = FALSE){

# perform basic column check on dataframe input
check_df_cols(df1)
Expand All @@ -79,9 +79,9 @@ report_cat <- function(df1, df2 = NULL, show_plot = FALSE){
do.call("rbind", .) %>%
mutate(col_name = colnames(df_cat))
# get the unique levels
levels_unique <- lapply(levels_list, nrow) %>%
levels_unique <- suppressWarnings(lapply(levels_list, nrow) %>%
do.call("rbind", .) %>%
as_tibble(rownames = "col_name")
as_tibble(rownames = "col_name"))
# combine the above tables
levels_df <- levels_unique %>%
left_join(levels_top, by = "col_name") %>%
Expand Down Expand Up @@ -110,10 +110,10 @@ report_cat <- function(df1, df2 = NULL, show_plot = FALSE){
}
} else {
# levels for df1
s1 <- report_cat(df1, show_plot = FALSE) %>%
s1 <- inspect_cat(df1, show_plot = FALSE) %>%
select(-contains("common"), -cnt)
# levels for df2
s2 <- report_cat(df2, show_plot = FALSE) %>%
s2 <- inspect_cat(df2, show_plot = FALSE) %>%
select(-contains("common"), -cnt)
# combine and clean up levels
levels_df <- full_join(s1, s2, by = "col_name") %>%
Expand Down
14 changes: 7 additions & 7 deletions R/report_cor.R → R/inspect_cor.R
@@ -1,4 +1,4 @@
#' Report and compare Pearson's correlation coefficients for numeric columns in one or two dataframes.
#' Summarise and compare Pearson's correlation coefficients for numeric columns in one or two dataframes.
#'
#' @param df1 A data frame
#' @param df2 An optional second data frame for comparing correlation
Expand Down Expand Up @@ -36,11 +36,11 @@
#' @examples
#' data("starwars", package = "dplyr")
#' # correlations in numeric columns
#' report_cor(starwars)
#' inspect_cor(starwars)
#' # get visualisation with confidence bands
#' report_cor(starwars, show_plot = TRUE)
#' inspect_cor(starwars, show_plot = TRUE)
#' # compare correlations with a different data frame
#' report_cor(starwars, starwars[1:10, ], show_plot = TRUE)
#' inspect_cor(starwars, starwars[1:10, ], show_plot = TRUE)
#' @importFrom dplyr arrange
#' @importFrom dplyr contains
#' @importFrom dplyr desc
Expand Down Expand Up @@ -70,7 +70,7 @@
#' @importFrom magrittr %>%
#' @importFrom tibble tibble

report_cor <- function(df1, df2 = NULL, show_plot = FALSE, alpha = 0.05,
inspect_cor <- function(df1, df2 = NULL, show_plot = FALSE, alpha = 0.05,
absolute = TRUE){

# perform basic column check on dataframe input
Expand Down Expand Up @@ -101,11 +101,11 @@ report_cor <- function(df1, df2 = NULL, show_plot = FALSE, alpha = 0.05,
}
} else {
# stats for df1
s1 <- report_cor(df1, show_plot = F) %>%
s1 <- inspect_cor(df1, show_plot = F) %>%
select(col_1, col_2, corr) %>%
rename(corr_1 = corr)
# stats for df2
s2 <- report_cor(df2, show_plot = F) %>%
s2 <- inspect_cor(df2, show_plot = F) %>%
select(col_1, col_2, corr) %>%
rename(corr_2 = corr)
# join the two
Expand Down
14 changes: 7 additions & 7 deletions R/report_imb.R → R/inspect_imb.R
@@ -1,4 +1,4 @@
#' Report and compare columnwise imbalance for non-numeric columns in one or two dataframes.
#' Summarise and compare columnwise imbalance for non-numeric columns in one or two dataframes.
#'
#' @param df1 A data frame
#' @param df2 An optional second data frame for comparing columnwise imbalance.
Expand Down Expand Up @@ -37,11 +37,11 @@
#' @examples
#' data("starwars", package = "dplyr")
#' # get tibble of most common levels
#' report_imb(starwars)
#' inspect_imb(starwars)
#' # get most common levels and show as barplot
#' report_imb(starwars, show_plot = TRUE)
#' inspect_imb(starwars, show_plot = TRUE)
#' # compare imbalance between two data frames
#' report_imb(starwars, starwars[1:10, -3])
#' inspect_imb(starwars, starwars[1:10, -3])
#' @importFrom tibble tibble
#' @importFrom dplyr arrange
#' @importFrom dplyr contains
Expand All @@ -54,7 +54,7 @@
#' @importFrom dplyr slice
#' @importFrom magrittr %>%

report_imb <- function(df1, df2 = NULL, show_plot = FALSE){
inspect_imb <- function(df1, df2 = NULL, show_plot = FALSE){

# perform basic column check on dataframe input
check_df_cols(df1)
Expand Down Expand Up @@ -90,10 +90,10 @@ report_imb <- function(df1, df2 = NULL, show_plot = FALSE){
}
} else {
# summary of df1
s1 <- report_imb(df1, show_plot = F) %>%
s1 <- inspect_imb(df1, show_plot = F) %>%
rename(pcnt_1 = pcnt, cnt_1 = cnt)
# summary of df2
s2 <- report_imb(df2, show_plot = F) %>%
s2 <- inspect_imb(df2, show_plot = F) %>%
rename(pcnt_2 = pcnt, cnt_2 = cnt)
# left join summaries together
out <- left_join(s1, s2, by = c("col_name", "value")) %>%
Expand Down
14 changes: 7 additions & 7 deletions R/report_mem.R → R/inspect_mem.R
@@ -1,4 +1,4 @@
#' Report and compare the memory usage in one or two dataframes.
#' Summarise and compare the memory usage in one or two dataframes.
#'
#' @param df1 A data frame.
#' @param df2 An optional second data frame for comparing column sizes.
Expand Down Expand Up @@ -28,11 +28,11 @@
#' @examples
#' data("starwars", package = "dplyr")
#' # get tibble of column memory usage for the starwars data
#' report_mem(starwars)
#' inspect_mem(starwars)
#' # get column memory usage and show as barplot
#' report_mem(starwars, show_plot = TRUE)
#' inspect_mem(starwars, show_plot = TRUE)
#' # compare memory usage
#' report_mem(starwars, starwars[1:10, -3])
#' inspect_mem(starwars, starwars[1:10, -3])
#' @importFrom dplyr arrange
#' @importFrom dplyr contains
#' @importFrom dplyr desc
Expand All @@ -55,7 +55,7 @@
#' @importFrom tibble tibble
#' @export

report_mem <- function(df1, df2 = NULL, show_plot = FALSE){
inspect_mem <- function(df1, df2 = NULL, show_plot = FALSE){

# perform basic column check on dataframe input
check_df_cols(df1)
Expand Down Expand Up @@ -93,8 +93,8 @@ report_mem <- function(df1, df2 = NULL, show_plot = FALSE){
return(out)
} else {
# get the space report for both input dfs
df1 <- report_mem(df1, show_plot = F)
df2 <- report_mem(df2, show_plot = F)
df1 <- inspect_mem(df1, show_plot = F)
df2 <- inspect_mem(df2, show_plot = F)
sjoin <- full_join(df1, df2, by = "col_name") %>%
select(col_name, contains("size"), contains("pcnt"))
colnames(sjoin)[2:3] <- paste0("size_", 1:2)
Expand Down
18 changes: 9 additions & 9 deletions R/report_na.R → R/inspect_na.R
@@ -1,4 +1,4 @@
#' Report and compare the rate of missingness in one or two dataframes.
#' Summarise and compare the rate of missingness in one or two dataframes.
#'
#' @param df1 A data frame
#' @param df2 An optional second data frame for making columnwise comparison of missingness.
Expand Down Expand Up @@ -35,12 +35,12 @@
#'
#' @examples
#' data("starwars", package = "dplyr")
#' # report missingness in starwars data
#' report_na(starwars)
#' # inspect missingness in starwars data
#' inspect_na(starwars)
#' # show the result as a barplot
#' report_na(starwars, show_plot = TRUE)
#' inspect_na(starwars, show_plot = TRUE)
#' # compare two dataframes
#' report_na(starwars, starwars[1:30, ])
#' inspect_na(starwars, starwars[1:30, ])
#' @importFrom dplyr arrange
#' @importFrom dplyr desc
#' @importFrom dplyr full_join
Expand All @@ -53,12 +53,12 @@
#' @importFrom tibble tibble
#' @export

report_na <- function(df1, df2 = NULL, show_plot = FALSE, alpha = 0.05){
inspect_na <- function(df1, df2 = NULL, show_plot = FALSE, alpha = 0.05){
# perform basic column check on dataframe input
check_df_cols(df1)
# capture the data frame names
df_names <- get_df_names()
# if only one df input then report na content
# if only one df input then inspect na content
if(is.null(df2)){
# find the 10 with most missingness
out <- vec_to_tibble(sapply(df1, sumna)) %>%
Expand All @@ -77,8 +77,8 @@ report_na <- function(df1, df2 = NULL, show_plot = FALSE, alpha = 0.05){
}
if(type == "console") invisible(df1)
} else {
s1 <- report_na(df1, show_plot = F)
s2 <- report_na(df2, show_plot = F)
s1 <- inspect_na(df1, show_plot = F)
s2 <- inspect_na(df2, show_plot = F)
na_tab <- full_join(s1, s2, by = "col_name")
na_tab$p_value <- prop_test(na_1 = na_tab$cnt.x,
na_2 = na_tab$cnt.y,
Expand Down
16 changes: 8 additions & 8 deletions R/report_num.R → R/inspect_num.R
@@ -1,4 +1,4 @@
#' Report and compare the numeric variables within one or two dataframes
#' Summarise and compare the numeric variables within one or two dataframes
#'
#' @param df1 A data frame
#' @param df2 An optional second data frame for comparing numeric columns.
Expand All @@ -16,7 +16,7 @@
#' @return A \code{tibble} containing statistical summaries of the numeric
#' columns of \code{df1}, or comparing the histograms of \code{df1} and \code{df2}.
#' @details
#' If only \code{df1} is specified, \code{report_num} returns a tibble with columns
#' If only \code{df1} is specified, \code{inspect_num} returns a tibble with columns
#' \itemize{
#' \item \code{col_name} character vector containing the column names in \code{df1}
#' and \code{df2}
Expand Down Expand Up @@ -46,11 +46,11 @@
#' @examples
#' data("starwars", package = "dplyr")
#' # show summary statistics for starwars
#' report_num(starwars)
#' inspect_num(starwars)
#' # with a visualisation too - try to limit number of bins
#' report_num(starwars, breaks = 10)
#' inspect_num(starwars, breaks = 10)
#' # compare two data frames
#' report_num(starwars, starwars[-c(1:10), ], breaks = 10, show_plot = TRUE)
#' inspect_num(starwars, starwars[-c(1:10), ], breaks = 10, show_plot = TRUE)
#' @importFrom dplyr arrange
#' @importFrom dplyr contains
#' @importFrom dplyr desc
Expand Down Expand Up @@ -81,7 +81,7 @@
#' @importFrom tidyr gather
#' @importFrom utils tail

report_num <- function(df1, df2 = NULL, show_plot = F,
inspect_num <- function(df1, df2 = NULL, show_plot = F,
breaks = 20, plot_layout = NULL, breakseq = NULL){

# perform basic column check on dataframe input
Expand Down Expand Up @@ -145,13 +145,13 @@ report_num <- function(df1, df2 = NULL, show_plot = F,
}
} else {
# get histogram and summaries for first df
s1 <- report_num(df1, show_plot = F, breaks = breaks) %>%
s1 <- inspect_num(df1, show_plot = F, breaks = breaks) %>%
select(col_name, mean, sd, hist)
# extract breaks from the above
breaks_table <- tibble(col_name = s1$col_name,
breaks = lapply(s1$hist, get_break))
# get new histograms and summary stats using breaks from s1
s2 <- report_num(df2, breakseq = breaks_table, show_plot = F) %>%
s2 <- inspect_num(df2, breakseq = breaks_table, show_plot = F) %>%
select(col_name, mean, sd, hist)
s12 <- full_join(s1, s2, by = "col_name")
# calculate psi and fisher p-value
Expand Down
16 changes: 8 additions & 8 deletions R/report_types.R → R/inspect_types.R
@@ -1,4 +1,4 @@
#' Report and compare column types in one or two dataframes.
#' Summarise and compare column types in one or two dataframes.
#'
#' @param df1 A data frame.
#' @param df2 An optional second data frame for comparison.
Expand Down Expand Up @@ -27,11 +27,11 @@
#' @examples
#' data("starwars", package = "dplyr")
#' # get tibble of column types for the starwars data
#' report_types(starwars)
#' inspect_types(starwars)
#' # get column types and show as barplot
#' report_types(starwars, show_plot = TRUE)
#' inspect_types(starwars, show_plot = TRUE)
#' # compare two data frames
#' report_types(starwars, starwars[, -1], show_plot = TRUE)
#' inspect_types(starwars, starwars[, -1], show_plot = TRUE)
#' @export
#' @importFrom dplyr arrange
#' @importFrom dplyr case_when
Expand All @@ -50,9 +50,9 @@
#' @importFrom tibble tibble
#' @importFrom tidyr gather
#' @importFrom tidyr replace_na
#' @useDynLib reporter
#' @useDynLib inspectdf

report_types <- function(df1, df2 = NULL, show_plot = FALSE){
inspect_types <- function(df1, df2 = NULL, show_plot = FALSE){

# perform basic column check on dataframe input
check_df_cols(df1)
Expand Down Expand Up @@ -87,9 +87,9 @@ report_types <- function(df1, df2 = NULL, show_plot = FALSE){
# return dataframe
return(out)
} else {
s1 <- report_types(df1, show_plot = F) %>% select(-col_name)
s1 <- inspect_types(df1, show_plot = F) %>% select(-col_name)
colnames(s1)[2:3] <- paste0(c("cnt_", "pcnt_"), 1)
s2 <- report_types(df2, show_plot = F) %>% select(-col_name)
s2 <- inspect_types(df2, show_plot = F) %>% select(-col_name)
colnames(s2)[2:3] <- paste0(c("cnt_", "pcnt_"), 2)
out <- full_join(s1, s2, by = "type") %>%
replace(is.na(.), 0)
Expand Down

0 comments on commit 995dca0

Please sign in to comment.