Skip to content

Commit

Permalink
Completed #41
Browse files Browse the repository at this point in the history
  • Loading branch information
boxuancui committed May 27, 2018
1 parent 54300c3 commit b5c1377
Show file tree
Hide file tree
Showing 9 changed files with 344 additions and 81 deletions.
103 changes: 87 additions & 16 deletions R/create_report.r
Original file line number Diff line number Diff line change
@@ -1,57 +1,128 @@
#' create_report Function
#'
#' This function generates the report of data profiling.
#' This function creates a data profiling report.
#' @param data input data
#' @param output_file output file name. The default is "report.html".
#' @param output_dir output directory for report. The default is user's current directory.
#' @param y name of response variable if any. Response variables will be passed to appropriate plotting functions automatically.
#' @param config report configuration with function arguments as \link{list}. See details.
#' @param \dots other arguments to be passed to \link{render}.
#' @keywords create_report
#' @aliases GenerateReport
#' @details \code{config} is a named list to be evaluated by \code{create_report}.
#' Each name should exactly match a function name.
#' By doing so, that function and corresponding content will be added to the report.
#' If you do not want to include certain functions/content, do not add them to \code{config}.
#' @details By default, there is a preset \code{config} object (refer to example).
#' In case you would like to customize the report, copy and edit the code and pass it to \code{config} argument.
#' @details All function arguments will be passed to \link{do.call} as a list.
#' @note If there are multiple options for the same function, all of them will be plotted.
#' For example, \code{create_report(..., y = "a", config = list("plot_bar" = list("with" = "b")))} will create 3 bar charts:
#' \itemize{
#' \item regular frequency bar chart
#' \item bar chart aggregated by response variable "a"
#' \item bar chart aggregated by \code{with} variable "b"
#' }
#' @importFrom utils browseURL
#' @importFrom rmarkdown render
#' @export create_report GenerateReport
#' @examples
#' \dontrun{
#' # load library
#' #############################
#' ## Default config file ##
#' ## Copy and edit if needed ##
#' #############################
#' config <- list(
#' "introduce" = list(),
#' "plot_str" = list(
#' "type" = "diagonal",
#' "fontSize" = 35,
#' "width" = 1000,
#' "margin" = list("left" = 350, "right" = 250)
#' ),
#' "plot_missing" = list(),
#' "plot_histogram" = list(),
#' "plot_bar" = list(),
#' "plot_correlation" = list("use" = "pairwise.complete.obs"),
#' "plot_prcomp" = list(),
#' "plot_boxplot" = list(),
#' "plot_scatterplot" = list()
#' )
#'
#' # Create report
#' create_report(iris)
#' create_report(airquality, y = "Ozone")
#'
#' # Load library
#' library(ggplot2)
#' library(data.table)
#'
#' # load diamonds dataset from ggplot2
#' data("diamonds", package = "ggplot2")
#' diamonds2 <- data.table(diamonds)
#'
#' # manually set some missing values
#' # Set some missing values
#' diamonds2 <- data.table(diamonds)
#' for (j in 5:ncol(diamonds2)) {
#' set(diamonds2,
#' i = sample.int(nrow(diamonds2), sample.int(nrow(diamonds2), 1)),
#' j,
#' value = NA_integer_)}
#' value = NA_integer_)
#' }
#'
#' # generate report for diamonds2 dataset
#' create_report(diamonds2,
#' output_file = "report.html",
#' output_dir = getwd(),
#' html_document(toc = TRUE, toc_depth = 6, theme = "flatly"))
#' # Create customized report for diamonds2 dataset
#' create_report(
#' data = diamonds2,
#' output_file = "report.html",
#' output_dir = getwd(),
#' y = "price",
#' config = list(
#' "introduce" = list(),
#' "plot_missing" = list(),
#' "plot_histogram" = list(),
#' "plot_density" = list(),
#' "plot_bar" = list("with" = "carat"),
#' "plot_correlation" = list("use" = "pairwise.complete.obs"),
#' "plot_prcomp" = list(),
#' "plot_boxplot" = list("by" = "carat"),
#' "plot_scatterplot" = list("by" = "carat")
#' ),
#' html_document(toc = TRUE, toc_depth = 6, theme = "flatly")
#' )
#' }

create_report <- function(data, output_file = "report.html", output_dir = getwd(), ...) {
## Get argument list
args <- as.list(match.call())
create_report <- function(data, output_file = "report.html", output_dir = getwd(), y = NULL, config = list(), ...) {
## Check response variable
if (!is.null(y)) {
if (!(y %in% names(data))) stop("`", y, "` not found in data!")
}
## Get directory of report markdown template
report_dir <- system.file("rmd_template/report.rmd", package = "DataExplorer")
## Fall back to the default report configuration if `config` is empty
if (length(config) == 0) {
config <- list(
"introduce" = list(),
"plot_str" = list("type" = "diagonal", "fontSize" = 35, "width" = 1000, "margin" = list("left" = 350, "right" = 250)),
"plot_missing" = list(),
"plot_histogram" = list(),
"plot_bar" = list(),
"plot_correlation" = list("use" = "pairwise.complete.obs"),
"plot_prcomp" = list(),
"plot_boxplot" = list(),
"plot_scatterplot" = list()
)
}
## Render report into html
suppressWarnings(render(
input = report_dir,
output_file = output_file,
output_dir = output_dir,
intermediates_dir = output_dir,
params = list(data = data, fun_options = list()),
params = list(data = data, report_config = config, response = y),
...
))
## Open report
report_path <- file.path(output_dir, output_file)
browseURL(report_path)
## Print report directory
args <- as.list(match.call())
if (ifelse(is.null(args[["quiet"]]), TRUE, !args[["quiet"]])) message(paste0("\n\nReport is generated at \"", report_path, "\"."))
}

Expand Down
2 changes: 1 addition & 1 deletion R/plot_bar.r
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ plot_bar <- function(data, with = NULL, maxcat = 50, order_bar = TRUE, title = N
} else {
if (!is.numeric(data[[with]])) stop("`with` should be continuous!")
x <- data.table(subset_data[, j, with = FALSE], "agg_by" = data[[with]])
agg_x <- x[, list(frequency = sum(agg_by)), by = eval(names(x)[1])]
agg_x <- x[, list(frequency = sum(agg_by, na.rm = TRUE)), by = eval(names(x)[1])]
}
if (order_bar) {
base_plot <- ggplot(agg_x, aes(x = reorder(get(names(agg_x)[1]), frequency), y = frequency))
Expand Down
2 changes: 1 addition & 1 deletion R/plot_prcomp.r
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#'
#' Visualize output of \link{prcomp}.
#' @param data input data
#' @param variance_cap Maximum cumulative explained variance allowed for all principle components. Default is 80\%.
#' @param variance_cap maximum cumulative explained variance allowed for all principal components. Default is 80\%.
#' @param maxcat maximum categories allowed for each discrete feature. The default is 50.
#' @param title plot title starting from page 2.
#' @param ggtheme complete ggplot2 themes. The default is \link{theme_gray}.
Expand Down
4 changes: 2 additions & 2 deletions R/plot_str.r
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#' plot_str(obj, type = "d", max_level = 2)
#' }

plot_str <- function(data, type = c("diagonal", "radial"), max_level, print_network = TRUE, ...) {
plot_str <- function(data, type = c("diagonal", "radial"), max_level = NULL, print_network = TRUE, ...) {
## Declare variable first to pass R CMD check
i <- idx <- parent <- NULL
## Capture str output
Expand All @@ -45,7 +45,7 @@ plot_str <- function(data, type = c("diagonal", "radial"), max_level, print_netw
base_split <- tstrsplit(str_output[2:n], "\\$")
nest_level <- (nchar(base_split[[1]]) - nchar(gsub("\ \\.{2}", "", base_split[[1]]))) / 3 + 1
## Handle max_level if exists
if (missing(max_level)) {
if (is.null(max_level)) {
max_level <- max(nest_level)
} else if (max_level <= 0 | max_level > max(nest_level)) {
stop(paste0("max_level should be between 1 and ", max(nest_level)))
Expand Down

0 comments on commit b5c1377

Please sign in to comment.