R/stats.R

#' @title Summary statistics table for a correlation
#' @description Get error summary statistics for any given compressibility correlation.
#' A quick way to show an error summary between any of the indicated correlations and
#' the Standing-Katz chart.
#'
#'     MSE:   Mean Squared Error
#'     RMSE:  Root Mean Squared Error
#'     RSS:   Residual sum of Squares
#'     RMSLE: Root Mean Squared Logarithmic Error. Penalizes understimation.
#'     MAPE:  Mean Absolute Percentage Error = AARE
#'     MPE:   Mean Percentage error = ARE
#'     MAE:   Mean Absolute Error
#'
#' @param correlation identifier. Can be "HY", "DAK", "DPR" "N10", "SH"
#' @param pprRange low (lp) or high (hp) chart area of the Standing-Katz chart
#' @param interval quality of the Ppr scale. Coarse: every 1.0; Fine: every 0.5
#' @importFrom dplyr group_by summarise n
#' @rdname z.stats
#' @export
#' @examples
#' \dontrun{
#' # error statistics for the Dranchuk-AbouKassem correlation
#' z.stats("DAK")
#' }
z.stats <- function(correlation = "DAK", pprRange = "lp", interval = "coarse") {
    Ppr <- NULL; Tpr <- NULL; z.calc <- NULL; z.chart <- NULL; n <- NULL
    # get all `lp` Tpr curves
    tpr_all <- getStandingKatzTpr(pprRange)
    # ppr <- c(0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0)
    ppr <- getStandingKatzPpr(interval)
    sk_corr_all <- createTidyFromMatrix(ppr, tpr_all, correlation)

    grouped <- group_by(sk_corr_all, Tpr, Ppr)
    smry_tpr_ppr <- summarise(grouped, z.chart, z.calc,
                              RMSE = sqrt(mean((z.chart-z.calc)^2)),
                              MPE  = sum((z.chart - z.calc) / z.chart) * 100 / n(),
                              MAPE = sum(abs((z.chart - z.calc) / z.chart)) * 100 / n(),
                              MSE   = sum((z.chart - z.calc)^2) / n(),
                              RSS   = sum((z.chart - z.calc)^2),
                              MAE   = sum(abs(z.chart - z.calc)) / n(),
                              MAAPE = sum(atan(abs((z.chart - z.calc) / z.chart))) / n()

    )
    tibble::as_tibble(smry_tpr_ppr)
}


#' @title Quantiles for z.stats
#' @description Calculate the quantiles for any of the statistical variables
#' that originates from calling z.stats
#' @param stat Any of the statistical variables in z.stats:
#' RMSE, MPRE, MAPE, MSE, RSS, MAE
#' @export
#' @importFrom stats quantile
z.stats_quantile <- function(stat = "MAPE") {
    cols <- ncol(z.stats())
    z.stats_stats <- names(z.stats())[3:cols]
    corrs <- z_correlations$short
    sapply(corrs, function(corr) quantile(z.stats(corr)[[stat]] ))
}


#' @title Statistics on z.stats table
#' @description This function summarizes the tables generated by z.stats using
#' custom provided functions. We get a shorter table with statistics of statistics.
#' @param stat Any of the statistical variables in z.stats:
#' RMSE, MPRE, MAPE, MSE, RSS, MAE
#' @export
#' @examples
#' \dontrun{
#' # Get a statistical summary of the Mean Absolute Percentage Error (MAPE)
#' stats_of_z.stats()
#' }
stats_of_z.stats <- function(stat = "MAPE") {
    z.stats_stats <- names(z.stats())[3:ncol(z.stats())]
    msg <- paste0("statistic not in table. \n Try with: ", paste0(z.stats_stats, collapse=", "))
    if (!stat %in% z.stats_stats) stop(msg)

    # Mode function from here:
    # https://stackoverflow.com/a/8189441/5270873
    Mode <- function(x) {
        ux <- unique(x)
        ux[which.max(tabulate(match(x, ux)))]
    }
    corrs <- z_correlations$short
    custom_functions <- c("mean", "max", "min", "median", "Mode")
    sapply(corrs, function(corr)
                sapply(custom_functions, function(f) get(f)( z.stats(corr)[[stat]] )))
    #
    # enclose in double brackets to make it work [[stat]]
}


#' Tile plot of best fit area for a correlation
#'
#' Plot will show blue areas with the lowest errors and redish with very high error
#' or close to MAPE=25. Pink is much greater than 25.
#' @param correlation identifier. Can be "HY", "DAK", "DPR" "N10", "SH"
#' @param pprRange low (lp) or high (hp) chart area of the Standing-Katz chart
#' @param stat Any of the statistical variables in z.stats:
#' @param ... any other parameter
#' @import ggplot2
#' @export
#' @examples
#' # plot Dranchuk-AbouKassem
#' z.plot.range("DAK")
#'
#' # plot Beggs-Brill correlation with fine grid on Ppr
#' z.plot.range("BB", interval = "fine")
z.plot.range <- function(correlation = "DAK", stat = "MAPE", pprRange = "lp", ...) {
    Ppr <- NULL; Tpr <- NULL; MAPE <- NULL; z.calc <- NULL; z.chart <- NULL
    # isMissing_correlation(correlation)
    msg <- "You have to provide a z-factor correlation: "
    msg_missing <- paste(msg, paste(get_z_correlations(), collapse = " "))
    if (missing(correlation)) stop(msg_missing)
    if (!isValid_correlation(correlation)) stop("Not a valid correlation.")

    if (stat == "MAPE") {
        midpoint <-  12.5
        limit <- c(0, 25)
    } else if (stat == "RMSE") {
        midpoint <-   0.025
        limit <- c(0, 0.050)
    } else if (stat == "MAAPE") {
        midpoint <-   0.125
        limit <- c(0, 0.250)
    }

    corr_name <- z_correlations[which(z_correlations["short"] == correlation),
                                                     "long"]

    smry_tpr_ppr <- z.stats(correlation, pprRange, ...)
    g <- ggplot(smry_tpr_ppr, aes(Ppr, Tpr))
    g <- g + geom_tile(data = smry_tpr_ppr, aes(fill = get(stat)), color="white") +
        scale_fill_gradient2(low = "blue", high = "red", mid = "yellow", na.value = "grey25",
                             midpoint = midpoint, limit = limit, name = stat) +
        theme(axis.text.x = element_text(angle=45, vjust=1, size = 11, hjust = 1)) +
        coord_equal() +
        ggtitle(corr_name, subtitle = correlation) +
        guides(fill = guide_colorbar(barwidth = 0.6, barheight = 5,
                                     label.vjust = 0.9))
    print(g)
}