/
stats.R
144 lines (131 loc) · 5.78 KB
/
stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#' @title Summary statistics table for a correlation
#' @description Get error summary statistics for any given compressibility correlation.
#' A quick way to show an error summary between any of the indicated correlations and
#' the Standing-Katz chart.
#'
#' MSE: Mean Squared Error
#' RMSE: Root Mean Squared Error
#' RSS: Residual sum of Squares
#' RMSLE: Root Mean Squared Logarithmic Error. Penalizes understimation.
#' MAPE: Mean Absolute Percentage Error = AARE
#' MPE: Mean Percentage error = ARE
#' MAE: Mean Absolute Error
#'
#' @param correlation identifier. Can be "HY", "DAK", "DPR" "N10", "SH"
#' @param pprRange low (lp) or high (hp) chart area of the Standing-Katz chart
#' @param interval quality of the Ppr scale. Coarse: every 1.0; Fine: every 0.5
#' @importFrom dplyr group_by summarise n
#' @rdname z.stats
#' @export
#' @examples
#' \dontrun{
#' # error statistics for the Dranchuk-AbouKassem correlation
#' z.stats("DAK")
#' }
z.stats <- function(correlation = "DAK", pprRange = "lp", interval = "coarse") {
Ppr <- NULL; Tpr <- NULL; z.calc <- NULL; z.chart <- NULL; n <- NULL
# get all `lp` Tpr curves
tpr_all <- getStandingKatzTpr(pprRange)
# ppr <- c(0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0)
ppr <- getStandingKatzPpr(interval)
sk_corr_all <- createTidyFromMatrix(ppr, tpr_all, correlation)
grouped <- group_by(sk_corr_all, Tpr, Ppr)
smry_tpr_ppr <- summarise(grouped, z.chart, z.calc,
RMSE = sqrt(mean((z.chart-z.calc)^2)),
MPE = sum((z.chart - z.calc) / z.chart) * 100 / n(),
MAPE = sum(abs((z.chart - z.calc) / z.chart)) * 100 / n(),
MSE = sum((z.chart - z.calc)^2) / n(),
RSS = sum((z.chart - z.calc)^2),
MAE = sum(abs(z.chart - z.calc)) / n(),
MAAPE = sum(atan(abs((z.chart - z.calc) / z.chart))) / n()
)
tibble::as_tibble(smry_tpr_ppr)
}
#' @title Quantiles for z.stats
#' @description Calculate the quantiles for any of the statistical variables
#' that originates from calling z.stats
#' @param stat Any of the statistical variables in z.stats:
#' RMSE, MPRE, MAPE, MSE, RSS, MAE
#' @export
#' @importFrom stats quantile
z.stats_quantile <- function(stat = "MAPE") {
cols <- ncol(z.stats())
z.stats_stats <- names(z.stats())[3:cols]
corrs <- z_correlations$short
sapply(corrs, function(corr) quantile(z.stats(corr)[[stat]] ))
}
#' @title Statistics on z.stats table
#' @description This function summarizes the tables generated by z.stats using
#' custom provided functions. We get a shorter table with statistics of statistics.
#' @param stat Any of the statistical variables in z.stats:
#' RMSE, MPRE, MAPE, MSE, RSS, MAE
#' @export
#' @examples
#' \dontrun{
#' # Get a statistical summary of the Mean Absolute Percentage Error (MAPE)
#' stats_of_z.stats()
#' }
stats_of_z.stats <- function(stat = "MAPE") {
z.stats_stats <- names(z.stats())[3:ncol(z.stats())]
msg <- paste0("statistic not in table. \n Try with: ", paste0(z.stats_stats, collapse=", "))
if (!stat %in% z.stats_stats) stop(msg)
# Mode function from here:
# https://stackoverflow.com/a/8189441/5270873
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
corrs <- z_correlations$short
custom_functions <- c("mean", "max", "min", "median", "Mode")
sapply(corrs, function(corr)
sapply(custom_functions, function(f) get(f)( z.stats(corr)[[stat]] )))
#
# enclose in double brackets to make it work [[stat]]
}
#' Tile plot of best fit area for a correlation
#'
#' Plot will show blue areas with the lowest errors and redish with very high error
#' or close to MAPE=25. Pink is much greater than 25.
#' @param correlation identifier. Can be "HY", "DAK", "DPR" "N10", "SH"
#' @param pprRange low (lp) or high (hp) chart area of the Standing-Katz chart
#' @param stat Any of the statistical variables in z.stats:
#' @param ... any other parameter
#' @import ggplot2
#' @export
#' @examples
#' # plot Dranchuk-AbouKassem
#' z.plot.range("DAK")
#'
#' # plot Beggs-Brill correlation with fine grid on Ppr
#' z.plot.range("BB", interval = "fine")
z.plot.range <- function(correlation = "DAK", stat = "MAPE", pprRange = "lp", ...) {
Ppr <- NULL; Tpr <- NULL; MAPE <- NULL; z.calc <- NULL; z.chart <- NULL
# isMissing_correlation(correlation)
msg <- "You have to provide a z-factor correlation: "
msg_missing <- paste(msg, paste(get_z_correlations(), collapse = " "))
if (missing(correlation)) stop(msg_missing)
if (!isValid_correlation(correlation)) stop("Not a valid correlation.")
if (stat == "MAPE") {
midpoint <- 12.5
limit <- c(0, 25)
} else if (stat == "RMSE") {
midpoint <- 0.025
limit <- c(0, 0.050)
} else if (stat == "MAAPE") {
midpoint <- 0.125
limit <- c(0, 0.250)
}
corr_name <- z_correlations[which(z_correlations["short"] == correlation),
"long"]
smry_tpr_ppr <- z.stats(correlation, pprRange, ...)
g <- ggplot(smry_tpr_ppr, aes(Ppr, Tpr))
g <- g + geom_tile(data = smry_tpr_ppr, aes(fill = get(stat)), color="white") +
scale_fill_gradient2(low = "blue", high = "red", mid = "yellow", na.value = "grey25",
midpoint = midpoint, limit = limit, name = stat) +
theme(axis.text.x = element_text(angle=45, vjust=1, size = 11, hjust = 1)) +
coord_equal() +
ggtitle(corr_name, subtitle = correlation) +
guides(fill = guide_colorbar(barwidth = 0.6, barheight = 5,
label.vjust = 0.9))
print(g)
}