forked from BillPetti/baseballr
-
Notifications
You must be signed in to change notification settings - Fork 1
/
label_statcast_imputed_data.R
43 lines (39 loc) · 1.74 KB
/
label_statcast_imputed_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#' Label Statcast data as imputed
#'
#' Based on a series of heuristics, this function attempts to
#' label Statcast data for which the launch angle and speed
#' have been imputed.
#'
#' Each row's launch angle and launch speed are multiplied by
#' \code{inverse_precision} and truncated to integers (stored in the helper
#' columns \code{ila} and \code{ils}). A row is labelled as imputed when its
#' \code{ils}, \code{ila}, \code{bb_type} and \code{events} values match a
#' row of the impute file.
#'
#' @param statcast_df A dataframe containing Statcast batted ball data. It
#' must contain the columns \code{launch_angle}, \code{launch_speed},
#' \code{bb_type} and \code{events}. An existing \code{imputed} column is
#' discarded and recomputed.
#' @param impute_file A CSV file giving the launch angle (\code{ila}),
#' launch speed (\code{ils}), \code{bb_type} and \code{events} fields to label
#' as imputed. If \code{NULL}, it is read from the \code{extdata} folder of
#' the package.
#' @param inverse_precision inverse of how many digits to truncate the launch
#' angle and speed to for comparison. Default is \code{10000}, i.e. keep 4
#' digits of precision.
#' @keywords MLB, Statcast, sabermetrics
#' @importFrom dplyr bind_rows
#' @importFrom dplyr left_join
#' @importFrom readr read_csv
#' @return A copy of the input dataframe with the helper columns \code{ila}
#' and \code{ils} and a new column \code{imputed} appended. \code{imputed}
#' is 1 if launch angle and launch speed are likely imputed, 0 otherwise.
#' @export
#' @examples
#' \dontrun{
#' statcast_df <- scrape_statcast_savant("2017-05-01", "2017-05-02")
#' sc_df <- label_statcast_imputed_data(statcast_df)
#' mean(sc_df$imputed)
#' }
label_statcast_imputed_data <- function(statcast_df, impute_file = NULL,
                                        inverse_precision = 10000) {
  stopifnot(is.data.frame(statcast_df))

  # Fail early with a readable message instead of an error from deep inside
  # the arithmetic or the join.
  required_cols <- c("launch_angle", "launch_speed", "bb_type", "events")
  missing_cols <- setdiff(required_cols, names(statcast_df))
  if (length(missing_cols) > 0) {
    stop(
      "`statcast_df` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  if (is.null(impute_file)) {
    impute_file <- system.file(
      "extdata/statcast_impute.csv",
      package = "baseballr"
    )
    # system.file() returns "" (not an error) when the file cannot be found.
    if (!nzchar(impute_file)) {
      stop(
        "Could not locate 'extdata/statcast_impute.csv' in package baseballr",
        call. = FALSE
      )
    }
  }

  imputed_df <- suppressMessages(readr::read_csv(impute_file))
  # Marker column: after the left join it is 1 on matched rows, NA elsewhere.
  imputed_df$imputed <- 1

  # Drop a stale label so the join cannot produce imputed.x / imputed.y
  # when an already-labelled data frame is passed in again.
  statcast_df[["imputed"]] <- NULL

  # Integer join keys: scale, then truncate (as.integer drops the fraction).
  # Base assignment is used so the function does not depend on `%>%` or
  # `mutate` being attached.
  statcast_df[["ila"]] <-
    as.integer(statcast_df[["launch_angle"]] * inverse_precision)
  statcast_df[["ils"]] <-
    as.integer(statcast_df[["launch_speed"]] * inverse_precision)

  labelled <- dplyr::left_join(
    statcast_df,
    imputed_df,
    by = c("ils", "ila", "bb_type", "events")
  )

  # Unmatched rows carry NA from the join; convert to a numeric 0/1 flag
  # (stays numeric even for zero-row input).
  labelled[["imputed"]] <- as.numeric(!is.na(labelled[["imputed"]]))
  labelled
}