-
Notifications
You must be signed in to change notification settings - Fork 1
/
cbs.R
169 lines (129 loc) · 3.94 KB
/
cbs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#' Scrape CBS Projeections
#'
#' @inheritParams scrape_razzball_steamer
#' @param pos position name
#' @return data frame with fantasy pros projection data
#'
#' @export
scrape_cbs <- function(url, pos) {
cbs_url <- sprintf(url, pos)
h <- read_html(cbs_url)
projs <- h %>%
html_nodes('#layoutRailNone') %>%
html_nodes('.data') %>%
html_table(fill = TRUE) %>%
magrittr::extract2(1)
#projection names are in the 2nd row
proj_names <- projs[2, 1:(ncol(projs)-1)]
#trim bad rows from bottom, top and right
proj_clean <- projs[3:(nrow(projs) - 1), 1:(ncol(projs)-1)]
names(proj_clean) <- proj_names
proj_clean$position <- pos
proj_clean
}
#' Read raw cbs projections for a given year
#'
#' @inheritParams read_raw_razzball_steamer
#'
#' @return named list of data frames
#' @export
read_raw_cbs <- function(year) {
urls <- list(
'2016' = 'http://www.cbssports.com/fantasy/baseball/stats/sortable/cbs/%s/season/standard/projections?&print_rows=9999'
)
url <- urls[[as.character(year)]]
cbs_h <- list()
cbs_h[['hc']] <- scrape_cbs(url, 'C')
cbs_h[['h1b']] <- scrape_cbs(url, '1B')
cbs_h[['h2b']] <- scrape_cbs(url, '2B')
cbs_h[['hss']] <- scrape_cbs(url, 'SS')
cbs_h[['h3b']] <- scrape_cbs(url, '3B')
cbs_h[['hof']] <- scrape_cbs(url, 'OF')
cbs_h[['hdf']] <- scrape_cbs(url, 'DH')
h <- dplyr::bind_rows(cbs_h)
cbs_p <- list()
cbs_p[['sp']] <- scrape_cbs(url, 'SP')
cbs_p[['rp']] <- scrape_cbs(url, 'RP')
p <- dplyr::bind_rows(cbs_p)
list('h' = h, 'p' = p)
}
#' Cleans up a cbs projection scrape
#'
#' @description names, consistent stat names, etc.
#' @param df raw cbs df. output of read_raw_cbs.
#' @param hit_pitch c('h', 'p')
#'
#' @return a data frame with consistent variable names
#' @export
clean_raw_cbs <- function(df, hit_pitch) {
names(df) <- tolower(names(df))
#clean player names
player_names <- strsplit(df$player, ',', fixed = TRUE)
fullname <- lapply(player_names, function(x) extract(x, 1)) %>% unlist()
df$fullname <- trim_whitespace(fullname)
df$firstname <- split_firstlast(df$fullname)$first
df$lastname <- split_firstlast(df$fullname)$last
#get priority position
if (hit_pitch == 'h') {
hierarchy <- user_settings$h_hierarchy
} else if (hit_pitch == 'p') {
hierarchy <- user_settings$p_hierarchy
}
df <- df %>%
dplyr::rowwise() %>%
dplyr::mutate(
priority_pos = priority_position(position, hierarchy)
)
#DH to util if util
if ('Util' %in% names(user_settings$special_positions$h)) {
df$priority_pos <- gsub('DH', 'Util', df$priority_pos)
}
#build tb
if (hit_pitch == 'h') {
#string to numeric
df <- force_numeric(
df, c('ab', 'r', 'h', '1b', '2b', '3b', 'hr', 'rbi', 'bb', 'ko',
'sb', 'cs', 'ba', 'obp', 'slg')
)
df <- df %>%
dplyr::rowwise() %>%
dplyr::mutate(
tb = calc_tb(h, `2b`, `3b`, hr)
)
} else if (hit_pitch == 'p') {
names(df)[names(df) == 's'] <- 'sv'
names(df)[names(df) == 'inn'] <- 'ip'
df <- force_numeric(
df, c("ip", "gs", "qs", "cg", "w", "l", "sv", "bs", "k",
"bbi", "ha", "era", "whip")
)
}
#drop unwanted columns
df <- df %>%
dplyr::select(-player, -fpts)
df
}
cbs_mlbid_match <- function(cbs_df, mlbid = NA) {
#just a stub for now
cbs_df$mlbid <- c(1:nrow(cbs_df))
cbs_df
}
#' Get cbs
#'
#' @description workhorse function. reads the raw cbs data file,
#' cleans up headers, returns list of projection data frames ready for
#' projection_prep function.
#' @inheritParams read_raw_razzball_steamer
#' @return list of named projection data frames.
#' @export
get_cbs <- function(year) {
raw <- read_raw_cbs(year)
clean_h <- clean_raw_cbs(raw$h, 'h')
clean_p <- clean_raw_cbs(raw$p, 'p')
clean_h <- cbs_mlbid_match(clean_h)
clean_p <- cbs_mlbid_match(clean_p)
clean_h$projection_name <- 'cbs'
clean_p$projection_name <- 'cbs'
proj_list <- list('h' = clean_h, 'p' = clean_p)
proj_list
}