# get_aaer_dates.R
# 41 lines (33 loc) · 1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
library(dplyr, warn.conflicts = FALSE)
library(rvest)
library(lubridate)
#' Download the SEC "friactions" listing for one year.
#'
#' Reads the sec.gov page for `year`, picks one HTML table from it, and
#' returns the rows whose first column contains a digit.
#'
#' @param year The year to fetch. It is used as a scalar `if` condition, so
#'   pass a single value (the script calls this with each of 1999:2021).
#' @return A data frame with columns `aaer_num`, `aaer_date` (parsed with
#'   `mdy()`), `aaer_desc`, and `year` (the value passed in).
get_aaers <- function(year) {
  # 2021 and 2020 each live at their own address; every other year follows
  # the "friactions<year>.shtml" pattern.
  page_url <- if (year == 2021) {
    "https://www.sec.gov/divisions/enforce/friactions.htm"
  } else if (year == 2020) {
    "https://www.sec.gov/divisions/enforce/friactions/friactions2020.htm"
  } else {
    paste0(
      "https://www.sec.gov/divisions/enforce/friactions/friactions",
      year, ".shtml"
    )
  }

  page_tables <- html_table(read_html(page_url))

  # Years before 2016 use the fifth table on the page; later years the first.
  which_table <- if (year < 2016) 5 else 1

  # Drop the first two rows (presumably header rows -- confirm against the
  # live page) and give the three columns fixed names.
  listing <- page_tables[[which_table]][-c(1, 2), ]
  names(listing) <- c("aaer_num", "aaer_date", "aaer_desc")

  # Keep only rows whose release number contains a digit, parse the date as
  # month-day-year, and tag every row with the requested year.
  listing %>%
    filter(grepl("[0-9]", aaer_num)) %>%
    mutate(aaer_date = mdy(aaer_date), year = year)
}
# Fetch every year from 1999 through 2021 and stack the per-year results
# into a single data frame.
aaer_dates <-
  1999:2021 %>%
  lapply(get_aaers) %>%
  bind_rows()

# Write the combined table to an xz-compressed .RData file.
save(
  aaer_dates,
  file = "data/aaer_dates.RData",
  compress = "xz"
)