In [2]:
# Bootstrap: make sure pacman is installed, then let it install (if needed) and
# attach every package used by this notebook.
# requireNamespace() only tests availability; pacman is always called through
# an explicit `pacman::` prefix, so it never needs to be attached itself.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(
  tidyverse, skimr, RCurl,
  data.table, bit64, stringr, readxl, tidyr, haven, purrr,
  splitstackshape, bestglm, glmnet, leaps, car, pROC,
  randomForest, rattle, usmap, xtable, ggcorrplot,
  fastDummies, caret, janitor, vroom, rvest, here, fs
)

# Staging directory for the raw FEC downloads handled below.
dir_create("data/fec/raw")

# candidate master ---------------------------------------------------------------------------

# The archive is downloaded, extracted into data/fec/raw/ and then deleted;
# column names are taken from the separate header CSV (lower-cased).
cn_url <- "https://www.fec.gov/files/bulk-downloads/2020/cn20.zip"
cn_head_url <- "https://www.fec.gov/files/bulk-downloads/data_dictionaries/cn_header_file.csv"
cn_header <- str_to_lower(unlist(str_split(read_lines(cn_head_url), ",")))

download.file(
  url = cn_url,
  destfile = "data/fec/raw/cn20.zip",
  mode = "wb" # zip archives must be written in binary mode
)

unzip(
  zipfile = "data/fec/raw/cn20.zip",
  exdir = "data/fec/raw/"
)

file_delete("data/fec/raw/cn20.zip")

# Everything is read as text except the election year, which is parsed as integer.
cn <- vroom(
  file = "data/fec/raw/cn.txt",
  delim = "|",
  col_names = cn_header,
  col_types = cols(
    .default = col_character(),
    cand_election_yr = col_integer()
  )
)

# Candidates kept for the rest of the script.
# cand_election_yr is an integer column, so compare against an integer literal
# rather than relying on implicit integer-to-character coercion.
# NOTE(review): "P" / "C" are presumably the FEC codes for presidential office
# and an active candidate status -- confirm against the FEC data dictionary.
general_cn <- cn %>%
  filter(
    cand_election_yr == 2020L,
    cand_office == "P",
    cand_status == "C"
  )


# candidate committee link -------------------------------------------------------------------

# Same download / extract / delete / read pattern as the candidate master;
# column names again come from the separate header CSV (lower-cased).
ccl_url <- "https://www.fec.gov/files/bulk-downloads/2020/ccl20.zip"
ccl_head_url <- "https://www.fec.gov/files/bulk-downloads/data_dictionaries/ccl_header_file.csv"
ccl_header <- str_to_lower(unlist(str_split(read_lines(ccl_head_url), ",")))
cmte_tp_url <- "https://www.fec.gov/campaign-finance-data/committee-type-code-descriptions/"

download.file(
  url = ccl_url,
  destfile = "data/fec/raw/ccl.zip",
  mode = "wb" # zip archives must be written in binary mode
)

unzip(
  zipfile = "data/fec/raw/ccl.zip",
  exdir = "data/fec/raw/"
)

file_delete("data/fec/raw/ccl.zip")

# Both year columns are parsed as integers so that cand_election_yr has the same
# type as in `cn` when the two tables are joined later.
# (No trailing comma inside cols(): an empty trailing argument is an error in
# any cols() implementation that collects its arguments with plain list(...).)
ccl <- vroom(
  file = "data/fec/raw/ccl.txt",
  delim = "|",
  col_names = ccl_header,
  col_types = cols(
    .default = col_character(),
    cand_election_yr = col_integer(),
    fec_election_yr = col_integer()
  )
)

# Lookup table scraped from the first <table> on the committee-type description
# page; column names are normalised by clean_names().
cmte_tp_codes <-
  read_html(cmte_tp_url) %>%
  html_node("table") %>%
  html_table(header = TRUE) %>%
  as_tibble() %>%
  clean_names()

# committee master ---------------------------------------------------------------------------

# Same download / extract / delete / read pattern as the other FEC files;
# column names come from the separate header CSV (lower-cased).
cm_url <- "https://www.fec.gov/files/bulk-downloads/2020/cm20.zip"
cm_head_url <- "https://www.fec.gov/files/bulk-downloads/data_dictionaries/cm_header_file.csv"
cm_header <- str_to_lower(unlist(str_split(read_lines(cm_head_url), ",")))

download.file(
  url = cm_url,
  destfile = "data/fec/raw/cm20.zip",
  mode = "wb" # zip archives must be written in binary mode
)

unzip(
  zipfile = "data/fec/raw/cm20.zip",
  exdir = "data/fec/raw/"
)

file_delete("data/fec/raw/cm20.zip")

# Every committee-master column is kept as text.
cm <- vroom(
  file = "data/fec/raw/cm.txt",
  delim = "|",
  col_names = cm_header,
  col_types = cols(
    .default = col_character()
  )
)

# `general_cn` is already built from `cn` in the candidate master section and
# `cn` has not changed since, so the identical filter is not repeated here.

# ind conts ----------------------------------------------------------------------------------

indiv_url <- "https://www.fec.gov/files/bulk-downloads/2020/indiv20.zip"
indiv_head_url <- "https://www.fec.gov/files/bulk-downloads/data_dictionaries/indiv_header_file.csv"
indiv_header <- str_to_lower(unlist(str_split(read_lines(indiv_head_url), ",")))

rpt_tp_url <- "https://www.fec.gov/campaign-finance-data/report-type-code-descriptions/"
tran_tp_url <- "https://www.fec.gov/campaign-finance-data/transaction-type-code-descriptions/"

# download.file() gives up once getOption("timeout") seconds have elapsed
# (60 by default). Raise the limit for this download only and restore it after.
# NOTE(review): assumes the individual-contributions archive is large enough to
# need this (it is expected to be far bigger than the other files) -- confirm.
old_opts <- options(timeout = max(3600, getOption("timeout")))
download.file(
  url = indiv_url,
  destfile = "data/fec/raw/indiv20.zip",
  mode = "wb" # zip archives must be written in binary mode
)
options(old_opts)

unzip(
  zipfile = "data/fec/raw/indiv20.zip",
  exdir = "data/fec/raw/"
)

file_delete("data/fec/raw/indiv20.zip")

# The delimiter is given explicitly, as for the other FEC files, instead of
# relying on vroom's delimiter guessing.
# Amounts are parsed as doubles and dates from the "%m%d%Y" format; every other
# column is kept as text.
indiv <- vroom(
  file = "data/fec/raw/itcont.txt",
  delim = "|",
  col_names = indiv_header,
  col_types = cols(
    .default = col_character(),
    transaction_amt = col_double(),
    transaction_dt = col_date("%m%d%Y")
  )
)

# Report-type lookup scraped from the ".simple-table" element of the
# description page, reduced to code + description.
rpt_tp_codes <-
  read_html(rpt_tp_url) %>%
  html_node(".simple-table") %>%
  html_table(header = TRUE) %>%
  as_tibble() %>%
  clean_names() %>%
  select(
    rpt_tp = report_type_code,
    rpt_desc = report_type
  )

# Transaction-type lookup, scraped the same way; fill = TRUE is kept as in the
# original call.
tran_tp_codes <-
  read_html(tran_tp_url) %>%
  html_node(".simple-table") %>%
  html_table(fill = TRUE, header = TRUE) %>%
  as_tibble() %>%
  clean_names() %>%
  select(
    transaction_tp = transaction_type,
    transaction_desc = transaction_type_description
  )


# join ---------------------------------------------------------------------------------------

# Attach committee links to every selected candidate. Being a left join,
# candidates with no matching link row are kept, with NA in the ccl columns.
# NOTE(review): the `dem_` prefix suggests Democratic candidates, but no party
# filter is applied anywhere in the visible code -- confirm the intent.
dem_ccl <- left_join(
  general_cn,
  ccl,
  by = c("cand_id", "cand_election_yr")
)

# Pull in the contributions made to those committees. Being a right join, every
# candidate/committee row survives even when it has no matching contribution.
dem_indiv <- right_join(indiv, dem_ccl, by = "cmte_id")

# write --------------------------------------------------------------------------------------

# The output path is passed positionally: newer readr versions deprecate the
# `path` argument in favour of `file`, and the second positional argument works
# with either name.
write_csv(
  dem_indiv,
  "data/fec/dem_indiv.csv",
  na = ""
)

# Delete the raw contributions file only once the result has been written, so a
# failed write does not leave the script with neither input nor output.
file_delete("data/fec/raw/itcont.txt")

Loading required package: pacman

“there is no package called ‘pacman’”
Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)




The downloaded binary packages are in
	/var/folders/rx/qkbp8l317c31rs2zkvc_p4b80000gn/T//RtmpjRiVyI/downloaded_packages


Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)




  There is a binary version available but the source version is later:
      binary source needs_compilation
skimr  2.1.3  2.1.4             FALSE



installing the source package ‘skimr’



skimr installed

Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)




  There is a binary version available but the source version is later:
        binary   source needs_compilation
RCurl 1.98-1.6 1.98-1.9              TRUE



installing the source package ‘RCurl’



RCurl installed

Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)




The downloaded binary packages are in
	/var/folders/rx/qkbp8l317c31rs2zkvc_p4b80000gn/T//RtmpjRiVyI/downloaded_packages



splitstackshape installed

“package ‘splitstackshape’ was built under R version 4.0.2”
Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘shape’, ‘leaps’, ‘glmnet’, ‘grpreg’, ‘pls’





  There are binary versions available but the source versions are later:
       binary source needs_compilation
glmnet  4.1-3  4.1-6              TRUE
pls     2.8-0  2.8-1             FALSE


The downloaded binary packages are in
	/var/folders/rx/qkbp8l317c31rs2zkvc_p4b80000gn/T//RtmpjRiVyI/downloaded_packages


installing the source packages ‘glmnet’, ‘pls’


“installation of package ‘glmnet’ had non-zero exit status”

bestglm installed

“package ‘bestglm’ was built under R version 4.0.2”
“package ‘leaps’ was built under R version 4.0.2”
Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)




  There is a binary version available but the source version is later:
       binary source needs_compilation
glmnet  4.1-3  4.1-6              TRUE



installing the source package ‘glmnet’


“installation of package ‘glmnet’ had non-zero exit status”
“”
“there is no package called ‘glmnet’”
Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)

“package ‘randomForest’ is not available (for R version 4.0.1)”
“'BiocManager' not available.  Could not check Bioconductor.

Please use `install.packages('BiocManager')` and then retry.”
“”
“there is no package called ‘randomForest’”
Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘XML’, ‘rpart.plot’





  There are binary versions available but the source versions are later:
             binary    source needs_compilation
XML        3.99-0.9 3.99-0.13              TRUE
rpart.plot    3.1.0     3.1.1             FALSE


The downloaded binary packages are in
	/var/folders/rx/qkbp8l317c31rs2zkvc_p4b80000gn/T//RtmpjRiVyI/downloaded_packages


installing the source packages ‘XML’, ‘rpart.plot’



rattle installed

“package ‘rattle’ was built under R version 4.0.5”
“package ‘bitops’ was built under R version 4.0.2”
Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)

also installing the dependency ‘usmapdata’





  There is a binary version available but the source version is later:
      binary source needs_compilation
usmap  0.6.0  0.6.1             FALSE


The downloaded binary packages are in
	/var/folders/rx/qkbp8l317c31rs2zkvc_p4b80000gn/T//RtmpjRiVyI/downloaded_packages


installing the source package ‘usmap’



usmap installed

Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)




  There is a binary version available but the source version is later:
           binary source needs_compilation
ggcorrplot  0.1.3  0.1.4             FALSE



installing the source package ‘ggcorrplot’



ggcorrplot installed

Installing package into ‘/Users/sahluwalia/Library/R/4.0/library’
(as ‘lib’ is unspecified)




The downloaded binary packages are in
	/var/folders/rx/qkbp8l317c31rs2zkvc_p4b80000gn/T//RtmpjRiVyI/downloaded_packages



fastDummies installed

“package ‘fastDummies’ was built under R version 4.0.2”
“Failed to install/load:
bestglm, glmnet, randomForest”


ERROR: Error in open.connection(structure(4L, class = c("curl", "connection"), conn_id = <pointer: 0x144f>), : SSL certificate problem: certificate has expired
