Skip to content

Commit

Permalink
closes #1 #2
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Jan 21, 2022
1 parent df71fdc commit 0b04200
Show file tree
Hide file tree
Showing 10 changed files with 297 additions and 40 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: flightsbr
Title: Download Flight Data from Brazil
Version: 0.0.1
Version: 0.1.0
Authors@R:
c(person(given="Rafael H. M.", family="Pereira",
email="rafa.pereira.br@gmail.com",
Expand All @@ -25,7 +25,8 @@ Suggests:
dplyr,
ggplot2 (>= 3.3.1),
rmarkdown (>= 2.6),
knitr
knitr,
testthat
VignetteBuilder:
knitr
Encoding: UTF-8
Expand Down
74 changes: 45 additions & 29 deletions R/read_flights.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,53 +8,69 @@
#' type, payload, and the number of passengers, and several other variables. A
#' description of all variables included in the data is available at \url{https://www.anac.gov.br/assuntos/setor-regulado/empresas/envio-de-informacoes/descricao-de-variaveis}.
#'
#' @param year Numeric. Year of the data. Defaults to `2010`
#' @param month Numeric. Year of the data. Defaults to `1` (January)
#' @param date Numeric. Date of the data in the format `yyyymm`. Defaults to
#' `202001`. To download the data for all months in a year, the user
#' can pass a 4-digit year input `yyyy`.
#' @param type String. Whether the data set should be of the type `basica`
#' (flight stage, the default) or `combinada` (On flight origin and destination
#' - OFOD).
#' (flight stage, the default) or `combinada` (On flight origin and
#' destination - OFOD).
#' @param showProgress Logical. Whether to display a progress bar. Defaults to `TRUE`.
#' @param select A vector of column names or numbers to keep, drop the rest. The
#' order that the columns are specified determines the order of the
#' columns in the result.
#'
#' @return A `"data.table" "data.frame"` object
#' @export
#' @family download flight data

#' @examples \dontrun{ if (interactive()) {
#' # Read flights data
#' a201506 <- read_flights(date = 201506)
#'}}
read_flights <- function(year=2020, month=1, type='basica', showProgress=TRUE){
read_flights <- function(date = 202001, type = 'basica', showProgress = TRUE, select = NULL){

### check inputs
# type
if( ! type %in% c('basica', 'combinada') ){ stop(paste0("Argument 'type' must be either 'basica' or 'combinada'")) }
if( ! is.logical(showProgress) ){ stop(paste0("Argument 'showProgress' must be either 'TRUE' or 'FALSE.")) }
check_date(date=date)

# year and months perhaps use yyyymm ?

# progress bar
if( !(showProgress %in% c(T, F)) ){ stop("Value to argument 'showProgress' has to be either TRUE or FALSE") }
if (nchar(date)==6) {
#### Download one month---------------------------------------------------------

# prepare address of online data
split_date(date)
file_url <- get_url(type=type, year=year, month=month)

### prepare address of online data
if( nchar(month) ==1 ) { month <- paste0('0', month)}
url_root <- 'https://www.gov.br/anac/pt-br/assuntos/regulados/empresas-aereas/envio-de-informacoes/microdados/'
file_name <- paste0(type, year, '-', month, '.zip')
file_url <- paste0(url_root, file_name)
# download and read data
dt <- download_flights_data(file_url, showProgress = showProgress, select = select)
return(dt)

### download data
temp_local_file <- tempfile( file_name )
# utils::download.file(url = file_url, destfile = temp_local_file)

try(
httr::GET(url=file_url,
if(showProgress==T){ httr::progress()},
httr::write_disk(temp_local_file, overwrite = T),
config = httr::config(ssl_verifypeer = FALSE)
), silent = F)

### read zipped file stored locally
temp_local_file_zip <- paste0('unzip -p ', temp_local_file)
dt <- data.table::fread( cmd = temp_local_file_zip)
return(dt)
}

} else if (nchar(date)==4) {
#### Download whole year---------------------------------------------------------

# prepare address of online data
all_months <- generate_all_months(date)

# manually ignore dates after Nov 2021
if (date==2021) { all_months <- all_months[all_months < 202112] }

dt_list <- lapply( X=all_months,
FUN= function(i, type.=type, showProgress.=showProgress, select.=select) { # i = all_months[3]

# prepare address of online data
split_date(i)
file_url <- get_url(type, year, month)

# download and read data
temp_dt <- download_flights_data(file_url, showProgress = showProgress, select = select)
return(temp_dt)
}
)
dt <- data.table::rbindlist(dt_list)
return(dt)

}}

132 changes: 132 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# nocov start

#' Split a date from yyyymm to year yyyy and month mm
#'
#' @param date Numeric or string. Date of the data in the format `yyyymm`.
#' @param envir Environment in which the `year` and `month` objects are
#'        created. Defaults to `.GlobalEnv`, which keeps the behaviour relied
#'        on by existing callers. Pass e.g. `environment()` to avoid writing
#'        to the user's workspace.
#'
#' @return The environment `envir`, after two string objects, `year` (the
#'         first four characters of `date`) and `month` (characters five and
#'         six), have been assigned into it.
#'
#' @keywords internal
split_date <- function(date, envir = .GlobalEnv) {

  # substring() coerces `date` to character, so numeric input such as 202001
  # is handled the same way as the string "202001"
  date_parts <- list(
    "year" = substring(date, 1, 4),
    "month" = substring(date, 5, 6)
  )

  # NOTE(review): the global default is kept only for backward compatibility;
  # callers appear to read `year` and `month` as free variables after calling
  # this function. Prefer passing an explicit `envir`.
  list2env(date_parts, envir = envir)
}

#' Check whether date input is acceptable
#'
#' Data is available from Jan 2000 (`200001`) to Nov 2021 (`202111`),
#' inclusive. A 6-digit input must be a valid `yyyymm` month in that range; a
#' 4-digit input must be a year between 2000 and 2021.
#'
#' @param date Numeric. Either a 6-digit date in the format `yyyymm` or a 4-digit
#'        date input `yyyy`.
#'
#' @return Invisibly returns `NULL` when `date` is acceptable. Raises an error
#'         otherwise.
#'
#' @keywords internal
check_date <- function(date) {

  if (length(date) != 1L || is.na(date)) {
    stop("Argument 'date' must be a single value in the format `yyyymm` or `yyyy`.")
  }

  first_year <- 2000L
  last_year <- 2021L
  last_month <- 202111L  # no data after Nov 2021
  out_of_range <- "Data only available for dates between Jan 2000 and Nov 2021."

  n_digits <- nchar(date)

  if (n_digits == 6) {

    # every yyyymm value between Jan 2000 and Nov 2021; `<=` keeps Nov 2021
    # itself, which is the last month with data
    valid_months <- rep(first_year:last_year, each = 12L) * 100L + 1:12
    valid_months <- valid_months[valid_months <= last_month]

    if (!(date %in% valid_months)) { stop(out_of_range) }

  } else if (n_digits == 4) {

    if (!(date %in% first_year:last_year)) { stop(out_of_range) }

  } else {

    # anything else would otherwise slip through and download nothing
    stop("Argument 'date' must be either a 6-digit date `yyyymm` or a 4-digit year `yyyy`.")
  }

  invisible(NULL)
}







#' Generate all months with `yyyymm` format in a year
#'
#' @param date Numeric. 4-digit date in the format `yyyy`.
#' @return A numeric vector with the twelve `yyyymm` values of that year,
#'         from January (`yyyy01`) to December (`yyyy12`).
#' @keywords internal
generate_all_months <- function(date) {

  # only a 4-digit year is accepted
  if (nchar(date) != 4) {
    stop("Argument 'date' must be 4-digit in the format `yyyy`.")
  }

  # build the first and last month of the year, then enumerate in between
  first_month <- as.numeric(paste0(date, "01"))
  last_month <- as.numeric(paste0(date, "12"))

  first_month:last_month
}



#' Put together the data file url
#'
#' @param type String. Whether the data set should be of the type `basica`
#'        (flight stage) or `combinada` (On flight origin and
#'        destination - OFOD).
#' @param year Numeric. Year of the data in `yyyy` format.
#' @param month Numeric. Month of the data in `mm` format. A single-character
#'        month is left-padded with a zero.
#'
#' @return A url string.
#'
#' @keywords internal
get_url <- function(type, year, month) {

  # file names use two-digit months, e.g. `basica2020-01.zip`
  month_part <- if (nchar(month) == 1) paste0("0", month) else month

  base_address <- "https://www.gov.br/anac/pt-br/assuntos/regulados/empresas-aereas/envio-de-informacoes/microdados/"
  zip_name <- paste0(type, year, "-", month_part, ".zip")

  paste0(base_address, zip_name)
}






#' Download and read ANAC flight data
#'
#' Downloads the zipped file at `file_url` to a temporary file, reads its
#' contents with `data.table::fread()` and removes the temporary file again.
#'
#' @param file_url String. A url passed from get_url.
#' @param showProgress Logical, passed from \code{\link{read_flights}}.
#'        Defaults to `TRUE`.
#' @param select A vector of column names or numbers to keep, passed from
#'        \code{\link{read_flights}}. Defaults to `NULL`, which is forwarded
#'        unchanged to `data.table::fread()`.
#'
#' @return A `"data.table" "data.frame"` object. Raises an error if the
#'         download fails or the server answers with an HTTP error status.
#'
#' @keywords internal
download_flights_data <- function(file_url, showProgress = TRUE, select = NULL) {

  # create temp local file, named after the remote file, and make sure it is
  # removed when the function exits (normally or with an error)
  file_name <- basename(file_url)
  temp_local_file <- tempfile(file_name)
  on.exit(unlink(temp_local_file), add = TRUE)

  # download data
  # NOTE(review): `ssl_verifypeer = FALSE` disables TLS certificate
  # verification. It is kept from the original code; confirm whether the
  # server still requires it.
  response <- httr::GET(
    url = file_url,
    if (isTRUE(showProgress)) { httr::progress() },
    httr::write_disk(temp_local_file, overwrite = TRUE),
    config = httr::config(ssl_verifypeer = FALSE)
  )

  # fail early with a clear message instead of trying to unzip an error page
  if (httr::http_error(response)) {
    stop(
      "Could not download ", file_url,
      " (HTTP status ", httr::status_code(response), ").",
      call. = FALSE
    )
  }

  # read zipped file stored locally; the path is quoted so that temp
  # directories containing spaces do not break the shell command
  unzip_cmd <- paste0("unzip -p ", shQuote(temp_local_file))
  dt <- data.table::fread(cmd = unzip_cmd, select = select)
  return(dt)
}

## Declares `month` and `year` as known global variables so that R CMD check
## does not flag them as undefined when functions reference them as free
## variables. utils::globalVariables() is only available from R 2.15.1 on.
if (getRversion() >= "2.15.1") {
  utils::globalVariables(c("month", "year"))
}

# nocov end
19 changes: 19 additions & 0 deletions man/check_date.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/download_flights_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions man/generate_all_months.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions man/get_url.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 15 additions & 8 deletions man/read_flights.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions man/split_date.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 0b04200

Please sign in to comment.