version 2.4.0

cran · Jan 4, 2019 · 4b6038e · 4b6038e
1 parent d7a205e
commit 4b6038e
Show file tree

Hide file tree

Showing 11 changed files with 127 additions and 71 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,9 +1,9 @@
 Package: rNOMADS
 Type: Package
-Title: An Interface to the NOAA Operational Model Archive and
+Title: An R Interface to the NOAA Operational Model Archive and
         Distribution System
-Version: 2.3.10
-Date: 2018-7-17
+Version: 2.4.0
+Date: 2019-12-3
 Authors@R: c(person(given="Daniel C.",family="Bowman", role=c("aut", "cre"),
         email="danny.c.bowman@gmail.com"))
 Depends: R (>= 3.4.0), rvest (>= 0.3.2)
@@ -18,7 +18,7 @@ URL: <https://bovineaerospace.wordpress.com/category/r/ >,
         <https://r-forge.r-project.org/projects/rnomads/ >,
         <https://www.r-project.org >
 NeedsCompilation: no
-Packaged: 2018-07-18 02:46:22 UTC; dantayaga
+Packaged: 2019-01-04 05:45:07 UTC; dantayaga
 Author: Daniel C. Bowman [aut, cre]
 Repository: CRAN
-Date/Publication: 2018-07-18 04:20:03 UTC
+Date/Publication: 2019-01-04 12:50:03 UTC
diff --git a/MD5 b/MD5
@@ -1,21 +1,21 @@
-278bfa04538fd32d31eddad5447b9789 *DESCRIPTION
+c84a8d1ad5509457e1c936a0700f0cae *DESCRIPTION
 390e84088e5d6fef202002db75860464 *NAMESPACE
-fcb8dc6a9dff60494fba64d7ef3e8c44 *NEWS
+d6c8b95fea8537f78af8444b37e674c1 *NEWS
 25d1b075adf63d775b2003a6b13f7f3c *R/GetArchiveGrib.R
-3d3616ae851730c99ca81b34c9add562 *R/GetDODS.R
-573e507bae1e1f85f4ff251ecff230dc *R/GetRealTimeGrib.R
-350c74c638277ac783906247a4a8fb4c *R/Models.R
-9a4cd02bb95fe1bb070ee928915afdd1 *R/RNomadsTools.R
+e89307f8c0a73afc7e9f67e5e08c9c3c *R/GetDODS.R
+56c60352d659f91659a3c2f0fce4671a *R/GetRealTimeGrib.R
+04648b3e7a07182dd44d850e01f3c645 *R/Models.R
+08d02247d80a9e7f3368d7f3582f056c *R/RNomadsTools.R
 7c474ab346a5149f4c40c020d625752d *R/ReadGrib.R
-52f408d5e1ed74804c5d5a4408a11e40 *R/onAttach.R
+d9a48be054af6f67860d0a807bd23487 *R/onAttach.R
 e1094799c652a311619b92483183a5fc *inst/CITATION
 81c30cc1ca803cde5a9e254641a31d46 *man/ArchiveGribGrab.Rd
 a4ae1bccff1f268c029a9aec22fce458 *man/BuildProfile.Rd
 2687f4e7fef1b34c66e3f921e9588e54 *man/CheckNOMADSArchive.Rd
-060097d3dce9fe8e22c8ba8985a228b9 *man/CrawlModels.Rd
+ebd0ee4708d5a85a8645f8fcc80735e5 *man/CrawlModels.Rd
 bb19e9e08bfc08e27a6163bd0dd3c5d2 *man/DODSGrab.Rd
 44390881baa1d0ce506540797eba7e9d *man/GetClosestForecasts.Rd
-131601b6148940425cbc29b683d0a979 *man/GetDODSDates.Rd
+953f540c591b10a874b375a08123ae95 *man/GetDODSDates.Rd
 144215941c318d0bb88a2e98c4dd7da4 *man/GetDODSModelRunInfo.Rd
 44bb20ad1cfdaa35e3e296d4d2f01dcb *man/GetDODSModelRuns.Rd
 24cf81c99f723afb6d6c3e7e48ce7bc7 *man/GribGrab.Rd
@@ -24,7 +24,7 @@ bb19e9e08bfc08e27a6163bd0dd3c5d2 *man/DODSGrab.Rd
 43004dfb2d645efdba52442e8e94bea1 *man/MagnitudeAzimuth.Rd
 8c88218e349bb746e3f1220bf23f6099 *man/ModelGrid.Rd
 df43cf3706870565770577a17e4ec5a1 *man/NOMADSArchiveList.Rd
-261b6cc4586df17ae4723977b94a6267 *man/NOMADSRealTimeList.Rd
+06901461bc9e81520775e11f45397d27 *man/NOMADSRealTimeList.Rd
 6518b5982569da84448dbe13263fb7de *man/ParseModelPage.Rd
 eadc8897225fe1915cb3a846e37fcc73 *man/PlotWindProfile.Rd
 1d8571c81e1e843e50dcd7c96b14d581 *man/ReadGrib.Rd

diff --git a/NEWS b/NEWS
@@ -126,3 +126,7 @@ I expect the next release to be 2-4.1
 
 2-3.9
 Big plans for 2-4.1 have been deferred; this is a release to fix a bug in wgrib inventory reading requests and race conditions when running ReadGrib in parallel
+
+2-4.0
+Fixed real time grib download issues related to NCEP switching to https.
+DODS (OpenDAP-alt) remains nonfunctional, but I decided to release a new version ASAP to help those who depend on it to run websites, etc.
diff --git a/R/GetDODS.R b/R/GetDODS.R
@@ -1,12 +1,13 @@
 #use the GrADS-DODS capability of NOMADS to get ascii data
 
-GetDODSDates <- function(abbrev, archive = FALSE, request.sleep = 1) {
+GetDODSDates <- function(abbrev, archive = FALSE, request.sleep = 1, https = FALSE) {
     #Checks the GrADS data server to see what dates and model subsets are available for model specified by ABBREV.
     #INPUTS
     #    ABBREV - Model abbreviation
     #    ARCHIVE - If you're looking in the model archives (TRUE) or the real time NOMADS system (FALSE)
     #    REQUEST.SLEEP - Sometimes hammering the NOMADS server with a zillion HTTP requests is not a good idea.
     #    REQUEST.SLEEP pauses X seconds between requests to prevent timeouts.
+    #    HTTPS - Whether to use https (TRUE) or http (FALSE)
     #OUTPUTS
     #    AVAILABLE.DATES - A list of model URLS and dates
     #        $ABBREV - Model abbreviation
@@ -16,7 +17,7 @@ GetDODSDates <- function(abbrev, archive = FALSE, request.sleep = 1) {
     date.pattern <- "[1-2]\\d{3}[0-1]\\d{1}[0-3]\\d{1}$"
 
     if(!archive) {
-        top.url <- unique(NOMADSRealTimeList("dods", abbrev)$url)
+        top.url <- unique(NOMADSRealTimeList("dods", abbrev, https = https)$url)
     } else {
         if(grepl("anl$", abbrev)) {
             stop(paste("Archived analysis models are not stored by date.",

diff --git a/R/GetRealTimeGrib.R b/R/GetRealTimeGrib.R
@@ -1,4 +1,4 @@
-CrawlModels <- function(abbrev = NULL, url = NULL, depth = NULL, verbose = TRUE) {
+CrawlModels <- function(abbrev = NULL, model.url = NULL, depth = NULL, verbose = TRUE) {
    #A simple web crawler that looks at the specified model directory online and gets information on all runs of the specified model.
    #See the NOMADSRealTimeList function for available models.
    #Alternatively, pass CrawlModels a URL to get a model that I have not included yet.
@@ -15,12 +15,12 @@ CrawlModels <- function(abbrev = NULL, url = NULL, depth = NULL, verbose = TRUE)
        stop("No models specified.")
    }
 
-   if(is.null(url)) {
+   if(is.null(model.url)) {
        model.info <- NOMADSRealTimeList("grib", abbrev=abbrev) 
-       url <- model.info$url[1]
+       model.url <- model.info$url[1]
    }   
 
-   urls.out <- unlist(WebCrawler(url, depth = depth, verbose = verbose), recursive = TRUE, use.names = FALSE) 
+   urls.out <- unlist(WebCrawler(model.url, depth = depth, verbose = verbose), recursive = TRUE, use.names = FALSE) 
 }
 
 GribGrab <- function(model.url, preds, levels, variables, local.dir = NULL, file.names = NULL, 
@@ -144,12 +144,35 @@ ParseModelPage <- function(model.url) {
 #            MODEL.PARAMETERS$LEVELS - the model levels
 #            MODEL.PARAMETERS$VARIABLES - the types of data provided by the models
 
-    html.code <- scrapeR::scrape(model.url, parse = FALSE)
-    model.parameters <- list()
-    model.parameters$pred <- gsub("option value=\"", "", stringr::str_extract_all(html.code[[1]], "option value=\"[^\"]+")[[1]])
-    model.parameters$levels <- gsub("lev_", "", stringr::str_extract_all(html.code[[1]], "lev_[^\"]+")[[1]], fixed = TRUE)
-    model.parameters$variables <- gsub("var_", "", stringr::str_extract_all(html.code[[1]], "var_[^\"]+")[[1]], fixed = TRUE)
-
+    html <- readLines(model.url, warn = FALSE)
+
+    f.i <- which(grepl("<option value", html))
+    pred <- stringr::str_replace_all(
+        stringr::str_extract(html[f.i], "\".*\""), "\"", "")
+
+    checkboxes <- html[grepl("type=\"checkbox\"", html)]
+
+    v.i <- which(grepl("\"var_", checkboxes))
+    vars.tmp <- unlist(strsplit(checkboxes[v.i], "<input type=\"checkbox\"")) 
+    variables <- stringr::str_replace_all(
+        stringr::str_replace_all(
+        stringr::str_extract(vars.tmp, "\"var_.*\""),
+        "var_", ""),
+        "\"", "")
+
+    l.i <- which(grepl("\"lev_", checkboxes))
+    levs.tmp <- unlist(strsplit(checkboxes[l.i], "<input type=\"checkbox\""))
+    levels <- stringr::str_replace_all(
+        stringr::str_replace_all(
+        stringr::str_extract(levs.tmp, "\"lev_.*\""),
+        "lev_", ""),
+        "\"", "")
+
+    model.parameters <- list(
+        pred      = pred,
+        variables = variables[which(!is.na(variables))],
+        levels    = levels[which(!is.na(levels))])
+
     return(model.parameters)
 }
 
@@ -174,7 +197,7 @@ WebCrawler <- function(url, depth = NULL, verbose = TRUE) {
 #    This function recursively searches for links in the given url and follows every single link.
 #    It returns a list of the final (dead end) URLs.
 #    Many thanks to users David F and Adam Smith on stackoverflow for the link parser:
-#    http://stackoverflow.com/questions/3746256/extract-links-from-webpage-using-r/3746290#3746290
+#    https://stackoverflow.com/questions/3746256/extract-links-from-webpage-using-r/3746290#3746290
 #    INPUTS
 #        URL is the url to start looking in
 #    OUTPUTS

diff --git a/R/Models.R b/R/Models.R
@@ -1,18 +1,14 @@
 #Descriptions of real time and archived models
-NOMADSRealTimeList <- function(url.type, abbrev = NULL) {
+NOMADSRealTimeList <- function(url.type, abbrev = NULL, https = TRUE) {
     #Returns a list of model abbreviations for real time models, a short description, and URL for each model offered by the NOMADS server
     #If a specific model abbreviation is requested, the abbreviation is checked against the model list.
     #If a match is found, information is returned about that model; otherwise an error occurs
     #
-    #
-    #A big shout out to user hrbrmstr on Stack Overflow for providing the table parsing code in this function
-    #http://stackoverflow.com/questions/27592575/dropped-rows-using-readhtmltable-in-r
-    #http://stackoverflow.com/users/1457051/hrbrmstr
-    #
     #INPUTS
     #    URL.TYPE determines which URL to return: one for downloading GRIB files (grib) or one for downloading dods data via DODS (dods)
     #    ABBREV is the model abbreviation that rNOMADS uses to figure out which model you want.
     #        if NULL, returns information on all models
+    #    HTTPS if TRUE, use https, if FALSE, use http
     #OUTPUTS
     #    MODEL.LIST - a list of model metadata with elements
     #        $ABBREV - the abbreviation used to call the model in rNOMADS
@@ -23,37 +19,56 @@ NOMADSRealTimeList <- function(url.type, abbrev = NULL) {
         stop("URL type must be either \"grib\" or \"dods\"!")
     }
 
-    base.url <- "http://nomads.ncep.noaa.gov/"
-    trim <- function(x) gsub("^[[:space:]]+|[[:space:]]+$", "", x)
-
-    doc <- xml2::read_html(base.url)
-    ds <- doc %>% html_nodes(xpath="//table/descendant::th[@class='nomads'][1]/../../
-                                            descendant::td[contains(., 'http')]/
-                                            preceding-sibling::td[3]")
-    data.set <- ds %>% html_text() %>% trim()
-
-    grib.filter <- doc %>% html_nodes(xpath="//table/descendant::th[@class='nomads'][1]/../../
-                                  descendant::td[contains(., 'http')]/preceding-sibling::td[1]") %>%
-   sapply(function(x) {
-     ifelse(grepl("href", as.character(x)),
-           x %>% html_node("a") %>% html_attr("href"),
-           NA)
-    })
-
-   http.link <- doc %>% html_nodes("a[href^='/pub/data/']") %>% html_attr("href")
-
-   gds.alt <- doc %>% html_nodes(xpath="//table/descendant::th[@class='nomads'][1]/../../
-                              descendant::td[contains(., 'http')]/following-sibling::td[1]") %>%
-   sapply(function(x) {
-     ifelse(grepl("href", as.character(x)),
-           x %>% html_node("a") %>% html_attr("href"),
-           NA)
-    })
+   if(https) {
+       prefix <- "https"
+   } else {
+       prefix <- "http"
+   }
 
 
+   base.url <- paste0(prefix, "://nomads.ncep.noaa.gov/")
+
+   oldlocale <- Sys.setlocale()
+   foo <- Sys.setlocale('LC_ALL','C')
+
+   #Grab table rows
+   row.regex <- "^\\s*<td.*center.*href.*txt_descriptions"
+
+   #Read the website code
+   ncep.html <- readLines(base.url, warn = FALSE)
+
+   #Rows of interest
+   r.i <- which(grepl(row.regex, ncep.html))
+
+   #Get grib and dods models
+   data.set <- rep(NA, length(r.i))
+   grib.filter <- data.set
+   gds.alt <- data.set
+
+   for(k in 1:length(r.i)) {
+      ds.tmp <-   stringr::str_extract(
+         stringr::str_extract(ncep.html[r.i[k]], "<a.*</a>"),
+         ">.*<")
+       data.set[k] <- substr(ds.tmp, 2, nchar(ds.tmp) - 1)
+
+       grib.filter.tmp <- ncep.html[r.i[k] + 1]
+       gds.alt.tmp <- ncep.html[r.i[k] + 3]
+
+       if(grepl("grib filter", grib.filter.tmp)) {
+           grib.filter[k] <- stringr::str_extract(grib.filter.tmp, "cgi-bin.*\\.pl")
+       }
+
+       if(grepl("OpenDAP-alt", gds.alt.tmp)) {
+          gds.alt[k] <- stringr::str_replace_all(
+              stringr::str_extract(gds.alt.tmp, "\"dods.*\""),
+              "\"", "")
+       }
+   }
+
+
    grib.abbrevs <- stringr::str_replace(stringr::str_replace(basename(grib.filter), "filter_", ""), ".pl", "")
    dods.abbrevs <- basename(gds.alt)
-   dods.base.url <- "http://nomads.ncep.noaa.gov:9090/dods/"
+   dods.base.url <- paste0(prefix, "://nomads.ncep.noaa.gov:9090/dods/")
    if(is.null(abbrev)) {
        if(url.type == "grib") {
           good.abbrevs <- which(!is.na(grib.abbrevs))

diff --git a/R/RNomadsTools.R b/R/RNomadsTools.R
@@ -309,9 +309,16 @@ LinkExtractor <- function(url) {
     #OUTPUTS
     #    LINKS - A list of all the links on the page
 
-    html.tmp <- xml2::read_html(url)
-    links <-  html.tmp %>% html_nodes("a") %>% html_attr("href")
+    html.tmp <- readLines(url, warn = FALSE)
 
+    hrefs    <- unlist(strsplit(html.tmp[which(grepl("href", html.tmp))],
+       "</tr><tr><td>"))
+
+    meat     <- stringr::str_extract(hrefs, "href=\".*\"")
+
+    links <-  stringr::str_replace_all(
+        stringr::str_replace(meat, "href=", ""), "\"", "")
+
     return(links)
 }
 

diff --git a/R/onAttach.R b/R/onAttach.R
@@ -1,6 +1,6 @@
 .onAttach <- function(libname, pkgname) {
-   packageStartupMessage(paste0(c("\n****\nWelcome to rNOMADS!\n",
-   "Questions? Send a message to rnomads-user@lists.r-forge.r-project.org\n",
+   packageStartupMessage(paste0(c("\n****\nWelcome to rNOMADS 2.4.0 \"The spice must flow\"!\n",
+   "Questions? Follow @rNOMADS_r on Twitter or send a message to rnomads-user@lists.r-forge.r-project.org\n",
    "Using rNOMADS as a data source for a publication?  Please cite it!\n",  
     "I'm an early career researcher and every citation matters.\n****\n")))
 }
diff --git a/man/CrawlModels.Rd b/man/CrawlModels.Rd
@@ -7,15 +7,15 @@ Get Available Model Runs
 This function determines which instances of a given model are available for download.
 }
 \usage{
-CrawlModels(abbrev = NULL, url = NULL, depth = NULL, verbose = TRUE)
+CrawlModels(abbrev = NULL, model.url = NULL, depth = NULL, verbose = TRUE)
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
   \item{abbrev}{
    The model abbreviation, see \code{\link{NOMADSRealTimeList}}.
    Defaults to \code{NULL}.
 }
-  \item{url}{
+  \item{model.url}{
    A URL to use instead of using the abbreviations in \code{\link{NOMADSRealTimeList}}.
    Defaults to \code{NULL}.
 }

diff --git a/man/GetDODSDates.Rd b/man/GetDODSDates.Rd
@@ -7,7 +7,7 @@ Find available model run dates for data on the GrADS - DODS system.
 This function checks the GrADS data server to see what dates and model subsets are available for model specified by ABBREV
 }
 \usage{
-GetDODSDates(abbrev, archive=FALSE, request.sleep=1)
+GetDODSDates(abbrev, archive=FALSE, request.sleep=1, https = FALSE)
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -20,6 +20,9 @@ GetDODSDates(abbrev, archive=FALSE, request.sleep=1)
   \item{request.sleep}{
   Seconds to pause between HTTP requests when scanning model pages - this prevents timeouts. Default \code{1}.
   }
+  \item{https}{
+  Whether to use HTTPS (\code{TRUE}) or HTTP (\code{FALSE}).  Default \code{FALSE} (use HTTP).
+  } 
 }
 \details{
 This function determines which dates are available for download for a particular model through the GrADS - DODS system.

diff --git a/man/NOMADSRealTimeList.Rd b/man/NOMADSRealTimeList.Rd
@@ -8,17 +8,20 @@ Scans the NOMADS Real Time web site to generate a list of available model produc
 Users can refer to this list to find out more information about the available models, and rNOMADS uses the abbreviations to determine which URLs to scan and download.
 }
 \usage{
-NOMADSRealTimeList(url.type, abbrev = NULL)
+NOMADSRealTimeList(url.type, abbrev = NULL, https = TRUE) 
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
   \item{url.type}{
   Determine whether to return a URL for extracting GRIB files (\code{"grib"}) or for getting ascii format data directly from the server (\code{"dods"}).}
   \item{abbrev}{
    Return information about the model that this abbreviation refers to.
-   Defaults to \code{NULL}, in which case information about all the models available through \code{rNOMADS}.
-}
+   Defaults to \code{NULL}, in which case information about all the models available through \code{rNOMADS} is returned.}
+  \item{https}{
+  Whether to use HTTPS (\code{TRUE}) or HTTP (\code{FALSE}).  Default \code{TRUE}, though DODS may not work over HTTPS.
+  If DODS requests fail, use \code{https = FALSE}.}
 }
+
 \value{
     \item{abbrevs}{An abbreviation for each model}
     \item{names}{A full name for each model}