Skip to content

Commit

Permalink
version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
dannycbowman authored and cran-robot committed Aug 9, 2013
0 parents commit 97b6b8c
Show file tree
Hide file tree
Showing 13 changed files with 775 additions and 0 deletions.
21 changes: 21 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Package: rNOMADS
Type: Package
Title: An interface to the NOAA Operational Model Archive and
Distribution System
Version: 1.0.0
Date: 2013-07-29
Authors@R: c(person(given="Daniel C.",family="Bowman", role=c("aut", "cre"),
email="daniel.bowman@unc.edu"))
Depends: R (>= 3.0.0), fields (>= 6.7.6)
Suggests: GEOmap (>= 2.1)
Description: Automatically download forecast data from the National Oceanic and Atmospheric Administration's Operational Model Archive and Distribution System (NOMADS) and read it into R. Currently, the Global Forecast System 0.5 x 0.5 degree model is the only one supported, but future versions of rNOMADS will include an interface to all models on NOMADS. rNOMADS uses an external series of routines called wgrib2 to read the model data; get wgrib2 at http://www.cpc.ncep.noaa.gov/products/wesley/wgrib2/.
License: GPL (>= 3)
URL: https://code.google.com/p/r-rgfs/ (subversion repository)
http://www.unc.edu/~haksaeng/rNOMADS/rNOMADS_examples.pdf (some
examples)
Packaged: 2013-08-05 11:59:23 UTC; bowman
Author: Daniel Bowman [aut, cre]
Maintainer: Daniel Bowman <daniel.bowman@unc.edu>
NeedsCompilation: no
Repository: CRAN
Date/Publication: 2013-08-09 16:47:01
12 changes: 12 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
ad4c8076fcb19ed42c7a667faeda4c6a *DESCRIPTION
8b54e5a89fbda3af5e077053d40bec76 *NAMESPACE
93094444ac8b710da4e370a7583c90d7 *R/GetGrib.R
e206f3d56c2d6b51745c3ca53fd725ac *R/ReadGrib.R
9a1d5acc515f113ba7fc071fcb1a8373 *data/GFS.RData
a67943e3d86da1c29ae4b0667c645efe *man/GetModelRunHour.Rd
9ebfbb3a4dacd741b0fa1aefbd68f997 *man/GribGrab.Rd
453c3d349fbe6d3ce89f7f4cd138311c *man/ModelGrid.Rd
0429159336849f94b5f787d80726516c *man/NoModelRun.Rd
ea2d6abb9b692cfeac91058a22f69f35 *man/ReadGrib.Rd
2cf0fb8c8a4ebc0388238b347f081344 *man/gfs.model.data.Rd
e00f975422ae110d7a110bb4fc6a0bb1 *man/rNOMADS-package.Rd
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
exportPattern("^[[:alpha:]]+")
139 changes: 139 additions & 0 deletions R/GetGrib.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
GribGrab <- function(levels, variables, local.dir = ".", file.name = "fcst.grb", model.date = Sys.time(), fcst.date = Sys.time(),
    model.domain = NULL, tidy = FALSE, verbose = TRUE)
{
    #Download a grib file of GFS forecast data from the NOMADS repository.
    #INPUTS
    #    LEVELS is the vertical region to return data for, as vector
    #    VARIABLES is the data to return, as vector
    #    LOCAL.DIR is the directory to save the files in
    #    FILE.NAME is the file name to save the grib file under on disk, defaults to "fcst.grb"
    #    MODEL.DATE is the date and time of the requested model run, in GMT and POSIXlt format, defaults to current system time
    #    FCST.DATE is the requested forecast date, defaults to current system time
    #    MODEL.DOMAIN is a vector of latitudes and longitudes that specify the area to return a forecast for
    #        This is a rectangle with elements: west longitude, east longitude, north latitude, south latitude
    #        Defaults to entire planet
    #    TIDY asks whether to delete all grib files in LOCAL.DIR before downloading, default FALSE.
    #        This is useful to clear out previous model runs.
    #        It looks for all files ending in '.grb' and removes them.
    #    VERBOSE gives a blow by blow account of the download. Default TRUE.
    #OUTPUTS
    #    FILE.NAME is the name of the grib file that has been downloaded

    if(tidy) {
        #FIX: list.files() takes a regular expression, not a glob, so the
        #pattern is "\\.grb$" (the old "*\\.grb$" had an invalid leading *).
        #Also prepend LOCAL.DIR so files are removed from the download
        #directory rather than the current working directory.
        unlink(file.path(local.dir, list.files(local.dir, pattern = "\\.grb$")))
    }

    #Build the "&lev_X=on" and "&var_Y=on" fragments of the CGI query string
    levels.str <- paste(gsub(" ", "_", levels), collapse = "=on&lev_")
    variables.str <- paste(variables, collapse = "=on&var_")

    #Check for latest model run date
    model.params <- GetModelRunHour(model.date = model.date, fcst.date = fcst.date)
    if(is.na(model.params$model.hour)) {
        stop("Could not find the latest model run date. Make sure you have a working Internet connection. If you do and this code is still not working, it may be that the NOMADS website is down.
        Give it an hour or so, and try again.")
    }

    #File name on the NOMADS server, e.g. "gfs.t06z.mastergrb2f03&"
    grb.name <- paste("gfs.t", sprintf("%02d", model.params$model.hour), "z.mastergrb2f",
        sprintf("%02d", model.params$fcst.back), "&", sep = "")
    #Reuse the directory portion of the URL that GetModelRunHour already validated
    grb.dir <- paste("dir=", strsplit(model.params$url.tested, split = "dir=")[[1]][2], sep = "")

    if(!is.null(model.domain)) {
        subregion.str <- paste( "=on&subregion=",
            "&leftlon=", model.domain[1],
            "&rightlon=", model.domain[2],
            "&toplat=", model.domain[3],
            "&bottomlat=", model.domain[4],
            "&", sep = "")
    } else {
        subregion.str <- "=on&"
    }

    grb.url <- paste("http://nomads.ncep.noaa.gov/cgi-bin/filter_gfs_hd.pl?file=",
        grb.name,
        "lev_",
        levels.str,
        "=on&var_",
        variables.str,
        subregion.str,
        grb.dir,
        sep = "")

    #Fetch the grib file; mode = "wb" because grib is a binary format
    download.file(grb.url, paste(local.dir, file.name, sep = "/"), mode = "wb", quiet = !verbose)

    return(file.name)
}

NoModelRun <- function(e)
{
    #Error handler used by GetModelRunHour when it pings a GFS model run
    #that does not exist (yet) on the server.
    #INPUTS
    #    E is the condition object supplied by tryCatch (ignored)
    #OUTPUTS
    #    The sentinel string "Failure", letting the caller detect the miss
    "Failure"
}

GetModelRunHour <- function(model.date = Sys.time(), fcst.date = Sys.time(),
    url.to.check = c("http://nomads.ncep.noaa.gov/cgi-bin/filter_gfs_hd.pl?dir=%2Fgfs.", "%2Fmaster"), attempts = 10)
{
    #Checks for and returns the date for the latest model run time for the requested date.
    #By default, check for the system time, and get the closest forecast.
    #If the input is not the current time, get the model forecast closest behind the requested date.
    #
    #INPUTS
    #    MODEL.DATE - a date in POSIXct format saying which GFS model date to use (i.e. when the model was run). Defaults to the current system time
    #    FCST.DATE - a date in POSIXct format saying what date the forecast should be for. Defaults to the current system time.
    #    URL.TO.CHECK - what URL to append the GFS formatted model run time to
    #        We use this URL to check if the model has run yet.
    #        Perhaps, if the default fails, the user can modify this argument to make things work
    #    ATTEMPTS - number of model runs to check for before giving up
    #
    #OUTPUTS as list
    #    MODEL.HOUR - The model run hour to download, NA if no run was found after ATTEMPTS tries
    #    URL.TESTED - The url that was tested to determine the model hour
    #    FCST.TDIFF - Time difference between model date and forecast date (i.e. how far in the future the forecast is from the model run that's available) in hours
    #    FCST.BACK - The model forecast run immediately before the requested forecast date, in hours, in case that grib file is desired
    #    FCST.FORE - The model forecast run immediately after the requested forecast date, in hours, in case that grib file is desired

    model.hour <- seq(0, 18, by = 6)   #GFS runs at 00, 06, 12, 18 GMT
    fcst.hour <- c(seq(0, 192, by = 3), seq(204, 384, by = 12))  #Available forecast offsets
    attributes(model.date)$tzone <- "GMT" # Convert to GMT time zone
    attributes(fcst.date)$tzone <- "GMT"
    model.date <- as.POSIXlt(model.date)
    fcst.date <- as.POSIXlt(fcst.date)

    #FIX: initialize outputs so the final return() succeeds even when every
    #attempt fails (previously these were unassigned and return() errored)
    fcst.tdiff <- NA
    fcst.back <- NA
    fcst.fore <- NA

    #FIX: was "c = 1" - renamed to avoid shadowing base::c and use <-
    attempt <- 1

    while (TRUE)
    {
        yr <- model.date$year + 1900
        mo <- model.date$mon + 1  #FIX: was $mo, which relied on partial matching of POSIXlt's "mon"
        mday <- model.date$mday
        hr <- model.date$hour

        #Most recent scheduled run at or before the current hour
        hr.diff <- model.hour - hr
        latest.model.run <- model.hour[hr.diff == max(hr.diff[hr.diff <= 0])]

        fcst.url <- paste(url.to.check[1], yr, sprintf("%02d", mo), sprintf("%02d", mday), sprintf("%02d", latest.model.run), url.to.check[2], sep = "")
        #NoModelRun returns the string "Failure" if the URL cannot be opened
        test <- suppressWarnings(tryCatch(url(fcst.url, open = "rb"), error = NoModelRun))

        #FIX: was test == "Failure", which compared a connection object with a
        #string on success; identical() is safe for both branches
        if(identical(test, "Failure")) {
            model.date <- as.POSIXlt(model.date - 3600 * 6) #Subtract 6 hours and try again
        }
        else {
            close(test)
            fcst.tdiff <- as.numeric(difftime(fcst.date, model.date, units = "hours"))
            fcst.hour.diff <- fcst.hour - fcst.tdiff
            fcst.back <- fcst.hour[fcst.hour.diff == max(fcst.hour.diff[fcst.hour.diff <= 0])]
            fcst.fore <- fcst.hour[fcst.hour.diff == min(fcst.hour.diff[fcst.hour.diff >= 0])]
            model.hour <- latest.model.run
            break
        }

        if (attempt > attempts) {
            model.hour <- NA
            break
        }
        attempt <- attempt + 1
    }
    return (list(model.hour = model.hour, url.tested = fcst.url, fcst.tdiff = fcst.tdiff, fcst.back = fcst.back, fcst.fore = fcst.fore))
}
136 changes: 136 additions & 0 deletions R/ReadGrib.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
ReadGrib <- function(file.name, variables, levels) {
    #This is a function to read forecast data from a Grib file
    #INPUTS
    #    FILE.NAME - Grib file name
    #    VARIABLES - data to extract
    #    LEVELS - which levels to extract data from
    #OUTPUTS
    #    MODEL.DATA - the grib model as an array, with columns for the model run date (when the model was run)
    #    the forecast (when the model was for), the variable (what kind of data), the level (where in the atmosphere or the Earth, vertically)
    #    the longitude, the latitude, and the value of the variable.

    #Build a wgrib2 -match expression of the form
    #  -match "(VAR1|VAR2):(LVL1|LVL2)"
    #FIX: replaces character-by-character string surgery that contained a
    #dead assignment (the appended '"' was immediately overwritten)
    match.str <- paste(' -match "(',
        paste(variables, collapse = "|"),
        "):(",
        paste(levels, collapse = "|"),
        ')"', sep = "")

    #NOTE(review): this shell command interpolates file.name/variables/levels
    #directly; callers should not pass untrusted strings here
    wg2.str <- paste('wgrib2 ',
        file.name, ' -inv my.inv -csv - -no_header',
        match.str, sep = "")

    #Get the data from the grib file in CSV format
    csv.str <- system(wg2.str, intern = TRUE)

    #Flatten the CSV lines into one vector, strip quotes, and reshape into a
    #7-column table (wgrib2 -csv emits exactly 7 fields per record)
    model.data.vector <- strsplit(paste(gsub("\"", "", csv.str), collapse = ","), split = ",")[[1]]
    model.data <- t(array(model.data.vector, dim = c(7, length(model.data.vector)/7)))
    colnames(model.data) <- c("model.run.date", "forecast.date", "variable", "level", "lon", "lat", "value")
    return(model.data)
}

ModelGrid <- function(model.data, lon.grid, lat.grid, variables = NULL, levels = NULL, model.domain = NULL) {
    #Transform model data array into a grid with dimensions levels x variables x lon range x lat range
    #This should reduce the size of the returned data by removing redundant information
    #INPUTS
    #    MODEL.DATA - Data returned by ReadGrib
    #    LON.GRID - spacing of longitude grid
    #    LAT.GRID - spacing of latitude grid
    #    VARIABLES - variables to include in grid, if NULL, include all of them
    #    LEVELS - levels to include in grid, if NULL, include all of them
    #    MODEL.DOMAIN - vector c(LEFT LON, RIGHT LON, TOP LAT, BOTTOM LAT) of region to include in output. If NULL, include everything.
    #
    #OUTPUTS
    #    FCST.GRID - A list with elements:
    #        $Z An array of dimensions levels x variables x lon x lat; each level x variable contains the model grid of data from that variable and level
    #        $X Vector of longitudes
    #        $Y Vector of latitudes
    #        $VARIABLES - the variables contained in the grid
    #        $LEVELS - the levels in the grid
    #        $MODEL.RUN.DATE - when the forecast model was run
    #        $FCST.DATE - what date the forecast is for

    model.run.date <- unique(model.data[,1])

    if(length(model.run.date) > 1) {
        warning("There appears to be more than one model run date in your model grid!")
    }

    fcst.date <- unique(model.data[,2])

    if(length(fcst.date) > 1) {
        #FIX: this warning previously repeated the model run date message
        warning("There appears to be more than one forecast date in your model grid!")
    }

    #Numeric lon/lat/value columns for fast subsetting below
    data.grid <- matrix(as.numeric(model.data[,5:7]), nrow = nrow(model.data))

    if(is.null(variables)) {
        variables <- unique(model.data[,3])
    }

    #Drop requested variables that are absent from the data, with a warning
    nomatch.ind <- is.na(match(variables, unique(model.data[,3])))
    if(sum(nomatch.ind) > 0) {
        warning(paste("The following variables are NOT present in the model data:", paste(variables[nomatch.ind], collapse = " ")))
        variables <- variables[!nomatch.ind]
    }


    if(is.null(levels)) {
        levels <- unique(model.data[,4])
    }

    #Drop requested levels that are absent from the data, with a warning
    nomatch.ind <- is.na(match(levels, unique(model.data[,4])))
    if(sum(nomatch.ind) > 0) {
        warning(paste("The following levels are NOT present in the model data:", paste(levels[nomatch.ind], collapse = " ")))
        levels <- levels[!nomatch.ind]
    }


    if(is.null(model.domain)) {
        model.domain <- c(min(data.grid[,1]), max(data.grid[,1]), max(data.grid[,2]), min(data.grid[,2]))
    }

    #Build grid

    lons <- seq(model.domain[1], model.domain[2], by = lon.grid)
    lats <- seq(model.domain[4], model.domain[3], by = lat.grid)
    grid <- list(x = lons, y = lats)

    fcst.grid <- list(z = array(rep(NA, length(lons) * length(lats) * length(variables) * length(levels)),
        dim = c(length(levels), length(variables), length(lons), length(lats))),
        x = lons, y = lats, variables = variables, levels = levels,
        model.run.date = model.run.date, fcst.date = fcst.date)

    #Put variables and levels into a series of layered images
    #as.image() comes from the fields package (a declared dependency)
    for(lvl in levels) {
        for(var in variables) {
            mi <- which(var == model.data[,3] & lvl == model.data[,4] &
                data.grid[,1] >= model.domain[1] & data.grid[,1] <= model.domain[2] &
                data.grid[,2] <= model.domain[3] & data.grid[,2] >= model.domain[4])
            if(length(mi) > 0) {
                fcst.grid$z[which(lvl == fcst.grid$levels), which(var == fcst.grid$variables),,] <- as.image(
                    array(
                        data.grid[mi,3],
                        dim = c(length(lons), length(lats))),
                    grid = grid,
                    x = cbind(data.grid[,1], data.grid[,2]))$z
            }
        }
    }

    return(fcst.grid)
}
Binary file added data/GFS.RData
Binary file not shown.
68 changes: 68 additions & 0 deletions man/GetModelRunHour.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
\name{GetModelRunHour}
\alias{GetModelRunHour}
\title{Get the time and date for the most recent GFS model run
}
\description{Determine the most recent GFS model run at or before a given date and time.
This ensures that the forecast to download is the most accurate and up to date.
}
\usage{
GetModelRunHour(model.date = Sys.time(), fcst.date = Sys.time(),
url.to.check = c(
"http://nomads.ncep.noaa.gov/cgi-bin/filter_gfs_hd.pl?dir=\%2Fgfs.",
"\%2Fmaster"),
attempts = 10)
}
\arguments{
\item{model.date}{
The date when the GFS model was run, in POSIX format.
}
\item{fcst.date}{
The date to get the weather forecast for, in POSIX format.
}
\item{url.to.check}{
The URL that points to the model download page.
}
\item{attempts}{
How many times to try contacting the model download page before giving up.
}
}
\details{
\code{\link{GribGrab}} calls \code{GetModelRunHour} to determine which model files to download.
\code{model.date} and \code{fcst.date} are in POSIX format (use \code{as.POSIXct}).
}
\value{
\item{model.hour}{What GMT time the latest model run took place for the date specified in \code{model.date}.}
\item{url.tested}{The URL that was checked to determine that the model hour is valid}
\item{fcst.tdiff}{Time difference between model date and forecast date (i.e. how far in the future the forecast is from the model run that's available) in hours}
\item{fcst.back}{The model forecast run immediately before the requested forecast date}
\item{fcst.fore}{The model forecast run immediately after the requested forecast date.}
}
\references{
\url{http://nomads.ncep.noaa.gov/}
}
\author{
Daniel Bowman \email{daniel.bowman@unc.edu}
}
\note{
This function requires an Internet connection to work.
}
\seealso{
\code{\link{GribGrab}}
}
\examples{
#Get the latest model run
model.date <- Sys.time()
#Get the forecast 6 hours from now - addition is defined in seconds
fcst.date <- model.date + 6 * 3600
#Figure out what model files to download
model.params <- GetModelRunHour(model.date = model.date, fcst.date = fcst.date)
print(model.params$model.hour) #When the GFS model was run last
print(model.params$fcst.back) #The forecast hour before the requested forecast date
print(model.params$fcst.fore) #The forecast hour after the requested forecast date
}
\keyword{ connection }

0 comments on commit 97b6b8c

Please sign in to comment.