diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..24cac66 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,22 @@ +Package: sad +Title: Verify the Scale, Anisotropy and Direction of Weather Forecasts +Version: 0.1.3 +Authors@R: + person(given = "Sebastian", + family = "Buschow", + role = c("aut", "cre"), + email = "s6sebusc@uni-bonn.de", + comment = c(ORCID = "0000-0003-4750-361X")) +Description: Implementation of the wavelet-based spatial verification method of Buschow and Friederichs "SAD: Verifying the Scale, Anisotropy and Direction of precipitation forecasts" (2020, submitted to QJRMS). Forecasts and Observations are transformed by a decimated or redundant dual-tree complex wavelet transform to analyze the spatial scale, degree of anisotropy and preferred direction in each field. These structural attributes are compared by a series of scores. An experimental algorithm for the correction of these errors is included as well. +License: MIT + file LICENSE +Imports: emdist +Depends: dualtrees +Encoding: UTF-8 +LazyData: false +RoxygenNote: 6.1.1 +NeedsCompilation: no +Packaged: 2020-11-02 09:43:31 UTC; s6sebusc +Author: Sebastian Buschow [aut, cre] () +Maintainer: Sebastian Buschow +Repository: CRAN +Date/Publication: 2020-11-06 16:30:02 UTC diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..da8ab6f --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2020 +COPYRIGHT HOLDER: Sebastian Buschow diff --git a/MD5 b/MD5 new file mode 100644 index 0000000..68f6117 --- /dev/null +++ b/MD5 @@ -0,0 +1,19 @@ +60d718f504be5e06951a24f76cd0f944 *DESCRIPTION +3aa3337d9909e6727b6a5273157b5d1a *LICENSE +562d7534cc0d19d00a9f6d63ae4c2f8d *NAMESPACE +d0d02d9d86fd4a82a8cda142c2982006 *R/data.R +a74e86afa0ad6553bb7c6aabe0d9f9e8 *R/lowlevel.R +651a58de7dbc6182857e66b1b40a0a78 *R/plotfunctions.R +e93bd481c8272d2ccb915cd868f41b09 *R/sadcorrect.R +4e85d2c3fb8fcc7a6a8046b7e74224e5 *R/sadverif.R +e01d44645bba935c0ee4940ba0fae425 *R/scores.R +a002b526c81a682ad2f15cae55e1b891 *data/rrain.rda +d8645178f3bced3a5206ea67d02d8224 *man/getpareto.Rd +001ce3be45718ea17ea2847e983d740e *man/hemd.Rd +247f74ec98d994e8b3ad633cf9a6db53 *man/prepare_sad.Rd +857d98cd30644ac9bd702ba01f7b7ff2 *man/raincols.Rd +dc21822f0905877914cef0533698d69b *man/rrain.Rd +79fcba07fe089de6ec3176e7302090a5 *man/sadcorrect.Rd +3628a5f662183b55f4c7b96524a80c47 *man/sadforecast.Rd +cb2e6cf2b2bc6a918d9ab9956a391f7d *man/sadverif.Rd +472b09cbca3c581412cd18be9ddec66b *man/semd.Rd diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..071168a --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,14 @@ +# Generated by roxygen2: do not edit by hand + +S3method(plot,sadforecast) +S3method(plot,sadverif) +S3method(summary,sadverif) +export(as.sadforecast) +export(getpareto) +export(hemd) +export(prepare_sad) +export(raincols) +export(sadcorrect) +export(sadverif) +export(semd) +import(dualtrees) diff --git a/R/data.R b/R/data.R new file mode 100644 index 0000000..9892c83 --- /dev/null +++ b/R/data.R @@ -0,0 +1,18 @@ +#' Random Rain +#' +#' Randomly simulated synthetic rain fields with Matern covariances +#' +#' @docType data +#' +#' @usage data(rrain) +#' +#' @format A \code{4x10x128x128} matrix +#' +#' @keywords datasets +#' +#' @details These fields were used in Buschow et al. (2019) . The first array corresponds to the four model configurations from that paper (different roughness nu and scale sc), the second dimension contains ten realizations for each model. +#' @source simulated using the 'RandomFields' package, code available at <10.5281/zenodo.3257511> +#' +#' @examples +#' data(rrain) +"rrain" diff --git a/R/lowlevel.R b/R/lowlevel.R new file mode 100644 index 0000000..f5a3c3f --- /dev/null +++ b/R/lowlevel.R @@ -0,0 +1,125 @@ +#' class for a list of forecasts +#' +#' check that a list of forecasts fulfills all requirements to be verified by our method +#' @param x a list of 2 or more 2D matrices with equal sizes and no missing or inifinite values +#' @param mfrow vector with the number of rows and columns you would like in the plot +#' @param col color scale for the plot +#' @param ... further arguments passed to \code{image} +#' @return an object of class \code{sadforecast} +#' @details \code{as.sadforecast} does nothing except check that everything is as it should be, add the attributes that can be changed by \code{prepare_sad} and provide a method for quick plots of the data. +#' @examples +#' data( rrain ) +#' ra <- list( rrain[1,1,,], rrain[4,5,,], rrain[2,7,,] ) +#' ra <- as.sadforecast(ra) +#' plot(ra) +#' @name sadforecast +NULL + +#' @rdname sadforecast +#' @export +as.sadforecast <- function(x){ + # check that we got a list + if( !is.list(x) ) stop( "please provide observation and forecasts as a list of matrices" ) + + # check that we got at least one pair + n <- length(x) + if( n<2 ) stop( "need at least two fields to compare" ) + + # check the dimensions + dims <- unlist( lapply(x, function(x) length(dim(x))) ) + if( any( dims != 2 ) ) stop( "all inputs must be 2D matrices" ) + nx <- unlist( lapply( x, nrow ) ) + ny <- unlist( lapply( x, ncol ) ) + if( any( nx[-1] != nx[1] ) | any( ny[-1] != ny[1] ) ) stop( "all inputs must have the same dimensions" ) + + # check that we got no missing values + if( any( is.na( unlist( x ) ) ) ) stop( "no missing values allowed" ) + if( any( !is.finite( unlist( x ) ) ) ) stop( "no infinite values allowed" ) + + # maybe add names + if( is.null( names(x) ) ) names(x) <- c( "obs", paste0( "f", 1:(n-1) ) ) + + # add attributes if they aren't given already + defaults <- list( log=FALSE, px=1:nx[1], py=1:ny[1], rsm=0, xmin=-Inf, boundaries="none" ) + for( n in names( defaults ) ){ + if( is.null( attr(x, n) ) ) attr( x, n ) <- defaults[[n]] + } + + return( structure( x, class="sadforecast" ) ) +} + +#' prepare a sad forecast for verification +#' +#' remove small values, apply log-transform, smooth borders, handle boundary conditions +#' @param x a list of 2 or more 2D matrices with equal sizes and no missing or inifinite values, as required by \code{as.sadforecast} +#' @param xmin values smaller than \code{xmin} are set to zero +#' @param log logical, do you want to log-transfrom the data? (recommended for precipitation) +#' @param rsm number of pixels which are linearly smoothed at the edge +#' @param Nx size to which the data is extended in x-direction +#' @param Ny size to which the data is extended in y-direction +#' @param boundaries how to handle the boundary conditions, either "pad", "mirror" or "periodic" +#' @return an object of class \code{sadforecast} which has been prepared in the desired way. +#' @details the positions within the extended field where the original field resides are output as attributes "px", "py" of the result. The other input parameters are saved as attributes of the result as well. +#' @examples +#' data( rrain ) +#' ra <- list( rrain[2,4,,], rrain[3,9,,] ) +#' ra <- prepare_sad( ra, rsm=0, Nx=256, boundaries="mirror", log=FALSE ) +#' plot(ra) +#' @export +prepare_sad <- function( x, xmin=0.1, log=TRUE, rsm=0, Nx=NULL, Ny=NULL, boundaries="pad" ){ + x <- as.sadforecast( x ) + old_att <- attributes( x ) + + if( is.null(Nx) ) Nx <- nrow( x[[1]] ) + if( is.null(Ny) ) Ny <- ncol( x[[1]] ) + + # remove small values + x <- lapply(x, function(x) x*1*(x>=xmin) ) + # smooth the boundaries + x <- lapply(x, dualtrees::smooth_borders, r=rsm) + + # log or no? + if( log & !old_att$log ) logfun <- function(x) log2( x + xmin ) else logfun <- identity + # select boundary type + bound <- switch( boundaries, + mirror=dualtrees::put_in_mirror, + pad=dualtrees::pad, + periodic=dualtrees::period_bc, + stop( "unknown boundaries, you can use 'mirror', 'pad' or 'periodic'" ) + ) + # remember where we were in the original field + bc <- bound( logfun(x[[1]]), N=Nx, Ny=Ny ) + # apply boundaries and maybe logarithm + x <- lapply( x, function(x) bound( logfun(x), N=Nx, Ny=Ny )$res ) + + # set attributes + class( x ) <- "sadforecast" + attr( x, "px" ) <- bc$px + attr( x, "py" ) <- bc$py + attr( x, "xmin" ) <- max( xmin, old_att$xmin ) + if(log) attr( x, "log" ) <- TRUE + attr( x, "rsm" ) <- rsm + attr( x, "boundaries" ) <- boundaries + return( x ) +} + +#' rain color scale +#' +#' eight shades of blue used in \code{plot.sadforecast} +#' @export +raincols <- c( "#FCFCFC", "#CAD5EB", "#97AFD9", "#608AC8", "#3267A5", "#214671", "#112641", "#040404" ) + +angdif <- function(x,y){ + d <- x - y + if( d > 90 ) d <- d - 180 + if( d < -90 ) d <- d + 180 + return(d) +} + + +checkJ <- function(J, Nx, Ny){ + if( J < 1 ) stop( "you have to use at least one scale (J>=1)" ) + N <- min( Nx, Ny ) + Jmax <- floor( log2( N ) ) + if( J > Jmax ) stop( paste0( "J must be no greater than ", Jmax ) ) +} diff --git a/R/plotfunctions.R b/R/plotfunctions.R new file mode 100644 index 0000000..ed0c5b4 --- /dev/null +++ b/R/plotfunctions.R @@ -0,0 +1,21 @@ + + +#' @rdname sadforecast +#' @export +plot.sadforecast <- function(x, mfrow=NULL, col=NULL, ...){ + if( is.null(col) ) col <- raincols + oldpar <- graphics::par( no.readonly=TRUE ) + on.exit( graphics::par(oldpar) ) + + nc <- ceiling( sqrt( length(x) ) ) + nr <- ceiling( length(x)/nc ) + if( is.null(mfrow) ) mfrow <- c(nr,nc) + + graphics::par( mfrow=mfrow, mar=c(1,1,2,1) ) + for( i in 1:length(x) ){ + graphics::image( x[[i]], col=col, xaxt="n", yaxt="n", xlab="", ylab="", + useRaster=TRUE, main=names(x)[i], ... ) + } + + +} diff --git a/R/sadcorrect.R b/R/sadcorrect.R new file mode 100644 index 0000000..b70da37 --- /dev/null +++ b/R/sadcorrect.R @@ -0,0 +1,99 @@ +#' correct structure errors +#' +#' use the inverse 'dtcwt' to correct errors in scale, anisotropy and direction +#' @param x a list of equally sized matrices, the first element is assumed to be the observation +#' @param xmin values smaller than \code{xmin} are set to zero +#' @param log logical, do you want to log-transfrom the data? (recommended for precipitation) +#' @param rsm number of pixels which are linearly smoothed at the edge +#' @param Nx size to which the data is extended in x-direction, has to be a whole power of 2 +#' @param Ny size to which the data is extended in y-direction, has to be a whole power of 2 +#' @param J largest scale considered +#' @param boundaries how to handle the boundary conditions, either "pad", "mirror" or "periodic" +#' @param direction if \code{TRUE}, scale and direction are corrected, otherwise only scale +#' @return an object of class \code{sadforecast} +#' @details The algorithm performs the following steps: +#' \enumerate{ +#' \item remove values below \code{xmin} +#' \item if \code{log=TRUE} log-transform all fields +#' \item set all fields to zero mean, unit variance +#' \item apply \code{dtcwt} to all fields +#' \item loop over forecasts and scales. If \code{direction=TRUE} loop over the six directions. Multiply forecast energy at each location by the ratio of total observed energy to total forecast energy at that scale (and possibly direction) +#' \item apply \code{idtcwt} to all forecasts +#' \item reset means and variance of the forecasts to their original values +#' \item if \code{log=TRUE} invert the log-transform +#' \item return the list of corrected fields +#'} +#' @examples +#' data(rrain) +#' ra <- as.sadforecast( list( rrain[2,1,,], rrain[3,1,,], rrain[3,2,,], rrain[3,3,,] ) ) +#' ra_c <- sadcorrect( ra, rsm=10 ) +#' plot(ra_c) +#' @export +sadcorrect <- function( x, xmin=0.1, log=TRUE, rsm=0, Nx=NULL, Ny=NULL, J=NULL, boundaries="pad", direction=TRUE ){ + + # check the input + x <- as.sadforecast( x ) + n <- length( x ) + nam <- names( x ) + + # get defaults for the dimensions etc + if( is.null( Nx ) ) Nx <- 2**ceiling( log2( max( dim( x[[1]] ) ) ) ) + if( is.null( Ny ) ) Ny <- Nx + if( is.null( J ) ) J <- log2( min(Nx,Ny) ) - 3 + + # check that J is valid + checkJ( J, Nx, Ny ) + + # remember the original observation + obs0 <- x[[1]] + + # handle thresholding, log, boundaries + x <- prepare_sad( x, xmin, log, rsm, Nx, Ny, boundaries ) + + # standardize the margins + xm <- lapply( x, mean ) + xsd <- lapply( x, stats::sd ) + for( i in 1:n ) x[[i]] <- ( x[[i]] - xm[[i]] ) / xsd[[i]] + + # apply wavelet transform + dt <- lapply( x, dualtrees::dtcwt, dec=TRUE, + fb1 = dualtrees::near_sym_b_bp, + fb2 = dualtrees::qshift_b_bp ) + + + res <- list( obs0 ) + + # correct each forecast + dto <- dt[[1]] + for( i in 2:n ){ # loop over forecasts + dtf <- dt[[i]] + for( j in 1:J ){ # loop over all scales + if( direction ){ # correct each direction individually + for( d in 1:6 ){ + fac <- sum(Mod(dto[[j]][,,d])**2) / + sum(Mod(dtf[[j]][,,d])**2) + dtf[[j]][,,d] <- dtf[[j]][,,d]*fac + } + }else{ # correct all directions together + fac <- sum(Mod(dto[[j]])**2) / + sum(Mod(dtf[[j]])**2) + dtf[[j]] <- dtf[[j]]*fac + } + + } + # transform back + fbc <- idtcwt( dtf, fb1 = dualtrees::near_sym_b_bp, + fb2 = dualtrees::qshift_b_bp + ) + # restore margins + fbc <- ( fbc - mean(fbc) )/ stats::sd(fbc) * xsd[[i]] + xm[[i]] + # cut out original domain + fbc <- with( attributes(x), fbc[px,py] ) + # invert logarithm (maybe) + if( log ) fbc <- 2**fbc - xmin + res[[i]] <- fbc + } + + + return( structure(res, class="sadforecast" ) ) +} diff --git a/R/sadverif.R b/R/sadverif.R new file mode 100644 index 0000000..569cb91 --- /dev/null +++ b/R/sadverif.R @@ -0,0 +1,252 @@ +#' dual-tree verification +#' +#' verify the scale, anisotropy and direction of a number of forecasts +#' @param x a list of equally sized matrices, the first element is assumed to be the observation +#' @param dec logical, do you want to use the decimated transform +#' @param xmin values smaller than \code{xmin} are set to zero +#' @param log logical, do you want to log-transfrom the data? (recommended for precipitation) +#' @param a relative weight of directional errors compared to scale errors in \code{semdd} +#' @param nbr number of breaks for the scale histograms, has no effect if \code{dec=TRUE} +#' @param rsm number of pixels which are linearly smoothed at the edge +#' @param Nx size to which the data is extended in x-direction +#' @param Ny size to which the data is extended in y-direction +#' @param J largest scale considered +#' @param boundaries how to handle the boundary conditions, either "pad", "mirror" or "periodic" +#' @param return_specs if \code{TRUE}, the spatial mean spectra are returned as well +#' @param object object of class sadverif +#' @param ... further arguments, currently ignored. +#' @details each element of x is transformed via \code{dtcwt} from the 'dualtrees' package. Scores and centres based on the mean spectra are calculated. If \code{dec=FALSE}, scale histograms and the corresponding score \code{hemd} are calcualted as well. +#' @return an object of class \code{sadverif}, containing the following elements +#' \describe{ +#' \item{settings}{ a dataframe containing the parameters that were originally passed to dtverif } +#' \item{centres}{ a matrix cotaining the anisotropy \code{rho}, angle \code{phi} and central scale \code{z} derived from the mean spectra. Rain area and sum are included as well.} +#' \item{detscores}{ a matrix containing the differences in centre components, the direction/anisotropy score \code{dxy}, the emd between direction-averaged spectra (\code{semd}) and the emd between the directional spectra (\code{semdd}). If \code{dec=FALSE}, the emd between the scale histograms, hemd, is included as well. } +#' \item{time}{ the time the calculation took in seconds } +#' } +#' if there is more than one forecast, the ensemble scores SpEn and (if available), hemd are computed as well, treating all forecasts as members of the ensemble to be verified. +#' @references Selesnick, I.W., R.G. Baraniuk, and N.C. Kingsbury (2005) +#' Buschow et al. (2019) +#' Buschow and Friederichs (2020) +#' @examples +#' oldpar <- par(no.readonly=TRUE) +#' on.exit(par(oldpar)) +#' data(rrain) +#' ra <- as.sadforecast( list( rrain[1,1,,], rrain[1,2,,], rrain[2,1,,], rrain[3,1,,] ) ) +#' plot(ra) +#' verif <- sadverif( ra, log=FALSE, xmin=0 ) +#' summary(verif) +#' par( mfrow=c(2,2) ) +#' plot( verif ) +#' @import dualtrees +#' @name sadverif +NULL + +#' @rdname sadverif +#' @export +sadverif <- function( x, dec=TRUE, xmin=0.1, log=TRUE, a=1, nbr=33, rsm=0, Nx=NULL, Ny=NULL, J=NULL, boundaries="pad", return_specs=FALSE ){ + + # start timing + t0 <- proc.time() + + # check the input + x <- as.sadforecast( x ) + n <- length( x ) + nam <- names( x ) + + # get defaults for the dimensions etc + if( is.null( Nx ) ) Nx <- 2**ceiling( log2( max( dim( x[[1]] ) ) ) ) + if( is.null( Ny ) ) Ny <- Nx + if( is.null( J ) ) J <- log2( min(Nx,Ny) ) - 3 + + # check that J is valid + checkJ( J, Nx, Ny ) + + # get area and sum + xar <- lapply( x, function(x) mean( x>xmin ) ) + xsu <- lapply( x, function(x) sum( x*1*(x>=xmin) ) ) + + # handle thresholding, log, boundaries + x <- prepare_sad( x, xmin, log, rsm, Nx, Ny, boundaries ) + + # do the dual-tree transform + dt <- lapply( x, dualtrees::dtcwt, J=J, dec=dec, + fb1 = dualtrees::near_sym_b_bp, + fb2 = dualtrees::qshift_b_bp ) + + if( !dec ){ + # cut out original domain + dt <- with( attributes(x), lapply(dt, function(y) return(y[,px,py,]) ) ) + # correct bias + dt <- lapply(dt, FUN=get_en, correct = "b_bp", N = 2^(J + 3) ) + # remove negative energy + dt <- lapply( dt, function(x) x*1*(x>0) ) + # prepare for the histograms + br <- seq( 1, J, , nbr ) + mi <- ( br[-1] + br[-nbr] ) / 2 + # calculate the histograms + hists <- array( dim=c(n, nbr-1), dimnames=list(nam, mi) ) + for( i in 1:n ){ + # get the mask of non-zero rain + th <- if(log) log2(xmin) else xmin + ma <- x[[i]] <= th + # map of central scales + zc <- with( attributes(x), dt2cen( dt[[i]], mask=ma[px,py] )[ ,,3 ] ) + hists[ i, ] <- graphics::hist( zc, breaks=br, plot=FALSE )$density + } + } + + # average over space + dt <- lapply( dt, dualtrees::dtmean ) + # get the mean centre + dtcen <- lapply( dt, dualtrees::dt2cen ) + # get the x- and y-component + xy <- lapply( dtcen, function(x) dualtrees::cen2xy(x)[1:2] ) + + # prepare the results + cennames <- c( "rho", "phi", "z", "area", "sum" ) + scorenames <- c( paste0("d", cennames), "dxy", "semd", "semdd" ) + if( !dec ) scorenames <- c( scorenames, "hemd" ) + + cen <- array( dim=c(n, length(cennames) ), + dimnames=list(nam, cennames ) ) + detscores <- array( dim=c(n-1, length(scorenames) ), + dimnames=list(nam[-1], scorenames) ) + + # calculate the centres and deterministic scores + for( i in 1:n ){ + # get the centres + cen[ i, ] <- c( dtcen[[i]], xar[[i]], xsu[[i]] ) + j <- i-1 + if( j > 0 & xar[[i]] > 0 ){ + # differences in centre + detscores[ j, 1:5 ] <- cen[i,] - cen[1,] + # special treatment for the angle + detscores[ j, 2 ] <- angdif( cen[i,2], cen[1,2] ) + + # combined anisotropy / direction score + detscores[ j, 6 ] <- sqrt( sum( ( xy[[i]] - xy[[1]] )**2 ) ) + + # direction-less and directional emd + detscores[ j, 7 ] <- semd( dt[[i]], dt[[1]], dir=FALSE ) + detscores[ j, 8 ] <- semd( dt[[i]], dt[[1]], dir=TRUE, a=a ) + + # histogram emd, if possible + if( !dec ){ + detscores[ j, 9 ] <- hemd( hists[i,], hists[1,] ) + } + } + } + + # maybe calculate the probabilistic scores as well + if( n > 2 ){ + ensscores <- list() + # observed mean spectrum + dto <- c( dt[[1]] ) + + #re-format predicted mean spectra + dtf <- c( dt[[2]] ) + for( i in 3:n ) dtf <- cbind( dtf, c(dt[[i]]) ) + # get the energy score + ensscores$en <- energyscore( dto, dtf ) + if( !dec ){ + ensscores$he <- hemd( hists[1,], colMeans( hists[-1,] ), mids=mi ) + } + } + + # save the settings + settings <- data.frame( dec=dec, xmin=xmin, log=log, a=a, nbr=nbr, rsm=rsm, Nx=Nx, Ny=Ny, J=J, boundaries=boundaries ) + + # create the results + res <- list( settings=settings, centres=cen, detscores=detscores ) + if( !dec ) res$hists <- hists + if( n > 2 ) res$ensscores <- ensscores + if( return_specs ) res$dt <- dt + + # add the time + res$time <- unname( proc.time() - t0 )[3] + + class( res ) <- "sadverif" + + return( res ) +} + + +#' @rdname sadverif +#' @export +plot.sadverif <- function(x, ...){ + # define colors + cols <- 1:nrow( x$cen ) + + # prepare the hexagon for our plot + phi <- pi*60/180*0:5 + xx <- cos( phi ) + yy <- sin( phi ) + # get the x and y component you want to plot + xy <- apply(x$cen[,1:3], 1, cen2xy) + + # plot the centre in the x-y-plane + graphics::plot( xx, yy, type="n" ) + graphics::abline( h=0, v=0, lty=3 ) + graphics::points( c(xx, xx[1]), c(yy, yy[1]), type="b", pch=NA, cex=1.5 ) + graphics::text( xx, yy, labels=paste0(seq(15,,30,6),"\u00B0") ) + graphics::text( xy[ 1, ], xy[2,], col=cols, labels=rownames(x$cen) ) + graphics::title( main="anisotropy and direction" ) + + + # dotchart for the central scales + graphics::dotchart( xy[3,], xlim=c(1,x$settings$J), col=cols ) + graphics::abline(v=xy[3,1], lty=2) + graphics::title( main="central scale" ) + + # get the scores + sc <- x$detscores + + # find the pareto-set in the dz-dxy-plane + winners <- getpareto( abs( sc[ ,c(3,6),drop=FALSE ] ) ) + fonts <- rep( 1, nrow(sc) ) + fonts[winners] <- 2 + lab <- rownames(sc) + lab[winners] <- paste0( lab[winners], "*" ) + # plot dz against dxy + if( any( !is.na( sc[,3] + sc[,6] ) ) ){ + graphics::plot( abs(sc[,3]), sc[,6], type="n", xlab="|dz|", ylab="dxy", + xlim=range(abs(sc[,3]), na.rm=TRUE)+c(-.1,.1), + ylim=range(abs(sc[,6]), na.rm=TRUE)+c(-.1,.1) + ) + graphics::grid() + graphics::text( abs(sc[,3]), sc[,6], col=cols[-1], labels=lab, font=fonts ) + graphics::title( main="scale- and directional error" ) + } + + # plot the histograms if we have them + if( !is.null( x$hists ) ){ + hists <- x$hists + mi <- colnames(hists) + graphics::plot( mi, hists[1,], col=cols[1], type="l", ylim=range(hists), + xlab="scale", ylab="density" ) + for( i in 2:nrow(hists) ) graphics::points( mi, hists[i,], col=cols[i], type="l" ) + graphics::legend( x="topleft", lty=1, col=cols, legend=rownames( x$cen ), bty="n" ) + graphics::title( main="histograms of central scales" ) + } +} + +#' @rdname sadverif +#' @export +summary.sadverif <- function(object, ...){ + with(object,{ + nam <- rownames(detscores) + cat( paste0("\nVerification of ", paste(nam[-1], collapse=", "), " against ", nam[1], ".\n\n" ) ) + cat( "Deterministic verification of the individual forecasts:\n") + print( detscores, digits=2 ) + if( nrow(detscores) > 1 ){ + winners <- getpareto( abs(detscores[,c(3,6)]) ) + cat( "\nMembers of the pareto set (based on dxy and dz):\n" ) + cat( nam[winners], "\n" ) + cat( "\nEnsemble scores:\n") + print( unlist( ensscores), digits=2 ) + } + cat( "\n settings:\n" ) + print(settings) + cat( "\n took ", round(time, 2), " seconds.\n" ) + }) +} diff --git a/R/scores.R b/R/scores.R new file mode 100644 index 0000000..7af41a8 --- /dev/null +++ b/R/scores.R @@ -0,0 +1,94 @@ +#' spectral emd +#' +#' earth mover's distance between dual-tree wavelet spectra +#' @param dt1,dt2 forecast and observed spectrum +#' @param a ratio between scale- and directional component +#' @param dir whether or not to include direction information +#' @return a single value, the emd. If \code{dir=FALSE}, the value is signed, indicating the direction of the scale error. +#' @export +semd <- function( dt1, dt2, a=1, dir=TRUE ){ + if( sum(dt1)==0 | sum(dt2)==0 ) return( NA ) + J <- nrow( dt1 ) + + if( dir ){ + + dt1 <- c( dt1 )/sum( dt1 ) + dt2 <- c( dt2 )/sum( dt2 ) + + d <- rep( 1:6, each=J ) + + x <- a*cos( (d-1)*60*pi/180 )*( J-1 )/2 + y <- a*sin( (d-1)*60*pi/180 )*( J-1 )/2 + z <- rep( 1:J, 6 ) + + dt1 <- cbind( dt1, x, y, z ) + dt2 <- cbind( dt2, x, y, z ) + + }else{ + z1 <- dualtrees::dt2cen(dt1)[,,3] + z2 <- dualtrees::dt2cen(dt2)[,,3] + dt1 <- rowMeans( dt1 ) + dt2 <- rowMeans( dt2 ) + dt1 <- cbind( dt1/sum(dt1), 1:J ) + dt2 <- cbind( dt2/sum(dt2), 1:J ) + + } + res <- emdist::emd( dt1, dt2 ) + if(!dir) res <- res*sign( z1 - z2 ) + + return( res ) +} + +#' histogram emd +#' +#' Earth Mover's Distance between two histograms, given as vectors +#' @param h1,h2 vectors of non-negtaive numbers representing two histograms +#' @param mids the bin mids corresponding to the histograms. Can also be given via the names of \code{h1}. +#' @return the value of the EMD +#' @export +hemd <- function( h1, h2, mids=NULL ){ + if( is.null(mids) ) mids <- as.numeric( names(h1) ) + if( is.null(mids) ) stop( "give me the mids" ) + + z1 <- sum( h1*mids ) / sum(h1) + z2 <- sum( h2*mids ) / sum(h2) + + res <- emdist::emd( cbind(h1,mids), cbind(h2,mids) )*sign( z1-z2 ) + return(res) +} + + +energyscore <- function( obs, forc ){ + nx <- nrow(forc) + nm <- ncol(forc) + + en1 <- mean( sqrt( colSums( ( forc - obs )**2) ) ) + en2 <- 0 + for( i in 1:nm ) en2 <- en2 + sum( sqrt( colSums( ( forc[,-i,drop=FALSE] - forc[,i] )**2) ) )/( nm**2 ) + res <- en1 - en2 / 2 + return(res) +} + +#' Find the pareto set +#' +#' Determine the set of pareto optimal forecasts in a matrix of scores +#' @param scores a matrix of negatively oriented scores where the rows correspond to different forecasts and the columns denote different scores. +#' @return a vector of indices indicating all members of the pareto set. +#' @details The Pareto set contains all those forecasts for which no other forecast is better in every respect. In this function, we assume that all scores are negatively oriented, "better" therefore means lower values. +#' @note This function becomes very memory hungry if you have more than 1000 forecasts, be careful. +#' @export +getpareto <- function( scores ){ + scores <- scores[ !is.na( rowSums( scores ) ), , drop=FALSE ] + if( nrow(scores) < 2 ){ + winners <- NULL + }else{ + ns <- ncol(scores) + isworse <- TRUE + for( i in 1:ns ) isworse <- isworse & outer( scores[,i,drop=FALSE], + scores[,i,drop=FALSE], + function(x,y) x > y + ) + winners <- which( rowSums( isworse ) < 1 ) + } + return( winners ) +} diff --git a/data/rrain.rda b/data/rrain.rda new file mode 100644 index 0000000..e409b04 Binary files /dev/null and b/data/rrain.rda differ diff --git a/man/getpareto.Rd b/man/getpareto.Rd new file mode 100644 index 0000000..f0524c1 --- /dev/null +++ b/man/getpareto.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scores.R +\name{getpareto} +\alias{getpareto} +\title{Find the pareto set} +\usage{ +getpareto(scores) +} +\arguments{ +\item{scores}{a matrix of negatively oriented scores where the rows correspond to different forecasts and the columns denote different scores.} +} +\value{ +a vector of indices indicating all members of the pareto set. +} +\description{ +Determine the set of pareto optimal forecasts in a matrix of scores +} +\details{ +The Pareto set contains all those forecasts for which no other forecast is better in every respect. In this function, we assume that all scores are negatively oriented, "better" therefore means lower values. +} +\note{ +This function becomes very memory hungry if you have more than 1000 forecasts, be careful. +} diff --git a/man/hemd.Rd b/man/hemd.Rd new file mode 100644 index 0000000..4dc418d --- /dev/null +++ b/man/hemd.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scores.R +\name{hemd} +\alias{hemd} +\title{histogram emd} +\usage{ +hemd(h1, h2, mids = NULL) +} +\arguments{ +\item{h1, h2}{vectors of non-negtaive numbers representing two histograms} + +\item{mids}{the bin mids corresponding to the histograms. Can also be given via the names of \code{h1}.} +} +\value{ +the value of the EMD +} +\description{ +Earth Mover's Distance between two histograms, given as vectors +} diff --git a/man/prepare_sad.Rd b/man/prepare_sad.Rd new file mode 100644 index 0000000..9e08aa0 --- /dev/null +++ b/man/prepare_sad.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lowlevel.R +\name{prepare_sad} +\alias{prepare_sad} +\title{prepare a sad forecast for verification} +\usage{ +prepare_sad(x, xmin = 0.1, log = TRUE, rsm = 0, Nx = NULL, + Ny = NULL, boundaries = "pad") +} +\arguments{ +\item{x}{a list of 2 or more 2D matrices with equal sizes and no missing or inifinite values, as required by \code{as.sadforecast}} + +\item{xmin}{values smaller than \code{xmin} are set to zero} + +\item{log}{logical, do you want to log-transfrom the data? (recommended for precipitation)} + +\item{rsm}{number of pixels which are linearly smoothed at the edge} + +\item{Nx}{size to which the data is extended in x-direction} + +\item{Ny}{size to which the data is extended in y-direction} + +\item{boundaries}{how to handle the boundary conditions, either "pad", "mirror" or "periodic"} +} +\value{ +an object of class \code{sadforecast} which has been prepared in the desired way. +} +\description{ +remove small values, apply log-transform, smooth borders, handle boundary conditions +} +\details{ +the positions within the extended field where the original field resides are output as attributes "px", "py" of the result. The other input parameters are saved as attributes of the result as well. +} +\examples{ +data( rrain ) +ra <- list( rrain[2,4,,], rrain[3,9,,] ) +ra <- prepare_sad( ra, rsm=0, Nx=256, boundaries="mirror", log=FALSE ) +plot(ra) +} diff --git a/man/raincols.Rd b/man/raincols.Rd new file mode 100644 index 0000000..e2ae61f --- /dev/null +++ b/man/raincols.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lowlevel.R +\docType{data} +\name{raincols} +\alias{raincols} +\title{rain color scale} +\format{An object of class \code{character} of length 8.} +\usage{ +raincols +} +\description{ +eight shades of blue used in \code{plot.sadforecast} +} +\keyword{datasets} diff --git a/man/rrain.Rd b/man/rrain.Rd new file mode 100644 index 0000000..9a71431 --- /dev/null +++ b/man/rrain.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{rrain} +\alias{rrain} +\title{Random Rain} +\format{A \code{4x10x128x128} matrix} +\source{ +simulated using the 'RandomFields' package, code available at <10.5281/zenodo.3257511> +} +\usage{ +data(rrain) +} +\description{ +Randomly simulated synthetic rain fields with Matern covariances +} +\details{ +These fields were used in Buschow et al. (2019) . The first array corresponds to the four model configurations from that paper (different roughness nu and scale sc), the second dimension contains ten realizations for each model. +} +\examples{ +data(rrain) +} +\keyword{datasets} diff --git a/man/sadcorrect.Rd b/man/sadcorrect.Rd new file mode 100644 index 0000000..d62048e --- /dev/null +++ b/man/sadcorrect.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sadcorrect.R +\name{sadcorrect} +\alias{sadcorrect} +\title{correct structure errors} +\usage{ +sadcorrect(x, xmin = 0.1, log = TRUE, rsm = 0, Nx = NULL, + Ny = NULL, J = NULL, boundaries = "pad", direction = TRUE) +} +\arguments{ +\item{x}{a list of equally sized matrices, the first element is assumed to be the observation} + +\item{xmin}{values smaller than \code{xmin} are set to zero} + +\item{log}{logical, do you want to log-transfrom the data? (recommended for precipitation)} + +\item{rsm}{number of pixels which are linearly smoothed at the edge} + +\item{Nx}{size to which the data is extended in x-direction, has to be a whole power of 2} + +\item{Ny}{size to which the data is extended in y-direction, has to be a whole power of 2} + +\item{J}{largest scale considered} + +\item{boundaries}{how to handle the boundary conditions, either "pad", "mirror" or "periodic"} + +\item{direction}{if \code{TRUE}, scale and direction are corrected, otherwise only scale} +} +\value{ +an object of class \code{sadforecast} +} +\description{ +use the inverse 'dtcwt' to correct errors in scale, anisotropy and direction +} +\details{ +The algorithm performs the following steps: +\enumerate{ + \item remove values below \code{xmin} + \item if \code{log=TRUE} log-transform all fields + \item set all fields to zero mean, unit variance + \item apply \code{dtcwt} to all fields + \item loop over forecasts and scales. If \code{direction=TRUE} loop over the six directions. Multiply forecast energy at each location by the ratio of total observed energy to total forecast energy at that scale (and possibly direction) + \item apply \code{idtcwt} to all forecasts + \item reset means and variance of the forecasts to their original values + \item if \code{log=TRUE} invert the log-transform + \item return the list of corrected fields +} +} +\examples{ +data(rrain) +ra <- as.sadforecast( list( rrain[2,1,,], rrain[3,1,,], rrain[3,2,,], rrain[3,3,,] ) ) +ra_c <- sadcorrect( ra, rsm=10 ) +plot(ra_c) +} diff --git a/man/sadforecast.Rd b/man/sadforecast.Rd new file mode 100644 index 0000000..5793f0c --- /dev/null +++ b/man/sadforecast.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lowlevel.R, R/plotfunctions.R +\name{sadforecast} +\alias{sadforecast} +\alias{as.sadforecast} +\alias{plot.sadforecast} +\title{class for a list of forecasts} +\usage{ +as.sadforecast(x) + +\method{plot}{sadforecast}(x, mfrow = NULL, col = NULL, ...) +} +\arguments{ +\item{x}{a list of 2 or more 2D matrices with equal sizes and no missing or inifinite values} + +\item{mfrow}{vector with the number of rows and columns you would like in the plot} + +\item{col}{color scale for the plot} + +\item{...}{further arguments passed to \code{image}} +} +\value{ +an object of class \code{sadforecast} +} +\description{ +check that a list of forecasts fulfills all requirements to be verified by our method +} +\details{ +\code{as.sadforecast} does nothing except check that everything is as it should be, add the attributes that can be changed by \code{prepare_sad} and provide a method for quick plots of the data. +} +\examples{ +data( rrain ) +ra <- list( rrain[1,1,,], rrain[4,5,,], rrain[2,7,,] ) +ra <- as.sadforecast(ra) +plot(ra) +} diff --git a/man/sadverif.Rd b/man/sadverif.Rd new file mode 100644 index 0000000..ad0eb03 --- /dev/null +++ b/man/sadverif.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sadverif.R +\name{sadverif} +\alias{sadverif} +\alias{plot.sadverif} +\alias{summary.sadverif} +\title{dual-tree verification} +\usage{ +sadverif(x, dec = TRUE, xmin = 0.1, log = TRUE, a = 1, nbr = 33, + rsm = 0, Nx = NULL, Ny = NULL, J = NULL, boundaries = "pad", + return_specs = FALSE) + +\method{plot}{sadverif}(x, ...) + +\method{summary}{sadverif}(object, ...) +} +\arguments{ +\item{x}{a list of equally sized matrices, the first element is assumed to be the observation} + +\item{dec}{logical, do you want to use the decimated transform} + +\item{xmin}{values smaller than \code{xmin} are set to zero} + +\item{log}{logical, do you want to log-transfrom the data? (recommended for precipitation)} + +\item{a}{relative weight of directional errors compared to scale errors in \code{semdd}} + +\item{nbr}{number of breaks for the scale histograms, has no effect if \code{dec=TRUE}} + +\item{rsm}{number of pixels which are linearly smoothed at the edge} + +\item{Nx}{size to which the data is extended in x-direction} + +\item{Ny}{size to which the data is extended in y-direction} + +\item{J}{largest scale considered} + +\item{boundaries}{how to handle the boundary conditions, either "pad", "mirror" or "periodic"} + +\item{return_specs}{if \code{TRUE}, the spatial mean spectra are returned as well} + +\item{...}{further arguments, currently ignored.} + +\item{object}{object of class sadverif} +} +\value{ +an object of class \code{sadverif}, containing the following elements +\describe{ + \item{settings}{ a dataframe containing the parameters that were originally passed to dtverif } + \item{centres}{ a matrix cotaining the anisotropy \code{rho}, angle \code{phi} and central scale \code{z} derived from the mean spectra. Rain area and sum are included as well.} + \item{detscores}{ a matrix containing the differences in centre components, the direction/anisotropy score \code{dxy}, the emd between direction-averaged spectra (\code{semd}) and the emd between the directional spectra (\code{semdd}). If \code{dec=FALSE}, the emd between the scale histograms, hemd, is included as well. } + \item{time}{ the time the calculation took in seconds } +} +if there is more than one forecast, the ensemble scores SpEn and (if available), hemd are computed as well, treating all forecasts as members of the ensemble to be verified. +} +\description{ +verify the scale, anisotropy and direction of a number of forecasts +} +\details{ +each element of x is transformed via \code{dtcwt} from the 'dualtrees' package. Scores and centres based on the mean spectra are calculated. If \code{dec=FALSE}, scale histograms and the corresponding score \code{hemd} are calcualted as well. +} +\examples{ +oldpar <- par(no.readonly=TRUE) +on.exit(par(oldpar)) +data(rrain) +ra <- as.sadforecast( list( rrain[1,1,,], rrain[1,2,,], rrain[2,1,,], rrain[3,1,,] ) ) +plot(ra) +verif <- sadverif( ra, log=FALSE, xmin=0 ) +summary(verif) +par( mfrow=c(2,2) ) +plot( verif ) +} +\references{ +Selesnick, I.W., R.G. Baraniuk, and N.C. Kingsbury (2005) +Buschow et al. (2019) +Buschow and Friederichs (2020) +} diff --git a/man/semd.Rd b/man/semd.Rd new file mode 100644 index 0000000..c817597 --- /dev/null +++ b/man/semd.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/scores.R +\name{semd} +\alias{semd} +\title{spectral emd} +\usage{ +semd(dt1, dt2, a = 1, dir = TRUE) +} +\arguments{ +\item{dt1, dt2}{forecast and observed spectrum} + +\item{a}{ratio between scale- and directional component} + +\item{dir}{whether or not to include direction information} +} +\value{ +a single value, the emd. If \code{dir=FALSE}, the value is signed, indicating the direction of the scale error. +} +\description{ +earth mover's distance between dual-tree wavelet spectra +}