version 1.2.0

cran · Nov 2, 2007 · 9c2ade2 · 9c2ade2
1 parent 05f3168
commit 9c2ade2
Show file tree

Hide file tree

Showing 9 changed files with 434 additions and 36 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,12 +1,12 @@
 Package: kzs
 Type: Package
-Title: Kolmogorov-Zurbenko Spline
-Version: 1.1.0
-Date: 2007-07-01
-Author: Derek Cyr <dc896148@albany.edu> and Igor Zurbenko <igorg.zurbenko@gmail.com>.
-Maintainer: Derek Cyr <dc896148@albany.edu>
-Depends: R (>= 2.5.0), graphics, stats 
-Description: A collection of functions utilizng splines to smooth a noisy data set in order 
-	     to estimate its underlying signal.
+Title: Kolmogorov-Zurbenko Spline Smoothing and Applications
+Version: 1.2.0
+Date: 2007-11-02
+Author: Derek Cyr <cyr.derek@gmail.com> and Igor Zurbenko <igorg.zurbenko@gmail.com>.
+Maintainer: Derek Cyr <cyr.derek@gmail.com>
+Depends: R (>= 2.6.0), graphics, lattice, stats 
+Description: A collection of functions utilizing splines to construct a smooth estimate 
+             of a signal buried in noise.
 License: GPL version 2 or newer
-Packaged: Sun Jul  1 15:30:14 2007; Owner
+Packaged: Fri Nov  2 23:36:30 2007; Owner
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,2 +1,2 @@
-import(graphics, stats)
-export(kzs)
+import(graphics, lattice, stats)
+export(argkzs, kzs, argskzs, skzs)
diff --git a/R/argkzs.R b/R/argkzs.R
@@ -0,0 +1,11 @@
+# Report the limits that the 'delta' and 'h' arguments of kzs must respect
+# for the input column 'x' of 'data'.
+#
+# Arguments:
+#   data  data frame holding the input variable.
+#   x     column of 'data' (as used in data[, x]) holding the input values.
+#
+# Returns a list of two message strings: 'delta' quotes the spread
+# (max - min) of the column, and 'h' quotes the smallest positive gap
+# between consecutive sorted values.
+argkzs <- function(data, x) {
+  xvals <- data[, x]
+  spread <- max(xvals) - min(xvals)
+  # Zero gaps come from tied values and must not count as the minimum step.
+  gaps <- diff(sort(xvals))
+  smallest_gap <- min(gaps[gaps > 0])
+  list(
+    delta = sprintf("delta must be a real number much less than %s", spread),
+    h = sprintf("h must be a positive real number less than %s", smallest_gap)
+  )
+}
+
diff --git a/R/argskzs.R b/R/argskzs.R
@@ -0,0 +1,17 @@
+# Report the limits that the 'delta1', 'delta2', 'h1' and 'h2' arguments of
+# skzs must respect for the two input columns 'x1' and 'x2' of 'data'.
+#
+# Arguments:
+#   data    data frame holding the two input variables.
+#   x1, x2  columns of 'data' (as used in data[, x1]) holding the inputs.
+#
+# Returns a list of four message strings; each 'delta' entry quotes the spread
+# (max - min) of its column and each 'h' entry quotes the smallest positive
+# gap between consecutive sorted values of its column.
+argskzs <- function(data, x1, x2) {
+  spread <- function(v) max(v) - min(v)
+  # Zero gaps come from tied values and must not count as the minimum step.
+  smallest_gap <- function(v) {
+    gaps <- diff(sort(v))
+    min(gaps[gaps > 0])
+  }
+  first <- data[, x1]
+  second <- data[, x2]
+  list(
+    delta1 = sprintf("delta1 must be a real number much less than %s", spread(first)),
+    delta2 = sprintf("delta2 must be a real number much less than %s", spread(second)),
+    h1 = sprintf("h1 must be a positive real number less than %s", smallest_gap(first)),
+    h2 = sprintf("h2 must be a positive real number less than %s", smallest_gap(second))
+  )
+}
+
diff --git a/R/skzs.R b/R/skzs.R
@@ -0,0 +1,56 @@
+# Two-dimensional Kolmogorov-Zurbenko spline smoother.
+#
+# Lays a uniform grid (steps h1, h2) over the x1/x2 plane, extended by half a
+# window beyond the data on every side, and sets each grid node to the mean of
+# the responses whose inputs lie within delta1/2 of the node along x1 and
+# within delta2/2 along x2.  The pass is repeated k times, each pass smoothing
+# the grid produced by the previous one.
+#
+# Arguments:
+#   data            data frame holding the two inputs and the response.
+#   y, x1, x2       columns of 'data' (as used in data[, x1]) holding the
+#                   response and the two input variables.
+#   delta1, delta2  full window widths along x1 and x2; each must be smaller
+#                   than the range of its input.
+#   h1, h2          grid steps along x1 and x2; each must be positive and
+#                   smaller than the smallest positive gap between sorted
+#                   values of its input.
+#   k               number of smoothing passes; must be at least 1 (a
+#                   fractional value is truncated, as 1:k did).
+#   show.edges      if FALSE, grid nodes outside the range of the original
+#                   inputs are dropped from the result.
+#   plot            if TRUE, draw a lattice wireframe of the result.
+#
+# Returns a data frame with columns xk1, xk2 (grid coordinates) and zk
+# (smoothed response, a plain numeric column).  Nodes whose window contained
+# no observations are omitted.
+skzs <- function(data, y, x1, x2, delta1, delta2, h1, h2, k=1, show.edges=FALSE, plot=TRUE)
+{
+  gap1 <- diff(sort(data[, x1]))
+  gap2 <- diff(sort(data[, x2]))
+  # A non-positive step would otherwise only fail later, inside seq().
+  if (h1 <= 0)
+    stop("Invalid 'h1': Value should be a positive real number")
+  if (h2 <= 0)
+    stop("Invalid 'h2': Value should be a positive real number")
+  if (h1 >= min(gap1[gap1 > 0]))
+    stop("Invalid 'h1': Value should be much less than the minimum difference of consecutive x1 values")
+  if (h2 >= min(gap2[gap2 > 0]))
+    stop("Invalid 'h2': Value should be much less than the minimum difference of consecutive x2 values")
+  if (delta1 >= (max(data[, x1]) - min(data[, x1])))
+    stop("Invalid 'delta1': Value should be much less than the difference of the max and min x1 values")
+  if (delta2 >= (max(data[, x2]) - min(data[, x2])))
+    stop("Invalid 'delta2': Value should be much less than the difference of the max and min x2 values")
+  # 1:k counts downwards for k < 1 (k = 0 ran two passes), so reject it here.
+  if (!is.numeric(k) || length(k) != 1 || is.na(k) || k < 1)
+    stop("Invalid 'k': Value should be a whole number greater than or equal to 1")
+
+  # Remember the extent of the raw inputs; the grid grows past it on each pass.
+  x1range <- range(data[, x1])
+  x2range <- range(data[, x2])
+  half1 <- delta1 / 2
+  half2 <- delta2 / 2
+
+  for (pass in seq_len(as.integer(k))) {
+    px1 <- data[, x1]
+    px2 <- data[, x2]
+    py <- as.vector(data[, y])
+    grid <- expand.grid(
+      xk1 = seq(min(px1) - half1, max(px1) + half1, h1),
+      xk2 = seq(min(px2) - half2, max(px2) + half2, h2)
+    )
+    gx1 <- grid$xk1
+    gx2 <- grid$xk2
+    grid$zk <- vapply(seq_len(nrow(grid)), function(j) {
+      inside <- abs(px1 - gx1[j]) <= half1 & abs(px2 - gx2[j]) <= half2
+      # An empty window gives NaN, which na.omit() removes below.
+      mean(py[which(inside)])
+    }, numeric(1))
+    data <- na.omit(grid)
+    # Later passes smooth the grid itself, whose columns are xk1, xk2, zk.
+    x1 <- 1
+    x2 <- 2
+    y <- 3
+  }
+
+  if (!show.edges) {
+    within1 <- data[, 1] >= x1range[1] & data[, 1] <= x1range[2]
+    within2 <- data[, 2] >= x2range[1] & data[, 2] <= x2range[2]
+    data <- data[within1 & within2, ]
+  }
+  if (plot) {
+    # print() draws the trellis object; the argument named 'plot' is a logical.
+    print(wireframe(zk ~ xk1 * xk2, data, drape = TRUE, colorkey = TRUE, scales = list(arrows = FALSE)))
+  }
+  return(data)
+}
+
diff --git a/man/argkzs.Rd b/man/argkzs.Rd
@@ -0,0 +1,70 @@
+\name{argkzs}
+\alias{argkzs}
+\title{ Argument Limits for KZS }
+\description{
+  This function calculates the upper bounds on the arguments \code{delta} and \code{h} 
+  of the KZS function.
+}
+\usage{
+argkzs(data, x)
+}
+\arguments{
+  \item{data}{ 
+    a data frame of paired values X and Y representing pairs (Xi, Yi), i = 1,..., n, where 
+    X and Y are real values.  This should be the data frame that is to be used with KZS.
+  }
+  \item{x}{ 
+    an integer specifying the position of the column in the data frame containing the one 
+    dimensional input variable, X, coordinates.
+  }
+}
+\details{
+  In the KZS function, the argument \code{delta} is the physical range of smoothing in terms of 
+  unit values of X; the argument \code{h} is a scale reading of all outcomes of the algorithm.  
+  More specifically, \code{h} is the interval width of a uniform scale overlaying the X axis.    
+  The purpose of this function is to give an upper bound on the values of \code{delta} 
+  and \code{h} so that users may select appropriate values that satisfy all restrictions.  This 
+  function eliminates any guess-work involved in choosing suitable values for \code{delta} and 
+  \code{h} and should be used prior to KZS in order to save time and increase efficiency of use. 
+}
+\value{
+  a list containing two elements:
+  \item{delta }{the bounding value for the argument \code{delta}}
+  \item{h }{the bounding value for the argument \code{h}}
+}
+\author{ Derek Cyr \email{cyr.derek@gmail.com} and Igor Zurbenko \email{igorg.zurbenko@gmail.com} }
+\seealso{ \code{\link{kzs}} }
+\examples{
+  #This example uses the same data from the KZS example
+
+  # Define the time sequence
+  t <- seq(from = -round(400*pi), to = round(400*pi), by = .25) 
+
+  # Positive t (includes time = 0)
+  tp <- seq(from = 0, to = round(400*pi), by = .25)	
+
+  # Negative t	      
+  tn <- seq(from = -round(400*pi), to = -.25, by = .25)                                                       
+
+  # Positive side of signal
+  signalp <- 0.5*sin(sqrt((2*pi*abs(tp))/200))
+
+  # Negative side of signal             
+  signaln <- 0.5*sin(-sqrt((2*pi*abs(tn))/200))
+
+  # Appending into one signal           
+  signal <- append(signaln, signalp, after = length(tn))    
+
+  # Randomly generate noise from the standard normal distribution
+  et <- rnorm(length(t), mean = 0, sd = 1)
+
+  # Add the noise to the signal
+  yt <- et + signal
+
+  # Data frame of (t,yt) 
+  pts <- data.frame(cbind(t,yt))
+
+  argkzs(pts, 1)
+}
+\keyword{ smooth }
+\keyword{ nonparametric }
diff --git a/man/argskzs.Rd b/man/argskzs.Rd
@@ -0,0 +1,63 @@
+\name{argskzs}
+\alias{argskzs}
+\title{ Argument Limits for SKZS }
+\description{
+  This function calculates the upper bounds on the arguments \code{delta1}, 
+  \code{delta2} and \code{h1}, \code{h2} of SKZS.
+}
+\usage{
+argskzs(data, x1, x2)
+}
+\arguments{
+  \item{data}{ 
+    a data frame to be used with SKZS.  Only the columns corresponding to the input variables 
+    X = (\code{x1}, \code{x2}) are needed; the column corresponding to the response variable is 
+    optional and plays no part in the use of this function.
+  }
+  \item{x1}{ 
+    an integer specifying the position of the column in the data frame containing \code{x1} values.
+  }
+  \item{x2}{ 
+    an integer specifying the position of the column in the data frame containing \code{x2} values.
+  }
+}
+\details{
+  In the SKZS function (similarly to the \code{\link{kzs}} function), the arguments \code{delta1} and 
+  \code{delta2} are the physical ranges of smoothing in terms of the unit values of the input variables 
+  \code{x1} and \code{x2}; the arguments \code{h1} and \code{h2} are scale readings of all outcomes of 
+  the algorithm; more specifically, \code{h1} and \code{h2} are values denoting the interval widths of 
+  two uniform scales overlapping the \code{x1} and \code{x2} axes.  The restrictions on the arguments 
+  are the same as for the one dimensional input variable in KZS, only here, the restrictions are extended 
+  to the two-dimensional input variables \code{x1} and \code{x2}.  The purpose of this function is to give 
+  an upper bound on the values \code{delta1, delta2} and \code{h1, h2} so that users may select appropriate 
+  values that satisfy all restrictions.  This function eliminates any guess-work involved in choosing a 
+  satisfying value for the arguments and should be used prior to using SKZS in order to save time and 
+  increase efficiency of use. 
+}
+\value{
+  a list containing the following:
+  \item{delta1 }{the bounding value for the argument \code{delta1}}
+  \item{delta2 }{the bounding value for the argument \code{delta2}}
+  \item{h1 }{the bounding value for the argument \code{h1}}
+  \item{h2 }{the bounding value for the argument \code{h2}}
+}
+\author{ Derek Cyr \email{cyr.derek@gmail.com} and Igor Zurbenko \email{igorg.zurbenko@gmail.com} }
+\seealso{ \code{\link{skzs}} }
+\examples{
+### Recall the SKZS example of the Sinc function
+
+# Setup the data
+u <- seq(-3*pi, 3*pi, 3*pi/100)
+v <- u
+x1 <- sample(u, size = 4000, replace = TRUE)
+x2 <- sample(v, size = 4000, replace = TRUE)
+d <- data.frame(cbind(x1,x2))
+df <- unique(d)
+df$z <- sin(sqrt(df$x1^2 + df$x2^2)) / sqrt(df$x1^2 + df$x2^2)
+df$z[is.na(df$z)] <- 1
+
+# Return the bounding values for each argument
+argskzs(df, 1, 2)
+}
+\keyword{ smooth }
+\keyword{ nonparametric }
diff --git a/man/kzs.Rd b/man/kzs.Rd
@@ -2,64 +2,65 @@
 \alias{kzs}
 \title{ Kolmogorov-Zurbenko Spline }
 \description{
-  The Kolmogorov-Zurbenko Spline function utilizes the moving average to construct
-  a piece-wise estimator of the underlying signal of the given input data.
+  The KZS utilizes splines to construct a smooth estimate of a signal buried in noise.
 }
 \usage{
 kzs(x, delta, h, k = 1, show.edges = FALSE)
 }
 \arguments{
   \item{x}{ 
-    a data frame of paired values X and Y. The data frame should consist of two columns
-    of data representing pairs (Xi, Yi), i = 1,..., \emph{n} and X, Y are real values; the first
-    column of data represents X values and the second column represents the corresponding
-    Y values.
+    a data frame of paired values X and Y. The data frame needs two columns of data 
+    representing pairs (Xi, Yi), i = 1,..., \emph{n} and X, Y are real values; the first
+    column of data represents the one dimensional input variable, X, and the second 
+    column, Y, denotes the variable to be used as the response.
   }
   \item{delta}{ 
-    the physical range of smoothing in terms of unit values of X.\cr
-	\emph{Restriction:} \eqn{\code{delta << Xn-X1}}  
+    the physical range of smoothing in terms of unit values of \code{X}.\cr
+	\emph{Restriction:} \eqn{\code{delta << (max(X) - min(X))}}  
   }
   \item{h}{ 
     a scale reading of all outcomes of the algorithm. More specifically, \code{h} is the interval
     width of a uniform scale covering the interval \eqn{\code{(Xn - delta/2, Xn + delta/2)}}.\cr
-	\emph{Restriction:} \eqn{\code{h < min(Xi+1 - Xi)}} and \eqn{\code{h > 0}}
+	\emph{Restriction:} \eqn{\code{h < min{X(i+1) - X(i)}}} and \eqn{\code{h > 0}}
   }
   \item{k}{ 
     the number of iterations the function will execute; \code{k} may also be interpreted as
     the order of smoothness (as a polynomial of degree \code{k-1}).  By default, \code{k} is set to perform
     a single iteration.
   }
   \item{show.edges}{ 
-    a logical indicating whether or not to display the resulting data beyond the range of X
-    values of the user-supplied data. If \code{FALSE}, then the extended edges are suppressed. By
-    default, this parameter is set to \code{FALSE}.
+    a logical indicating whether or not to display the resulting data beyond the range of X
+    values of the user-supplied data. If \code{FALSE}, then the extended edges are suppressed. By
+    default, this parameter is set to \code{FALSE}.
   }
 }
 \details{
   The relation between variables Y and X as a function [namely, Y(x)] of a current value of 
   X = x is often desired as a result of practical research. Usually we search for some simple 
   function Y(x) when given a data set of pairs (Xi, Yi). These pairs frequently resemble a 
   noisy plot, and thus Y(x) is desired to be a smooth outcome from the original data to capture 
-  important patterns in the data, while leaving out the noise. The \code{KZS} function estimates a 
-  solution to this problem through use of splines, which is a nonparametric estimator of a 
-  function.  Given a data set of pairs (Xi, Yi), splines estimate the smooth values of Y from 
-  X's. The \code{KZS} function Y(x) averages all values of Yi for all Xi within the range \code{delta} around 
-  each scale reading \code{hi} along the variable X. The \code{KZS} algorithm is designed to smooth all fast 
-  fluctuations in Y within the \code{delta}-range in X, while keeping ranges more then \code{delta} untouched. 
-  The separation of short scales less than \code{delta} and long scales more than \code{delta} is becoming more 
-  effective with higher \code{k}, while effective range of separation is becoming \eqn{\code{delta}*sqrt(\code{k})}. 
+  important patterns in the data, while leaving out the noise. The KZS estimates a 
+  solution to this problem through use of splines, a particular nonparametric estimator of a function.  
+  Given a data set of pairs (Xi, Yi), splines estimate the smooth values of Y from X's. The KZS 
+  averages all values of Y for all X within the range \code{delta} around each scale reading 
+  \code{hi} along the variable X. The KZS algorithm is designed to smooth all fast fluctuations 
+  in Y within the \code{delta}-range in X, while keeping ranges more than \code{delta} untouched. The 
+  separation of short scales less than \code{delta} and long scales more than \code{delta} becomes 
+  more effective with higher \code{k}, while the effective range of separation becomes \eqn{\code{delta}*sqrt(\code{k})}. 
 }
 \value{
   a two-column data frame containing:
   \item{Xk }{X values resulting from execution of algorithm}
-  \item{Y(Xk) }{Y values resulting from execution of algorithm}
+  \item{Y(Xk) }{Response values resulting from execution of algorithm}
 }
 \references{ "Spline Smoothing." \url{http://economics.about.com/od/economicsglossary/g/splines.htm}}
-\author{ Derek Cyr \email{dc896148@albany.edu} and Igor Zurbenko \email{igorg.zurbenko@gmail.com} }
+\author{ Derek Cyr \email{cyr.derek@gmail.com} and Igor Zurbenko \email{igorg.zurbenko@gmail.com} }
+\seealso{ \code{\link{argkzs}} }
 \note{ 
-  The \code{KZS} function is designed for the general situation, including time series data. In many 
-  applications where variable X can be time, the \code{KZS} is resolving the problem of missing values in
-  time series or irregularly observed values in longitudinal data analysis.
+  KZS is designed for the general situation, including time series data. In many 
+  applications where the variable X can be time, KZS can resolve the problem of missing values in
+  time series or irregularly observed values in longitudinal data analysis.  KZS may take time to 
+  completely run depending on the size of the data set used and the number of iterations specified.
 }
 \examples{
   # This example was created with the intent to push the limits of KZS. The