version1.0

cran · Jun 26, 2007 · ba05459 · ba05459
commit ba05459
Show file tree

Hide file tree

Showing 5 changed files with 225 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,12 @@
+Package: kzs
+Type: Package
+Title: Kolmogorov-Zurbenko Spline
+Version: 1.0
+Date: 2007-06-26
+Author: Derek Cyr <dc896148@albany.edu> and Igor Zurbenko <igorg.zurbenko@gmail.com>.
+Maintainer: Derek Cyr <dc896148@albany.edu>
+Depends: R (>= 2.5.0), graphics, stats 
+Description: The Kolmogorov-Zurbenko Spline function utilizes the moving average to construct 
+	     a piece-wise estimator of the underlying signal of the given input data.
+License: GPL version 2 or newer
+Packaged: Wed Jun 27 15:28:28 2007; Owner
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,2 @@
+import(graphics, stats)
+export(kzs)
diff --git a/R/kzs.R b/R/kzs.R
@@ -0,0 +1,40 @@
+`kzs` <-
+function(x, delta, h, k=1, show.edges=FALSE)
+{
+if (h >= min(diff(sort(x[,1]))))
+stop("Invalid 'h': Value should be much less than the minimum difference of consecutive X values")  
+if (delta >= (max(x[,1])-min(x[,1])))
+stop("Invalid 'delta': Value should be much less than the difference of the max and min X values") 
+orig_data<-as.vector(x)
+xrange <- range(x)
+d <- delta/2
+v <- -delta/(2*h)
+for (i in 1:k) {
+data <- as.vector(x)                        
+maxx <- max(data[,1])                    
+minx <- min(data[,1])        
+yvals <- data[,2]                           
+ord <- sort(data[,1])                                                                                      
+Nk <- (maxx-minx)/h
+incrm <- seq(v,Nk-v,by=1)
+xk <- as.vector(ord[1]+(incrm*h))           
+zk <- numeric(length(xk))         
+for (j in 1:length(xk)) {
+w <- abs(ord-xk[j])              
+Ik <- yvals[w <= d]  
+zk[j] <- mean(Ik)   
+}
+Zk <- data.frame(zk)                      
+x <- data.frame(cbind(xk,Zk))              
+xf <- na.omit(x)                
+if (show.edges == FALSE){
+y2 <- x[-(1:(-v*k)),]
+y3 <- y2[-(nrow(y2):(nrow(y2)-(-v*k)+1)),]
+xj <- as.data.frame(y3)
+xf <- na.omit(xj)
+}
+plot(xf$zk~xf$xk,xlab=expression(paste(x[k])),ylab=expression(paste(Y(x[k]))),xlim=xrange,ylim=c(min(xf$zk),max(xf$zk)),type='l',pch=19,col='black')        
+}
+return(xf)
+}
+
diff --git a/man/kzs-package.Rd b/man/kzs-package.Rd
@@ -0,0 +1,37 @@
+\name{kzs-package}
+\alias{kzs-package}
+\docType{package}
+\title{ Kolmogorov-Zurbenko Spline }
+\description{
+A collection of functions utilizing splines to estimate the underlying 
+signal of the given input data.
+}
+\details{
+\tabular{ll}{
+Package: \tab kzs\cr
+Type: \tab Package\cr
+Version: \tab 1.0\cr
+Date: \tab 2007-06-26\cr
+License: \tab GPL Version 2 or later\cr
+}
+The relation between variables Y and X as a function [namely, Y(x)] of a current value of 
+X = x is often desired as a result of practical research. Usually we search for some simple 
+function Y(x) when given a data set of pairs (Xi, Yi). These pairs frequently resemble a 
+noisy plot, and thus Y(x) is desired to be a smooth outcome from the original data to capture 
+important patterns in the data, while leaving out the noise.\cr 
+The \code{KZS} function estimates a solution to this problem through use of splines, which is 
+a nonparametric estimator of a function.  Given a data set of pairs (Xi, Yi), splines estimate 
+the smooth values of Y from X's.
+}
+\author{
+Derek Cyr \email{dc896148@albany.edu} and Igor Zurbenko \email{igorg.zurbenko@gmail.com} 
+
+Maintainer: Derek Cyr <dc896148@albany.edu>
+}
+\references{
+"Spline Smoothing." \url{http://economics.about.com/od/economicsglossary/g/splines.htm}
+}
+\keyword{ package }
+\seealso{
+\code{\link[kzs:kzs]{kzs}}
+}
diff --git a/man/kzs.Rd b/man/kzs.Rd
@@ -0,0 +1,134 @@
+\name{kzs}
+\alias{kzs}
+\title{ Kolmogorov-Zurbenko Spline }
+\description{
+  The Kolmogorov-Zurbenko Spline function utilizes the moving average to construct
+  a piece-wise estimator of the underlying signal of the given input data.
+}
+\usage{
+kzs(x, delta, h, k = 1, show.edges = FALSE)
+}
+\arguments{
+  \item{x}{ 
+    a data frame of paired values X and Y. The data frame should consist of two columns
+    of data representing pairs (Xi, Yi), i = 1,..., \emph{n} and X, Y are real values; the first
+    column of data represents X values and the second column represents the corresponding
+    Y values.
+  }
+  \item{delta}{ 
+    the physical range of smoothing in terms of unit values of X.\cr
+	\emph{Restriction:} \eqn{\code{delta << Xn-X1}}  
+  }
+  \item{h}{ 
+    a scale reading of all outcomes of the algorithm. More specifically, \code{h} is the interval
+    width of a uniform scale covering the interval \eqn{\code{(Xn - delta/2, Xn + delta/2)}}.\cr
+	\emph{Restriction:} \eqn{\code{h < min(Xi+1 - Xi)}} and \eqn{\code{h > 0}}
+  }
+  \item{k}{ 
+    the number of iterations the function will execute; \code{k} may also be interpreted as
+    the order of smoothness (as a polynomial of degree \code{k-1}).  By default, \code{k} is set to perform
+    a single iteration.
+  }
+  \item{show.edges}{ 
+    a logical indicating whether or not to display the resulting data beyond the range of X
+    values of the user-supplied data. If \code{FALSE}, then the extended edges are suppressed. By
+    default, this parameter is set to \code{FALSE}.
+  }
+}
+\details{
+  The relation between variables Y and X as a function [namely, Y(x)] of a current value of 
+  X = x is often desired as a result of practical research. Usually we search for some simple 
+  function Y(x) when given a data set of pairs (Xi, Yi). These pairs frequently resemble a 
+  noisy plot, and thus Y(x) is desired to be a smooth outcome from the original data to capture 
+  important patterns in the data, while leaving out the noise. The \code{KZS} function estimates a 
+  solution to this problem through use of splines, which is a nonparametric estimator of a 
+  function.  Given a data set of pairs (Xi, Yi), splines estimate the smooth values of Y from 
+  X's. The \code{KZS} function Y(x) averages all values of Yi for all Xi within the range \code{delta} around 
+  each scale reading \code{hi} along the variable X. The \code{KZS} algorithm is designed to smooth all fast 
+  fluctuations in Y within the \code{delta}-range in X, while keeping ranges more then \code{delta} untouched. 
+  The separation of short scales less than \code{delta} and long scales more than \code{delta} is becoming more 
+  effective with higher \code{k}, while effective range of separation is becoming \eqn{\code{delta}*sqrt(\code{k})}. 
+}
+\value{
+  a two-column data frame containing:
+  \item{Xk }{X values resulting from execution of algorithm}
+  \item{Y(Xk) }{Y values resulting from execution of algorithm}
+}
+\references{ "Spline Smoothing." \url{http://economics.about.com/od/economicsglossary/g/splines.htm}}
+\author{ Derek Cyr \email{dc896148@albany.edu} and Igor Zurbenko \email{igorg.zurbenko@gmail.com} }
+\note{ 
+  The \code{KZS} function is designed for the general situation, including time series data. In many 
+  applications where variable X can be time, the \code{KZS} is resolving the problem of missing values in
+  time series or irregularly observed values in longitudinal data analysis.
+}
+\examples{
+  # This example was created with the intent to push the limits of KZS. The function has a wide peak and a 
+  # sharp peak; for a wide peak, you may permit stronger smoothing and for a sharp peak you may not (you would 
+  # be over-smoothing). The key here is to find satisfying values for the parameters.
+  
+  # EXAMPLE 1
+  
+  t <- seq(from=-round(400*pi),to=round(400*pi),by=.25) #Total time t
+  tp <- seq(from=0,to=round(400*pi),by=.25)		#Positive t (includes t=0)
+  tn <- seq(from=-round(400*pi),to=-.25,by=.25)         #Negative t
+  nobs <- 1:length(t)                                   #Number of total t values            
+  
+  # True signal
+  signalp <- 0.5*sin(sqrt((2*pi*abs(tp))/200))          #Positive side of signal
+  signaln <- 0.5*sin(-sqrt((2*pi*abs(tn))/200))         #Negative side of signal
+  signal <- append(signaln,signalp,after=length(tn))    #Appending into one signal
+  	
+  # Randomly generate noise from the standard normal distribution
+  et <- rnorm(length(t),mean=0,sd=1)
+  
+  # Add the noise to the true signal
+  yt <- et + signal
+  
+  # Data frame of (t,yt) 
+  pts <- data.frame(cbind(t,yt))
+  
+  # Plot of the true signal
+  plot(signal~t,xlab='t',ylab='Signal',main='True Signal',type="l")
+  
+  # Plot of yt (signal + noise)
+  plot(yt~t,ylab=expression(paste(Y[t])),main='Signal buried in noise',type="p")
+  
+  # Apply KZS function - 3 iterations
+  kzs(pts,delta=80,h=.2,k=3,show.edges=FALSE)
+  lines(signal~t,col="red")
+  title(main="KZS(delta=80, h=0.20, k=3, show.edges=false)")
+  legend("topright", c("True signal","KZS estimate"), cex=0.8, col=c("red","black"), lty=1:1, lwd=2, bty="n")
+  
+  # EXAMPLE 2 - Use same data with 5 iterations
+
+  kzs(pts,delta=80,h=.20,k=5,show.edges=FALSE)
+  lines(signal~t,col="red")
+  title(main="KZS(delta=80, h=0.20, k=5, show.edges=false)")
+  legend("topright", c("True signal","KZS estimate"), cex=0.8, col=c("red","black"), lty=1:1, lwd=2, bty="n")
+  
+  
+  # EXAMPLE 3 - Rerun KZS on the same function after removing 20\% of the data points. 
+  #             This provides an opportunity to create a random scale along the variable X.
+  
+  # Generate and remove a random 20\% of t 
+  t20 <- sample(nobs,size=length(nobs)/5)        #Random 20\% of (t,yt)
+  pts20 <- pts[-t20,]                            #Remove the 20\%
+  
+  # Plot of (t,yt) with 20\% removal
+  plot(pts20$yt~pts20$t,xlab='t',ylab=expression(paste(Y[t])),main='Signal buried in noise - 20\% removal',type="p")
+  
+  # Apply KZS function - 3 iterations
+  kzs(pts20,delta=80,h=.20,k=3,show.edges=FALSE)
+  lines(signal~t,col="red")
+  title(main="KZS(delta=80, h=0.20, k=3, show.edges=false) - 20\% removal")
+  legend("topright", c("True signal","KZS estimate"), cex=0.8, col=c("red","black"), lty=1:1, lwd=2, bty="n")
+   
+  # EXAMPLE 4 - Use same data with 5 iterations
+  kzs(pts20,delta=80,h=.20,k=5,show.edges=FALSE)
+  lines(signal~t,col="red")
+  title(main="KZS(delta=80, h=0.20, k=5, show.edges=false) - 20\% removal")
+  legend("topright", c("True signal","KZS estimate"), cex=0.8, col=c("red","black"), lty=1:1, lwd=2, bty="n")
+}
+\keyword{ smooth }
+\keyword{ ts }
+\keyword{ nonparametric }