Skip to content
Permalink
Browse files

first commit of this new package

  • Loading branch information...
Fernando C Barbi
Fernando C Barbi committed Apr 28, 2016
0 parents commit 4d787ff60a0a57e6d0c86fc513d6ec0d7e469f75
BIN +14 KB .DS_Store
Binary file not shown.
@@ -0,0 +1,7 @@
^.*\.git$
^.*\.Rproj$
^\.Rproj\.user$
^.*\.project
^TODO$
^\.travis\.yml$
^README.md$
@@ -0,0 +1,11 @@
.Rproj.user
.Rhistory
.RData
*.o
*.so
*.dll
*.Rproj
tsworkflow.Rproj
.project
inst/doc
dev
@@ -0,0 +1,12 @@
Package: tsworkflow
Title: Functions to streamline workflow in time series analysis.
Version: 0.0.0.9000
Authors@R: person("Fernando", "Barbi", email = "fcbarbi@gmail.com", role = c("aut", "cre"))
Author: Fernando Barbi [aut,cre]
Maintainer: Fernando Barbi <fcbarbi@gmail.com>
Description: Toos for handling ts (time series) and zoo objects to: clean outliers, generate dummies, complete series and generate Eviews-formatted dates.
URL: https://github.com/fcbarbi/tsworkflow
Depends: R (>= 3.2.4)
License: GPL
LazyData: true
RoxygenNote: 5.0.1
@@ -0,0 +1,10 @@
# Generated by roxygen2: do not edit by hand

export(dlog)
export(genEVtime)
export(removeOutliers)
export(toIndex)
export(tsComplete)
export(tsDummy)
export(tsMapOutliers)
importFrom(stats,quantile)
BIN +6 KB R/.DS_Store
Binary file not shown.
@@ -0,0 +1,45 @@

#' @title dlog
#' @name dlog
#'
#' @description Generates series of the percentage variation of the series data passed as parameter.
#' @details It is implicit that data is index by time (time series). If observations are negative or zero, the \code{convert=TRUE} option allows the automatic transformation to an index before passing the \code{log()}.
#'
#' @param x Time series or vector to be converted to a \code{ts} object.
#' @param convert (Optional) Controls the automatia conversion to index before passing \code{log()}. Defaults to \code{TRUE}.
#' @return Time series is the percentage variation with the first position as \code{0}.
#'
#' @export dlog
#'
#' @seealso \code{\link{toIndex}}
#'
#' @examples
#' x <- ts( rnorm(10,mean=100) )
#' dlx <- dlog(x)
#'
#' x1 <- ts( seq(-.3,.6,.1) )
#' dlog( x1, convert=FALSE ) # causes NA when log(x) for x<=0

# log of first differences
dlog <- function( x, convert=TRUE ) {
if (!is.ts(x) & !zoo::is.zoo(x)) stop('x must be a ts (time series) or zoo object.')
dlog.ts(x, convert)
}

# Internal use only, do NOT export (no checks done here)
dlog.ts <- function( x, convert=TRUE ) {
if (any(x<0) & convert) x <- toIndex(x)
c(0,diff(log(x)))
}

# TODO
# dlog.data.frame <- function( x, convert=TRUE ) {}

# percentage variation, just to check
# dpx <- tsworkflow:::deltaPX(x)
# if (any(abs(dlx-dpx)>1e-3)) warning('significant differences') else warning('no differences')
deltaPX <- function(x) {
dx <- rep(0,length(x))
for (i in 2:length(x)) dx[i] <- (x[i]-x[i-1])/x[i-1]
dx
}
@@ -0,0 +1,68 @@

#' @title genEVtime
#' @name genEVtime
#'
#' @description Function to generate an Eviews-formatted vector of strings with time references.
#' @details Eviews formats dates of regularly spaced time series as YYYYPSS where YYYY is the year, P is the period (Month,Quarter) and SS is the subperiod (eg.1 to 12 for months)
#'
#' @param start String with the starting date formatted as \code{"2000M12"} or \code{"2002Q4"}.
#' @param end String with the ending date, uses the same format as \code{start}. Optional if \code{qobs} specified. Defaults to NULL.
#' @param qobs Number of observations. Optional if \code{end} specified. Defaults to NULL.
#'
#' @export
#'
#' @examples
#' genEVtime("1973Y1","1975Y1") # yeap, must explicit "Y1"
#' genEVtime("1947q3","1948Q02") # note the 0 before 2 is optional
#' genEVtime("1947M1",qobs=6) #
#'
genEVtime <- function( start, end=NULL, qobs=NULL )
{
start_y <- substr(start,1,4) #year
start_p <- toupper(substr(start,5,5)) #period
# if (start_p=="") start_p <- "Y" # to support year with Y and subperiod
start_s <- substr(start,6,8) #subperiod
# if (start_s=="") start_s <- "0"

starts <- as.integer(start_s)
starty <- as.integer(start_y)

freq <- 0
freq <- switch(start_p,"Y"=1,"S"=2,"Q"=4,"M"=12)
if (freq==0) stop('frequencies supported are Y,S,Q,M ')

if (starts>freq) stop('start subperiod not compatible with frequency')
if (is.null(qobs) & is.null(end)) stop('either end or number obs must be supplied')

endy <- 0
if (!is.null(end)) {
end_y <- substr(end,1,4)
end_s <- substr(end,6,8)
endy <- as.integer(end_y)
ends <- as.integer(end_s)
if(endy<starty) stop('end year is before than start year')
if(ends>freq) stop('end subperiod not compatible with frequency')
}

time_out <- NULL
sub <- starts # subperiod
year <- starty
cobs <- 1 # counter
cond <- FALSE
if (!is.null(qobs))
cond <- expression(cobs<=qobs)
if (!is.null(end))
cond <- expression((year<endy)||(year==endy && sub<=ends))

while (eval(cond)) {
if (freq==1)
time_out <- c(time_out,paste0(year)) # year has no subperiod
else
time_out <- c(time_out,paste0(year,start_p,sub))
sub <- sub+1
cobs <- cobs+1
if (sub>freq){ sub<-1; year<-year+1 }
}
time_out
}

@@ -0,0 +1,33 @@

#'
#' @title removeOutliers
#' @name removeOutliers
#'
#' @description Function to remove "outlier" observations as defined by quantile range of acceptable values.
#' @details Source: \url{http://stackoverflow.com/questions/4787332/how-to-remove-outliers-from-a-dataset}
#'
#' @param x Series to be filtered
#' @param ok_range Quantile range of accepted values
#' @param na.rm Control NA removal with TRUE (to remove NAs) or FALSE (to keep them).
#'
#' @export
#' @importFrom stats quantile
#'
#' @examples
#'
#' set.seed(123)
#' x <- rnorm(100)
#' x <- c(-10, x, 10)
#' y <- removeOutliers(x)
# par(mfrow = c(1, 2))
# boxplot(x)
# boxplot(y)
#'
removeOutliers <- function( x, range=c(.01,.99), na.rm = TRUE, ... ) {
if (missing(x)) stop("x must be supplied")
qnt <- quantile(x, probs=range, na.rm = na.rm, ...)
y <- x
y[x < qnt[1]] <- NA
y[x > qnt[2]] <- NA
y
}
@@ -0,0 +1,35 @@
#'
#' @title toIndex
#' @name toIndex
#'
#' @description Convert series to index based 100 corresponding to the reference observation given in optional parameter \code{base}.
#' @details
#'
#' @param x Time series to be converted to index.
#' @param base (optional) is the index of the observaton to be used as reference.
#'
#' @export toIndex
#'
#' @seealso \code{\link{dlog}}
#' @examples
#' Inflation <- ts( rnorm(13,mean=.2,sd=.1), start=c(2007,12), freq=12 )
#' Cpi <- toIndex(Inflation)
#'
# TODO: allow to pass a date as argument
# Cpi <- toIndex(Inflation,base= ts.date( Inflation, c(2008,1) ) )
#
# ts.date <- function( ts, date ) {
# step <- (tsp(ts)[2]-tsp(ts)[1])/tsp(ts)[3]
# (date-tsp(ts)[1])/step # BUG: converter date
# }

toIndex <- function(x,base=1) {
if (!is.ts(x)) stop("toIndex must be called with a ts object.")
rangex <- max(x)-min(x)
x <- x + rangex
if (is.null(base))
basex <- min(x)
else
basex <- x[base]
100*(1+(x-basex)/basex)
}
@@ -0,0 +1,69 @@

#' @title tsComplete
#' @name tsComplete
#'
#' @description Function to complete a time-indexed series (either \code{ts} or \code{zoo}) by combining it with another series to avoid NAs. The function has a third argument "fill" to chose whether to leave NA, interpolate or repeate the last value when both vectors are missing.
#' @details Supports both \code{ts} and \code{zoo}. Converts to \code{zoo} if one of the parameters is \code{zoo}.

#' @param x main data series (either \code{ts} or \code{zoo}) to be completed for missing observations.
#' @param y secondary series (either \code{ts} or \code{zoo}) to use only when missing in the main series.
#' @param fill method to treat NA's after combining \code{x} and \code{y}, can be "na","fill","interpolate" or "repeat"
#'
#' @return Returns either a \code{ts} (default) or \code{zoo} object.
#'
#' @export
#'
#' @examples
#' a <- c(NA, 1, 2,NA,NA, 5,NA)
#' b <- c( NA,12,NA,13,NA,15,16)
#' a2 <- ts(a,start=c(2000,1),freq=4)
#' b2 <- ts(b,start=c(2000,2),freq=4)
#' b3 <- zoo::zoo(b2)
#' aa <- tsComplete(a2)
#' ab1 <- tsComplete(a2,b2)
#' ab2 <- tsComplete(a2,b2,fill="na")
#' ab3 <- tsComplete(a2,b3)
#' ab4 <- tsComplete(a2,b3,fill="fill")

tsComplete <- function( x,y,fill="fill" )
{
if (missing(x)) stop("x must be supplied")
if (!is.ts(x) & !zoo::is.zoo(x)) stop("x must be a 'ts' or 'zoo' object.")
if (!missing(y)) if (!is.ts(y) & !zoo::is.zoo(y)) stop("y is optional but if supplied it must be a 'ts' or 'zoo' object.")
if (!missing(y)) if (is.ts(x) & is.ts(y)) if (tsp(x)[3]!=tsp(y)[3]) stop("Cannot combine TS with different frequencies.")
# TODO: check freq match for zoo objects

# if x not zoo but y is zoo then transform x into zoo
if (!zoo::is.zoo(x) & !missing(y))
if (zoo::is.zoo(y)) x <- zoo::zooreg(x, start=tsp(x)[1], freq=tsp(x)[3] )

# synchronize series
sx <- sync <- x
lCombined <- FALSE
if (is.ts(x) & !missing(y)) { sync <- ts.union(x,y); lCombined <- TRUE }
if (zoo::is.zoo(x) & !missing(y)) { sync <- zoo::merge.zoo(x,y); lCombined <- TRUE }
#print(sync) # debug
if (lCombined) {
sx <- sync[,1]
if (!missing(y)) sy <- sync[,2]
}
# combine only if sx is NA and sy not NA
temp <- sx
if (!missing(y)) {
mask <- which(is.na(sx))
temp[mask] <- sy[mask]
}

# complete the remaining NA according to a user defined rule
fill.options <- c("na","fill","interpolate","repeat")
fill <- match.arg(fill,fill.options)
choice <- which(fill==fill.options)
# we rely on the fact that a ts object can implicitly be transformed into a zoo and back to ts
res <- switch( choice, temp, zoo::na.fill(temp, "extend"),
zoo::na.approx(temp), zoo::na.locf(temp) )
# check that (ts,ts) -> ts, (zoo,ts) or (ts,zoo) -> zoo and (zoo,zoo) -> zoo
res
}



@@ -0,0 +1,59 @@

#'
#' @title tsDummy
#' @name tsDummy
#'
#' @description Generates a \code{ts} object filled with 0's, with 1's in a period or specific dates.
#' @details
#'
#' @param start Same as the \code{start} date in a \code{ts} object.
#' @param end Same as the \code{end} date in a \code{ts} object.
#' @param freq Same as the \code{frequency} date in a \code{ts} object.
#' @param period (optional) List with two elements: the starting and ending dates for the 1's formated as for ex. \code{c(2008,12)}.
#' @param dates (optional) List of discontinuous dates formated as for ex. \code{c(2008,12)}.
#' @export
#' @examples
#' lehman <- tsDummy( start=c(2000,1), end=c(2015,12), freq=12, period=list( c(2008,9), c(2008,12) ) )
#' oilshocks <- tsDummy( start=c(1970,1), end=c(2015,12), freq=12,
#' dates=list( c(1973,10), c(1979,12), c(1990,10), c(2008,6) ) )
#'
tsDummy <- function( start, end, freq=12 , period=NULL, dates=NULL ) {
if (missing(start)) stop("supply a start date formated as c(2001,12).")
if (missing(end)) stop("supply an end date formated as c(2001,12).")
d <- ts( 0, start=start, end=end, freq=freq )
if (!missing(period))
window( d, start=period[[1]], end=period[[2]] ) <- 1
else
if (!is.null(dates))
for (i in seq(1,length(dates))) window( d, start=dates[[i]], end=dates[[i]] ) <- 1
else
warning("No period or points specified for the dummy, choose a period of specific dates formated as c(2001,12).")
d
}

# dummy.ts <- function( ts , period=NULL, points=NULL ) {
# dts <- ts.dummy( start=tsp[1],end=tsp[2],freq=tsp[3],period=period,points=points )
# dts
# }
#
# gdp <- ts( 0.1*seq(1:(16*4))+rnorm(16*4,sd=.1),start=c(2000,1), end=c(2015,4), freq=4)
# gdp <- window( gdp,start=c(2008,10),end=c(2009,3), freq=4 ) -0.5
# plot(gdp)
#
# dcrisis2 <- dummy.ts( gdp, period=list(c(2008,3),c(2009,2)) )

# eof














@@ -0,0 +1,30 @@

#'
#' @title tsMapOutliers
#' @name tsMapOutliers
#'
#' @description Creates a \code{ts} object with 1 in outlier positions and 0 elsewhere.
#'
#' @param x A \code{ts} object with data that may not be stationary, either I(2), I(1) or I(0).
#' @param range Quantile range of acceptable values, defaults to \code{range=c(0.01,0.99)}.
#' @return ots a dummy \code{ts} object with 1 when the outlier is detected and 0 everywhere else.
#'
#' @details uses \code{tsDummy()} in identified outliers.
#' @export
#' @seealso \code{\link{removeOutliers}}
#' @examples
#' x <- ts( c(8,rnorm(10),-15), start=c(2000,1),freq=12 )
#' xo1 <- tsMapOutliers( x )
#' xo2 <- tsMapOutliers( x, c(.05,.95) )

tsMapOutliers <- function( x,range=NULL ){
res <- ifelse( is.na(tsworkflow::removeOutliers(x,range)), 1, 0 )
res
}

#install.packages("outliers")
# tsoutliers::outlier



# ---------------------------------------------------------------
@@ -0,0 +1,8 @@
#'
#' Set of functions to work with time series \code{ts} and \code{zoo} objects.
#'
#' Package \code{tsworkflow} speeds up tasks related to time series cleaning and classification I(?).
#'
"_PACKAGE"
#> [1] "_PACKAGE"

0 comments on commit 4d787ff

Please sign in to comment.
You can’t perform that action at this time.