Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
157 changed files
with
6,494 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
.Rproj.user | ||
.Rhistory | ||
.RData |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
data(iris) | ||
ffiris <- as.ffdf(iris) | ||
|
||
result <- ffdfddply(x=ffiris, | ||
split=x$Species, | ||
FUN=function(x){ | ||
lowestbypetalwidth <- x[order(x$Petal.Width, decreasing=TRUE), ] | ||
lowestbypetalwidth <- lowestbypetalwidth[!duplicated(lowestbypetalwidth[, c("Species","Petal.Width")]), ] | ||
lowestbypetalwidth$group <- factor(x= "lowest", levels = c("lowest","highest")) | ||
highestbypetalwidth <- x[order(x$Petal.Width, decreasing=FALSE), ] | ||
highestbypetalwidth <- highestbypetalwidth[!duplicated(highestbypetalwidth[, c("Species","Petal.Width")]), ] | ||
highestbypetalwidth$group <- factor(x= "highest", levels = c("lowest","highest")) | ||
rbind(lowestbypetalwidth, highestbypetalwidth) | ||
}, | ||
BATCHBYTES = 5000, | ||
trace=TRUE) | ||
class(result) | ||
dim(result) | ||
dim(iris) | ||
result[1:10,] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Package: ffbase | ||
Maintainer: Edwin de Jonge <edwindjonge@gmail.com> | ||
License: GPL-3 | ||
Title: Basic statistical functions for package ff | ||
Type: Package | ||
LazyLoad: yes | ||
Author: Edwin de Jonge, Jan Wijffels | ||
Description: Implementation for min,max, mean, tabulate, | ||
table, with, within. for package ff | ||
Version: 0.4 | ||
URL: http://code.google.com/p/fffunctions/ | ||
Date: 2011-11-1 | ||
Depends: ff (>= 2.2-0) | ||
Suggests: testthat, LaF | ||
Collate: 'auxilary.R' 'chunkify.R' 'compact.R' 'cut_ff.R' 'diff_ff.R' | ||
'droplevels.R' 'ffappend.R' 'ffdfsave.R' 'mean.R' 'pkg.R' | ||
'risplit.R' 'subset.R' 'Summary_ff.R' 'table_ff.R' | ||
'tabulate_ff.R' 'transform.R' 'unique.R' 'with.R' 'ffwhich.R' | ||
'ffplyr.R' 'zzz.R' | ||
Packaged: 2012-04-18 21:31:40 UTC; jan |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
export(all.ff) | ||
export(chunkify) | ||
export(ffappend) | ||
export(ffdfddply) | ||
export(ffdfsave) | ||
export(ffwhich) | ||
export(max.ff) | ||
export(min.ff) | ||
export(range.ff) | ||
export(subset.ff) | ||
export(subset.ffdf) | ||
export(table.ff) | ||
export(tabulate.ff) | ||
export(transform.ffdf) | ||
S3method(any,ff) | ||
S3method(c,ff) | ||
S3method(compact,ff) | ||
S3method(cut,ff) | ||
S3method(droplevels,ff) | ||
S3method(droplevels,ffdf) | ||
S3method(ffwhich,ffdf) | ||
S3method(ffwhich,ff_vector) | ||
S3method(mean,ff) | ||
S3method(mean,ffdf) | ||
S3method(sum,ff) | ||
S3method(unique,ff) | ||
S3method(with,ffdf) | ||
S3method(within,ffdf) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
version 0.3 | ||
- Added ffdfsave function that can be used to save an ffdf data.frame | ||
|
||
version 0.2-2 | ||
- bug fix, subset for ffdf now works properly (thanks to Martijn Tennekes) | ||
|
||
version 0.2-1 | ||
- bug fix, sum.ff with na.rm=TRUE is now working correctly (thanks to Andreas Borg) | ||
|
||
version 0.2 | ||
- initial version |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
#' Summary methods for ff objects | ||
#' | ||
#' @export | ||
#' @param x a \code{ff} object | ||
#' @param ... optional other (\code{ff}) objects | ||
#' @param na.rm should \code{NA} be removed? | ||
#' @param range a \code{ri} or an \code{integer} vector of \code{length==2} giving a range restriction for chunked processing | ||
#' @return \code{TRUE}, \code{FALSE} or \code{NA} | ||
all.ff <- function(x,..., na.rm=FALSE, range=NULL){ | ||
r <- checkRange(range,x) | ||
all( ... | ||
, sapply(chunk(x, from=min(r), to=max(r)) | ||
, function(i){ | ||
all(x[i], na.rm=na.rm) | ||
} | ||
) | ||
) | ||
} | ||
|
||
#' Summary methods for ff objects | ||
#' @export | ||
#' @method any ff | ||
#' @param x a \code{ff} object | ||
#' @param ... optional other (\code{ff}) objects | ||
#' @param na.rm should \code{NA} be removed? | ||
#' @param range a \code{ri} or an \code{integer} vector of \code{length==2} giving a range restriction for chunked processing | ||
#' @return \code{TRUE}, \code{FALSE} or \code{NA} | ||
any.ff <- function(x, ..., na.rm=FALSE, range=NULL){ | ||
r <- checkRange(range,x) | ||
any( ... | ||
, sapply(chunk(x, from=min(r), to=max(r)) | ||
, function(i){ | ||
any(x[i], na.rm=na.rm) | ||
} | ||
) | ||
) | ||
} | ||
|
||
#' \code{sum} returns the sum of all the values present in its arguments. | ||
#' | ||
#' @title Sum of \code{ff} vector Elements | ||
#' @method sum ff | ||
#' @export | ||
#' @param x a \code{ff} object | ||
#' @param ... optional other (\code{ff}) objects | ||
#' @param na.rm should \code{NA} be removed? | ||
#' @param range a \code{ri} or an \code{integer} vector of \code{length==2} giving a range restriction for chunked processing | ||
#' @return sum of elements | ||
sum.ff <- function(x, ..., na.rm=FALSE, range=NULL){ | ||
r <- checkRange(range, x) | ||
sum( ... | ||
, sapply( chunk(x, from=min(r), to=max(r)) | ||
, function(i){ | ||
sum(x[i], na.rm=na.rm) | ||
} | ||
) | ||
) | ||
} | ||
|
||
#' Minimum, maximum and range of ff vector | ||
#' | ||
#' default behaviour of \code{\link{min}},\code{\link{max}} and \code{\link{range}} | ||
#' @method min ff | ||
#' @method max ff | ||
#' @method range ff | ||
#' @example ../examples/minmaxrange.R | ||
#' @aliases min max range | ||
#' @export min.ff max.ff range.ff | ||
#' @aliases min.ff max.ff range.ff | ||
#' @param x a \code{ff} object | ||
#' @param ... optional other (\code{ff}) objects | ||
#' @param na.rm should \code{NA} be removed? | ||
#' @param range a \code{ri} or an \code{integer} vector of \code{length==2} giving a range restriction for chunked processing | ||
#' @return minimun, maximum or range values | ||
min.ff <- function(x, ..., na.rm=FALSE, range=NULL){ | ||
r <- checkRange(range, x) | ||
|
||
min( ... #for all other ff's? | ||
, sapply( chunk(x, from=min(r), to=max(r)) | ||
, function(i){ | ||
#print(x[i]) | ||
min(x[i], na.rm=na.rm) | ||
} | ||
) | ||
) | ||
} | ||
|
||
max.ff <- function(x, ..., na.rm=FALSE, range=NULL){ | ||
r <- checkRange(range, x) | ||
|
||
max( ... #for all other ff's? | ||
, sapply( chunk(x, from=min(r), to=max(r)) | ||
, function(i){ | ||
max(x[i], na.rm=na.rm) | ||
} | ||
) | ||
) | ||
} | ||
|
||
range.ff <- function(x, ..., na.rm=FALSE, range=NULL){ | ||
r <- checkRange(range, x) | ||
|
||
range( ... #for all other ff's? | ||
, sapply( chunk(x, from=min(r), to=max(r)) | ||
, function(i){ | ||
#print(x[i]) | ||
range(x[i], na.rm=na.rm) | ||
} | ||
) | ||
) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
checkRange <- function(range, x){ | ||
|
||
if (is.null(range)){ | ||
return(ri(1,length(x))) | ||
} | ||
|
||
#TODO add checks | ||
range | ||
} | ||
|
||
#' Groups the input integer vector into several groups if the running cumulative sum increases a certain maximum number | ||
#' | ||
#' Groups the input integer vector into several groups if the running cumulative sum increases a certain maximum number | ||
#' | ||
#' @param x an integer vector | ||
#' @param max the maximum running cumulative size before an extra grouping is done | ||
#' @return An integer vector of the same length of x, indicating groups | ||
grouprunningcumsum <- function(x, max){ | ||
l <- as.integer(length(x)) | ||
if(l == 0){ | ||
return(x) | ||
} | ||
x <- as.integer(x) | ||
max <- as.integer(max) | ||
result <- .C("grouprunningcumsum", | ||
x = x, | ||
l = l, | ||
max = max, | ||
PACKAGE="ffbase") | ||
result$x | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#' Chunkify an element-wise function | ||
#' | ||
#' Chunkify creates a new function that operates on a ff vector. | ||
#' It creates chunks from the ff vector and calls the orginal function \code{fun} on each chunk. | ||
#' @export chunkify | ||
#' @param fun function to be 'chunkified', the function must accept a vector and | ||
#' return a vector of the same \code{length} | ||
#' @return 'chunkified' function that accepts a \code{ff} vector as its first argument. | ||
chunkify <- function(fun){ | ||
cfun <- function( x | ||
, ... | ||
, inplace=FALSE | ||
){ | ||
|
||
chunks <- chunk(x) | ||
|
||
i <- chunks[[1]] | ||
ret <- as.ff(fun(x[i], ...)) | ||
length(ret) <- length(x) | ||
|
||
for (i in chunks[-1]){ | ||
ret[i] <- fun(x[i], ...) | ||
} | ||
ret | ||
} | ||
cfun | ||
} | ||
|
||
#' Chunk an expression to be used in a chunk for loop | ||
#'@param x \code{character} with vars | ||
#'@param expr \code{expression} vector | ||
#'@param i name of index | ||
#'@keywords internal | ||
chunkexpr <- function(x, expr, i="i"){ | ||
es <- deparse(expr) | ||
xs <- x | ||
for (var in xs){ | ||
varre <- paste("\\b(",var,")\\b", sep="") | ||
varsub <- paste("\\1[",i,"]", sep="") | ||
es <- gsub(varre, varsub, es) | ||
} | ||
#print(es) | ||
parse(text=es) | ||
} | ||
|
||
#chunkexpr(c("x","y"), expression(x>2 & y==1, z==3, y> 3)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#' Compact a ff vector or ffdf data frame | ||
#' | ||
#' Compact takes a ff vector and tries to use the smallest binary data type for this vector. | ||
#' @aliases compact compact.ff compact.ffdf | ||
#' @method compact ff | ||
#' @method compact ffdf | ||
#' @export | ||
#' @param x \code{ff} or \code{ffdf} object | ||
#' @param use.na \code{logical} if TRUE the resulting ff vector can contain NA, otherwise not | ||
#' @param ... other parameters | ||
#' @return compact ff vector | ||
compact <- function(x, use.na=TRUE, ...){ | ||
UseMethod("compact") | ||
} | ||
|
||
compact.ff <- function(x, use.na=TRUE,...){ | ||
switch( vmode(x) | ||
, integer = { | ||
lev <- levels(x) | ||
levels(x) <- NULL | ||
r <- range(x, na.rm=TRUE) | ||
|
||
if (r[1] <= 0) | ||
return(x) | ||
as.ff(x, vmode="byte") | ||
} | ||
, x | ||
) | ||
} | ||
|
||
compact.ffdf <- function(x, use.na, ...){ | ||
ret <- lapply(physical(x), compact, use.na) | ||
do.call(ffdf, ret) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#' cut divides the range of x into intervals and codes the values in x according to which interval they fall. The leftmost interval corresponds to level one, the next leftmost to level two and so on. | ||
#' | ||
#' The \code{cut} method for ff with the behaviour of \code{link{cut}} | ||
#' @title Convert Numeric ff vector to factor ff | ||
#' @export | ||
#' @seealso cut | ||
#' @method cut ff | ||
#' | ||
#' @param x a (numeric) ff object that will be cut into pieces | ||
#' @param breaks specifies the breaks for cutting this | ||
#' @param ... other parameters that can be given to \code{\link{cut.default}} | ||
#' | ||
#' @return ff a new \code{\link{ff}} object with the newly created factor | ||
cut.ff <- function(x, breaks, ...){ | ||
f <- NULL | ||
|
||
#### borrowed code from cut.default | ||
if (length(breaks) == 1) { | ||
if (is.na(breaks) | breaks < 2) | ||
stop("invalid number of intervals") | ||
nb <- as.integer(breaks + 1) | ||
dx <- diff(rx <- range(x, na.rm = TRUE)) | ||
if (dx == 0) | ||
dx <- abs(rx[1]) | ||
breaks <- seq.int( rx[1] - dx/1000 | ||
, rx[2L] + dx/1000 | ||
, length.out = nb | ||
) | ||
} | ||
#### | ||
|
||
args <- list(...) | ||
|
||
args$breaks <- breaks | ||
for (i in chunk(x, ...)){ | ||
args$x <- x[i] | ||
f <- ffappend( f | ||
, do.call(cut, args) | ||
) | ||
} | ||
f | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
diff.ff <- function(x, ...){ | ||
#TODO generalize for lag and difference | ||
chunks <- chunk(x, ...) | ||
|
||
# hmm, next line may go wrong for small datatypes... | ||
d <- ff(vmode=vmode(x), length=length(x)-1) | ||
for (i in chunks){ | ||
j <- i | ||
d[i] <- diff(x[i]) | ||
} | ||
d | ||
} |
Oops, something went wrong.