Skip to content

Commit

Permalink
correct handling of NA for cume_dist, percent_rank, min_rank and dens…
Browse files Browse the repository at this point in the history
…e_rank. #774
  • Loading branch information
romainfrancois committed Dec 12, 2014
1 parent 57f8c82 commit f5e2752
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 6 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Expand Up @@ -148,6 +148,9 @@

* `min(.,na.rm = TRUE)` works with `Date`s built on numeric vectors (#755)

* Internal implementations of `dense_rank`, `min_rank`, `cume_dist` and
`percent_rank` deal correctly with NA (#774)

# dplyr 0.3.0.1

* Fixed problem with test script on Windows.
Expand Down
4 changes: 2 additions & 2 deletions R/rank.R
Expand Up @@ -68,12 +68,12 @@ dense_rank <- function(x) {
#' @export
#' @rdname ranking
percent_rank <- function(x) {
(min_rank(x) - 1) / (length(x) - 1)
(min_rank(x) - 1) / (length(!is.na(x)) - 1)
}

#' @export
#' @rdname ranking
cume_dist <- function(x) {
rank(x, ties.method = "max", na.last = "keep") / length(x)
rank(x, ties.method = "max", na.last = "keep") / length(!is.na(x))
}

26 changes: 22 additions & 4 deletions inst/include/dplyr/Result/Rank.h
Expand Up @@ -21,6 +21,7 @@ namespace dplyr {
inline int start() const {
return 1 ;
}

} ;

struct dense_rank_increment{
Expand All @@ -40,6 +41,7 @@ namespace dplyr {
inline int start() const {
return 1 ;
}

} ;

struct percent_rank_increment{
Expand All @@ -59,6 +61,8 @@ namespace dplyr {
inline double start() const {
return 0.0 ;
}


} ;

struct cume_dist_increment{
Expand Down Expand Up @@ -168,21 +172,35 @@ namespace dplyr {
for( int j=0; j<m; j++) {
map[ slice[j] ].push_back(j) ;
}

STORAGE na = Rcpp::traits::get_na<RTYPE>() ;
typename Map::const_iterator it = map.find( na ) ;
if( it != map.end() ){
m -= it->second.size() ;
}

oMap ordered;

typename Map::const_iterator it = map.begin() ;
it = map.begin() ;
for( ; it != map.end() ; ++it){
ordered[it->first] = &it->second ;
}
typename oMap::const_iterator oit = ordered.begin() ;
typename Increment::scalar_type j = Increment::start() ;
for( ; oit != ordered.end(); ++oit){
STORAGE key = oit->first ;
const std::vector<int>& chunk = *oit->second ;
int n = chunk.size() ;
j += Increment::pre_increment( chunk, m ) ;
for( int k=0; k<n; k++){
out[ chunk[k] ] = j ;
if( Rcpp::traits::is_na<RTYPE>( key ) ){
typename Increment::scalar_type na =
Rcpp::traits::get_na< Rcpp::traits::r_sexptype_traits<typename Increment::scalar_type>::rtype >() ;
for( int k=0; k<n; k++){
out[ chunk[k] ] = na ;
}
} else {
for( int k=0; k<n; k++){
out[ chunk[k] ] = j ;
}
}
j += Increment::post_increment( chunk, m ) ;
}
Expand Down
20 changes: 20 additions & 0 deletions tests/testthat/test-mutate-windowed.R
Expand Up @@ -113,6 +113,26 @@ test_that("min_rank handles columns full of NaN (#726)", {
expect_true( all(data$rank == 1L ) )
})

test_that("rank functions deal correctly with NA (#774)", {
data <- data_frame( x = c(1,2,NA,1,0,NA) )
res <- data %>% mutate(
min_rank = min_rank(x),
percent_rank = percent_rank(x),
dense_rank = dense_rank(x),
cume_dist = cume_dist(x)
)
expect_true( all( is.na( res$min_rank[c(3,6)] ) ) )
expect_true( all( is.na( res$dense_rank[c(3,6)] ) ) )
expect_true( all( is.na( res$percent_rank[c(3,6)] ) ) )
expect_true( all( is.na( res$cume_dist[c(3,6)] ) ) )

expect_equal( res$percent_rank[ c(1,2,4,5) ], c(1/3, 1, 1/3, 0 ) )
expect_equal( res$min_rank[ c(1,2,4,5) ], c(2L,4L,2L,1L) )
expect_equal( res$dense_rank[ c(1,2,4,5) ], c(2L,3L,2L,1L) )
expect_equal( res$cume_dist[ c(1,2,4,5) ], c(.75,1,.75,.25) )

})

# FIXME: this should only fail if strict checking is on.
# test_that("window functions fail if db doesn't support windowing", {
# df_sqlite <- temp_load(temp_srcs("sqlite"), df)$sql %>% group_by(g)
Expand Down

0 comments on commit f5e2752

Please sign in to comment.