GitHub - averissimo/notes-on-r: General notes and tips on R

title	Notes on R
author	André Veríssimo
output	github_document

Benchmarking parallel methods with `cor`

Functions to build dataset

library(futile.logger)

# Generate a random template vector and `reps` noisy copies of it.
#
# Arguments:
#   reps        number of noisy variables (columns) to generate
#   sample.size number of observations (rows) per variable
#
# Returns a list with:
#   dat      numeric matrix with `sample.size` rows and `reps` columns
#            (always a matrix, also when `reps` is 0 or `sample.size` is 1)
#   template numeric vector of length `sample.size` drawn from runif()
#
# Side effect: logs the dimensions of `dat` through flog.info().
genData <- function(reps = 100, sample.size = 1000) {
  # Build one noisy copy of `template`: entries below `correlation` receive
  # small uniform noise, the remaining entries receive larger gaussian noise.
  genDataAux <- function(template = runif(1000), correlation = 1) {
    xdata <- numeric(length(template))
    correlated.ixs <- template < correlation
    n.correlated <- sum(correlated.ixs)
    xdata[correlated.ixs] <- template[correlated.ixs] +
      .0005 * runif(n.correlated)
    xdata[!correlated.ixs] <- template[!correlated.ixs] +
      .05 * rnorm(length(template) - n.correlated)
    xdata
  }

  template <- runif(sample.size)
  # seq_len() gives an empty sequence for reps = 0 (seq(0) would be c(1, 0)),
  # and vapply() enforces that every column has exactly sample.size values.
  dat <- vapply(
    seq_len(reps),
    function(e) genDataAux(template, correlation = abs(.3 + runif(1))),
    numeric(sample.size)
  )
  # vapply() returns a plain vector when sample.size is 1; force matrix shape
  # so callers can always index columns with dat[, j].
  dim(dat) <- c(sample.size, reps)
  #
  flog.info('Size of generated data:', dim(dat), capture = TRUE)
  list(dat = dat, template = template)
}

# Pearson correlation between the template and the first generated variable
# of the global `dat` list.
#
# `dat.ix` is accepted so the function can be passed as FUN to the apply-style
# calls being benchmarked, but it is not used: column 1 is always correlated.
my.fun <- function(dat.ix) {
  template.values <- dat$template
  first.variable <- dat$dat[,1]
  cor(template.values, first.variable, method = 'pearson')
}

Setting up common benchmark parameters

library(microbenchmark)

# Number of times microbenchmark evaluates each expression (passed as `times`)
ntimes <- 10
#
# Shared dataset read by my.fun: one generated variable with 1000 samples,
# together with the template it was derived from
dat <- genData(reps = 1, sample.size = 1000)

## INFO [2016-12-13 18:48:37] Size of generated data:
## 
## [1] 1000    1

#
# Number of my.fun calls made by each benchmarked expression
# (every benchmark below iterates over 1:reps)
reps <- 100000;

# Log a short summary of the benchmark settings; the `## INFO` lines are the
# output recorded when the document was rendered
flog.info('')

## INFO [2016-12-13 18:48:37]

flog.info('')

## INFO [2016-12-13 18:48:37]

flog.info('Repeating benchmarks %d times to find statistics', ntimes)

## INFO [2016-12-13 18:48:37] Repeating benchmarks 10 times to find statistics

flog.info('  -> %d samples per variables', length(dat$template))

## INFO [2016-12-13 18:48:37]   -> 1000 samples per variables

flog.info('  -> %d correlations being calculated', reps)

## INFO [2016-12-13 18:48:37]   -> 100000 correlations being calculated

flog.info('')

## INFO [2016-12-13 18:48:37]

## INFO [2016-12-13 18:48:37]

Benchmark Xapply functions

# Time the sequential apply variants: each expression calls my.fun once per
# element of 1:reps and is evaluated ntimes times.
# The vapply FUN.VALUE template, array(double(0), 1), is a length-one double
# array.
microbenchmark(
    lapply(1:reps, my.fun)
  , sapply(1:reps, my.fun)
  , vapply(1:reps, my.fun, array(double(0), 1))
  , times = ntimes)

## Unit: seconds
##                                         expr      min       lq     mean
##                       lapply(1:reps, my.fun) 13.33089 13.46899 13.58955
##                       sapply(1:reps, my.fun) 13.49540 13.55955 13.68354
##  vapply(1:reps, my.fun, array(double(0), 1)) 13.24117 13.39339 13.46551
##    median       uq      max neval cld
##  13.61063 13.71154 13.85029    10  ab
##  13.67085 13.80471 13.92711    10   b
##  13.46388 13.52053 13.68896    10  a

Benchmark different combinations of mclapply

library(parallel)
# Time parallel::mclapply over 1:reps with mc.cores set to 16, 15 and 14;
# each expression is evaluated ntimes times.
microbenchmark(
    mclapply(1:reps, my.fun, mc.cores = 16)
  , mclapply(1:reps, my.fun, mc.cores = 15)
  , mclapply(1:reps, my.fun, mc.cores = 14)
  , times = ntimes)

## Unit: seconds
##                                     expr      min       lq     mean
##  mclapply(1:reps, my.fun, mc.cores = 16) 1.610561 1.642636 1.680131
##  mclapply(1:reps, my.fun, mc.cores = 15) 1.674383 1.711419 1.752971
##  mclapply(1:reps, my.fun, mc.cores = 14) 1.776585 1.782533 1.801348
##    median       uq      max neval cld
##  1.645246 1.666160 1.968459    10  a 
##  1.725278 1.744501 2.015944    10  ab
##  1.793219 1.807371 1.849380    10   b

Benchmark different combinations of foreach

library(parallel)
library(foreach)
library(doMC) # parallel computing
# Register the doMC backend with 16 cores for the %dopar% loops below
registerDoMC(cores = 16) 
flog.info('16 cores')

## INFO [2016-12-13 18:56:19] 16 cores

# Time foreach/%dopar% over 1:reps for every combination of .combine
# (unset, 'c', 'cbind') and .inorder (unset, F); each expression is
# evaluated ntimes times.
microbenchmark(
    foreach(el = 1:reps) %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .inorder = F ) %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'c') %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'c', .inorder = F ) %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'cbind') %dopar% { my.fun(el) }
  , foreach(el = 1:reps, .combine = 'cbind', .inorder = F ) %dopar% { my.fun(el) }
  , times = ntimes)

## Unit: seconds
##                                                                                            expr
##                                                 foreach(el = 1:reps) %dopar% {     my.fun(el) }
##                                   foreach(el = 1:reps, .inorder = F) %dopar% {     my.fun(el) }
##                                 foreach(el = 1:reps, .combine = "c") %dopar% {     my.fun(el) }
##                   foreach(el = 1:reps, .combine = "c", .inorder = F) %dopar% {     my.fun(el) }
##                             foreach(el = 1:reps, .combine = "cbind") %dopar% {     my.fun(el) }
##  foreach(el = 1:reps, .combine = "cbind", .inorder = F) %dopar%      {         my.fun(el)     }
##       min       lq     mean   median       uq      max neval   cld
##  42.29074 42.40442 42.71077 42.64085 43.07830 43.18791    10   c  
##  42.99840 43.13113 43.24925 43.19802 43.37627 43.63018    10    d 
##  41.45168 41.48543 41.79080 41.69424 42.05392 42.29476    10 a    
##  42.19233 42.27469 42.36435 42.29764 42.34854 42.77995    10  b   
##  43.15101 43.20782 43.29498 43.27505 43.32283 43.63896    10    d 
##  43.30991 43.72202 43.75812 43.80028 43.85897 43.94144    10     e

Benchmark combinations of BiocParallel

library(BiocParallel)
#
# Time BiocParallel::bplapply over 1:reps with MulticoreParam(16),
# MulticoreParam(15) and MulticoreParam(14) as BPPARAM; each expression is
# evaluated ntimes times.
microbenchmark(
    bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(16))
  , bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(15))
  , bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(14))
  , times = ntimes)

## Unit: seconds
##                                                    expr      min       lq
##  bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(16)) 4.018822 4.042074
##  bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(15)) 4.005688 4.051643
##  bplapply(1:reps, my.fun, BPPARAM = MulticoreParam(14)) 4.027397 4.051482
##      mean   median       uq      max neval cld
##  4.070524 4.073427 4.104559 4.110163    10   a
##  4.092317 4.083183 4.102033 4.287266    10   a
##  4.071622 4.063018 4.095685 4.127835    10   a

Name		Name	Last commit message	Last commit date
Latest commit History 7 Commits
.gitignore		.gitignore
LICENSE		LICENSE
README.Rmd		README.Rmd
README.md		README.md
notes-on-r.Rproj		notes-on-r.Rproj

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

Benchmarking parallel methods with `cor`

Functions to build dataset

Setting up common benchmark parameters

Benchmark Xapply functions

Benchmark different combinations of mclapply

Benchmark different combinations of foreach

Benchmark combinations of BiocParallel

About

Uh oh!

Releases

Packages

Uh oh!

Contributors

Uh oh!

Folders and files

Latest commit

History

Repository files navigation

Benchmarking parallel methods with cor

Functions to build dataset

Setting up common benchmark parameters

Benchmark Xapply functions

Benchmark different combinations of mclapply

Benchmark different combinations of foreach

Benchmark combinations of BiocParallel

About

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Uh oh!

Contributors

Uh oh!

Benchmarking parallel methods with `cor`

Packages