Skip to content

Commit

Permalink
Merge pull request #29 from corehunter/feature/seed
Browse files Browse the repository at this point in the history
allow to set seed
  • Loading branch information
hdbeukel committed Jun 22, 2017
2 parents 81cc5d8 + abcb372 commit c87b7a3
Show file tree
Hide file tree
Showing 20 changed files with 186 additions and 67 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: corehunter
Title: Multi-Purpose Core Subset Selection
Version: 3.1.0
Version: 3.1.0.9000
Date: 2017-01-27
Authors@R: c(person("Herman", "De Beukelaer", email = "herman.debeukelaer@gmail.com", role = c("aut", "cre")),
person("Guy", "Davenport", email = "daveneti@gmail.com", role = "aut"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -23,5 +23,6 @@ export(setRange)
import(naturalsort)
import(rJava)
importFrom(methods,is)
importFrom(stats,runif)
importFrom(utils,read.delim)
importFrom(utils,write.csv)
5 changes: 5 additions & 0 deletions NEWS.md
@@ -1,6 +1,11 @@
Core Hunter 3
=============

Version 3.1.0.9000 (dev)
------------------------

- Using `set.seed` prior to executing Core Hunter now yields reproducible results.

Version 3.1.0 (27/01/2017)
--------------------------

Expand Down
22 changes: 15 additions & 7 deletions R/data.R
Expand Up @@ -19,6 +19,9 @@
#' Selection Index to Real and Simulated Data",
#' \url{http://hdl.handle.net/11529/10199} V10
#'
#' @examples
#' exampleData()
#'
#' @return Core Hunter data of class \code{chdata}
#' @export
exampleData <- function(){
Expand Down Expand Up @@ -704,7 +707,7 @@ print.chgeno <- function(x, include.size = TRUE, ...){
#' to some or all individuals.
#'
#' @param types Variable types (optional).
#' Vector of characters of length one or two.
#' Vector of characters, each of length one or two.
#' Ignored when reading from file.
#'
#' The first letter indicates the scale type and should be one of \code{N} (nominal),
Expand Down Expand Up @@ -1070,15 +1073,20 @@ print.chpheno <- function(x, include.size = TRUE, ...){
#'
#' @importFrom utils read.delim
#' @export
read.autodelim <- function(file, quote = "'\"",
                           row.names = 1,
                           na.strings = "",
                           check.names = FALSE,
                           strip.white = TRUE,
                           stringsAsFactors = FALSE,
                           ...){
  # Infer the field separator from the file extension (case-insensitive):
  # comma for "csv", tab for "txt". No default branch is given, so any
  # other extension leaves `sep` as NULL.
  sep <- switch(tolower(tools::file_ext(file)),
                "csv" = ",",
                "txt" = "\t")
  # Delegate the actual parsing to read.delim. Every argument is named in
  # full; `na.strings` in particular must not be abbreviated, otherwise the
  # call silently depends on partial argument matching.
  read.delim(file, sep = sep, quote = quote,
             row.names = row.names, na.strings = na.strings,
             check.names = check.names, strip.white = strip.white,
             stringsAsFactors = stringsAsFactors,
             ...)
}

# ----------------- #
Expand Down
40 changes: 35 additions & 5 deletions R/execution.R
Expand Up @@ -13,6 +13,16 @@
#' is being minimized, the roles of upper and lower bound are interchanged, and the
#' Pareto maximum is used instead.
#'
#' Because Core Hunter uses stochastic algorithms, repeated runs may produce different
#' results. To eliminate randomness, you may set a random number generation seed using
#' \code{\link{set.seed}} prior to executing Core Hunter. Note however that Core Hunter
#' uses runtime-based stop conditions, meaning that it is not entirely guaranteed that
#' the same final selection will be obtained when using the same seed, since runtimes
#' may be influenced by external factors such as the current CPU workload. Therefore,
#' the number of executed steps may vary across runs, which may affect the returned
#' solution. When aiming for reproducible results, it is thus also important to allow
#' sufficient execution time to ensure convergence of the optimization algorithm.
#'
#' @param data Core Hunter data (\code{chdata}) containing genotypes,
#' phenotypes and/or a precomputed distance matrix. Can also be an
#' object of class \code{chdist}, \code{chgeno} or \code{chpheno}
Expand All @@ -21,16 +31,16 @@
#' If no objectives are specified Core Hunter maximizes a weighted
#' index including the default entry-to-nearest-entry distance
#' (\code{EN}) for each available data type.
#' For genotyes, the Modified Roger's distance (\code{MR}) is
#' For genotypes, the Modified Roger's distance (\code{MR}) is
#' used. For phenotypes, Gower's distance (\code{GD}) is applied.
#' @param size Desired core subset size (numeric). If larger than one the value
#' is used as the absolute core size after rounding. Else it is used as the
#' sampling rate and multiplied with the dataset size to determine the size of
#' the core. The default sampling rate is 0.2.
#' @param mode Execution mode (\code{default} or \code{fast}). In default mode,
#' the normalization searches terminate when no improvement is found for ten
#' seconds. In fast mode, searches terminated as soon as no improvement is
#' made for two seconds. Stop conditions can be overriden with arguments
#' seconds. In fast mode, searches terminate as soon as no improvement is
#' made for two seconds. These stop conditions can be overridden using arguments
#' \code{time} and \code{impr.time}.
#' @param time Absolute runtime limit in seconds. Not used by default. If used
#' it should be a strictly positive value and is rounded to the nearest integer.
Expand Down Expand Up @@ -80,7 +90,10 @@ getNormalizationRanges <- function(data, obj, size = 0.2, mode = c("default", "f

# run Core Hunter normalization
api <- ch.api()
ranges <- .jevalArray(api$getNormalizationRanges(j.args, mode, time, impr.time), simplify = TRUE)
ranges <- .jevalArray(
api$getNormalizationRanges(j.args, mode, time, impr.time, genSeed()),
simplify = TRUE
)
obj.ids <- sapply(obj, function(o){
id <- o$type
if(!is.null(o$meas)){
Expand All @@ -96,8 +109,20 @@ getNormalizationRanges <- function(data, obj, size = 0.2, mode = c("default", "f

}

#' Sample a core collection.
#'
#' Sample a core collection from the given data.
#'
#' Because Core Hunter uses stochastic algorithms, repeated runs may produce different
#' results. To eliminate randomness, you may set a random number generation seed using
#' \code{\link{set.seed}} prior to executing Core Hunter. Note however that Core Hunter
#' uses runtime-based stop conditions, meaning that it is not entirely guaranteed that
#' the same final selection will be obtained when using the same seed, since runtimes
#' may be influenced by external factors such as the current CPU workload. Therefore,
#' the number of executed steps may vary across runs, which may affect the returned
#' solution. When aiming for reproducible results, it is thus also important to allow
#' sufficient execution time to ensure convergence of the optimization algorithm.
#'
#' @param data Core Hunter data (\code{chdata}) containing genotypes,
#' phenotypes and/or a precomputed distance matrix. Typically the
#' data is obtained with \code{\link{coreHunterData}}. Can also be
Expand Down Expand Up @@ -213,7 +238,7 @@ sampleCore <- function(data, obj, size = 0.2, mode = c("default", "fast"), norma

# run Core Hunter
api <- ch.api()
sel <- api$sampleCore(j.args, mode, time, impr.time, !verbose)
sel <- api$sampleCore(j.args, mode, time, impr.time, genSeed(), !verbose)
if(indices){
sel <- toRIndices(sel)
} else {
Expand Down Expand Up @@ -323,6 +348,11 @@ createArguments <- function(data, obj, size, normalize){

}

#' @importFrom stats runif
genSeed <- function(){
  # Draw one uniform value from R's own random number generator, round it
  # up to a whole number and wrap it with .jlong before returning it.
  upper <- 2^31-1
  draw <- runif(1, min = 0, max = upper)
  .jlong(ceiling(draw))
}

# ---------- #
# OBJECTIVES #
# ---------- #
Expand Down
Binary file not shown.
4 changes: 4 additions & 0 deletions man/exampleData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 14 additions & 3 deletions man/getNormalizationRanges.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/phenotypes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 16 additions & 11 deletions man/read.autodelim.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 12 additions & 1 deletion man/sampleCore.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions tests/testthat/data/distances-small.txt
@@ -1,6 +1,6 @@
ID NAME Alice Dave "Bob-1" Bob-2 Carol
Alice 0.0 0.2 0.4 0.6 0.8
'Dave' 0.2 0.0 0.2 0.4 0.6
Bob-1 Bob 0.4 0.2 0.0 0.1 0.4
Bob-2 "Bob" 0.6 0.4 0.1 0.0 0.2
Carol 0.8 0.6 0.4 0.2 0.0
ID NAME Alice Dave "Bob" "Bob'" Carol
Alice 0.0 0.2 0.4 0.6 0.8
'Dave' 0.2 0.0 0.2 0.4 0.6
Bob Bob 0.4 0.2 0.0 0.1 0.4
"Bob'" "Bob" 0.6 0.4 0.1 0.0 0.2
Carol 0.8 0.6 0.4 0.2 0.0
12 changes: 6 additions & 6 deletions tests/testthat/data/genotypes-bi-small.csv
@@ -1,6 +1,6 @@
ID , NAME , "mk1", mk2, mk3, mk4
Alice , Alice , 1 , 0 , 2 , 1
Dave , , 2 , 0 , 2 , 0
"Bob-1", 'Bob' , 1 , 0 , , 0
Bob-2 , Bob , 1 , 0 , 1 , 1
'Carol', "Carol", 1 , 0 , , 0
ID , NAME , "mk1", "mk,2", "mk'3", mk4
Alice , Alice , 1 , 0 , 2 , 1
Dave , , 2 , 0 , 2 , 0
"Bob" , 'Bob' , 1 , 0 , , 0
"Bob'" , Bob , 1 , 0 , 1 , 1
'Carol', "Carol", 1 , 0 , , 0
14 changes: 7 additions & 7 deletions tests/testthat/data/genotypes-freq-small.csv
@@ -1,7 +1,7 @@
ID , NAME , mk1 , mk1 , 'mk1', mk2 , mk2 , mk3 , mk3 , "mk3" , mk4 , mk4 , mk4 , mk4
ALLELE , , mk1-1, "mk1-2", mk1-3, mk2-1, mk2-2, , mk3-2, "" , mk4-1, mk4-2, mk4-3, 'mk4-4'
Alice , Alice , , , , 0.50 , 0.50 , 0.00 , 0.50 , 0.50 , 0.00 , 0.00 , 0.50 , 0.50
Dave , , 1.00 , 0.00 , 0.00 , 0.50 , 0.50 , 0.00 , 0.50 , 0.50 , 1.00 , 0.00 , 0.00 , 0.00
Bob-1 , Bob , 0.60 , 0.00 , 0.40 , 0.50 , 0.50 , 0.00 , 0.50 , 0.50 , 0.25 , 0.25 , 0.25 , 0.25
"Bob-2", Bob , , , , 1.00 , 0.00 , , , , 0.00 , 0.00 , 1.00 , 0.00
Carol , Carol , 0.33 , 0.33 , 0.33 , 0.50 , 0.50 , 0.00 , 0.50 , 0.50 , 0.50 , 0.00 , 0.50 , 0.00
ID , NAME , mk1 , mk1 , 'mk1', "mk,2", "mk,2", "mk'3", "mk'3", "mk'3", mk4 , mk4 , mk4 , mk4
ALLELE , , mk1-1, "mk1-2", mk1-3, mk2-1 , mk2-2 , , mk3-2 , "" , mk4-1, mk4-2, mk4-3, 'mk4-4'
Alice , Alice , , , , 0.50 , 0.50 , 0.00 , 0.50 , 0.50 , 0.00 , 0.00 , 0.50 , 0.50
Dave , , 1.00 , 0.00 , 0.00 , 0.50 , 0.50 , 0.00 , 0.50 , 0.50 , 1.00 , 0.00 , 0.00 , 0.00
Bob , Bob , 0.60 , 0.00 , 0.40 , 0.50 , 0.50 , 0.00 , 0.50 , 0.50 , 0.25 , 0.25 , 0.25 , 0.25
"Bob'" , Bob , , , , 1.00 , 0.00 , , , , 0.00 , 0.00 , 1.00 , 0.00
Carol , Carol , 0.33 , 0.33 , 0.33 , 0.50 , 0.50 , 0.00 , 0.50 , 0.50 , 0.50 , 0.00 , 0.50 , 0.00
12 changes: 6 additions & 6 deletions tests/testthat/data/genotypes-small.csv
@@ -1,6 +1,6 @@
ID , NAME , 'mk1-1', mk1-2, mk2-1, mk2-2, mk3 , mk3-2, mk4-1, "mk4-22"
Alice , , 1 , 3 , "B" , B , a1 , a1
Dave , , 2 , 2 , 'C' , A , a1 , a2 , + , -
Bob-1 , Bob , 1 , 2 , 'D' , D , a2 , a2 , + , +
Bob-2 , "Bob", 2 , 3 , "B" , B , 'a2', "a1" , + , -
'Carol', , 1 , 1 , , , a1 , a1 , - , -
ID , NAME , 'mk1-1', mk1-2, "mk,2-1", "mk,2-2", "mk'3", "mk'3-2", mk4-1, "mk4-22"
Alice , , 1 , 3 , "B" , B , a1 , a1
Dave , , 2 , 2 , 'C' , A , a1 , a2 , + , -
Bob , Bob , 1 , 2 , 'D' , D , a2 , a2 , + , +
"Bob'" , "Bob", 2 , 3 , "B" , B , 'a2' , "a1" , + , -
'Carol', , 1 , 1 , , , a1 , a1 , - , -
4 changes: 2 additions & 2 deletions tests/testthat/data/phenotypes-no-types.csv
@@ -1,6 +1,6 @@
ID , NAME , trait 1, trait 2, "trait 3", trait 4, 'trait 5'
"Alice", , A , x , 4 , 1.4 , false
Dave , , B , b , 5 , 0.5 , true
Bob-1 , 'Bob', A , a , 6 , 0.5 , true
Bob-2 , Bob , C , c , 9 , 0.5 , false
Bob , 'Bob', A , a , 6 , 0.5 , true
"Bob'" , Bob , C , c , 9 , 0.5 , false
Carol , , B , c , 1 , 1.3 , true
4 changes: 2 additions & 2 deletions tests/testthat/data/phenotypes-small.csv
Expand Up @@ -4,6 +4,6 @@
MAX , , , , 10 , 2.0 ,
Alice , , "A" , 'x' , 4 , 1.4 , false
"Dave" , , B , b , 5 , 0.5 , true
Bob-1 , Bob , A , a , 6 , 0.5 , true
Bob-2 , "Bob", C , "c" , 9 , 0.5 , false
Bob , Bob , A , a , 6 , 0.5 , true
"Bob'" , "Bob", C , "c" , 9 , 0.5 , false
'Carol', , 'B' , c , 1 , 1.3 , true

0 comments on commit c87b7a3

Please sign in to comment.