Permalink
Browse files

Final update

  • Loading branch information...
1 parent 9cd3b2a commit 5704e051567624d0108365765b0c55af24f4cd73 @cheeyeelim committed Dec 3, 2015
Showing with 644 additions and 693 deletions.
  1. +1 −1 DESCRIPTION
  2. +1 −2 NAMESPACE
  3. +3 −3 R/initialisation.R
  4. +5 −165 R/search.R
  5. +155 −120 README.md
  6. BIN data/example_models.rda
  7. +1 −1 man/BoolModel-class.Rd
  8. +1 −1 man/BoolTraineR.Rd
  9. +1 −1 man/amat_to_bm.Rd
  10. +1 −1 man/bm_to_amat.Rd
  11. +1 −1 man/bm_to_df.Rd
  12. +2 −2 man/bon_bmodel.Rd
  13. +2 −2 man/bon_istate.Rd
  14. +1 −1 man/calc_mscore.Rd
  15. +1 −1 man/calc_roc.Rd
  16. +1 −1 man/check_and.Rd
  17. +2 −2 man/compress_bmodel.Rd
  18. +2 −2 man/decompress_bmodel.Rd
  19. +1 −1 man/decreate_boolmodel.Rd
  20. +1 −1 man/df_to_bm.Rd
  21. +1 −1 man/emodel1.Rd
  22. +1 −1 man/emodel2.Rd
  23. +1 −1 man/emodel3.Rd
  24. +1 −1 man/eval_bool.Rd
  25. +1 −1 man/extract_term.Rd
  26. +1 −1 man/filter_dflist.Rd
  27. +1 −1 man/gen_one_rmodel.Rd
  28. +1 −1 man/gen_singlerule.Rd
  29. +2 −2 man/gen_two_rmodel.Rd
  30. +2 −2 man/gen_two_rmodel_dag.Rd
  31. +1 −1 man/get_encodings.Rd
  32. +1 −1 man/grow_bmodel.Rd
  33. +1 −1 man/initialise_data.Rd
  34. +1 −1 man/initialise_model.Rd
  35. +3 −3 man/initialise_raw_data.Rd
  36. +2 −2 man/krum_bmodel.Rd
  37. +2 −2 man/krum_istate.Rd
  38. +1 −1 man/m_score.Rd
  39. +1 −1 man/man_dist.Rd
  40. +1 −1 man/match_term.Rd
  41. +1 −1 man/minmod_internal.Rd
  42. +1 −1 man/minmod_model.Rd
  43. +1 −1 man/model_consensus.Rd
  44. +1 −1 man/model_dist.Rd
  45. +1 −1 man/model_setdiff.Rd
  46. +12 −5 man/model_train.Rd
  47. +1 −1 man/outgenysis_model.Rd
  48. +1 −1 man/outgraph_model.Rd
  49. +1 −1 man/outstate_graph.Rd
  50. +1 −1 man/plotBM.Rd
  51. +1 −1 man/printBM.Rd
  52. +1 −1 man/rcpp_simulate.Rd
  53. +1 −1 man/rcpp_validate.Rd
  54. +1 −1 man/simulate_model.Rd
  55. +1 −1 man/unique_raw_data.Rd
  56. +1 −1 man/validate_adjmat.Rd
  57. +1 −1 man/vcat.Rd
  58. +1 −1 man/which.random.min.Rd
  59. +2 −2 man/wilson_raw_data.Rd
  60. +2 −2 man/wilson_raw_rnaseq.Rd
  61. +1 −1 man/writeBM.Rd
  62. BIN src/BoolTraineR.dll
  63. BIN src/RcppExports.o
  64. BIN src/score_calculation.o
  65. BIN src/simulation.o
  66. +87 −62 vignettes/booltrainer.Rmd
  67. +159 −149 vignettes/booltrainer.html
  68. +155 −120 vignettes/booltrainer.md
  69. BIN vignettes/booltrainer.pdf
  70. BIN vignettes/booltrainer_files/figure-markdown_github/unnamed-chunk-17-1.png
  71. BIN vignettes/booltrainer_files/figure-markdown_github/unnamed-chunk-23-1.png
  72. BIN vignettes/booltrainer_files/figure-markdown_github/unnamed-chunk-31-1.png
View
@@ -1,7 +1,7 @@
Package: BoolTraineR
Type: Package
Title: Tools For Training and Analysing Asynchronous Boolean Models
-Version: 1.1.2
+Version: 1.1.3
Date: 2015-10-22
Author: Chee Yee Lim
Maintainer: Chee Yee Lim <cyl49@cam.ac.uk>
View
@@ -1,4 +1,4 @@
-# Generated by roxygen2 (4.1.1): do not edit by hand
+# Generated by roxygen2: do not edit by hand
export(BoolModel)
export(amat_to_bm)
@@ -20,7 +20,6 @@ export(minmod_model)
export(model_dist)
export(model_setdiff)
export(model_train)
-export(model_train_sa)
export(outgenysis_model)
export(outgraph_model)
export(outstate_graph)
View
@@ -5,12 +5,12 @@
#' Note that kmeans clustering as binarisation only works well if the data has a bimodal distribution.
#'
#' @param x matrix. Numeric data of gene expression.
-#' @param data_type character. Specify data types: qpcr, rnaseq.
+#' @param max_expr character. Specify whether the highest expression corresponds to the lowest numeric value (as in qPCR), or to the highest numeric value (as in RNAseq and microarray). Options: 'low', 'high'. Defaults to 'high'.
#' @param uni_thre numerical. Specify threshold for unimodality test. Default to 0.2.
#' @param scale logical. Whether to scale the data to a range of 0-1. Default to T.
#'
#' @export
-initialise_raw_data = function(x, data_type='qpcr', uni_thre=0.2, scale=T)
+initialise_raw_data = function(x, max_expr='high', uni_thre=0.2, scale=T)
{
#(1) Convert negative to positive values.
if(min(x)<0)
@@ -23,7 +23,7 @@ initialise_raw_data = function(x, data_type='qpcr', uni_thre=0.2, scale=T)
stopifnot(min(x)==0)
- if(data_type=='qpcr')
+ if(max_expr=='low')
{
#(2) Invert qPCR values. Lowest expression should be close to 0, highest expression should be away from 0.
x = abs(max(x) - x)
View
@@ -1,165 +1,11 @@
-#' @title Training Model (using simulated annealing)
-#'
-#' @description
-#' This function performs model training to find the best model, using information from data. It requires an initial state supplied to perform the search, and an initial model can also be supplied to be included in the initial population.
-#' Note that if a model is supplied, and the genes in the model is different from the genes in the data, only the genes overlapping between model and data will be retained for further analysis.
-#'
-#' @param edata data frame of expression data. Should have state(row) x gene(column).
-#' @param bmodel Boolean model in data frame. If NULL, use a random Boolean model. Defaults to NULL.
-#' @param istate data frame. Must have only 1 row, which represents 1 initial state. Defaults to NULL.
-#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must be higher than number of genes. Defaults to 6.
-#' @param and_bool logical. Whether to consider AND terms. IF bmodel is not NULL, defaults to whether AND interaction is included in bmodel. If bmodel is NULL, then defaults to TRUE.
-#' @param self_loop logical. Whether to allow self_loop in random starting model. Only used if is.null(bmodel). Default to F.
-#' @param restart integer. Number of restart from the best solution. Defaults to 0.
-#' @param verbose logical. Whether to give detailed output to the screen. Defaults to F.
-#'
-#' @export
-model_train_sa = function(edata, bmodel=NULL, istate=NULL, max_varperrule=6, and_bool=T, self_loop=F, restart=0, verbose=F)
-{
- ##################Implement restart##########################
-
- vcat('Preparing data for analysis.\n', verbose)
-
- #Initialise expression data.
- tmp_data = initialise_raw_data(edata) #returns a list of two data frames.
- cdata = initialise_data(tmp_data[[1]]) #continuous data
- ddata = initialise_data(tmp_data[[2]]) #discretised data
-
- #Initialise model.
- if(is.null(bmodel))
- {
- bmodel = gen_one_rmodel(colnames(edata), max_varperrule, and_bool, self_loop)
- } else
- {
- if(class(bmodel) != 'BoolModel')
- {
- bmodel = initialise_model(bmodel)
- }
-
- if(check_and(bmodel) != and_bool)
- {
- and_bool = check_and(bmodel)
- }
- }
-
- #Initialise initial state.
- if(is.null(istate))
- {
- istate = rbinom(length(bmodel@target), 1, 0.5)
- #Getting a random initial state.
- while(mean(istate) > 0.9 | mean(istate) < 0.1) #do not want initial state that is too homogenous.
- {
- istate = rbinom(length(bmodel@target), 1, 0.5)
- }
- istate = data.frame(matrix(istate, nrow=1))
- colnames(istate) = bmodel@target
- }
- istate = initialise_data(istate, aslogic=T)
-
- #Filtering expression data.
- overlap_gene = intersect(colnames(cdata), y=bmodel@target)
- nonoverlap_gene = bmodel@target[!(bmodel@target %in% overlap_gene)]
- names(overlap_gene) = bmodel@target_var[bmodel@target %in% overlap_gene]
- names(nonoverlap_gene) = bmodel@target_var[!(bmodel@target %in% overlap_gene)]
-
- fddata = filter_dflist(ddata, overlap_gene, F)
- fcdata = filter_dflist(cdata, overlap_gene, F)
-
- fcdata = unique_raw_data(fddata, fcdata) #removes duplicates in continuous data.
- fddata = unique(fddata)
-
- vcat('Start training.\n', verbose)
-
- #(3) Calling final combined search.
- cur_score = NA
- cur_model = bmodel
- cur_step = 1
- cur_temp = 1
- min_temp = 0.00001
- alpha = 0.9
- max_ite = 100 #iterations in same step.
- while(cur_temp > min_temp)
- {
- vcat(sprintf('Current iteration: %s.\n', cur_step), verbose)
-
- vcat('Stage 1: Exploring neighbouring models.\n', verbose)
- mod_model = unlist(minmod_model(cur_model, overlap_gene=overlap_gene))
- vcat(sprintf('Total neighbouring models: %s.\n', length(mod_model)), verbose)
-
- vcat('Stage 2: Evaluating next model.\n', verbose)
- cur_ite = 1
- while(cur_ite <= max_ite)
- {
- model_ind = sample(1:length(mod_model), 1)
- next_model = mod_model[[model_ind]]
- mod_model = mod_model[-model_ind]
-
- #print(printBM(next_model)) #debug
-
- next_score = calc_mscore(bmodel=next_model, istate=istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule)
-
- #Breaking conditions.
- if(length(mod_model) == 0)
- {
- cur_score = next_score
- cur_model = next_model
-
- if(cur_score < best_score) #store best solution ever, regardless of the final ending point.
- {
- best_score = next_score
- best_model = next_model
- }
- break
- }
-
- if(is.na(cur_score))
- {
- #For first iteration.
- cur_score = next_score
- cur_model = next_model
-
- best_score = next_score
- best_model = next_model
- } else
- {
- #For subsequent iteration.
- accept_prob = exp((cur_score - next_score)/cur_temp) #if next solution is better than current solution, accept_prob always more than 1.
-
- if(accept_prob > runif(1)) #move forward if the prob is more than a random number between 0-1.
- {
- cur_score = next_score
- cur_model = next_model
-
- #writeBM(cur_model, 'tmp_model.csv') #debug
-
- if(cur_score < best_score) #store best solution ever, regardless of the final ending point.
- {
- best_score = next_score
- best_model = next_model
- }
- }
- }
- cur_ite = cur_ite + 1
- }
-
- cur_temp = cur_temp*alpha #Reduce subsequent temperature.
- cur_step = cur_step + 1
- }
- vcat(sprintf('Final iteration: %s.\n', cur_step), verbose)
-
- output = list(best_score=best_score, best_model=best_model,
- cur_score=cur_score, cur_model=cur_model, overlap_gene=overlap_gene, nonoverlap_gene=nonoverlap_gene)
-
- return(output)
-}
-
#' @title Training Model
#'
#' @description
#' This function performs model training to find the best model, using information from data. It requires an initial state supplied to perform the search, and an initial model can also be supplied to be included in the initial population.
#' Note that if a model is supplied, and the genes in the model are different from the genes in the data, only the genes overlapping between the model and the data will be retained for further analysis.
#'
-#' @param edata data frame of expression data. Should have state(row) x gene(column).
+#' @param cdata data frame of expression data. Should have state(row) x gene(column).
+#' @param ddata discretised data frame of expression data. Must supply when preprocess=F. Obtain from initialise_raw_data(). Defaults to NULL.
#' @param bmodel Boolean model in data frame. If NULL, use a random Boolean model. Defaults to NULL.
#' @param istate data frame. Must have only 1 row, which represents 1 initial state. Defaults to NULL.
#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must be higher than number of genes. Defaults to 6.
@@ -171,19 +17,14 @@ model_train_sa = function(edata, bmodel=NULL, istate=NULL, max_varperrule=6, and
#' @param detailed_output logical. Whether to return only the model inferred, or all the details obtained during optimisation. Defaults to F.
#'
#' @export
-model_train = function(edata, bmodel=NULL, istate=NULL, max_varperrule=6, and_bool=T, self_loop=F, con_thre=0.3, tol=1e-6, verbose=F, detailed_output=F)
+model_train = function(cdata, ddata=NULL, bmodel=NULL, istate=NULL, max_varperrule=6, and_bool=T, self_loop=F, con_thre=0.3, tol=1e-6, verbose=F, detailed_output=F)
{
vcat('Preparing data for analysis.\n', verbose)
- #Initialise expression data.
- tmp_data = initialise_raw_data(edata) #returns a list of two data frames.
- cdata = initialise_data(tmp_data[[1]]) #continuous data
- ddata = initialise_data(tmp_data[[2]]) #discretised data
-
#Initialise model.
if(is.null(bmodel))
{
- bmodel = gen_one_rmodel(colnames(edata), max_varperrule, and_bool, self_loop)
+ bmodel = gen_one_rmodel(colnames(cdata), max_varperrule, and_bool, self_loop)
} else
{
if(class(bmodel) != 'BoolModel')
@@ -226,6 +67,7 @@ model_train = function(edata, bmodel=NULL, istate=NULL, max_varperrule=6, and_bo
vcat('Start training.\n', verbose)
#(3) Calling final combined search.
+ i = 0 #suppress check error on non-visible global binding.
best_model = c()
best_score = c()
all_best_score = list()
@@ -307,8 +149,6 @@ model_train = function(edata, bmodel=NULL, istate=NULL, max_varperrule=6, and_bo
previous_score = best_score #store it for comparison.
all_best_score = c(all_best_score, list(best_score))
cur_step = cur_step + 1
-
- #browser()
}
vcat(sprintf('Final iteration: %s.\n', cur_step), verbose)
Oops, something went wrong.

0 comments on commit 5704e05

Please sign in to comment.