From 7dae6d4798cc097e6c503062d65c0774b39582a7 Mon Sep 17 00:00:00 2001 From: Chee Yee Lim Date: Tue, 15 Mar 2016 17:20:15 +0000 Subject: [PATCH] Changes for revision round. --- DESCRIPTION | 60 +- NAMESPACE | 2 +- R/boolmodel_class.R | 34 +- R/btr.R | 36 +- R/compression.R | 665 ++++++++------- R/data_desc.R | 272 +++--- R/general.R | 934 +++++++++++---------- R/initialisation.R | 367 ++++---- R/methods.R | 678 +++++++-------- R/model_modification.R | 102 ++- R/rand_model.R | 875 ++++++++++--------- R/score_calculation.R | 129 ++- R/search.R | 457 +++++----- data/example_models.rda | Bin 536 -> 534 bytes man/BTR.Rd | 2 +- man/BoolModel-class.Rd | 2 +- man/amat_to_bm.Rd | 2 +- man/bm_to_amat.Rd | 2 +- man/bm_to_df.Rd | 2 +- man/bon_bmodel.Rd | 4 +- man/bon_istate.Rd | 4 +- man/calc_mscore.Rd | 9 +- man/calc_roc.Rd | 2 +- man/check_and.Rd | 2 +- man/compress_bmodel.Rd | 8 +- man/decompress_bmodel.Rd | 8 +- man/decreate_boolmodel.Rd | 2 +- man/df_to_bm.Rd | 2 +- man/emodel1.Rd | 2 +- man/emodel2.Rd | 2 +- man/emodel3.Rd | 2 +- man/eval_bool.Rd | 2 +- man/extract_term.Rd | 2 +- man/filter_dflist.Rd | 2 +- man/gen_one_rmodel.Rd | 11 +- man/gen_singlerule.Rd | 7 +- man/gen_two_rmodel.Rd | 10 +- man/gen_two_rmodel_dag.Rd | 4 +- man/get_encodings.Rd | 6 +- man/grow_bmodel.Rd | 2 +- man/initialise_data.Rd | 2 +- man/initialise_model.Rd | 2 +- man/initialise_raw_data.Rd | 9 +- man/krum_bmodel.Rd | 4 +- man/krum_istate.Rd | 4 +- man/man_dist.Rd | 2 +- man/match_term.Rd | 2 +- man/minmod_internal.Rd | 12 +- man/minmod_model.Rd | 15 +- man/model_consensus.Rd | 6 +- man/model_dist.Rd | 6 +- man/model_setdiff.Rd | 6 +- man/model_train.Rd | 14 +- man/outgenysis_model.Rd | 2 +- man/outgraph_model.Rd | 2 +- man/outstate_graph.Rd | 2 +- man/plotBM.Rd | 2 +- man/printBM.Rd | 2 +- man/rcpp_simulate.Rd | 2 +- man/rcpp_validate.Rd | 2 +- man/simulate_model.Rd | 2 +- man/unique_raw_data.Rd | 2 +- man/validate_adjmat.Rd | 2 +- man/vcat.Rd | 2 +- man/which.random.min.Rd | 2 +- man/wilson_raw_data.Rd | 4 +- man/wilson_raw_rnaseq.Rd | 4 +- man/writeBM.Rd | 2 +- vignettes/btr.Rmd | 47 +- vignettes/btr.html | 203 ++--- vignettes/btr.md | 106 +-- vignettes/btr.pdf | Bin 305210 -> 269948 bytes .../figure-markdown_github/unnamed-chunk-17-1.png | Bin 6140 -> 12649 bytes .../figure-markdown_github/unnamed-chunk-23-1.png | Bin 7981 -> 35715 bytes .../figure-markdown_github/unnamed-chunk-31-1.png | Bin 8031 -> 39495 bytes 75 files changed, 2648 insertions(+), 2540 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 295842b..e1b6481 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,29 +1,31 @@ -Package: BTR -Type: Package -Title: Tools For Training and Analysing Asynchronous Boolean Models -Version: 1.1.3 -Date: 2015-10-22 -Author: Chee Yee Lim -Maintainer: Chee Yee Lim -Description: This package contains tools for inferring asynchronous Boolean - models from single-cell expression data. -Depends: - R (>= 3.0.3), - methods -Imports: - parallel, - Rcpp (>= 0.11.4), - foreach (>= 1.4.1), - doParallel (>= 1.0.8), - poweRlaw (>= 0.30.0), - diptest (>= 0.75-7), - igraph (>= 1.0.1) -LinkingTo: Rcpp -License: GPL-3 -LazyData: true -Suggests: - bnlearn (>= 3.8.1), - knitr, - rmarkdown -VignetteBuilder: knitr -RoxygenNote: 5.0.1 +Package: BTR +Type: Package +Title: Tools For Training and Analysing Asynchronous Boolean Models +Version: 1.2.2 +Date: 2015-10-22 +Author: Chee Yee Lim +Maintainer: Chee Yee Lim +Description: This package contains tools for inferring asynchronous Boolean + models from single-cell expression data. +Depends: + R (>= 3.0.3), + methods +Imports: + parallel, + Rcpp (>= 0.11.4), + foreach (>= 1.4.1), + doParallel (>= 1.0.8), + poweRlaw (>= 0.30.0), + diptest (>= 0.75-7), + igraph (>= 1.0.1), + infotheo (>= 1.2.0), + entropy (>= 1.2.1) +LinkingTo: Rcpp +License: GPL-3 +LazyData: true +Suggests: + bnlearn (>= 3.8.1), + knitr, + rmarkdown +VignetteBuilder: knitr +RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE index 4620035..9abb4d5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,4 @@ -# Generated by roxygen2 (4.1.1): do not edit by hand +# Generated by roxygen2: do not edit by hand export(BoolModel) export(amat_to_bm) diff --git a/R/boolmodel_class.R b/R/boolmodel_class.R index 3e36218..f677c8e 100644 --- a/R/boolmodel_class.R +++ b/R/boolmodel_class.R @@ -1,17 +1,17 @@ -#' @title An S4 class to represent a Boolean Model -#' -#' @description -#' This class represents Boolean Model in a S4 BoolModel object. -#' -#' @field target character vector. It should contain gene symbols. -#' @field targer_var character vector. It should contain the internal representation of gene variables, in the form of "v[0-9]+s" -#' @field rule_act list of character vectors. Each element in the list should contain the activating gene variables for a particular target gene. -#' @field rule_inh list of character vectors. Each element in the list should contain the inhibiting gene variables for a particular target gene. -#' -#' @export BoolModel -#' @exportClass BoolModel -BoolModel = setClass('BoolModel', - slots=c(target='character', target_var='character', - rule_act='list', rule_inh='list') - ) - +#' @title An S4 class to represent a Boolean Model +#' +#' @description +#' This class represents Boolean Model in a S4 BoolModel object. +#' +#' @field target character vector. It should contain gene symbols. +#' @field targer_var character vector. It should contain the internal representation of gene variables, in the form of "v[0-9]+s" +#' @field rule_act list of character vectors. Each element in the list should contain the activating gene variables for a particular target gene. +#' @field rule_inh list of character vectors. Each element in the list should contain the inhibiting gene variables for a particular target gene. +#' +#' @export BoolModel +#' @exportClass BoolModel +BoolModel = setClass('BoolModel', + slots=c(target='character', target_var='character', + rule_act='list', rule_inh='list') + ) + diff --git a/R/btr.R b/R/btr.R index 9b155d0..36432c3 100644 --- a/R/btr.R +++ b/R/btr.R @@ -1,19 +1,19 @@ -#' @title BTR: A package for studying asynchronous Boolean models -#' -#' @description -#' This package contains tools for Boolean model manipulation, as well as the search for the best Boolean model. -#' -#' @docType package -#' @name BTR -NULL - -## All the Roxygen codes below are for generating the correct NAMESPACE file. -#' @import methods -#' @import parallel -#' @import foreach -#' @import doParallel -NULL - -#' @useDynLib BTR -#' @importFrom Rcpp sourceCpp evalCpp +#' @title BTR: A package for studying asynchronous Boolean models +#' +#' @description +#' This package contains tools for Boolean model manipulation, as well as the search for the best Boolean model. +#' +#' @docType package +#' @name BTR +NULL + +## All the Roxygen codes below are for generating the correct NAMESPACE file. +#' @import methods +#' @import parallel +#' @import foreach +#' @import doParallel +NULL + +#' @useDynLib BTR +#' @importFrom Rcpp sourceCpp evalCpp NULL \ No newline at end of file diff --git a/R/compression.R b/R/compression.R index 68b5fc0..b6ed26f 100644 --- a/R/compression.R +++ b/R/compression.R @@ -1,323 +1,342 @@ -#' @title Get corresponding encodings for compression or decompression. -#' -#' @description -#' This function gets all possible terms (single or double terms) and their corresponding encodings. -#' This function limits the number of possible variables in the model to 999. -#' -#' @param bmodel S4 BoolModel object. -#' -#' @export -get_encodings = function(bmodel) -{ - and_bool = check_and(bmodel) - - #Get all possible terms. - svar = bmodel@target_var - if(and_bool) - { - dvar = sapply(combn(svar, 2, simplify=F), function(x) paste(x, collapse='&')) #get all possible interacting pairs. - dvar = c(dvar, sapply(combn(svar, 2, simplify=F), function(x) paste(rev(x), collapse='&'))) #get the reversed pattern as well. - - term_pool = c(svar, dvar) - term_pool = c('0', term_pool, '!0', paste('!', term_pool, sep='')) #add in inh terms. - - #Generate index for activation terms. - num_pool = seq(1, length(svar)+1) #get numbers for svar. - num_pool = c(num_pool, as.vector(replicate(2, seq(max(num_pool)+1, max(num_pool)+(length(dvar)/2))))) #get numbers for both forward and reverse dvar. - - #Generate index for inhibition terms. - num_pool = c(num_pool, seq(max(num_pool)+1, max(num_pool)+length(svar)+1)) #get numbers for svar. - num_pool = c(num_pool, as.vector(replicate(2, seq(max(num_pool)+1, max(num_pool)+(length(dvar)/2))))) #get numbers for both forward and reverse dvar. - - num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==1, paste('0', x, sep=''), x))) #convert single digit to double digit. - num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==2, paste('0', x, sep=''), x))) #convert double digit to triple digit. - num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==3, paste('0', x, sep=''), x))) #convert triple digit to quadruple digit. - - names(num_pool) = term_pool - } else - { - term_pool = svar - term_pool = c('0', term_pool, '!0', paste('!', term_pool, sep='')) #add in inh terms. - - #Generate index for activation terms. - num_pool = seq(1, length(svar)+1) #get numbers for svar. - - #Generate index for inhibition terms. - num_pool = c(num_pool, seq(max(num_pool)+1, max(num_pool)+length(svar)+1)) #get numbers for svar. - - num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==1, paste('0', x, sep=''), x))) #convert single digit to double digit. - num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==2, paste('0', x, sep=''), x))) #convert double digit to triple digit. - num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==3, paste('0', x, sep=''), x))) #convert triple digit to quadruple digit. - - names(num_pool) = term_pool - } - - stopifnot(all(!is.na(names(num_pool)))) - stopifnot(all(!is.na(term_pool))) - - return(num_pool) -} - -#' @title Compress BoolModel -#' -#' @description -#' This function compresses S4 BoolModel object by representing variables using numbers, and also only the act rules and inh rules are kept. -#' Return a list of 3 vectors, corresponding to act rules and inh rules. -#' -#' @param bmodel S4 BoolModel object. -#' @param encoding named numerical vector returned by get_encodings(). -#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. -#' -#' @export -compress_bmodel = function(bmodel, encoding, max_varperrule) -{ - #Convert variables into encodings. - #Convert 'v1s' into '001'. - act_list = sapply(bmodel@rule_act, function(x) unname(encoding[x])) - inh_list = sapply(bmodel@rule_inh, function(x) unname(encoding[paste('!', x, sep='')])) - - stopifnot(all(!is.na(unlist(act_list)))) - stopifnot(all(!is.na(unlist(inh_list)))) - - #Convert '0001' into 1.0001. - act_list = sapply(1:length(act_list), function(x) as.numeric(paste(x, act_list[[x]], sep='.'))) - inh_list = sapply(1:length(inh_list), function(x) as.numeric(paste(x, inh_list[[x]], sep='.'))) - - #Convert list into vector. - act_vec = unique(unlist(act_list)) - inh_vec = unique(unlist(inh_list)) - - #Add in integer that represents empty spots, according to max_varperrule. - empty_term = max_varperrule - (rle(round(act_vec))$lengths+rle(round(inh_vec))$lengths) #get freq of missing terms. - empty_term[empty_term < 0] = 0 #set negative values to 0. note that negative values can occur because this is a soft constraint. - empty_term = unlist(sapply(1:length(empty_term), function(x) rep(x, empty_term[x]))) - - return(sort(c(act_vec,inh_vec, empty_term))) -} - -#' @title Decompress BoolModel -#' -#' @description -#' This function decompresses the bmodel compressed by compress_bmodel(). -#' Return a S4 BoolModel object. -#' -#' @param x vector returned by compress_bmodel. -#' @param encoding named numerical vector returned by get_encodings(). -#' @param gene character vector. Corresponds to genes in the bmodel. Default to using variable names in encoding. -#' @param format character. Specifies which format to return. Possible values: 'bmodel', 'df', 'amat', 'simp_df'. Default to 'bmodel'. -#' -#' @export -decompress_bmodel = function(x, encoding, gene=NULL, format='bmodel') -{ - #Setup default gene names. - gene_var = names(encoding)[!(grepl('&',names(encoding))|grepl('!',names(encoding)))][-1] - if(is.null(gene)) - { - gene = gene_var - } - stopifnot(length(gene_var)==length(gene)) - - #Round up values to 4 decimals. (due to external solver returning continuous values.) - x = round(x, 4) - - #Remove all empty terms from x. - x = x[!(x%%1==0)] #check for integer. - x = x[!(sapply(x, function(y) isTRUE(all.equal(y%%1,0.9999))))] #check for lower_bound values (i.e. 2.9999). (it is equivalent to integer.) - - #Filter encodings to remove reversed AND terms, if present. - encoding = encoding[!duplicated(encoding)] - - #Convert encodings into variables. - #Obtain 2 from 2.001. - comb_ind = round(x) - - #Convert numbers to characters. - comb_rule = as.character(x) - - #Convert '2' to '2.'. - comb_rule = unname(sapply(comb_rule, function(y) ifelse(!grepl('[.]', y), paste(y, '.', sep=''), y))) - - #Convert '2.' to '2.0'. - comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.]$', y), paste(y, '0', sep=''), y))) - - #Convert '2.0' to '2.00'. - comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.].$', y), paste(y, '0', sep=''), y))) - - #Convert '2.00' to '2.000'. - comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.]..$', y), paste(y, '0', sep=''), y))) - - #Convert '2.000' to '2.0000'. - comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.]...$', y), paste(y, '0', sep=''), y))) - - #Obtain '0001' from '2.0001'. - comb_rule = gsub('^[0-9]+[.]([0-9]+)', '\\1', comb_rule) - - #print(comb_rule) - stopifnot(all(nchar(comb_rule)==4)) - - #Remove 0 and !0. - zero_enc = encoding[grepl('0$', names(encoding))] #get encodings for 0 and !0. - stopifnot(length(zero_enc)==2) - comb_ind = comb_ind[comb_rule!=zero_enc[1]] #remove 0. - comb_rule = comb_rule[comb_rule!=zero_enc[1]] #remove 0. - comb_ind = comb_ind[comb_rule!=zero_enc[2]] #remove !0. - comb_rule = comb_rule[comb_rule!=zero_enc[2]] #remove !0. - stopifnot(length(comb_ind)==length(comb_rule)) - - #Generate reverse map. - rev_encoding = names(encoding) - names(rev_encoding) = encoding - - #Main conversion. - comb_rule = unname(rev_encoding[comb_rule]) - stopifnot(all(!is.na(comb_rule))) - - if(format=='df') - { - #Converting rules from var to gene names. - conv_comb_rule = comb_rule - for(i in 1:length(gene_var)) - { - conv_comb_rule = gsub(gene_var[i], gene[i], conv_comb_rule) - } - conv_comb_rule = unname(split(conv_comb_rule, as.factor(comb_ind))) - - #If there is completely empty rules for a gene, add placeholder in. - if(length(unique(comb_ind)) != length(gene_var)) - { - missing_ind = which(!(1:length(gene_var) %in% unique(comb_ind))) - - for(i in missing_ind) - { - conv_comb_rule = append(conv_comb_rule, '0', i-1) - } - } - stopifnot(length(conv_comb_rule)==length(gene_var)) - } - - #Convert vector into list. - comb_rule = unname(split(comb_rule, as.factor(comb_ind))) - - #If there is completely empty rules for a gene, add placeholder in. - if(length(unique(comb_ind)) != length(gene_var)) - { - missing_ind = which(!(1:length(gene_var) %in% unique(comb_ind))) - - for(i in missing_ind) - { - comb_rule = append(comb_rule, '0', i-1) - } - } - stopifnot(length(comb_rule)==length(gene_var)) - - if(format=='bmodel') - { - #Split into act and inh list. - act_rule = lapply(comb_rule, function(x) x[!grepl('!', x)]) - inh_rule = lapply(comb_rule, function(x) x[grepl('!', x)]) - - #Set '0' for empty rules. - act_rule[which(sapply(act_rule, function(x) length(x))==0)] = list('0') - inh_rule[which(sapply(inh_rule, function(x) length(x))==0)] = list('0') - - inh_rule = lapply(inh_rule, function(x) gsub('!', '', x)) #remove !. - - stopifnot(length(gene)==length(act_rule)) - - output = BoolModel(target=gene, target_var=gene_var, - rule_act=act_rule, rule_inh=inh_rule) - } else if(format=='df') - { - #This way of preparing output ensures that the order of 'output' is the same as 'x'. - out_var = c() - out_gene = c() - for(i in 1:length(comb_rule)) - { - for(j in 1:length(comb_rule[[i]])) - { - if(!grepl('!', comb_rule[[i]][j])) - { - if(comb_rule[[i]][j] != '0') - { - out_var = c(out_var, paste(gene_var[i], '<-', comb_rule[[i]][j])) - out_gene = c(out_gene, paste(gene[i], '<-', conv_comb_rule[[i]][j])) - } - } else - { - if(comb_rule[[i]][j] != '0') - { - out_var = c(out_var, paste(gene_var[i], '|-', gsub('!', '', comb_rule[[i]][j]))) - out_gene = c(out_gene, paste(gene[i], '|-', gsub('!', '', conv_comb_rule[[i]][j]))) - } - } - } - } - - output = cbind(out_gene, out_var) - } else if(format=='amat') - { - comb_rule = lapply(1:length(comb_rule), function(x) unlist(strsplit(comb_rule[[x]], '&'))) #remove &. - comb_rule = lapply(1:length(comb_rule), function(x) unique(gsub('!', '', comb_rule[[x]]))) #remove ! and non-unique gene interactions. - comb_rule = lapply(1:length(comb_rule), function(x) comb_rule[[x]][order(as.numeric(gsub('v(.+)s', '\\1', comb_rule[[x]])))]) #reorder the elements. - - output = matrix(NA, ncol=length(gene_var), nrow=length(gene_var)) - for(i in 1:nrow(output)) - { - output[i,] = gene_var %in% comb_rule[[i]] - } - output = t(output) - output = output + 0 - - colnames(output) = gene - rownames(output) = gene - } else if(format=='direct_amat') - { - comb_rule = lapply(1:length(comb_rule), function(x) unlist(strsplit(comb_rule[[x]], '&'))) #remove &. - comb_rule = lapply(comb_rule, unique) #remove non-unique gene interactions. - - output = matrix(0, ncol=length(gene_var), nrow=length(gene_var)) - for(i in 1:length(comb_rule)) - { - for(j in 1:length(comb_rule[[i]])) - { - row_ind = as.numeric(gsub('.+([0-9]+)s', '\\1', comb_rule[[i]][j])) - if(grepl('!', comb_rule[[i]][j])) - { - output[row_ind,i] = -1 - } else - { - output[row_ind,i] = 1 - } - } - } - - colnames(output) = gene - rownames(output) = gene - } else if(format=='simp_df') - { - comb_rule = lapply(1:length(comb_rule), function(x) unlist(strsplit(comb_rule[[x]], '&'))) #remove &. - comb_rule = lapply(1:length(comb_rule), function(x) unique(gsub('!', '', comb_rule[[x]]))) #remove ! and non-unique gene interactions. - comb_rule = lapply(1:length(comb_rule), function(x) comb_rule[[x]][order(as.numeric(gsub('v(.+)s', '\\1', comb_rule[[x]])))]) #reorder the elements. - comb_rule = comb_rule[!sapply(comb_rule, function(x) x[1]=='0')] #remove empty rules. - - output = matrix(NA, ncol=2, nrow=length(unlist(comb_rule))) - colnames(output) = c('from', 'to') - - entry_ind = 1 - for(i in 1:length(comb_rule)) - { - for(j in 1:length(comb_rule[[i]])) - { - output[entry_ind,1] = comb_rule[[i]][j] - output[entry_ind,2] = paste('v', i, 's', sep='') - - entry_ind = entry_ind + 1 - } - } - } else - { - stop('Please provide a valid format.') - } - - return(output) -} +#' @title Get corresponding encodings for compression or decompression. +#' +#' @description +#' This function gets all possible terms (single or double terms) and their corresponding encodings. +#' This function limits the number of possible variables in the model to 999. +#' +#' @param bmodel S4 BoolModel object. +#' @param force_and logical. Whether to include ANDs in the encoding even if no AND is present in the bmodel object. Defaults to T. +#' +#' @export +get_encodings = function(bmodel, force_and=T) +{ + if(force_and) + { + and_bool = T + } else + { + and_bool = check_and(bmodel) + } + + + #Get all possible terms. + svar = bmodel@target_var + if(and_bool) + { + dvar = sapply(combn(svar, 2, simplify=F), function(x) paste(x, collapse='&')) #get all possible interacting pairs. + dvar = c(dvar, sapply(combn(svar, 2, simplify=F), function(x) paste(rev(x), collapse='&'))) #get the reversed pattern as well. + + term_pool = c(svar, dvar) + term_pool = c('0', term_pool, '!0', paste('!', term_pool, sep='')) #add in inh terms. + + #Generate index for activation terms. + num_pool = seq(1, length(svar)+1) #get numbers for svar. + num_pool = c(num_pool, as.vector(replicate(2, seq(max(num_pool)+1, max(num_pool)+(length(dvar)/2))))) #get numbers for both forward and reverse dvar. + + #Generate index for inhibition terms. + num_pool = c(num_pool, seq(max(num_pool)+1, max(num_pool)+length(svar)+1)) #get numbers for svar. + num_pool = c(num_pool, as.vector(replicate(2, seq(max(num_pool)+1, max(num_pool)+(length(dvar)/2))))) #get numbers for both forward and reverse dvar. + + num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==1, paste('0', x, sep=''), x))) #convert single digit to double digit. + num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==2, paste('0', x, sep=''), x))) #convert double digit to triple digit. + num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==3, paste('0', x, sep=''), x))) #convert triple digit to quadruple digit. + + names(num_pool) = term_pool + } else + { + term_pool = svar + term_pool = c('0', term_pool, '!0', paste('!', term_pool, sep='')) #add in inh terms. + + #Generate index for activation terms. + num_pool = seq(1, length(svar)+1) #get numbers for svar. + + #Generate index for inhibition terms. + num_pool = c(num_pool, seq(max(num_pool)+1, max(num_pool)+length(svar)+1)) #get numbers for svar. + + num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==1, paste('0', x, sep=''), x))) #convert single digit to double digit. + num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==2, paste('0', x, sep=''), x))) #convert double digit to triple digit. + num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==3, paste('0', x, sep=''), x))) #convert triple digit to quadruple digit. + + names(num_pool) = term_pool + } + + #Store original gene names. + bnames = bmodel@target + names(bnames) = bmodel@target_var + + stopifnot(all(!is.na(names(num_pool)))) + stopifnot(all(!is.na(term_pool))) + + return(list(num_pool, bnames)) +} + +#' @title Compress BoolModel +#' +#' @description +#' This function compresses S4 BoolModel object by representing variables using numbers, and also only the act rules and inh rules are kept. +#' Return a list of 3 vectors, corresponding to act rules and inh rules. +#' +#' @param bmodel S4 BoolModel object. +#' @param encoding named numerical vector returned by get_encodings(). +#' +#' @export +compress_bmodel = function(bmodel, encoding) +{ + encoding = encoding[[1]] + stopifnot(length(bmodel@target)== length(encoding[!grepl('&|!', names(encoding))])-1) #check if bmodel and encoding match. + + #Convert variables into encodings. + #Convert 'v1s' into '001'. + act_list = lapply(bmodel@rule_act, function(x) unname(encoding[x])) + inh_list = lapply(bmodel@rule_inh, function(x) unname(encoding[paste('!', x, sep='')])) + + stopifnot(all(!is.na(unlist(act_list)))) + stopifnot(all(!is.na(unlist(inh_list)))) + + #Get max number of variables. + max_lim = max(sapply(act_list, length) + sapply(inh_list, length)) + + #Convert '0001' into 1.0001. + act_list = lapply(1:length(act_list), function(x) as.numeric(paste(x, act_list[[x]], sep='.'))) + inh_list = lapply(1:length(inh_list), function(x) as.numeric(paste(x, inh_list[[x]], sep='.'))) + + #Convert list into vector. + act_vec = unique(unlist(act_list)) + inh_vec = unique(unlist(inh_list)) + + #Add in integer that represents empty spots, according to max_lim. + empty_term = max_lim - (rle(round(act_vec))$lengths+rle(round(inh_vec))$lengths) #get freq of missing terms. + empty_term[empty_term < 0] = 0 #set negative values to 0. note that negative values can occur because this is a soft constraint. + empty_term = unlist(sapply(1:length(empty_term), function(x) rep(x, empty_term[x]))) + + return(sort(c(act_vec,inh_vec, empty_term))) +} + +#' @title Decompress BoolModel +#' +#' @description +#' This function decompresses the bmodel compressed by compress_bmodel(). +#' Return a S4 BoolModel object. +#' +#' @param x vector returned by compress_bmodel. +#' @param encoding named numerical vector returned by get_encodings(). +#' @param format character. Specifies which format to return. Possible values: 'bmodel', 'df', 'amat', 'simp_df'. Default to 'bmodel'. +#' +#' @export +decompress_bmodel = function(x, encoding, format='bmodel') +{ + gene = encoding[[2]] + encoding = encoding[[1]] + + #Setup default gene names. + gene_var = names(encoding)[!(grepl('&',names(encoding))|grepl('!',names(encoding)))][-1] + if(is.null(gene)) + { + gene = gene_var + } + stopifnot(length(gene_var)==length(gene)) + + #Round up values to 4 decimals. (due to external solver returning continuous values.) + x = round(x, 4) + + #Remove all empty terms from x. + x = x[!(x%%1==0)] #check for integer. + x = x[!(sapply(x, function(y) isTRUE(all.equal(y%%1,0.9999))))] #check for lower_bound values (i.e. 2.9999). (it is equivalent to integer.) + + #Filter encodings to remove reversed AND terms, if present. + encoding = encoding[!duplicated(encoding)] + + #Convert encodings into variables. + #Obtain 2 from 2.001. + comb_ind = round(x) + + #Convert numbers to characters. + comb_rule = as.character(x) + + #Convert '2' to '2.'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(!grepl('[.]', y), paste(y, '.', sep=''), y))) + + #Convert '2.' to '2.0'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.]$', y), paste(y, '0', sep=''), y))) + + #Convert '2.0' to '2.00'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.].$', y), paste(y, '0', sep=''), y))) + + #Convert '2.00' to '2.000'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.]..$', y), paste(y, '0', sep=''), y))) + + #Convert '2.000' to '2.0000'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.]...$', y), paste(y, '0', sep=''), y))) + + #Obtain '0001' from '2.0001'. + comb_rule = gsub('^[0-9]+[.]([0-9]+)', '\\1', comb_rule) + + #print(comb_rule) + stopifnot(all(nchar(comb_rule)==4)) + + #Remove 0 and !0. + zero_enc = encoding[grepl('0$', names(encoding))] #get encodings for 0 and !0. + stopifnot(length(zero_enc)==2) + comb_ind = comb_ind[comb_rule!=zero_enc[1]] #remove 0. + comb_rule = comb_rule[comb_rule!=zero_enc[1]] #remove 0. + comb_ind = comb_ind[comb_rule!=zero_enc[2]] #remove !0. + comb_rule = comb_rule[comb_rule!=zero_enc[2]] #remove !0. + stopifnot(length(comb_ind)==length(comb_rule)) + + #Generate reverse map. + rev_encoding = names(encoding) + names(rev_encoding) = encoding + + #Main conversion. + comb_rule = unname(rev_encoding[comb_rule]) + stopifnot(all(!is.na(comb_rule))) + + if(format=='df') + { + #Converting rules from var to gene names. + conv_comb_rule = comb_rule + for(i in 1:length(gene_var)) + { + conv_comb_rule = gsub(gene_var[i], gene[i], conv_comb_rule) + } + conv_comb_rule = unname(split(conv_comb_rule, as.factor(comb_ind))) + + #If there is completely empty rules for a gene, add placeholder in. + if(length(unique(comb_ind)) != length(gene_var)) + { + missing_ind = which(!(1:length(gene_var) %in% unique(comb_ind))) + + for(i in missing_ind) + { + conv_comb_rule = append(conv_comb_rule, '0', i-1) + } + } + stopifnot(length(conv_comb_rule)==length(gene_var)) + } + + #Convert vector into list. + comb_rule = unname(split(comb_rule, as.factor(comb_ind))) + + #If there is completely empty rules for a gene, add placeholder in. + if(length(unique(comb_ind)) != length(gene_var)) + { + missing_ind = which(!(1:length(gene_var) %in% unique(comb_ind))) + + for(i in missing_ind) + { + comb_rule = append(comb_rule, '0', i-1) + } + } + stopifnot(length(comb_rule)==length(gene_var)) + + if(format=='bmodel') + { + #Split into act and inh list. + act_rule = lapply(comb_rule, function(x) x[!grepl('!', x)]) + inh_rule = lapply(comb_rule, function(x) x[grepl('!', x)]) + + #Set '0' for empty rules. + act_rule[which(sapply(act_rule, function(x) length(x))==0)] = list('0') + inh_rule[which(sapply(inh_rule, function(x) length(x))==0)] = list('0') + + inh_rule = lapply(inh_rule, function(x) gsub('!', '', x)) #remove !. + + stopifnot(length(gene)==length(act_rule)) + + output = BoolModel(target=gene, target_var=gene_var, + rule_act=act_rule, rule_inh=inh_rule) + } else if(format=='df') + { + #This way of preparing output ensures that the order of 'output' is the same as 'x'. + out_var = c() + out_gene = c() + for(i in 1:length(comb_rule)) + { + for(j in 1:length(comb_rule[[i]])) + { + if(!grepl('!', comb_rule[[i]][j])) + { + if(comb_rule[[i]][j] != '0') + { + out_var = c(out_var, paste(gene_var[i], '<-', comb_rule[[i]][j])) + out_gene = c(out_gene, paste(gene[i], '<-', conv_comb_rule[[i]][j])) + } + } else + { + if(comb_rule[[i]][j] != '0') + { + out_var = c(out_var, paste(gene_var[i], '|-', gsub('!', '', comb_rule[[i]][j]))) + out_gene = c(out_gene, paste(gene[i], '|-', gsub('!', '', conv_comb_rule[[i]][j]))) + } + } + } + } + + output = cbind(out_gene, out_var) + } else if(format=='amat') + { + comb_rule = lapply(1:length(comb_rule), function(x) unlist(strsplit(comb_rule[[x]], '&'))) #remove &. + comb_rule = lapply(1:length(comb_rule), function(x) unique(gsub('!', '', comb_rule[[x]]))) #remove ! and non-unique gene interactions. + comb_rule = lapply(1:length(comb_rule), function(x) comb_rule[[x]][order(as.numeric(gsub('v(.+)s', '\\1', comb_rule[[x]])))]) #reorder the elements. + + output = matrix(NA, ncol=length(gene_var), nrow=length(gene_var)) + for(i in 1:nrow(output)) + { + output[i,] = gene_var %in% comb_rule[[i]] + } + output = t(output) + output = output + 0 + + colnames(output) = gene + rownames(output) = gene + } else if(format=='direct_amat') + { + comb_rule = lapply(1:length(comb_rule), function(x) unlist(strsplit(comb_rule[[x]], '&'))) #remove &. + comb_rule = lapply(comb_rule, unique) #remove non-unique gene interactions. + + output = matrix(0, ncol=length(gene_var), nrow=length(gene_var)) + for(i in 1:length(comb_rule)) + { + for(j in 1:length(comb_rule[[i]])) + { + row_ind = as.numeric(gsub('.+([0-9]+)s', '\\1', comb_rule[[i]][j])) + if(grepl('!', comb_rule[[i]][j])) + { + output[row_ind,i] = -1 + } else + { + output[row_ind,i] = 1 + } + } + } + + colnames(output) = gene + rownames(output) = gene + } else if(format=='simp_df') + { + comb_rule = lapply(1:length(comb_rule), function(x) unlist(strsplit(comb_rule[[x]], '&'))) #remove &. + comb_rule = lapply(1:length(comb_rule), function(x) unique(gsub('!', '', comb_rule[[x]]))) #remove ! and non-unique gene interactions. + comb_rule = lapply(1:length(comb_rule), function(x) comb_rule[[x]][order(as.numeric(gsub('v(.+)s', '\\1', comb_rule[[x]])))]) #reorder the elements. + comb_rule = comb_rule[!sapply(comb_rule, function(x) x[1]=='0')] #remove empty rules. + + output = matrix(NA, ncol=2, nrow=length(unlist(comb_rule))) + colnames(output) = c('from', 'to') + + entry_ind = 1 + for(i in 1:length(comb_rule)) + { + for(j in 1:length(comb_rule[[i]])) + { + output[entry_ind,1] = comb_rule[[i]][j] + output[entry_ind,2] = paste('v', i, 's', sep='') + + entry_ind = entry_ind + 1 + } + } + } else + { + stop('Please provide a valid format.') + } + + return(output) +} diff --git a/R/data_desc.R b/R/data_desc.R index 397251b..ffb664b 100644 --- a/R/data_desc.R +++ b/R/data_desc.R @@ -1,136 +1,136 @@ -#' @title HSC Boolean Model from Bonzanni et al. -#' -#' @description -#' A Boolean model describing HSC in mice. It contains 11 genes. Its steady state is a cyclic loop of 32 states. -#' -#' @format -#' A data frame with 11 rows and 2 columns. -#' -#' Rows: each row consists of 1 gene and its associated Boolean rule. -#' Column 1: target gene -#' Column 2: associated Boolean rule -#' -#' @docType data -#' @name bon_bmodel -#' @usage data(bon_bmodel) -NULL - -#' @title Initial state from Bonzanni et al. -#' -#' @description -#' An intial state specified in Bonzanni et al. It contains a set of Boolean values for 11 genes. -#' -#' @format -#' A data frame with 1 row and 11 columns. -#' -#' Rows: each row consists of 1 set of Boolean state. -#' Columns: each column is for 1 gene/variable. -#' -#' @docType data -#' @name bon_istate -#' @usage data(bon_istate) -NULL - -#' @title Myeloid Boolean Model from Krumsiek et al. -#' -#' @description -#' A Boolean model describing myeloid development in mice. It contains 11 genes. Its steady states are 4 static attractors. -#' -#' @format -#' A data frame with 11 rows and 2 columns. -#' -#' Rows: each row consists of 1 gene and its associated Boolean rule. -#' Column 1: target gene -#' Column 2: associated Boolean rule -#' -#' @docType data -#' @name krum_bmodel -#' @usage data(krum_bmodel) -NULL - -#' @title Initial state from Krumsiek et al. -#' -#' @description -#' An intial state specified in Krumsiek et al. It contains a set of Boolean values for 11 genes. -#' -#' @format -#' A data frame with 1 row and 11 columns. -#' -#' Rows: each row consists of 1 set of Boolean state. -#' Columns: each column is for 1 gene/variable. -#' -#' @docType data -#' @name krum_istate -#' @usage data(krum_istate) -NULL - -#' @title Raw single cell qRT-PCR expression data from Wilson et al. -#' -#' @description -#' A raw single cell expression data obtained from multiple cell types. -#' -#' @format -#' A data frame with 1626 rows and 44 columns. -#' -#' Rows: each row consists of raw expression values from 1 cell. -#' Columns: each column is for 1 gene/variable. -#' -#' @docType data -#' @name wilson_raw_data -#' @usage data(wilson_raw_data) -NULL - -#' @title Raw single cell RNAseq expression data from Wilson et al. -#' -#' @description -#' A raw single cell expression data obtained from multiple cell types. -#' -#' @format -#' A data frame with 96 rows and 38498 columns. -#' -#' Rows: each row consists of raw expression values from 1 cell. -#' Columns: each column is for 1 gene/variable. -#' -#' @docType data -#' @name wilson_raw_rnaseq -#' @usage data(wilson_raw_rnaseq) -NULL - -#' @title Example Boolean Model used in the vignette -#' -#' @description -#' A Boolean model used in the examples of the vignette. -#' -#' @format -#' Each Boolean model is a BoolModel object. -#' -#' @docType data -#' @name emodel1 -#' @usage data(example_models) -NULL - -#' @title Example Boolean Model used in the vignette -#' -#' @description -#' A Boolean model used in the examples of the vignette. -#' -#' @format -#' Each Boolean model is a BoolModel object. -#' -#' @docType data -#' @name emodel2 -#' @usage data(example_models) -NULL - -#' @title Example Boolean Model used in the vignette -#' -#' @description -#' A Boolean model used in the examples of the vignette. -#' -#' @format -#' Each Boolean model is a BoolModel object. -#' -#' @docType data -#' @name emodel3 -#' @usage data(example_models) -NULL +#' @title HSC Boolean Model from Bonzanni et al. +#' +#' @description +#' A Boolean model describing HSC in mice. It contains 11 genes. Its steady state is a cyclic loop of 32 states. +#' +#' @format +#' A data frame with 11 rows and 2 columns. +#' +#' Rows: each row consists of 1 gene and its associated Boolean rule. +#' Column 1: target gene +#' Column 2: associated Boolean rule +#' +#' @docType data +#' @name bon_bmodel +#' @usage data(bon_bmodel) +NULL + +#' @title Initial state from Bonzanni et al. +#' +#' @description +#' An intial state specified in Bonzanni et al. It contains a set of Boolean values for 11 genes. +#' +#' @format +#' A data frame with 1 row and 11 columns. +#' +#' Rows: each row consists of 1 set of Boolean state. +#' Columns: each column is for 1 gene/variable. +#' +#' @docType data +#' @name bon_istate +#' @usage data(bon_istate) +NULL + +#' @title Myeloid Boolean Model from Krumsiek et al. +#' +#' @description +#' A Boolean model describing myeloid development in mice. It contains 11 genes. Its steady states are 4 static attractors. +#' +#' @format +#' A data frame with 11 rows and 2 columns. +#' +#' Rows: each row consists of 1 gene and its associated Boolean rule. +#' Column 1: target gene +#' Column 2: associated Boolean rule +#' +#' @docType data +#' @name krum_bmodel +#' @usage data(krum_bmodel) +NULL + +#' @title Initial state from Krumsiek et al. +#' +#' @description +#' An intial state specified in Krumsiek et al. It contains a set of Boolean values for 11 genes. +#' +#' @format +#' A data frame with 1 row and 11 columns. +#' +#' Rows: each row consists of 1 set of Boolean state. +#' Columns: each column is for 1 gene/variable. +#' +#' @docType data +#' @name krum_istate +#' @usage data(krum_istate) +NULL + +#' @title Raw single cell qRT-PCR expression data from Wilson et al. +#' +#' @description +#' A raw single cell expression data obtained from multiple cell types. +#' +#' @format +#' A data frame with 1626 rows and 44 columns. +#' +#' Rows: each row consists of raw expression values from 1 cell. +#' Columns: each column is for 1 gene/variable. +#' +#' @docType data +#' @name wilson_raw_data +#' @usage data(wilson_raw_data) +NULL + +#' @title Raw single cell RNAseq expression data from Wilson et al. +#' +#' @description +#' A raw single cell expression data obtained from multiple cell types. +#' +#' @format +#' A data frame with 96 rows and 38498 columns. +#' +#' Rows: each row consists of raw expression values from 1 cell. +#' Columns: each column is for 1 gene/variable. +#' +#' @docType data +#' @name wilson_raw_rnaseq +#' @usage data(wilson_raw_rnaseq) +NULL + +#' @title Example Boolean Model used in the vignette +#' +#' @description +#' A Boolean model used in the examples of the vignette. +#' +#' @format +#' Each Boolean model is a BoolModel object. +#' +#' @docType data +#' @name emodel1 +#' @usage data(example_models) +NULL + +#' @title Example Boolean Model used in the vignette +#' +#' @description +#' A Boolean model used in the examples of the vignette. +#' +#' @format +#' Each Boolean model is a BoolModel object. +#' +#' @docType data +#' @name emodel2 +#' @usage data(example_models) +NULL + +#' @title Example Boolean Model used in the vignette +#' +#' @description +#' A Boolean model used in the examples of the vignette. +#' +#' @format +#' Each Boolean model is a BoolModel object. +#' +#' @docType data +#' @name emodel3 +#' @usage data(example_models) +NULL diff --git a/R/general.R b/R/general.R index 3569cfd..fac95d8 100644 --- a/R/general.R +++ b/R/general.R @@ -1,458 +1,478 @@ -#' @title Verbose cat -#' -#' @description -#' This function is a simple wrapper for cat with a Boolean value for turning it on/off. -#' -#' @param string character vector. String intended to be printed. -#' @param bool logical. Specify whether to print the string to stout or not.+ -vcat = function(string, bool) -{ - if(bool) - { - cat(string) - } - - invisible() -} - -#' @title Check if containing AND terms -#' -#' @description -#' This function checks if a particular Boolean model contains AND terms. -#' -#' @param bmodel BoolModel object. -check_and = function(bmodel) -{ - and_bool = F - - if(any(grepl('&', bmodel@rule_act))) - { - and_bool = T - } else if(any(grepl('&', bmodel@rule_inh))) - { - and_bool = T - } - - return(and_bool) -} - -#' @title Extract Boolean terms -#' -#' @description -#' This function extracts the terms within a Boolean rule, using OR as the separator. Bracketed variables are counted as one term. -#' -#' @param brule character vector. It should be either an activating or inhibiting rule for a target gene. -extract_term = function(brule) -{ - if(brule!='0') #account for cases with empty rule. - { - #Strip outermost brackets. - brule = gsub('^[(](.+)[)]$', '\\1', brule) - term = strsplit(brule, '[|]')[[1]] - term = gsub('^[(](.+)[)]$', '\\1', term) - } else - { - term = '0' - } - - return(term) -} - -#' @title Check for matching terms -#' -#' @description -#' This function checks if the first term is found within a vector of terms. Return a logical value. -#' It is smarter than simple string matching, e.g. match_term(v1s&v2s, v2s&v1s) == T, match_term(v1s, v1s&v2s) == T, match_term(v1s&v2s, v1s) == T. -#' -#' @param t1 character vector of length 1 or more. It should be a vector of gene variable. -#' @param t2 character vector of length 1 or more. It should be a vector of gene variables. -#' @param mode character. Indicates the mode of action. Options: 'logic', 'unique'. Default to 'logic'. -match_term = function(t1, t2, mode='logic') -{ - if(t1[1] != '0') - { - t1 = unlist(strsplit(t1, '&')) #this will split up terms with &. - - #Check if t1 is present in t2. - #Note: if t1='v1s', this will match 'v1s', 'v2s&v1s'. - if(length(t2) > 1) - { - check_mat = sapply(t1, function(x) grepl(x, t2)) - } else - { - check_mat = t(matrix(sapply(t1, function(x) grepl(x, t2)))) #matrix() is necessary when t2 has length 1. - } - - if(mode == 'logic') - { - check_ind = which(rowSums(check_mat) > 0) - - if(length(check_ind) == 0) - { - match = FALSE - } else - { - match = TRUE - } - - return(match) - } else if(mode == 'unique') - { - check_ind = which(rowSums(check_mat) == 0) - - return(t2[check_ind]) - } else - { - stop('Error in specifying match_term() mode.') - } - } else if(t1[1] == '0') - { - if(mode == 'logic') - { - return(FALSE) - } else if(mode == 'unique') - { - return(t2) - } else - { - stop('Error in specifying match_term() mode.') - } - } -} - -#' @title Pick a random minimum value -#' -#' @description -#' This function locates the minimum value in a vector (similar to which.min), however it will randomly break ties when there are multiple minimum values. -#' -#' @param x numeric vector. -#' @param favour_first logical. If this is TRUE, and the first value in the vector supplies is one of the min, the index for the first value will always be returned. -which.random.min = function(x, favour_first=F) -{ - id = which(x == min(x)) #get indices of all minimum values. - - # If the minimum value appear multiple times pick one index at random, otherwise just return its position in the vector - # Exception occurs if the initial model (at position 1) also has the minimum value, then the initial model is favoured. - if(length(id) > 1 & 1 %in% id & favour_first==T) - { - id = 1 - } else if(length(id) > 1) #for all other cases with multiple minimum values, pick a random one. - { - id = sample(id, 1) - } else if(length(id) == 1) #if there is only single min value, return it. - { - id = id - } - - return(id) -} - -#' @title Filter columns of df in a list -#' -#' @description -#' This function filters columns of multiple df in a list, when compared using a vector. Use through lapply(). -#' -#' @param x data frame. It should be a list element if used from lapply. -#' @param y character vector. It should contains gene names found in the colnames of the data frame. -#' @param uniq_bool logical. Whether to return unique rows only. -filter_dflist = function(x, y, uniq_bool=T) -{ - stopifnot(class(x)== 'data.frame' | class(x)== 'matrix') - - if(uniq_bool) - { - return(unique(x[,colnames(x) %in% y, drop=F])) - } else - { - return(x[,colnames(x) %in% y, drop=F]) - } -} - -#' @title Calculate distance between Boolean models -#' -#' @description -#' This method takes in two models and calculate the distance between them. The value return indicate the number of steps between the two models. -#' -#' @param x S4 BoolModel object. Test model. -#' @param y S4 BoolModel object. Reference model. -#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. -#' -#' @export -model_dist = function(x, y, max_varperrule) -{ - set_diff = unlist(model_setdiff(x, y, max_varperrule)) - - #Calculate total dist. - t_dist = length(set_diff) - - #Return results. - return(t_dist) -} - -#' @title Find the set difference between two Boolean models -#' -#' @description -#' This method takes in two models and find the set difference between them. Return a vector with the set difference. -#' -#' @param x S4 BoolModel object. Test model. -#' @param y S4 BoolModel object. Reference model. -#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. -#' @param directed logical. If TRUE, return the difference in terms with respect to x. -#' -#' @export -model_setdiff = function(x, y, max_varperrule, directed=F) -{ - stopifnot(length(x@target) == length(y@target)) - stopifnot(get_encodings(x)==get_encodings(y)) - - ind = get_encodings(x) - - x1 = compress_bmodel(x, ind, max_varperrule) - x2 = compress_bmodel(y, ind, max_varperrule) - - #Pick which model has more terms, and which has less. - more_ind = which.max(c(length(x1), length(x2))) - less_ind = ifelse(more_ind==1, 2, 1) - - more_x = list(x1, x2)[[more_ind]] - less_x = list(x1, x2)[[less_ind]] - - #Remove empty terms. - more_x = more_x[more_x%%1!=0] - less_x = less_x[less_x%%1!=0] - - #Remove 0 and !0. - #Find the encodings for 0 and !0. - zero_enc = as.numeric(paste('0.', ind[which(names(ind)=='0')], sep='')) - notzero_enc = as.numeric(paste('0.', ind[which(names(ind)=='!0')], sep='')) - - #Remove them. - more_x = more_x[round(more_x-round(more_x), 4)!=zero_enc] - more_x = more_x[round(more_x-round(more_x), 4)!=notzero_enc] - - less_x = less_x[round(less_x-round(less_x), 4)!=zero_enc] - less_x = less_x[round(less_x-round(less_x), 4)!=notzero_enc] - - #Check for set difference. - more_bool = sapply(more_x, function(x) !(x %in% less_x)) - less_bool = sapply(less_x, function(x) !(x %in% more_x)) - - more_diff = more_x[more_bool] - less_diff = less_x[less_bool] - - more_vec = c() - if(length(more_diff)>0) - { - gene = round(more_diff) - rule = gsub('^[0-9]+[.]', '', gsub(' ', '', format(more_diff, nsmall=4))) - - gene = x@target[gene] - rule = unname(sapply(rule, function(x) names(which(ind==x))[1])) - - more_vec = rule - names(more_vec) = gene - } - - less_vec = c() - if(length(less_diff)>0) - { - gene = round(less_diff) - rule = gsub('^[0-9]+[.]', '', gsub(' ', '', format(less_diff, nsmall=4))) - - gene = x@target[gene] - rule = unname(sapply(rule, function(x) names(which(ind==x))[1])) - - less_vec = rule - names(less_vec) = gene - } - - #Handle step difference that includes changing to and from AND terms. - if(length(more_vec) != 0 & length(less_vec) != 0) - { - for(i in 1:length(more_vec)) - { - for(j in 1:length(less_vec)) - { - if(names(more_vec[i])==names(less_vec[j])) - { - if(grepl('!', more_vec[i]) & grepl('!', less_vec[j])) #for both inh terms. do not cross compare act and inh terms. - { - #Split & terms if present. - mvec = unlist(strsplit(more_vec[i], '&')) - lvec = unlist(strsplit(less_vec[j], '&')) - - #Removes !. - mvec = gsub('!', '', mvec) - lvec = gsub('!', '', lvec) - - #Check for presence of &. - mand_bool = ifelse(grepl('&', more_vec[i]), T, F) - land_bool = ifelse(grepl('&', less_vec[j]), T, F) - - mbool = mvec %in% lvec - lbool = lvec %in% mvec - - #assign NA if there is no term remaining. - if(length(mvec[!mbool])!=0) - { - if(!is.na(mvec[!mbool][1])) - { - more_vec[i] = paste(mvec[!mbool], collapse='&') - } - } else - { - more_vec[i] = NA - } - - if(length(lvec[!lbool])!=0) - { - if(!is.na(lvec[!lbool][1])) - { - less_vec[j] = paste(lvec[!lbool], collapse='&') - } - } else - { - less_vec[j] = NA - } - - #if this involves AND term, adds & indicator to term name. - if(mand_bool & !is.na(more_vec[i]) & !grepl('&', more_vec[i])) - { - more_vec[i] = paste('&', more_vec[i], sep='') - } - if(land_bool & !is.na(less_vec[j]) & !grepl('&', less_vec[j])) - { - less_vec[j] = paste('&', less_vec[j], sep='') - } - - #Add the ! back to the terms. - if(!is.na(more_vec[i])) - { - if(grepl('^&', more_vec[i])) - { - more_vec[i] = gsub('(^[&])(.+)', '\\1!\\2', more_vec[i]) - } else - { - more_vec[i] = paste('!', more_vec[i], sep='') - } - } - if(!is.na(less_vec[j])) - { - if(grepl('^&', less_vec[j])) - { - less_vec[j] = gsub('(^[&])(.+)', '\\1!\\2', less_vec[j]) - } else - { - less_vec[j] = paste('!', less_vec[j], sep='') - } - } - - } else if(!grepl('!', more_vec[i]) & !grepl('!', less_vec[j])) #for both act terms. do not cross compare act and inh terms. - { - #Split & terms if present. - mvec = unlist(strsplit(more_vec[i], '&')) - lvec = unlist(strsplit(less_vec[j], '&')) - - #Check for presence of &. - mand_bool = ifelse(grepl('&', more_vec[i]), T, F) - land_bool = ifelse(grepl('&', less_vec[j]), T, F) - - mbool = mvec %in% lvec - lbool = lvec %in% mvec - - #assign NA if there is no term remaining. - if(length(mvec[!mbool])!=0) - { - if(!is.na(mvec[!mbool][1])) - { - more_vec[i] = paste(mvec[!mbool], collapse='&') - } - } else - { - more_vec[i] = NA - } - - if(length(lvec[!lbool])!=0) - { - if(!is.na(lvec[!lbool][1])) - { - less_vec[j] = paste(lvec[!lbool], collapse='&') - } - } else - { - less_vec[j] = NA - } - - #if this involves AND term, adds & indicator to term name. - if(mand_bool & !is.na(more_vec[i]) & !grepl('&', more_vec[i])) - { - more_vec[i] = paste('&', more_vec[i], sep='') - } - if(land_bool & !is.na(less_vec[j]) & !grepl('&', less_vec[j])) - { - less_vec[j] = paste('&', less_vec[j], sep='') - } - } - } - } - } - } - - #Remove NAs. - if(!is.null(more_vec)) - { - more_vec = more_vec[!is.na(more_vec)] - } - if(!is.null(less_vec)) - { - less_vec = less_vec[!is.na(less_vec)] - } - - #If there are any AND term remaining here, they should be separated as they count as 2 steps. - if(length(more_vec)!=0) - { - for(i in 1:length(more_vec)) - { - if(grepl('.+&.+', more_vec[i])) - { - tmp_entry = unlist(strsplit(more_vec[i], '&'), use.names=F) - names(tmp_entry) = rep(names(more_vec[i]), length(tmp_entry)) - more_vec = c(more_vec, tmp_entry) - more_vec = more_vec[-i] - } - } - } - - if(length(less_vec)!=0) - { - for(i in 1:length(less_vec)) - { - if(grepl('.+&.+', less_vec[i])) - { - tmp_entry = unlist(strsplit(less_vec[i], '&'), use.names=F) - names(tmp_entry) = rep(names(less_vec[i]), length(tmp_entry)) - less_vec = c(less_vec, tmp_entry) - less_vec = less_vec[-i] - } - } - } - - comb_vec = c(more_vec, less_vec) - #Replace the gene names back into update functions. - for(i in 1:length(x@target_var)) - { - comb_vec = gsub(x@target_var[i], x@target[i], comb_vec) - } - - stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(comb_vec, '&'))) | - !grepl('^0$', unlist(strsplit(comb_vec, '&'))))) #check if all gene name conversions is successful. - - #Return results. - if(directed) - { - return(list(more=more_vec, less=less_vec)) - } else - { - return(list(comb_vec)) - } +#' @title Verbose cat +#' +#' @description +#' This function is a simple wrapper for cat with a Boolean value for turning it on/off. +#' +#' @param string character vector. String intended to be printed. +#' @param bool logical. Specify whether to print the string to stout or not.+ +vcat = function(string, bool) +{ + if(bool) + { + cat(string) + } + + invisible() +} + +#' @title Check if containing AND terms +#' +#' @description +#' This function checks if a particular Boolean model contains AND terms. +#' +#' @param bmodel BoolModel object. +check_and = function(bmodel) +{ + and_bool = F + + if(any(grepl('&', bmodel@rule_act))) + { + and_bool = T + } else if(any(grepl('&', bmodel@rule_inh))) + { + and_bool = T + } + + return(and_bool) +} + +#' @title Extract Boolean terms +#' +#' @description +#' This function extracts the terms within a Boolean rule, using OR as the separator. Bracketed variables are counted as one term. +#' +#' @param brule character vector. It should be either an activating or inhibiting rule for a target gene. +extract_term = function(brule) +{ + if(brule!='0') #account for cases with empty rule. + { + #Strip outermost brackets. + brule = gsub('^[(](.+)[)]$', '\\1', brule) + term = strsplit(brule, '[|]')[[1]] + term = gsub('^[(](.+)[)]$', '\\1', term) + } else + { + term = '0' + } + + return(term) +} + +#' @title Check for matching terms +#' +#' @description +#' This function checks if the first term is found within a vector of terms. Return a logical value. +#' It is smarter than simple string matching, e.g. match_term(v1s&v2s, v2s&v1s) == T, match_term(v1s, v1s&v2s) == T, match_term(v1s&v2s, v1s) == T. +#' +#' @param t1 character vector of length 1 or more. It should be a vector of gene variable. +#' @param t2 character vector of length 1 or more. It should be a vector of gene variables. +#' @param mode character. Indicates the mode of action. Options: 'logic', 'unique'. Default to 'logic'. +match_term = function(t1, t2, mode='logic') +{ + if(t1[1] != '0') + { + t1 = unlist(strsplit(t1, '&')) #this will split up terms with &. + + #Check if t1 is present in t2. + #Note: if t1='v1s', this will match 'v1s', 'v2s&v1s'. + if(length(t2) > 1) + { + check_mat = sapply(t1, function(x) grepl(x, t2)) + } else + { + check_mat = t(matrix(sapply(t1, function(x) grepl(x, t2)))) #matrix() is necessary when t2 has length 1. + } + + if(mode == 'logic') + { + check_ind = which(rowSums(check_mat) > 0) + + if(length(check_ind) == 0) + { + match = FALSE + } else + { + match = TRUE + } + + return(match) + } else if(mode == 'unique') + { + check_ind = which(rowSums(check_mat) == 0) + + return(t2[check_ind]) + } else + { + stop('Error in specifying match_term() mode.') + } + } else if(t1[1] == '0') + { + if(mode == 'logic') + { + return(FALSE) + } else if(mode == 'unique') + { + return(t2) + } else + { + stop('Error in specifying match_term() mode.') + } + } +} + +#' @title Pick a random minimum value +#' +#' @description +#' This function locates the minimum value in a vector (similar to which.min), however it will randomly break ties when there are multiple minimum values. +#' +#' @param x numeric vector. +#' @param favour_first logical. If this is TRUE, and the first value in the vector supplies is one of the min, the index for the first value will always be returned. +which.random.min = function(x, favour_first=F) +{ + id = which(x == min(x)) #get indices of all minimum values. + + # If the minimum value appear multiple times pick one index at random, otherwise just return its position in the vector + # Exception occurs if the initial model (at position 1) also has the minimum value, then the initial model is favoured. + if(length(id) > 1 & 1 %in% id & favour_first==T) + { + id = 1 + } else if(length(id) > 1) #for all other cases with multiple minimum values, pick a random one. + { + id = sample(id, 1) + } else if(length(id) == 1) #if there is only single min value, return it. + { + id = id + } + + return(id) +} + +# #' @title Filter a Boolean model +# #' +# #' @description +# #' This function filters a Boolean model to retain only overlapping genes. +# #' +# #' @param x BoolModel. +# #' @param y character vector. Overlapping genes. +# filter_bmodel = function(x, y) +# { +# stopifnot(class(x)== 'BoolModel') +# +# if(uniq_bool) +# { +# return(unique(x[,colnames(x) %in% y, drop=F])) +# } else +# { +# return(x[,colnames(x) %in% y, drop=F]) +# } +# } + +#' @title Filter columns of df in a list +#' +#' @description +#' This function filters columns of multiple df in a list, when compared using a vector. Use through lapply(). +#' +#' @param x data frame. It should be a list element if used from lapply. +#' @param y character vector. It should contains gene names found in the colnames of the data frame. +#' @param uniq_bool logical. Whether to return unique rows only. +filter_dflist = function(x, y, uniq_bool=T) +{ + stopifnot(class(x)== 'data.frame' | class(x)== 'matrix') + + if(uniq_bool) + { + return(unique(x[,colnames(x) %in% y, drop=F])) + } else + { + return(x[,colnames(x) %in% y, drop=F]) + } +} + +#' @title Calculate distance between Boolean models +#' +#' @description +#' This method takes in two models and calculate the distance between them. The value return indicate the number of steps between the two models. +#' +#' @param x S4 BoolModel object. Test model. +#' @param y S4 BoolModel object. Reference model. +#' +#' @export +model_dist = function(x, y) +{ + set_diff = unlist(model_setdiff(x, y)) + + #Calculate total dist. + t_dist = length(set_diff) + + #Return results. + return(t_dist) +} + +#' @title Find the set difference between two Boolean models +#' +#' @description +#' This method takes in two models and find the set difference between them. Return a vector with the set difference. +#' +#' @param x S4 BoolModel object. Test model. +#' @param y S4 BoolModel object. Reference model. +#' @param directed logical. If TRUE, return the difference in terms with respect to x. +#' +#' @export +model_setdiff = function(x, y, directed=F) +{ + stopifnot(length(x@target) == length(y@target)) + stopifnot(get_encodings(x)[[1]]==get_encodings(y)[[1]]) + + ind = get_encodings(x) + + x1 = compress_bmodel(x, ind) + x2 = compress_bmodel(y, ind) + + ind = ind[[1]] + + #Pick which model has more terms, and which has less. + more_ind = which.max(c(length(x1), length(x2))) + less_ind = ifelse(more_ind==1, 2, 1) + + more_x = list(x1, x2)[[more_ind]] + less_x = list(x1, x2)[[less_ind]] + + #Remove empty terms. + more_x = more_x[more_x%%1!=0] + less_x = less_x[less_x%%1!=0] + + #Remove 0 and !0. + #Find the encodings for 0 and !0. + zero_enc = as.numeric(paste('0.', ind[which(names(ind)=='0')], sep='')) + notzero_enc = as.numeric(paste('0.', ind[which(names(ind)=='!0')], sep='')) + + #Remove them. + more_x = more_x[round(more_x-round(more_x), 4)!=zero_enc] + more_x = more_x[round(more_x-round(more_x), 4)!=notzero_enc] + + less_x = less_x[round(less_x-round(less_x), 4)!=zero_enc] + less_x = less_x[round(less_x-round(less_x), 4)!=notzero_enc] + + #Check for set difference. + more_bool = sapply(more_x, function(x) !(x %in% less_x)) + less_bool = sapply(less_x, function(x) !(x %in% more_x)) + + more_diff = more_x[more_bool] + less_diff = less_x[less_bool] + + more_vec = c() + if(length(more_diff)>0) + { + gene = round(more_diff) + rule = gsub('^[0-9]+[.]', '', gsub(' ', '', format(more_diff, nsmall=4))) + + gene = x@target[gene] + rule = unname(sapply(rule, function(x) names(which(ind==x))[1])) + + more_vec = rule + names(more_vec) = gene + } + + less_vec = c() + if(length(less_diff)>0) + { + gene = round(less_diff) + rule = gsub('^[0-9]+[.]', '', gsub(' ', '', format(less_diff, nsmall=4))) + + gene = x@target[gene] + rule = unname(sapply(rule, function(x) names(which(ind==x))[1])) + + less_vec = rule + names(less_vec) = gene + } + + #Handle step difference that includes changing to and from AND terms. + if(length(more_vec) != 0 & length(less_vec) != 0) + { + for(i in 1:length(more_vec)) + { + for(j in 1:length(less_vec)) + { + if(names(more_vec[i])==names(less_vec[j])) + { + if(grepl('!', more_vec[i]) & grepl('!', less_vec[j])) #for both inh terms. do not cross compare act and inh terms. + { + #Split & terms if present. + mvec = unlist(strsplit(more_vec[i], '&')) + lvec = unlist(strsplit(less_vec[j], '&')) + + #Removes !. + mvec = gsub('!', '', mvec) + lvec = gsub('!', '', lvec) + + #Check for presence of &. + mand_bool = ifelse(grepl('&', more_vec[i]), T, F) + land_bool = ifelse(grepl('&', less_vec[j]), T, F) + + mbool = mvec %in% lvec + lbool = lvec %in% mvec + + #assign NA if there is no term remaining. + if(length(mvec[!mbool])!=0) + { + if(!is.na(mvec[!mbool][1])) + { + more_vec[i] = paste(mvec[!mbool], collapse='&') + } + } else + { + more_vec[i] = NA + } + + if(length(lvec[!lbool])!=0) + { + if(!is.na(lvec[!lbool][1])) + { + less_vec[j] = paste(lvec[!lbool], collapse='&') + } + } else + { + less_vec[j] = NA + } + + #if this involves AND term, adds & indicator to term name. + if(mand_bool & !is.na(more_vec[i]) & !grepl('&', more_vec[i])) + { + more_vec[i] = paste('&', more_vec[i], sep='') + } + if(land_bool & !is.na(less_vec[j]) & !grepl('&', less_vec[j])) + { + less_vec[j] = paste('&', less_vec[j], sep='') + } + + #Add the ! back to the terms. + if(!is.na(more_vec[i])) + { + if(grepl('^&', more_vec[i])) + { + more_vec[i] = gsub('(^[&])(.+)', '\\1!\\2', more_vec[i]) + } else + { + more_vec[i] = paste('!', more_vec[i], sep='') + } + } + if(!is.na(less_vec[j])) + { + if(grepl('^&', less_vec[j])) + { + less_vec[j] = gsub('(^[&])(.+)', '\\1!\\2', less_vec[j]) + } else + { + less_vec[j] = paste('!', less_vec[j], sep='') + } + } + + } else if(!grepl('!', more_vec[i]) & !grepl('!', less_vec[j])) #for both act terms. do not cross compare act and inh terms. + { + #Split & terms if present. + mvec = unlist(strsplit(more_vec[i], '&')) + lvec = unlist(strsplit(less_vec[j], '&')) + + #Check for presence of &. + mand_bool = ifelse(grepl('&', more_vec[i]), T, F) + land_bool = ifelse(grepl('&', less_vec[j]), T, F) + + mbool = mvec %in% lvec + lbool = lvec %in% mvec + + #assign NA if there is no term remaining. + if(length(mvec[!mbool])!=0) + { + if(!is.na(mvec[!mbool][1])) + { + more_vec[i] = paste(mvec[!mbool], collapse='&') + } + } else + { + more_vec[i] = NA + } + + if(length(lvec[!lbool])!=0) + { + if(!is.na(lvec[!lbool][1])) + { + less_vec[j] = paste(lvec[!lbool], collapse='&') + } + } else + { + less_vec[j] = NA + } + + #if this involves AND term, adds & indicator to term name. + if(mand_bool & !is.na(more_vec[i]) & !grepl('&', more_vec[i])) + { + more_vec[i] = paste('&', more_vec[i], sep='') + } + if(land_bool & !is.na(less_vec[j]) & !grepl('&', less_vec[j])) + { + less_vec[j] = paste('&', less_vec[j], sep='') + } + } + } + } + } + } + + #Remove NAs. + if(!is.null(more_vec)) + { + more_vec = more_vec[!is.na(more_vec)] + } + if(!is.null(less_vec)) + { + less_vec = less_vec[!is.na(less_vec)] + } + + #If there are any AND term remaining here, they should be separated as they count as 2 steps. + if(length(more_vec)!=0) + { + for(i in 1:length(more_vec)) + { + if(grepl('.+&.+', more_vec[i])) + { + tmp_entry = unlist(strsplit(more_vec[i], '&'), use.names=F) + names(tmp_entry) = rep(names(more_vec[i]), length(tmp_entry)) + more_vec = c(more_vec, tmp_entry) + more_vec = more_vec[-i] + } + } + } + + if(length(less_vec)!=0) + { + for(i in 1:length(less_vec)) + { + if(grepl('.+&.+', less_vec[i])) + { + tmp_entry = unlist(strsplit(less_vec[i], '&'), use.names=F) + names(tmp_entry) = rep(names(less_vec[i]), length(tmp_entry)) + less_vec = c(less_vec, tmp_entry) + less_vec = less_vec[-i] + } + } + } + + comb_vec = c(more_vec, less_vec) + #Replace the gene names back into update functions. + for(i in 1:length(x@target_var)) + { + comb_vec = gsub(x@target_var[i], x@target[i], comb_vec) + } + + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(comb_vec, '&'))) | + !grepl('^0$', unlist(strsplit(comb_vec, '&'))))) #check if all gene name conversions is successful. + + #Return results. + if(directed) + { + return(list(more=more_vec, less=less_vec)) + } else + { + return(list(comb_vec)) + } } \ No newline at end of file diff --git a/R/initialisation.R b/R/initialisation.R index 7d7f773..53061dd 100644 --- a/R/initialisation.R +++ b/R/initialisation.R @@ -1,179 +1,188 @@ -#' @title Initialise raw data -#' -#' @description -#' This function initialise raw gene expression values in a matrix. Return a list of two matrices: (1) continuous values and (2) binary values. -#' Note that kmeans clustering as binarisation only works well if the data has a bimodal distribution. -#' -#' @param x matrix. Numeric data of gene expression. -#' @param max_expr character. Specify whether max expression value is the lowest (as in qPCR), or the highest (as in RNAseq and microarray). Option: 'low', 'high'. Default to 'high'. -#' @param uni_thre numerical. Speficy threshold for unimodality test. Default to 0.2. -#' @param scale logical. Whether to scale the data to a range of 0-1. Default to T. -#' -#' @export -initialise_raw_data = function(x, max_expr='high', uni_thre=0.2, scale=T) -{ - #(1) Convert negative to positive values. - if(min(x)<0) - { - x = x + abs(min(x)) - } else - { - x = x - min(x) - } - - stopifnot(min(x)==0) - - if(max_expr=='low') - { - #(2) Invert qPCR values. Lowest expression should be close to 0, highest expression should be away from 0. - x = abs(max(x) - x) - } - - #(3) Scale values to between 0 and 1. - #Scaling should be done by each gene, instead of globally. Because clustering is done by gene, and the expression values should reflect that. - if(scale) - { - for(i in 1:ncol(x)) - { - a = x[,i] - if(max(a)-min(a) != 0) #when min and max are different. usual case. - { - x[,i] = (a-min(a))/(max(a)-min(a)) - } else - { - x[,i] = a-min(a) - } - } - } - - #(4) Binarise the data by k-means clustering. - y = matrix(NA, ncol=ncol(x), nrow=nrow(x)) - for(i in 1:ncol(x)) - { - #Perform unimodality test for each gene. - uni_test = diptest::dip.test(x[,i])$p.value - - if(uni_test > uni_thre) - { - cent_measure = median(x[,i]) - if(cent_measure < 0.5) - { - y[,i] = rep(0, nrow(y)) #set all values to 0, if the cent_measure is less than 0.5 - } else - { - y[,i] = rep(1, nrow(y)) #set all values to 1, if the cent_measure is more than 0.5 - } - } else - { - bin_result = kmeans(x[,i,drop=F], 2) #Note that kmeans do not give 1 to cluster with lower centroid. - x_bin = unname(bin_result$cluster) - - #(5) Decide which cluster is 0 and which is 1. - if(bin_result$centers[,1][1] < bin_result$centers[,1][2]) - { - y[,i] = x_bin - 1 #set 1 to 0, 2 to 1. - } else - { - y[,i] = abs(x_bin-2) #set 1 to 1, 2 to 0. - } - } - } - - stopifnot(all(dim(x)==dim(y))) - - colnames(y) = colnames(x) - rownames(y) = rownames(x) - - return(list(x, y)) -} - -#' @title Initialise data -#' -#' @description -#' This function initialises data frame of Boolean state space. Returns initialised data frame. -#' -#' @param state data frame. It should contain either 0/1 or F/T data of gene expression. -#' @param aslogic logical. It specifies whether to convert the input data into Boolean values. Default to FALSE. -#' -#' @export -initialise_data = function(state, aslogic=F) -{ - #Store column and row names. - rn_tmp = rownames(state) - cn_tmp = colnames(state) - - #Convert to logical T/F if aslogic=T. - if(aslogic) - { - #Convert numerical (i.e. 0, 1) to logical type. - if(nrow(state) != 1) #one row won't work with apply(). - { - state = apply(state, 2, as.logical) - } else - { - state = data.frame(matrix(apply(state, 2, as.logical), nrow=1)) - } - - stopifnot(all(!is.na(state))) #Check if contains NA values due to coercion. - colnames(state) = cn_tmp - } - - #Convert all row and column names to lowercase. - #rownames(state) = tolower(rn_tmp) - #colnames(state) = tolower(cn_tmp) - - #Order colname names alphabetically. - if(all(grepl('v[0-9]+s', cn_tmp))) #if variable names are in the form of v1s, v2s, etc. - { - state = state[, order(as.numeric(gsub('v([0-9]+)s', '\\1', cn_tmp)))] - } else if(all(grepl('G[0-9]+', cn_tmp))) #if variable names are in the form of v1s, v2s, etc. - { - state = state[, order(as.numeric(gsub('G([0-9]+)', '\\1', cn_tmp)))] - } else - { - state = state[, order(cn_tmp)] - } - - return(state) -} - -#' @title Initialise model -#' -#' @description -#' This function initialises a Boolean model. Returns initialised S4 BoolModel object. -#' Note that the model should only has 1 NOT operator. More than 1 is STRICTLY NOT allowed. -#' -#' @param init_model data frame of Boolean model. It should contain two columns, targets and functions. -#' -#' @export -initialise_model = function(init_model) -{ - bmodel = df_to_bm(init_model) - return(bmodel) -} - -#' @title Remove raw data duplicated wrt to the model state -#' -#' @description -#' This function removes the 'duplicates' in an expression wrt to the model state. -#' -#' @param dx matrix. Initialised and discretised numeric data of gene expression. -#' @param cx matrix. Initialised and continuous numeric data of gene expression. -#' -#' @export -unique_raw_data = function(dx, cx) -{ - fac_dx = c() - for(i in 1:nrow(dx)) - { - fac_dx = c(fac_dx, paste(dx[i,], collapse='')) - } - - fac_dx = list(as.factor(fac_dx)) #convert into factors. - - output = aggregate(cx, fac_dx, mean) #calculate means for each factor group. - rownames(output) = output[,1] - output = output[,-1] - - return(as.matrix(output)) -} +#' @title Initialise raw data +#' +#' @description +#' This function initialise raw gene expression values in a matrix. Return either a matrix of (1) continuous values or (2) binary values. +#' Note that kmeans clustering as binarisation only works well if the data has a bimodal distribution. +#' +#' @param x matrix. Numeric data of gene expression. +#' @param max_expr character. Specify whether max expression value is the lowest (as in qPCR), or the highest (as in RNAseq and microarray). Option: 'low', 'high'. Default to 'high'. +#' @param uni_thre numerical. Speficy threshold for unimodality test. Default to 0.2. +#' @param scale logical. Whether to scale the data to a range of 0-1. Default to T. +#' @param discretised logical. Whether to return discretised data. Default to F. +#' +#' @export +initialise_raw_data = function(x, max_expr='high', uni_thre=0.2, scale=T, discretised=F) +{ + #(1) Convert negative to positive values. + if(min(x)<0) + { + x = x + abs(min(x)) + } else + { + x = x - min(x) + } + + stopifnot(min(x)==0) + + if(max_expr=='low') + { + #(2) Invert qPCR values. Lowest expression should be close to 0, highest expression should be away from 0. + x = abs(max(x) - x) + } + + #(3) Scale values to between 0 and 1. + #Scaling should be done by each gene, instead of globally. Because clustering is done by gene, and the expression values should reflect that. + if(scale) + { + for(i in 1:ncol(x)) + { + a = x[,i] + if(max(a)-min(a) != 0) #when min and max are different. usual case. + { + x[,i] = (a-min(a))/(max(a)-min(a)) + } else + { + x[,i] = a-min(a) + } + } + } + + #(4) Binarise the data by k-means clustering. + y = matrix(NA, ncol=ncol(x), nrow=nrow(x)) + for(i in 1:ncol(x)) + { + #Perform unimodality test for each gene. + uni_test = diptest::dip.test(x[,i])$p.value + + if(uni_test > uni_thre) + { + cent_measure = median(x[,i]) + if(cent_measure < 0.5) + { + y[,i] = rep(0, nrow(y)) #set all values to 0, if the cent_measure is less than 0.5 + } else + { + y[,i] = rep(1, nrow(y)) #set all values to 1, if the cent_measure is more than 0.5 + } + } else + { + bin_result = kmeans(x[,i,drop=F], 2) #Note that kmeans do not give 1 to cluster with lower centroid. + x_bin = unname(bin_result$cluster) + x_center = bin_result$centers[,1][order(bin_result$centers[,1])] + x_center_rank = c(0, 1) #low, high + names(x_center_rank) = as.numeric(names(x_center)) + + #(5) Reassign cluster labels. + y[,i] = x_center_rank[x_bin] + } + } + + stopifnot(all(dim(x)==dim(y))) + + colnames(y) = colnames(x) + rownames(y) = rownames(x) + + if(discretised) + { + return(y) + } else + { + return(x) + } +} + +#' @title Initialise data +#' +#' @description +#' This function initialises data frame of Boolean state space. Returns initialised data frame. +#' +#' @param state data frame. It should contain either 0/1 or F/T data of gene expression. +#' @param aslogic logical. It specifies whether to convert the input data into Boolean values. Default to FALSE. +#' +#' @export +initialise_data = function(state, aslogic=F) +{ + if(class(state) == 'numeric' | class(state) == 'logical') + { + state = t(data.frame(istate)) + } + + #Store column and row names. + rn_tmp = rownames(state) + cn_tmp = colnames(state) + + #Convert to logical T/F if aslogic=T. + if(aslogic) + { + #Convert numerical (i.e. 0, 1) to logical type. + if(nrow(state) != 1) #one row won't work with apply(). + { + state = apply(state, 2, as.logical) + } else + { + state = data.frame(matrix(apply(state, 2, as.logical), nrow=1)) + } + + stopifnot(all(!is.na(state))) #Check if contains NA values due to coercion. + colnames(state) = cn_tmp + } + + #Convert all row and column names to lowercase. + #rownames(state) = tolower(rn_tmp) + #colnames(state) = tolower(cn_tmp) + + #Order colname names alphabetically. + if(all(grepl('v[0-9]+s', cn_tmp))) #if variable names are in the form of v1s, v2s, etc. + { + state = state[, order(as.numeric(gsub('v([0-9]+)s', '\\1', cn_tmp)))] + } else if(all(grepl('G[0-9]+', cn_tmp))) #if variable names are in the form of v1s, v2s, etc. + { + state = state[, order(as.numeric(gsub('G([0-9]+)', '\\1', cn_tmp)))] + } else + { + state = state[, order(cn_tmp)] + } + + return(state) +} + +#' @title Initialise model +#' +#' @description +#' This function initialises a Boolean model. Returns initialised S4 BoolModel object. +#' Note that the model should only has 1 NOT operator. More than 1 is STRICTLY NOT allowed. +#' +#' @param init_model data frame of Boolean model. It should contain two columns, targets and functions. +#' +#' @export +initialise_model = function(init_model) +{ + bmodel = df_to_bm(init_model) + return(bmodel) +} + +#' @title Remove raw data duplicated wrt to the model state +#' +#' @description +#' This function removes the 'duplicates' in an expression wrt to the model state. +#' +#' @param dx matrix. Initialised and discretised numeric data of gene expression. +#' @param cx matrix. Initialised and continuous numeric data of gene expression. +#' +#' @export +unique_raw_data = function(dx, cx) +{ + fac_dx = c() + for(i in 1:nrow(dx)) + { + fac_dx = c(fac_dx, paste(dx[i,], collapse='')) + } + + fac_dx = list(as.factor(fac_dx)) #convert into factors. + + output = aggregate(cx, fac_dx, mean) #calculate means for each factor group. + rownames(output) = output[,1] + output = output[,-1] + + return(as.matrix(output)) +} diff --git a/R/methods.R b/R/methods.R index ffef26e..77ca62c 100644 --- a/R/methods.R +++ b/R/methods.R @@ -1,339 +1,339 @@ -#' @title Print Boolean Model -#' -#' @description -#' This method converts the S4 BoolModel object back into a human-readable data frame, with two columns: (1) target genes, (2) Boolean rules. -#' -#' @param bmodel S4 BoolModel object. -#' @param gene.names logical. Specify whether to write rules in terms of genes or internal variables. Default to FALSE. -#' -#' @export -printBM = function(bmodel, gene.names=F) -{ - #Prettify update functions. - act_rule = sapply(bmodel@rule_act, function(x) paste(x, collapse='|')) - inh_rule = sapply(bmodel@rule_inh, function(x) paste(x, collapse='|')) - - if(gene.names) - { - if(all(bmodel@target != bmodel@target_var)) - { - #Replace the gene names back into update functions. - for(i in 1:length(bmodel@target_var)) - { - act_rule = gsub(bmodel@target_var[i], bmodel@target[i], act_rule) - inh_rule = gsub(bmodel@target_var[i], bmodel@target[i], inh_rule) - } - - stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(act_rule), '&'))) | - grepl('^0$', unlist(strsplit(unlist(act_rule), '&'))))) #check if all gene name conversions is successful. - stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(inh_rule), '&'))) | - grepl('^0$', unlist(strsplit(unlist(inh_rule), '&'))))) #check if all gene name conversions is successful. - } - } - - #Add in brackets around & terms. - act_rule = lapply(act_rule, function(x) gsub('([[:alnum:]]+&[[:alnum:]]+)', '(\\1)', x)) - inh_rule = lapply(inh_rule, function(x) gsub('([[:alnum:]]+&[[:alnum:]]+)', '(\\1)', x)) - - comb_rule = paste('(', act_rule, ') &! (', inh_rule, ')', sep='') - - #Combine all information together. - out_df = data.frame(gene=bmodel@target, var=bmodel@target_var, func=comb_rule, stringsAsFactors=F) - - return(out_df) -} - -#' @title Write Boolean Model -#' -#' @description -#' This method writes the S4 BoolModel object into a CSV file. This method is a wrapper for print.BoolModel. The output is a data frame, with two columns: (1) target genes, (2) Boolean rules. -#' -#' @param bmodel S4 BoolModel object. -#' @param file file name with path, or a file connection object. -#' @param gene.names logical. Specify whether to write rules in terms of genes or internal variables. Default to FALSE. -#' @param rownames logical. It specifies whether to write row names. -#' -#' @export -writeBM = function(bmodel, file, gene.names=F, rownames=F) -{ - #Method that writes information from BoolModel object. - out_df = printBM(bmodel, gene.names) - - write.csv(out_df, file=file, quote=F, row.names=F) -} - -#' @title Plot Boolean Model -#' -#' @description -#' This method plots the network underlying Boolean models by using igraph for quick visualisation. -#' Require igraph. -#' -#' @param bmodel S4 BoolModel object. -#' @param makePlot logical. Whether to make plot or just return the object. Default to T. -#' @param ... Additional parameters to plot.igraph. -#' -#' @export -plotBM = function(bmodel, makePlot=T, ...) -{ - #Convert to amat. - am = bm_to_amat(bmodel) - - #Convert into a graph. - g = igraph::graph.adjacency(am, mode='directed', weighted=T) - - #Setup edge colour for plotting. - #Activation = black, inhibition = red - igraph::E(g)$color = sapply(igraph::E(g)$weight, function(x) ifelse(x==1, 'black', 'red')) - - #Setup other colours. - igraph::V(g)$frame.color = "white" - igraph::V(g)$color = rgb(255, 165, 0, 200, maxColorValue = 255) - - #Setup vertex font size. - igraph::V(g)$label.cex = 1.5 - - if(makePlot) - { - #Make the plot. - igraph::plot.igraph(g, layout=igraph::layout_in_circle, ...) - } - - invisible(g) -} - -#' @title Convert BoolModel into adjacency matrix -#' -#' @description -#' This function converts a BoolModel object into an adjacency matrix. -#' -#' @param x S4 BoolModel object. -#' @param directed logical. Whether to return directed or undirected adjacency matrix. Default to TRUE. -#' -#' @export -bm_to_amat = function(x, directed=T) -{ - #(1) Extract act and inh rules. And removes &. - target_gene = x@target - act_rule = lapply(x@rule_act, function(x) unlist(strsplit(x, '&'))) - inh_rule = lapply(x@rule_inh, function(x) unlist(strsplit(x, '&'))) - all_rule = lapply(1:length(target_gene), function(x) c(act_rule[[x]], inh_rule[[x]])) - all_rule = lapply(all_rule, function(x) x[x!='0']) #removes 0s. - - #(2) Convert gene variables into gene names. - #Replace the gene names back into update functions. - for(i in 1:length(x@target_var)) - { - all_rule = lapply(all_rule, function(y) gsub(x@target_var[i], x@target[i], y)) - } - - stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(all_rule), '&'))) | - grepl('^0$', unlist(strsplit(unlist(all_rule), '&'))))) #check if all gene name conversions is successful. - - #(3) Setup output matrix. - out_mat = matrix(F, nrow=length(target_gene), ncol=length(target_gene)) - colnames(out_mat) = target_gene - rownames(out_mat) = target_gene - - #(4) Fill in the output matrix. - for(i in 1:length(target_gene)) - { - out_mat[,i] = target_gene %in% all_rule[[i]] - } - - if(!directed) - { - #(5) Make the output matrix symmetric. - for(i in 1:length(target_gene)) - { - tmp = out_mat[i,] | out_mat[,i] - out_mat[i,] = tmp - out_mat[,i] = tmp - } - - stopifnot(isSymmetric(out_mat)) - out_mat = out_mat + 0 #convert logical to numeric. - } else - { - out_mat = out_mat + 0 #convert logical to numeric. - - #(6) Make inhibitory edges as negative. - for(i in 1:length(inh_rule)) - { - for(j in 1:length(inh_rule[[i]])) - { - if(inh_rule[[i]][j]!='0') - { - from_ind = as.numeric(gsub('v([0-9]+)s', '\\1', inh_rule[[i]][j])) - to_ind = i - - out_mat[from_ind, to_ind] = -1 - } - } - } - } - - return(out_mat) -} - -#' @title Convert adjacency matrix into BoolModel object -#' -#' @description -#' This function converts adjacency matrix to BoolModel object. Able to take in adjacency matrix with -1, which encodes for inhibitory interaction. -#' -#' @param amat matrix. directed adjacency matrix. -#' @param random logical. Randomly assign to either act or inh rules, when the adjacency matrix only has values 0 and 1, but not -1. -#' @export -amat_to_bm = function(amat, random=F) -{ - if(random) - { - stopifnot(any(amat!=-1)) - - #Get all edges. - edge_df = cbind(which(amat==1, arr.ind = T), rep(NA, nrow(which(amat==1, arr.ind = T)))) - - #Setup row and column names. - colnames(edge_df) = c('from', 'to', 'type') - rownames(edge_df) = NULL - - #Randomly assign edges to act or inh rules. - edge_df[,3] = replicate(nrow(edge_df), sample(c('act', 'inh'), 1)) - - } else - { - #Get all edges. - act_edge = cbind(which(amat==1, arr.ind = T), rep('act', nrow(which(amat==1, arr.ind = T)))) - inh_edge = cbind(which(amat==-1, arr.ind = T), rep('inh', nrow(which(amat==-1, arr.ind = T)))) - edge_df = rbind(act_edge, inh_edge) - - #Setup row and column names. - colnames(edge_df) = c('from', 'to', 'type') - rownames(edge_df) = NULL - } - - #Replace indices by variable names. - var_name = paste('v', seq(1, length(colnames(amat))), 's', sep='') - names(var_name) = seq(1, nrow(amat)) - edge_df[,1] = var_name[as.character(edge_df[,1])] - - #Convert edge data frame to Boolmodel rules. - act_rule = vector('list', length(var_name)) - inh_rule = vector('list', length(var_name)) - for(i in 1:nrow(edge_df)) - { - if(edge_df[i,3] == 'act') - { - tmp_act = unname(c(act_rule[[as.numeric(edge_df[i,2])]], edge_df[i,1])) - act_rule[[as.numeric(edge_df[i,2])]] = tmp_act - } else - { - tmp_inh = unname(c(inh_rule[[as.numeric(edge_df[i,2])]], edge_df[i,1])) - inh_rule[[as.numeric(edge_df[i,2])]] = tmp_inh - } - } - - #Fill in empty rules. - act_rule[which(sapply(act_rule, length)==0)] = '0' - inh_rule[which(sapply(inh_rule, length)==0)] = '0' - - #Generate BoolModel object. - out_model = BoolModel(target=colnames(amat), target_var=var_name, rule_act=act_rule, rule_inh=inh_rule) - - return(out_model) -} - -#' @title Convert BoolModel object into BoolNet readable data frame -#' -#' @description -#' This method converts BoolModel object into a data frame, which is readable by BoolNet. -#' -#' @param bmodel BoolModel object. -#' -#' @export -bm_to_df = function(bmodel) -{ - out_df = printBM(bmodel, gene.names=T)[,-2] - colnames(out_df) = c('targets', 'factors') - - return(out_df) -} - -#' @title Convert a data frame into BoolModel object -#' -#' @description -#' This method converts a data frame into a BoolModel object. -#' Note that the model should only has 1 NOT operator. More than 1 is STRICTLY NOT allowed. -#' -#' @param in_df data frame with 2 columns, targets and factors -#' -#' @export -df_to_bm = function(in_df) -{ - #Setup the initial data frame. - in_df = as.data.frame(apply(in_df, 2, tolower), stringsAsFactors=F) - in_df = in_df[order(in_df[,1]),] - in_df[,2] = gsub('\\s', '', in_df[,2]) - - #Take out target genes. - target = in_df[,1] - - #Create corresponding simplified variable terms. - target_var = paste('v', seq(1,length(target)), 's', sep='') #the variable name consists of digits bounded by two characters. - - #Separate the activators and the inhibitors. - rule_list = strsplit(in_df[,2], '&!') - - rule_act = character(length(rule_list)) #initialise variable with correct lengths. - rule_inh = character(length(rule_list)) - for(i in 1:length(rule_list)) - { - if(length(rule_list[[i]]) == 2) - { - rule_act[i] = rule_list[[i]][1] - rule_inh[i] = rule_list[[i]][2] - } else if(length(rule_list[[i]]) == 1) - { - if(grepl('!', rule_list[[i]][1])) #if the first term in each vector in the list has a !, this means that there is no activator in this vector, but only inhibitor. - { - rule_act[i] = '0' - rule_inh[i] = rule_list[[i]][1] - } else - { - rule_act[i] = rule_list[[i]][1] - rule_inh[i] = '0' - } - } else - { - stop('Error in model rule specification.') - } - } - - rule_act = gsub('[(]0[)]', '0', rule_act) #take away bracketed 0s. - rule_inh = gsub('[(]0[)]', '0', rule_inh) - - rule_inh = gsub('^([!])', '', rule_inh) #take away remaining !. - - #Replace genes in rules with simplified variables. - for(i in 1:length(target_var)) - { - rule_act = gsub(target[i], target_var[i], rule_act) - rule_inh = gsub(target[i], target_var[i], rule_inh) - } - - rule_act = unname(sapply(rule_act, extract_term)) - rule_inh = unname(sapply(rule_inh, extract_term)) - - if(class(rule_act) != 'list') - { - rule_act = as.list(rule_act) - } - - if(class(rule_inh) != 'list') - { - rule_inh = as.list(rule_inh) - } - - bmodel = BoolModel(target=target, target_var=target_var, rule_act=rule_act, rule_inh=rule_inh) - - return(bmodel) -} +#' @title Print Boolean Model +#' +#' @description +#' This method converts the S4 BoolModel object back into a human-readable data frame, with two columns: (1) target genes, (2) Boolean rules. +#' +#' @param bmodel S4 BoolModel object. +#' @param gene.names logical. Specify whether to write rules in terms of genes or internal variables. Default to FALSE. +#' +#' @export +printBM = function(bmodel, gene.names=F) +{ + #Prettify update functions. + act_rule = sapply(bmodel@rule_act, function(x) paste(x, collapse='|')) + inh_rule = sapply(bmodel@rule_inh, function(x) paste(x, collapse='|')) + + if(gene.names) + { + if(all(bmodel@target != bmodel@target_var)) + { + #Replace the gene names back into update functions. + for(i in 1:length(bmodel@target_var)) + { + act_rule = gsub(bmodel@target_var[i], bmodel@target[i], act_rule) + inh_rule = gsub(bmodel@target_var[i], bmodel@target[i], inh_rule) + } + + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(act_rule), '&'))) | + grepl('^0$', unlist(strsplit(unlist(act_rule), '&'))))) #check if all gene name conversions is successful. + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(inh_rule), '&'))) | + grepl('^0$', unlist(strsplit(unlist(inh_rule), '&'))))) #check if all gene name conversions is successful. + } + } + + #Add in brackets around & terms. + act_rule = lapply(act_rule, function(x) gsub('([[:alnum:]]+&[[:alnum:]]+)', '(\\1)', x)) + inh_rule = lapply(inh_rule, function(x) gsub('([[:alnum:]]+&[[:alnum:]]+)', '(\\1)', x)) + + comb_rule = paste('(', act_rule, ') &! (', inh_rule, ')', sep='') + + #Combine all information together. + out_df = data.frame(gene=bmodel@target, var=bmodel@target_var, func=comb_rule, stringsAsFactors=F) + + return(out_df) +} + +#' @title Write Boolean Model +#' +#' @description +#' This method writes the S4 BoolModel object into a CSV file. This method is a wrapper for print.BoolModel. The output is a data frame, with two columns: (1) target genes, (2) Boolean rules. +#' +#' @param bmodel S4 BoolModel object. +#' @param file file name with path, or a file connection object. +#' @param gene.names logical. Specify whether to write rules in terms of genes or internal variables. Default to FALSE. +#' @param rownames logical. It specifies whether to write row names. +#' +#' @export +writeBM = function(bmodel, file, gene.names=F, rownames=F) +{ + #Method that writes information from BoolModel object. + out_df = printBM(bmodel, gene.names) + + write.csv(out_df, file=file, quote=F, row.names=F) +} + +#' @title Plot Boolean Model +#' +#' @description +#' This method plots the network underlying Boolean models by using igraph for quick visualisation. +#' Require igraph. +#' +#' @param bmodel S4 BoolModel object. +#' @param makePlot logical. Whether to make plot or just return the object. Default to T. +#' @param ... Additional parameters to plot.igraph. +#' +#' @export +plotBM = function(bmodel, makePlot=T, ...) +{ + #Convert to amat. + am = bm_to_amat(bmodel) + + #Convert into a graph. + g = igraph::graph.adjacency(am, mode='directed', weighted=T) + + #Setup edge colour for plotting. + #Activation = black, inhibition = red + igraph::E(g)$color = sapply(igraph::E(g)$weight, function(x) ifelse(x==1, 'black', 'red')) + + #Setup other colours. + igraph::V(g)$frame.color = "white" + igraph::V(g)$color = rgb(255, 165, 0, 200, maxColorValue = 255) + + #Setup vertex font size. + igraph::V(g)$label.cex = 1.5 + + if(makePlot) + { + #Make the plot. + igraph::plot.igraph(g, layout=igraph::layout_in_circle, ...) + } + + invisible(g) +} + +#' @title Convert BoolModel into adjacency matrix +#' +#' @description +#' This function converts a BoolModel object into an adjacency matrix. +#' +#' @param x S4 BoolModel object. +#' @param directed logical. Whether to return directed or undirected adjacency matrix. Default to TRUE. +#' +#' @export +bm_to_amat = function(x, directed=T) +{ + #(1) Extract act and inh rules. And removes &. + target_gene = x@target + act_rule = lapply(x@rule_act, function(x) unlist(strsplit(x, '&'))) + inh_rule = lapply(x@rule_inh, function(x) unlist(strsplit(x, '&'))) + all_rule = lapply(1:length(target_gene), function(x) c(act_rule[[x]], inh_rule[[x]])) + all_rule = lapply(all_rule, function(x) x[x!='0']) #removes 0s. + + #(2) Convert gene variables into gene names. + #Replace the gene names back into update functions. + for(i in 1:length(x@target_var)) + { + all_rule = lapply(all_rule, function(y) gsub(x@target_var[i], x@target[i], y)) + } + + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(all_rule), '&'))) | + grepl('^0$', unlist(strsplit(unlist(all_rule), '&'))))) #check if all gene name conversions is successful. + + #(3) Setup output matrix. + out_mat = matrix(F, nrow=length(target_gene), ncol=length(target_gene)) + colnames(out_mat) = target_gene + rownames(out_mat) = target_gene + + #(4) Fill in the output matrix. + for(i in 1:length(target_gene)) + { + out_mat[,i] = target_gene %in% all_rule[[i]] + } + + if(!directed) + { + #(5) Make the output matrix symmetric. + for(i in 1:length(target_gene)) + { + tmp = out_mat[i,] | out_mat[,i] + out_mat[i,] = tmp + out_mat[,i] = tmp + } + + stopifnot(isSymmetric(out_mat)) + out_mat = out_mat + 0 #convert logical to numeric. + } else + { + out_mat = out_mat + 0 #convert logical to numeric. + + #(6) Make inhibitory edges as negative. + for(i in 1:length(inh_rule)) + { + for(j in 1:length(inh_rule[[i]])) + { + if(inh_rule[[i]][j]!='0') + { + from_ind = as.numeric(gsub('v([0-9]+)s', '\\1', inh_rule[[i]][j])) + to_ind = i + + out_mat[from_ind, to_ind] = -1 + } + } + } + } + + return(out_mat) +} + +#' @title Convert adjacency matrix into BoolModel object +#' +#' @description +#' This function converts adjacency matrix to BoolModel object. Able to take in adjacency matrix with -1, which encodes for inhibitory interaction. +#' +#' @param amat matrix. directed adjacency matrix. +#' @param random logical. Randomly assign to either act or inh rules, when the adjacency matrix only has values 0 and 1, but not -1. +#' @export +amat_to_bm = function(amat, random=F) +{ + if(random) + { + stopifnot(any(amat!=-1)) + + #Get all edges. + edge_df = cbind(which(amat==1, arr.ind = T), rep(NA, nrow(which(amat==1, arr.ind = T)))) + + #Setup row and column names. + colnames(edge_df) = c('from', 'to', 'type') + rownames(edge_df) = NULL + + #Randomly assign edges to act or inh rules. + edge_df[,3] = replicate(nrow(edge_df), sample(c('act', 'inh'), 1)) + + } else + { + #Get all edges. + act_edge = cbind(which(amat==1, arr.ind = T), rep('act', nrow(which(amat==1, arr.ind = T)))) + inh_edge = cbind(which(amat==-1, arr.ind = T), rep('inh', nrow(which(amat==-1, arr.ind = T)))) + edge_df = rbind(act_edge, inh_edge) + + #Setup row and column names. + colnames(edge_df) = c('from', 'to', 'type') + rownames(edge_df) = NULL + } + + #Replace indices by variable names. + var_name = paste('v', seq(1, length(colnames(amat))), 's', sep='') + names(var_name) = seq(1, nrow(amat)) + edge_df[,1] = var_name[as.character(edge_df[,1])] + + #Convert edge data frame to Boolmodel rules. + act_rule = vector('list', length(var_name)) + inh_rule = vector('list', length(var_name)) + for(i in 1:nrow(edge_df)) + { + if(edge_df[i,3] == 'act') + { + tmp_act = unname(c(act_rule[[as.numeric(edge_df[i,2])]], edge_df[i,1])) + act_rule[[as.numeric(edge_df[i,2])]] = tmp_act + } else + { + tmp_inh = unname(c(inh_rule[[as.numeric(edge_df[i,2])]], edge_df[i,1])) + inh_rule[[as.numeric(edge_df[i,2])]] = tmp_inh + } + } + + #Fill in empty rules. + act_rule[which(sapply(act_rule, length)==0)] = '0' + inh_rule[which(sapply(inh_rule, length)==0)] = '0' + + #Generate BoolModel object. + out_model = BoolModel(target=colnames(amat), target_var=var_name, rule_act=act_rule, rule_inh=inh_rule) + + return(out_model) +} + +#' @title Convert BoolModel object into BoolNet readable data frame +#' +#' @description +#' This method converts BoolModel object into a data frame, which is readable by BoolNet. +#' +#' @param bmodel BoolModel object. +#' +#' @export +bm_to_df = function(bmodel) +{ + out_df = printBM(bmodel, gene.names=T)[,-2] + colnames(out_df) = c('targets', 'factors') + + return(out_df) +} + +#' @title Convert a data frame into BoolModel object +#' +#' @description +#' This method converts a data frame into a BoolModel object. +#' Note that the model should only has 1 NOT operator. More than 1 is STRICTLY NOT allowed. +#' +#' @param in_df data frame with 2 columns, targets and factors +#' +#' @export +df_to_bm = function(in_df) +{ + #Setup the initial data frame. + in_df = as.data.frame(apply(in_df, 2, tolower), stringsAsFactors=F) + in_df = in_df[order(in_df[,1]),] + in_df[,2] = gsub('\\s', '', in_df[,2]) + + #Take out target genes. + target = in_df[,1] + + #Create corresponding simplified variable terms. + target_var = paste('v', seq(1,length(target)), 's', sep='') #the variable name consists of digits bounded by two characters. + + #Separate the activators and the inhibitors. + rule_list = strsplit(in_df[,2], '&!') + + rule_act = character(length(rule_list)) #initialise variable with correct lengths. + rule_inh = character(length(rule_list)) + for(i in 1:length(rule_list)) + { + if(length(rule_list[[i]]) == 2) + { + rule_act[i] = rule_list[[i]][1] + rule_inh[i] = rule_list[[i]][2] + } else if(length(rule_list[[i]]) == 1) + { + if(grepl('!', rule_list[[i]][1])) #if the first term in each vector in the list has a !, this means that there is no activator in this vector, but only inhibitor. + { + rule_act[i] = '0' + rule_inh[i] = rule_list[[i]][1] + } else + { + rule_act[i] = rule_list[[i]][1] + rule_inh[i] = '0' + } + } else + { + stop('Error in model rule specification.') + } + } + + rule_act = gsub('[(]0[)]', '0', rule_act) #take away bracketed 0s. + rule_inh = gsub('[(]0[)]', '0', rule_inh) + + rule_inh = gsub('^([!])', '', rule_inh) #take away remaining !. + + #Replace genes in rules with simplified variables. + for(i in 1:length(target_var)) + { + rule_act = gsub(target[i], target_var[i], rule_act) + rule_inh = gsub(target[i], target_var[i], rule_inh) + } + + rule_act = unname(sapply(rule_act, extract_term)) + rule_inh = unname(sapply(rule_inh, extract_term)) + + if(class(rule_act) != 'list') + { + rule_act = as.list(rule_act) + } + + if(class(rule_inh) != 'list') + { + rule_inh = as.list(rule_inh) + } + + bmodel = BoolModel(target=target, target_var=target_var, rule_act=rule_act, rule_inh=rule_inh) + + return(bmodel) +} diff --git a/R/model_modification.R b/R/model_modification.R index 0199d3b..6181a76 100644 --- a/R/model_modification.R +++ b/R/model_modification.R @@ -6,10 +6,20 @@ #' @param bm S4 BoolModel object. #' @param index integer. Specifying rule of which gene to modify. If NULL, modifies all rules in the model. Defaults to NULL. #' @param overlap_gene character vector. Specify which genes are present in both model and data inputs. +#' @param sep_list logical. Separate add and del lists. +#' @param encoded logical. Return Boolean models in encoded form to save space. +#' @param model_encoding list. Only used if encoded=T. +#' @param and_bool logical. Default to check_and(). +#' @param self_loop logical. Whether to allow self_loop in random starting model. Default to F. #' #' @export -minmod_model = function(bm, index=NULL, overlap_gene=NULL) +minmod_model = function(bm, index=NULL, overlap_gene=NULL, sep_list=F, encoded=F, model_encoding, and_bool=NULL, self_loop=F) { + if(class(bm) != 'BoolModel') + { + bm = decompress_bmodel(bm, model_encoding) + } + if(is.null(index)) { #Modify only the overlapping genes @@ -19,7 +29,7 @@ minmod_model = function(bm, index=NULL, overlap_gene=NULL) add_list = c() for(ind in gene_ind) #Modify act and inh rules for one gene at a time. { - tmp_list = minmod_internal(bm, ind) + tmp_list = minmod_internal(bm, ind, encoded, model_encoding, and_bool, self_loop) if(!is.null(tmp_list$dellist)) { del_list = c(del_list, tmp_list$dellist) @@ -29,11 +39,24 @@ minmod_model = function(bm, index=NULL, overlap_gene=NULL) add_list = c(add_list, tmp_list$addlist) } } - out_list = list(del_list=del_list, - add_list=add_list) + + if(sep_list) + { + out_list = list(del_list=del_list, + add_list=add_list) + } else + { + out_list = c(del_list, add_list) + } } else { - out_list = minmod_internal(bm, index) + if(sep_list) + { + out_list = minmod_internal(bm, index, encoded, model_encoding, and_bool, self_loop) + } else + { + out_list = unlist(minmod_internal(bm, index, encoded, model_encoding, and_bool, self_loop)) + } } return(out_list) @@ -46,9 +69,16 @@ minmod_model = function(bm, index=NULL, overlap_gene=NULL) #' #' @param bm S4 BoolModel object. #' @param index integer. Specifying rule of which gene to modify. -minmod_internal = function(bm, index) +#' @param encoded logical. Return Boolean models in encoded form to save space. +#' @param model_encoding list. Only used if encoded=T. +#' @param and_bool logical. Default to check_and() if unspecified. +#' @param self_loop logical. Whether to allow self_loop in random starting model. Default to F. +minmod_internal = function(bm, index, encoded, model_encoding, and_bool, self_loop=F) { - and_bool = check_and(bm) + if(is.null(and_bool)) + { + and_bool = check_and(bm) + } arule = bm@rule_act[[index]] irule = bm@rule_inh[[index]] @@ -61,10 +91,14 @@ minmod_internal = function(bm, index) { tmp = unlist(strsplit(arule[1], '&')) #split the AND term into 2. dellist_arule = c(dellist_arule, list(c(tmp[1], arule[-1]), c(tmp[2], arule[-1]))) - } else if(arule[1] != '0' & irule[1] != '0') + } else #allow empty rule. { - dellist_arule = c(dellist_arule, list('0')) + dellist_arule = c(dellist_arule, list(arule[-1])) } +# else if(arule[1] != '0' & irule[1] != '0') #not allowing empty rule. +# { +# dellist_arule = c(dellist_arule, list('0')) +# } } else { for(i in 1:length(arule)) @@ -89,10 +123,14 @@ minmod_internal = function(bm, index) { tmp = unlist(strsplit(irule[1], '&')) #split the AND term into 2. dellist_irule = c(dellist_irule, list(c(tmp[1], irule[-1]), c(tmp[2], irule[-1]))) - } else if(arule[1] != '0' & irule[1] != '0') + } else #allow empty rule. { - dellist_irule = c(dellist_irule, list('0')) + dellist_irule = c(dellist_irule, list(irule[-1])) } +# } else if(arule[1] != '0' & irule[1] != '0') #not allowing empty rule. +# { +# dellist_irule = c(dellist_irule, list('0')) +# } } else { for(i in 1:length(irule)) @@ -110,10 +148,17 @@ minmod_internal = function(bm, index) dellist_irule[sapply(dellist_irule, length)==0] = list('0') #if there is any empty term at the end, add in '0' #Addition of act rule. (single) - pos_actterm = bm@target_var[!bm@target_var %in% unlist(strsplit(c(arule, irule), '&'))] + if(self_loop) + { + pos_actterm = bm@target_var[!(bm@target_var %in% unlist(strsplit(c(arule, irule), '&')))] + } else + { + pos_actterm = bm@target_var[!(bm@target_var %in% unlist(strsplit(c(arule, irule), '&')) | bm@target_var %in% bm@target_var[index])] + } + + addlist_arule = list() if(length(pos_actterm) != 0) { - addlist_arule = list() if(arule[1] == '0') { for(i in 1:length(pos_actterm)) @@ -152,11 +197,18 @@ minmod_internal = function(bm, index) } #Addition of inh rule. (single) - pos_inhterm = bm@target_var[!bm@target_var %in% unlist(strsplit(c(arule, irule), '&'))] + if(self_loop) + { + pos_inhterm = bm@target_var[!(bm@target_var %in% unlist(strsplit(c(arule, irule), '&')))] + } else + { + pos_inhterm = bm@target_var[!(bm@target_var %in% unlist(strsplit(c(arule, irule), '&')) | bm@target_var %in% bm@target_var[index])] + } + #pos_inhterm = pos_inhterm[!is.na(pos_inhterm)] + addlist_irule = list() if(length(pos_inhterm)!=0) { - addlist_irule = list() if(irule[1] == '0') { for(i in 1:length(pos_inhterm)) @@ -204,6 +256,11 @@ minmod_internal = function(bm, index) { tmp_bm = bm tmp_bm@rule_act[[index]] = dellist_arule[[i]] + if(encoded) + { + tmp_bm = list(compress_bmodel(tmp_bm, model_encoding)) + stopifnot(length(model_encoding[[2]])==max(round(tmp_bm[[1]]))) + } bmodel_dellist = c(bmodel_dellist, tmp_bm) } } @@ -214,6 +271,11 @@ minmod_internal = function(bm, index) { tmp_bm = bm tmp_bm@rule_inh[[index]] = dellist_irule[[i]] + if(encoded) + { + tmp_bm = list(compress_bmodel(tmp_bm, model_encoding)) + stopifnot(length(model_encoding[[2]])==max(round(tmp_bm[[1]]))) + } bmodel_dellist = c(bmodel_dellist, tmp_bm) } } @@ -224,6 +286,11 @@ minmod_internal = function(bm, index) { tmp_bm = bm tmp_bm@rule_act[[index]] = addlist_arule[[i]] + if(encoded) + { + tmp_bm = list(compress_bmodel(tmp_bm, model_encoding)) + stopifnot(length(model_encoding[[2]])==max(round(tmp_bm[[1]]))) + } bmodel_addlist = c(bmodel_addlist, tmp_bm) } } @@ -234,6 +301,11 @@ minmod_internal = function(bm, index) { tmp_bm = bm tmp_bm@rule_inh[[index]] = addlist_irule[[i]] + if(encoded) + { + tmp_bm = list(compress_bmodel(tmp_bm, model_encoding)) + stopifnot(length(model_encoding[[2]])==max(round(tmp_bm[[1]]))) + } bmodel_addlist = c(bmodel_addlist, tmp_bm) } } diff --git a/R/rand_model.R b/R/rand_model.R index dd2ac8c..f4f341c 100644 --- a/R/rand_model.R +++ b/R/rand_model.R @@ -1,410 +1,467 @@ -#' @title Generate random act and inh rule for a single gene -#' -#' @description -#' This function generates one random Boolean rule (both act and inh) per run. Return a list of two vectors. -#' Note that this method will not give empty rule, i.e. 0 term in both act and inh rules. -#' -#' @param x character vector. A vector of all single terms to be used. -#' @param np integer. Number of gene variables in a rule. NOT max_varperrule here. -#' @param tar_ind numerical. Indicate which gene is the rule for. Used in preventing self-loop. -#' @param and_bool logical. Indicates whether to include AND terms or not. -#' @param self_loop logical. Indicates whether to allow self_loop. Default to F. -gen_singlerule = function(x, np, tar_ind, and_bool, self_loop=F) -{ - #Convert x to the form of 'v1s'. - if(!(all(grepl('v[0-9]+s', x)))) - { - if(any(grepl('v[0-9]+s', x))) - { - stop('The name of some genes resemble internally used variables. Respecify names.') - } else - { - x = paste('v', seq(1, length(x)), 's', sep='') - } - } - - rnum_act = floor(runif(1, 0, np+1)) #get a random number from 0 to np for act_rule. - - if(rnum_act==0) - { - #rnum_inh = floor(runif(1, 1, np-rnum_act+1)) #must have at least one term if rnum_act is empty. - rnum_inh = np - } else { - #rnum_inh = floor(runif(1, 0, np-rnum_act+1)) #get a random number from 0 to np-rnum_act for inh_rule. - rnum_inh = np - rnum_act - } - - if(rnum_act + rnum_inh > length(x)) #cannot use more terms than available. - { - stop('Error in generating rules for random models.') - } - - #Get the activation rule. - if(rnum_act != 0) - { - if(!self_loop) - { - ract_prob = rep(1/(length(x)-1), length(x)) - ract_prob[tar_ind] = 0 #set probability to 0, to prevent self-loop. - tmp_ract = sample(x, rnum_act, prob=ract_prob) #get single variables first. - } else - { - tmp_ract = sample(x, rnum_act) #get single variables first. - } - - rule_act = c() - - if(and_bool) - { - #Determine which single variables to combine into double variables. - ra_ind = as.logical(replicate(rnum_act,round(runif(1)))) - while(sum(ra_ind)%%2==1) - { - ra_ind = as.logical(replicate(rnum_act,round(runif(1)))) - } - - #Add double variables. - if(length(tmp_ract[ra_ind]) != 0) - { - for(i in 1:length(tmp_ract[ra_ind])) - { - if(i %% 2 == 1) #only do it when i is odd. - { - rule_act = c(rule_act, paste(c(tmp_ract[ra_ind][i], tmp_ract[ra_ind][i+1]), collapse='&')) - } - } - } - - #Add single variables. - rule_act = c(tmp_ract[!ra_ind], rule_act) - } else - { - #Add single variables. - rule_act = tmp_ract - } - } else - { - rule_act = '0' - } - - if(rnum_inh != 0) - { - #Get the inhibition rule. - if(!self_loop) - { - intr_x = x[!(x %in% unlist(strsplit(rule_act, '&')))] - rinh_prob = rep(1/(length(intr_x)-1), length(intr_x)) - rinh_prob[which(intr_x %in% x[tar_ind])] = 0 #set probability to 0, to prevent self-loop. - tmp_rinh = sample(intr_x, rnum_inh, prob=rinh_prob) #get single variables first. - } else - { - tmp_rinh = sample(x[!(x %in% unlist(strsplit(rule_act, '&')))], rnum_inh) #exclude terms already used in rule_act. - } - - if(and_bool) - { - #Determine which single variables to combine into double variables. - ri_ind = as.logical(replicate(rnum_inh, round(runif(1)))) - while(sum(ri_ind)%%2==1) - { - ri_ind = as.logical(replicate(rnum_inh, round(runif(1)))) - } - - #Add double variables. - rule_inh = c() - if(length(tmp_rinh[ri_ind]) != 0) - { - for(i in 1:length(tmp_rinh[ri_ind])) - { - if(i %% 2 == 1) #only do it when i is odd. - { - rule_inh = c(rule_inh, paste(c(tmp_rinh[ri_ind][i], tmp_rinh[ri_ind][i+1]), collapse='&')) - } - } - } - - #Add single variables. - rule_inh = c(tmp_rinh[!ri_ind], rule_inh) - } else - { - rule_inh = tmp_rinh - } - } else - { - rule_inh = '0' - } - - return(list(rule_act, rule_inh)) -} - -#' @title Generate a random Boolean model -#' -#' @description -#' This function generates a random Boolean model. Returns an S4 BoolModel object. -#' Note that this method will not give empty rule, i.e. 0 term in both act and inh rules. -#' -#' @param var character vector. A vector of single genes/variables to be used in the model. -#' @param mvar integer. Maximum number of variables in act or inh rule. Default to length(var). -#' @param and_bool logical. Indicates whether to include AND terms or not. -#' @param self_loop logical. Indicates whether to allow self_loop. Default to F. -#' -#' @details -#' The number of terms in a function for a gene is modelled by power-law distribution. -gen_one_rmodel = function(var, mvar=length(var), and_bool, self_loop=F) -{ - if(mvar > length(var)) - { - mvar=length(var) - } - - #(1) By using power law distribution, estimate the in-degree genetic partner for each gene. - #The minimum in degree is set to 2. 'v1s' and 'v1s&v2s' are currently considered as 2 different partners. - num_partner = poweRlaw::rpldis(length(var), xmin=2, alpha=3) #xmin = the minimum value of resulting random integer, alpha = scaling factor of the distribution. According to literature, for gene network, this should be 3. (or 2 mvar] = mvar #power law distribution can gives very high number, therefore must cap it. - - arule = sapply(1:length(num_partner), function(x) gen_singlerule(var, num_partner[x], x, and_bool, self_loop)) - arule = apply(arule, 1, c) #arule is a list of lists, with list[[1]] = all act rules, list[[2]] = all inh rules. - - bmodel = BoolModel(target=var, target_var=paste('v',seq(1,length(var)),'s', sep=''), rule_act=arule[[1]], rule_inh=arule[[2]]) - - return(bmodel) -} - -#' @title Generate two random DAG Boolean models with a specified number of steps apart -#' -#' @description -#' This function generates a random DAG Boolean model, then get another random DAG Boolean model that is a specified number of steps apart by adding and/or removing genes. -#' Difficult to generate completely directed graph with a specified number of steps apart. -#' -#' @param var character vector. A vector of single genes/variables to be used in the model. -#' @param steps integer. Number of steps apart between the two models. If steps=0, give completely random starting model. -#' @param mvar integer. Maximum number of variables in act or inh rule. Default to length(var). -#' @param in_amat matrix. Provide adjacency matrix of first model. -#' @param acyclic logical. Whether to restrict the model to being acyclic or not. Defaults to TRUE. -#' -#' @export -gen_two_rmodel_dag = function(var, steps, mvar=length(var), in_amat=NULL, acyclic=T) -{ - if(!requireNamespace('bnlearn', quietly = TRUE)) { - stop('Requires bnlearn package to run this function.') - } else - { - if(steps==0) - { - start_model = bnlearn::random.graph(var, method = "melancon", max.degree=mvar) - end_model = bnlearn::random.graph(var, method = "melancon", max.degree=mvar) - } else - { - if(is.null(in_amat)) - { - #(3) Get first model. - start_model = bnlearn::random.graph(var, method = "melancon", max.degree=mvar) - } else - { - start_model = bnlearn::empty.graph(var) - if(acyclic) - { - bnlearn::amat(start_model) = in_amat - } else - { - bnlearn::amat(start_model, ignore.cycles=T) = in_amat - } - } - - cur_ite = 1 - max_ite = 100 - out_break = F - while(cur_ite < max_ite & !out_break) - { - #(4) Get second model of a specified number of steps away. - end_model = start_model - memory_ind = c() - #memory_ind = apply(which(t(amat(end_model))==1, arr.ind = T), 1, function(x) paste(x, collapse=',')) #no undirected arcs. - #memory_ind = c(memory_ind, paste(1:nrow(tmp), 1:ncol(tmp), sep=',')) #no self_loops. - #start_time = proc.time() - while(TRUE) - { - tryCatch({ - #used_time = proc.time() - start_time - #used_time = as.numeric(used_time)[3] - - tmp_graph = bnlearn::empty.graph(var) - tmp = bnlearn::amat(end_model) - - x_ind = sample(1:nrow(tmp),1) - y_ind = sample(1:ncol(tmp),1) - - while(paste(x_ind, y_ind, collapse=',') %in% memory_ind) - { - x_ind = sample(1:nrow(tmp),1) - y_ind = sample(1:ncol(tmp),1) - } - - memory_ind = c(memory_ind, paste(x_ind, y_ind, collapse=',')) - if(length(memory_ind) == nrow(tmp)*ncol(tmp)) #break loop, get a new starting model and try again. - { - #cat(sprintf('For step %s, no other possible model exists.', steps)) - break - } - - #Change one step at a time. - if(tmp[x_ind,y_ind]==0) - { - tmp[x_ind,y_ind] = 1 - } else - { - tmp[x_ind,y_ind] = 0 - } - - if(acyclic) - { - bnlearn::amat(tmp_graph) = tmp - } else - { - bnlearn::amat(tmp_graph, ignore.cycles=T) = tmp - } - - end_model = tmp_graph - - if(sum(abs(bnlearn::amat(start_model)-bnlearn::amat(tmp_graph))) == steps) - { - if(nrow(bnlearn::undirected.arcs(tmp_graph))==0) #cannot have undirected arcs in the models. - { - out_break = T - break - } - } - }, error=function(e){}, - finally={}) - } - - cur_ite = cur_ite + 1 - } - - if(cur_ite == max_ite) - { - stop(sprintf('For step %s, no other possible model exists.', steps)) - } - } - - return(list(start_model, end_model)) - } -} - -#' @title Generate two random Boolean models with a specified number of steps apart -#' -#' @description -#' This function generates a random Boolean model, then get another random Boolean model that is a specified number of steps apart by adding and/or removing genes. -#' Returns a list of two S4 BoolModel objects. -#' -#' @param var character vector. A vector of single genes/variables to be used in the model. -#' @param steps integer. Number of steps apart between the two models. If steps=0, give completely random starting model. -#' @param mvar integer. Maximum number of variables in act or inh rule. Default to length(var). -#' @param and_bool logical. Indicates whether to include AND terms or not. Default to F. -#' @param in_bmodel BoolModel object. The starting model supplied. -#' @param self_loop logical. Indicates whether to allow self_loop. Default to F. -#' -#' @details -#' The number of terms in a function for a gene is modelled by power-law distribution. -#' -#' @export -gen_two_rmodel = function(var, steps, mvar=length(var), and_bool=F, in_bmodel=NULL, self_loop=F) -{ - if(mvar > length(var)) - { - mvar=length(var) - } - - if(is.null(in_bmodel)) - { - bmodel1 = gen_one_rmodel(var, mvar, and_bool, self_loop) - } else - { - if(class(in_bmodel)!='BoolModel') - { - stop('in_bmodel: Please give BoolModel object.') - } - - bmodel1 = in_bmodel - stopifnot(length(in_bmodel@target) == length(var)) - } - - if(steps==0) - { - bmodel2 = gen_one_rmodel(var, mvar, and_bool, self_loop) - } else - { - cur_model = bmodel1 - bmodel2 = bmodel1 - ite = 1 - while(length(unlist(model_setdiff(cur_model, bmodel1, mvar))) steps) - # { - # browser() - # stop('Error in code.') - # } - - all_rule = lapply(1:length(cur_model@target), function(x) c(cur_model@rule_act[[x]], cur_model@rule_inh[[x]])) - all_rule = lapply(all_rule, function(x) x[x!='0']) - all_rule = lapply(all_rule, function(x) unlist(strsplit(x, '&'))) - - #Summarise info in bmodel. - all_len = sapply(all_rule, length) #get number of terms (act and inh) for each target gene - - avail_add_ind = which(all_len < mvar) #any rule with less genes than mvar can take more genes. - avail_del_ind = which(all_len > 1) #any rule with more genes than 1 can have fewer genes. - - if(length(avail_add_ind) == 0 & length(avail_del_ind) == 0) - { - stop('Model is fully specified. No term can be modified.') - break - } - - #picking choice for modification. - if(length(avail_add_ind) == 0) - { - mod_choice = 'del' - } else if(length(avail_del_ind) == 0) - { - mod_choice = 'add' - } else - { - mod_choice = sample(c('add', 'del'), 1) - } - - #Modifying models. - if(mod_choice == 'add') - { - if(length(avail_add_ind) == 1) - { - rule_aind = avail_add_ind - } else - { - rule_aind = sample(avail_add_ind, 1) - } - - next_model = sample(minmod_model(cur_model, rule_aind)$addlist, 1)[[1]] - - } else if(mod_choice == 'del') - { - if(length(avail_del_ind) == 1) - { - rule_dind = avail_del_ind - } else - { - rule_dind = sample(avail_del_ind, 1) - } - - next_model = sample(minmod_model(cur_model, rule_dind)$dellist, 1)[[1]] - } - stopifnot(length(unlist(model_setdiff(cur_model, next_model, mvar)))==1) - cur_model = next_model - } - - bmodel2 = cur_model - stopifnot(length(unlist(model_setdiff(bmodel2, bmodel1, mvar)))==steps) - } - - return(list(bmodel1, bmodel2)) +#' @title Generate random act and inh rule for a single gene +#' +#' @description +#' This function generates one random Boolean rule (both act and inh) per run. Return a list of two vectors. +#' Note that this method will not give empty rule, i.e. 0 term in both act and inh rules. +#' +#' @param x character vector. A vector of all single terms to be used. +#' @param np integer. Number of gene variables in a rule. NOT max_varperrule here. +#' @param tar_ind numerical. Indicate which gene is the rule for. Used in preventing self-loop. +#' @param and_bool logical. Indicates whether to include AND terms or not. +#' @param self_loop logical. Indicates whether to allow self_loop. Default to F. +#' @param rule_type character. Types of rules. Defaults to random. +gen_singlerule = function(x, np, tar_ind, and_bool, self_loop=F, rule_type='random') +{ + #Convert x to the form of 'v1s'. + if(!(all(grepl('v[0-9]+s', x)))) + { + if(any(grepl('v[0-9]+s', x))) + { + stop('The name of some genes resemble internally used variables. Respecify names.') + } else + { + x = paste('v', seq(1, length(x)), 's', sep='') + } + } + + if(rule_type=='full') + { + if(self_loop) + { + rnum_act = np + rnum_inh = 0 + } else + { + rnum_act = np - 1 + rnum_inh = 0 + } + } else if(rule_type=='minimal') + { + rnum_act = 1 + rnum_inh = 0 + } else if(rule_type=='random') + { + rnum_act = floor(runif(1, 0, np+1)) #get a random number from 0 to np for act_rule. + + if(rnum_act==0) + { + #rnum_inh = floor(runif(1, 1, np-rnum_act+1)) #must have at least one term if rnum_act is empty. + rnum_inh = np + } else { + #rnum_inh = floor(runif(1, 0, np-rnum_act+1)) #get a random number from 0 to np-rnum_act for inh_rule. + rnum_inh = np - rnum_act + } + + if(rnum_act + rnum_inh > length(x)) #cannot use more terms than available. + { + stop('Error in generating rules for random models.') + } + } + + #Get the activation rule. + if(rnum_act != 0) + { + if(!self_loop) + { + ract_prob = rep(1/(length(x)-1), length(x)) + ract_prob[tar_ind] = 0 #set probability to 0, to prevent self-loop. + tmp_ract = sample(x, rnum_act, prob=ract_prob) #get single variables first. + } else + { + tmp_ract = sample(x, rnum_act) #get single variables first. + } + + rule_act = c() + + if(and_bool) + { + #Determine which single variables to combine into double variables. + ra_ind = as.logical(replicate(rnum_act,round(runif(1)))) + while(sum(ra_ind)%%2==1) + { + ra_ind = as.logical(replicate(rnum_act,round(runif(1)))) + } + + #Add double variables. + if(length(tmp_ract[ra_ind]) != 0) + { + for(i in 1:length(tmp_ract[ra_ind])) + { + if(i %% 2 == 1) #only do it when i is odd. + { + rule_act = c(rule_act, paste(c(tmp_ract[ra_ind][i], tmp_ract[ra_ind][i+1]), collapse='&')) + } + } + } + + #Add single variables. + rule_act = c(tmp_ract[!ra_ind], rule_act) + } else + { + #Add single variables. + rule_act = tmp_ract + } + } else + { + rule_act = '0' + } + + if(rnum_inh != 0) + { + #Get the inhibition rule. + if(!self_loop) + { + if(rule_type=='full') + { + rinh_prob = rep(1/(length(x)-1), length(x)) + rinh_prob[tar_ind] = 0 #set probability to 0, to prevent self-loop. + tmp_rinh = sample(x, rnum_inh, prob=rinh_prob) #get single variables first. + } else + { + intr_x = x[!(x %in% unlist(strsplit(rule_act, '&')))] + rinh_prob = rep(1/(length(intr_x)-1), length(intr_x)) + rinh_prob[which(intr_x %in% x[tar_ind])] = 0 #set probability to 0, to prevent self-loop. + tmp_rinh = sample(intr_x, rnum_inh, prob=rinh_prob) #get single variables first. + } + } else + { + tmp_rinh = sample(x[!(x %in% unlist(strsplit(rule_act, '&')))], rnum_inh) #exclude terms already used in rule_act. + } + + if(and_bool) + { + #Determine which single variables to combine into double variables. + ri_ind = as.logical(replicate(rnum_inh, round(runif(1)))) + while(sum(ri_ind)%%2==1) + { + ri_ind = as.logical(replicate(rnum_inh, round(runif(1)))) + } + + #Add double variables. + rule_inh = c() + if(length(tmp_rinh[ri_ind]) != 0) + { + for(i in 1:length(tmp_rinh[ri_ind])) + { + if(i %% 2 == 1) #only do it when i is odd. + { + rule_inh = c(rule_inh, paste(c(tmp_rinh[ri_ind][i], tmp_rinh[ri_ind][i+1]), collapse='&')) + } + } + } + + #Add single variables. + rule_inh = c(tmp_rinh[!ri_ind], rule_inh) + } else + { + rule_inh = tmp_rinh + } + } else + { + rule_inh = '0' + } + + return(list(rule_act, rule_inh)) +} + +#' @title Generate a random Boolean model +#' +#' @description +#' This function generates a random Boolean model. Returns an S4 BoolModel object. +#' Note that this method will not give empty rule, i.e. 0 term in both act and inh rules. +#' +#' @param var character vector. A vector of single genes/variables to be used in the model. +#' @param exponent integer. The exponent of power law distribution. Default to 3. +#' @param and_bool logical. Indicates whether to include AND terms or not. +#' @param self_loop logical. Indicates whether to allow self_loop. Default to F. +#' @param mvar integer. Maximum number of variables in act or inh rule. Default to length(var). +#' @param model_type character. Specifies the type of model generated. +#' +#' @details +#' The number of terms in a function for a gene is modelled by power-law distribution. +gen_one_rmodel = function(var, exponent=3, and_bool, self_loop=F, mvar=length(var), model_type='random') +{ + if(mvar > length(var)) + { + mvar=length(var) + } + + #(1) By using power law distribution, estimate the in-degree genetic partner for each gene. + #The minimum in degree is set to 2. 'v1s' and 'v1s&v2s' are currently considered as 2 different partners. + if(model_type=='full') + { + mvar = length(var) + num_partner = rep(mvar, length(var)) + } else if(model_type=='minimal') + { + mvar = 1 + num_partner = rep(mvar, length(var)) + } else if(model_type=='random') + { + num_partner = poweRlaw::rpldis(length(var), xmin=2, alpha=exponent) #xmin = the minimum value of resulting random integer, alpha = scaling factor of the distribution. According to literature, for gene network, this should be 3. (or 2 mvar] = mvar #power law distribution can gives very high number, therefore must cap it. + } + + arule = sapply(1:length(num_partner), function(x) gen_singlerule(var, num_partner[x], x, and_bool, self_loop, rule_type = model_type)) + arule = apply(arule, 1, c) #arule is a list of lists, with list[[1]] = all act rules, list[[2]] = all inh rules. + + bmodel = BoolModel(target=var, target_var=paste('v',seq(1,length(var)),'s', sep=''), rule_act=arule[[1]], rule_inh=arule[[2]]) + + return(bmodel) +} + +#' @title Generate two random DAG Boolean models with a specified number of steps apart +#' +#' @description +#' This function generates a random DAG Boolean model, then get another random DAG Boolean model that is a specified number of steps apart by adding and/or removing genes. +#' Difficult to generate completely directed graph with a specified number of steps apart. +#' +#' @param var character vector. A vector of single genes/variables to be used in the model. +#' @param steps integer. Number of steps apart between the two models. If steps=0, give completely random starting model. +#' @param mvar integer. Maximum number of variables in act or inh rule. Default to length(var). +#' @param in_amat matrix. Provide adjacency matrix of first model. +#' @param acyclic logical. Whether to restrict the model to being acyclic or not. Defaults to TRUE. +#' +#' @export +gen_two_rmodel_dag = function(var, steps, mvar=length(var), in_amat=NULL, acyclic=T) +{ + if(!requireNamespace('bnlearn', quietly = TRUE)) { + stop('Requires bnlearn package to run this function.') + } else + { + if(steps==0) + { + start_model = bnlearn::random.graph(var, method = "melancon", max.degree=mvar) + end_model = bnlearn::random.graph(var, method = "melancon", max.degree=mvar) + } else + { + if(is.null(in_amat)) + { + #(3) Get first model. + start_model = bnlearn::random.graph(var, method = "melancon", max.degree=mvar) + } else + { + start_model = bnlearn::empty.graph(var) + if(acyclic) + { + bnlearn::amat(start_model) = in_amat + } else + { + bnlearn::amat(start_model, ignore.cycles=T) = in_amat + } + } + + cur_ite = 1 + max_ite = 100 + out_break = F + while(cur_ite < max_ite & !out_break) + { + #(4) Get second model of a specified number of steps away. + end_model = start_model + memory_ind = c() + #memory_ind = apply(which(t(amat(end_model))==1, arr.ind = T), 1, function(x) paste(x, collapse=',')) #no undirected arcs. + #memory_ind = c(memory_ind, paste(1:nrow(tmp), 1:ncol(tmp), sep=',')) #no self_loops. + #start_time = proc.time() + while(TRUE) + { + tryCatch({ + #used_time = proc.time() - start_time + #used_time = as.numeric(used_time)[3] + + tmp_graph = bnlearn::empty.graph(var) + tmp = bnlearn::amat(end_model) + + x_ind = sample(1:nrow(tmp),1) + y_ind = sample(1:ncol(tmp),1) + + while(paste(x_ind, y_ind, collapse=',') %in% memory_ind) + { + x_ind = sample(1:nrow(tmp),1) + y_ind = sample(1:ncol(tmp),1) + } + + memory_ind = c(memory_ind, paste(x_ind, y_ind, collapse=',')) + if(length(memory_ind) == nrow(tmp)*ncol(tmp)) #break loop, get a new starting model and try again. + { + #cat(sprintf('For step %s, no other possible model exists.', steps)) + break + } + + #Change one step at a time. + if(tmp[x_ind,y_ind]==0) + { + tmp[x_ind,y_ind] = 1 + } else + { + tmp[x_ind,y_ind] = 0 + } + + if(acyclic) + { + bnlearn::amat(tmp_graph) = tmp + } else + { + bnlearn::amat(tmp_graph, ignore.cycles=T) = tmp + } + + end_model = tmp_graph + + if(sum(abs(bnlearn::amat(start_model)-bnlearn::amat(tmp_graph))) == steps) + { + if(nrow(bnlearn::undirected.arcs(tmp_graph))==0) #cannot have undirected arcs in the models. + { + out_break = T + break + } + } + }, error=function(e){}, + finally={}) + } + + cur_ite = cur_ite + 1 + } + + if(cur_ite == max_ite) + { + stop(sprintf('For step %s, no other possible model exists.', steps)) + } + } + + return(list(start_model, end_model)) + } +} + +#' @title Generate two random Boolean models with a specified number of steps apart +#' +#' @description +#' This function generates a random Boolean model, then get another random Boolean model that is a specified number of steps apart by adding and/or removing genes. +#' Returns a list of two S4 BoolModel objects. +#' +#' @param var character vector. A vector of single genes/variables to be used in the model. +#' @param steps integer. Number of steps apart between the two models. If steps=0, give completely random starting model. +#' @param mvar integer. Maximum number of variables in act or inh rule. Default to length(var). +#' @param and_bool logical. Indicates whether to include AND terms or not. +#' @param in_bmodel BoolModel object. The starting model supplied. +#' @param self_loop logical. Indicates whether to allow self_loop. Default to F. +#' +#' @details +#' The number of terms in a function for a gene is modelled by power-law distribution. +#' +#' @export +gen_two_rmodel = function(var, steps, mvar=length(var), and_bool, in_bmodel=NULL, self_loop=F) +{ + if(mvar > length(var)) + { + mvar=length(var) + } + + if(is.null(in_bmodel)) + { + bmodel1 = gen_one_rmodel(var, mvar, and_bool, self_loop) + } else + { + if(class(in_bmodel)!='BoolModel') + { + stop('in_bmodel: Please give BoolModel object.') + } + + bmodel1 = in_bmodel + stopifnot(length(in_bmodel@target) == length(var)) + } + + if(steps==0) + { + bmodel2 = gen_one_rmodel(var, mvar, and_bool, self_loop) + } else + { + cur_model = bmodel1 + bmodel2 = bmodel1 + ite = 1 + while(length(unlist(model_setdiff(cur_model, bmodel1, mvar))) steps) + # { + # browser() + # stop('Error in code.') + # } + + all_rule = lapply(1:length(cur_model@target), function(x) c(cur_model@rule_act[[x]], cur_model@rule_inh[[x]])) + all_rule = lapply(all_rule, function(x) x[x!='0']) + all_rule = lapply(all_rule, function(x) unlist(strsplit(x, '&'))) + + #Summarise info in bmodel. + all_len = sapply(all_rule, length) #get number of terms (act and inh) for each target gene + + avail_add_ind = which(all_len < mvar) #any rule with less genes than mvar can take more genes. + avail_del_ind = which(all_len > 1) #any rule with more genes than 1 can have fewer genes. + + if(length(avail_add_ind) == 0 & length(avail_del_ind) == 0) + { + stop('Model is fully specified. No term can be modified.') + break + } + + #picking choice for modification. + if(length(avail_add_ind) == 0) + { + mod_choice = 'del' + } else if(length(avail_del_ind) == 0) + { + mod_choice = 'add' + } else + { + mod_choice = sample(c('add', 'del'), 1) + } + + #Modifying models. + if(mod_choice == 'add') + { + if(length(avail_add_ind) == 1) + { + rule_aind = avail_add_ind + } else + { + rule_aind = sample(avail_add_ind, 1) + } + + all_models = minmod_model(cur_model, rule_aind, sep_list=T, and_bool=and_bool, self_loop=self_loop)$addlist + samp_ind = sample(1:length(all_models), 1)[[1]] + next_model = all_models[[samp_ind]] + all_models = all_models[-samp_ind] + while(length(unlist(model_setdiff(cur_model, next_model, mvar)))!=1) + { + samp_ind = sample(1:length(all_models), 1)[[1]] + next_model = all_models[[samp_ind]] + all_models = all_models[-samp_ind] + } + } else if(mod_choice == 'del') + { + if(length(avail_del_ind) == 1) + { + rule_dind = avail_del_ind + } else + { + rule_dind = sample(avail_del_ind, 1) + } + + all_models = minmod_model(cur_model, rule_aind, sep_list=T, and_bool=and_bool, self_loop=self_loop)$dellist + samp_ind = sample(1:length(all_models), 1)[[1]] + next_model = all_models[[samp_ind]] + all_models = all_models[-samp_ind] + while(length(unlist(model_setdiff(cur_model, next_model, mvar)))!=1) + { + samp_ind = sample(1:length(all_models), 1)[[1]] + next_model = all_models[[samp_ind]] + all_models = all_models[-samp_ind] + } + } + stopifnot(length(unlist(model_setdiff(cur_model, next_model, mvar)))==1) + cur_model = next_model + } + + bmodel2 = cur_model + stopifnot(length(unlist(model_setdiff(bmodel2, bmodel1, mvar)))==steps) + } + + return(list(bmodel1, bmodel2)) } \ No newline at end of file diff --git a/R/score_calculation.R b/R/score_calculation.R index 41689c2..a0b2ca3 100644 --- a/R/score_calculation.R +++ b/R/score_calculation.R @@ -1,60 +1,3 @@ -#' @title Inner function of calculating Boolean model score -#' -#' @description -#' This function calculates a final model score from pairwise and penalty scores. -#' -#' @param x matrix vector. Pairwise scores computed by dist_measure(). -#' @param bmodel S4 BoolModel object. Model to be evaluated. -#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. -#' @param detail logical. Whether to give more details in score calculation. Default to FALSE. -m_score = function(x, bmodel, max_varperrule, detail=F) -{ - #(1) Calculate score for distance between states - y = mean(apply(x, 2, min)) #best - #y = mean(apply(x, 1, min)) - - #(2) Calculate penalty term - #(A) To penalise having too low or too high number of model states, when compared to the number of data states. - #Ideally, the number of model states >= the number of data states. - #abs(number of model states - number of data states) - za = abs(ncol(x) - nrow(x)) / (nrow(x) * length(bmodel@target)) #best - #za = abs(ncol(x) - nrow(x)) / (nrow(x)) - - #(B) To penalise having too many variables in the rules. - var_len = list() #combine act and inh rules for each variable. - for(i in 1:length(bmodel@target)) - { - var_len = c(var_len, list(c(bmodel@rule_act[[i]], bmodel@rule_inh[[i]]))) - } - - #calculate number and fraction of each variable. - var_len = sapply(var_len, function(x) x[!grepl('0', x)]) #to remove '0' - num_var = sapply(var_len, function(x) length(strsplit(paste(x, collapse=''), 'v')[[1]])-1 ) #to count the number of v[0-9]s terms. e.g. v1s&v2s will be counted as 2 terms. - num_var[num_var<0] = 0 #if the rule for the variable is completely empty, has to set the negative values to 0. - - frac_var = (num_var-max_varperrule) - zb_ind = num_var > max_varperrule - zb = sum(frac_var[zb_ind]) - - #(3) Calculate final score. - #Specify the constants for each penalty term. - a = 1 - b = 1 - - f = y + a*za + b*zb #f ranges from 0 to infinity. - - if(detail) - { - output = c(f, y, za, zb) - names(output) = c('f', 'y', 'za', 'zb') - } else - { - output = c(f) - names(output) = c('f') - } - return(output) -} - #' @title Calculating Boolean model score wrt to a dataset #' #' @description @@ -65,34 +8,68 @@ m_score = function(x, bmodel, max_varperrule, detail=F) #' @param fcdata matrix. Represents the expression data df. #' @param overlap_gene character vector. Specify which genes are present in both model and data inputs. #' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' @param model_encoding numeric. Only required if the bmodel is encoded. #' @param detail logical. Whether to give more details in score calculation. Default to FALSE. #' #' @export -calc_mscore = function(bmodel, istate, fcdata, overlap_gene, max_varperrule, detail=F) +calc_mscore = function(bmodel, istate, fcdata, overlap_gene, max_varperrule, model_encoding, detail=F) { + if(class(bmodel)!='BoolModel') + { + bmodel = decompress_bmodel(bmodel, model_encoding) + } + #(1) Simulate each of these models. - mdata = simulate_model(bmodel, istate) - + fmdata = simulate_model(bmodel, istate) + #(2) Perform gene filtering on model state space. - fmdata = filter_dflist(mdata, overlap_gene) + fmdata = filter_dflist(fmdata, overlap_gene) fmdata = fmdata+0 #convert logical matrix into numeric matrix. if(class(fcdata)!='matrix') { fcdata = as.matrix(fcdata) } - + #(3) Score each model state wrt to data state. score_matrix = man_dist(fcdata, fmdata) #The first must be the data state. This returns a matrix of row=data, col=model. - #(4) Calculate score for distance between states - y = mean(apply(score_matrix, 2, min)) #best + #(4) Get the orderings of data states wrt to model states. + rank_matrix = apply(score_matrix, 2, rank) + rownames(rank_matrix) = seq(1,nrow(rank_matrix)) + + rank_matrix = apply(rank_matrix, 2, function(x) as.numeric(names(sort(x)))) + + ans_row = rank_matrix[1,] + for(i in 1:nrow(rank_matrix)) + { + if(!anyDuplicated(ans_row)) #stop if there is no more duplicates. + { + break + } + + if(i != nrow(rank_matrix)) + { + ans_row[duplicated(ans_row)] = rank_matrix[i+1,][duplicated(ans_row)] + } + } + ans_row[duplicated(ans_row)] = rank_matrix[1,][duplicated(ans_row)] #if any duplicate remains at the end of loop, set them back to the best value. + + #(5) Calculate score for distance between states + y = mean(sapply(1:length(ans_row), function(x) score_matrix[ans_row[x],x])) / ncol(fmdata) + + #(6) Calculate penalty term + #(A) To penalise the states. The proportions of 0s and 1s are good predictors. + ybin = infotheo::discretize(fmdata, nbins=2) #force mininum bin to 2. + ybin = unique(ybin) + + tmp_y = rle(sort(unlist(ybin, use.names = F)))$lengths + names(tmp_y) = rle(sort(unlist(ybin, use.names = F)))$values - #(5) Calculate penalty term - #(A) To penalise having too low or too high number of model states, when compared to the number of data states. - #Ideally, the number of model states >= the number of data states. - #abs(number of model states - number of data states) - za = abs(ncol(score_matrix) - nrow(score_matrix)) / (nrow(score_matrix) * length(bmodel@target)) #best + tmp_x = c(0.5, 0.5) + tmp_y = tmp_y / sum(tmp_y) + + za = exp(-entropy::chi2.empirical(tmp_x, tmp_y)) #(B) To penalise having too many variables in the rules. var_len = list() #combine act and inh rules for each variable. @@ -102,21 +79,21 @@ calc_mscore = function(bmodel, istate, fcdata, overlap_gene, max_varperrule, det } #calculate number and fraction of each variable. - var_len = sapply(var_len, function(x) x[!grepl('0', x)]) #to remove '0' + var_len = sapply(var_len, function(x) x[!grepl('^0$', x)]) #to remove '0' num_var = sapply(var_len, function(x) length(strsplit(paste(x, collapse=''), 'v')[[1]])-1 ) #to count the number of v[0-9]s terms. e.g. v1s&v2s will be counted as 2 terms. num_var[num_var<0] = 0 #if the rule for the variable is completely empty, has to set the negative values to 0. - frac_var = (num_var-max_varperrule) - zb_ind = num_var > max_varperrule - zb = sum(frac_var[zb_ind]) + frac_var = num_var-max_varperrule #only punished if above set threshold. + frac_var[frac_var<0] = 0 + zb = mean(frac_var/max_varperrule) - #(6 Calculate final score. + #(7) Calculate final score. #Specify the constants for each penalty term. a = 1 b = 1 f = y + a*za + b*zb #f ranges from 0 to infinity. - + if(detail) { output = c(f, y, za, zb) @@ -141,7 +118,7 @@ man_dist = function(x, y) { z = matrix(0, nrow = nrow(x), ncol = nrow(y)) for (k in 1:nrow(y)) { - z[, k] <- colSums(abs(t(x) - y[k, ])) + z[,k] = colSums(abs(t(x) - y[k,])) } return(z) } diff --git a/R/search.R b/R/search.R index 378028d..137e94d 100644 --- a/R/search.R +++ b/R/search.R @@ -1,223 +1,234 @@ -#' @title Training Model -#' -#' @description -#' This function performs model training to find the best model, using information from data. It requires an initial state supplied to perform the search, and an initial model can also be supplied to be included in the initial population. -#' Note that if a model is supplied, and the genes in the model is different from the genes in the data, only the genes overlapping between model and data will be retained for further analysis. -#' -#' @param cdata data frame of expression data. Should have state(row) x gene(column). -#' @param ddata discretised data frame of expression data. Must supply when preprocess=F. Obtain from initialise_raw_data(). Defaults to NULL. -#' @param bmodel Boolean model in data frame. If NULL, use a random Boolean model. Defaults to NULL. -#' @param istate data frame. Must have only 1 row, which represents 1 initial state. Defaults to NULL. -#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must be higher than number of genes. Defaults to 6. -#' @param and_bool logical. Whether to consider AND terms. IF bmodel is not NULL, defaults to whether AND interaction is included in bmodel. If bmodel is NULL, then defaults to TRUE. -#' @param self_loop logical. Whether to allow self_loop in random starting model. Only used if is.null(bmodel). Default to F. -#' @param con_thre numerical. Threshold used to generating the final consensus model. Must be between 0 and 1. -#' @param tol numeric. Tolerance in ending condition. Default to 1e-6. It cannot be lower than .Machine$double.eps ^ 0.5. -#' @param verbose logical. Whether to give detailed output to the screen. Defaults to F. -#' @param detailed_output logical. Whether to return only the model inferred, or all the details obtained during optimisation. Defaults to F. -#' -#' @export -model_train = function(cdata, ddata=NULL, bmodel=NULL, istate=NULL, max_varperrule=6, and_bool=T, self_loop=F, con_thre=0.3, tol=1e-6, verbose=F, detailed_output=F) -{ - vcat('Preparing data for analysis.\n', verbose) - - #Initialise model. - if(is.null(bmodel)) - { - bmodel = gen_one_rmodel(colnames(cdata), max_varperrule, and_bool, self_loop) - } else - { - if(class(bmodel) != 'BoolModel') - { - bmodel = initialise_model(bmodel) - } - - if(check_and(bmodel) != and_bool) - { - and_bool = check_and(bmodel) - } - } - - #Initialise initial state. - if(is.null(istate)) - { - istate = rbinom(length(bmodel@target), 1, 0.5) - #Getting a random initial state. - while(mean(istate) > 0.9 | mean(istate) < 0.1) #do not want initial state that is too homogenous. - { - istate = rbinom(length(bmodel@target), 1, 0.5) - } - istate = data.frame(matrix(istate, nrow=1)) - colnames(istate) = bmodel@target - } - istate = initialise_data(istate, aslogic=T) - - #Filtering expression data. - overlap_gene = intersect(colnames(cdata), y=bmodel@target) - nonoverlap_gene = bmodel@target[!(bmodel@target %in% overlap_gene)] - names(overlap_gene) = bmodel@target_var[bmodel@target %in% overlap_gene] - names(nonoverlap_gene) = bmodel@target_var[!(bmodel@target %in% overlap_gene)] - - fddata = filter_dflist(ddata, overlap_gene, F) - fcdata = filter_dflist(cdata, overlap_gene, F) - - fcdata = unique_raw_data(fddata, fcdata) #removes duplicates in continuous data. - fddata = unique(fddata) - - vcat('Start training.\n', verbose) - - #(3) Calling final combined search. - i = 0 #suppress check error on non-visible global binding. - best_model = c() - best_score = c() - all_best_score = list() - cur_step = 1 - next_break_ite = 1 - while(TRUE) - { - vcat(sprintf('Current iteration: %s.\n', cur_step), verbose) - - if(cur_step == 1) - { - if(length(bmodel)==1) - { - mod_model = list(bmodel) - } else - { - mod_model = bmodel - } - } else - { - mod_model = next_model - } - - vcat('Stage 1: Exploring neighbouring models.\n', verbose) - mod_model = foreach(i=1:length(mod_model)) %dopar% { - c(mod_model[[i]], unlist(minmod_model(mod_model[[i]], overlap_gene=overlap_gene))) - } - mod_model = unlist(mod_model) - - vcat(sprintf('Total neighbouring models: %s.\n', length(mod_model)), verbose) - if(length(mod_model)>1000000) - { - #due to memory limit and runtime consideration. - break - } - - vcat('Stage 2: Simulating and calculating scores for models.\n', verbose) - all_final_score = foreach(i=1:length(mod_model)) %dopar% { - model_score = calc_mscore(bmodel=mod_model[[i]], istate=istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule) - unname(model_score['f']) - } - - stopifnot(all(sapply(all_final_score, function(x) !is.null(x)))) #there will be NULL if any problem occurs within the workers of foreach. - stopifnot(all(sapply(all_final_score, length) == sapply(mod_model, length))) - - all_final_score = unlist(all_final_score) - - #Locating all equally best score branches. - best_ind = which(all_final_score == min(all_final_score)) #take all min scores. - best_score = all_final_score[best_ind] - best_model = mod_model[best_ind] - - next_model = best_model - - vcat(sprintf('Total model retained for next iteration: %s.\n', length(next_model)), verbose) - - if(cur_step > 1) #checking convergence, and breaking condition. - { - if(any(best_score == 0)) #if meet any model with 0 score, #if the score changes less than the tolerance between 2 iterations. - { - break - } else if(isTRUE(all.equal(mean(previous_score),mean(best_score), tolerance=tol))) - { - #increase break count. Iteration will be broken once the count reaches 2. - next_break_ite = next_break_ite + 1 - if(next_break_ite == 2) - { - break - } - } else if(length(next_model) == 0) - { - stop('Error: length(next_model)==0.') - } else - { - next_break_ite = 1 - } - } - - previous_score = best_score #store it for comparison. - all_best_score = c(all_best_score, list(best_score)) - cur_step = cur_step + 1 - } - vcat(sprintf('Final iteration: %s.\n', cur_step), verbose) - - vcat('Stage 4: Performing consensus analysis.\n', verbose) - consensus = model_consensus(best_model, max_varperrule=max_varperrule) - res_con = consensus[consensus > con_thre] - final_model = decompress_bmodel(as.numeric(names(res_con)), get_encodings(bmodel), gene=bmodel@target) - - if(detailed_output) - { - output = list(consensus=consensus, final_model=final_model, best_model=best_model, - best_score=best_score, ite_score=all_best_score, overlap_gene=overlap_gene, nonoverlap_gene=nonoverlap_gene) - } else - { - output = final_model - } - - return(output) -} - -#' @title Intersection of genes -#' -#' @description -#' This function finds the intersection of genes and provide a score for them. Return a consensus model or a vector of scores. -#' -#' @param bmodel_list list of BoolModel. -#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. -#' @param format character. Specifies which format to return. Possible values: 'vec', 'df'. Default to 'vec'. -model_consensus = function(bmodel_list, max_varperrule, format='vec') -{ - #(1) Convert all bmodels to encoded forms. - encoding = get_encodings(bmodel_list[[1]]) - encmodel_list = lapply(bmodel_list, function(x) compress_bmodel(x, encoding, max_varperrule)) - - #(2) Check and remove duplicated models. - #stopifnot(all(sapply(encmodel_list, length)==length(encmodel_list[[1]]))) #all models must have same lengths after encoding. - encmodel_list = unique(encmodel_list) - - #(3) Count frequency of each term. - rle_freq = rle(sort(unlist(encmodel_list))) - term_freq = rle_freq$lengths - names(term_freq) = rle_freq$values - - #(4) Remove integers (i.e. empty terms) - term_freq = term_freq[as.numeric(names(term_freq))%%1!=0] - - #(5) Remove 0 and !0. - zero_enc = encoding[grepl('0$', names(encoding))] #get encodings for 0 and !0. - stopifnot(length(zero_enc)==2) - term_freq = term_freq[!grepl(paste(zero_enc[1], '$', sep=''), names(term_freq))] #remove 0. - term_freq = term_freq[!grepl(paste(zero_enc[2], '$', sep=''), names(term_freq))] #remove !0. - - #(6) Obtain final frequency. - final_res = signif(term_freq/length(encmodel_list)) #convert into percentage. - - if(format=='df') - { - #(7) Decompress the genes into a Boolean model. - out_bmodel = decompress_bmodel(as.numeric(names(term_freq)), encoding, gene=bmodel_list[[1]]@target, format='df') - stopifnot(length(term_freq)==nrow(out_bmodel)) - - output = cbind(out_bmodel, final_res) - } else if(format=='vec') - { - output = final_res - } else - { - stop('Invalid format parameter.') - } - - return(output) -} +#' @title Training Model +#' +#' @description +#' This function performs model training to find the best model, using information from data. It requires an initial state supplied to perform the search, and an initial model can also be supplied to be included in the initial population. +#' Note that if a model is supplied, and the genes in the model is different from the genes in the data, only the genes overlapping between model and data will be retained for further analysis. +#' +#' @param cdata data frame of expression data. Should have state(row) x gene(column). +#' @param bmodel Boolean model in data frame. If NULL, use a random Boolean model. Defaults to NULL. +#' @param istate data frame. Must have only 1 row, which represents 1 initial state. Defaults to NULL. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must be higher than number of genes. Defaults to 3. +#' @param and_bool logical. Whether to consider AND terms. IF bmodel is not NULL, defaults to whether AND interaction is included in bmodel. If bmodel is NULL, then defaults to TRUE. +#' @param self_loop logical. Whether to allow self_loop in random starting model. Default to F. +#' @param con_thre numerical. Threshold used to generating the final consensus model. Must be between 0 and 1. +#' @param tol numeric. Tolerance in ending condition. Default to 1e-6. It cannot be lower than .Machine$double.eps ^ 0.5. +#' @param verbose logical. Whether to give detailed output to the screen. Defaults to F. +#' @param detailed_output logical. Whether to return only the model inferred, or all the details obtained during optimisation. Defaults to F. +#' +#' @export +model_train = function(cdata, bmodel=NULL, istate=NULL, max_varperrule=6, and_bool=T, self_loop=F, con_thre=0.3, tol=1e-6, verbose=F, detailed_output=F) +{ + vcat('Preparing data for analysis.\n', verbose) + + #Initialise model. + if(is.null(bmodel)) + { + bmodel = gen_one_rmodel(var=colnames(cdata), mvar=max_varperrule, and_bool=and_bool, self_loop=self_loop) + } else + { + if(class(bmodel) != 'BoolModel') + { + bmodel = initialise_model(bmodel) + } + } + + if(length(bmodel)==1) + { + tmp_bm = bmodel + } else + { + tmp_bm = bmodel[[1]] + } + + #Initialise initial state. + if(is.null(istate)) + { + istate = rbinom(length(tmp_bm@target), 1, 0.5) + #Getting a random initial state. + while(mean(istate) > 0.9 | mean(istate) < 0.1) #do not want initial state that is too homogenous. + { + istate = rbinom(length(tmp_bm@target), 1, 0.5) + } + istate = data.frame(matrix(istate, nrow=1)) + colnames(istate) = tmp_bm@target + } + istate = initialise_data(istate, aslogic=T) + + #Filtering expression data. + overlap_gene = intersect(colnames(cdata), y=tmp_bm@target) + nonoverlap_gene = tmp_bm@target[!(tmp_bm@target %in% overlap_gene)] + names(overlap_gene) = tmp_bm@target_var[tmp_bm@target %in% overlap_gene] + names(nonoverlap_gene) = tmp_bm@target_var[!(tmp_bm@target %in% overlap_gene)] + + fcdata = filter_dflist(cdata, overlap_gene, F) + + model_encoding = get_encodings(tmp_bm) #for compression purpose. + + vcat('Start training.\n', verbose) + + #(3) Calling final combined search. + i = 0 #suppress check error on non-visible global binding. + #all_best_score = list() + start_step = 1 + max_step = 100 + best_model = c() + best_score = c() + next_break_ite = 1 + for(cur_step in start_step:max_step) + { + vcat(sprintf('Current iteration: %s.\n', cur_step), verbose) + + if(cur_step == 1) + { + if(length(bmodel)==1) + { + mod_model = list(compress_bmodel(bmodel, model_encoding)) + } else + { + mod_model = lapply(bmodel, function(x) compress_bmodel(x, model_encoding)) + } + } else + { + mod_model = next_model + } + + vcat('Stage 1: Exploring neighbouring models.\n', verbose) + mod_model = foreach(i=1:length(mod_model), .combine = c) %dopar% { + c(list(mod_model[[i]]), minmod_model(mod_model[[i]], overlap_gene=overlap_gene, encoded = T, model_encoding=model_encoding, and_bool=and_bool, self_loop=self_loop)) + } + stopifnot(!any(is.null(mod_model))) #there will be NULL if any problem occurs within the workers of foreach. + + mod_model = unique(mod_model) + + vcat(sprintf('Total neighbouring models: %s.\n', length(mod_model)), verbose) + + #Take random 1e6 models, if total number of neighbours more than that. + if(length(mod_model)>1000000) + { + mod_model = sample(mod_model, 1000000) + } + + vcat('Stage 2: Simulating and calculating scores for models.\n', verbose) + all_final_score = foreach(i=1:length(mod_model), .inorder=F, .combine = c) %dopar% { + model_score = calc_mscore(bmodel=mod_model[[i]], istate=istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule, model_encoding=model_encoding) + unname(model_score['f']) + } + + stopifnot(!any(is.null(all_final_score))) #there will be NULL if any problem occurs within the workers of foreach. + stopifnot(length(all_final_score) == length(mod_model)) + + #Locating all equally best score branches. + best_ind = which(all_final_score == min(all_final_score)) #take all min scores. + best_score = all_final_score[best_ind] + best_model = mod_model[best_ind] + #save(best_score, best_model, file='tmp_cur_best_result.rda') + + next_model = unique(best_model) + vcat(sprintf('Current mean best score: %s.\n', round(mean(best_score), 6)), verbose) + + vcat(sprintf('Total model retained for next iteration: %s.\n', length(next_model)), verbose) + + if(cur_step > 1) #checking convergence, and breaking condition. + { + if(any(best_score == 0)) #if meet any model with 0 score, #if the score changes less than the tolerance between 2 iterations. + { + break + } else if(isTRUE(all.equal(mean(previous_score),mean(best_score), tolerance=tol))) + { + #increase break count. Iteration will be broken once the count reaches 2. + next_break_ite = next_break_ite + 1 + if(next_break_ite == 2) + { + break + } + } else if(length(next_model) == 0) + { + stop('Error: length(next_model)==0.') + } else + { + next_break_ite = 1 + } + } + + previous_score = best_score #store it for comparison. + #all_best_score = c(all_best_score, list(best_score)) + + gc() + } + vcat(sprintf('Final iteration: %s.\n', cur_step), verbose) + + vcat('Stage 4: Performing consensus analysis.\n', verbose) + consensus = model_consensus(best_model, model_encoding=model_encoding) + res_con = consensus[consensus > con_thre] + final_model = decompress_bmodel(as.numeric(names(res_con)), model_encoding) + + if(detailed_output) + { + output = list(consensus=consensus, final_model=final_model, best_model=best_model, + best_score=best_score, overlap_gene=overlap_gene, nonoverlap_gene=nonoverlap_gene) + } else + { + output = final_model + } + + return(output) +} + +#' @title Intersection of genes +#' +#' @description +#' This function finds the intersection of genes and provide a score for them. Return a consensus model or a vector of scores. +#' +#' @param bmodel_list list of BoolModel. +#' @param model_encoding list. Use for compressing and decompressing Boolean models. +#' @param format character. Specifies which format to return. Possible values: 'vec', 'df'. Default to 'vec'. +model_consensus = function(bmodel_list, model_encoding, format='vec') +{ + model_encoding = model_encoding[[1]] + if(class(bmodel_list[[1]])=='BoolModel') + { + #(1) Convert all bmodels to encoded forms. + encmodel_list = lapply(bmodel_list, function(x) compress_bmodel(x, model_encoding)) + } else + { + encmodel_list = bmodel_list + } + + #(2) Check and remove duplicated models. + #stopifnot(all(sapply(encmodel_list, length)==length(encmodel_list[[1]]))) #all models must have same lengths after encoding. + encmodel_list = unique(encmodel_list) + + #(3) Count frequency of each term. + rle_freq = rle(sort(unlist(encmodel_list))) + term_freq = rle_freq$lengths + names(term_freq) = rle_freq$values + + #(4) Remove integers (i.e. empty terms) + term_freq = term_freq[as.numeric(names(term_freq))%%1!=0] + + #(5) Remove 0 and !0. + zero_enc = model_encoding[grepl('0$', names(model_encoding))] #get model_encodings for 0 and !0. + stopifnot(length(zero_enc)==2) + term_freq = term_freq[!grepl(paste(zero_enc[1], '$', sep=''), names(term_freq))] #remove 0. + term_freq = term_freq[!grepl(paste(zero_enc[2], '$', sep=''), names(term_freq))] #remove !0. + + #(6) Obtain final frequency. + final_res = signif(term_freq/length(encmodel_list)) #convert into percentage. + + if(format=='df') + { + #(7) Decompress the genes into a Boolean model. + out_bmodel = decompress_bmodel(as.numeric(names(term_freq)), model_encoding, format='df') + stopifnot(length(term_freq)==nrow(out_bmodel)) + + output = cbind(out_bmodel, final_res) + } else if(format=='vec') + { + output = final_res + } else + { + stop('Invalid format parameter.') + } + + return(output) +} diff --git a/data/example_models.rda b/data/example_models.rda index cdbcc7f08db63ee20a55434d23d373b844df810e..5151dcb6fa59415f6aba4b88b75247b39375574d 100644 GIT binary patch literal 534 zcmV+x0_pu9iwFP!000002F+DXQ^GJ14N0*G9GvmwALyADLA*L%y*M7YIiaLgDIY_D zpKnduqz%bJtAhg<*uH%1+wI%e*XOa<9=8!f7!iySCOot-|7jo#UGsmN&6iUaxu`+z zw56*JU59ML&)%d@0=0tZ(-m^BO1p-X%f7CS}voPRGqqJ zSJa=;km;G9-d^)}Z@hn@a<5c5PDHj&L4i$HRLa-?+Aic6uyDPg6S=}>Q2>mB6)eM( zj0H(VS)HPLe^;msAIfl4hL19|c5Y9JU1fx<%b=4|X(mFt9Yb=iu0mObinTKBnRv$M zHxP!(0}hN#=vZ=Wc37$YO0S+A?QGsdJ=bF`Xi&q640ljFUP%pTgJEjwHhe%jS!e8| z!Z=!qo~_a;`hZh(&*cmM);~nqr*fTjjIg2}I9YP2t4yA-TV(L=yY7v*T#ptl> zBk^7b2yvFgI0s?)CV&@@hwGKCa~Zp5*;ve0Ad0Il&n}MC+#4jfN#!Hfr~x-9XPjml zGv+s9y;{dhvt+ExnBqK1Z-*l>d$rnj)JUziTbPcPwMMtXP_`OCTT!$h9PEeCZ=?X* zVGmcKsRDz6Xg1P_UC2;C9H9V;E1*r4)f9-;2Ga+`3+x7V0<>oVcs)NPh>)36&N`dw zQRc}t(??0oU9g;)+q=uthP0h?QM=1WB1J82do+CL>eTWs=%aX*W%)9t2cfvQrntps z#gTdOEXW$N1H@LYbJ3`T^=}eQBfY%K^=YudC{Ox2!mcttMXD|z4!%XUvv0%Db<{`| zy?I5~Q#4zOvfDs+I}Cwv0NOxtAkm5LLU{^WD=0rc(cKOvz<^|5pbn)1732^hw!oEE z-^*Ni0psYYiFq_=+4h1F_=deI??>%d;Z-{<}w-vJT; diff --git a/man/BTR.Rd b/man/BTR.Rd index d51fad2..8993b15 100644 --- a/man/BTR.Rd +++ b/man/BTR.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/btr.R \docType{package} \name{BTR} diff --git a/man/BoolModel-class.Rd b/man/BoolModel-class.Rd index f709c8b..dce6959 100644 --- a/man/BoolModel-class.Rd +++ b/man/BoolModel-class.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/boolmodel_class.R \docType{class} \name{BoolModel-class} diff --git a/man/amat_to_bm.Rd b/man/amat_to_bm.Rd index 1f40e2c..e718a6d 100644 --- a/man/amat_to_bm.Rd +++ b/man/amat_to_bm.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{amat_to_bm} \alias{amat_to_bm} diff --git a/man/bm_to_amat.Rd b/man/bm_to_amat.Rd index 9518b00..64726ee 100644 --- a/man/bm_to_amat.Rd +++ b/man/bm_to_amat.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{bm_to_amat} \alias{bm_to_amat} diff --git a/man/bm_to_df.Rd b/man/bm_to_df.Rd index f01f9a7..92bf2fe 100644 --- a/man/bm_to_df.Rd +++ b/man/bm_to_df.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{bm_to_df} \alias{bm_to_df} diff --git a/man/bon_bmodel.Rd b/man/bon_bmodel.Rd index 0fa5960..e87c3ef 100644 --- a/man/bon_bmodel.Rd +++ b/man/bon_bmodel.Rd @@ -1,10 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_desc.R \docType{data} \name{bon_bmodel} \alias{bon_bmodel} \title{HSC Boolean Model from Bonzanni et al.} -\format{A data frame with 11 rows and 2 columns. +\format{A data frame with 11 rows and 2 columns. Rows: each row consists of 1 gene and its associated Boolean rule. Column 1: target gene diff --git a/man/bon_istate.Rd b/man/bon_istate.Rd index 75e26f2..e4e2f26 100644 --- a/man/bon_istate.Rd +++ b/man/bon_istate.Rd @@ -1,10 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_desc.R \docType{data} \name{bon_istate} \alias{bon_istate} \title{Initial state from Bonzanni et al.} -\format{A data frame with 1 row and 11 columns. +\format{A data frame with 1 row and 11 columns. Rows: each row consists of 1 set of Boolean state. Columns: each column is for 1 gene/variable.} diff --git a/man/calc_mscore.Rd b/man/calc_mscore.Rd index a1224c5..ba2a64c 100644 --- a/man/calc_mscore.Rd +++ b/man/calc_mscore.Rd @@ -1,10 +1,11 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/score_calculation.R \name{calc_mscore} \alias{calc_mscore} \title{Calculating Boolean model score wrt to a dataset} \usage{ -calc_mscore(bmodel, istate, fcdata, overlap_gene, max_varperrule, detail = F) +calc_mscore(bmodel, istate, fcdata, overlap_gene, max_varperrule, + model_encoding, detail = F) } \arguments{ \item{bmodel}{S4 BoolModel object. Model to be evaluated.} @@ -17,7 +18,11 @@ calc_mscore(bmodel, istate, fcdata, overlap_gene, max_varperrule, detail = F) \item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} +\item{model_encoding}{numeric. Only required if the bmodel is encoded.} + \item{detail}{logical. Whether to give more details in score calculation. Default to FALSE.} + +\item{debug}{logical. Debugging mode.} } \description{ This function calculates a score for a Boolean model wrt to a dataset. diff --git a/man/calc_roc.Rd b/man/calc_roc.Rd index e89015f..bd11a3c 100644 --- a/man/calc_roc.Rd +++ b/man/calc_roc.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/score_calculation.R \name{calc_roc} \alias{calc_roc} diff --git a/man/check_and.Rd b/man/check_and.Rd index d638251..2d14d1a 100644 --- a/man/check_and.Rd +++ b/man/check_and.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/general.R \name{check_and} \alias{check_and} diff --git a/man/compress_bmodel.Rd b/man/compress_bmodel.Rd index a1c8f2d..0c61b40 100644 --- a/man/compress_bmodel.Rd +++ b/man/compress_bmodel.Rd @@ -1,20 +1,18 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compression.R \name{compress_bmodel} \alias{compress_bmodel} \title{Compress BoolModel} \usage{ -compress_bmodel(bmodel, encoding, max_varperrule) +compress_bmodel(bmodel, encoding) } \arguments{ \item{bmodel}{S4 BoolModel object.} \item{encoding}{named numerical vector returned by get_encodings().} - -\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} } \description{ -This function compresses S4 BoolModel object by representing variables using numbers, and also only the act rules and inh rules are kept. +This function compresses S4 BoolModel object by representing variables using numbers, and also only the act rules and inh rules are kept. Return a list of 3 vectors, corresponding to act rules and inh rules. } diff --git a/man/decompress_bmodel.Rd b/man/decompress_bmodel.Rd index e846067..f8ffadf 100644 --- a/man/decompress_bmodel.Rd +++ b/man/decompress_bmodel.Rd @@ -1,22 +1,20 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compression.R \name{decompress_bmodel} \alias{decompress_bmodel} \title{Decompress BoolModel} \usage{ -decompress_bmodel(x, encoding, gene = NULL, format = "bmodel") +decompress_bmodel(x, encoding, format = "bmodel") } \arguments{ \item{x}{vector returned by compress_bmodel.} \item{encoding}{named numerical vector returned by get_encodings().} -\item{gene}{character vector. Corresponds to genes in the bmodel. Default to using variable names in encoding.} - \item{format}{character. Specifies which format to return. Possible values: 'bmodel', 'df', 'amat', 'simp_df'. Default to 'bmodel'.} } \description{ -This function decompresses the bmodel compressed by compress_bmodel(). +This function decompresses the bmodel compressed by compress_bmodel(). Return a S4 BoolModel object. } diff --git a/man/decreate_boolmodel.Rd b/man/decreate_boolmodel.Rd index 6b3688d..f5e7602 100644 --- a/man/decreate_boolmodel.Rd +++ b/man/decreate_boolmodel.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/simulation.R \name{decreate_boolmodel} \alias{decreate_boolmodel} diff --git a/man/df_to_bm.Rd b/man/df_to_bm.Rd index 04923a2..58e4305 100644 --- a/man/df_to_bm.Rd +++ b/man/df_to_bm.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{df_to_bm} \alias{df_to_bm} diff --git a/man/emodel1.Rd b/man/emodel1.Rd index b51d416..f51cca0 100644 --- a/man/emodel1.Rd +++ b/man/emodel1.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_desc.R \docType{data} \name{emodel1} diff --git a/man/emodel2.Rd b/man/emodel2.Rd index 7c26d86..0d3f435 100644 --- a/man/emodel2.Rd +++ b/man/emodel2.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_desc.R \docType{data} \name{emodel2} diff --git a/man/emodel3.Rd b/man/emodel3.Rd index a68d0ff..ad434d4 100644 --- a/man/emodel3.Rd +++ b/man/emodel3.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_desc.R \docType{data} \name{emodel3} diff --git a/man/eval_bool.Rd b/man/eval_bool.Rd index d38c7ea..1ef8ba4 100644 --- a/man/eval_bool.Rd +++ b/man/eval_bool.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/simulation.R \name{eval_bool} \alias{eval_bool} diff --git a/man/extract_term.Rd b/man/extract_term.Rd index 5e372ba..ae8b98c 100644 --- a/man/extract_term.Rd +++ b/man/extract_term.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/general.R \name{extract_term} \alias{extract_term} diff --git a/man/filter_dflist.Rd b/man/filter_dflist.Rd index aa85f85..6264b88 100644 --- a/man/filter_dflist.Rd +++ b/man/filter_dflist.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/general.R \name{filter_dflist} \alias{filter_dflist} diff --git a/man/gen_one_rmodel.Rd b/man/gen_one_rmodel.Rd index 5cdbfea..4511772 100644 --- a/man/gen_one_rmodel.Rd +++ b/man/gen_one_rmodel.Rd @@ -1,19 +1,24 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rand_model.R \name{gen_one_rmodel} \alias{gen_one_rmodel} \title{Generate a random Boolean model} \usage{ -gen_one_rmodel(var, mvar = length(var), and_bool, self_loop = F) +gen_one_rmodel(var, exponent = 3, and_bool, self_loop = F, + mvar = length(var), model_type = "random") } \arguments{ \item{var}{character vector. A vector of single genes/variables to be used in the model.} -\item{mvar}{integer. Maximum number of variables in act or inh rule. Default to length(var).} +\item{exponent}{integer. The exponent of power law distribution. Default to 3.} \item{and_bool}{logical. Indicates whether to include AND terms or not.} \item{self_loop}{logical. Indicates whether to allow self_loop. Default to F.} + +\item{mvar}{integer. Maximum number of variables in act or inh rule. Default to length(var).} + +\item{model_type}{character. Specifies the type of model generated.} } \description{ This function generates a random Boolean model. Returns an S4 BoolModel object. diff --git a/man/gen_singlerule.Rd b/man/gen_singlerule.Rd index 2968654..7b4e17f 100644 --- a/man/gen_singlerule.Rd +++ b/man/gen_singlerule.Rd @@ -1,10 +1,11 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rand_model.R \name{gen_singlerule} \alias{gen_singlerule} \title{Generate random act and inh rule for a single gene} \usage{ -gen_singlerule(x, np, tar_ind, and_bool, self_loop = F) +gen_singlerule(x, np, tar_ind, and_bool, self_loop = F, + rule_type = "random") } \arguments{ \item{x}{character vector. A vector of all single terms to be used.} @@ -16,6 +17,8 @@ gen_singlerule(x, np, tar_ind, and_bool, self_loop = F) \item{and_bool}{logical. Indicates whether to include AND terms or not.} \item{self_loop}{logical. Indicates whether to allow self_loop. Default to F.} + +\item{rule_type}{character. Types of rules. Defaults to random.} } \description{ This function generates one random Boolean rule (both act and inh) per run. Return a list of two vectors. diff --git a/man/gen_two_rmodel.Rd b/man/gen_two_rmodel.Rd index 9ccfb22..144a0c7 100644 --- a/man/gen_two_rmodel.Rd +++ b/man/gen_two_rmodel.Rd @@ -1,11 +1,11 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rand_model.R \name{gen_two_rmodel} \alias{gen_two_rmodel} \title{Generate two random Boolean models with a specified number of steps apart} \usage{ -gen_two_rmodel(var, steps, mvar = length(var), and_bool = F, - in_bmodel = NULL, self_loop = F) +gen_two_rmodel(var, steps, mvar = length(var), and_bool, in_bmodel = NULL, + self_loop = F) } \arguments{ \item{var}{character vector. A vector of single genes/variables to be used in the model.} @@ -14,14 +14,14 @@ gen_two_rmodel(var, steps, mvar = length(var), and_bool = F, \item{mvar}{integer. Maximum number of variables in act or inh rule. Default to length(var).} -\item{and_bool}{logical. Indicates whether to include AND terms or not. Default to F.} +\item{and_bool}{logical. Indicates whether to include AND terms or not.} \item{in_bmodel}{BoolModel object. The starting model supplied.} \item{self_loop}{logical. Indicates whether to allow self_loop. Default to F.} } \description{ -This function generates a random Boolean model, then get another random Boolean model that is a specified number of steps apart by adding and/or removing genes. +This function generates a random Boolean model, then get another random Boolean model that is a specified number of steps apart by adding and/or removing genes. Returns a list of two S4 BoolModel objects. } \details{ diff --git a/man/gen_two_rmodel_dag.Rd b/man/gen_two_rmodel_dag.Rd index d62f2e1..6e1b31b 100644 --- a/man/gen_two_rmodel_dag.Rd +++ b/man/gen_two_rmodel_dag.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rand_model.R \name{gen_two_rmodel_dag} \alias{gen_two_rmodel_dag} @@ -19,7 +19,7 @@ gen_two_rmodel_dag(var, steps, mvar = length(var), in_amat = NULL, \item{acyclic}{logical. Whether to restrict the model to being acyclic or not. Defaults to TRUE.} } \description{ -This function generates a random DAG Boolean model, then get another random DAG Boolean model that is a specified number of steps apart by adding and/or removing genes. +This function generates a random DAG Boolean model, then get another random DAG Boolean model that is a specified number of steps apart by adding and/or removing genes. Difficult to generate completely directed graph with a specified number of steps apart. } diff --git a/man/get_encodings.Rd b/man/get_encodings.Rd index f09f216..ffa1e35 100644 --- a/man/get_encodings.Rd +++ b/man/get_encodings.Rd @@ -1,13 +1,15 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/compression.R \name{get_encodings} \alias{get_encodings} \title{Get corresponding encodings for compression or decompression.} \usage{ -get_encodings(bmodel) +get_encodings(bmodel, force_and = T) } \arguments{ \item{bmodel}{S4 BoolModel object.} + +\item{force_and}{logical. Whether to include ANDs in the encoding even if no AND is present in the bmodel object. Defaults to T.} } \description{ This function gets all possible terms (single or double terms) and their corresponding encodings. diff --git a/man/grow_bmodel.Rd b/man/grow_bmodel.Rd index 2599274..f1d41ac 100644 --- a/man/grow_bmodel.Rd +++ b/man/grow_bmodel.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/model_modification.R \name{grow_bmodel} \alias{grow_bmodel} diff --git a/man/initialise_data.Rd b/man/initialise_data.Rd index b0e3ceb..871034e 100644 --- a/man/initialise_data.Rd +++ b/man/initialise_data.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/initialisation.R \name{initialise_data} \alias{initialise_data} diff --git a/man/initialise_model.Rd b/man/initialise_model.Rd index f4bd251..3a1a799 100644 --- a/man/initialise_model.Rd +++ b/man/initialise_model.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/initialisation.R \name{initialise_model} \alias{initialise_model} diff --git a/man/initialise_raw_data.Rd b/man/initialise_raw_data.Rd index 2c0c957..b214077 100644 --- a/man/initialise_raw_data.Rd +++ b/man/initialise_raw_data.Rd @@ -1,10 +1,11 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/initialisation.R \name{initialise_raw_data} \alias{initialise_raw_data} \title{Initialise raw data} \usage{ -initialise_raw_data(x, max_expr = "high", uni_thre = 0.2, scale = T) +initialise_raw_data(x, max_expr = "high", uni_thre = 0.2, scale = T, + discretised = F) } \arguments{ \item{x}{matrix. Numeric data of gene expression.} @@ -14,9 +15,11 @@ initialise_raw_data(x, max_expr = "high", uni_thre = 0.2, scale = T) \item{uni_thre}{numerical. Speficy threshold for unimodality test. Default to 0.2.} \item{scale}{logical. Whether to scale the data to a range of 0-1. Default to T.} + +\item{discretised}{logical. Whether to return discretised data. Default to F.} } \description{ -This function initialise raw gene expression values in a matrix. Return a list of two matrices: (1) continuous values and (2) binary values. +This function initialise raw gene expression values in a matrix. Return either a matrix of (1) continuous values or (2) binary values. Note that kmeans clustering as binarisation only works well if the data has a bimodal distribution. } diff --git a/man/krum_bmodel.Rd b/man/krum_bmodel.Rd index 579807b..78a5d67 100644 --- a/man/krum_bmodel.Rd +++ b/man/krum_bmodel.Rd @@ -1,10 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_desc.R \docType{data} \name{krum_bmodel} \alias{krum_bmodel} \title{Myeloid Boolean Model from Krumsiek et al.} -\format{A data frame with 11 rows and 2 columns. +\format{A data frame with 11 rows and 2 columns. Rows: each row consists of 1 gene and its associated Boolean rule. Column 1: target gene diff --git a/man/krum_istate.Rd b/man/krum_istate.Rd index dcd8f2d..a97c9a9 100644 --- a/man/krum_istate.Rd +++ b/man/krum_istate.Rd @@ -1,10 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_desc.R \docType{data} \name{krum_istate} \alias{krum_istate} \title{Initial state from Krumsiek et al.} -\format{A data frame with 1 row and 11 columns. +\format{A data frame with 1 row and 11 columns. Rows: each row consists of 1 set of Boolean state. Columns: each column is for 1 gene/variable.} diff --git a/man/man_dist.Rd b/man/man_dist.Rd index f2be266..1e5be3a 100644 --- a/man/man_dist.Rd +++ b/man/man_dist.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/score_calculation.R \name{man_dist} \alias{man_dist} diff --git a/man/match_term.Rd b/man/match_term.Rd index 4751b42..b3b3573 100644 --- a/man/match_term.Rd +++ b/man/match_term.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/general.R \name{match_term} \alias{match_term} diff --git a/man/minmod_internal.Rd b/man/minmod_internal.Rd index 4eb7b88..eaa6533 100644 --- a/man/minmod_internal.Rd +++ b/man/minmod_internal.Rd @@ -1,15 +1,23 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/model_modification.R \name{minmod_internal} \alias{minmod_internal} \title{Inner function of minimal modification of whole Boolean model} \usage{ -minmod_internal(bm, index) +minmod_internal(bm, index, encoded, model_encoding, and_bool, self_loop = F) } \arguments{ \item{bm}{S4 BoolModel object.} \item{index}{integer. Specifying rule of which gene to modify.} + +\item{encoded}{logical. Return Boolean models in encoded form to save space.} + +\item{model_encoding}{list. Only used if encoded=T.} + +\item{and_bool}{logical. Default to check_and() if unspecified.} + +\item{self_loop}{logical. Whether to allow self_loop in random starting model. Default to F.} } \description{ This function generates all possible boolean models minimally modified. Returns a lists of 2 lists, deleted models and added models diff --git a/man/minmod_model.Rd b/man/minmod_model.Rd index 13ea234..f94ecf8 100644 --- a/man/minmod_model.Rd +++ b/man/minmod_model.Rd @@ -1,10 +1,11 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/model_modification.R \name{minmod_model} \alias{minmod_model} \title{Minimal modification of whole Boolean model} \usage{ -minmod_model(bm, index = NULL, overlap_gene = NULL) +minmod_model(bm, index = NULL, overlap_gene = NULL, sep_list = F, + encoded = F, model_encoding, and_bool = NULL, self_loop = F) } \arguments{ \item{bm}{S4 BoolModel object.} @@ -12,6 +13,16 @@ minmod_model(bm, index = NULL, overlap_gene = NULL) \item{index}{integer. Specifying rule of which gene to modify. If NULL, modifies all rules in the model. Defaults to NULL.} \item{overlap_gene}{character vector. Specify which genes are present in both model and data inputs.} + +\item{sep_list}{logical. Separate add and del lists.} + +\item{encoded}{logical. Return Boolean models in encoded form to save space.} + +\item{model_encoding}{list. Only used if encoded=T.} + +\item{and_bool}{logical. Default to check_and().} + +\item{self_loop}{logical. Whether to allow self_loop in random starting model. Default to F.} } \description{ This function generates all possible boolean models minimally modified. Returns a lists of 2 lists, deleted models and added models diff --git a/man/model_consensus.Rd b/man/model_consensus.Rd index e77c7c1..1e73b1f 100644 --- a/man/model_consensus.Rd +++ b/man/model_consensus.Rd @@ -1,15 +1,15 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/search.R \name{model_consensus} \alias{model_consensus} \title{Intersection of genes} \usage{ -model_consensus(bmodel_list, max_varperrule, format = "vec") +model_consensus(bmodel_list, model_encoding, format = "vec") } \arguments{ \item{bmodel_list}{list of BoolModel.} -\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} +\item{model_encoding}{list. Use for compressing and decompressing Boolean models.} \item{format}{character. Specifies which format to return. Possible values: 'vec', 'df'. Default to 'vec'.} } diff --git a/man/model_dist.Rd b/man/model_dist.Rd index f785b0f..15c0842 100644 --- a/man/model_dist.Rd +++ b/man/model_dist.Rd @@ -1,17 +1,15 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/general.R \name{model_dist} \alias{model_dist} \title{Calculate distance between Boolean models} \usage{ -model_dist(x, y, max_varperrule) +model_dist(x, y) } \arguments{ \item{x}{S4 BoolModel object. Test model.} \item{y}{S4 BoolModel object. Reference model.} - -\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} } \description{ This method takes in two models and calculate the distance between them. The value return indicate the number of steps between the two models. diff --git a/man/model_setdiff.Rd b/man/model_setdiff.Rd index 2fa5646..0d06ecc 100644 --- a/man/model_setdiff.Rd +++ b/man/model_setdiff.Rd @@ -1,18 +1,16 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/general.R \name{model_setdiff} \alias{model_setdiff} \title{Find the set difference between two Boolean models} \usage{ -model_setdiff(x, y, max_varperrule, directed = F) +model_setdiff(x, y, directed = F) } \arguments{ \item{x}{S4 BoolModel object. Test model.} \item{y}{S4 BoolModel object. Reference model.} -\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} - \item{directed}{logical. If TRUE, return the difference in terms with respect to x.} } \description{ diff --git a/man/model_train.Rd b/man/model_train.Rd index c9a0315..e3855d4 100644 --- a/man/model_train.Rd +++ b/man/model_train.Rd @@ -1,27 +1,25 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/search.R \name{model_train} \alias{model_train} \title{Training Model} \usage{ -model_train(cdata, ddata = NULL, bmodel = NULL, istate = NULL, - max_varperrule = 6, and_bool = T, self_loop = F, con_thre = 0.3, - tol = 1e-06, verbose = F, detailed_output = F) +model_train(cdata, bmodel = NULL, istate = NULL, max_varperrule = 6, + and_bool = T, self_loop = F, con_thre = 0.3, tol = 1e-06, + verbose = F, detailed_output = F) } \arguments{ \item{cdata}{data frame of expression data. Should have state(row) x gene(column).} -\item{ddata}{discretised data frame of expression data. Must supply when preprocess=F. Obtain from initialise_raw_data(). Defaults to NULL.} - \item{bmodel}{Boolean model in data frame. If NULL, use a random Boolean model. Defaults to NULL.} \item{istate}{data frame. Must have only 1 row, which represents 1 initial state. Defaults to NULL.} -\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must be higher than number of genes. Defaults to 6.} +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must be higher than number of genes. Defaults to 3.} \item{and_bool}{logical. Whether to consider AND terms. IF bmodel is not NULL, defaults to whether AND interaction is included in bmodel. If bmodel is NULL, then defaults to TRUE.} -\item{self_loop}{logical. Whether to allow self_loop in random starting model. Only used if is.null(bmodel). Default to F.} +\item{self_loop}{logical. Whether to allow self_loop in random starting model. Default to F.} \item{con_thre}{numerical. Threshold used to generating the final consensus model. Must be between 0 and 1.} diff --git a/man/outgenysis_model.Rd b/man/outgenysis_model.Rd index 8c0a9cc..4f5179a 100644 --- a/man/outgenysis_model.Rd +++ b/man/outgenysis_model.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/output_format.R \name{outgenysis_model} \alias{outgenysis_model} diff --git a/man/outgraph_model.Rd b/man/outgraph_model.Rd index c3edd9d..436dcac 100644 --- a/man/outgraph_model.Rd +++ b/man/outgraph_model.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/output_format.R \name{outgraph_model} \alias{outgraph_model} diff --git a/man/outstate_graph.Rd b/man/outstate_graph.Rd index 0ec40fe..86cf208 100644 --- a/man/outstate_graph.Rd +++ b/man/outstate_graph.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/output_format.R \name{outstate_graph} \alias{outstate_graph} diff --git a/man/plotBM.Rd b/man/plotBM.Rd index 8e7d538..d03c437 100644 --- a/man/plotBM.Rd +++ b/man/plotBM.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{plotBM} \alias{plotBM} diff --git a/man/printBM.Rd b/man/printBM.Rd index 2d8b0ad..8f8115b 100644 --- a/man/printBM.Rd +++ b/man/printBM.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{printBM} \alias{printBM} diff --git a/man/rcpp_simulate.Rd b/man/rcpp_simulate.Rd index 4805cf5..c4db52f 100644 --- a/man/rcpp_simulate.Rd +++ b/man/rcpp_simulate.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/RcppExports.R \name{rcpp_simulate} \alias{rcpp_simulate} diff --git a/man/rcpp_validate.Rd b/man/rcpp_validate.Rd index 6cd1548..0aaeff8 100644 --- a/man/rcpp_validate.Rd +++ b/man/rcpp_validate.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/RcppExports.R \name{rcpp_validate} \alias{rcpp_validate} diff --git a/man/simulate_model.Rd b/man/simulate_model.Rd index 952490f..2b9650c 100644 --- a/man/simulate_model.Rd +++ b/man/simulate_model.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/simulation.R \name{simulate_model} \alias{simulate_model} diff --git a/man/unique_raw_data.Rd b/man/unique_raw_data.Rd index 0cd4620..79207b1 100644 --- a/man/unique_raw_data.Rd +++ b/man/unique_raw_data.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/initialisation.R \name{unique_raw_data} \alias{unique_raw_data} diff --git a/man/validate_adjmat.Rd b/man/validate_adjmat.Rd index 159e7b4..0e17ed1 100644 --- a/man/validate_adjmat.Rd +++ b/man/validate_adjmat.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/score_calculation.R \name{validate_adjmat} \alias{validate_adjmat} diff --git a/man/vcat.Rd b/man/vcat.Rd index 2274b5d..b33a080 100644 --- a/man/vcat.Rd +++ b/man/vcat.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/general.R \name{vcat} \alias{vcat} diff --git a/man/which.random.min.Rd b/man/which.random.min.Rd index 1ca082e..2f75a73 100644 --- a/man/which.random.min.Rd +++ b/man/which.random.min.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/general.R \name{which.random.min} \alias{which.random.min} diff --git a/man/wilson_raw_data.Rd b/man/wilson_raw_data.Rd index f3e2fa5..ec91f7b 100644 --- a/man/wilson_raw_data.Rd +++ b/man/wilson_raw_data.Rd @@ -1,10 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_desc.R \docType{data} \name{wilson_raw_data} \alias{wilson_raw_data} \title{Raw single cell qRT-PCR expression data from Wilson et al.} -\format{A data frame with 1626 rows and 44 columns. +\format{A data frame with 1626 rows and 44 columns. Rows: each row consists of raw expression values from 1 cell. Columns: each column is for 1 gene/variable.} diff --git a/man/wilson_raw_rnaseq.Rd b/man/wilson_raw_rnaseq.Rd index 3c94553..c582799 100644 --- a/man/wilson_raw_rnaseq.Rd +++ b/man/wilson_raw_rnaseq.Rd @@ -1,10 +1,10 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_desc.R \docType{data} \name{wilson_raw_rnaseq} \alias{wilson_raw_rnaseq} \title{Raw single cell RNAseq expression data from Wilson et al.} -\format{A data frame with 96 rows and 38498 columns. +\format{A data frame with 96 rows and 38498 columns. Rows: each row consists of raw expression values from 1 cell. Columns: each column is for 1 gene/variable.} diff --git a/man/writeBM.Rd b/man/writeBM.Rd index 1fef686..5536855 100644 --- a/man/writeBM.Rd +++ b/man/writeBM.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{writeBM} \alias{writeBM} diff --git a/vignettes/btr.Rmd b/vignettes/btr.Rmd index a6a7e07..5949f67 100644 --- a/vignettes/btr.Rmd +++ b/vignettes/btr.Rmd @@ -162,22 +162,18 @@ library(BTR) #(2) Load data. data(wilson_raw_data) #load a data frame of expression data. -tmp_data = initialise_raw_data(wilson_raw_data, max_expr = 'low') -cdata = tmp_data[[1]] #continuous data -ddata = tmp_data[[2]] #discretised data +cdata = initialise_raw_data(wilson_raw_data, max_expr = 'low') #(3) Filter cell types. cell_ind = grepl('cmp', rownames(cdata)) | grepl('gmp', rownames(cdata)) | grepl('mep', rownames(cdata)) fcdata = cdata[cell_ind,] #select only relevant cells. -fddata = ddata[cell_ind,] #(4) Filter genes. gene_ind = c('fli1', 'gata1', 'gata2', 'gfi1', 'scl', 'sfpi1') #select genes to be included. fcdata = fcdata[, gene_ind] -fddata = fddata[, gene_ind] #(5) Inferring Boolean model. -final_model = model_train(cdata=fcdata, ddata=fddata, max_varperrule=4, verbose=T) +final_model = model_train(cdata=fcdata, max_varperrule=3, verbose=T) #(6) Visualise the Boolean model generated. plotBM(final_model) @@ -210,9 +206,7 @@ To load the data into R, use `read.table` or `read.csv`. In this example, we are ```{r, eval=FALSE, tidy=TRUE} #(2) Load data. data(wilson_raw_data) #load a data frame of expression data. -tmp_data = initialise_raw_data(wilson_raw_data, max_expr = 'low') -cdata = tmp_data[[1]] #continuous data -ddata = tmp_data[[2]] #discretised data +cdata = initialise_raw_data(wilson_raw_data, max_expr = 'low') ``` Once data is loaded and preprocessed, filter the cell types or genes to be included in the analysis if needed. It is advisable to reduce the number of genes to be included if the computation takes too long to complete. @@ -221,12 +215,10 @@ Once data is loaded and preprocessed, filter the cell types or genes to be inclu #(3) Filter cell types. cell_ind = grepl('cmp', rownames(cdata)) | grepl('gmp', rownames(cdata)) | grepl('mep', rownames(cdata)) fcdata = cdata[cell_ind,] #select only relevant cells. -fddata = ddata[cell_ind,] #(4) Filter genes. gene_ind = c('fli1', 'gata1', 'gata2', 'gfi1', 'scl', 'sfpi1') #select genes to be included. fcdata = fcdata[, gene_ind] -fddata = fddata[, gene_ind] ``` ### Run model training @@ -239,14 +231,13 @@ You will receive a BoolModel object at the end of the model training process. Th ```{r, eval=FALSE, tidy=TRUE} #(5) Inferring Boolean model. -final_model = model_train(cdata=fcdata, ddata=fddata, max_varperrule=4, verbose=T) +final_model = model_train(cdata=fcdata, max_varperrule=3, verbose=T) #(6) Visualise the Boolean model generated. plotBM(final_model) ``` ```{r, echo=FALSE, fig.show='hold', message=FALSE, dpi=75, fig.width=6, fig.height=6} data(example_models) -emodel1=initialise_model(em1) plotBM(emodel1) ``` @@ -278,17 +269,14 @@ data(wilson_raw_data) #load a data frame of expression data. bmodel = initialise_model(krum_bmodel) istate = krum_istate -tmp_data = initialise_raw_data(wilson_raw_data, max_expr = 'low') -cdata = tmp_data[[1]] #continuous data -ddata = tmp_data[[2]] #discretised data +cdata = initialise_raw_data(wilson_raw_data, max_expr = 'low') #(3) Filter cell types. cell_ind = grepl('cmp', rownames(cdata)) | grepl('gmp', rownames(cdata)) | grepl('mep', rownames(cdata)) fcdata = cdata[cell_ind,] #select only relevant cells. -fddata = ddata[cell_ind,] #(4) Inferring Boolean model. -final_model = model_train(cdata=fcdata, ddata=fddata, bmodel=bmodel, istate=istate, max_varperrule=4, verbose=T) +final_model = model_train(cdata=fcdata, bmodel=bmodel, istate=istate, max_varperrule=3, verbose=T) #(5) Visualise the Boolean model generated. plotBM(final_model) @@ -327,9 +315,7 @@ data(wilson_raw_data) #load a data frame of expression data. bmodel = initialise_model(krum_bmodel) istate = krum_istate -tmp_data = initialise_raw_data(wilson_raw_data, max_expr = 'low') -cdata = tmp_data[[1]] #continuous data -ddata = tmp_data[[2]] #discretised data +cdata = initialise_raw_data(wilson_raw_data, max_expr = 'low') ``` Once data are loaded and preprocessed, filter the cell types or genes to be included in the analysis if needed. It is advisable to reduce the number of genes to be included if the computation takes too long to complete. In this example, genes are not filtered as all genes that are present in both expression data and Boolean model are used automatically. @@ -338,7 +324,6 @@ Once data are loaded and preprocessed, filter the cell types or genes to be incl #(3) Filter cell types. cell_ind = grepl('cmp', rownames(cdata)) | grepl('gmp', rownames(cdata)) | grepl('mep', rownames(cdata)) fcdata = cdata[cell_ind,] #select only relevant cells. -fddata = ddata[cell_ind,] ``` ### Run model training @@ -351,14 +336,13 @@ You will receive a BoolModel object at the end of the model training process. Th ```{r, eval=FALSE, tidy=TRUE} #(4) Inferring Boolean model. -final_model = model_train(cdata=fcdata, ddata=fddata, bmodel=bmodel, istate=istate, max_varperrule=4, verbose=T) +final_model = model_train(cdata=fcdata, bmodel=bmodel, istate=istate, max_varperrule=3, verbose=T) #(5) Visualise the Boolean model generated. plotBM(final_model) ``` ```{r, echo=FALSE, fig.show='hold', message=FALSE, dpi=75, fig.width=6, fig.height=6} data(example_models) -emodel2=initialise_model(em2) plotBM(emodel2) ``` @@ -392,14 +376,11 @@ data(wilson_raw_data) #load a data frame of expression data. bmodel = initialise_model(krum_bmodel) istate = krum_istate -tmp_data = initialise_raw_data(wilson_raw_data, max_expr = 'low') -cdata = tmp_data[[1]] #continuous data -ddata = tmp_data[[2]] #discretised data +cdata = initialise_raw_data(wilson_raw_data, max_expr = 'low') #(3) Filter cell types. cell_ind = grepl('cmp', rownames(cdata)) | grepl('gmp', rownames(cdata)) | grepl('mep', rownames(cdata)) fcdata = cdata[cell_ind,] #select only relevant cells. -fddata = ddata[cell_ind,] #(4) Adding extra genes to the initial Boolean model. #extra_genes = setdiff(colnames(wilson_raw_data), bmodel@target) #to view available genes to be added. @@ -415,7 +396,7 @@ grown_istate = cbind(istate, tmp_istate) grown_istate = initialise_data(grown_istate) #(6) Inferring Boolean model. -final_model = model_train(cdata=fcdata, ddata=fddata, bmodel=grown_bmodel, istate=grown_istate, verbose=T) +final_model = model_train(cdata=fcdata, bmodel=grown_bmodel, istate=grown_istate, verbose=T) #(7) Visualise the Boolean model generated. plotBM(final_model) @@ -453,9 +434,7 @@ data(wilson_raw_data) #load a data frame of expression data. bmodel = initialise_model(krum_bmodel) istate = krum_istate -tmp_data = initialise_raw_data(wilson_raw_data, max_expr = 'low') -cdata = tmp_data[[1]] #continuous data -ddata = tmp_data[[2]] #discretised data +cdata = initialise_raw_data(wilson_raw_data, max_expr = 'low') ``` Once data are loaded and preprocessed, filter the cell types or genes to be included in the analysis if needed. It is advisable to reduce the number of genes to be included if the computation takes too long to complete. In this example, genes are not filtered as all genes that are present in both expression data and Boolean model are used automatically. @@ -464,7 +443,6 @@ Once data are loaded and preprocessed, filter the cell types or genes to be incl #(3) Filter cell types. cell_ind = grepl('cmp', rownames(cdata)) | grepl('gmp', rownames(cdata)) | grepl('mep', rownames(cdata)) fcdata = cdata[cell_ind,] #select only relevant cells. -fddata = ddata[cell_ind,] ``` ### Add extra genes to the initial Boolean model @@ -504,13 +482,12 @@ You will receive a BoolModel object at the end of the model training process. Th ```{r, eval=FALSE, tidy=TRUE} #(6) Inferring Boolean model. -final_model = model_train(cdata=fcdata, ddata=fddata, bmodel=grown_bmodel, istate=grown_istate, verbose=T) +final_model = model_train(cdata=fcdata, bmodel=grown_bmodel, istate=grown_istate, verbose=T) #(7) Visualise the Boolean model generated. plotBM(final_model) ``` ```{r, echo=FALSE, fig.show='hold', message=FALSE, dpi=75, fig.width=6, fig.height=6} data(example_models) -emodel3=initialise_model(em3) plotBM(emodel3) ``` diff --git a/vignettes/btr.html b/vignettes/btr.html index 2c165be..9acc1c9 100644 --- a/vignettes/btr.html +++ b/vignettes/btr.html @@ -7,10 +7,11 @@ + - + Using BTR to reconstruct asynchronous Boolean models @@ -18,50 +19,27 @@ - - + @@ -69,10 +47,13 @@ -