diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..aaa4b0b --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,22 @@ +Package: BoolTraineR +Type: Package +Title: Tools For Training and Analysing Asynchronous Boolean Models +Version: 1.0.1 +Date: 2015-10-22 +Author: Chee Yee Lim +Maintainer: Chee Yee Lim +Description: This package contains tools for Boolean model manipulation, as well as the search for the best Boolean model. +Depends: + R (>= 3.0.3), + methods +Imports: + parallel, + Rcpp (>= 0.11.4), + foreach (>= 1.4.1), + doParallel (>= 1.0.8), + poweRlaw (>= 0.30.0), + MASS (>= 7.3-44), + diptest (>= 0.75-7) +LinkingTo: Rcpp +License: GPL-3 +LazyData: true diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..eb35b6c --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,44 @@ +# Generated by roxygen2 (4.1.1): do not edit by hand + +export(BoolModel) +export(amat_to_bm) +export(bin_to_real) +export(bm_to_amat) +export(bm_to_df) +export(calc_mscore) +export(calc_roc) +export(check_bmodel) +export(compress_bmodel) +export(decompress_bmodel) +export(df_to_bm) +export(equi_model) +export(gen_randata) +export(gen_randata_bn) +export(gen_two_rmodel) +export(gen_two_rmodel_dag) +export(get_encodings) +export(grow_bmodel) +export(initialise_data) +export(initialise_model) +export(initialise_raw_data) +export(model_consensus) +export(model_dist) +export(model_setdiff) +export(model_train) +export(outcyto_model) +export(outcyto_stategraph) +export(outgenysis_model) +export(param_bimodal) +export(printBM) +export(simulate_model) +export(unique_raw_data) +export(validate_adjmat) +export(writeBM) +exportClasses(BoolModel) +import(doParallel) +import(foreach) +import(methods) +import(parallel) +importFrom(Rcpp,evalCpp) +importFrom(Rcpp,sourceCpp) +useDynLib(BoolTraineR) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..ee594f4 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,69 @@ +# This file was generated by Rcpp::compileAttributes +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#' @title Find a match between two data frames. +#' +#' @description +#' (&&&Not for public use&&&)This function finds a match between two df of states. Used in match_state(). Return an row index vector indicating which row of mstate matches the rows in xstate. +#' +#' @param mstate data frame. It should be a state(row) x gene(column) df. +#' @param xstate data frame. It should be a state(row) x gene(column) df. +match_state_loop <- function(mstate, xstate) { + .Call('BoolTraineR_match_state_loop', PACKAGE = 'BoolTraineR', mstate, xstate) +} + +#' @title Calculating pairwise scores between model and data states. +#' +#' @description +#' This function calculates the pairwise scores between each row of model and data states. The score is calculated using a custom binary distance measure. +#' +#' @param x_df matrix. It should be numerical matrix of model states. +#' @param y_df matrix. It should be numerical matrix of data states. +rcpp_man_dist <- function(x_df, y_df) { + .Call('BoolTraineR_rcpp_man_dist', PACKAGE = 'BoolTraineR', x_df, y_df) +} + +#' @title Calculating Hamming pairwise scores between model and data states. +#' +#' @description +#' This function calculates the pairwise scores between each row of model and data states. The score is calculated using a custom binary distance measure. +#' +#' @param x_df matrix. It should be logical matrix of model states. +#' @param y_df matrix. It should be logical matrix of data states. +rcpp_ham_dist <- function(x_df, y_df) { + .Call('BoolTraineR_rcpp_ham_dist', PACKAGE = 'BoolTraineR', x_df, y_df) +} + +#' @title Inner core for m_score() +#' +#' @description +#' This function takes in a df with columns ranked wrt each row, and try to assign each row to a unique column without repetition. +#' +#' @param x_df matrix. Matrix with columns ranked wrt each row. +rcpp_m_score <- function(x_df) { + .Call('BoolTraineR_rcpp_m_score', PACKAGE = 'BoolTraineR', x_df) +} + +#' @title Calculating validation scores between two adjacency matrices +#' +#' @description +#' This function calculates the validation scores between two adjacency matrices. +#' +#' @param inf_mat matrix. It should be adjacency matrix of inferred network. +#' @param true_mat matrix. It should be adjacency matrix of true network. +rcpp_validate <- function(inf_mat, true_mat) { + .Call('BoolTraineR_rcpp_validate', PACKAGE = 'BoolTraineR', inf_mat, true_mat) +} + +#' @title Simulate a Boolean model. +#' +#' @description +#' (&&&Not for public use&&&)This function simulates the Boolean model using an initial state. For use within simulate_model(). Returns a matrix of full asynchronous state space. +#' +#' @param bmodel list. A list of 4 lists created by decreate_model(). +#' @param fstate data frame. It must have been initialised by initialise_data(), and has gene names as column names. Must contain only 1 row. +#' @param verbose logical. Indicates whether to output progress. +rcpp_simulate <- function(bmodel, fstate, verbose = FALSE) { + .Call('BoolTraineR_rcpp_simulate', PACKAGE = 'BoolTraineR', bmodel, fstate, verbose) +} + diff --git a/R/boolmodel_class.R b/R/boolmodel_class.R new file mode 100644 index 0000000..3e36218 --- /dev/null +++ b/R/boolmodel_class.R @@ -0,0 +1,17 @@ +#' @title An S4 class to represent a Boolean Model +#' +#' @description +#' This class represents Boolean Model in a S4 BoolModel object. +#' +#' @field target character vector. It should contain gene symbols. +#' @field targer_var character vector. It should contain the internal representation of gene variables, in the form of "v[0-9]+s" +#' @field rule_act list of character vectors. Each element in the list should contain the activating gene variables for a particular target gene. +#' @field rule_inh list of character vectors. Each element in the list should contain the inhibiting gene variables for a particular target gene. +#' +#' @export BoolModel +#' @exportClass BoolModel +BoolModel = setClass('BoolModel', + slots=c(target='character', target_var='character', + rule_act='list', rule_inh='list') + ) + diff --git a/R/booltrainer.R b/R/booltrainer.R new file mode 100644 index 0000000..8d96616 --- /dev/null +++ b/R/booltrainer.R @@ -0,0 +1,19 @@ +#' @title BoolTraineR: A package for studying asynchronous Boolean models +#' +#' @description +#' This package contains tools for Boolean model manipulation, as well as the search for the best Boolean model. +#' +#' @docType package +#' @name BoolTraineR +NULL + +## All the Roxygen codes below are for generating the correct NAMESPACE file. +#' @import methods +#' @import parallel +#' @import foreach +#' @import doParallel +NULL + +#' @useDynLib BoolTraineR +#' @importFrom Rcpp sourceCpp evalCpp +NULL \ No newline at end of file diff --git a/R/compression.R b/R/compression.R new file mode 100644 index 0000000..29a03d3 --- /dev/null +++ b/R/compression.R @@ -0,0 +1,309 @@ +#' @title Get corresponding encodings for compression or decompression. +#' +#' @description +#' This function gets all possible terms (single or double terms) and their corresponding encodings. +#' This function limits the number of possible variables in the model to 999. +#' +#' @param bmodel S4 BoolModel object. +#' @param inter_bool logical. Indicate whether to consider AND terms. +#' +#' @export +get_encodings = function(bmodel, inter_bool) +{ + #Get all possible terms. + svar = bmodel@target_var + if(inter_bool) + { + dvar = sapply(combn(svar, 2, simplify=F), function(x) paste(x, collapse='&')) #get all possible interacting pairs. + dvar = c(dvar, sapply(combn(svar, 2, simplify=F), function(x) paste(rev(x), collapse='&'))) #get the reversed pattern as well. + } else + { + dvar = c() + } + + term_pool = c(svar, dvar) + term_pool = c('0', term_pool, '!0', paste('!', term_pool, sep='')) #add in inh terms. + + #Generate index for activation terms. + num_pool = seq(1, length(svar)+1) #get numbers for svar. + num_pool = c(num_pool, as.vector(replicate(2, seq(max(num_pool)+1, max(num_pool)+(length(dvar)/2))))) #get numbers for both forward and reverse dvar. + + #Generate index for inhibition terms. + num_pool = c(num_pool, seq(max(num_pool)+1, max(num_pool)+length(svar)+1)) #get numbers for svar. + num_pool = c(num_pool, as.vector(replicate(2, seq(max(num_pool)+1, max(num_pool)+(length(dvar)/2))))) #get numbers for both forward and reverse dvar. + + num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==1, paste('0', x, sep=''), x))) #convert single digit to double digit. + num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==2, paste('0', x, sep=''), x))) #convert double digit to triple digit. + num_pool = unname(sapply(num_pool, function(x) ifelse(nchar(x)==3, paste('0', x, sep=''), x))) #convert triple digit to quadruple digit. + + names(num_pool) = term_pool + + stopifnot(all(!is.na(names(num_pool)))) + stopifnot(all(!is.na(term_pool))) + + return(num_pool) +} + +#' @title Compress BoolModel +#' +#' @description +#' This function compresses S4 BoolModel object by representing variables using numbers, and also only the act rules and inh rules are kept. +#' Return a list of 3 vectors, corresponding to act rules and inh rules. +#' +#' @param bmodel S4 BoolModel object. +#' @param encoding named numerical vector returned by get_encodings(). +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' +#' @export +compress_bmodel = function(bmodel, encoding, max_varperrule) +{ + #Convert variables into encodings. + #Convert 'v1s' into '001'. + act_list = sapply(bmodel@rule_act, function(x) unname(encoding[x])) + inh_list = sapply(bmodel@rule_inh, function(x) unname(encoding[paste('!', x, sep='')])) + + stopifnot(all(!is.na(unlist(act_list)))) + stopifnot(all(!is.na(unlist(inh_list)))) + + #Convert '0001' into 1.0001. + act_list = sapply(1:length(act_list), function(x) as.numeric(paste(x, act_list[[x]], sep='.'))) + inh_list = sapply(1:length(inh_list), function(x) as.numeric(paste(x, inh_list[[x]], sep='.'))) + + #Convert list into vector. + act_vec = unique(unlist(act_list)) + inh_vec = unique(unlist(inh_list)) + + #Add in integer that represents empty spots, according to max_varperrule. + empty_term = max_varperrule - (rle(round(act_vec))$lengths+rle(round(inh_vec))$lengths) #get freq of missing terms. + empty_term[empty_term < 0] = 0 #set negative values to 0. note that negative values can occur because this is a soft constraint. + empty_term = unlist(sapply(1:length(empty_term), function(x) rep(x, empty_term[x]))) + + return(sort(c(act_vec,inh_vec, empty_term))) +} + +#' @title Decompress BoolModel +#' +#' @description +#' This function decompresses the bmodel compressed by compress_bmodel(). +#' Return a S4 BoolModel object. +#' +#' @param x vector returned by compress_bmodel. +#' @param encoding named numerical vector returned by get_encodings(). +#' @param gene character vector. Corresponds to genes in the bmodel. Default to using variable names in encoding. +#' @param format character. Specifies which format to return. Possible values: 'bmodel', 'df', 'amat', 'simp_df'. Default to 'bmodel'. +#' +#' @export +decompress_bmodel = function(x, encoding, gene=NULL, format='bmodel') +{ + #Setup default gene names. + gene_var = names(encoding)[!(grepl('&',names(encoding))|grepl('!',names(encoding)))][-1] + if(is.null(gene)) + { + gene = gene_var + } + stopifnot(length(gene_var)==length(gene)) + + #Round up values to 4 decimals. (due to external solver returning continuous values.) + x = round(x, 4) + + #Remove all empty terms from x. + x = x[!(x%%1==0)] #check for integer. + x = x[!(sapply(x, function(y) isTRUE(all.equal(y%%1,0.9999))))] #check for lower_bound values (i.e. 2.9999). (it is equivalent to integer.) + + #Filter encodings to remove reversed AND terms, if present. + encoding = encoding[!duplicated(encoding)] + + #Convert encodings into variables. + #Obtain 2 from 2.001. + comb_ind = round(x) + + #Convert numbers to characters. + comb_rule = as.character(x) + + #Convert '2' to '2.'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(!grepl('[.]', y), paste(y, '.', sep=''), y))) + + #Convert '2.' to '2.0'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.]$', y), paste(y, '0', sep=''), y))) + + #Convert '2.0' to '2.00'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.].$', y), paste(y, '0', sep=''), y))) + + #Convert '2.00' to '2.000'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.]..$', y), paste(y, '0', sep=''), y))) + + #Convert '2.000' to '2.0000'. + comb_rule = unname(sapply(comb_rule, function(y) ifelse(grepl('[.]...$', y), paste(y, '0', sep=''), y))) + + #Obtain '0001' from '2.0001'. + comb_rule = gsub('^[0-9]+[.]([0-9]+)', '\\1', comb_rule) + + #print(comb_rule) + stopifnot(all(nchar(comb_rule)==4)) + + #Remove 0 and !0. + zero_enc = encoding[grepl('0$', names(encoding))] #get encodings for 0 and !0. + stopifnot(length(zero_enc)==2) + comb_ind = comb_ind[comb_rule!=zero_enc[1]] #remove 0. + comb_rule = comb_rule[comb_rule!=zero_enc[1]] #remove 0. + comb_ind = comb_ind[comb_rule!=zero_enc[2]] #remove !0. + comb_rule = comb_rule[comb_rule!=zero_enc[2]] #remove !0. + stopifnot(length(comb_ind)==length(comb_rule)) + + #Generate reverse map. + rev_encoding = names(encoding) + names(rev_encoding) = encoding + + #Main conversion. + comb_rule = unname(rev_encoding[comb_rule]) + stopifnot(all(!is.na(comb_rule))) + + if(format=='df') + { + #Converting rules from var to gene names. + conv_comb_rule = comb_rule + for(i in 1:length(gene_var)) + { + conv_comb_rule = gsub(gene_var[i], gene[i], conv_comb_rule) + } + conv_comb_rule = unname(split(conv_comb_rule, as.factor(comb_ind))) + + #If there is completely empty rules for a gene, add placeholder in. + if(length(unique(comb_ind)) != length(gene_var)) + { + missing_ind = which(!(1:length(gene_var) %in% unique(comb_ind))) + + for(i in missing_ind) + { + conv_comb_rule = append(conv_comb_rule, '0', i-1) + } + } + stopifnot(length(conv_comb_rule)==length(gene_var)) + } + + #Convert vector into list. + comb_rule = unname(split(comb_rule, as.factor(comb_ind))) + + #If there is completely empty rules for a gene, add placeholder in. + if(length(unique(comb_ind)) != length(gene_var)) + { + missing_ind = which(!(1:length(gene_var) %in% unique(comb_ind))) + + for(i in missing_ind) + { + comb_rule = append(comb_rule, '0', i-1) + } + } + stopifnot(length(comb_rule)==length(gene_var)) + + if(format=='bmodel') + { + #Split into act and inh list. + act_rule = lapply(comb_rule, function(x) x[!grepl('!', x)]) + inh_rule = lapply(comb_rule, function(x) x[grepl('!', x)]) + + #Set '0' for empty rules. + act_rule[which(sapply(act_rule, function(x) length(x))==0)] = list('0') + inh_rule[which(sapply(inh_rule, function(x) length(x))==0)] = list('0') + + inh_rule = lapply(inh_rule, function(x) gsub('!', '', x)) #remove !. + + stopifnot(length(gene)==length(act_rule)) + + output = BoolModel(target=gene, target_var=gene_var, + rule_act=act_rule, rule_inh=inh_rule) + } else if(format=='df') + { + #This way of preparing output ensures that the order of 'output' is the same as 'x'. + out_var = c() + out_gene = c() + for(i in 1:length(comb_rule)) + { + for(j in 1:length(comb_rule[[i]])) + { + if(!grepl('!', comb_rule[[i]][j])) + { + if(comb_rule[[i]][j] != '0') + { + out_var = c(out_var, paste(gene_var[i], '<-', comb_rule[[i]][j])) + out_gene = c(out_gene, paste(gene[i], '<-', conv_comb_rule[[i]][j])) + } + } else + { + if(comb_rule[[i]][j] != '0') + { + out_var = c(out_var, paste(gene_var[i], '|-', gsub('!', '', comb_rule[[i]][j]))) + out_gene = c(out_gene, paste(gene[i], '|-', gsub('!', '', conv_comb_rule[[i]][j]))) + } + } + } + } + + output = cbind(out_gene, out_var) + } else if(format=='amat') + { + comb_rule = lapply(1:length(comb_rule), function(x) unlist(strsplit(comb_rule[[x]], '&'))) #remove &. + comb_rule = lapply(1:length(comb_rule), function(x) unique(gsub('!', '', comb_rule[[x]]))) #remove ! and non-unique gene interactions. + comb_rule = lapply(1:length(comb_rule), function(x) comb_rule[[x]][order(as.numeric(gsub('v(.+)s', '\\1', comb_rule[[x]])))]) #reorder the elements. + + output = matrix(NA, ncol=length(gene_var), nrow=length(gene_var)) + for(i in 1:nrow(output)) + { + output[i,] = gene_var %in% comb_rule[[i]] + } + output = t(output) + output = output + 0 + + colnames(output) = gene + rownames(output) = gene + } else if(format=='direct_amat') + { + comb_rule = lapply(1:length(comb_rule), function(x) unlist(strsplit(comb_rule[[x]], '&'))) #remove &. + comb_rule = lapply(comb_rule, unique) #remove non-unique gene interactions. + + output = matrix(0, ncol=length(gene_var), nrow=length(gene_var)) + for(i in 1:length(comb_rule)) + { + for(j in 1:length(comb_rule[[i]])) + { + row_ind = as.numeric(gsub('.+([0-9]+)s', '\\1', comb_rule[[i]][j])) + if(grepl('!', comb_rule[[i]][j])) + { + output[row_ind,i] = -1 + } else + { + output[row_ind,i] = 1 + } + } + } + + colnames(output) = gene + rownames(output) = gene + } else if(format=='simp_df') + { + comb_rule = lapply(1:length(comb_rule), function(x) unlist(strsplit(comb_rule[[x]], '&'))) #remove &. + comb_rule = lapply(1:length(comb_rule), function(x) unique(gsub('!', '', comb_rule[[x]]))) #remove ! and non-unique gene interactions. + comb_rule = lapply(1:length(comb_rule), function(x) comb_rule[[x]][order(as.numeric(gsub('v(.+)s', '\\1', comb_rule[[x]])))]) #reorder the elements. + comb_rule = comb_rule[!sapply(comb_rule, function(x) x[1]=='0')] #remove empty rules. + + output = matrix(NA, ncol=2, nrow=length(unlist(comb_rule))) + colnames(output) = c('from', 'to') + + entry_ind = 1 + for(i in 1:length(comb_rule)) + { + for(j in 1:length(comb_rule[[i]])) + { + output[entry_ind,1] = comb_rule[[i]][j] + output[entry_ind,2] = paste('v', i, 's', sep='') + + entry_ind = entry_ind + 1 + } + } + } else + { + stop('Please provide a valid format.') + } + + return(output) +} diff --git a/R/data_desc.R b/R/data_desc.R new file mode 100644 index 0000000..ec8744d --- /dev/null +++ b/R/data_desc.R @@ -0,0 +1,128 @@ +#' @title HSC Boolean Model from Bonzanni et al. +#' +#' @description +#' A Boolean model describing HSC in mice. It contains 11 genes. Its steady state is a cyclic loop of 32 states. +#' +#' @format +#' A data frame with 11 rows and 2 columns. +#' +#' Rows: each row consists of 1 gene and its associated Boolean rule. +#' Column 1: target gene +#' Column 2: associated Boolean rule +#' +#' @docType data +#' @name bon_bmodel +#' @usage data(bon_bmodel) +NULL + +#' @title Initial state from Bonzanni et al. +#' +#' @description +#' An intial state specified in Bonzanni et al. It contains a set of Boolean values for 11 genes. +#' +#' @format +#' A data frame with 1 row and 11 columns. +#' +#' Rows: each row consists of 1 set of Boolean state. +#' Columns: each column is for 1 gene/variable. +#' +#' @docType data +#' @name bon_istate +#' @usage data(bon_istate) +NULL + +#' @title Initial state from Moignard et al. +#' +#' @description +#' An intial state obtained from data in Moignard et al, determined by taking colMeans over unique rows, and rounding the means to 0-1. +#' Values for genes that are missing in Moignard et al, but are present in Bonzanni et al, are determined by taking values from the original initial state supplied in Bonzanni et al. +#' It contains a set of Boolean values for 20 genes. +#' +#' @format +#' A data frame with 1 row and 20 columns. +#' +#' Rows: each row consists of 1 set of Boolean state. +#' Columns: each column is for 1 gene/variable. +#' +#' @docType data +#' @name bon_moig_istate +#' @usage data(bon_moig_istate) +NULL + +#' @title Myeloid Boolean Model from Krumsiek et al. +#' +#' @description +#' A Boolean model describing myeloid development in mice. It contains 11 genes. Its steady states are 4 static attractors. +#' +#' @format +#' A data frame with 11 rows and 2 columns. +#' +#' Rows: each row consists of 1 gene and its associated Boolean rule. +#' Column 1: target gene +#' Column 2: associated Boolean rule +#' +#' @docType data +#' @name krum_bmodel +#' @usage data(krum_bmodel) +NULL + +#' @title Initial state from Krumsiek et al. +#' +#' @description +#' An intial state specified in Krumsiek et al. It contains a set of Boolean values for 11 genes. +#' +#' @format +#' A data frame with 1 row and 11 columns. +#' +#' Rows: each row consists of 1 set of Boolean state. +#' Columns: each column is for 1 gene/variable. +#' +#' @docType data +#' @name krum_istate +#' @usage data(krum_istate) +NULL + +#' @title Raw single cell qRT-PCR expression data from Moignard et al. +#' +#' @description +#' A raw single cell expression data obtained from multiple cell types. +#' +#' @format +#' A data frame with 597 rows and 18 columns. +#' +#' Rows: each row consists of raw expression values from 1 cell. +#' Columns: each column is for 1 gene/variable. +#' +#' @docType data +#' @name moig_raw_data +#' @usage data(moig_raw_data) +NULL + +#' @title Discretised single cell qRT-PCR expression data from Moignard et al. +#' +#' @description +#' A discretised single cell expression data obtained from multiple cell types. +#' +#' @format +#' A data frame with 597 rows and 18 columns. +#' +#' Rows: each row consists of discretised expression values from 1 cell. +#' Columns: each column is for 1 gene/variable. +#' +#' @docType data +#' @name moig_data +#' @usage data(moig_data) +NULL + +#' @title Estimated parameters from Wilson et al. data +#' +#' @description +#' A list of parameters (based on log normal distribution) estimated from Wilson et al. single-cell qPCR expression data. +#' +#' @format +#' A list with 4 numeric vectors, all_mu1, all_mu2, all_sig1, all_sig2. Note that each element in the vector is estimated from a single gene. +#' +#' @docType data +#' @name real_param +#' @usage data(real_param) +NULL \ No newline at end of file diff --git a/R/general.R b/R/general.R new file mode 100644 index 0000000..d1a3cf4 --- /dev/null +++ b/R/general.R @@ -0,0 +1,644 @@ +#' @title Verbose cat +#' +#' @description +#' This function is a simple wrapper for cat with a Boolean value for turning it on/off. +#' +#' @param string character vector. String intended to be printed. +#' @param bool logical. Specify whether to print the string to stout or not.+ +vcat = function(string, bool) +{ + if(bool) + { + cat(string) + } + + invisible() +} + +#' @title Extract Boolean terms +#' +#' @description +#' This function extracts the terms within a Boolean rule, using OR as the separator. Bracketed variables are counted as one term. +#' +#' @param brule character vector. It should be either an activating or inhibiting rule for a target gene. +extract_term = function(brule) +{ + if(brule!='0') #account for cases with empty rule. + { + #Strip outermost brackets. + brule = gsub('^[(](.+)[)]$', '\\1', brule) + term = strsplit(brule, '[|]')[[1]] + term = gsub('^[(](.+)[)]$', '\\1', term) + } else + { + term = '0' + } + + return(term) +} + +#' @title Check for matching terms +#' +#' @description +#' This function checks if the first term is found within a vector of terms. Return a logical value. +#' It is smarter than simple string matching, e.g. match_term(v1s&v2s, v2s&v1s) == T, match_term(v1s, v1s&v2s) == T, match_term(v1s&v2s, v1s) == T. +#' +#' @param t1 character vector of length 1 or more. It should be a vector of gene variable. +#' @param t2 character vector of length 1 or more. It should be a vector of gene variables. +#' @param mode character. Indicates the mode of action. Options: 'logic', 'unique'. Default to 'logic'. +match_term = function(t1, t2, mode='logic') +{ + if(t1[1] != '0') + { + t1 = unlist(strsplit(t1, '&')) #this will split up terms with &. + + #Check if t1 is present in t2. + #Note: if t1='v1s', this will match 'v1s', 'v2s&v1s'. + if(length(t2) > 1) + { + check_mat = sapply(t1, function(x) grepl(x, t2)) + } else + { + check_mat = t(matrix(sapply(t1, function(x) grepl(x, t2)))) #matrix() is necessary when t2 has length 1. + } + + if(mode == 'logic') + { + check_ind = which(rowSums(check_mat) > 0) + + if(length(check_ind) == 0) + { + match = FALSE + } else + { + match = TRUE + } + + return(match) + } else if(mode == 'unique') + { + check_ind = which(rowSums(check_mat) == 0) + + return(t2[check_ind]) + } else + { + stop('Error in specifying match_term() mode.') + } + } else if(t1[1] == '0') + { + if(mode == 'logic') + { + return(FALSE) + } else if(mode == 'unique') + { + return(t2) + } else + { + stop('Error in specifying match_term() mode.') + } + } +} + +#' @title Check for matching states +#' +#' @description +#' This function finds a match between two df of states. Returns a row index vector indicating for each row of mstate, what is the corresponding row in xstate. If a match cannot be found, a 0 will be return. +#' Only columns that are present in both df will be used in comparison. Note that the row index starts from 1 (as in R), not from 0 (as in cpp). +#' +#' @param mstate data frame. It should be a state(row) x gene(column) df. colnames will be used in comparison. +#' @param xstate data frame. It should be a state(row) x gene(column) df. colnames will be used in comparison. +match_state = function(mstate, xstate) +{ + #Filtering the columns in mstate and xstate. + same_col = intersect(colnames(mstate), colnames(xstate)) + fmstate = mstate[, same_col] + fxstate = xstate[, same_col] + + ind = match_state_loop(as.matrix(fmstate), as.matrix(fxstate)) + + return(ind) #zeroes are present due to mismatching in cpp code. +} + +#' @title Pick a random minimum value +#' +#' @description +#' This function locates the minimum value in a vector (similar to which.min), however it will randomly break ties when there are multiple minimum values. +#' +#' @param x numeric vector. +#' @param favour_first logical. If this is TRUE, and the first value in the vector supplies is one of the min, the index for the first value will always be returned. +which.random.min = function(x, favour_first=F) +{ + id = which(x == min(x)) #get indices of all minimum values. + + # If the minimum value appear multiple times pick one index at random, otherwise just return its position in the vector + # Exception occurs if the initial model (at position 1) also has the minimum value, then the initial model is favoured. + if(length(id) > 1 & 1 %in% id & favour_first==T) + { + id = 1 + } else if(length(id) > 1) #for all other cases with multiple minimum values, pick a random one. + { + id = sample(id, 1) + } else if(length(id) == 1) #if there is only single min value, return it. + { + id = id + } + + return(id) +} + +#' @title Filter columns of df in a list +#' +#' @description +#' This function filters columns of multiple df in a list, when compared using a vector. Use through lapply(). +#' +#' @param x data frame. It should be a list element if used from lapply. +#' @param y character vector. It should contains gene names found in the colnames of the data frame. +#' @param uniq_bool logical. Whether to return unique rows only. +filter_dflist = function(x, y, uniq_bool=T) +{ + stopifnot(class(x)== 'data.frame' | class(x)== 'matrix') + + if(uniq_bool) + { + return(unique(x[,colnames(x) %in% y, drop=F])) + } else + { + return(x[,colnames(x) %in% y, drop=F]) + } +} + +#' @title Check if the Boolean model violates constraints. +#' +#' @description +#' This function checks if the Boolean model violates contraints. Return logical value. +#' (1) Each gene rule should not have more terms than max_varperrule. +#' (2) The same term should not occur twice in the same rule. +#' +#' @param bmodel S4 BoolModel object. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' +#' @export +check_bmodel = function(bmodel, max_varperrule) +{ + check_bool = F + + act_rule = lapply(bmodel@rule_act, function(x) unlist(strsplit(x, '&'))) + inh_rule = lapply(bmodel@rule_inh, function(x) unlist(strsplit(x, '&'))) + + #(1) Check 1 : The same term should not occur twice in the same rule. + check_bool = check_bool | any(sapply(act_rule, function(x) any(duplicated(x)))) + check_bool = check_bool | any(sapply(inh_rule, function(x) any(duplicated(x)))) + + #(2) Check 2 : Each gene rule should not have more terms than max_varperrule. + act_rule = sapply(act_rule, function(x) unique(x)) + act_rule = sapply(act_rule, function(x) x[!(x %in% '0')]) #remove zeroes. + + inh_rule = sapply(inh_rule, function(x) unique(x)) + inh_rule = sapply(inh_rule, function(x) x[!(x %in% '0')]) #remove zeroes. + + check_bool = check_bool | any(sapply(act_rule, function(x) length(x)>max_varperrule)) + check_bool = check_bool | any(sapply(inh_rule, function(x) length(x)>max_varperrule)) + + if(check_bool) + { + return(FALSE) + } else + { + return(TRUE) + } +} + +#' @title Obtain parameters for bimodal distribution from real data +#' +#' @description +#' This function obtains parameters for bimodal distribution. Returns 4 parameters: mu1, mu2, sig1, sig2. +#' +#' @param x matrix. Input expression data. Col-genes, row-samples. +#' @param data_type character. Specify data types: qpcr, rnaseq. +#' +#' @export +param_bimodal = function(x, data_type='qpcr') +{ + require(MASS) + + #(1) Initialise data. + tmp = initialise_raw_data(x, data_type) + x_con = tmp[[1]] #continuous + x_bin = tmp[[2]] #discrete + + #rescale the data to remove zeroes. + x_con = x_con + 0.0001 + + all_mu1 = c() + all_mu2 = c() + all_sig1 = c() + all_sig2 = c() + for(i in 1:ncol(x_bin)) + { + #(2) Extract the parameters for the two modal distributions. + #First modal - low expression, 0s. Second modal - high expression, 1s. + x_lowmode = x_con[x_bin[,i]!=1, i] + x_highmode = x_con[x_bin[,i]==1, i] + + #(3) Estimate parameters + param1 = MASS::fitdistr(x_lowmode, 'lognormal') + param2 = MASS::fitdistr(x_highmode, 'lognormal') + + #For checking. + #hist(rlnorm(1000, param1$estimate[1], param1$estimate[2])) + #hist(rlnorm(1000, param2$estimate[1], param2$estimate[2])) + + all_mu1 = c(all_mu1, param1$estimate[1]) + all_mu2 = c(all_mu2, param2$estimate[1]) + all_sig1 = c(all_sig1, param1$estimate[2]) + all_sig2 = c(all_sig2, param2$estimate[2]) + } + + return(list(all_mu1=all_mu1, all_mu2=all_mu2, all_sig1=all_sig1, all_sig2=all_sig2)) +} + + +#' @title Generate random real numbers from binary values +#' +#' @description +#' This function generates random real numbers from binary values, with supplied parameters. Returns a vector of real values. +#' +#' @param x logical or 0/1 numeric matrix. Col-genes, row-samples. +#' @param param list of parameters given by param_bimodal(). +#' +#' @export +bin_to_real = function(x, param) +{ + require(MASS) + + #(1) Convert logical to numeric. + x = x + 0 + + #(2) Estimate the distribution for the parameters. + mu1_dist = MASS::fitdistr(-param$all_mu1, 'lognormal') + mu2_dist = MASS::fitdistr(-param$all_mu2, 'lognormal') + sig1_dist = MASS::fitdistr(param$all_sig1, 'lognormal') + sig2_dist = MASS::fitdistr(param$all_sig2, 'lognormal') + + y = matrix(NA, ncol=ncol(x), nrow=nrow(x)) + for(i in 1:ncol(x)) + { + #(3) Generating random values from the distribution. + mu1_est = -rlnorm(1, mu1_dist$estimate[1], mu1_dist$estimate[2]) + mu2_est = -rlnorm(1, mu2_dist$estimate[1], mu2_dist$estimate[2]) + sig1_est = rlnorm(1, sig1_dist$estimate[1], sig1_dist$estimate[2]) + sig2_est = rlnorm(1, sig2_dist$estimate[1], sig2_dist$estimate[2]) + + #(4) For each gene, generate random expression values using the obtained random parameters. + for(j in 1:nrow(x)) + { + if(x[j,i]==0) + { + y[j,i] = rlnorm(1, mu1_est, sig1_est) + + if(y[j,i] > 1) + { + y[j,i] = 1 + } else if(y[j,i] < 0) + { + stop('Error in generating continuous values.') + } + } else + { + y[j,i] = rlnorm(1, mu2_est, sig2_est) + + if(y[j,i] > 1) + { + y[j,i] = 1 + } else if(y[j,i] < 0) + { + stop('Error in generating continuous values.') + } + } + } + } + + return(y) +} + +#' @title Check for equivalent models +#' +#' @description +#' This function checks if the two models have the same rules. Return a logical value. Only TRUE if each rule for each gene is the same. +#' +#' @param bmodel1 S4 BoolModel object. +#' @param bmodel2 S4 BoolModel object. +#' @param inter_bool logical. Indicate whether to consider AND terms. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' +#' @export +equi_model = function(bmodel1, bmodel2, inter_bool, max_varperrule) +{ + stopifnot(length(bmodel1@target)==length(bmodel2@target)) + stopifnot(get_encodings(bmodel1, inter_bool)==get_encodings(bmodel2, inter_bool)) + + ind = get_encodings(bmodel1, inter_bool) + + dist = model_dist(bmodel1, bmodel2, inter_bool, max_varperrule) + + if(dist==0) + { + match = T + } else + { + match = F + } + + return(match) +} + +#' @title Calculate distance between Boolean models +#' +#' @description +#' This method takes in two models and calculate the distance between them. The value return indicate the number of steps between the two models. +#' +#' @param x S4 BoolModel object. Test model. +#' @param y S4 BoolModel object. Reference model. +#' @param inter_bool logical. Indicate whether to consider AND terms. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' +#' @export +model_dist = function(x, y, inter_bool, max_varperrule) +{ + set_diff = unlist(model_setdiff(x, y, inter_bool, max_varperrule)) + + #Calculate total dist. + t_dist = length(set_diff) + + #Return results. + return(t_dist) +} + +#' @title Find the set difference between two Boolean models +#' +#' @description +#' This method takes in two models and find the set difference between them. Return a vector with the set difference. +#' +#' @param x S4 BoolModel object. Test model. +#' @param y S4 BoolModel object. Reference model. +#' @param inter_bool logical. Indicate whether to consider AND terms. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' @param directed logical. If TRUE, return the difference in terms with respect to x. +#' +#' @export +model_setdiff = function(x, y, inter_bool, max_varperrule, directed=F) +{ + stopifnot(length(x@target) == length(y@target)) + stopifnot(get_encodings(x, inter_bool)==get_encodings(y, inter_bool)) + + ind = get_encodings(x, inter_bool) + + x1 = compress_bmodel(x, ind, max_varperrule) + x2 = compress_bmodel(y, ind, max_varperrule) + + #Pick which model has more terms, and which has less. + more_ind = which.max(c(length(x1), length(x2))) + less_ind = ifelse(more_ind==1, 2, 1) + + more_x = list(x1, x2)[[more_ind]] + less_x = list(x1, x2)[[less_ind]] + + #Remove empty terms. + more_x = more_x[more_x%%1!=0] + less_x = less_x[less_x%%1!=0] + + #Remove 0 and !0. + #Find the encodings for 0 and !0. + zero_enc = as.numeric(paste('0.', ind[which(names(ind)=='0')], sep='')) + notzero_enc = as.numeric(paste('0.', ind[which(names(ind)=='!0')], sep='')) + + #Remove them. + more_x = more_x[round(more_x-round(more_x), 4)!=zero_enc] + more_x = more_x[round(more_x-round(more_x), 4)!=notzero_enc] + + less_x = less_x[round(less_x-round(less_x), 4)!=zero_enc] + less_x = less_x[round(less_x-round(less_x), 4)!=notzero_enc] + + #Check for set difference. + more_bool = sapply(more_x, function(x) !(x %in% less_x)) + less_bool = sapply(less_x, function(x) !(x %in% more_x)) + + more_diff = more_x[more_bool] + less_diff = less_x[less_bool] + + more_vec = c() + if(length(more_diff)>0) + { + gene = round(more_diff) + rule = gsub('^[0-9]+[.]', '', gsub(' ', '', format(more_diff, nsmall=4))) + + gene = x@target[gene] + rule = unname(sapply(rule, function(x) names(which(ind==x))[1])) + + more_vec = rule + names(more_vec) = gene + } + + less_vec = c() + if(length(less_diff)>0) + { + gene = round(less_diff) + rule = gsub('^[0-9]+[.]', '', gsub(' ', '', format(less_diff, nsmall=4))) + + gene = x@target[gene] + rule = unname(sapply(rule, function(x) names(which(ind==x))[1])) + + less_vec = rule + names(less_vec) = gene + } + + #Handle step difference that includes changing to and from AND terms. + if(length(more_vec) != 0 & length(less_vec) != 0) + { + for(i in 1:length(more_vec)) + { + for(j in 1:length(less_vec)) + { + if(names(more_vec[i])==names(less_vec[j])) + { + if(grepl('!', more_vec[i]) & grepl('!', less_vec[j])) #for both inh terms. do not cross compare act and inh terms. + { + #Split & terms if present. + mvec = unlist(strsplit(more_vec[i], '&')) + lvec = unlist(strsplit(less_vec[j], '&')) + + #Removes !. + mvec = gsub('!', '', mvec) + lvec = gsub('!', '', lvec) + + #Check for presence of &. + mand_bool = ifelse(grepl('&', more_vec[i]), T, F) + land_bool = ifelse(grepl('&', less_vec[j]), T, F) + + mbool = mvec %in% lvec + lbool = lvec %in% mvec + + #assign NA if there is no term remaining. + if(length(mvec[!mbool])!=0) + { + if(!is.na(mvec[!mbool][1])) + { + more_vec[i] = paste(mvec[!mbool], collapse='&') + } + } else + { + more_vec[i] = NA + } + + if(length(lvec[!lbool])!=0) + { + if(!is.na(lvec[!lbool][1])) + { + less_vec[j] = paste(lvec[!lbool], collapse='&') + } + } else + { + less_vec[j] = NA + } + + #if this involves AND term, adds & indicator to term name. + if(mand_bool & !is.na(more_vec[i]) & !grepl('&', more_vec[i])) + { + more_vec[i] = paste('&', more_vec[i], sep='') + } + if(land_bool & !is.na(less_vec[j]) & !grepl('&', less_vec[j])) + { + less_vec[j] = paste('&', less_vec[j], sep='') + } + + #Add the ! back to the terms. + if(!is.na(more_vec[i])) + { + if(grepl('^&', more_vec[i])) + { + more_vec[i] = gsub('(^[&])(.+)', '\\1!\\2', more_vec[i]) + } else + { + more_vec[i] = paste('!', more_vec[i], sep='') + } + } + if(!is.na(less_vec[j])) + { + if(grepl('^&', less_vec[j])) + { + less_vec[j] = gsub('(^[&])(.+)', '\\1!\\2', less_vec[j]) + } else + { + less_vec[j] = paste('!', less_vec[j], sep='') + } + } + + } else if(!grepl('!', more_vec[i]) & !grepl('!', less_vec[j])) #for both act terms. do not cross compare act and inh terms. + { + #Split & terms if present. + mvec = unlist(strsplit(more_vec[i], '&')) + lvec = unlist(strsplit(less_vec[j], '&')) + + #Check for presence of &. + mand_bool = ifelse(grepl('&', more_vec[i]), T, F) + land_bool = ifelse(grepl('&', less_vec[j]), T, F) + + mbool = mvec %in% lvec + lbool = lvec %in% mvec + + #assign NA if there is no term remaining. + if(length(mvec[!mbool])!=0) + { + if(!is.na(mvec[!mbool][1])) + { + more_vec[i] = paste(mvec[!mbool], collapse='&') + } + } else + { + more_vec[i] = NA + } + + if(length(lvec[!lbool])!=0) + { + if(!is.na(lvec[!lbool][1])) + { + less_vec[j] = paste(lvec[!lbool], collapse='&') + } + } else + { + less_vec[j] = NA + } + + #if this involves AND term, adds & indicator to term name. + if(mand_bool & !is.na(more_vec[i]) & !grepl('&', more_vec[i])) + { + more_vec[i] = paste('&', more_vec[i], sep='') + } + if(land_bool & !is.na(less_vec[j]) & !grepl('&', less_vec[j])) + { + less_vec[j] = paste('&', less_vec[j], sep='') + } + } + } + } + } + } + + #Remove NAs. + if(!is.null(more_vec)) + { + more_vec = more_vec[!is.na(more_vec)] + } + if(!is.null(less_vec)) + { + less_vec = less_vec[!is.na(less_vec)] + } + + #If there are any AND term remaining here, they should be separated as they count as 2 steps. + if(length(more_vec)!=0) + { + for(i in 1:length(more_vec)) + { + if(grepl('.+&.+', more_vec[i])) + { + tmp_entry = unlist(strsplit(more_vec[i], '&'), use.names=F) + names(tmp_entry) = rep(names(more_vec[i]), length(tmp_entry)) + more_vec = c(more_vec, tmp_entry) + more_vec = more_vec[-i] + } + } + } + + if(length(less_vec)!=0) + { + for(i in 1:length(less_vec)) + { + if(grepl('.+&.+', less_vec[i])) + { + tmp_entry = unlist(strsplit(less_vec[i], '&'), use.names=F) + names(tmp_entry) = rep(names(less_vec[i]), length(tmp_entry)) + less_vec = c(less_vec, tmp_entry) + less_vec = less_vec[-i] + } + } + } + + comb_vec = c(more_vec, less_vec) + #Replace the gene names back into update functions. + for(i in 1:length(x@target_var)) + { + comb_vec = gsub(x@target_var[i], x@target[i], comb_vec) + } + + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(comb_vec, '&'))) | + !grepl('^0$', unlist(strsplit(comb_vec, '&'))))) #check if all gene name conversions is successful. + + #Return results. + if(directed) + { + return(list(more=more_vec, less=less_vec)) + } else + { + return(list(comb_vec)) + } +} \ No newline at end of file diff --git a/R/initialisation.R b/R/initialisation.R new file mode 100644 index 0000000..56ac7e5 --- /dev/null +++ b/R/initialisation.R @@ -0,0 +1,181 @@ +#' @title Initialise raw data +#' +#' @description +#' This function initialise raw gene expression values in a matrix. Return a list of two matrices: (1) continuous values and (2) binary values. +#' Note that kmeans clustering as binarisation only works well if the data has a bimodal distribution. +#' +#' @param x matrix. Numeric data of gene expression. +#' @param data_type character. Specify data types: qpcr, rnaseq. +#' @param uni_thre numerical. Speficy threshold for unimodality test. Default to 0.2. +#' @param scale logical. Whether to scale the data to a range of 0-1. Default to T. +#' +#' @export +initialise_raw_data = function(x, data_type='qpcr', uni_thre=0.2, scale=T) +{ + require(diptest) + + #(1) Convert negative to positive values. + if(min(x)<0) + { + x = x + abs(min(x)) + } else + { + x = x - min(x) + } + + stopifnot(min(x)==0) + + if(data_type=='qpcr') + { + #(2) Invert qPCR values. Lowest expression should be close to 0, highest expression should be away from 0. + x = abs(max(x) - x) + } + + #(3) Scale values to between 0 and 1. + #Scaling should be done by each gene, instead of globally. Because clustering is done by gene, and the expression values should reflect that. + if(scale) + { + for(i in 1:ncol(x)) + { + a = x[,i] + if(max(a)-min(a) != 0) #when min and max are different. usual case. + { + x[,i] = (a-min(a))/(max(a)-min(a)) + } else + { + x[,i] = a-min(a) + } + } + } + + #(4) Binarise the data by k-means clustering. + y = matrix(NA, ncol=ncol(x), nrow=nrow(x)) + for(i in 1:ncol(x)) + { + #Perform unimodality test for each gene. + uni_test = dip.test(x[,i])$p.value + + if(uni_test > uni_thre) + { + cent_measure = median(x[,i]) + if(cent_measure < 0.5) + { + y[,i] = rep(0, nrow(y)) #set all values to 0, if the cent_measure is less than 0.5 + } else + { + y[,i] = rep(1, nrow(y)) #set all values to 1, if the cent_measure is more than 0.5 + } + } else + { + bin_result = kmeans(x[,i,drop=F], 2) #Note that kmeans do not give 1 to cluster with lower centroid. + x_bin = unname(bin_result$cluster) + + #(5) Decide which cluster is 0 and which is 1. + if(bin_result$centers[,1][1] < bin_result$centers[,1][2]) + { + y[,i] = x_bin - 1 #set 1 to 0, 2 to 1. + } else + { + y[,i] = abs(x_bin-2) #set 1 to 1, 2 to 0. + } + } + } + + stopifnot(all(dim(x)==dim(y))) + + colnames(y) = colnames(x) + rownames(y) = rownames(x) + + return(list(x, y)) +} + +#' @title Initialise data +#' +#' @description +#' This function initialises data frame of Boolean state space. Returns initialised data frame. +#' +#' @param state data frame. It should contain either 0/1 or F/T data of gene expression. +#' @param aslogic logical. It specifies whether to convert the input data into Boolean values. Default to FALSE. +#' +#' @export +initialise_data = function(state, aslogic=F) +{ + #Store column and row names. + rn_tmp = rownames(state) + cn_tmp = colnames(state) + + #Convert to logical T/F if aslogic=T. + if(aslogic) + { + #Convert numerical (i.e. 0, 1) to logical type. + if(nrow(state) != 1) #one row won't work with apply(). + { + state = apply(state, 2, as.logical) + } else + { + state = data.frame(matrix(apply(state, 2, as.logical), nrow=1)) + } + + stopifnot(all(!is.na(state))) #Check if contains NA values due to coercion. + colnames(state) = cn_tmp + } + + #Convert all row and column names to lowercase. + #rownames(state) = tolower(rn_tmp) + #colnames(state) = tolower(cn_tmp) + + #Order colname names alphabetically. + if(all(grepl('v[0-9]+s', cn_tmp))) #if variable names are in the form of v1s, v2s, etc. + { + state = state[, order(as.numeric(gsub('v([0-9]+)s', '\\1', cn_tmp)))] + } else if(all(grepl('G[0-9]+', cn_tmp))) #if variable names are in the form of v1s, v2s, etc. + { + state = state[, order(as.numeric(gsub('G([0-9]+)', '\\1', cn_tmp)))] + } else + { + state = state[, order(cn_tmp)] + } + + return(state) +} + +#' @title Initialise model +#' +#' @description +#' This function initialises a Boolean model. Returns initialised S4 BoolModel object. +#' Note that the model should only has 1 NOT operator. More than 1 is STRICTLY NOT allowed. +#' +#' @param init_model data frame of Boolean model. It should contain two columns, targets and functions. +#' +#' @export +initialise_model = function(init_model) +{ + bmodel = df_to_bm(init_model) + return(bmodel) +} + +#' @title Remove raw data duplicated wrt to the model state +#' +#' @description +#' This function removes the 'duplicates' in an expression wrt to the model state. +#' +#' @param dx matrix. Initialised and discretised numeric data of gene expression. +#' @param cx matrix. Initialised and continuous numeric data of gene expression. +#' +#' @export +unique_raw_data = function(dx, cx) +{ + fac_dx = c() + for(i in 1:nrow(dx)) + { + fac_dx = c(fac_dx, paste(dx[i,], collapse='')) + } + + fac_dx = list(as.factor(fac_dx)) #convert into factors. + + output = aggregate(cx, fac_dx, mean) #calculate means for each factor group. + rownames(output) = output[,1] + output = output[,-1] + + return(as.matrix(output)) +} diff --git a/R/methods.R b/R/methods.R new file mode 100644 index 0000000..0fb4c9a --- /dev/null +++ b/R/methods.R @@ -0,0 +1,300 @@ +#' @title Print Boolean Model +#' +#' @description +#' This method converts the S4 BoolModel object back into a human-readable data frame, with two columns: (1) target genes, (2) Boolean rules. +#' +#' @param bmodel S4 BoolModel object. +#' @param gene.names logical. Specify whether to write rules in terms of genes or internal variables. Default to FALSE. +#' +#' @export +printBM = function(bmodel, gene.names=F) +{ + #Prettify update functions. + act_rule = sapply(bmodel@rule_act, function(x) paste(x, collapse='|')) + inh_rule = sapply(bmodel@rule_inh, function(x) paste(x, collapse='|')) + + if(gene.names) + { + if(all(bmodel@target != bmodel@target_var)) + { + #Replace the gene names back into update functions. + for(i in 1:length(bmodel@target_var)) + { + act_rule = gsub(bmodel@target_var[i], bmodel@target[i], act_rule) + inh_rule = gsub(bmodel@target_var[i], bmodel@target[i], inh_rule) + } + + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(act_rule), '&'))) | + grepl('^0$', unlist(strsplit(unlist(act_rule), '&'))))) #check if all gene name conversions is successful. + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(inh_rule), '&'))) | + grepl('^0$', unlist(strsplit(unlist(inh_rule), '&'))))) #check if all gene name conversions is successful. + } + } + + #Add in brackets around & terms. + act_rule = lapply(act_rule, function(x) gsub('([[:alnum:]]+&[[:alnum:]]+)', '(\\1)', x)) + inh_rule = lapply(inh_rule, function(x) gsub('([[:alnum:]]+&[[:alnum:]]+)', '(\\1)', x)) + + comb_rule = paste('(', act_rule, ') &! (', inh_rule, ')', sep='') + + #Combine all information together. + out_df = data.frame(gene=bmodel@target, var=bmodel@target_var, func=comb_rule, stringsAsFactors=F) + + return(out_df) +} + +#' @title Write Boolean Model +#' +#' @description +#' This method writes the S4 BoolModel object into a CSV file. This method is a wrapper for print.BoolModel. The output is a data frame, with two columns: (1) target genes, (2) Boolean rules. +#' +#' @param bmodel S4 BoolModel object. +#' @param file file name with path, or a file connection object. +#' @param gene.names logical. Specify whether to write rules in terms of genes or internal variables. Default to FALSE. +#' @param rownames logical. It specifies whether to write row names. +#' +#' @export +writeBM = function(bmodel, file, gene.names=F, rownames=F) +{ + #Method that writes information from BoolModel object. + out_df = printBM(bmodel, gene.names) + + write.csv(out_df, file=file, quote=F, row.names=F) +} + +#' @title Convert BoolModel into adjacency matrix +#' +#' @description +#' This function converts a BoolModel object into an adjacency matrix. +#' +#' @param x S4 BoolModel object. +#' @param directed logical. Whether to return directed or undirected adjacency matrix. Default to TRUE. +#' +#' @export +bm_to_amat = function(x, directed=T) +{ + #(1) Extract act and inh rules. And removes &. + target_gene = x@target + act_rule = lapply(x@rule_act, function(x) unlist(strsplit(x, '&'))) + inh_rule = lapply(x@rule_inh, function(x) unlist(strsplit(x, '&'))) + all_rule = lapply(1:length(target_gene), function(x) c(act_rule[[x]], inh_rule[[x]])) + all_rule = lapply(all_rule, function(x) x[x!='0']) #removes 0s. + + #(2) Convert gene variables into gene names. + #Replace the gene names back into update functions. + for(i in 1:length(x@target_var)) + { + all_rule = lapply(all_rule, function(y) gsub(x@target_var[i], x@target[i], y)) + } + + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(all_rule), '&'))) | + grepl('^0$', unlist(strsplit(unlist(all_rule), '&'))))) #check if all gene name conversions is successful. + + #(3) Setup output matrix. + out_mat = matrix(F, nrow=length(target_gene), ncol=length(target_gene)) + colnames(out_mat) = target_gene + rownames(out_mat) = target_gene + + #(4) Fill in the output matrix. + for(i in 1:length(target_gene)) + { + out_mat[,i] = target_gene %in% all_rule[[i]] + } + + if(!directed) + { + #(5) Make the output matrix symmetric. + for(i in 1:length(target_gene)) + { + tmp = out_mat[i,] | out_mat[,i] + out_mat[i,] = tmp + out_mat[,i] = tmp + } + + stopifnot(isSymmetric(out_mat)) + out_mat = out_mat + 0 #convert logical to numeric. + } else + { + out_mat = out_mat + 0 #convert logical to numeric. + + #(6) Make inhibitory edges as negative. + for(i in 1:length(inh_rule)) + { + for(j in 1:length(inh_rule[[i]])) + { + if(inh_rule[[i]][j]!='0') + { + from_ind = as.numeric(gsub('v([0-9]+)s', '\\1', inh_rule[[i]][j])) + to_ind = i + + out_mat[from_ind, to_ind] = -1 + } + } + } + } + + return(out_mat) +} + +#' @title Convert adjacency matrix into BoolModel object +#' +#' @description +#' This function converts adjacency matrix to BoolModel object. Able to take in adjacency matrix with -1, which encodes for inhibitory interaction. +#' +#' @param amat matrix. directed adjacency matrix. +#' @param random logical. Randomly assign to either act or inh rules, when the adjacency matrix only has values 0 and 1, but not -1. +#' @export +amat_to_bm = function(amat, random=F) +{ + if(random) + { + stopifnot(any(amat!=-1)) + + #Get all edges. + edge_df = cbind(which(amat==1, arr.ind = T), rep(NA, nrow(which(amat==1, arr.ind = T)))) + + #Setup row and column names. + colnames(edge_df) = c('from', 'to', 'type') + rownames(edge_df) = NULL + + #Randomly assign edges to act or inh rules. + edge_df[,3] = replicate(nrow(edge_df), sample(c('act', 'inh'), 1)) + + } else + { + #Get all edges. + act_edge = cbind(which(amat==1, arr.ind = T), rep('act', nrow(which(amat==1, arr.ind = T)))) + inh_edge = cbind(which(amat==-1, arr.ind = T), rep('inh', nrow(which(amat==-1, arr.ind = T)))) + edge_df = rbind(act_edge, inh_edge) + + #Setup row and column names. + colnames(edge_df) = c('from', 'to', 'type') + rownames(edge_df) = NULL + } + + #Replace indices by variable names. + var_name = paste('v', seq(1, length(colnames(amat))), 's', sep='') + names(var_name) = seq(1, nrow(amat)) + edge_df[,1] = var_name[as.character(edge_df[,1])] + + #Convert edge data frame to Boolmodel rules. + act_rule = vector('list', length(var_name)) + inh_rule = vector('list', length(var_name)) + for(i in 1:nrow(edge_df)) + { + if(edge_df[i,3] == 'act') + { + tmp_act = unname(c(act_rule[[as.numeric(edge_df[i,2])]], edge_df[i,1])) + act_rule[[as.numeric(edge_df[i,2])]] = tmp_act + } else + { + tmp_inh = unname(c(inh_rule[[as.numeric(edge_df[i,2])]], edge_df[i,1])) + inh_rule[[as.numeric(edge_df[i,2])]] = tmp_inh + } + } + + #Fill in empty rules. + act_rule[which(sapply(act_rule, length)==0)] = '0' + inh_rule[which(sapply(inh_rule, length)==0)] = '0' + + #Generate BoolModel object. + out_model = BoolModel(target=colnames(amat), target_var=var_name, rule_act=act_rule, rule_inh=inh_rule) + + return(out_model) +} + +#' @title Convert BoolModel object into BoolNet readable data frame +#' +#' @description +#' This method converts BoolModel object into a data frame, which is readable by BoolNet. +#' +#' @param bmodel BoolModel object. +#' +#' @export +bm_to_df = function(bmodel) +{ + out_df = printBM(bmodel, gene.names=T)[,-2] + colnames(out_df) = c('targets', 'factors') + + return(out_df) +} + +#' @title Convert a data frame into BoolModel object +#' +#' @description +#' This method converts a data frame into a BoolModel object. +#' Note that the model should only has 1 NOT operator. More than 1 is STRICTLY NOT allowed. +#' +#' @param df data frame with 2 columns, targets and factors +#' +#' @export +df_to_bm = function(in_df) +{ + #Setup the initial data frame. + in_df = as.data.frame(apply(in_df, 2, tolower), stringsAsFactors=F) + in_df = in_df[order(in_df[,1]),] + in_df[,2] = gsub('\\s', '', in_df[,2]) + + #Take out target genes. + target = in_df[,1] + + #Create corresponding simplified variable terms. + target_var = paste('v', seq(1,length(target)), 's', sep='') #the variable name consists of digits bounded by two characters. + + #Separate the activators and the inhibitors. + rule_list = strsplit(in_df[,2], '&!') + + rule_act = character(length(rule_list)) #initialise variable with correct lengths. + rule_inh = character(length(rule_list)) + for(i in 1:length(rule_list)) + { + if(length(rule_list[[i]]) == 2) + { + rule_act[i] = rule_list[[i]][1] + rule_inh[i] = rule_list[[i]][2] + } else if(length(rule_list[[i]]) == 1) + { + if(grepl('!', rule_list[[i]][1])) #if the first term in each vector in the list has a !, this means that there is no activator in this vector, but only inhibitor. + { + rule_act[i] = '0' + rule_inh[i] = rule_list[[i]][1] + } else + { + rule_act[i] = rule_list[[i]][1] + rule_inh[i] = '0' + } + } else + { + stop('Error in model rule specification.') + } + } + + rule_act = gsub('[(]0[)]', '0', rule_act) #take away bracketed 0s. + rule_inh = gsub('[(]0[)]', '0', rule_inh) + + rule_inh = gsub('^([!])', '', rule_inh) #take away remaining !. + + #Replace genes in rules with simplified variables. + for(i in 1:length(target_var)) + { + rule_act = gsub(target[i], target_var[i], rule_act) + rule_inh = gsub(target[i], target_var[i], rule_inh) + } + + rule_act = unname(sapply(rule_act, extract_term)) + rule_inh = unname(sapply(rule_inh, extract_term)) + + if(class(rule_act) != 'list') + { + rule_act = as.list(rule_act) + } + + if(class(rule_inh) != 'list') + { + rule_inh = as.list(rule_inh) + } + + bmodel = BoolModel(target=target, target_var=target_var, rule_act=rule_act, rule_inh=rule_inh) + + return(bmodel) +} diff --git a/R/model_modification.R b/R/model_modification.R new file mode 100644 index 0000000..974320d --- /dev/null +++ b/R/model_modification.R @@ -0,0 +1,249 @@ +#' @title Minimal modification of whole Boolean model +#' +#' @description +#' This function generates all possible boolean models minimally modified. Returns a lists of 2 lists, deleted models and added models +#' +#' @param bm S4 BoolModel object. +#' @param index integer. Specifying rule of which gene to modify. If NULL, modifies all rules in the model. Defaults to NULL. +#' @param overlap_gene character vector. Specify which genes are present in both model and data inputs. Only needed when index=NULL. +minmod_model = function(bm, index=NULL, overlap_gene=NULL) +{ + if(is.null(index)) + { + #Modify only the overlapping genes + gene_ind = which(bm@target %in% overlap_gene) + + del_list = c() + add_list = c() + for(ind in gene_ind) #Modify act and inh rules for one gene at a time. + { + tmp_list = minmod_internal(bm, ind) + if(!is.null(tmp_list$dellist)) + { + del_list = c(del_list, tmp_list$dellist) + } + if(!is.null(tmp_list$addlist)) + { + add_list = c(add_list, tmp_list$addlist) + } + } + out_list = list(del_list=del_list, + add_list=add_list) + } else + { + out_list = minmod_internal(bm, index) + } + + return(out_list) +} + +#' @title Inner function of minimal modification of whole Boolean model +#' +#' @description +#' This function generates all possible boolean models minimally modified. Returns a lists of 2 lists, deleted models and added models +#' +#' @param bm S4 BoolModel object. +#' @param index integer. Specifying rule of which gene to modify. +minmod_internal = function(bm, index) +{ + arule = bm@rule_act[[index]] + irule = bm@rule_inh[[index]] + + #Deletion of act rule. + dellist_arule = list() + if(length(arule) == 1) + { + if(grepl('&' , arule)) + { + tmp = unlist(strsplit(arule[1], '&')) #split the AND term into 2. + dellist_arule = c(dellist_arule, list(c(tmp[1], arule[-1]), c(tmp[2], arule[-1]))) + } else if(arule[1] != '0' & irule[1] != '0') + { + dellist_arule = c(dellist_arule, list('0')) + } + } else + { + for(i in 1:length(arule)) + { + if(grepl('&', arule[i])) + { + tmp = unlist(strsplit(arule[i], '&')) #split the AND term into 2. + dellist_arule = c(dellist_arule, list(c(tmp[1], arule[-i]), c(tmp[2], arule[-i]))) + } else + { + dellist_arule = c(dellist_arule, list(arule[-i])) + } + } + } + + #Deletion of inh rule. + dellist_irule = list() + if(length(irule) == 1) + { + if(grepl('&' , irule)) + { + tmp = unlist(strsplit(irule[1], '&')) #split the AND term into 2. + dellist_irule = c(dellist_irule, list(c(tmp[1], irule[-1]), c(tmp[2], irule[-1]))) + } else if(arule[1] != '0' & irule[1] != '0') + { + dellist_irule = c(dellist_irule, list('0')) + } + } else + { + for(i in 1:length(irule)) + { + if(grepl('&', irule[i])) + { + tmp = unlist(strsplit(irule[i], '&')) #split the AND term into 2. + dellist_irule = c(dellist_irule, list(c(tmp[1], irule[-i]), c(tmp[2], irule[-i]))) + } else + { + dellist_irule = c(dellist_irule, list(irule[-i])) + } + } + } + + #Addition of act rule. (single) + pos_actterm = bm@target_var[!bm@target_var %in% unlist(strsplit(c(arule, irule), '&'))] + addlist_arule = list() + if(arule[1] == '0') + { + for(i in 1:length(pos_actterm)) + { + addlist_arule = c(addlist_arule, list(pos_actterm[i])) + } + } else + { + for(i in 1:length(pos_actterm)) + { + addlist_arule = c(addlist_arule, list(c(arule, pos_actterm[i]))) + } + } + + #Addition of act rule. (double) + if(arule[1] != '0') + { + for(i in 1:length(pos_actterm)) + { + for(j in 1:length(arule)) + { + if(grepl('&', arule[j])) + { + next + } else + { + tmp = sprintf('%s&%s', pos_actterm[i], arule[j]) + addlist_arule = c(addlist_arule, list(c(arule[-j], tmp))) + } + } + } + } + + #Addition of inh rule. (single) + pos_inhterm = bm@target_var[!bm@target_var %in% unlist(strsplit(c(arule, irule), '&'))] + addlist_irule = list() + if(irule[1] == '0') + { + for(i in 1:length(pos_inhterm)) + { + addlist_irule = c(addlist_irule, list(pos_inhterm[i])) + } + } else + { + for(i in 1:length(pos_inhterm)) + { + addlist_irule = c(addlist_irule, list(c(irule, pos_inhterm[i]))) + } + } + + #Addition of inh rule. (double) + if(irule[1] != '0') + { + for(i in 1:length(pos_inhterm)) + { + for(j in 1:length(irule)) + { + if(grepl('&', irule[j])) + { + next + } else + { + tmp = sprintf('%s&%s', pos_inhterm[i], irule[j]) + addlist_irule = c(addlist_irule, list(c(irule[-j], tmp))) + } + } + } + } + + #Generate a list of modified models. + bmodel_dellist = list() + bmodel_addlist = list() + + if(length(dellist_arule) != 0) + { + for(i in 1:length(dellist_arule)) + { + tmp_bm = bm + tmp_bm@rule_act[[index]] = dellist_arule[[i]] + bmodel_dellist = c(bmodel_dellist, tmp_bm) + } + } + + if(length(dellist_irule) != 0) + { + for(i in 1:length(dellist_irule)) + { + tmp_bm = bm + tmp_bm@rule_inh[[index]] = dellist_irule[[i]] + bmodel_dellist = c(bmodel_dellist, tmp_bm) + } + } + + if(length(addlist_arule) != 0) + { + for(i in 1:length(addlist_arule)) + { + tmp_bm = bm + tmp_bm@rule_act[[index]] = addlist_arule[[i]] + bmodel_addlist = c(bmodel_addlist, tmp_bm) + } + } + + if(length(addlist_irule) != 0) + { + for(i in 1:length(addlist_irule)) + { + tmp_bm = bm + tmp_bm@rule_inh[[index]] = addlist_irule[[i]] + bmodel_addlist = c(bmodel_addlist, tmp_bm) + } + } + + return(list(dellist=bmodel_dellist, + addlist=bmodel_addlist)) +} + +#' @title Add extra genes to a Boolean model +#' +#' @description +#' This function adds extra genes to a Boolean model. Input model must be in data frame format, output model will be BoolModel object. +#' +#' @param in_model data frame with 2 columns, which are targets and factors +#' @param in_gene character vector. Genes to be added into the model. +#' +#' @export +grow_bmodel = function(in_model, in_gene) +{ + #Generate a new data frame to be added into the model df. + empty_func = '(0) &! (0)' + in_row = data.frame(in_gene, empty_func) + colnames(in_row) = c('targets', 'factors') + + #Modify the model df. + out_model = rbind(in_model, in_row) + + #Convert the model df into BoolModel object. + out_model = df_to_bm(out_model) + + return(out_model) +} \ No newline at end of file diff --git a/R/output_format.R b/R/output_format.R new file mode 100644 index 0000000..a3b871e --- /dev/null +++ b/R/output_format.R @@ -0,0 +1,277 @@ +#' @title Output a Boolean Model into Cytoscape readable format +#' +#' @description +#' This function outputs a Boolean Model in a format that is readable by Cytoscape. Return invisibly the edges (with edge attributes) and node attributes. (i.e. list of 2 dfs) +#' +#' @param bmodel S4 BoolModel object. +#' @param filepath character vector. Specify path (AND NOT file name). Default to current working directory, i.e. getwd(). Set to NULL to disable file output. +#' +#' @export +outcyto_model = function(bmodel, filepath=getwd()) +{ + edge_vec = character() #setup output vector. + + arule = bmodel@rule_act + irule = bmodel@rule_inh + + #Replace the gene names back into the rules. + for(i in 1:length(bmodel@target_var)) + { + for(j in 1:length(bmodel@target_var)) + { + arule[[j]] = gsub(bmodel@target_var[i], bmodel@target[i], arule[[j]]) + irule[[j]] = gsub(bmodel@target_var[i], bmodel@target[i], irule[[j]]) + } + } + + #Split rules into single or double terms. + s_arule = sapply(arule, function(x) x[!grepl('&', x)]) + d_arule = sapply(arule, function(x) x[grepl('&', x)]) + + s_irule = sapply(irule, function(x) x[!grepl('&', x)]) + d_irule = sapply(irule, function(x) x[grepl('&', x)]) + + #Put '0' for empty rules. + s_arule[sapply(s_arule, function(x) length(x)==0)] = '0' + d_arule[sapply(d_arule, function(x) length(x)==0)] = '0' + + s_irule[sapply(s_irule, function(x) length(x)==0)] = '0' + d_irule[sapply(d_irule, function(x) length(x)==0)] = '0' + + #Start writing rules out for single terms. + for(i in 1:length(bmodel@target)) + { + edge_vec = c(edge_vec, paste(s_arule[[i]], 'activates', bmodel@target[i], sep=',')) + edge_vec = c(edge_vec, paste(s_irule[[i]], 'inhibits', bmodel@target[i], sep=',')) + } + + #Write rules out for double terms. + tmp_and = paste('and', seq(1, sum(grepl('&', unlist(d_arule)))+sum(grepl('&', unlist(d_irule)))), sep='_') + ind = 1 + for(i in 1:length(bmodel@target)) + { + #Write arule. + if(d_arule[[i]][1]!='0') + { + tmp_rule = strsplit(d_arule[[i]], '&') + + for(j in 1:length(tmp_rule)) + { + #Join coproteins with ANDs. + edge_vec = c(edge_vec, paste(tmp_rule[[j]], 'activates', tmp_and[ind], sep=',')) + + #Join ANDs with target genes. + edge_vec = c(edge_vec, paste(tmp_and[ind], 'activates', bmodel@target[i], sep=',')) + + ind = ind + 1 + } + } + + #Write irule. + if(d_irule[[i]][1]!='0') + { + tmp_rule = strsplit(d_irule[[i]], '&') + + for(j in 1:length(tmp_rule)) + { + #Join coproteins with ANDs. + edge_vec = c(edge_vec, paste(tmp_rule[[j]], 'activates', tmp_and[ind], sep=',')) #note that this should be activates. + + #Join ANDs with target genes. + edge_vec = c(edge_vec, paste(tmp_and[ind], 'inhibits', bmodel@target[i], sep=',')) + + ind = ind + 1 + } + } + } + + #Remove anything with 0 in the first and final terms. + edge_vec = edge_vec[!(grepl('^0,', edge_vec))] + edge_vec = edge_vec[!(grepl(',0$', edge_vec))] + + #Convert the vector into a matrix. + edge_df = data.frame(do.call(rbind, strsplit(edge_vec, ','))) + colnames(edge_df) = c('start_node', 'interaction', 'end_node') + + #Generating node attributes. (to distinguish gene nodes from AND nodes) + node_vec = c(bmodel@target, tmp_and) + node_df = data.frame(node_names=node_vec, node_types=ifelse(grepl('and', node_vec), 'ands', 'genes')) + + #Output into files. + if(!is.null(filepath)) + { + write.csv(edge_df, file=paste(filepath, '/cytoscape_edges.csv', sep=''), quote=F) + write.csv(node_df, file=paste(filepath, '/cytoscape_nodes.csv', sep=''), quote=F) + } + + invisible(list(edge_df, node_df)) +} + +#' @title Output a Boolean Model into Genysis readable format +#' +#' @description +#' This function outputs a Boolean Model in a format that is readable by Genysis. Return invisibly the formatted vector. +#' +#' @param bmodel S4 BoolModel object. +#' @param filepath character vector. Specify path (AND NOT file name). Default to current working directory, i.e. getwd(). Set to NULL to disable file output. +#' +#' @export +outgenysis_model = function(bmodel, filepath=getwd()) +{ + gene = bmodel@target + arule = bmodel@rule_act + irule = bmodel@rule_inh + + #Replace the gene names back into update functions. + for(i in 1:length(bmodel@target_var)) + { + arule = lapply(arule, function(x) gsub(bmodel@target_var[i], bmodel@target[i], x)) + irule = lapply(irule, function(x) gsub(bmodel@target_var[i], bmodel@target[i], x)) + } + + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(arule), '&'))) | + grepl('^0$', unlist(strsplit(unlist(arule), '&'))))) #check if all gene name conversions is successful. + stopifnot(all(!grepl('v[0-9]+s', unlist(strsplit(unlist(irule), '&'))) | + grepl('^0$', unlist(strsplit(unlist(irule), '&'))))) #check if all gene name conversions is successful. + + #Match gene with rule. + out_vec = c() + for(i in 1:length(gene)) + { + if(arule[[i]][1]!='0') + { + out_vec = c(out_vec, paste(arule[[i]], gene[i], sep=' -> ')) + } + + if(irule[[i]][1]!='0') + { + out_vec = c(out_vec, paste(irule[[i]], gene[i], sep=' -| ')) + } + } + + #Output into files. + if(!is.null(filepath)) + { + exist_files = list.files(path=filepath, pattern='^genysis_input_[0-9]+.txt') + filename = sprintf('genysis_input_%s.txt', length(exist_files)+1) + + fcon = file(filename, open='w') + writeLines(out_vec, fcon) + close(fcon) + } + + invisible(out_vec) +} + +#' @title Generate state transition graph readable by Cytoscapes +#' +#' @description +#' This function generates a state transition graph using a Boolean model and its state space. Each node represent a state. All nodes in this graph is linked by an edge only if the 2 states have different value in only 1 gene. The output is readable by Cytoscape. +#' +#' @param mstate data frame. It should be a state(row) x gene(column) df. +#' @param bmodel S4 BoolModel object. +#' @param directed logical. Indicates whether to make directed edges or not. Default to FALSE. +#' @param record.both logical. Indicates whether to also record nodes that have no edges. Default to FALSE. +#' @param filepath character vector. Specify path (AND NOT file name). Default to current working directory, i.e. getwd(). Set to NULL to disable file output. +#' +#' @export +outcyto_stategraph = function(mstate, bmodel, directed=F, record.both=F, filepath=getwd()) +{ + cat(sprintf('Generating state transition network...\n')) + + adj_cells = list() + nonadj_cells = list() + for(i in 1:nrow(mstate)) #Pick each row. + { + cat(sprintf('\rComputing state difference using row %s...', rownames(mstate)[i])) + + ind = integer() + non_ind = integer() + for(j in 1:nrow(mstate)) + { + gene_diff = which(mstate[i,]!=mstate[j,]) + + if(length(gene_diff) == 1) #Only those with one state difference. + { + if(directed) #Generate directed network. + { + ans = eval_bool(bmodel, unlist(mstate[i,]), gene_diff) + + if(ans == mstate[j,gene_diff]) + { + ind = c(ind,j) #Record the state step that is possible. + } else + { + non_ind = c(non_ind, j) #Not possible state step. + } + } else #Generate indirected network. + { + ind = c(ind,j) + } + } + } + adj_cells[i] = list(ind) + nonadj_cells[i] = list(non_ind) + } + cat('.\n') + + net_all = data.frame(start_node=character(1), interaction=character(1), + end_node=character(1), stringsAsFactors=F) #Both 1 and stringsAsFactors are essential, but 1 will give an empty first row. + for(i in 1:nrow(mstate)) + { + if(length(adj_cells[[i]] != 0)) #To exclude cell that does not have partner. + { + for(j in 1:length(adj_cells[[i]])) + { + net_row = as.character(c(rownames(mstate)[i], '(pp)', + rownames(mstate)[adj_cells[[i]][j]])) + + net_all = rbind(net_all, net_row) + } + } + } + net_all = net_all[2:nrow(net_all),] + rownames(net_all) = NULL #Reset row names. + + cat(sprintf('State graph generated, with %s nodes and %s edges.\n', length(unique(c(net_all[,1], net_all[,3]))), nrow(net_all))) + + if(record.both) + { + nonnet_all = data.frame(start_node=character(1), interaction=character(1), + end_node=character(1), stringsAsFactors=F) #Both 1 and stringsAsFactors are essential, but 1 will give an empty first row. + for(i in 1:nrow(mstate)) + { + if(length(nonadj_cells[[i]] != 0)) #To exclude cell that does not have partner. + { + for(j in 1:length(nonadj_cells[[i]])) + { + nonnet_row = as.character(c(rownames(mstate)[i], '(pp)', + rownames(mstate)[nonadj_cells[[i]][j]])) + + nonnet_all = rbind(nonnet_all, nonnet_row) + } + } + } + nonnet_all = nonnet_all[2:nrow(nonnet_all),] + rownames(nonnet_all) = NULL #Reset row names. + + cat(sprintf('Transition not in agreement with model: %s nodes and %s edges.\n', length(unique(c(nonnet_all[,1], nonnet_all[,3]))), nrow(nonnet_all))) + + #Output into files. + if(!is.null(filepath)) + { + write.csv(net_all, paste(filepath, '/cytoscape_statespace_edges.txt', sep=''), quote=F, row.names=F) + write.csv(nonnet_all, paste(filepath, '/cytoscape_notstatespace_edges.txt', sep=''), quote=F, row.names=F) + } + + invisible(list(net_all, nonnet_all)) + } + + #Output into files. + if(!is.null(filepath)) + { + write.csv(net_all, paste(filepath, '/cytoscape_statespace_edges.txt', sep=''), quote=F, row.names=F) + } + + invisible(net_all) +} diff --git a/R/rand_model.R b/R/rand_model.R new file mode 100644 index 0000000..07d7764 --- /dev/null +++ b/R/rand_model.R @@ -0,0 +1,543 @@ +#' @title Generate random act and inh rule for a single gene +#' +#' @description +#' This function generates one random Boolean rule (both act and inh) per run. Return a list of two vectors. +#' Note that this method will not give empty rule, i.e. 0 term in both act and inh rules. +#' +#' @param x character vector. A vector of all single terms to be used. +#' @param np integer. Number of gene variables in a rule. NOT max_varperrule here. +#' @param tar_ind numerical. Indicate which gene is the rule for. Used in preventing self-loop. +#' @param ibool logical. Indicates whether to include AND terms or not. Default to F. +#' @param self_loop logical. Indicates whether to allow self_loop. Default to F. +gen_singlerule = function(x, np, tar_ind, ibool=F, self_loop=F) +{ + #Convert x to the form of 'v1s'. + if(!(all(grepl('v[0-9]+s', x)))) + { + if(any(grepl('v[0-9]+s', x))) + { + stop('The name of some genes resemble internally used variables. Respecify names.') + } else + { + x = paste('v', seq(1, length(x)), 's', sep='') + } + } + + rnum_act = floor(runif(1, 0, np+1)) #get a random number from 0 to np for act_rule. + + if(rnum_act==0) + { + #rnum_inh = floor(runif(1, 1, np-rnum_act+1)) #must have at least one term if rnum_act is empty. + rnum_inh = np + } else { + #rnum_inh = floor(runif(1, 0, np-rnum_act+1)) #get a random number from 0 to np-rnum_act for inh_rule. + rnum_inh = np - rnum_act + } + + if(rnum_act + rnum_inh > length(x)) #cannot use more terms than available. + { + stop('Error in generating rules for random models.') + } + + #Get the activation rule. + if(rnum_act != 0) + { + if(!self_loop) + { + ract_prob = rep(1/(length(x)-1), length(x)) + ract_prob[tar_ind] = 0 #set probability to 0, to prevent self-loop. + tmp_ract = sample(x, rnum_act, prob=ract_prob) #get single variables first. + } else + { + tmp_ract = sample(x, rnum_act) #get single variables first. + } + + rule_act = c() + + if(ibool) + { + #Determine which single variables to combine into double variables. + ra_ind = as.logical(replicate(rnum_act,round(runif(1)))) + while(sum(ra_ind)%%2==1) + { + ra_ind = as.logical(replicate(rnum_act,round(runif(1)))) + } + + #Add double variables. + if(length(tmp_ract[ra_ind]) != 0) + { + for(i in 1:length(tmp_ract[ra_ind])) + { + if(i %% 2 == 1) #only do it when i is odd. + { + rule_act = c(rule_act, paste(c(tmp_ract[ra_ind][i], tmp_ract[ra_ind][i+1]), collapse='&')) + } + } + } + + #Add single variables. + rule_act = c(tmp_ract[!ra_ind], rule_act) + } else + { + #Add single variables. + rule_act = tmp_ract + } + } else + { + rule_act = '0' + } + + if(rnum_inh != 0) + { + #Get the inhibition rule. + if(!self_loop) + { + intr_x = x[!(x %in% unlist(strsplit(rule_act, '&')))] + rinh_prob = rep(1/(length(intr_x)-1), length(intr_x)) + rinh_prob[which(intr_x %in% x[tar_ind])] = 0 #set probability to 0, to prevent self-loop. + tmp_rinh = sample(intr_x, rnum_inh, prob=rinh_prob) #get single variables first. + } else + { + tmp_rinh = sample(x[!(x %in% unlist(strsplit(rule_act, '&')))], rnum_inh) #exclude terms already used in rule_act. + } + + if(ibool) + { + #Determine which single variables to combine into double variables. + ri_ind = as.logical(replicate(rnum_inh, round(runif(1)))) + while(sum(ri_ind)%%2==1) + { + ri_ind = as.logical(replicate(rnum_inh, round(runif(1)))) + } + + #Add double variables. + rule_inh = c() + if(length(tmp_rinh[ri_ind]) != 0) + { + for(i in 1:length(tmp_rinh[ri_ind])) + { + if(i %% 2 == 1) #only do it when i is odd. + { + rule_inh = c(rule_inh, paste(c(tmp_rinh[ri_ind][i], tmp_rinh[ri_ind][i+1]), collapse='&')) + } + } + } + + #Add single variables. + rule_inh = c(tmp_rinh[!ri_ind], rule_inh) + } else + { + rule_inh = tmp_rinh + } + } else + { + rule_inh = '0' + } + + return(list(rule_act, rule_inh)) +} + +#' @title Generate a random Boolean model +#' +#' @description +#' This function generates a random Boolean model. Returns an S4 BoolModel object. +#' Note that this method will not give empty rule, i.e. 0 term in both act and inh rules. +#' +#' @param var character vector. A vector of single genes/variables to be used in the model. +#' @param mvar integer. Maximum number of variables in act or inh rule. Default to length(var). +#' @param inter_bool logical. Indicates whether to include AND terms or not. Default to F. +#' @param self_loop logical. Indicates whether to allow self_loop. Default to F. +#' +#' @details +#' The number of terms in a function for a gene is modelled by power-law distribution. +gen_one_rmodel = function(var, mvar=length(var), inter_bool=F, self_loop=F) +{ + require(poweRlaw) + + if(mvar > length(var)) + { + mvar=length(var) + } + + #(1) By using power law distribution, estimate the in-degree genetic partner for each gene. + #The minimum in degree is set to 2. 'v1s' and 'v1s&v2s' are currently considered as 2 different partners. + num_partner = poweRlaw::rpldis(length(var), xmin=2, alpha=3) #xmin = the minimum value of resulting random integer, alpha = scaling factor of the distribution. According to literature, for gene network, this should be 3. (or 2 mvar] = mvar #power law distribution can gives very high number, therefore must cap it. + + arule = sapply(1:length(num_partner), function(x) gen_singlerule(var, num_partner[x], x, inter_bool, self_loop)) + arule = apply(arule, 1, c) #arule is a list of lists, with list[[1]] = all act rules, list[[2]] = all inh rules. + + bmodel = BoolModel(target=var, target_var=paste('v',seq(1,length(var)),'s', sep=''), rule_act=arule[[1]], rule_inh=arule[[2]]) + + return(bmodel) +} + +#' @title Generate sets of random data +#' +#' @description +#' This function generates specified sets of random data, which include initial states, two Boolean models (starting and destination), and continuous+discrete data of the destination model. +#' +#' @param n integer. Number of sets of random data required. +#' @param steps integer. Specify the number of steps between the two Boolean models. If steps=0, give completely random starting model. +#' @param num_genes integer. Number of genes in the Boolean models. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' @param inter_bool logical. Indicate whether to consider AND terms. +#' +#' @export +gen_randata = function(n, steps, num_genes, max_varperrule, inter_bool) +{ + var = paste('v', seq(1, num_genes), 's', sep='') + + output = list() + for(i in 1:n) + { + istate = rbinom(length(var), 1, 0.5) + #Getting a random initial state. + while(mean(istate) > 0.9 | mean(istate) < 0.1) #do not want initial state that is too homogenous. + { + istate = rbinom(length(var), 1, 0.5) + } + istate = data.frame(matrix(istate, nrow=1)) + colnames(istate) = var + istate = initialise_data(istate) + + #Setting the starting and ending models. + bmodel_pair = gen_two_rmodel(var, steps, max_varperrule, inter_bool) + bmodel_start = bmodel_pair[[1]] + bmodel_end = bmodel_pair[[2]] + + #Getting the simulated data of bmodel_end. + ddata = simulate_model(bmodel_end, istate) + + #Getting parameters from real data. + #real_param = param_bimodal(wilson_raw_data, data_type='qpcr') #only need to run once. + data(real_param) + + #Converting binary values to continuous values. + cdata = bin_to_real(ddata, real_param) + + stopifnot(dim(ddata)==dim(cdata)) + rownames(cdata) = rownames(ddata) + colnames(cdata) = colnames(ddata) + + out_entry = list(istate=istate, bmodel_start=bmodel_start, bmodel_end=bmodel_end, ddata=ddata, cdata=cdata) + + output = c(output, list(out_entry)) + } + + return(output) +} + +#' @title Generate sets of random data +#' +#' @description +#' (Requires bnlearn) This function generates specified sets of random data, which include initial states, two Boolean models (starting and destination), and continuous+discrete data of the destination model. +#' +#' @param n integer. Number of sets of random data required. +#' @param steps integer. Specify the number of steps between the two Boolean models. If steps=0, give completely random starting model. +#' @param num_genes integer. Number of genes in the Boolean models. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' @param inter_bool logical. Indicate whether to consider AND terms. +#' +#' @export +gen_randata_bn = function(n, steps, num_genes, max_varperrule, inter_bool) +{ + if (!requireNamespace("bnlearn", quietly = TRUE)) { + stop("Package bnlearn needed for this function to work. Please install it.", + call. = FALSE) + } + require('bnlearn') + + var = paste('v', seq(1, num_genes), 's', sep='') + + output = list() + for(i in 1:n) + { + istate = rbinom(length(var), 1, 0.5) + #Getting a random initial state. + while(mean(istate) > 0.9 | mean(istate) < 0.1) #do not want initial state that is too homogenous. + { + istate = rbinom(length(var), 1, 0.5) + } + istate = data.frame(matrix(istate, nrow=1)) + colnames(istate) = var + istate = initialise_data(istate) + + # #Generate random binary values. + # bn_data = matrix(replicate(num_genes * 100, round(runif(1))), ncol=num_genes) + # + # #Getting parameters from real data. + # #real_param = param_bimodal(wilson_raw_data, data_type='qpcr') #only need to run once. + # data(real_param) + # + # #Converting binary values to continuous values. + # bn_data = data.frame(bin_to_real(bn_data, real_param)) + # colnames(bn_data) = var + + #Setting the starting and ending models. + bn_model_pair = gen_two_rmodel_dag(var, steps, max_varperrule) + bn_model_start = bn_model_pair[[1]] + bn_model_end = bn_model_pair[[2]] + + #Converting the starting and ending models from bn objects to BoolModel objects. + bmodel_start = amat_to_bm(amat(bn_model_start)) + bmodel_end = amat_to_bm(amat(bn_model_end)) + + #Getting the simulated data of bmodel_end. + ddata = simulate_model(bmodel_end, istate) + + #Getting parameters from real data. + #real_param = param_bimodal(wilson_raw_data, data_type='qpcr') #only need to run once. + data(real_param) + + #Converting binary values to continuous values. + cdata = bin_to_real(ddata, real_param) + + stopifnot(dim(ddata)==dim(cdata)) + rownames(cdata) = rownames(ddata) + colnames(cdata) = colnames(ddata) + + out_entry = list(istate=istate, bmodel_start=bmodel_start, bmodel_end=bmodel_end, + ddata=ddata, cdata=cdata, + bn_model_start=bn_model_start, bn_model_end=bn_model_end) + + output = c(output, list(out_entry)) + } + + return(output) +} + +#' @title Generate two random DAG Boolean models with a specified number of steps apart +#' +#' @description +#' This function generates a random DAG Boolean model, then get another random DAG Boolean model that is a specified number of steps apart by adding and/or removing genes. +#' Difficult to generate completely directed graph with a specified number of steps apart. +#' +#' @param var character vector. A vector of single genes/variables to be used in the model. +#' @param steps integer. Number of steps apart between the two models. If steps=0, give completely random starting model. +#' @param mvar integer. Maximum number of variables in act or inh rule. Default to length(var). +#' @param in_amat matrix. Provide adjacency matrix of first model. +#' @param acyclic logical. Whether to restrict the model to being acyclic or not. Defaults to TRUE. +#' +#' @export +gen_two_rmodel_dag = function(var, steps, mvar=length(var), in_amat=NULL, acyclic=T) +{ + require(bnlearn) + + if(steps==0) + { + start_model = random.graph(var, method = "melancon", max.degree=mvar) + end_model = random.graph(var, method = "melancon", max.degree=mvar) + } else + { + if(is.null(in_amat)) + { + #(3) Get first model. + start_model = random.graph(var, method = "melancon", max.degree=mvar) + } else + { + start_model = empty.graph(var) + if(acyclic) + { + amat(start_model) = in_amat + } else + { + amat(start_model, ignore.cycles=T) = in_amat + } + } + + cur_ite = 1 + max_ite = 100 + out_break = F + while(cur_ite < max_ite & !out_break) + { + #(4) Get second model of a specified number of steps away. + end_model = start_model + memory_ind = c() + #memory_ind = apply(which(t(amat(end_model))==1, arr.ind = T), 1, function(x) paste(x, collapse=',')) #no undirected arcs. + #memory_ind = c(memory_ind, paste(1:nrow(tmp), 1:ncol(tmp), sep=',')) #no self_loops. + #start_time = proc.time() + while(TRUE) + { + tryCatch({ + #used_time = proc.time() - start_time + #used_time = as.numeric(used_time)[3] + + tmp_graph = empty.graph(var) + tmp = amat(end_model) + + x_ind = sample(1:nrow(tmp),1) + y_ind = sample(1:ncol(tmp),1) + + while(paste(x_ind, y_ind, collapse=',') %in% memory_ind) + { + x_ind = sample(1:nrow(tmp),1) + y_ind = sample(1:ncol(tmp),1) + } + + memory_ind = c(memory_ind, paste(x_ind, y_ind, collapse=',')) + if(length(memory_ind) == nrow(tmp)*ncol(tmp)) #break loop, get a new starting model and try again. + { + #cat(sprintf('For step %s, no other possible model exists.', steps)) + break + } + + #Change one step at a time. + if(tmp[x_ind,y_ind]==0) + { + tmp[x_ind,y_ind] = 1 + } else + { + tmp[x_ind,y_ind] = 0 + } + + if(acyclic) + { + amat(tmp_graph) = tmp + } else + { + amat(tmp_graph, ignore.cycles=T) = tmp + } + + end_model = tmp_graph + + if(sum(abs(amat(start_model)-amat(tmp_graph))) == steps) + { + if(nrow(undirected.arcs(tmp_graph))==0) #cannot have undirected arcs in the models. + { + out_break = T + break + } + } + }, error=function(e){}, + finally={}) + } + + cur_ite = cur_ite + 1 + } + + if(cur_ite == max_ite) + { + stop(sprintf('For step %s, no other possible model exists.', steps)) + } + } + + return(list(start_model, end_model)) +} + +#' @title Generate two random Boolean models with a specified number of steps apart +#' +#' @description +#' This function generates a random Boolean model, then get another random Boolean model that is a specified number of steps apart by adding and/or removing genes. +#' Returns a list of two S4 BoolModel objects. +#' +#' @param var character vector. A vector of single genes/variables to be used in the model. +#' @param steps integer. Number of steps apart between the two models. If steps=0, give completely random starting model. +#' @param mvar integer. Maximum number of variables in act or inh rule. Default to length(var). +#' @param inter_bool logical. Indicates whether to include AND terms or not. Default to F. +#' @param in_bmodel BoolModel object. The starting model supplied. +#' @param self_loop logical. Indicates whether to allow self_loop. Default to F. +#' +#' @details +#' The number of terms in a function for a gene is modelled by power-law distribution. +#' +#' @export +gen_two_rmodel = function(var, steps, mvar=length(var), inter_bool=F, in_bmodel=NULL, self_loop=F) +{ + if(mvar > length(var)) + { + mvar=length(var) + } + + if(is.null(in_bmodel)) + { + bmodel1 = gen_one_rmodel(var, mvar, inter_bool, self_loop) + } else + { + if(class(in_bmodel)!='BoolModel') + { + stop('in_bmodel: Please give BoolModel object.') + } + + bmodel1 = in_bmodel + stopifnot(length(in_bmodel@target) == length(var)) + } + + if(steps==0) + { + bmodel2 = gen_one_rmodel(var, mvar, inter_bool, self_loop) + } else + { + cur_model = bmodel1 + bmodel2 = bmodel1 + while(length(unlist(model_setdiff(cur_model, bmodel1, inter_bool, mvar))) steps) + # { + # browser() + # stop('Error in code.') + # } + + all_rule = lapply(1:length(cur_model@target), function(x) c(cur_model@rule_act[[x]], cur_model@rule_inh[[x]])) + all_rule = lapply(all_rule, function(x) x[x!='0']) + all_rule = lapply(all_rule, function(x) unlist(strsplit(x, '&'))) + + #Summarise info in bmodel. + all_len = sapply(all_rule, length) #get number of terms (act and inh) for each target gene + + avail_add_ind = which(all_len < mvar) #any rule with less genes than mvar can take more genes. + avail_del_ind = which(all_len > 1) #any rule with more genes than 1 can have fewer genes. + + if(length(avail_add_ind) == 0 & length(avail_del_ind) == 0) + { + stop('Model is fully specified. No term can be modified.') + break + } + + #picking choice for modification. + if(length(avail_add_ind) == 0) + { + mod_choice = 'del' + } else if(length(avail_del_ind) == 0) + { + mod_choice = 'add' + } else + { + mod_choice = sample(c('add', 'del'), 1) + } + + #Modifying models. + if(mod_choice == 'add') + { + if(length(avail_add_ind) == 1) + { + rule_aind = avail_add_ind + } else + { + rule_aind = sample(avail_add_ind, 1) + } + + next_model = sample(minmod_model(cur_model, rule_aind)$addlist, 1)[[1]] + + } else if(mod_choice == 'del') + { + if(length(avail_del_ind) == 1) + { + rule_dind = avail_del_ind + } else + { + rule_dind = sample(avail_del_ind, 1) + } + + next_model = sample(minmod_model(cur_model, rule_dind)$dellist, 1)[[1]] + } + stopifnot(length(unlist(model_setdiff(cur_model, next_model, inter_bool, mvar)))==1) + cur_model = next_model + } + + bmodel2 = cur_model + stopifnot(length(unlist(model_setdiff(bmodel2, bmodel1, inter_bool, mvar)))==steps) + } + + return(list(bmodel1, bmodel2)) +} \ No newline at end of file diff --git a/R/score_calculation.R b/R/score_calculation.R new file mode 100644 index 0000000..502293d --- /dev/null +++ b/R/score_calculation.R @@ -0,0 +1,162 @@ +#' @title Inner function of calculating Boolean model score +#' +#' @description +#' This function calculates a final model score from pairwise and penalty scores. +#' +#' @param x matrix vector. Pairwise scores computed by dist_measure(). +#' @param bmodel S4 BoolModel object. Model to be evaluated. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' @param detail logical. Whether to give more details in score calculation. Default to FALSE. +m_score = function(x, bmodel, max_varperrule, detail=F) +{ + #(1) Calculate score for distance between states + y = mean(apply(x, 2, min)) #best + #y = mean(apply(x, 1, min)) + + #(2) Calculate penalty term + #(A) To penalise having too low or too high number of model states, when compared to the number of data states. + #Ideally, the number of model states >= the number of data states. + #abs(number of model states - number of data states) + za = abs(ncol(x) - nrow(x)) / (nrow(x) * length(bmodel@target)) #best + #za = abs(ncol(x) - nrow(x)) / (nrow(x)) + + #(B) To penalise having too many variables in the rules. + var_len = list() #combine act and inh rules for each variable. + for(i in 1:length(bmodel@target)) + { + var_len = c(var_len, list(c(bmodel@rule_act[[i]], bmodel@rule_inh[[i]]))) + } + + #calculate number and fraction of each variable. + var_len = sapply(var_len, function(x) x[!grepl('0', x)]) #to remove '0' + num_var = sapply(var_len, function(x) length(strsplit(paste(x, collapse=''), 'v')[[1]])-1 ) #to count the number of v[0-9]s terms. e.g. v1s&v2s will be counted as 2 terms. + num_var[num_var<0] = 0 #if the rule for the variable is completely empty, has to set the negative values to 0. + + frac_var = (num_var-max_varperrule) + zb_ind = num_var > max_varperrule + zb = sum(frac_var[zb_ind]) + + #(3) Calculate final score. + #Specify the constants for each penalty term. + a = 1 + b = 1 + + f = y + a*za + b*zb #f ranges from 0 to infinity. + + if(detail) + { + output = c(f, y, za, zb) + names(output) = c('f', 'y', 'za', 'zb') + } else + { + output = c(f) + names(output) = c('f') + } + return(output) +} + +#' @title Calculating Boolean model score wrt to a dataset +#' +#' @description +#' This function calculates a score for a Boolean model wrt to a dataset. +#' +#' @param bmodel S4 BoolModel object. Model to be evaluated. +#' @param istate data frame. Must have only 1 row, which represents 1 initial state. +#' @param data matrix. Represents the expression data df. +#' @param overlap_gene character vector. Specify which genes are present in both model and data inputs. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' @param detail logical. Whether to give more details in score calculation. Default to FALSE. +#' +#' @export +calc_mscore = function(bmodel, istate, fcdata, overlap_gene, max_varperrule, detail=F) +{ + #(1) Simulate each of these models. + tmp = simulate_model(bmodel, istate) + mdata = tmp + + #(2) Perform gene filtering on model state space. + fmdata = filter_dflist(mdata, overlap_gene) + fmdata = fmdata+0 #convert logical matrix into numeric matrix. + + if(class(fcdata)!='matrix') + { + fcdata = as.matrix(fcdata) + } + + #(3) Score each model state wrt to data state. + #return pairwise scores between each model and data states. + score_matrix = rcpp_man_dist(fmdata, fcdata) #The first must be the model state. This returns a matrix of row=data, col=model. + #score_matrix = rcpp_ham_dist(fmdata, fcdata) + + final_score = m_score(score_matrix, bmodel, max_varperrule, detail=detail) #return a matrix (1x2) of two model scores, penalised score & unpenalised score. the lower the better. + + return(final_score) +} + +#' @title Calculate true positive, true negative, false positive and false negative +#' +#' @description +#' This function calculates the true positive, true negative, false positive and false negative values from the adjacency matrices. +#' +#' @param inf_mat matrix. It should be adjacency matrix of inferred network. +#' @param true_mat matrix. It should be adjacency matrix of true network. +#' +#' @export +validate_adjmat = function(inf_mat, true_mat) +{ + output = rcpp_validate(inf_mat, true_mat) + names(output) = c('tp', 'tn', 'fp', 'fn') + + return(output) +} + +#' @title Calculate precision, recall, f-score, accuracy and specificity +#' +#' @description +#' This function calculates the precision, recall, f-score, accuracy and specificity from the output of validate_adjmat(). +#' +#' @param x integer vector. Vector output by validate_adjmat(). +#' +#' @export +calc_roc = function(x) +{ + #(1) Calculate the precision/positive predictive value. + #probability that the disease is present when the test is positive + #range from 0 to 1. + p = unname(x['tp'] / (x['tp'] + x['fp'])) + + #(2) Calculate the negative predictive value. + #probability that the disease is not present when the test is negative + #range from 0 to 1. + np = unname(x['tn'] / (x['tn'] + x['fn'])) + + #(3) Calculate the recall/sensitivity/true positive rate. + #probability that a test result will be positive when the disease is present. + #range from 0 to 1. + r = unname(x['tp'] / (x['tp'] + x['fn'])) + + #(4) Calculate the accuracy. + a = unname((x['tp'] + x['tn']) / (x['tp'] + x['tn'] + x['fp'] + x['fn'])) + + #(5) Calculate the specificity/true negative rate. + #probability that a test result will be negative when the disease is not present. + #range from 0 to 1. + s = unname(x['tn'] / (x['tn'] + x['fp'])) + + #(6) Calculate the positive likelihood ratio. + #ratio between the probability of a positive test result given the presence of the disease and the probability of a positive test result given the absence of the disease. + plr = r / (1 - s) + + #(7) Calculate the negative likelihood ratio. + #ratio between the probability of a negative test result given the presence of the disease and the probability of a negative test result given the absence of the disease + nlr = (1 - r) / s + + #(8) Calculate the f-score. + #harmonic average of precision and recall. + f = 2*p*r / (r+p) + + output = c(p, np, r, a, s, plr, nlr, f) + names(output) = c('p', 'np', 'r', 'a', 's', 'plr', 'nlr', 'f') + + return(output) +} diff --git a/R/search.R b/R/search.R new file mode 100644 index 0000000..f296717 --- /dev/null +++ b/R/search.R @@ -0,0 +1,272 @@ +#' @title Training Model +#' +#' @description +#' This function performs model training to find the best model, using information from data. It requires an initial state supplied to perform the search, and an initial model can also be supplied to be included in the initial population. +#' Note that if a model is supplied, and the genes in the model is different from the genes in the data, only the genes overlapping between model and data will be retained for further analysis. +#' +#' @param bmodel Boolean model in data frame. If NULL, use a random Boolean model. Default to NULL. +#' @param edata list of 2 data frames. Initialised continuous and discretised expression data. Each data frame should have state(row) x gene(column). +#' @param istate data frame. Must have only 1 row, which represents 1 initial state. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' @param tol numeric. Specify the tolerance in ending condition. Default to 1e-6. It cannot be lower than .Machine$double.eps ^ 0.5. +#' @param inter_bool logical. Indicate whether to consider AND terms. Default to TRUE. +#' @param verbose logical. Specifies whether to give detailed output. Default to F. +#' @param self_loop logical. Indicates whether to allow self_loop in random starting model. Only used if is.null(bmodel). Default to F. +#' +#' @export +model_train = function(bmodel=NULL, edata, istate, max_varperrule=6, tol=1e-6, inter_bool, verbose=F, self_loop=F) +{ + vcat('Preparing data for analysis.\n', verbose) + + if(class(edata)!='list' | length(edata)!=2) + { + stop('edata: Supply two expression data frames in a list.') + } + + #Initialise input data. + istate = initialise_data(istate, aslogic=T) + cdata = initialise_data(edata[[1]]) + ddata = initialise_data(edata[[2]]) + + #Initialise model. + if(is.null(bmodel)) + { + bmodel = gen_one_rmodel(colnames(istate), max_varperrule, inter_bool, self_loop) + } else if(class(bmodel) != 'BoolModel') + { + bmodel = initialise_model(bmodel) + } + + #Filtering expression data. + stopifnot(colnames(edata[[1]])==colnames(edata[[2]])) + overlap_gene = intersect(colnames(edata[[1]]), y=bmodel@target) + nonoverlap_gene = bmodel@target[!(bmodel@target %in% overlap_gene)] + names(overlap_gene) = bmodel@target_var[bmodel@target %in% overlap_gene] + names(nonoverlap_gene) = bmodel@target_var[!(bmodel@target %in% overlap_gene)] + + fddata = filter_dflist(ddata, overlap_gene, F) + fcdata = filter_dflist(cdata, overlap_gene, F) + + fcdata = unique_raw_data(fddata, fcdata) #removes duplicates in continuous data. + fddata = unique(fddata) + + vcat('Start training.\n', verbose) + + #(3) Calling final combined search. + best_model = c() + best_score = c() + all_best_score = list() + cur_step = 1 + next_break_ite = 1 + while(TRUE) + { + vcat(sprintf('Current iteration: %s.\n', cur_step), verbose) + + if(cur_step == 1) + { + if(length(bmodel)==1) + { + mod_model = list(bmodel) + } else + { + mod_model = bmodel + } + } else + { + mod_model = next_model + } + + vcat('Stage 1: Exploring neighbouring models.\n', verbose) + mod_model = foreach(i=1:length(mod_model)) %dopar% { + c(mod_model[[i]], unlist(minmod_model(mod_model[[i]], overlap_gene=overlap_gene))) + } + mod_model = unlist(mod_model) + + vcat(sprintf('Total neighbouring models: %s.\n', length(mod_model)), verbose) + if(length(mod_model)>1000000) + { + #due to memory limit and runtime consideration. + break + } + + vcat('Stage 2: Simulating and calculating scores for models.\n', verbose) + all_final_score = foreach(i=1:length(mod_model)) %dopar% { + model_score = calc_mscore(bmodel=mod_model[[i]], istate=istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule) + unname(model_score['f']) + } + + stopifnot(all(sapply(all_final_score, function(x) !is.null(x)))) #there will be NULL if any problem occurs within the workers of foreach. + stopifnot(all(sapply(all_final_score, length) == sapply(mod_model, length))) + + all_final_score = unlist(all_final_score) + + #Locating all equally best score branches. + best_ind = which(all_final_score == min(all_final_score)) #take all min scores. + best_score = all_final_score[best_ind] + best_model = mod_model[best_ind] + + next_model = best_model + + vcat(sprintf('Total model retained for next iteration: %s.\n', length(next_model)), verbose) + + if(cur_step > 1) #checking convergence, and breaking condition. + { + if(any(best_score == 0)) #if meet any model with 0 score, #if the score changes less than the tolerance between 2 iterations. + { + break + } else if(isTRUE(all.equal(mean(previous_score),mean(best_score), tolerance=tol))) + { + #increase break count. Iteration will be broken once the count reaches 2. + next_break_ite = next_break_ite + 1 + if(next_break_ite == 2) + { + break + } + } else if(length(next_model) == 0) + { + stop('Error: length(next_model)==0.') + } else + { + next_break_ite = 1 + } + } + + previous_score = best_score #store it for comparison. + all_best_score = c(all_best_score, list(best_score)) + cur_step = cur_step + 1 + + #browser() + } + vcat(sprintf('Final iteration: %s.\n', cur_step), verbose) + + vcat('Stage 4: Performing consensus analysis.\n', verbose) + consensus = model_consensus(best_model, inter_bool=inter_bool, max_varperrule=max_varperrule) + + output = list(consensus=consensus, best_model=best_model, best_score=best_score, ite_score=all_best_score, overlap_gene=overlap_gene, nonoverlap_gene=nonoverlap_gene) + return(output) +} + +#' @title Simplifying Model +#' +#' @description +#' This method takes in a model and remove redundant terms wrt to a single initial state. +#' Note that this model simplification is random, and the simplified model is not guaranteed to be the simplest model possible. It is only guaranteed to be a simpler model that can give the same state space as the orignal input model. +#' +#' @param bmodel S4 BoolModel object. +#' @param istate data frame. Must have only 1 row, which represents 1 initial state. +#' @param inter_bool logical. Indicate whether to consider AND terms. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' @param verbose logical. Specifies whether to give detailed output. Default to F. +model_simplify = function(bmodel, istate, inter_bool, max_varperrule, verbose=F) +{ + vcat('Stage 1: Calculating score of initial model.\n', verbose) + #Get the states of the original model. + overlap_gene = bmodel@target + fcdata = simulate_model(bmodel, istate) + fcdata = fcdata+0 #convert logical to numeric. + + ori_score = calc_mscore(bmodel, istate, fcdata, overlap_gene, max_varperrule, simplify_bool=T) + stopifnot(ori_score==0) + + next_bmodel = bmodel + ite = 1 + while(TRUE) + { + cat(sprintf('Simplification iteration: %s\n', ite)) + #Generate list of minimally deleted models. + + vcat('Stage 2: Exploring neighbouring models.\n', verbose) + + mod_model = c(next_bmodel, minmod_model(next_bmodel, ibool=inter_bool, overlap_gene=overlap_gene)$del_list) + #Breaking condition. + if(length(mod_model) == 1) #model can no longer be simplified. + { + final_bmodel = mod_model[[1]] + break + } + vcat(sprintf('Total neighbouring models: %s.\n', length(mod_model)), verbose) + + vcat('Stage 3: Simulating and calculating scores for models.\n', verbose) + model_res = foreach(i=1:length(mod_model), .combine='c') %dopar% { + model_score = calc_mscore(bmodel=mod_model[[i]], istate=istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule, simplify_bool=T) + names(model_score)=i + model_score + } + + all_final_score = unname(model_res) + + stopifnot(!is.null(model_res)) + stopifnot(length(all_final_score)==length(mod_model)) + + #Breaking condition. + if(!any(all_final_score[-1] == 0)) #model can no longer be simplified. + { + final_bmodel = mod_model[[1]] + break + } + + #Pick a random equivalent model for next iteration. + best_ind = which.random.min(all_final_score) + next_bmodel = mod_model[[best_ind]] + + ite = ite + 1 + } + + return(final_bmodel) +} + +#' @title Intersection of input genes +#' +#' @description +#' This function finds the intersection of input genes and provide a score for them. Return a consensus model or a vector of scores. +#' +#' @param bmodel_list list of BoolModel. +#' @param inter_bool logical. Indicate whether to consider AND terms. +#' @param max_varperrule integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6. +#' @param format character. Specifies which format to return. Possible values: 'vec', 'df'. Default to 'vec'. +#' +#' @export +model_consensus = function(bmodel_list, inter_bool, max_varperrule, format='vec') +{ + #(1) Convert all bmodels to encoded forms. + encoding = get_encodings(bmodel_list[[1]], inter_bool) + encmodel_list = lapply(bmodel_list, function(x) compress_bmodel(x, encoding, max_varperrule)) + + #(2) Check and remove duplicated models. + #stopifnot(all(sapply(encmodel_list, length)==length(encmodel_list[[1]]))) #all models must have same lengths after encoding. + encmodel_list = unique(encmodel_list) + + #(3) Count frequency of each term. + rle_freq = rle(sort(unlist(encmodel_list))) + term_freq = rle_freq$lengths + names(term_freq) = rle_freq$values + + #(4) Remove integers (i.e. empty terms) + term_freq = term_freq[as.numeric(names(term_freq))%%1!=0] + + #(5) Remove 0 and !0. + zero_enc = encoding[grepl('0$', names(encoding))] #get encodings for 0 and !0. + stopifnot(length(zero_enc)==2) + term_freq = term_freq[!grepl(paste(zero_enc[1], '$', sep=''), names(term_freq))] #remove 0. + term_freq = term_freq[!grepl(paste(zero_enc[2], '$', sep=''), names(term_freq))] #remove !0. + + #(6) Obtain final frequency. + final_res = signif(term_freq/length(encmodel_list)) #convert into percentage. + + if(format=='df') + { + #(7) Decompress the genes into a Boolean model. + out_bmodel = decompress_bmodel(as.numeric(names(term_freq)), encoding, gene=bmodel_list[[1]]@target, format='df') + stopifnot(length(term_freq)==nrow(out_bmodel)) + + output = cbind(out_bmodel, final_res) + } else if(format=='vec') + { + output = final_res + } else + { + stop('Invalid format parameter.') + } + + return(output) +} diff --git a/R/simulation.R b/R/simulation.R new file mode 100644 index 0000000..ffb641f --- /dev/null +++ b/R/simulation.R @@ -0,0 +1,102 @@ +#' @title Evaluating Boolean rules +#' +#' @description +#' This function evaluates the Boolean rules (both act and inh) of one gene at a time. Return a logical value for that gene. +#' +#' @param bmodel S4 BoolModel object. +#' @param val named logical vector. It should contain the values for all genes at that time point. Note that each value in the vector must be named by its corresponding gene name. +#' @param ind integer. It indicates the state of which gene should be computed. +eval_bool = function(bmodel, val, ind) +{ + #Get all genes, and assign with initial values. + gene_value = val[bmodel@target] #using gene names to get values. + + #Make a gene to variable conversion index. + gene_var = bmodel@target_var + names(gene_var) = bmodel@target + + names(gene_value) = gene_var[names(gene_value)] #make name conversion + + #Get the activation and inhibition rules and replace gene names with values in rules. + act_rule = bmodel@rule_act[[ind]] + inh_rule = bmodel@rule_inh[[ind]] + + for(i in names(gene_value)) + { + act_rule = gsub(i, gene_value[i], act_rule) + inh_rule = gsub(i, gene_value[i], inh_rule) + } + + #Evaluate the activation and inhibition rules. + act_ans = eval(parse(text=paste(act_rule,collapse='|'))) #join with OR and evaluate. parse converts string into expression. + inh_ans = eval(parse(text=paste(inh_rule,collapse='|'))) + + return(act_ans&!inh_ans) +} + +#' @title Simulating Boolean model +#' +#' @description +#' This function simulates the Boolean model using an initial state. Returns the full asynchronous state space, and point steady states. +#' +#' @param bmodel S4 BoolModel object. +#' @param istate data frame. It must have been initialised by initialise_data(), and has gene names as column names. Must contain only 1 row. +#' @param steady_bool logical. Specifies whether to return point steady states or not. Default to F. +#' +#' @export +simulate_model = function(bmodel, istate, steady_bool=F) +{ + stopifnot(nrow(istate)==1) #must have only 1 row. Only 1 initial state should be passed into this function at each iteration. + + #Filter the start state, if the model is modified. And convert it into a named vector. + #this step will filter and ensure that both bmodel and istate have the same gene order. + istate = istate[bmodel@target] + istate = unname(unlist(istate)) + + #Convert the bmodel from R object into R list. + lmodel = decreate_boolmodel(bmodel) + + all_state = rcpp_simulate(lmodel, istate) #note that a list is returned. + + #Convert the list into a df. + full_state = t(as.data.frame(all_state[[1]])) + + #Set row and column names. + rownames(full_state) = paste('state', seq(1,nrow(full_state)), sep='_') + colnames(full_state) = bmodel@target + + if(steady_bool) + { + steady_state = t(as.data.frame(all_state[[2]])) + if(nrow(steady_state)!=0) + { + rownames(steady_state) = paste('state', seq(1,nrow(steady_state)), sep='_') + colnames(steady_state) = bmodel@target + } + + return(list(full_state, steady_state)) + } + + return(full_state) +} + +#' @title Decreate Boolean model +#' +#' @description +#' This function converts a S4 BoolModel object into a list of 4 lists. (for use by Rcpp in simulate_model()) +#' +#' @param bmodel S4 BoolModel object. +decreate_boolmodel = function(bmodel) #boolean_model +{ + out_list = list() + for(i in 1:length(slotNames(bmodel))) + { + tmp_element = slot(bmodel, slotNames(bmodel)[i]) #use slot() to obtain the elements from the specific slot. + out_list = c(out_list, list(tmp_element)) + } + + #Give names to each list within the main list. (For easier access by Rcpp) + names(out_list) = c('gene','var', 'act_rule', 'inh_rule') + + return(out_list) +} diff --git a/booltrainer.Rproj b/booltrainer.Rproj new file mode 100644 index 0000000..ad83b6f --- /dev/null +++ b/booltrainer.Rproj @@ -0,0 +1,18 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,namespace diff --git a/data/bon_bmodel.rda b/data/bon_bmodel.rda new file mode 100644 index 0000000..8a9a4cb Binary files /dev/null and b/data/bon_bmodel.rda differ diff --git a/data/bon_istate.rda b/data/bon_istate.rda new file mode 100644 index 0000000..49f9ad7 Binary files /dev/null and b/data/bon_istate.rda differ diff --git a/data/bon_moig_istate.rda b/data/bon_moig_istate.rda new file mode 100644 index 0000000..cd909b3 Binary files /dev/null and b/data/bon_moig_istate.rda differ diff --git a/data/bon_sstate.rda b/data/bon_sstate.rda new file mode 100644 index 0000000..f9e727d Binary files /dev/null and b/data/bon_sstate.rda differ diff --git a/data/bon_wilson_istate.rda b/data/bon_wilson_istate.rda new file mode 100644 index 0000000..aed1c92 Binary files /dev/null and b/data/bon_wilson_istate.rda differ diff --git a/data/bonkrum_geneid_map.rda b/data/bonkrum_geneid_map.rda new file mode 100644 index 0000000..8622a3d Binary files /dev/null and b/data/bonkrum_geneid_map.rda differ diff --git a/data/krum_bmodel.rda b/data/krum_bmodel.rda new file mode 100644 index 0000000..1a07183 Binary files /dev/null and b/data/krum_bmodel.rda differ diff --git a/data/krum_istate.rda b/data/krum_istate.rda new file mode 100644 index 0000000..3c0e36a Binary files /dev/null and b/data/krum_istate.rda differ diff --git a/data/krum_sstate.rda b/data/krum_sstate.rda new file mode 100644 index 0000000..fec409b Binary files /dev/null and b/data/krum_sstate.rda differ diff --git a/data/krum_wilson_istate.rda b/data/krum_wilson_istate.rda new file mode 100644 index 0000000..5363ea3 Binary files /dev/null and b/data/krum_wilson_istate.rda differ diff --git a/data/moig_raw_data.rda b/data/moig_raw_data.rda new file mode 100644 index 0000000..353b3fc Binary files /dev/null and b/data/moig_raw_data.rda differ diff --git a/data/real_param.rda b/data/real_param.rda new file mode 100644 index 0000000..184a934 Binary files /dev/null and b/data/real_param.rda differ diff --git a/data/wilson_raw_data.rda b/data/wilson_raw_data.rda new file mode 100644 index 0000000..bf2b6fa Binary files /dev/null and b/data/wilson_raw_data.rda differ diff --git a/data/wilson_raw_rnaseq.rda b/data/wilson_raw_rnaseq.rda new file mode 100644 index 0000000..7a22687 Binary files /dev/null and b/data/wilson_raw_rnaseq.rda differ diff --git a/man/BoolModel-class.Rd b/man/BoolModel-class.Rd new file mode 100644 index 0000000..f709c8b --- /dev/null +++ b/man/BoolModel-class.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/boolmodel_class.R +\docType{class} +\name{BoolModel-class} +\alias{BoolModel} +\alias{BoolModel-class} +\title{An S4 class to represent a Boolean Model} +\description{ +This class represents Boolean Model in a S4 BoolModel object. +} +\section{Fields}{ + +\describe{ +\item{\code{target}}{character vector. It should contain gene symbols.} + +\item{\code{targer_var}}{character vector. It should contain the internal representation of gene variables, in the form of "v[0-9]+s"} + +\item{\code{rule_act}}{list of character vectors. Each element in the list should contain the activating gene variables for a particular target gene.} + +\item{\code{rule_inh}}{list of character vectors. Each element in the list should contain the inhibiting gene variables for a particular target gene.} +}} + diff --git a/man/BoolTraineR.Rd b/man/BoolTraineR.Rd new file mode 100644 index 0000000..770934c --- /dev/null +++ b/man/BoolTraineR.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/booltrainer.R +\docType{package} +\name{BoolTraineR} +\alias{BoolTraineR} +\alias{BoolTraineR-package} +\title{BoolTraineR: A package for studying asynchronous Boolean models} +\description{ +This package contains tools for Boolean model manipulation, as well as the search for the best Boolean model. +} + diff --git a/man/amat_to_bm.Rd b/man/amat_to_bm.Rd new file mode 100644 index 0000000..1f40e2c --- /dev/null +++ b/man/amat_to_bm.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/methods.R +\name{amat_to_bm} +\alias{amat_to_bm} +\title{Convert adjacency matrix into BoolModel object} +\usage{ +amat_to_bm(amat, random = F) +} +\arguments{ +\item{amat}{matrix. directed adjacency matrix.} + +\item{random}{logical. Randomly assign to either act or inh rules, when the adjacency matrix only has values 0 and 1, but not -1.} +} +\description{ +This function converts adjacency matrix to BoolModel object. Able to take in adjacency matrix with -1, which encodes for inhibitory interaction. +} + diff --git a/man/bin_to_real.Rd b/man/bin_to_real.Rd new file mode 100644 index 0000000..c30e481 --- /dev/null +++ b/man/bin_to_real.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{bin_to_real} +\alias{bin_to_real} +\title{Generate random real numbers from binary values} +\usage{ +bin_to_real(x, param) +} +\arguments{ +\item{x}{logical or 0/1 numeric matrix. Col-genes, row-samples.} + +\item{param}{list of parameters given by param_bimodal().} +} +\description{ +This function generates random real numbers from binary values, with supplied parameters. Returns a vector of real values. +} + diff --git a/man/bm_to_amat.Rd b/man/bm_to_amat.Rd new file mode 100644 index 0000000..9518b00 --- /dev/null +++ b/man/bm_to_amat.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/methods.R +\name{bm_to_amat} +\alias{bm_to_amat} +\title{Convert BoolModel into adjacency matrix} +\usage{ +bm_to_amat(x, directed = T) +} +\arguments{ +\item{x}{S4 BoolModel object.} + +\item{directed}{logical. Whether to return directed or undirected adjacency matrix. Default to TRUE.} +} +\description{ +This function converts a BoolModel object into an adjacency matrix. +} + diff --git a/man/bm_to_df.Rd b/man/bm_to_df.Rd new file mode 100644 index 0000000..f01f9a7 --- /dev/null +++ b/man/bm_to_df.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/methods.R +\name{bm_to_df} +\alias{bm_to_df} +\title{Convert BoolModel object into BoolNet readable data frame} +\usage{ +bm_to_df(bmodel) +} +\arguments{ +\item{bmodel}{BoolModel object.} +} +\description{ +This method converts BoolModel object into a data frame, which is readable by BoolNet. +} + diff --git a/man/bon_bmodel.Rd b/man/bon_bmodel.Rd new file mode 100644 index 0000000..0fa5960 --- /dev/null +++ b/man/bon_bmodel.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data_desc.R +\docType{data} +\name{bon_bmodel} +\alias{bon_bmodel} +\title{HSC Boolean Model from Bonzanni et al.} +\format{A data frame with 11 rows and 2 columns. + +Rows: each row consists of 1 gene and its associated Boolean rule. +Column 1: target gene +Column 2: associated Boolean rule} +\usage{ +data(bon_bmodel) +} +\description{ +A Boolean model describing HSC in mice. It contains 11 genes. Its steady state is a cyclic loop of 32 states. +} + diff --git a/man/bon_istate.Rd b/man/bon_istate.Rd new file mode 100644 index 0000000..75e26f2 --- /dev/null +++ b/man/bon_istate.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data_desc.R +\docType{data} +\name{bon_istate} +\alias{bon_istate} +\title{Initial state from Bonzanni et al.} +\format{A data frame with 1 row and 11 columns. + +Rows: each row consists of 1 set of Boolean state. +Columns: each column is for 1 gene/variable.} +\usage{ +data(bon_istate) +} +\description{ +An intial state specified in Bonzanni et al. It contains a set of Boolean values for 11 genes. +} + diff --git a/man/bon_moig_istate.Rd b/man/bon_moig_istate.Rd new file mode 100644 index 0000000..00e8acc --- /dev/null +++ b/man/bon_moig_istate.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data_desc.R +\docType{data} +\name{bon_moig_istate} +\alias{bon_moig_istate} +\title{Initial state from Moignard et al.} +\format{A data frame with 1 row and 20 columns. + +Rows: each row consists of 1 set of Boolean state. +Columns: each column is for 1 gene/variable.} +\usage{ +data(bon_moig_istate) +} +\description{ +An intial state obtained from data in Moignard et al, determined by taking colMeans over unique rows, and rounding the means to 0-1. +Values for genes that are missing in Moignard et al, but are present in Bonzanni et al, are determined by taking values from the original initial state supplied in Bonzanni et al. +It contains a set of Boolean values for 20 genes. +} + diff --git a/man/calc_mscore.Rd b/man/calc_mscore.Rd new file mode 100644 index 0000000..e9ddb87 --- /dev/null +++ b/man/calc_mscore.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/score_calculation.R +\name{calc_mscore} +\alias{calc_mscore} +\title{Calculating Boolean model score wrt to a dataset} +\usage{ +calc_mscore(bmodel, istate, fcdata, overlap_gene, max_varperrule, detail = F) +} +\arguments{ +\item{bmodel}{S4 BoolModel object. Model to be evaluated.} + +\item{istate}{data frame. Must have only 1 row, which represents 1 initial state.} + +\item{overlap_gene}{character vector. Specify which genes are present in both model and data inputs.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} + +\item{detail}{logical. Whether to give more details in score calculation. Default to FALSE.} + +\item{data}{matrix. Represents the expression data df.} +} +\description{ +This function calculates a score for a Boolean model wrt to a dataset. +} + diff --git a/man/calc_roc.Rd b/man/calc_roc.Rd new file mode 100644 index 0000000..e89015f --- /dev/null +++ b/man/calc_roc.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/score_calculation.R +\name{calc_roc} +\alias{calc_roc} +\title{Calculate precision, recall, f-score, accuracy and specificity} +\usage{ +calc_roc(x) +} +\arguments{ +\item{x}{integer vector. Vector output by validate_adjmat().} +} +\description{ +This function calculates the precision, recall, f-score, accuracy and specificity from the output of validate_adjmat(). +} + diff --git a/man/check_bmodel.Rd b/man/check_bmodel.Rd new file mode 100644 index 0000000..49e14d2 --- /dev/null +++ b/man/check_bmodel.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{check_bmodel} +\alias{check_bmodel} +\title{Check if the Boolean model violates constraints.} +\usage{ +check_bmodel(bmodel, max_varperrule) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} +} +\description{ +This function checks if the Boolean model violates contraints. Return logical value. +(1) Each gene rule should not have more terms than max_varperrule. +(2) The same term should not occur twice in the same rule. +} + diff --git a/man/compress_bmodel.Rd b/man/compress_bmodel.Rd new file mode 100644 index 0000000..a1c8f2d --- /dev/null +++ b/man/compress_bmodel.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/compression.R +\name{compress_bmodel} +\alias{compress_bmodel} +\title{Compress BoolModel} +\usage{ +compress_bmodel(bmodel, encoding, max_varperrule) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{encoding}{named numerical vector returned by get_encodings().} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} +} +\description{ +This function compresses S4 BoolModel object by representing variables using numbers, and also only the act rules and inh rules are kept. +Return a list of 3 vectors, corresponding to act rules and inh rules. +} + diff --git a/man/decompress_bmodel.Rd b/man/decompress_bmodel.Rd new file mode 100644 index 0000000..e846067 --- /dev/null +++ b/man/decompress_bmodel.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/compression.R +\name{decompress_bmodel} +\alias{decompress_bmodel} +\title{Decompress BoolModel} +\usage{ +decompress_bmodel(x, encoding, gene = NULL, format = "bmodel") +} +\arguments{ +\item{x}{vector returned by compress_bmodel.} + +\item{encoding}{named numerical vector returned by get_encodings().} + +\item{gene}{character vector. Corresponds to genes in the bmodel. Default to using variable names in encoding.} + +\item{format}{character. Specifies which format to return. Possible values: 'bmodel', 'df', 'amat', 'simp_df'. Default to 'bmodel'.} +} +\description{ +This function decompresses the bmodel compressed by compress_bmodel(). +Return a S4 BoolModel object. +} + diff --git a/man/decreate_boolmodel.Rd b/man/decreate_boolmodel.Rd new file mode 100644 index 0000000..6b3688d --- /dev/null +++ b/man/decreate_boolmodel.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/simulation.R +\name{decreate_boolmodel} +\alias{decreate_boolmodel} +\title{Decreate Boolean model} +\usage{ +decreate_boolmodel(bmodel) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} +} +\description{ +This function converts a S4 BoolModel object into a list of 4 lists. (for use by Rcpp in simulate_model()) +} + diff --git a/man/df_to_bm.Rd b/man/df_to_bm.Rd new file mode 100644 index 0000000..14944bc --- /dev/null +++ b/man/df_to_bm.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/methods.R +\name{df_to_bm} +\alias{df_to_bm} +\title{Convert a data frame into BoolModel object} +\usage{ +df_to_bm(in_df) +} +\arguments{ +\item{df}{data frame with 2 columns, targets and factors} +} +\description{ +This method converts a data frame into a BoolModel object. +Note that the model should only has 1 NOT operator. More than 1 is STRICTLY NOT allowed. +} + diff --git a/man/equi_model.Rd b/man/equi_model.Rd new file mode 100644 index 0000000..8406803 --- /dev/null +++ b/man/equi_model.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{equi_model} +\alias{equi_model} +\title{Check for equivalent models} +\usage{ +equi_model(bmodel1, bmodel2, inter_bool, max_varperrule) +} +\arguments{ +\item{bmodel1}{S4 BoolModel object.} + +\item{bmodel2}{S4 BoolModel object.} + +\item{inter_bool}{logical. Indicate whether to consider AND terms.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} +} +\description{ +This function checks if the two models have the same rules. Return a logical value. Only TRUE if each rule for each gene is the same. +} + diff --git a/man/eval_bool.Rd b/man/eval_bool.Rd new file mode 100644 index 0000000..d38c7ea --- /dev/null +++ b/man/eval_bool.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/simulation.R +\name{eval_bool} +\alias{eval_bool} +\title{Evaluating Boolean rules} +\usage{ +eval_bool(bmodel, val, ind) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{val}{named logical vector. It should contain the values for all genes at that time point. Note that each value in the vector must be named by its corresponding gene name.} + +\item{ind}{integer. It indicates the state of which gene should be computed.} +} +\description{ +This function evaluates the Boolean rules (both act and inh) of one gene at a time. Return a logical value for that gene. +} + diff --git a/man/extract_term.Rd b/man/extract_term.Rd new file mode 100644 index 0000000..5e372ba --- /dev/null +++ b/man/extract_term.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{extract_term} +\alias{extract_term} +\title{Extract Boolean terms} +\usage{ +extract_term(brule) +} +\arguments{ +\item{brule}{character vector. It should be either an activating or inhibiting rule for a target gene.} +} +\description{ +This function extracts the terms within a Boolean rule, using OR as the separator. Bracketed variables are counted as one term. +} + diff --git a/man/filter_dflist.Rd b/man/filter_dflist.Rd new file mode 100644 index 0000000..aa85f85 --- /dev/null +++ b/man/filter_dflist.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{filter_dflist} +\alias{filter_dflist} +\title{Filter columns of df in a list} +\usage{ +filter_dflist(x, y, uniq_bool = T) +} +\arguments{ +\item{x}{data frame. It should be a list element if used from lapply.} + +\item{y}{character vector. It should contains gene names found in the colnames of the data frame.} + +\item{uniq_bool}{logical. Whether to return unique rows only.} +} +\description{ +This function filters columns of multiple df in a list, when compared using a vector. Use through lapply(). +} + diff --git a/man/gen_one_rmodel.Rd b/man/gen_one_rmodel.Rd new file mode 100644 index 0000000..79695ce --- /dev/null +++ b/man/gen_one_rmodel.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/rand_model.R +\name{gen_one_rmodel} +\alias{gen_one_rmodel} +\title{Generate a random Boolean model} +\usage{ +gen_one_rmodel(var, mvar = length(var), inter_bool = F, self_loop = F) +} +\arguments{ +\item{var}{character vector. A vector of single genes/variables to be used in the model.} + +\item{mvar}{integer. Maximum number of variables in act or inh rule. Default to length(var).} + +\item{inter_bool}{logical. Indicates whether to include AND terms or not. Default to F.} + +\item{self_loop}{logical. Indicates whether to allow self_loop. Default to F.} +} +\description{ +This function generates a random Boolean model. Returns an S4 BoolModel object. +Note that this method will not give empty rule, i.e. 0 term in both act and inh rules. +} +\details{ +The number of terms in a function for a gene is modelled by power-law distribution. +} + diff --git a/man/gen_randata.Rd b/man/gen_randata.Rd new file mode 100644 index 0000000..988fc93 --- /dev/null +++ b/man/gen_randata.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/rand_model.R +\name{gen_randata} +\alias{gen_randata} +\title{Generate sets of random data} +\usage{ +gen_randata(n, steps, num_genes, max_varperrule, inter_bool) +} +\arguments{ +\item{n}{integer. Number of sets of random data required.} + +\item{steps}{integer. Specify the number of steps between the two Boolean models. If steps=0, give completely random starting model.} + +\item{num_genes}{integer. Number of genes in the Boolean models.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} + +\item{inter_bool}{logical. Indicate whether to consider AND terms.} +} +\description{ +This function generates specified sets of random data, which include initial states, two Boolean models (starting and destination), and continuous+discrete data of the destination model. +} + diff --git a/man/gen_randata_bn.Rd b/man/gen_randata_bn.Rd new file mode 100644 index 0000000..3c0f84f --- /dev/null +++ b/man/gen_randata_bn.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/rand_model.R +\name{gen_randata_bn} +\alias{gen_randata_bn} +\title{Generate sets of random data} +\usage{ +gen_randata_bn(n, steps, num_genes, max_varperrule, inter_bool) +} +\arguments{ +\item{n}{integer. Number of sets of random data required.} + +\item{steps}{integer. Specify the number of steps between the two Boolean models. If steps=0, give completely random starting model.} + +\item{num_genes}{integer. Number of genes in the Boolean models.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} + +\item{inter_bool}{logical. Indicate whether to consider AND terms.} +} +\description{ +(Requires bnlearn) This function generates specified sets of random data, which include initial states, two Boolean models (starting and destination), and continuous+discrete data of the destination model. +} + diff --git a/man/gen_singlerule.Rd b/man/gen_singlerule.Rd new file mode 100644 index 0000000..2744119 --- /dev/null +++ b/man/gen_singlerule.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/rand_model.R +\name{gen_singlerule} +\alias{gen_singlerule} +\title{Generate random act and inh rule for a single gene} +\usage{ +gen_singlerule(x, np, tar_ind, ibool = F, self_loop = F) +} +\arguments{ +\item{x}{character vector. A vector of all single terms to be used.} + +\item{np}{integer. Number of gene variables in a rule. NOT max_varperrule here.} + +\item{tar_ind}{numerical. Indicate which gene is the rule for. Used in preventing self-loop.} + +\item{ibool}{logical. Indicates whether to include AND terms or not. Default to F.} + +\item{self_loop}{logical. Indicates whether to allow self_loop. Default to F.} +} +\description{ +This function generates one random Boolean rule (both act and inh) per run. Return a list of two vectors. +Note that this method will not give empty rule, i.e. 0 term in both act and inh rules. +} + diff --git a/man/gen_two_rmodel.Rd b/man/gen_two_rmodel.Rd new file mode 100644 index 0000000..4840900 --- /dev/null +++ b/man/gen_two_rmodel.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/rand_model.R +\name{gen_two_rmodel} +\alias{gen_two_rmodel} +\title{Generate two random Boolean models with a specified number of steps apart} +\usage{ +gen_two_rmodel(var, steps, mvar = length(var), inter_bool = F, + in_bmodel = NULL, self_loop = F) +} +\arguments{ +\item{var}{character vector. A vector of single genes/variables to be used in the model.} + +\item{steps}{integer. Number of steps apart between the two models. If steps=0, give completely random starting model.} + +\item{mvar}{integer. Maximum number of variables in act or inh rule. Default to length(var).} + +\item{inter_bool}{logical. Indicates whether to include AND terms or not. Default to F.} + +\item{in_bmodel}{BoolModel object. The starting model supplied.} + +\item{self_loop}{logical. Indicates whether to allow self_loop. Default to F.} +} +\description{ +This function generates a random Boolean model, then get another random Boolean model that is a specified number of steps apart by adding and/or removing genes. +Returns a list of two S4 BoolModel objects. +} +\details{ +The number of terms in a function for a gene is modelled by power-law distribution. +} + diff --git a/man/gen_two_rmodel_dag.Rd b/man/gen_two_rmodel_dag.Rd new file mode 100644 index 0000000..d62f2e1 --- /dev/null +++ b/man/gen_two_rmodel_dag.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/rand_model.R +\name{gen_two_rmodel_dag} +\alias{gen_two_rmodel_dag} +\title{Generate two random DAG Boolean models with a specified number of steps apart} +\usage{ +gen_two_rmodel_dag(var, steps, mvar = length(var), in_amat = NULL, + acyclic = T) +} +\arguments{ +\item{var}{character vector. A vector of single genes/variables to be used in the model.} + +\item{steps}{integer. Number of steps apart between the two models. If steps=0, give completely random starting model.} + +\item{mvar}{integer. Maximum number of variables in act or inh rule. Default to length(var).} + +\item{in_amat}{matrix. Provide adjacency matrix of first model.} + +\item{acyclic}{logical. Whether to restrict the model to being acyclic or not. Defaults to TRUE.} +} +\description{ +This function generates a random DAG Boolean model, then get another random DAG Boolean model that is a specified number of steps apart by adding and/or removing genes. +Difficult to generate completely directed graph with a specified number of steps apart. +} + diff --git a/man/get_encodings.Rd b/man/get_encodings.Rd new file mode 100644 index 0000000..41f9a9f --- /dev/null +++ b/man/get_encodings.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/compression.R +\name{get_encodings} +\alias{get_encodings} +\title{Get corresponding encodings for compression or decompression.} +\usage{ +get_encodings(bmodel, inter_bool) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{inter_bool}{logical. Indicate whether to consider AND terms.} +} +\description{ +This function gets all possible terms (single or double terms) and their corresponding encodings. +This function limits the number of possible variables in the model to 999. +} + diff --git a/man/grow_bmodel.Rd b/man/grow_bmodel.Rd new file mode 100644 index 0000000..12c4e68 --- /dev/null +++ b/man/grow_bmodel.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/model_modification.R +\name{grow_bmodel} +\alias{grow_bmodel} +\title{Add extra genes to a Boolean model} +\usage{ +grow_bmodel(in_model, in_gene) +} +\arguments{ +\item{in_model}{data frame with 2 columns, which are targets and factors} + +\item{in_gene}{character vector. Genes to be added into the model.} +} +\description{ +This function adds extra genes to a Boolean model. Input model must be in data frame format, output model will be BoolModel object. +} + diff --git a/man/initialise_data.Rd b/man/initialise_data.Rd new file mode 100644 index 0000000..b0e3ceb --- /dev/null +++ b/man/initialise_data.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/initialisation.R +\name{initialise_data} +\alias{initialise_data} +\title{Initialise data} +\usage{ +initialise_data(state, aslogic = F) +} +\arguments{ +\item{state}{data frame. It should contain either 0/1 or F/T data of gene expression.} + +\item{aslogic}{logical. It specifies whether to convert the input data into Boolean values. Default to FALSE.} +} +\description{ +This function initialises data frame of Boolean state space. Returns initialised data frame. +} + diff --git a/man/initialise_model.Rd b/man/initialise_model.Rd new file mode 100644 index 0000000..f4bd251 --- /dev/null +++ b/man/initialise_model.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/initialisation.R +\name{initialise_model} +\alias{initialise_model} +\title{Initialise model} +\usage{ +initialise_model(init_model) +} +\arguments{ +\item{init_model}{data frame of Boolean model. It should contain two columns, targets and functions.} +} +\description{ +This function initialises a Boolean model. Returns initialised S4 BoolModel object. +Note that the model should only has 1 NOT operator. More than 1 is STRICTLY NOT allowed. +} + diff --git a/man/initialise_raw_data.Rd b/man/initialise_raw_data.Rd new file mode 100644 index 0000000..df5b019 --- /dev/null +++ b/man/initialise_raw_data.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/initialisation.R +\name{initialise_raw_data} +\alias{initialise_raw_data} +\title{Initialise raw data} +\usage{ +initialise_raw_data(x, data_type = "qpcr", uni_thre = 0.2, scale = T) +} +\arguments{ +\item{x}{matrix. Numeric data of gene expression.} + +\item{data_type}{character. Specify data types: qpcr, rnaseq.} + +\item{uni_thre}{numerical. Speficy threshold for unimodality test. Default to 0.2.} + +\item{scale}{logical. Whether to scale the data to a range of 0-1. Default to T.} +} +\description{ +This function initialise raw gene expression values in a matrix. Return a list of two matrices: (1) continuous values and (2) binary values. +Note that kmeans clustering as binarisation only works well if the data has a bimodal distribution. +} + diff --git a/man/krum_bmodel.Rd b/man/krum_bmodel.Rd new file mode 100644 index 0000000..579807b --- /dev/null +++ b/man/krum_bmodel.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data_desc.R +\docType{data} +\name{krum_bmodel} +\alias{krum_bmodel} +\title{Myeloid Boolean Model from Krumsiek et al.} +\format{A data frame with 11 rows and 2 columns. + +Rows: each row consists of 1 gene and its associated Boolean rule. +Column 1: target gene +Column 2: associated Boolean rule} +\usage{ +data(krum_bmodel) +} +\description{ +A Boolean model describing myeloid development in mice. It contains 11 genes. Its steady states are 4 static attractors. +} + diff --git a/man/krum_istate.Rd b/man/krum_istate.Rd new file mode 100644 index 0000000..dcd8f2d --- /dev/null +++ b/man/krum_istate.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data_desc.R +\docType{data} +\name{krum_istate} +\alias{krum_istate} +\title{Initial state from Krumsiek et al.} +\format{A data frame with 1 row and 11 columns. + +Rows: each row consists of 1 set of Boolean state. +Columns: each column is for 1 gene/variable.} +\usage{ +data(krum_istate) +} +\description{ +An intial state specified in Krumsiek et al. It contains a set of Boolean values for 11 genes. +} + diff --git a/man/m_score.Rd b/man/m_score.Rd new file mode 100644 index 0000000..010bd58 --- /dev/null +++ b/man/m_score.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/score_calculation.R +\name{m_score} +\alias{m_score} +\title{Inner function of calculating Boolean model score} +\usage{ +m_score(x, bmodel, max_varperrule, detail = F) +} +\arguments{ +\item{x}{matrix vector. Pairwise scores computed by dist_measure().} + +\item{bmodel}{S4 BoolModel object. Model to be evaluated.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} + +\item{detail}{logical. Whether to give more details in score calculation. Default to FALSE.} +} +\description{ +This function calculates a final model score from pairwise and penalty scores. +} + diff --git a/man/match_state.Rd b/man/match_state.Rd new file mode 100644 index 0000000..cdcbcf4 --- /dev/null +++ b/man/match_state.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{match_state} +\alias{match_state} +\title{Check for matching states} +\usage{ +match_state(mstate, xstate) +} +\arguments{ +\item{mstate}{data frame. It should be a state(row) x gene(column) df. colnames will be used in comparison.} + +\item{xstate}{data frame. It should be a state(row) x gene(column) df. colnames will be used in comparison.} +} +\description{ +This function finds a match between two df of states. Returns a row index vector indicating for each row of mstate, what is the corresponding row in xstate. If a match cannot be found, a 0 will be return. +Only columns that are present in both df will be used in comparison. Note that the row index starts from 1 (as in R), not from 0 (as in cpp). +} + diff --git a/man/match_state_loop.Rd b/man/match_state_loop.Rd new file mode 100644 index 0000000..c1a00cd --- /dev/null +++ b/man/match_state_loop.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{match_state_loop} +\alias{match_state_loop} +\title{Find a match between two data frames.} +\usage{ +match_state_loop(mstate, xstate) +} +\arguments{ +\item{mstate}{data frame. It should be a state(row) x gene(column) df.} + +\item{xstate}{data frame. It should be a state(row) x gene(column) df.} +} +\description{ +(&&&Not for public use&&&)This function finds a match between two df of states. Used in match_state(). Return an row index vector indicating which row of mstate matches the rows in xstate. +} + diff --git a/man/match_term.Rd b/man/match_term.Rd new file mode 100644 index 0000000..4751b42 --- /dev/null +++ b/man/match_term.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{match_term} +\alias{match_term} +\title{Check for matching terms} +\usage{ +match_term(t1, t2, mode = "logic") +} +\arguments{ +\item{t1}{character vector of length 1 or more. It should be a vector of gene variable.} + +\item{t2}{character vector of length 1 or more. It should be a vector of gene variables.} + +\item{mode}{character. Indicates the mode of action. Options: 'logic', 'unique'. Default to 'logic'.} +} +\description{ +This function checks if the first term is found within a vector of terms. Return a logical value. +It is smarter than simple string matching, e.g. match_term(v1s&v2s, v2s&v1s) == T, match_term(v1s, v1s&v2s) == T, match_term(v1s&v2s, v1s) == T. +} + diff --git a/man/minmod_internal.Rd b/man/minmod_internal.Rd new file mode 100644 index 0000000..4eb7b88 --- /dev/null +++ b/man/minmod_internal.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/model_modification.R +\name{minmod_internal} +\alias{minmod_internal} +\title{Inner function of minimal modification of whole Boolean model} +\usage{ +minmod_internal(bm, index) +} +\arguments{ +\item{bm}{S4 BoolModel object.} + +\item{index}{integer. Specifying rule of which gene to modify.} +} +\description{ +This function generates all possible boolean models minimally modified. Returns a lists of 2 lists, deleted models and added models +} + diff --git a/man/minmod_model.Rd b/man/minmod_model.Rd new file mode 100644 index 0000000..ac9dc93 --- /dev/null +++ b/man/minmod_model.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/model_modification.R +\name{minmod_model} +\alias{minmod_model} +\title{Minimal modification of whole Boolean model} +\usage{ +minmod_model(bm, index = NULL, overlap_gene = NULL) +} +\arguments{ +\item{bm}{S4 BoolModel object.} + +\item{index}{integer. Specifying rule of which gene to modify. If NULL, modifies all rules in the model. Defaults to NULL.} + +\item{overlap_gene}{character vector. Specify which genes are present in both model and data inputs. Only needed when index=NULL.} +} +\description{ +This function generates all possible boolean models minimally modified. Returns a lists of 2 lists, deleted models and added models +} + diff --git a/man/model_consensus.Rd b/man/model_consensus.Rd new file mode 100644 index 0000000..3011357 --- /dev/null +++ b/man/model_consensus.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/search.R +\name{model_consensus} +\alias{model_consensus} +\title{Intersection of input genes} +\usage{ +model_consensus(bmodel_list, inter_bool, max_varperrule, format = "vec") +} +\arguments{ +\item{bmodel_list}{list of BoolModel.} + +\item{inter_bool}{logical. Indicate whether to consider AND terms.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} + +\item{format}{character. Specifies which format to return. Possible values: 'vec', 'df'. Default to 'vec'.} +} +\description{ +This function finds the intersection of input genes and provide a score for them. Return a consensus model or a vector of scores. +} + diff --git a/man/model_dist.Rd b/man/model_dist.Rd new file mode 100644 index 0000000..4f37692 --- /dev/null +++ b/man/model_dist.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{model_dist} +\alias{model_dist} +\title{Calculate distance between Boolean models} +\usage{ +model_dist(x, y, inter_bool, max_varperrule) +} +\arguments{ +\item{x}{S4 BoolModel object. Test model.} + +\item{y}{S4 BoolModel object. Reference model.} + +\item{inter_bool}{logical. Indicate whether to consider AND terms.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} +} +\description{ +This method takes in two models and calculate the distance between them. The value return indicate the number of steps between the two models. +} + diff --git a/man/model_setdiff.Rd b/man/model_setdiff.Rd new file mode 100644 index 0000000..7d8fce1 --- /dev/null +++ b/man/model_setdiff.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{model_setdiff} +\alias{model_setdiff} +\title{Find the set difference between two Boolean models} +\usage{ +model_setdiff(x, y, inter_bool, max_varperrule, directed = F) +} +\arguments{ +\item{x}{S4 BoolModel object. Test model.} + +\item{y}{S4 BoolModel object. Reference model.} + +\item{inter_bool}{logical. Indicate whether to consider AND terms.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} + +\item{directed}{logical. If TRUE, return the difference in terms with respect to x.} +} +\description{ +This method takes in two models and find the set difference between them. Return a vector with the set difference. +} + diff --git a/man/model_simplify.Rd b/man/model_simplify.Rd new file mode 100644 index 0000000..a9ef270 --- /dev/null +++ b/man/model_simplify.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/search.R +\name{model_simplify} +\alias{model_simplify} +\title{Simplifying Model} +\usage{ +model_simplify(bmodel, istate, inter_bool, max_varperrule, verbose = F) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{istate}{data frame. Must have only 1 row, which represents 1 initial state.} + +\item{inter_bool}{logical. Indicate whether to consider AND terms.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} + +\item{verbose}{logical. Specifies whether to give detailed output. Default to F.} +} +\description{ +This method takes in a model and remove redundant terms wrt to a single initial state. +Note that this model simplification is random, and the simplified model is not guaranteed to be the simplest model possible. It is only guaranteed to be a simpler model that can give the same state space as the orignal input model. +} + diff --git a/man/model_train.Rd b/man/model_train.Rd new file mode 100644 index 0000000..fc2fe63 --- /dev/null +++ b/man/model_train.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/search.R +\name{model_train} +\alias{model_train} +\title{Training Model} +\usage{ +model_train(bmodel = NULL, edata, istate, max_varperrule = 6, tol = 1e-06, + inter_bool, verbose = F, self_loop = F) +} +\arguments{ +\item{bmodel}{Boolean model in data frame. If NULL, use a random Boolean model. Default to NULL.} + +\item{edata}{list of 2 data frames. Initialised continuous and discretised expression data. Each data frame should have state(row) x gene(column).} + +\item{istate}{data frame. Must have only 1 row, which represents 1 initial state.} + +\item{max_varperrule}{integer. Maximum number of terms per rule (combining both act and inh rule). Note that this number must not be smaller than number of variables. Default to 6.} + +\item{tol}{numeric. Specify the tolerance in ending condition. Default to 1e-6. It cannot be lower than .Machine$double.eps ^ 0.5.} + +\item{inter_bool}{logical. Indicate whether to consider AND terms. Default to TRUE.} + +\item{verbose}{logical. Specifies whether to give detailed output. Default to F.} + +\item{self_loop}{logical. Indicates whether to allow self_loop in random starting model. Only used if is.null(bmodel). Default to F.} +} +\description{ +This function performs model training to find the best model, using information from data. It requires an initial state supplied to perform the search, and an initial model can also be supplied to be included in the initial population. +Note that if a model is supplied, and the genes in the model is different from the genes in the data, only the genes overlapping between model and data will be retained for further analysis. +} + diff --git a/man/moig_data.Rd b/man/moig_data.Rd new file mode 100644 index 0000000..ff6152d --- /dev/null +++ b/man/moig_data.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data_desc.R +\docType{data} +\name{moig_data} +\alias{moig_data} +\title{Discretised single cell qRT-PCR expression data from Moignard et al.} +\format{A data frame with 597 rows and 18 columns. + +Rows: each row consists of discretised expression values from 1 cell. +Columns: each column is for 1 gene/variable.} +\usage{ +data(moig_data) +} +\description{ +A discretised single cell expression data obtained from multiple cell types. +} + diff --git a/man/moig_raw_data.Rd b/man/moig_raw_data.Rd new file mode 100644 index 0000000..76cd91f --- /dev/null +++ b/man/moig_raw_data.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data_desc.R +\docType{data} +\name{moig_raw_data} +\alias{moig_raw_data} +\title{Raw single cell qRT-PCR expression data from Moignard et al.} +\format{A data frame with 597 rows and 18 columns. + +Rows: each row consists of raw expression values from 1 cell. +Columns: each column is for 1 gene/variable.} +\usage{ +data(moig_raw_data) +} +\description{ +A raw single cell expression data obtained from multiple cell types. +} + diff --git a/man/outcyto_model.Rd b/man/outcyto_model.Rd new file mode 100644 index 0000000..fe31101 --- /dev/null +++ b/man/outcyto_model.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/output_format.R +\name{outcyto_model} +\alias{outcyto_model} +\title{Output a Boolean Model into Cytoscape readable format} +\usage{ +outcyto_model(bmodel, filepath = getwd()) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{filepath}{character vector. Specify path (AND NOT file name). Default to current working directory, i.e. getwd(). Set to NULL to disable file output.} +} +\description{ +This function outputs a Boolean Model in a format that is readable by Cytoscape. Return invisibly the edges (with edge attributes) and node attributes. (i.e. list of 2 dfs) +} + diff --git a/man/outcyto_stategraph.Rd b/man/outcyto_stategraph.Rd new file mode 100644 index 0000000..0ceabb9 --- /dev/null +++ b/man/outcyto_stategraph.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/output_format.R +\name{outcyto_stategraph} +\alias{outcyto_stategraph} +\title{Generate state transition graph readable by Cytoscapes} +\usage{ +outcyto_stategraph(mstate, bmodel, directed = F, record.both = F, + filepath = getwd()) +} +\arguments{ +\item{mstate}{data frame. It should be a state(row) x gene(column) df.} + +\item{bmodel}{S4 BoolModel object.} + +\item{directed}{logical. Indicates whether to make directed edges or not. Default to FALSE.} + +\item{record.both}{logical. Indicates whether to also record nodes that have no edges. Default to FALSE.} + +\item{filepath}{character vector. Specify path (AND NOT file name). Default to current working directory, i.e. getwd(). Set to NULL to disable file output.} +} +\description{ +This function generates a state transition graph using a Boolean model and its state space. Each node represent a state. All nodes in this graph is linked by an edge only if the 2 states have different value in only 1 gene. The output is readable by Cytoscape. +} + diff --git a/man/outgenysis_model.Rd b/man/outgenysis_model.Rd new file mode 100644 index 0000000..ed5a17a --- /dev/null +++ b/man/outgenysis_model.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/output_format.R +\name{outgenysis_model} +\alias{outgenysis_model} +\title{Output a Boolean Model into Genysis readable format} +\usage{ +outgenysis_model(bmodel, filepath = getwd()) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{filepath}{character vector. Specify path (AND NOT file name). Default to current working directory, i.e. getwd(). Set to NULL to disable file output.} +} +\description{ +This function outputs a Boolean Model in a format that is readable by Genysis. Return invisibly the formatted vector. +} + diff --git a/man/param_bimodal.Rd b/man/param_bimodal.Rd new file mode 100644 index 0000000..f96000d --- /dev/null +++ b/man/param_bimodal.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{param_bimodal} +\alias{param_bimodal} +\title{Obtain parameters for bimodal distribution from real data} +\usage{ +param_bimodal(x, data_type = "qpcr") +} +\arguments{ +\item{x}{matrix. Input expression data. Col-genes, row-samples.} + +\item{data_type}{character. Specify data types: qpcr, rnaseq.} +} +\description{ +This function obtains parameters for bimodal distribution. Returns 4 parameters: mu1, mu2, sig1, sig2. +} + diff --git a/man/printBM.Rd b/man/printBM.Rd new file mode 100644 index 0000000..2d8b0ad --- /dev/null +++ b/man/printBM.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/methods.R +\name{printBM} +\alias{printBM} +\title{Print Boolean Model} +\usage{ +printBM(bmodel, gene.names = F) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{gene.names}{logical. Specify whether to write rules in terms of genes or internal variables. Default to FALSE.} +} +\description{ +This method converts the S4 BoolModel object back into a human-readable data frame, with two columns: (1) target genes, (2) Boolean rules. +} + diff --git a/man/rcpp_ham_dist.Rd b/man/rcpp_ham_dist.Rd new file mode 100644 index 0000000..3e91fcb --- /dev/null +++ b/man/rcpp_ham_dist.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{rcpp_ham_dist} +\alias{rcpp_ham_dist} +\title{Calculating Hamming pairwise scores between model and data states.} +\usage{ +rcpp_ham_dist(x_df, y_df) +} +\arguments{ +\item{x_df}{matrix. It should be logical matrix of model states.} + +\item{y_df}{matrix. It should be logical matrix of data states.} +} +\description{ +This function calculates the pairwise scores between each row of model and data states. The score is calculated using a custom binary distance measure. +} + diff --git a/man/rcpp_m_score.Rd b/man/rcpp_m_score.Rd new file mode 100644 index 0000000..d29e7d1 --- /dev/null +++ b/man/rcpp_m_score.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{rcpp_m_score} +\alias{rcpp_m_score} +\title{Inner core for m_score()} +\usage{ +rcpp_m_score(x_df) +} +\arguments{ +\item{x_df}{matrix. Matrix with columns ranked wrt each row.} +} +\description{ +This function takes in a df with columns ranked wrt each row, and try to assign each row to a unique column without repetition. +} + diff --git a/man/rcpp_man_dist.Rd b/man/rcpp_man_dist.Rd new file mode 100644 index 0000000..4f5b249 --- /dev/null +++ b/man/rcpp_man_dist.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{rcpp_man_dist} +\alias{rcpp_man_dist} +\title{Calculating pairwise scores between model and data states.} +\usage{ +rcpp_man_dist(x_df, y_df) +} +\arguments{ +\item{x_df}{matrix. It should be numerical matrix of model states.} + +\item{y_df}{matrix. It should be numerical matrix of data states.} +} +\description{ +This function calculates the pairwise scores between each row of model and data states. The score is calculated using a custom binary distance measure. +} + diff --git a/man/rcpp_simulate.Rd b/man/rcpp_simulate.Rd new file mode 100644 index 0000000..4805cf5 --- /dev/null +++ b/man/rcpp_simulate.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{rcpp_simulate} +\alias{rcpp_simulate} +\title{Simulate a Boolean model.} +\usage{ +rcpp_simulate(bmodel, fstate, verbose = FALSE) +} +\arguments{ +\item{bmodel}{list. A list of 4 lists created by decreate_model().} + +\item{fstate}{data frame. It must have been initialised by initialise_data(), and has gene names as column names. Must contain only 1 row.} + +\item{verbose}{logical. Indicates whether to output progress.} +} +\description{ +(&&&Not for public use&&&)This function simulates the Boolean model using an initial state. For use within simulate_model(). Returns a matrix of full asynchronous state space. +} + diff --git a/man/rcpp_validate.Rd b/man/rcpp_validate.Rd new file mode 100644 index 0000000..6cd1548 --- /dev/null +++ b/man/rcpp_validate.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{rcpp_validate} +\alias{rcpp_validate} +\title{Calculating validation scores between two adjacency matrices} +\usage{ +rcpp_validate(inf_mat, true_mat) +} +\arguments{ +\item{inf_mat}{matrix. It should be adjacency matrix of inferred network.} + +\item{true_mat}{matrix. It should be adjacency matrix of true network.} +} +\description{ +This function calculates the validation scores between two adjacency matrices. +} + diff --git a/man/real_param.Rd b/man/real_param.Rd new file mode 100644 index 0000000..f971605 --- /dev/null +++ b/man/real_param.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/data_desc.R +\docType{data} +\name{real_param} +\alias{real_param} +\title{Estimated parameters from Wilson et al. data} +\format{A list with 4 numeric vectors, all_mu1, all_mu2, all_sig1, all_sig2. Note that each element in the vector is estimated from a single gene.} +\usage{ +data(real_param) +} +\description{ +A list of parameters (based on log normal distribution) estimated from Wilson et al. single-cell qPCR expression data. +} + diff --git a/man/simulate_model.Rd b/man/simulate_model.Rd new file mode 100644 index 0000000..952490f --- /dev/null +++ b/man/simulate_model.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/simulation.R +\name{simulate_model} +\alias{simulate_model} +\title{Simulating Boolean model} +\usage{ +simulate_model(bmodel, istate, steady_bool = F) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{istate}{data frame. It must have been initialised by initialise_data(), and has gene names as column names. Must contain only 1 row.} + +\item{steady_bool}{logical. Specifies whether to return point steady states or not. Default to F.} +} +\description{ +This function simulates the Boolean model using an initial state. Returns the full asynchronous state space, and point steady states. +} + diff --git a/man/unique_raw_data.Rd b/man/unique_raw_data.Rd new file mode 100644 index 0000000..0cd4620 --- /dev/null +++ b/man/unique_raw_data.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/initialisation.R +\name{unique_raw_data} +\alias{unique_raw_data} +\title{Remove raw data duplicated wrt to the model state} +\usage{ +unique_raw_data(dx, cx) +} +\arguments{ +\item{dx}{matrix. Initialised and discretised numeric data of gene expression.} + +\item{cx}{matrix. Initialised and continuous numeric data of gene expression.} +} +\description{ +This function removes the 'duplicates' in an expression wrt to the model state. +} + diff --git a/man/validate_adjmat.Rd b/man/validate_adjmat.Rd new file mode 100644 index 0000000..159e7b4 --- /dev/null +++ b/man/validate_adjmat.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/score_calculation.R +\name{validate_adjmat} +\alias{validate_adjmat} +\title{Calculate true positive, true negative, false positive and false negative} +\usage{ +validate_adjmat(inf_mat, true_mat) +} +\arguments{ +\item{inf_mat}{matrix. It should be adjacency matrix of inferred network.} + +\item{true_mat}{matrix. It should be adjacency matrix of true network.} +} +\description{ +This function calculates the true positive, true negative, false positive and false negative values from the adjacency matrices. +} + diff --git a/man/vcat.Rd b/man/vcat.Rd new file mode 100644 index 0000000..2274b5d --- /dev/null +++ b/man/vcat.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{vcat} +\alias{vcat} +\title{Verbose cat} +\usage{ +vcat(string, bool) +} +\arguments{ +\item{string}{character vector. String intended to be printed.} + +\item{bool}{logical. Specify whether to print the string to stout or not.+} +} +\description{ +This function is a simple wrapper for cat with a Boolean value for turning it on/off. +} + diff --git a/man/which.random.min.Rd b/man/which.random.min.Rd new file mode 100644 index 0000000..1ca082e --- /dev/null +++ b/man/which.random.min.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/general.R +\name{which.random.min} +\alias{which.random.min} +\title{Pick a random minimum value} +\usage{ +which.random.min(x, favour_first = F) +} +\arguments{ +\item{x}{numeric vector.} + +\item{favour_first}{logical. If this is TRUE, and the first value in the vector supplies is one of the min, the index for the first value will always be returned.} +} +\description{ +This function locates the minimum value in a vector (similar to which.min), however it will randomly break ties when there are multiple minimum values. +} + diff --git a/man/writeBM.Rd b/man/writeBM.Rd new file mode 100644 index 0000000..1fef686 --- /dev/null +++ b/man/writeBM.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/methods.R +\name{writeBM} +\alias{writeBM} +\title{Write Boolean Model} +\usage{ +writeBM(bmodel, file, gene.names = F, rownames = F) +} +\arguments{ +\item{bmodel}{S4 BoolModel object.} + +\item{file}{file name with path, or a file connection object.} + +\item{gene.names}{logical. Specify whether to write rules in terms of genes or internal variables. Default to FALSE.} + +\item{rownames}{logical. It specifies whether to write row names.} +} +\description{ +This method writes the S4 BoolModel object into a CSV file. This method is a wrapper for print.BoolModel. The output is a data frame, with two columns: (1) target genes, (2) Boolean rules. +} + diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..0597841 --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,79 @@ +// This file was generated by Rcpp::compileAttributes +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +// match_state_loop +Rcpp::NumericVector match_state_loop(Rcpp::NumericMatrix mstate, Rcpp::NumericMatrix xstate); +RcppExport SEXP BoolTraineR_match_state_loop(SEXP mstateSEXP, SEXP xstateSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type mstate(mstateSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type xstate(xstateSEXP); + __result = Rcpp::wrap(match_state_loop(mstate, xstate)); + return __result; +END_RCPP +} +// rcpp_man_dist +Rcpp::NumericVector rcpp_man_dist(Rcpp::NumericMatrix x_df, Rcpp::NumericMatrix y_df); +RcppExport SEXP BoolTraineR_rcpp_man_dist(SEXP x_dfSEXP, SEXP y_dfSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type x_df(x_dfSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type y_df(y_dfSEXP); + __result = Rcpp::wrap(rcpp_man_dist(x_df, y_df)); + return __result; +END_RCPP +} +// rcpp_ham_dist +Rcpp::NumericVector rcpp_ham_dist(Rcpp::LogicalMatrix x_df, Rcpp::LogicalMatrix y_df); +RcppExport SEXP BoolTraineR_rcpp_ham_dist(SEXP x_dfSEXP, SEXP y_dfSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::LogicalMatrix >::type x_df(x_dfSEXP); + Rcpp::traits::input_parameter< Rcpp::LogicalMatrix >::type y_df(y_dfSEXP); + __result = Rcpp::wrap(rcpp_ham_dist(x_df, y_df)); + return __result; +END_RCPP +} +// rcpp_m_score +Rcpp::IntegerVector rcpp_m_score(Rcpp::IntegerMatrix x_df); +RcppExport SEXP BoolTraineR_rcpp_m_score(SEXP x_dfSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type x_df(x_dfSEXP); + __result = Rcpp::wrap(rcpp_m_score(x_df)); + return __result; +END_RCPP +} +// rcpp_validate +Rcpp::NumericVector rcpp_validate(Rcpp::NumericMatrix inf_mat, Rcpp::NumericMatrix true_mat); +RcppExport SEXP BoolTraineR_rcpp_validate(SEXP inf_matSEXP, SEXP true_matSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type inf_mat(inf_matSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type true_mat(true_matSEXP); + __result = Rcpp::wrap(rcpp_validate(inf_mat, true_mat)); + return __result; +END_RCPP +} +// rcpp_simulate +Rcpp::List rcpp_simulate(Rcpp::List bmodel, Rcpp::LogicalVector fstate, bool verbose); +RcppExport SEXP BoolTraineR_rcpp_simulate(SEXP bmodelSEXP, SEXP fstateSEXP, SEXP verboseSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< Rcpp::List >::type bmodel(bmodelSEXP); + Rcpp::traits::input_parameter< Rcpp::LogicalVector >::type fstate(fstateSEXP); + Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP); + __result = Rcpp::wrap(rcpp_simulate(bmodel, fstate, verbose)); + return __result; +END_RCPP +} diff --git a/src/general.cpp b/src/general.cpp new file mode 100644 index 0000000..fa6ffdf --- /dev/null +++ b/src/general.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +//internal C++ functions. not exported. +bool all_cpp(Rcpp::LogicalVector x) { + /*#Function to test for equality between two vectors.*/ + + return is_true(Rcpp::all(x==TRUE)); //#is_true is used to return a bool type. all() is an Rcpp sugar. +} + +//external C++ functions. exported. +//' @title Find a match between two data frames. +//' +//' @description +//' (&&&Not for public use&&&)This function finds a match between two df of states. Used in match_state(). Return an row index vector indicating which row of mstate matches the rows in xstate. +//' +//' @param mstate data frame. It should be a state(row) x gene(column) df. +//' @param xstate data frame. It should be a state(row) x gene(column) df. +// [[Rcpp::export]] //#must be called in front of each C++ function to be exported. +Rcpp::NumericVector match_state_loop(Rcpp::NumericMatrix mstate, Rcpp::NumericMatrix xstate) { + int nrow_ms = mstate.nrow(); + int nrow_xs = xstate.nrow(); + Rcpp::NumericVector ind(nrow_ms); //#must specify length if using vector. + + int turn = 0; + for(auto i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include //for std::invalid_argument +#include //for std::abs(float) +#include //for std::accumulate + +using namespace std; + +//external C++ functions. exported. + +//Rcpp::NumericVector rcpp_test(Rcpp::NumericVector x) { +// std::vector y = Rcpp::as >(x); +// return(Rcpp::wrap(y)); +//} + +//' @title Calculating pairwise scores between model and data states. +//' +//' @description +//' This function calculates the pairwise scores between each row of model and data states. The score is calculated using a custom binary distance measure. +//' +//' @param x_df matrix. It should be numerical matrix of model states. +//' @param y_df matrix. It should be numerical matrix of data states. +// [[Rcpp::export]] +Rcpp::NumericVector rcpp_man_dist(Rcpp::NumericMatrix x_df, Rcpp::NumericMatrix y_df) { + int xdf_row = x_df.nrow(); + int ydf_row = y_df.nrow(); + + if(x_df.ncol() != y_df.ncol()) { + throw std::invalid_argument( "Two input matrices should have the same number of columns." ); + } + + std::vector tmpout(xdf_row * ydf_row); + int ind = 0; + for(auto i=0; i x = Rcpp::as >(xr); + std::vector y = Rcpp::as >(yr); + std::vector z; + + //Calculate the distance between the two vectors. (Manhattan distance) + //Calculates the absolute distance between two of each variables. + std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(z), + [](double x, double y) { return std::abs(x-y); }); + + //Get the sum of all differences. + double o = std::accumulate(z.begin(), z.end(), 0.0); + + //Add the value to the output vector. + tmpout[ind] = o; + ind += 1; + + //std::copy(x.begin(), x.end(), std::ostream_iterator(std::cout, ",")); + //Rcpp::Rcout << std::endl; + } + } + + //Convert Cpp vector into R matrix. + Rcpp::NumericVector output = Rcpp::wrap(tmpout); + output.attr("dim") = Rcpp::Dimension(ydf_row, xdf_row); + + return output; +} + +//' @title Calculating Hamming pairwise scores between model and data states. +//' +//' @description +//' This function calculates the pairwise scores between each row of model and data states. The score is calculated using a custom binary distance measure. +//' +//' @param x_df matrix. It should be logical matrix of model states. +//' @param y_df matrix. It should be logical matrix of data states. +// [[Rcpp::export]] +Rcpp::NumericVector rcpp_ham_dist(Rcpp::LogicalMatrix x_df, Rcpp::LogicalMatrix y_df) { + int xdf_row = x_df.nrow(); + int ydf_row = y_df.nrow(); + + std::vector m_vec(xdf_row * ydf_row); + + int ind = 0; + for(auto i=0; i x = Rcpp::as >(xr); + std::vector y = Rcpp::as >(yr); + + //std::copy(x.begin(), x.end(), std::ostream_iterator(std::cout, ",")); + //Rcpp::Rcout << "After conversion!" << std::endl; + + //Calculate the summation of x and y between each element. + std::vector z; + if(x.size() == y.size()) { + for(unsigned k=0; k x = Rcpp::as >(xr); + + //Obtain the position for the minimum value. + std::vector::iterator min_val = std::min_element(std::begin(x), std::end(x)); //return the first minimum element. Note that the input result should not contain multiple minimum. + yr[i] = std::distance(std::begin(x), min_val); + + //Check if the value is already present, then pick the next min. + Rcpp::IntegerVector unique_yr = Rcpp::unique(yr); + Rcpp::IntegerVector sub_i = Rcpp::seq_len(i); + Rcpp::IntegerVector test_yr = yr[sub_i]; + bool dup_check = Rcpp::as(Rcpp::any(Rcpp::duplicated(test_yr))); + int ind = 2; //start with finding 2nd min value. + // if the model state space is smaller than data state space, it is not possible to assign a unique model state to each data state. + while (dup_check && unique_yr.size() < xdf_col && ind < xdf_row) { + //Rcpp::Rcout << i << " : " << ind << std::endl; + //Rcpp::Rcout << dup_check << std::endl; + //std::copy(yr.begin(), yr.end(), std::ostream_iterator(Rcpp::Rcout, " ")); + //Rcpp::Rcout << std::endl; + + std::vector::iterator alt_val = std::find(std::begin(x), std::end(x), ind); //find 2nd minimum. + yr[i] = std::distance(std::begin(x), alt_val); + + sub_i = Rcpp::seq_len(i); + test_yr = yr[sub_i]; + dup_check = Rcpp::as(Rcpp::any(Rcpp::duplicated(test_yr))); + unique_yr = Rcpp::unique(yr); + ind += 1; //for next iteration, look for the 3rd (and subsequent) values. + } + + //Checking results. + sub_i = Rcpp::seq_len(i); + test_yr = yr[sub_i]; + dup_check = Rcpp::as(Rcpp::any(Rcpp::duplicated(test_yr))); + if(xdf_col >= xdf_row && dup_check) { //if there are more columns than rows, there should not be any duplicated values. + throw std::invalid_argument( "Duplicated values are still present." ); + } + } + + return yr + 1; //change C++ positions to R positions. +} + +//' @title Calculating validation scores between two adjacency matrices +//' +//' @description +//' This function calculates the validation scores between two adjacency matrices. +//' +//' @param inf_mat matrix. It should be adjacency matrix of inferred network. +//' @param true_mat matrix. It should be adjacency matrix of true network. +// [[Rcpp::export]] +Rcpp::NumericVector rcpp_validate(Rcpp::NumericMatrix inf_mat, Rcpp::NumericMatrix true_mat) { + if(inf_mat.ncol() != true_mat.ncol()) { + throw std::invalid_argument( "Two input matrices should have the same number of columns." ); + } + if(inf_mat.nrow() != true_mat.nrow()) { + throw std::invalid_argument( "Two input matrices should have the same number of rows." ); + } + + int tp=0; + int tn=0; + int fp=0; + int fn=0; + for(auto i=0; i x = Rcpp::as >(xr); + std::vector y = Rcpp::as >(yr); + std::vector z; + + //Calculate the frequency of numbers. + //tp=true positive [1,1], tn=true negative [0,0], fp=false positive [1,0], fn=false negative [0,1]. + for(unsigned k=0; k output{tp, tn, fp, fn}; + + return Rcpp::wrap(output); +} diff --git a/src/simulation.cpp b/src/simulation.cpp new file mode 100644 index 0000000..d7f3d10 --- /dev/null +++ b/src/simulation.cpp @@ -0,0 +1,262 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +//internal C++ functions. not exported. +std::vector &split(const std::string &s, char delim, std::vector &elems) { + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, delim)) { + elems.push_back(item); + } + return elems; +} + +std::vector split(const std::string &s, char delim) { + //#Function to split a string into a vector of strings. + std::vector elems; + split(s, delim, elems); + return elems; +} + +bool cpp_eval_bool(vector arule, vector irule, vector mvar, vector val) { + //#Function to evaluate the Boolean rule of one gene with one rule at a time. Return a Boolean value for that gene. + //Note that this function assumes the elements in mvar and val are all ordered. + + //Construct a map object between mvar and val. + std::map mmap; + + mmap.insert(std::make_pair("0", false)); //handles 0 in the rule. + for(unsigned i=0; i arule_bool; + for(unsigned i=0; i tmp = split(arule[i], '&'); //split up the A&B term into A, B. must be single quote, as the function only take single char as delimiter. + + bool tmp_ans = true; //initialised var. + for(unsigned j=0;j irule_bool; + for(unsigned i=0; i tmp = split(irule[i], '&'); //split up the A&B term into A, B. must be single quote, as the function only take single char as delimiter. + + bool tmp_ans = true; //initialised var. + for(unsigned j=0;j first_state = Rcpp::as >(fstate); + std::vector model_gene = Rcpp::as >(bmodel["gene"]); + std::vector model_var = Rcpp::as >(bmodel["var"]); + + std::vector< std::vector > model_actrule; + std::vector< std::vector > model_inhrule; + + std::vector tmp_string; + //fill in model_act_rule vector of vector. + for(unsigned i=0; i >(tmp_list(i)); + model_actrule.push_back(tmp_string); + } + + //fill in model_inh_rule vector of vector. + for(unsigned i=0; i >(tmp_list(i)); + model_inhrule.push_back(tmp_string); + } + + //(2) Start simulation cycle. + int num_gene = first_state.size(); + std::vector< std::vector > next_loop; + next_loop.push_back(first_state); + std::vector< std::vector > all_state; + all_state.push_back(first_state); + std::vector< std::vector > steady_state; + int lvl = 0; + + //std::copy(all_state[0].begin(), all_state[0].end(), std::ostream_iterator(Rcpp::Rcout, ",")); + + while(true) //for debug only. change back to true. + { + std::vector< std::vector > part_state; + lvl += 1; + + //Rcpp::Rcout << "no\n"; + //Rcpp::Rcout << "Next_loop size :" << next_loop.size(); + //Rcpp::Rcout << "\n"; + + if(next_loop.size() != 0) { + for(unsigned i=0; i old_state = next_loop[i]; + + if(verbose == true) { + Rcpp::Rcout << "\rSearching at level " << lvl << ", cell state: "; + std::copy(old_state.begin(), old_state.end(), std::ostream_iterator(Rcpp::Rcout, ",")); + } + + int steady_count = 0; //for checking steady state. + for(auto j=0; j new_state = old_state; + + bool ans = false; + //Checking for the case where both arule and irule are '0'. + //In this case, the val should remain the same, and should not change. Only change the value if both arule and irule are not '0'. + //Equal to 0 indicates a match. A mismatch is -1. + if(model_actrule[j][0].compare("0") != 0 || model_inhrule[j][0].compare("0") != 0) { + ans = cpp_eval_bool(model_actrule[j], model_inhrule[j], model_var, old_state); + + if(old_state[j] != ans) { + new_state[j] = ans; + part_state.push_back(new_state); + } else { + steady_count += 1; + } + + if(steady_count == num_gene) //if all genes stay the same, then it is a point steady state. + { + steady_state.push_back(old_state); + } + } + } + } + } + + //To replace the R unique() called on part_state. all 3 std functions called below are smart to work on vector of vectors. + std::sort(part_state.begin(), part_state.end()); //NOTE that std::unique only works on consecutive duplicates, so sorting is a must! + auto unique_point = std::unique(part_state.begin(), part_state.end()); //unique does not change the data. it creates a new copy of sorted data with all the duplicates placed at the end. + part_state.erase(unique_point, part_state.end()); + + //To concatenate two vectors of vectors. + std::vector< std::vector > tmp_df = all_state; + tmp_df.insert(tmp_df.end(), part_state.begin(), part_state.end()); + + //Check for duplicated vector in tmp_df. + std::sort(tmp_df.begin(), tmp_df.end()); + auto duplicated_point = std::adjacent_find(tmp_df.begin(), tmp_df.end()); + + //Rcpp::Rcout << "Size of before part_state : " << part_state.size() << "\n"; + + if(duplicated_point != tmp_df.end()) { //if there is duplication + for(unsigned j=0; j(Rcpp::Rcout, ",")); + Rcpp::Rcout << "\n"; + } +*/ + + //Pass for next iteration, and save the unique states into all states. + next_loop = part_state; + all_state.insert(all_state.end(), next_loop.begin(), next_loop.end()); + + //Rcpp::Rcout << "Size of all_state : " << all_state.size() << "\n"; + + //Final breakout condition. Break out of loop when there is no more unique state to go. + if(next_loop.size() == 0) { + if(verbose) { + Rcpp::Rcout << "End of simulation.\n"; + } + break; + } + } + if(verbose == true) { + Rcpp::Rcout << "\n"; //to make the output cleaner. + } + + //Convert C++ objects back to R object. + int state_list_nrow = all_state.size(); + int steady_list_nrow = steady_state.size(); + Rcpp::List state_list(state_list_nrow); + Rcpp::List steady_list(steady_list_nrow); + + //Rcpp::Rcout << "Size of all_state : " << all_state.size() << "\n"; + //Rcpp::Rcout << "Size of steady_state : " << steady_state.size() << "\n"; + + for(int i=0; i