diff --git a/bon_wilsonrnaseq_hsc.R b/bon_wilsonrnaseq_hsc.R new file mode 100644 index 0000000..877e0b7 --- /dev/null +++ b/bon_wilsonrnaseq_hsc.R @@ -0,0 +1,38 @@ +#(1) Setup paths and environment. +library(BoolTraineR) +library(doParallel) + +#path='~/bool_final/' +#path='/home/cyl49/ownCloud/Research_Work/working_directory/boolean_project/res2/' +path='D:/ownCloud/Research_Work/working_directory/boolean_project/res2/' +setwd(path) + +#Setting up for parallel processing. +registerDoParallel(cores=4) + +#(2)Load data. +data(bon_bmodel) +data(bon_istate) +data(wilson_raw_rnaseq) + +inter_bool = T +max_varperrule = 6 + +bmodel = initialise_model(bon_bmodel) +istate = initialise_data(bon_istate) + +#(3)Filtering expression data. +tmp_data = initialise_raw_data(wilson_raw_rnaseq) #do not filter data at this stage. keep the whole matrix. +cdata = tmp_data[[1]] #continuous data +ddata = tmp_data[[2]] #discretised data +fcdata = cdata[grepl('hsc', rownames(cdata)),] #select only HSC cells. +fddata = ddata[grepl('hsc', rownames(ddata)),] + +#(3)Filtering expression data. +result = model_train(cdata=fcdata, ddata=fddata, bmodel=bmodel, istate=istate, preprocess=F, verbose=T) + +filename = 'bon_wilson_rnaseq_HSC_trained_bmodel.rda' +save(result, file=filename) + +#Cleaning up. +stopImplicitCluster() diff --git a/gnw_bnlearn_btr_scoring.R b/gnw_bnlearn_btr_scoring.R new file mode 100644 index 0000000..7cb74ea --- /dev/null +++ b/gnw_bnlearn_btr_scoring.R @@ -0,0 +1,215 @@ +#(1) Setup paths and environment. +library(BoolTraineR) +library(bnlearn) +library(ggplot2) +library(gridExtra) #for plotting multiple graphs +library(reshape2) + +#path='~/res1/' +#path='/home/cyl49/ownCloud/Research_Work/working_directory/boolean_project/res1/' +path='D:/ownCloud/Research_Work/working_directory/boolean_project/res1/' +#path='C:/Users/cyl49/ownCloud/Research_Work/working_directory/boolean_project/res1/' +setwd(path) + +inter_bool = T +max_varperrule = 6 +acyclic=T +nonoise=F + +for(file_ind in 1:5) +{ + #Load data. + load(paste(path, ifelse(acyclic, 'acyclic', 'cyclic'), '_data/test_data_', ifelse(nonoise, 'nonoise_', ''),'yeast', file_ind,'_20n30s10g.rda', sep='')) #test_data. + + #score the true model under Bayesian framework. + if(acyclic) + { + true_ndamat = abs(test_data$true_amat) + true_bngraph = empty.graph(colnames(test_data$true_amat)) + amat(true_bngraph) = true_ndamat #if this step throws error, that means the true graph is not DAG. Has to regenerate the random data using GNW. + true_bnscore = score(true_bngraph, as.data.frame(test_data$cdata)) + } + + #score the true model under Boolean model framework. + true_bmmodel = amat_to_bm(test_data$true_amat) + overlap_gene = unname(colnames(test_data$cdata)) + fddata = test_data$ddata + fcdata = test_data$cdata + fcdata = unique_raw_data(fddata, fcdata) #removes duplicates in continuous data. + true_bmscore = calc_mscore(bmodel=true_bmmodel, istate=test_data$istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule) + + #score the other bn models under Bayesian framework using GNW data. + bnmod_gnw_bnscore = c() + if(acyclic) + { + for(i in 1:length(test_data$bn_modamat)) + { + for(j in 1:length(test_data$bn_modamat[[i]])) + { + tmp_bngraph = empty.graph(colnames(test_data$bn_modamat[[i]][[j]])) + amat(tmp_bngraph) = test_data$bn_modamat[[i]][[j]] + tmp_score = score(tmp_bngraph, as.data.frame(test_data$cdata)) + #tmp_score = score(tmp_bngraph, as.data.frame(bm_cdata)) + names(tmp_score) = test_data$bn_step[[i]][[j]] + + bnmod_gnw_bnscore = c(bnmod_gnw_bnscore, tmp_score) + } + } + bnmod_gnw_bnscore = split(bnmod_gnw_bnscore, names(bnmod_gnw_bnscore)) + bnmod_gnw_bnscore = bnmod_gnw_bnscore[order(as.numeric(names(bnmod_gnw_bnscore)))] + bnmod_gnw_bnscore = do.call(cbind, bnmod_gnw_bnscore) + #Add in true score. + bnmod_gnw_bnscore = cbind('0'=rep(true_bnscore, nrow(bnmod_gnw_bnscore)), bnmod_gnw_bnscore) + rownames(bnmod_gnw_bnscore) = NULL + colnames(bnmod_gnw_bnscore) = paste(seq(0,40,2)) + } + + #score the other bm models under Boolean model framework using GNW data. + bmmod_gnw_bmscore = c() + y_bmscore = c() + za_bmscore = c() + for(i in 1:length(test_data$bm_modmodel)) + { + for(j in 1:length(test_data$bm_modmodel[[i]])) + { + + tmp_bmmodel = test_data$bm_modmodel[[i]][[j]] + overlap_gene = unname(colnames(test_data$cdata)) + tmp_bmmodel@target = overlap_gene + #tmp_score = calc_mscore(bmodel=tmp_bmmodel, istate=test_data$istate, fcdata=bm_cdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule, steady_bool=F, distance_only=F) + tmp_score = calc_mscore(bmodel=tmp_bmmodel, istate=test_data$istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule, detail=T) + #tmp_score = calc_mscore(bmodel=tmp_bmmodel, istate=test_data$istate, fcdata=fddata, overlap_gene=overlap_gene, max_varperrule=max_varperrule, steady_bool=F, distance_only=F) + + names(tmp_score) = rep(j, length(tmp_score)) + + bmmod_gnw_bmscore = c(bmmod_gnw_bmscore, tmp_score[1]) + } + } + bmmod_gnw_bmscore = split(bmmod_gnw_bmscore, names(bmmod_gnw_bmscore)) + bmmod_gnw_bmscore = bmmod_gnw_bmscore[order(as.numeric(names(bmmod_gnw_bmscore)))] + bmmod_gnw_bmscore = do.call(cbind, bmmod_gnw_bmscore) + #Add in true score. + bmmod_gnw_bmscore = cbind('0'=rep(true_bmscore, nrow(bmmod_gnw_bmscore)), bmmod_gnw_bmscore) + rownames(bmmod_gnw_bmscore) = NULL + colnames(bmmod_gnw_bmscore) = paste(seq(0,40,2)) + + save(bnmod_gnw_bnscore, bmmod_gnw_bmscore, file=paste(ifelse(acyclic, 'acyclic', 'cyclic'), '_yeast', file_ind, '_scores.rda', sep='')) +} + +################################################################################################################################## + +#Setup the true networks. +true_model_list = list() +for(file_ind in 1:5) +{ + #Load data. + load(paste(path, ifelse(acyclic, 'acyclic', 'cyclic'), '_data/test_data_', ifelse(nonoise, 'nonoise_', ''),'yeast', file_ind,'_20n30s10g.rda', sep='')) #test_data. + + true_bmmodel = amat_to_bm(test_data$true_amat) + true_model_list = c(true_model_list, list(true_bmmodel)) +} + +#Make plots of true models. +for(i in 1:length(true_model_list)) +{ + png(paste(i, ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(nonoise, '_nonoise', ''), '_true_network.png', sep=''), width=2000, height=2000, res=300) + plotBM(true_model_list[[i]]) + title(paste('True ', ifelse(acyclic, 'acyclic', 'cyclic'), ' network', i), cex.main=3) + dev.off() +} + +#Setup the scores for modified models. +bnscore_list = list() +bmscore_list = list() +for(file_ind in 1:5) +{ + load(paste(ifelse(acyclic, 'acyclic', 'cyclic'), '_yeast', file_ind, '_scores.rda', sep='')) #bnmod_gnw_bnscore, bmmod_gnw_bmscore + + bnscore_list = c(bnscore_list, list(bnmod_gnw_bnscore)) + bmscore_list = c(bmscore_list, list(bmmod_gnw_bmscore)) +} + +bnscore_df = melt(bnscore_list) +bnscore_df = bnscore_df[,-1] +bnscore_df[,3] = paste('Network', bnscore_df[,3]) +bmscore_df = melt(bmscore_list) +bmscore_df = bmscore_df[,-1] +bmscore_df[,3] = paste('Network', bmscore_df[,3]) +colnames(bnscore_df) = c('steps', 'score', 'network') +colnames(bmscore_df) = c('steps', 'score', 'network') + +bnscore_mid_df = melt(lapply(bnscore_list, function(x) colMeans(x))) +bnscore_mid_df[,2] = paste('Network', bnscore_mid_df[,2]) +bnscore_mid_df = cbind(rep(seq(0,40,2), 5), bnscore_mid_df) + +bnscore_se = melt(lapply(bnscore_list, function(x) apply(x, 2, function(x) sd(x)/sqrt(length(x))))) +bnscore_se = bnscore_se[,1,drop=F] +bnscore_mid_df = cbind(bnscore_mid_df, bnscore_mid_df[,2]-bnscore_se[,1], bnscore_mid_df[,2]+bnscore_se[,1]) +colnames(bnscore_mid_df) = c('steps', 'score', 'network', 'low', 'high') + +bmscore_mid_df = melt(lapply(bmscore_list, function(x) colMeans(x))) +bmscore_mid_df[,2] = paste('Network', bmscore_mid_df[,2]) +bmscore_mid_df = cbind(rep(seq(0,40,2), 5), bmscore_mid_df) + +bmscore_se = melt(lapply(bmscore_list, function(x) apply(x, 2, function(x) sd(x)/sqrt(length(x))))) +bmscore_se = bmscore_se[,1,drop=F] +bmscore_mid_df = cbind(bmscore_mid_df, bmscore_mid_df[,2]-bmscore_se[,1], bmscore_mid_df[,2]+bmscore_se[,1]) +colnames(bmscore_mid_df) = c('steps', 'score', 'network', 'low', 'high') + +#Make plot objects of scoring functions. +if(acyclic) +{ + p1_bn_box = ggplot(bnscore_df, aes(x=factor(bnscore_df[,'steps']), y=bnscore_df[,'score'])) + + geom_boxplot() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BIC scoring function') + + scale_x_discrete(labels=unique(bnscore_df[,'steps'])) + + facet_wrap(~network, scales='free_y', ncol=1) + + theme(text = element_text(size=20), axis.text.x = element_text(size=10)) + + p2_bm_box = ggplot(bmscore_df, aes(x=factor(bmscore_df[,'steps']), y=bmscore_df[,'score'])) + + geom_boxplot() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') + + scale_x_discrete(labels=unique(bmscore_df[,'steps'])) + + facet_wrap(~network, scales='free_y', ncol=1) + + theme(text = element_text(size=20), axis.text.x = element_text(size=10)) + + png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(nonoise, '_nonoise', ''), '_boxplot_compare_score.png', sep=''), width=3000, height=5000, res=300) + grid.arrange(p1_bn_box, p2_bm_box, ncol=2) + dev.off() + + p1_bn_mid = ggplot(bnscore_mid_df, aes(x=bnscore_mid_df[, 'steps'], y=bnscore_mid_df[,'score'])) + + geom_errorbar(aes(ymin=bnscore_mid_df[, 'low'], ymax=bnscore_mid_df[, 'high'])) + + geom_line() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BIC scoring function') + + facet_wrap(~network, scales='free_y', ncol=1) + + theme(text = element_text(size=20)) + + p2_bm_mid = ggplot(bmscore_mid_df, aes(x=bmscore_mid_df[, 'steps'], y=bmscore_mid_df[,'score'])) + + geom_errorbar(aes(ymin=bmscore_mid_df[, 'low'], ymax=bmscore_mid_df[, 'high'])) + + geom_line() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') + + facet_wrap(~network, scales='free_y', ncol=1) + + theme(text = element_text(size=20)) + + png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(nonoise, '_nonoise', ''), '_mean_compare_score.png', sep=''), width=3000, height=5000, res=300) + grid.arrange(p1_bn_mid, p2_bm_mid, ncol=2) + dev.off() +} else +{ + p2_bm_box = ggplot(bmscore_df, aes(x=factor(bmscore_df[,'steps']), y=bmscore_df[,'score'])) + + geom_boxplot() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') + + scale_x_discrete(labels=unique(bmscore_df[,'steps'])) + + facet_wrap(~network, scales='free_y', ncol=1) + + theme(text = element_text(size=20), axis.text.x = element_text(size=10)) + + png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(nonoise, '_nonoise', ''), '_boxplot_compare_score.png', sep=''), width=3000, height=5000, res=300) + grid.arrange(p2_bm_box, ncol=2) + dev.off() + + p2_bm_mid = ggplot(bmscore_mid_df, aes(x=bmscore_mid_df[, 'steps'], y=bmscore_mid_df[,'score'])) + + geom_errorbar(aes(ymin=bmscore_mid_df[, 'low'], ymax=bmscore_mid_df[, 'high'])) + + geom_line() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') + + facet_wrap(~network, scales='free_y', ncol=1) + + theme(text = element_text(size=20)) + + png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(nonoise, '_nonoise', ''), '_mean_compare_score.png', sep=''), width=3000, height=5000, res=300) + grid.arrange(p2_bm_mid, ncol=2) + dev.off() +} + diff --git a/gnw_btr_model_inference.R b/gnw_btr_model_inference.R new file mode 100644 index 0000000..f372f40 --- /dev/null +++ b/gnw_btr_model_inference.R @@ -0,0 +1,57 @@ +#(1) Setup paths and environment. +library(BoolTraineR) +library(doParallel) + +path='~/bool_final/' +#path='/home/cyl49/ownCloud/Research_Work/working_directory/boolean_project/res1/' +#path='D:/ownCloud/Research_Work/working_directory/boolean_project/res1/' +setwd(path) + +inter_bool = T +max_varperrule = 6 +acyclic = T +initial_model = T + +#Setting up for parallel processing. +registerDoParallel(cores=4) #this automatically calls mclapply() when no cl is given. + +#Setting random seed for reproducibility. +set.seed(1) + +test_result = c() +for(file_ind in 1:5) +{ + #Load data. + load(paste(path, ifelse(acyclic, 'acyclic', 'cyclic'), '_data/test_data_yeast', file_ind,'_20n30s10g.rda', sep='')) #test_data. + + #Making model inference. + start_time = proc.time() + if(initial_model) + { + ind_i = sample(seq(1,length(test_data$bm_modmodel)), 1) + ind_j = 10 + result = model_train(cdata=test_data$cdata, ddata=test_data$ddata, bmodel=test_data$bm_modmodel[[ind_i]][[ind_j]], istate=test_data$istate, and_bool=inter_bool, verbose=T, self_loop=F) #Very time consuming + } else + { + result = model_train(cdata=test_data$cdata, ddata=test_data$ddata, istate=test_data$istate, and_bool=inter_bool, verbose=T, self_loop=F) #Very time consuming + } + adj_mat = abs(bm_to_amat(result)) + end_time = proc.time() + time_taken = end_time - start_time + + #Perform model validation. + true_adj_mat = abs(test_data$true_amat) + val_res = validate_adjmat(adj_mat, true_adj_mat) + perf_score = calc_roc(val_res) + + #Record results. + tmp = c(list(true_adj_mat=true_adj_mat), list(adj_mat=adj_mat), list(perf_score=perf_score), list(time_taken=time_taken)) + test_result = c(test_result, list(tmp)) +} + +#Open file connection for writing output. +file_name = paste('result_btr-', ifelse(initial_model, 'wi', 'wo'), '_train_', ifelse(acyclic, 'acyclic', 'cyclic'), '_yeast.rda', sep='') +save(test_result, file=file_name) + +#Cleaning up. +stopImplicitCluster() diff --git a/gnw_comparison_network_inference.R b/gnw_comparison_network_inference.R new file mode 100644 index 0000000..18e4665 --- /dev/null +++ b/gnw_comparison_network_inference.R @@ -0,0 +1,166 @@ +library(BoolTraineR) +library(ggplot2) +library(gridExtra) #for plotting multiple graphs +library(colorspace) +library(reshape2) +library(Hmisc) #for capitalisation. + +#(1) Load results and combine them. +inter_bool = T +max_varperrule = 6 +acyclic = 'cyclic' #acyclic, cyclic, both + +# sect_1 ------------------------------------------------------------------ + +#path='~/res1/' +path='/home/cyl49/ownCloud/Research_Work/working_directory/boolean_project/res1/' +#path='D:/ownCloud/Research_Work/working_directory/boolean_project/res1/' +setwd(path) + +if(acyclic=='acyclic') +{ + files = list.files(path=path, pattern='^result.+_train_acyclic_yeast[.]rda', full.names = T) +} else if(acyclic=='cyclic') +{ + files = list.files(path=path, pattern='^result.+_train_cyclic_yeast[.]rda', full.names = T) +} else if(acyclic=='both') +{ + files = list.files(path=path, pattern='^result.+_train_[a-z]+_yeast[.]rda', full.names = T) +} + +# sect_2 ------------------------------------------------------------------ + +all_true_adj_mat = list() +all_adj_mat = list() +all_perf_score = c() +all_time_taken = list() +for(i in 1:length(files)) +{ + if(acyclic=='acyclic') + { + tmp_name = gsub('.+result_(.+)_train_acyclic_yeast[.]rda', '\\1', files[i]) #extract name of method. + } else if(acyclic=='cyclic') + { + tmp_name = gsub('.+result_(.+)_train_cyclic_yeast[.]rda', '\\1', files[i]) #extract name of method. + } else if(acyclic=='both') + { + tmp_name = gsub('.+result_(.+)_train_.+_yeast[.]rda', '\\1', files[i]) #extract name of method. + } + + load(files[i]) #test_result. "true_adj_mat", "adj_mat", "perf_score", "time_taken" + + algo_true_adj_mat = list() + algo_adj_mat = list() + algo_perf_score = c() + algo_time_taken = list() + for(j in 1:length(test_result)) + { + algo_true_adj_mat = c(algo_true_adj_mat, list(test_result[[j]]$true_adj_mat)) + algo_adj_mat = c(algo_adj_mat, list(test_result[[j]]$adj_mat)) + algo_perf_score = c(algo_perf_score, list(test_result[[j]]$perf_score)) + algo_time_taken = c(algo_time_taken, list(as.vector(test_result[[j]]$time_taken))) + } + algo_perf_score = do.call(rbind, algo_perf_score) + algo_time_taken = do.call(rbind, algo_time_taken) + + if(tmp_name %in% names(all_true_adj_mat)) #only happen in acyclic=both. + { + all_true_adj_mat[[tmp_name]] = c(all_true_adj_mat[[tmp_name]], algo_true_adj_mat) + all_adj_mat[[tmp_name]] = c(all_adj_mat[[tmp_name]], algo_adj_mat) + all_perf_score[[tmp_name]] = rbind(all_perf_score[[tmp_name]], algo_perf_score) + all_time_taken[[tmp_name]] = rbind(all_time_taken[[tmp_name]], algo_time_taken) + } else + { + all_true_adj_mat = c(all_true_adj_mat, setNames(list(algo_true_adj_mat), tmp_name)) + all_adj_mat = c(all_adj_mat, setNames(list(algo_adj_mat), tmp_name)) + all_perf_score = c(all_perf_score, setNames(list(algo_perf_score), tmp_name)) + all_time_taken = c(all_time_taken, setNames(list(algo_time_taken), tmp_name)) + } +} + +# sect_3 ------------------------------------------------------------------ + +#Prepare data for plotting. +method_name = toupper(as.vector(sapply(names(all_perf_score), function(x) rep(x, nrow(all_perf_score[[1]]))))) +network_name = rep(paste('net_', seq(1,nrow(all_perf_score[[1]])), sep=''), length(all_perf_score)) +comb_df = do.call(rbind, all_perf_score) + +stopifnot(nrow(comb_df)==length(method_name)) + +comb_df = data.frame(method_name, network_name, comb_df, stringsAsFactors=F) +comb_df[is.na(comb_df)] = 0 +colnames(comb_df) = c(colnames(comb_df)[1:2], toupper(colnames(comb_df)[3:10])) +colnames(comb_df)[10] = 'F-score' + +#Setting up consistent colour. +plot_col = rainbow_hcl(length(unique(comb_df[,'method_name'])), alpha=0.8) +names(plot_col) = unique(comb_df[,'method_name']) + +plot_df = melt(comb_df) + +#1st plot. +comb_p1 = ggplot(plot_df, aes(x=plot_df[,'method_name'], y=plot_df[,"value"])) + + geom_boxplot() + geom_jitter() + + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=20)) + + xlab('Inference algorithms') + ylab('') + ggtitle(capitalize(acyclic)) + + facet_wrap(~variable, scales='free_y', ncol=2) + +png(paste('all_algorithms_', acyclic, '_score.png', sep=''), width=3000, height=4000, res=300) +plot(comb_p1) +dev.off() + +#2nd plot +tmp_list = list() +for(i in unique(method_name)) +{ + tmp_row = c(i, colMeans(subset(comb_df[,c(-1,-2)], comb_df[,'method_name']==i)), + apply(subset(comb_df[,c(-1,-2)], comb_df[,'method_name']==i), 2, function(x) sd(x)/sqrt(length(x)))) + #tmp_row = c(i, apply(subset(comb_df[,c(-1,-2)], comb_df[,'method_name']==i), 2, sum)) + names(tmp_row) = c('method_name', 'p', 'np', 'r', 'a', 's', 'plr', 'nlr', 'f', 'p_se', 'np_se', 'r_se', 'a_se', 's_se', 'plr_se', 'nlr_se', 'f_se') + tmp_list = c(tmp_list, list(tmp_row)) +} +combsum_df = do.call(rbind, tmp_list) +tmp_rownames = combsum_df[,1] +combsum_df = combsum_df[,-1] +combsum_df = apply(combsum_df, 2, as.numeric) +combsum_df = data.frame(combsum_df) +combsum_df = cbind(method_name=tmp_rownames, combsum_df) + +plot_mid_df = melt(combsum_df) +plot_val_df = subset(plot_mid_df, !grepl('[a-z]_se', plot_mid_df[,2])) +plot_se_df = subset(plot_mid_df, grepl('[a-z]_se', plot_mid_df[,2])) +plot_val_df = cbind(plot_val_df, plot_val_df[,3]-plot_se_df[,3], plot_val_df[,3]+plot_se_df[,3]) +colnames(plot_val_df) = c('method_name', 'variable', 'value', 'low', 'high') + +combmid_p1 = ggplot(plot_val_df, aes(x=plot_val_df[,'method_name'], y=plot_val_df[,'value'], fill=method_name)) + + geom_bar(stat="identity") + + geom_errorbar(aes(ymin=plot_val_df[,"low"], ymax=plot_val_df[,"high"]), width=.5) + + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=20)) + guides(fill=FALSE) + + scale_fill_manual("Legend", values = plot_col) + + xlab('Inference algorithms') + ylab('') + ggtitle(capitalize(acyclic)) + + facet_wrap(~variable, scales='free_y', ncol=2) + +png(paste('all_algorithms_', acyclic, '_meanscore.png', sep=''), width=3000, height=4000, res=300) +plot(combmid_p1) +dev.off() + +#Individual plots for F-scores +comb_p8 = ggplot(comb_df, aes(x=reorder(comb_df[,'method_name'], -comb_df[,"F-score"], FUN=median), y=comb_df[,"F-score"])) + + geom_boxplot() + geom_jitter() + + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=20)) + + xlab('Inference algorithms') + ylab('F-score') + ggtitle(capitalize(acyclic)) + +png(paste('all_algorithms_', acyclic, '_fscore.png', sep=''), width=2000, height=2000, res=300) +plot(comb_p8) +dev.off() + +combmid_p8 = ggplot(combsum_df, aes(x=reorder(combsum_df[,'method_name'], -combsum_df[,"f"]), y=f, fill=method_name)) + + geom_bar(stat="identity") + + geom_errorbar(aes(ymin=combsum_df[,"f"] - combsum_df[,'f_se'], ymax=combsum_df[,"f"] + combsum_df[,'f_se']), width=.5) + + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=20)) + guides(fill=FALSE) + + scale_fill_manual("Legend", values = plot_col) + + xlab('Inference algorithms') + ylab('F-score') + ggtitle(capitalize(acyclic)) + +png(paste('all_algorithms_', acyclic, '_fmeanscore.png', sep=''), width=2000, height=2000, res=300) +plot(combmid_p8) +dev.off() diff --git a/krum_wilson_grow_hsc.R b/krum_wilson_grow_hsc.R new file mode 100644 index 0000000..54ed521 --- /dev/null +++ b/krum_wilson_grow_hsc.R @@ -0,0 +1,49 @@ +#(1) Setup paths and environment. +library(BoolTraineR) +library(doParallel) + +#path='~/bool_final/' +#path='/home/cyl49/ownCloud/Research_Work/working_directory/boolean_project/res2/' +path='D:/ownCloud/Research_Work/working_directory/boolean_project/res2/' +setwd(path) + +#Setting up for parallel processing. +registerDoParallel(cores=4) + +#(2)Load data. +data(krum_bmodel) +data(krum_istate) +data(wilson_raw_data) + +inter_bool = T +max_varperrule = 6 + +bmodel = initialise_model(krum_bmodel) +istate = initialise_data(krum_istate) + +#(3)Filtering expression data. +tmp_data = initialise_raw_data(wilson_raw_data, max_expr = 'low') #do not filter data at this stage. keep the whole matrix. +cdata = tmp_data[[1]] #continuous data +ddata = tmp_data[[2]] #discretised data +cell_ind = grepl('cmp', rownames(cdata)) | grepl('gmp', rownames(cdata)) | grepl('mep', rownames(cdata)) +fcdata = cdata[cell_ind,] #select only relevant cells. +fddata = ddata[cell_ind,] + +#(4)Adding extra genes into the model. +extra_genes = setdiff(colnames(wilson_raw_data), bmodel@target) +grown_bmodel = grow_bmodel(extra_genes[c(19, 20)], bmodel) #genes to be added: ldb1, lmo2 + +#(5)Re-estimate initial state. +#Since CMPs are upsteam of GMPs, and MEPs, the initial state of extra genes can be estimated from CMPs. +tmp_istate = colMeans(cdata[grepl('cmp', rownames(cdata)), extra_genes[c(19, 20)]]) +tmp_istate = matrix(round(tmp_istate), nrow=1, dimnames=list(1, names(tmp_istate))) +grown_istate = cbind(istate, tmp_istate) +grown_istate = initialise_data(grown_istate) + +result = model_train(cdata=fcdata, ddata=fddata, bmodel=grown_bmodel, istate=grown_istate, preprocess=F, verbose=T) + +filename = 'krum_wilson_myeloid_grow_trained_bmodel.rda' +save(result, file=filename) + +#Cleaning up. +stopImplicitCluster()