Jun 21, 2016
Fix param
|
|
|
1 |
#(1) Setup paths and environment. |
|
2 |
#library(BoolTraineR) |
|
3 |
library(BTR) |
|
4 |
library(bnlearn) |
|
5 |
library(ggplot2) |
|
6 |
library(gridExtra) #for plotting multiple graphs |
|
7 |
library(reshape2) |
|
8 |
|
|
9 |
path='~/btr_output/' |
|
10 |
setwd(path) |
|
11 |
|
|
12 |
max_varperrule = 6 |
|
13 |
nonoise=F |
|
14 |
subset_bool = F |
|
15 |
|
|
16 |
for(acyclic in c(T,F)) |
|
17 |
{ |
|
18 |
for(single_cell in c(T,F)) |
|
19 |
{ |
|
20 |
for(file_ind in 1:5) |
|
21 |
{ |
|
22 |
#Load data. |
|
23 |
load(paste(path, ifelse(acyclic, 'acyclic', 'cyclic'), '_data/test_data_', ifelse(nonoise, 'nonoise_', ''),'yeast', file_ind,'_20n30s10g.rda', sep='')) #test_data. |
|
24 |
|
|
25 |
#score the true model under Bayesian framework. |
|
26 |
if(acyclic) |
|
27 |
{ |
|
28 |
true_ndamat = abs(test_data$true_amat) |
|
29 |
true_bngraph = empty.graph(colnames(test_data$true_amat)) |
|
30 |
amat(true_bngraph) = true_ndamat #if this step throws error, that means the true graph is not DAG. Has to regenerate the random data using GNW. |
|
31 |
true_bnscore = score(true_bngraph, as.data.frame(test_data$cdata)) |
|
32 |
} |
|
33 |
|
|
34 |
#score the true model under Boolean model framework. |
|
35 |
true_bmmodel = amat_to_bm(test_data$true_amat) |
|
36 |
overlap_gene = unname(colnames(test_data$cdata)) |
|
37 |
if(single_cell) |
|
38 |
{ |
|
39 |
if(subset_bool) |
|
40 |
{ |
|
41 |
set.seed(1) |
|
42 |
sind = sample(1:nrow(test_data$sc_cdata), 50) |
|
43 |
fcdata = test_data$sc_cdata[sind, ] |
|
44 |
} else |
|
45 |
{ |
|
46 |
fcdata = test_data$sc_cdata |
|
47 |
} |
|
48 |
} else |
|
49 |
{ |
|
50 |
if(subset_bool) |
|
51 |
{ |
|
52 |
set.seed(1) |
|
53 |
sind = sample(1:nrow(test_data$cdata), 50) |
|
54 |
fcdata = test_data$cdata[sind, ] |
|
55 |
} else |
|
56 |
{ |
|
57 |
fcdata = test_data$cdata |
|
58 |
} |
|
59 |
} |
|
60 |
true_bmscore = calc_mscore(bmodel=true_bmmodel, istate=test_data$istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule) |
|
61 |
|
|
62 |
#score the other bn models under Bayesian framework using GNW data. |
|
63 |
bnmod_gnw_bnscore = c() |
|
64 |
if(acyclic) |
|
65 |
{ |
|
66 |
for(i in 1:length(test_data$bn_modamat)) |
|
67 |
{ |
|
68 |
for(j in 1:length(test_data$bn_modamat[[i]])) |
|
69 |
{ |
|
70 |
tmp_bngraph = empty.graph(colnames(test_data$bn_modamat[[i]][[j]])) |
|
71 |
amat(tmp_bngraph) = test_data$bn_modamat[[i]][[j]] |
|
72 |
tmp_score = score(tmp_bngraph, as.data.frame(test_data$cdata)) |
|
73 |
#tmp_score = score(tmp_bngraph, as.data.frame(bm_cdata)) |
|
74 |
names(tmp_score) = test_data$bn_step[[i]][[j]] |
|
75 |
|
|
76 |
bnmod_gnw_bnscore = c(bnmod_gnw_bnscore, tmp_score) |
|
77 |
} |
|
78 |
} |
|
79 |
bnmod_gnw_bnscore = split(bnmod_gnw_bnscore, names(bnmod_gnw_bnscore)) |
|
80 |
bnmod_gnw_bnscore = bnmod_gnw_bnscore[order(as.numeric(names(bnmod_gnw_bnscore)))] |
|
81 |
bnmod_gnw_bnscore = do.call(cbind, bnmod_gnw_bnscore) |
|
82 |
#Add in true score. |
|
83 |
bnmod_gnw_bnscore = cbind('0'=rep(true_bnscore, nrow(bnmod_gnw_bnscore)), bnmod_gnw_bnscore) |
|
84 |
rownames(bnmod_gnw_bnscore) = NULL |
|
85 |
colnames(bnmod_gnw_bnscore) = paste(seq(0,40,2)) |
|
86 |
} |
|
87 |
|
|
88 |
#score the other bm models under Boolean model framework using GNW data. |
|
89 |
bmmod_gnw_bmscore = c() |
|
90 |
y_bmscore = c() |
|
91 |
za_bmscore = c() |
|
92 |
zb_bmscore = c() |
|
93 |
for(i in 1:length(test_data$bm_modmodel)) |
|
94 |
{ |
|
95 |
for(j in 1:length(test_data$bm_modmodel[[i]])) |
|
96 |
{ |
|
97 |
|
|
98 |
tmp_bmmodel = test_data$bm_modmodel[[i]][[j]] |
|
99 |
overlap_gene = unname(colnames(test_data$cdata)) |
|
100 |
tmp_bmmodel@target = overlap_gene |
|
101 |
tmp_score = calc_mscore(bmodel=tmp_bmmodel, istate=test_data$istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule, detail=T) |
|
102 |
|
|
103 |
names(tmp_score) = rep(j, length(tmp_score)) |
|
104 |
|
|
105 |
#c('f', 'y', 'zb') |
|
106 |
bmmod_gnw_bmscore = c(bmmod_gnw_bmscore, tmp_score[1]) |
|
107 |
y_bmscore = c(y_bmscore, tmp_score[2]) |
|
108 |
za_bmscore = c(za_bmscore, tmp_score[3]) |
|
109 |
zb_bmscore = c(zb_bmscore, tmp_score[4]) |
|
110 |
} |
|
111 |
} |
|
112 |
bmmod_gnw_bmscore = split(bmmod_gnw_bmscore, names(bmmod_gnw_bmscore)) |
|
113 |
bmmod_gnw_bmscore = bmmod_gnw_bmscore[order(as.numeric(names(bmmod_gnw_bmscore)))] |
|
114 |
bmmod_gnw_bmscore = do.call(cbind, bmmod_gnw_bmscore) |
|
115 |
#Add in true score. |
|
116 |
bmmod_gnw_bmscore = cbind('0'=rep(true_bmscore, nrow(bmmod_gnw_bmscore)), bmmod_gnw_bmscore) |
|
117 |
rownames(bmmod_gnw_bmscore) = NULL |
|
118 |
colnames(bmmod_gnw_bmscore) = paste(seq(0,40,2)) |
|
119 |
|
|
120 |
y_bmscore = split(y_bmscore, names(y_bmscore)) |
|
121 |
y_bmscore = y_bmscore[order(as.numeric(names(y_bmscore)))] |
|
122 |
y_bmscore = do.call(cbind, y_bmscore) |
|
123 |
rownames(y_bmscore) = NULL |
|
124 |
colnames(y_bmscore) = paste(seq(2,40,2)) |
|
125 |
|
|
126 |
za_bmscore = split(za_bmscore, names(za_bmscore)) |
|
127 |
za_bmscore = za_bmscore[order(as.numeric(names(za_bmscore)))] |
|
128 |
za_bmscore = do.call(cbind, za_bmscore) |
|
129 |
rownames(za_bmscore) = NULL |
|
130 |
colnames(za_bmscore) = paste(seq(2,40,2)) |
|
131 |
|
|
132 |
zb_bmscore = split(zb_bmscore, names(zb_bmscore)) |
|
133 |
zb_bmscore = zb_bmscore[order(as.numeric(names(zb_bmscore)))] |
|
134 |
zb_bmscore = do.call(cbind, zb_bmscore) |
|
135 |
rownames(zb_bmscore) = NULL |
|
136 |
colnames(zb_bmscore) = paste(seq(2,40,2)) |
|
137 |
|
|
138 |
save(bnmod_gnw_bnscore, bmmod_gnw_bmscore, y_bmscore, za_bmscore, zb_bmscore, file=paste(ifelse(acyclic, 'acyclic', 'cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_yeast', file_ind, '_scores.rda', sep='')) |
|
139 |
#save(bnmod_gnw_bnscore, bmmod_gnw_bmscore, y_bmscore, zb_bmscore, file=paste(ifelse(acyclic, 'acyclic', 'cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_yeast', file_ind, '_scores.rda', sep='')) |
|
140 |
} |
|
141 |
|
|
142 |
################################################################################################################################## |
|
143 |
|
|
144 |
#Setup the true networks. |
|
145 |
true_model_list = list() |
|
146 |
for(file_ind in 1:5) |
|
147 |
{ |
|
148 |
#Load data. |
|
149 |
load(paste(path, ifelse(acyclic, 'acyclic', 'cyclic'), '_data/test_data_', ifelse(nonoise, 'nonoise_', ''),'yeast', file_ind,'_20n30s10g.rda', sep='')) #test_data. |
|
150 |
|
|
151 |
true_bmmodel = amat_to_bm(test_data$true_amat) |
|
152 |
true_model_list = c(true_model_list, list(true_bmmodel)) |
|
153 |
} |
|
154 |
|
|
155 |
#Make plots of true models. |
|
156 |
for(i in 1:length(true_model_list)) |
|
157 |
{ |
|
158 |
png(paste(i, ifelse(acyclic, '_acyclic', '_cyclic'), '_true_network.png', sep=''), width=2000, height=2000, res=300) |
|
159 |
plotBM(true_model_list[[i]]) |
|
160 |
title(paste('True ', ifelse(acyclic, 'acyclic', 'cyclic'), ' network', i), cex.main=3) |
|
161 |
dev.off() |
|
162 |
} |
|
163 |
|
|
164 |
#Setup the scores for modified models. |
|
165 |
bnscore_list = list() |
|
166 |
bmscore_list = list() |
|
167 |
for(file_ind in 1:5) |
|
168 |
{ |
|
169 |
load(paste(ifelse(acyclic, 'acyclic', 'cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_yeast', file_ind, '_scores.rda', sep='')) #bnmod_gnw_bnscore, bmmod_gnw_bmscore |
|
170 |
|
|
171 |
bnscore_list = c(bnscore_list, list(bnmod_gnw_bnscore)) |
|
172 |
bmscore_list = c(bmscore_list, list(bmmod_gnw_bmscore)) |
|
173 |
} |
|
174 |
|
|
175 |
if(acyclic) |
|
176 |
{ |
|
177 |
bnscore_df = melt(bnscore_list) |
|
178 |
bnscore_df = bnscore_df[,-1] |
|
179 |
bnscore_df[,3] = paste('Network', bnscore_df[,3]) |
|
180 |
colnames(bnscore_df) = c('steps', 'score', 'network') |
|
181 |
|
|
182 |
bnscore_mid_df = melt(lapply(bnscore_list, function(x) colMeans(x))) |
|
183 |
bnscore_mid_df[,2] = paste('Network', bnscore_mid_df[,2]) |
|
184 |
bnscore_mid_df = cbind(rep(seq(0,40,2), 5), bnscore_mid_df) |
|
185 |
|
|
186 |
bnscore_se = melt(lapply(bnscore_list, function(x) apply(x, 2, function(x) sd(x)/sqrt(length(x))))) |
|
187 |
bnscore_se = bnscore_se[,1,drop=F] |
|
188 |
bnscore_mid_df = cbind(bnscore_mid_df, bnscore_mid_df[,2]-bnscore_se[,1], bnscore_mid_df[,2]+bnscore_se[,1]) |
|
189 |
colnames(bnscore_mid_df) = c('steps', 'score', 'network', 'low', 'high') |
|
190 |
} |
|
191 |
|
|
192 |
bmscore_df = melt(bmscore_list) |
|
193 |
bmscore_df = bmscore_df[,-1] |
|
194 |
bmscore_df[,3] = paste('Network', bmscore_df[,3]) |
|
195 |
colnames(bmscore_df) = c('steps', 'score', 'network') |
|
196 |
|
|
197 |
bmscore_mid_df = melt(lapply(bmscore_list, function(x) colMeans(x))) |
|
198 |
bmscore_mid_df[,2] = paste('Network', bmscore_mid_df[,2]) |
|
199 |
bmscore_mid_df = cbind(rep(seq(0,40,2), 5), bmscore_mid_df) |
|
200 |
|
|
201 |
bmscore_se = melt(lapply(bmscore_list, function(x) apply(x, 2, function(x) sd(x)/sqrt(length(x))))) |
|
202 |
bmscore_se = bmscore_se[,1,drop=F] |
|
203 |
bmscore_mid_df = cbind(bmscore_mid_df, bmscore_mid_df[,2]-bmscore_se[,1], bmscore_mid_df[,2]+bmscore_se[,1]) |
|
204 |
colnames(bmscore_mid_df) = c('steps', 'score', 'network', 'low', 'high') |
|
205 |
|
|
206 |
#Make plot objects of scoring functions. |
|
207 |
if(acyclic) |
|
208 |
{ |
|
209 |
p1_bn_box = ggplot(bnscore_df, aes(x=factor(bnscore_df[,'steps']), y=bnscore_df[,'score'])) + |
|
210 |
geom_boxplot() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BIC scoring function') + |
|
211 |
scale_x_discrete(labels=unique(bnscore_df[,'steps'])) + |
|
212 |
facet_wrap(~network, scales='free_y', ncol=1) + |
|
213 |
theme(text = element_text(size=20), axis.text.x = element_text(size=10)) |
|
214 |
|
|
215 |
p2_bm_box = ggplot(bmscore_df, aes(x=factor(bmscore_df[,'steps']), y=bmscore_df[,'score'])) + |
|
216 |
geom_boxplot() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') + |
|
217 |
scale_x_discrete(labels=unique(bmscore_df[,'steps'])) + |
|
218 |
facet_wrap(~network, scales='free_y', ncol=1) + |
|
219 |
theme(text = element_text(size=20), axis.text.x = element_text(size=10)) |
|
220 |
|
|
221 |
png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_boxplot_compare_score.png', sep=''), width=3000, height=5000, res=300) |
|
222 |
grid.arrange(p1_bn_box, p2_bm_box, ncol=2) |
|
223 |
dev.off() |
|
224 |
|
|
225 |
p1_bn_mid = ggplot(bnscore_mid_df, aes(x=bnscore_mid_df[, 'steps'], y=bnscore_mid_df[,'score'])) + |
|
226 |
geom_errorbar(aes(ymin=bnscore_mid_df[, 'low'], ymax=bnscore_mid_df[, 'high'])) + |
|
227 |
geom_line() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BIC scoring function') + |
|
228 |
facet_wrap(~network, scales='free_y', ncol=1) + |
|
229 |
theme(text = element_text(size=20)) |
|
230 |
|
|
231 |
p2_bm_mid = ggplot(bmscore_mid_df, aes(x=bmscore_mid_df[, 'steps'], y=bmscore_mid_df[,'score'])) + |
|
232 |
geom_errorbar(aes(ymin=bmscore_mid_df[, 'low'], ymax=bmscore_mid_df[, 'high'])) + |
|
233 |
geom_line() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') + |
|
234 |
facet_wrap(~network, scales='free_y', ncol=1) + |
|
235 |
theme(text = element_text(size=20)) |
|
236 |
|
|
237 |
png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_mean_compare_score.png', sep=''), width=3000, height=5000, res=300) |
|
238 |
grid.arrange(p1_bn_mid, p2_bm_mid, ncol=2) |
|
239 |
dev.off() |
|
240 |
} else |
|
241 |
{ |
|
242 |
p2_bm_box = ggplot(bmscore_df, aes(x=factor(bmscore_df[,'steps']), y=bmscore_df[,'score'])) + |
|
243 |
geom_boxplot() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') + |
|
244 |
scale_x_discrete(labels=unique(bmscore_df[,'steps'])) + |
|
245 |
facet_wrap(~network, scales='free_y', ncol=1) + |
|
246 |
theme(text = element_text(size=20), axis.text.x = element_text(size=10)) |
|
247 |
|
|
248 |
png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_boxplot_compare_score.png', sep=''), width=3000, height=5000, res=300) |
|
249 |
grid.arrange(p2_bm_box, ncol=2) |
|
250 |
dev.off() |
|
251 |
|
|
252 |
p2_bm_mid = ggplot(bmscore_mid_df, aes(x=bmscore_mid_df[, 'steps'], y=bmscore_mid_df[,'score'])) + |
|
253 |
geom_errorbar(aes(ymin=bmscore_mid_df[, 'low'], ymax=bmscore_mid_df[, 'high'])) + |
|
254 |
geom_line() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') + |
|
255 |
facet_wrap(~network, scales='free_y', ncol=1) + |
|
256 |
theme(text = element_text(size=20)) |
|
257 |
|
|
258 |
png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_mean_compare_score.png', sep=''), width=3000, height=5000, res=300) |
|
259 |
grid.arrange(p2_bm_mid, ncol=2) |
|
260 |
dev.off() |
|
261 |
} |
|
262 |
} |
|
263 |
} |