Permalink
Newer
Older
100644 264 lines (227 sloc) 12 KB
Jun 21, 2016 @cheeyeelim Fix param
1 #(1) Setup paths and environment.
2 #library(BoolTraineR)
3 library(BTR)
4 library(bnlearn)
5 library(ggplot2)
6 library(gridExtra) #for plotting multiple graphs
7 library(reshape2)
8
9 path='~/btr_output/'
10 setwd(path)
11
12 max_varperrule = 6
13 nonoise=F
14 subset_bool = F
15
16 for(acyclic in c(T,F))
17 {
18 for(single_cell in c(T,F))
19 {
20 for(file_ind in 1:5)
21 {
22 #Load data.
23 load(paste(path, ifelse(acyclic, 'acyclic', 'cyclic'), '_data/test_data_', ifelse(nonoise, 'nonoise_', ''),'yeast', file_ind,'_20n30s10g.rda', sep='')) #test_data.
24
25 #score the true model under Bayesian framework.
26 if(acyclic)
27 {
28 true_ndamat = abs(test_data$true_amat)
29 true_bngraph = empty.graph(colnames(test_data$true_amat))
30 amat(true_bngraph) = true_ndamat #if this step throws error, that means the true graph is not DAG. Has to regenerate the random data using GNW.
31 true_bnscore = score(true_bngraph, as.data.frame(test_data$cdata))
32 }
33
34 #score the true model under Boolean model framework.
35 true_bmmodel = amat_to_bm(test_data$true_amat)
36 overlap_gene = unname(colnames(test_data$cdata))
37 if(single_cell)
38 {
39 if(subset_bool)
40 {
41 set.seed(1)
42 sind = sample(1:nrow(test_data$sc_cdata), 50)
43 fcdata = test_data$sc_cdata[sind, ]
44 } else
45 {
46 fcdata = test_data$sc_cdata
47 }
48 } else
49 {
50 if(subset_bool)
51 {
52 set.seed(1)
53 sind = sample(1:nrow(test_data$cdata), 50)
54 fcdata = test_data$cdata[sind, ]
55 } else
56 {
57 fcdata = test_data$cdata
58 }
59 }
60 true_bmscore = calc_mscore(bmodel=true_bmmodel, istate=test_data$istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule)
61
62 #score the other bn models under Bayesian framework using GNW data.
63 bnmod_gnw_bnscore = c()
64 if(acyclic)
65 {
66 for(i in 1:length(test_data$bn_modamat))
67 {
68 for(j in 1:length(test_data$bn_modamat[[i]]))
69 {
70 tmp_bngraph = empty.graph(colnames(test_data$bn_modamat[[i]][[j]]))
71 amat(tmp_bngraph) = test_data$bn_modamat[[i]][[j]]
72 tmp_score = score(tmp_bngraph, as.data.frame(test_data$cdata))
73 #tmp_score = score(tmp_bngraph, as.data.frame(bm_cdata))
74 names(tmp_score) = test_data$bn_step[[i]][[j]]
75
76 bnmod_gnw_bnscore = c(bnmod_gnw_bnscore, tmp_score)
77 }
78 }
79 bnmod_gnw_bnscore = split(bnmod_gnw_bnscore, names(bnmod_gnw_bnscore))
80 bnmod_gnw_bnscore = bnmod_gnw_bnscore[order(as.numeric(names(bnmod_gnw_bnscore)))]
81 bnmod_gnw_bnscore = do.call(cbind, bnmod_gnw_bnscore)
82 #Add in true score.
83 bnmod_gnw_bnscore = cbind('0'=rep(true_bnscore, nrow(bnmod_gnw_bnscore)), bnmod_gnw_bnscore)
84 rownames(bnmod_gnw_bnscore) = NULL
85 colnames(bnmod_gnw_bnscore) = paste(seq(0,40,2))
86 }
87
88 #score the other bm models under Boolean model framework using GNW data.
89 bmmod_gnw_bmscore = c()
90 y_bmscore = c()
91 za_bmscore = c()
92 zb_bmscore = c()
93 for(i in 1:length(test_data$bm_modmodel))
94 {
95 for(j in 1:length(test_data$bm_modmodel[[i]]))
96 {
97
98 tmp_bmmodel = test_data$bm_modmodel[[i]][[j]]
99 overlap_gene = unname(colnames(test_data$cdata))
100 tmp_bmmodel@target = overlap_gene
101 tmp_score = calc_mscore(bmodel=tmp_bmmodel, istate=test_data$istate, fcdata=fcdata, overlap_gene=overlap_gene, max_varperrule=max_varperrule, detail=T)
102
103 names(tmp_score) = rep(j, length(tmp_score))
104
105 #c('f', 'y', 'zb')
106 bmmod_gnw_bmscore = c(bmmod_gnw_bmscore, tmp_score[1])
107 y_bmscore = c(y_bmscore, tmp_score[2])
108 za_bmscore = c(za_bmscore, tmp_score[3])
109 zb_bmscore = c(zb_bmscore, tmp_score[4])
110 }
111 }
112 bmmod_gnw_bmscore = split(bmmod_gnw_bmscore, names(bmmod_gnw_bmscore))
113 bmmod_gnw_bmscore = bmmod_gnw_bmscore[order(as.numeric(names(bmmod_gnw_bmscore)))]
114 bmmod_gnw_bmscore = do.call(cbind, bmmod_gnw_bmscore)
115 #Add in true score.
116 bmmod_gnw_bmscore = cbind('0'=rep(true_bmscore, nrow(bmmod_gnw_bmscore)), bmmod_gnw_bmscore)
117 rownames(bmmod_gnw_bmscore) = NULL
118 colnames(bmmod_gnw_bmscore) = paste(seq(0,40,2))
119
120 y_bmscore = split(y_bmscore, names(y_bmscore))
121 y_bmscore = y_bmscore[order(as.numeric(names(y_bmscore)))]
122 y_bmscore = do.call(cbind, y_bmscore)
123 rownames(y_bmscore) = NULL
124 colnames(y_bmscore) = paste(seq(2,40,2))
125
126 za_bmscore = split(za_bmscore, names(za_bmscore))
127 za_bmscore = za_bmscore[order(as.numeric(names(za_bmscore)))]
128 za_bmscore = do.call(cbind, za_bmscore)
129 rownames(za_bmscore) = NULL
130 colnames(za_bmscore) = paste(seq(2,40,2))
131
132 zb_bmscore = split(zb_bmscore, names(zb_bmscore))
133 zb_bmscore = zb_bmscore[order(as.numeric(names(zb_bmscore)))]
134 zb_bmscore = do.call(cbind, zb_bmscore)
135 rownames(zb_bmscore) = NULL
136 colnames(zb_bmscore) = paste(seq(2,40,2))
137
138 save(bnmod_gnw_bnscore, bmmod_gnw_bmscore, y_bmscore, za_bmscore, zb_bmscore, file=paste(ifelse(acyclic, 'acyclic', 'cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_yeast', file_ind, '_scores.rda', sep=''))
139 #save(bnmod_gnw_bnscore, bmmod_gnw_bmscore, y_bmscore, zb_bmscore, file=paste(ifelse(acyclic, 'acyclic', 'cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_yeast', file_ind, '_scores.rda', sep=''))
140 }
141
142 ##################################################################################################################################
143
144 #Setup the true networks.
145 true_model_list = list()
146 for(file_ind in 1:5)
147 {
148 #Load data.
149 load(paste(path, ifelse(acyclic, 'acyclic', 'cyclic'), '_data/test_data_', ifelse(nonoise, 'nonoise_', ''),'yeast', file_ind,'_20n30s10g.rda', sep='')) #test_data.
150
151 true_bmmodel = amat_to_bm(test_data$true_amat)
152 true_model_list = c(true_model_list, list(true_bmmodel))
153 }
154
155 #Make plots of true models.
156 for(i in 1:length(true_model_list))
157 {
158 png(paste(i, ifelse(acyclic, '_acyclic', '_cyclic'), '_true_network.png', sep=''), width=2000, height=2000, res=300)
159 plotBM(true_model_list[[i]])
160 title(paste('True ', ifelse(acyclic, 'acyclic', 'cyclic'), ' network', i), cex.main=3)
161 dev.off()
162 }
163
164 #Setup the scores for modified models.
165 bnscore_list = list()
166 bmscore_list = list()
167 for(file_ind in 1:5)
168 {
169 load(paste(ifelse(acyclic, 'acyclic', 'cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_yeast', file_ind, '_scores.rda', sep='')) #bnmod_gnw_bnscore, bmmod_gnw_bmscore
170
171 bnscore_list = c(bnscore_list, list(bnmod_gnw_bnscore))
172 bmscore_list = c(bmscore_list, list(bmmod_gnw_bmscore))
173 }
174
175 if(acyclic)
176 {
177 bnscore_df = melt(bnscore_list)
178 bnscore_df = bnscore_df[,-1]
179 bnscore_df[,3] = paste('Network', bnscore_df[,3])
180 colnames(bnscore_df) = c('steps', 'score', 'network')
181
182 bnscore_mid_df = melt(lapply(bnscore_list, function(x) colMeans(x)))
183 bnscore_mid_df[,2] = paste('Network', bnscore_mid_df[,2])
184 bnscore_mid_df = cbind(rep(seq(0,40,2), 5), bnscore_mid_df)
185
186 bnscore_se = melt(lapply(bnscore_list, function(x) apply(x, 2, function(x) sd(x)/sqrt(length(x)))))
187 bnscore_se = bnscore_se[,1,drop=F]
188 bnscore_mid_df = cbind(bnscore_mid_df, bnscore_mid_df[,2]-bnscore_se[,1], bnscore_mid_df[,2]+bnscore_se[,1])
189 colnames(bnscore_mid_df) = c('steps', 'score', 'network', 'low', 'high')
190 }
191
192 bmscore_df = melt(bmscore_list)
193 bmscore_df = bmscore_df[,-1]
194 bmscore_df[,3] = paste('Network', bmscore_df[,3])
195 colnames(bmscore_df) = c('steps', 'score', 'network')
196
197 bmscore_mid_df = melt(lapply(bmscore_list, function(x) colMeans(x)))
198 bmscore_mid_df[,2] = paste('Network', bmscore_mid_df[,2])
199 bmscore_mid_df = cbind(rep(seq(0,40,2), 5), bmscore_mid_df)
200
201 bmscore_se = melt(lapply(bmscore_list, function(x) apply(x, 2, function(x) sd(x)/sqrt(length(x)))))
202 bmscore_se = bmscore_se[,1,drop=F]
203 bmscore_mid_df = cbind(bmscore_mid_df, bmscore_mid_df[,2]-bmscore_se[,1], bmscore_mid_df[,2]+bmscore_se[,1])
204 colnames(bmscore_mid_df) = c('steps', 'score', 'network', 'low', 'high')
205
206 #Make plot objects of scoring functions.
207 if(acyclic)
208 {
209 p1_bn_box = ggplot(bnscore_df, aes(x=factor(bnscore_df[,'steps']), y=bnscore_df[,'score'])) +
210 geom_boxplot() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BIC scoring function') +
211 scale_x_discrete(labels=unique(bnscore_df[,'steps'])) +
212 facet_wrap(~network, scales='free_y', ncol=1) +
213 theme(text = element_text(size=20), axis.text.x = element_text(size=10))
214
215 p2_bm_box = ggplot(bmscore_df, aes(x=factor(bmscore_df[,'steps']), y=bmscore_df[,'score'])) +
216 geom_boxplot() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') +
217 scale_x_discrete(labels=unique(bmscore_df[,'steps'])) +
218 facet_wrap(~network, scales='free_y', ncol=1) +
219 theme(text = element_text(size=20), axis.text.x = element_text(size=10))
220
221 png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_boxplot_compare_score.png', sep=''), width=3000, height=5000, res=300)
222 grid.arrange(p1_bn_box, p2_bm_box, ncol=2)
223 dev.off()
224
225 p1_bn_mid = ggplot(bnscore_mid_df, aes(x=bnscore_mid_df[, 'steps'], y=bnscore_mid_df[,'score'])) +
226 geom_errorbar(aes(ymin=bnscore_mid_df[, 'low'], ymax=bnscore_mid_df[, 'high'])) +
227 geom_line() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BIC scoring function') +
228 facet_wrap(~network, scales='free_y', ncol=1) +
229 theme(text = element_text(size=20))
230
231 p2_bm_mid = ggplot(bmscore_mid_df, aes(x=bmscore_mid_df[, 'steps'], y=bmscore_mid_df[,'score'])) +
232 geom_errorbar(aes(ymin=bmscore_mid_df[, 'low'], ymax=bmscore_mid_df[, 'high'])) +
233 geom_line() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') +
234 facet_wrap(~network, scales='free_y', ncol=1) +
235 theme(text = element_text(size=20))
236
237 png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_mean_compare_score.png', sep=''), width=3000, height=5000, res=300)
238 grid.arrange(p1_bn_mid, p2_bm_mid, ncol=2)
239 dev.off()
240 } else
241 {
242 p2_bm_box = ggplot(bmscore_df, aes(x=factor(bmscore_df[,'steps']), y=bmscore_df[,'score'])) +
243 geom_boxplot() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') +
244 scale_x_discrete(labels=unique(bmscore_df[,'steps'])) +
245 facet_wrap(~network, scales='free_y', ncol=1) +
246 theme(text = element_text(size=20), axis.text.x = element_text(size=10))
247
248 png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_boxplot_compare_score.png', sep=''), width=3000, height=5000, res=300)
249 grid.arrange(p2_bm_box, ncol=2)
250 dev.off()
251
252 p2_bm_mid = ggplot(bmscore_mid_df, aes(x=bmscore_mid_df[, 'steps'], y=bmscore_mid_df[,'score'])) +
253 geom_errorbar(aes(ymin=bmscore_mid_df[, 'low'], ymax=bmscore_mid_df[, 'high'])) +
254 geom_line() + xlab('Number of different edges') + ylab('Scores') + ggtitle('BSS scoring function') +
255 facet_wrap(~network, scales='free_y', ncol=1) +
256 theme(text = element_text(size=20))
257
258 png(paste('boolbaye', ifelse(acyclic, '_acyclic', '_cyclic'), ifelse(single_cell, '_sc', '_nonsc'), '_mean_compare_score.png', sep=''), width=3000, height=5000, res=300)
259 grid.arrange(p2_bm_mid, ncol=2)
260 dev.off()
261 }
262 }
263 }