Permalink
Newer
Older
100644 168 lines (142 sloc) 8 KB
Jun 21, 2016 @cheeyeelim Fix param
1 library(BTR)
2 library(ggplot2)
3 library(gridExtra) #for plotting multiple graphs
4 library(colorspace)
5 library(reshape2)
6 library(Hmisc) #for capitalisation.
7
8 #(1) Load results and combine them.
9 for(acyclic in c('acyclic', 'cyclic', 'both'))
10 {
11 for(single_cell in c(T, F))
12 {
13 # sect_1 ------------------------------------------------------------------
14
15 path='~/btr_output'
16 setwd(path)
17
18 if(acyclic=='acyclic')
19 {
20 files = list.files(path=path, pattern=sprintf('^result.+_train_acyclic_%s_yeast[.]rda', ifelse(single_cell, 'sc', 'nonsc')), full.names = T)
21 } else if(acyclic=='cyclic')
22 {
23 files = list.files(path=path, pattern=sprintf('^result.+_train_cyclic_%s_yeast[.]rda', ifelse(single_cell, 'sc', 'nonsc')), full.names = T)
24 } else if(acyclic=='both')
25 {
26 files = list.files(path=path, pattern=sprintf('^result.+_train_[a-z]+_%s_yeast[.]rda', ifelse(single_cell, 'sc', 'nonsc')), full.names = T)
27 }
28
29 # sect_2 ------------------------------------------------------------------
30
31 all_true_adj_mat = list()
32 all_adj_mat = list()
33 all_perf_score = c()
34 all_time_taken = list()
35 for(i in 1:length(files))
36 {
37 if(acyclic=='acyclic')
38 {
39 tmp_name = gsub(sprintf('.+result_(.+)_train_acyclic_%s_yeast[.]rda', ifelse(single_cell, 'sc', 'nonsc')), '\\1', files[i]) #extract name of method.
40 } else if(acyclic=='cyclic')
41 {
42 tmp_name = gsub(sprintf('.+result_(.+)_train_cyclic_%s_yeast[.]rda', ifelse(single_cell, 'sc', 'nonsc')), '\\1', files[i]) #extract name of method.
43 } else if(acyclic=='both')
44 {
45 tmp_name = gsub(sprintf('.+result_(.+)_train_.+_%s_yeast[.]rda', ifelse(single_cell, 'sc', 'nonsc')), '\\1', files[i]) #extract name of method.
46 }
47
48 load(files[i]) #test_result. "true_adj_mat", "adj_mat", "perf_score", "time_taken"
49
50 algo_true_adj_mat = list()
51 algo_adj_mat = list()
52 algo_perf_score = c()
53 algo_time_taken = list()
54 for(j in 1:length(test_result))
55 {
56 algo_true_adj_mat = c(algo_true_adj_mat, list(test_result[[j]]$true_adj_mat))
57 algo_adj_mat = c(algo_adj_mat, list(test_result[[j]]$adj_mat))
58 algo_perf_score = c(algo_perf_score, list(test_result[[j]]$perf_score))
59 algo_time_taken = c(algo_time_taken, list(as.vector(test_result[[j]]$time_taken)))
60 }
61 algo_perf_score = do.call(rbind, algo_perf_score)
62 algo_time_taken = do.call(rbind, algo_time_taken)
63
64 if(tmp_name %in% names(all_true_adj_mat)) #only happen in acyclic=both.
65 {
66 all_true_adj_mat[[tmp_name]] = c(all_true_adj_mat[[tmp_name]], algo_true_adj_mat)
67 all_adj_mat[[tmp_name]] = c(all_adj_mat[[tmp_name]], algo_adj_mat)
68 all_perf_score[[tmp_name]] = rbind(all_perf_score[[tmp_name]], algo_perf_score)
69 all_time_taken[[tmp_name]] = rbind(all_time_taken[[tmp_name]], algo_time_taken)
70 } else
71 {
72 all_true_adj_mat = c(all_true_adj_mat, setNames(list(algo_true_adj_mat), tmp_name))
73 all_adj_mat = c(all_adj_mat, setNames(list(algo_adj_mat), tmp_name))
74 all_perf_score = c(all_perf_score, setNames(list(algo_perf_score), tmp_name))
75 all_time_taken = c(all_time_taken, setNames(list(algo_time_taken), tmp_name))
76 }
77 }
78
79 # sect_3 ------------------------------------------------------------------
80
81 #Prepare data for plotting.
82 method_name = toupper(as.vector(sapply(names(all_perf_score), function(x) rep(x, nrow(all_perf_score[[1]])))))
83 network_name = rep(paste('net_', seq(1,nrow(all_perf_score[[1]])), sep=''), length(all_perf_score))
84 comb_df = do.call(rbind, all_perf_score)
85
86 stopifnot(nrow(comb_df)==length(method_name))
87
88 comb_df = data.frame(method_name, network_name, comb_df, stringsAsFactors=F)
89 comb_df[is.na(comb_df)] = 0
90 colnames(comb_df) = c(colnames(comb_df)[1:2], toupper(colnames(comb_df)[3:10]))
91 colnames(comb_df)[10] = 'F-score'
92
93 #Setting up consistent colour.
94 plot_col = rainbow_hcl(length(unique(comb_df[,'method_name'])), alpha=0.8)
95 names(plot_col) = unique(comb_df[,'method_name'])
96
97 plot_df = melt(comb_df)
98
99 #1st plot.
100 comb_p1 = ggplot(plot_df, aes(x=plot_df[,'method_name'], y=plot_df[,"value"])) +
101 geom_boxplot() + geom_jitter() +
102 theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=20)) +
103 xlab('Inference algorithms') + ylab('') + ggtitle(Hmisc:::capitalize(acyclic)) +
104 facet_wrap(~variable, scales='free_y', ncol=2)
105
106 png(paste('all_algorithms_', acyclic, ifelse(single_cell, '_sc', '_nonsc'), '_score.png', sep=''), width=3000, height=4000, res=300)
107 plot(comb_p1)
108 dev.off()
109
110 #2nd plot
111 tmp_list = list()
112 for(i in unique(method_name))
113 {
114 tmp_row = c(i, colMeans(subset(comb_df[,c(-1,-2)], comb_df[,'method_name']==i)),
115 apply(subset(comb_df[,c(-1,-2)], comb_df[,'method_name']==i), 2, function(x) sd(x)/sqrt(length(x))))
116 #tmp_row = c(i, apply(subset(comb_df[,c(-1,-2)], comb_df[,'method_name']==i), 2, sum))
117 names(tmp_row) = c('method_name', 'p', 'np', 'r', 'a', 's', 'plr', 'nlr', 'f', 'p_se', 'np_se', 'r_se', 'a_se', 's_se', 'plr_se', 'nlr_se', 'f_se')
118 tmp_list = c(tmp_list, list(tmp_row))
119 }
120 combsum_df = do.call(rbind, tmp_list)
121 tmp_rownames = combsum_df[,1]
122 combsum_df = combsum_df[,-1]
123 combsum_df = apply(combsum_df, 2, as.numeric)
124 combsum_df = data.frame(combsum_df)
125 combsum_df = cbind(method_name=tmp_rownames, combsum_df)
126
127 plot_mid_df = melt(combsum_df)
128 plot_val_df = subset(plot_mid_df, !grepl('[a-z]_se', plot_mid_df[,2]))
129 plot_se_df = subset(plot_mid_df, grepl('[a-z]_se', plot_mid_df[,2]))
130 plot_val_df = cbind(plot_val_df, plot_val_df[,3]-plot_se_df[,3], plot_val_df[,3]+plot_se_df[,3])
131 colnames(plot_val_df) = c('method_name', 'variable', 'value', 'low', 'high')
132
133 combmid_p1 = ggplot(plot_val_df, aes(x=plot_val_df[,'method_name'], y=plot_val_df[,'value'], fill=method_name)) +
134 geom_bar(stat="identity") +
135 geom_errorbar(aes(ymin=plot_val_df[,"low"], ymax=plot_val_df[,"high"]), width=.5) +
136 theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=20)) + guides(fill=FALSE) +
137 scale_fill_manual("Legend", values = plot_col) +
138 xlab('Inference algorithms') + ylab('') + ggtitle(Hmisc:::capitalize(acyclic)) +
139 facet_wrap(~variable, scales='free_y', ncol=2)
140
141 png(paste('all_algorithms_', acyclic, ifelse(single_cell, '_sc', '_nonsc'), '_meanscore.png', sep=''), width=3000, height=4000, res=300)
142 plot(combmid_p1)
143 dev.off()
144
145 #Individual plots for F-scores
146 comb_p8 = ggplot(comb_df, aes(x=reorder(comb_df[,'method_name'], -comb_df[,"F-score"], FUN=median), y=comb_df[,"F-score"])) +
147 geom_boxplot() + geom_jitter() +
148 theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=20)) +
149 xlab('Inference algorithms') + ylab('F-score') + ggtitle(Hmisc:::capitalize(acyclic))
150
151 png(paste('all_algorithms_', acyclic, ifelse(single_cell, '_sc', '_nonsc'), '_fscore.png', sep=''), width=2000, height=2000, res=300)
152 plot(comb_p8)
153 dev.off()
154
155 combmid_p8 = ggplot(combsum_df, aes(x=reorder(combsum_df[,'method_name'], -combsum_df[,"f"]), y=f, fill=method_name)) +
156 geom_bar(stat="identity") +
157 geom_errorbar(aes(ymin=combsum_df[,"f"] - combsum_df[,'f_se'], ymax=combsum_df[,"f"] + combsum_df[,'f_se']), width=.5) +
158 theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=20)) + guides(fill=FALSE) +
159 scale_fill_manual("Legend", values = plot_col) +
160 xlab('Inference algorithms') + ylab('F-score') + ggtitle(Hmisc:::capitalize(acyclic))
161
162 png(paste('all_algorithms_', acyclic, ifelse(single_cell, '_sc', '_nonsc'), '_fmeanscore.png', sep=''), width=2000, height=2000, res=300)
163 plot(combmid_p8)
164 dev.off()
165 }
166 }
167