Skip to content

Commit

Permalink
Analysis 5.3
Browse files Browse the repository at this point in the history
  • Loading branch information
SergiPicart committed Oct 20, 2017
1 parent 30735d4 commit 87ea474
Show file tree
Hide file tree
Showing 12 changed files with 2,781 additions and 1,722 deletions.
6 changes: 3 additions & 3 deletions 00_metadata/03_sessionInfo_diseaes.txt
Expand Up @@ -15,8 +15,8 @@ attached base packages:
[1] stats graphics grDevices utils datasets methods base

other attached packages:
[1] COSNet_1.10.0 RANKS_1.0 EGAD_1.4.1 diffuStats_0.101.1 igraph_1.1.2 caret_6.0-76 ggplot2_2.2.1
[8] lattice_0.20-35 dplyr_0.5.0 plyr_1.8.4
[1] COSNet_1.10.0 RANKS_1.0 EGAD_1.4.1 diffuStats_0.101.1 igraph_1.1.2 caret_6.0-76
[7] ggplot2_2.2.1 lattice_0.20-35 dplyr_0.5.0 plyr_1.8.4

loaded via a namespace (and not attached):
[1] minqa_1.2.4 colorspace_1.3-2 hwriter_1.3.2 htmlTable_1.9
Expand All @@ -41,7 +41,7 @@ loaded via a namespace (and not attached):
[77] gridSVG_1.5-1 S4Vectors_0.14.7 foreach_1.4.3 checkmate_1.8.2
[81] caTools_1.17.1 BiocGenerics_0.22.1 GenomeInfoDb_1.2.5 rlang_0.1.1
[85] pkgconfig_2.0.1 bitops_1.0-6 labeling_0.3 htmlwidgets_0.9
[89] magrittr_1.5 R6_2.2.1 IRanges_2.10.5 gplots_3.0.1
[89] magrittr_1.5 R6_2.2.2 IRanges_2.10.5 gplots_3.0.1
[93] Hmisc_4.0-3 DBI_0.6-1 foreign_0.8-69 mgcv_1.8-22
[97] survival_2.41-3 RCurl_1.95-4.8 nnet_7.3-12 tibble_1.3.1
[101] car_2.1-4 precrec_0.9.1 KernSmooth_2.23-15 grid_3.4.2
Expand Down
44 changes: 44 additions & 0 deletions 00_metadata/03_sessionInfo_disease.txt
@@ -0,0 +1,44 @@
R version 3.4.2 (2017-09-28)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 14.04.5 LTS

Matrix products: default
BLAS: /usr/lib/openblas-base/libblas.so.3
LAPACK: /usr/lib/lapack/liblapack.so.3.0

locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
[6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages:
[1] stats graphics grDevices utils datasets methods base

other attached packages:
[1] ggsci_2.7 COSNet_1.10.0 RANKS_1.0 EGAD_1.4.1 diffuStats_0.101.1 igraph_1.1.2 caret_6.0-76
[8] ggplot2_2.2.1 lattice_0.20-35 dplyr_0.5.0 plyr_1.8.4

loaded via a namespace (and not attached):
[1] minqa_1.2.4 colorspace_1.3-2 hwriter_1.3.2 htmlTable_1.9 XVector_0.16.0
[6] PerfMeas_1.2.1 RcppArmadillo_0.8.100.1.0 GenomicRanges_1.28.2 base64enc_0.1-3 base64_2.0
[11] MatrixModels_0.4-1 affyio_1.46.0 AnnotationDbi_1.38.1 codetools_0.2-15 splines_3.4.2
[16] impute_1.50.1 knitr_1.16 Formula_1.2-1 jsonlite_1.5 nloptr_1.0.4
[21] Cairo_1.5-9 pbkrtest_0.4-7 annotate_1.54.0 cluster_2.0.6 vsn_3.44.0
[26] graph_1.54.0 compiler_3.4.2 httr_1.3.1 backports_1.0.5 assertthat_0.2.0
[31] Matrix_1.2-11 lazyeval_0.2.0 limma_3.32.2 SVGAnnotation_0.93-1 BeadDataPackR_1.28.0
[36] acepack_1.4.1 htmltools_0.3.6 quantreg_5.33 tools_3.4.2 gtable_0.2.0
[41] affy_1.54.0 reshape2_1.4.2 Rcpp_0.12.13 Biobase_2.36.2 arrayQualityMetrics_3.32.0
[46] Biostrings_2.44.2 gdata_2.17.0 preprocessCore_1.38.1 nlme_3.1-131 setRNG_2013.9-1
[51] iterators_1.0.8 stringr_1.2.0 lme4_1.1-13 affyPLM_1.52.1 gtools_3.5.0
[56] XML_3.98-1.9 zlibbioc_1.22.0 MASS_7.3-47 zoo_1.8-0 scales_0.4.1
[61] BiocInstaller_1.26.1 RBGL_1.52.0 parallel_3.4.2 beadarray_2.26.1 GEOquery_2.42.0
[66] expm_0.999-2 SparseM_1.77 RColorBrewer_1.1-2 memoise_1.1.0 gridExtra_2.2.1
[71] rpart_4.1-11 latticeExtra_0.6-28 stringi_1.1.5 RSQLite_1.1-2 gcrma_2.48.0
[76] genefilter_1.58.1 gridSVG_1.5-1 S4Vectors_0.14.7 foreach_1.4.3 checkmate_1.8.2
[81] caTools_1.17.1 BiocGenerics_0.22.1 GenomeInfoDb_1.2.5 rlang_0.1.1 pkgconfig_2.0.1
[86] bitops_1.0-6 labeling_0.3 htmlwidgets_0.9 magrittr_1.5 R6_2.2.1
[91] IRanges_2.10.5 gplots_3.0.1 Hmisc_4.0-3 DBI_0.6-1 foreign_0.8-69
[96] mgcv_1.8-22 survival_2.41-3 RCurl_1.95-4.8 nnet_7.3-12 tibble_1.3.1
[101] car_2.1-4 precrec_0.9.1 KernSmooth_2.23-15 grid_3.4.2 data.table_1.10.4
[106] NetPreProc_1.1 ModelMetrics_1.1.0 digest_0.6.12 xtable_1.8-2 illuminaio_0.18.0
[111] openssl_0.9.7 RcppParallel_4.3.20 stats4_3.4.2 munsell_0.4.3
1 change: 1 addition & 0 deletions 03_config.R
Expand Up @@ -33,6 +33,7 @@ graph_alzh <- paste0(dir_data, "/graph_alzh.RData")
# Section 5.2 gdocs
dir_data3 <- "03_data"
graph_4disease <- paste0(dir_data3, "/graph_4disease.RData")
file_descriptive <- paste0(dir_data3, "/descriptive_diseases.csv")
file_kernel3 <- paste0(dir_kernel, "/Net4_weightsIn01.RData")
dir_performance3 <- "03_performance"
#dir_scores <- "02_scores"
Expand Down
5 changes: 5 additions & 0 deletions 03_data/descriptive_diseases.csv
@@ -0,0 +1,5 @@
"disease","n_genetic","n_drug","overlap","p_value"
"Alzheimers disease",432,103,12,0.000387288955953427
"asthma",378,80,10,0.00023349512966266
"chronic obstructive pulmonary disease",280,116,7,0.020899001242837
"rheumatoid arthritis",241,95,7,0.00334908839691964
67 changes: 58 additions & 9 deletions 03_multiple_disease.Rmd
Expand Up @@ -18,6 +18,7 @@ library(dplyr)
library(caret)
library(igraph)
library(ggplot2)
library(ggsci)
library(diffuStats)
library(EGAD)
Expand Down Expand Up @@ -54,10 +55,15 @@ df_input <- plyr::ddply(
nm <- V(g_filter)$name
x <- nm %in% (filter(disease, input == 1)$STRING_id)
val <- nm %in% (filter(disease, validation == 1)$STRING_id)
genetic_original <- setNames(numeric(length(nm)), nm)
genetic_original[disease$STRING_id] <- disease$association_score.datatypes.genetic_association
data.frame(
STRING_id = nm,
score = x*1,
validation = val*1
validation = val*1,
genetic_original = genetic_original
)
}
)
Expand All @@ -69,8 +75,8 @@ df_input <- plyr::ddply(
# all(a$score == b$validation)
df_descriptive <- plyr::ddply(
select(df_input, -validation) %>%
reshape2::dcast(STRING_id+disease~input_type, fun.aggregate = NULL),
select(df_input, -c(validation, genetic_original)) %>%
reshape2::dcast(STRING_id+disease~input_type, fun.aggregate = NULL, value.var = "score"),
"disease",
function(df) {
tibble(
Expand All @@ -82,6 +88,7 @@ df_descriptive <- plyr::ddply(
},
.id = "disease")
write.csv(df_descriptive, file = config$file_descriptive, row.names = FALSE)
df_descriptive
# check for NAs
Expand Down Expand Up @@ -123,10 +130,14 @@ list_metrics <- list(
# reproducibility
set.seed(1)
# A centrality measure
pr <- page.rank(g_filter)$vector
# This step struggles with 16GB of RAM...
# one repetition took around 20-25 minutes
df_perf <- plyr::ddply(
df_input,
# subset(df_input, input_type == "genetic"),
c("disease", "input_type"),
function(df_in) {
# browser()
Expand Down Expand Up @@ -200,6 +211,20 @@ df_perf <- plyr::ddply(
)$scores
list_scores$COSNet <- list_scores$COSNet[names_val]
# references and baselines
list_scores$random <- setNames(
sample(length(names_val)), names_val
)
list_scores$randomraw <- diffuStats::diffuse(
K = K,
scores = setNames(sample(vec_diffustats),
names(vec_diffustats)),
method = "raw")[names_val]
list_scores$pr <- pr[names_val]
list_scores$genetic <- setNames(
df_in$genetic_original[-split_cv_train],
names_val)
# compute metrics
df_metrics <- plyr::ldply(
list_scores,
Expand All @@ -215,10 +240,10 @@ df_perf <- plyr::ddply(
# return the metrics
df_metrics
},
.id = "split_cv"
.id = "split_cv",
.progress = "text"
)
},
.progress = "text"
}
)
# for safety
save(df_perf, file = paste0(config$dir_performance3, "/backup_perf.RData"))
Expand Down Expand Up @@ -253,16 +278,40 @@ write.csv(
g <- ggplot(df_plot, aes(x = method, y = value)) +
geom_boxplot(aes(fill = method), outlier.size = .3, lwd = .2) +
theme_bw() +
scale_fill_brewer(palette = "Set3", guide = FALSE) +
facet_grid(variable~input_type + disease, scales = "free") +
xlab("Method") +
ylab("Performance") +
ggtitle(paste0(k, "-fold (repeated x", times, ") CV"),
subtitle = "Measures averaged per fold") +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 6.5))
ggsave(paste0(config$dir_performance3, "/4diseases.png"),
plot = g, width = 30, height = 24, units = "cm")
# Draw one figure per input type and save each as its own PNG.
# Within a figure, facets are laid out with one row per `variable`
# and one column per disease; only the y axis is freed across facets.
plyr::d_ply(
  df_plot,
  "input_type",
  function(df_type) {
    # The data is split by input_type, so the first value labels the piece
    input_t <- as.character(df_type$input_type[1])
    plot_title <- paste0(
      "Input: ", input_t, ", ", k, "-fold (repeated x", times, ") CV")
    out_file <- paste0(
      config$dir_performance3, "/4diseases_", input_t, ".png")

    gg <- ggplot(df_type, aes(x = method, y = value)) +
      geom_boxplot(aes(fill = method), outlier.size = .3, lwd = .2) +
      scale_fill_brewer(palette = "Set3", guide = FALSE) +
      theme_bw() +
      facet_grid(variable~disease, scales = "free_y") +
      xlab("Method") +
      ylab("Performance") +
      ggtitle(plot_title, subtitle = "Measures averaged per fold") +
      theme(
        aspect.ratio = 1,
        # small, slanted tick labels so the method names do not overlap
        axis.text.x = element_text(
          angle = 45, vjust = 1, hjust = 1, size = 6.5))

    ggsave(
      filename = out_file, device = NULL, plot = gg,
      width = 15, height = 24, units = "cm")
  }
)
```

# Rank the methods according to their median value
Expand All @@ -288,7 +337,7 @@ g_list <- plyr::dlply(
y = rank,
fill = method)) +
geom_boxplot() +
scale_fill_discrete(guide = FALSE) +
scale_fill_brewer(palette = "Set3", guide = FALSE) +
xlab("Method") +
ylab("Rank (lower is better)") +
ggtitle(paste0("Input: ", input_t)) +
Expand All @@ -312,6 +361,6 @@ ggsave(paste0(config$dir_performance3, "/4diseases_method_ranking.png"),

```{r}
out <- capture.output(sessionInfo())
writeLines(out, con = paste0(config$dir_metadata, "/03_sessionInfo_diseaes.txt"))
writeLines(out, con = paste0(config$dir_metadata, "/03_sessionInfo_disease.txt"))
```

Binary file modified 03_performance/4diseases.RData
Binary file not shown.

0 comments on commit 87ea474

Please sign in to comment.