Some clarifications in the workflows

fmicompbio · Jul 15, 2023 · 194cad8 · 194cad8
1 parent 7b8beb5
commit 194cad8
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 23 deletions.
diff --git a/inst/extdata/process_FragPipe_template.Rmd b/inst/extdata/process_FragPipe_template.Rmd
@@ -141,7 +141,7 @@ settingsList <- list(
     "Min. protein score" = minScore,
     "Imputation method" = imputeMethod,
     "Assays(s) to use for exported values" = paste(assaysForExport, collapse = ", "), 
-    "Min. nbr valid values" = minNbrValidValues,
+    "Min. nbr valid values required for testing" = minNbrValidValues,
     "Model fit" = ifelse(singleFit, "Single (one model fit for all samples)", 
                          "Separate model fit for each comparison"),
     "Groups to merge" = paste(unlist(
@@ -153,7 +153,7 @@ settingsList <- list(
                           collapse = "; "),
     "Control group" = ctrlGroup,
     "Do all pairwise comparisons" = allPairwiseComparisons,
-    "Subtract baseline" = subtractBaseline,
+    "Batch correction via baseline subtraction" = subtractBaseline,
     "Baseline group" = baselineGroup,
     "Normalization method" = normMethod,
     "Spike features" = paste(spikeFeatures, collapse = ","),
@@ -167,8 +167,8 @@ settingsList <- list(
     "s0" = volcanoS0,
     "Features to always label in volcano plots" = paste(volcanoFeaturesToLabel,
                                                         collapse = ", "),
-    "Feature collections" = paste(includeFeatureCollections, collapse = "; "),
-    "Min size to keep feature set" = minSizeToKeepSet,
+    "Feature collections for enrichment testing" = paste(includeFeatureCollections, collapse = "; "),
+    "Minimal required size for feature sets" = minSizeToKeepSet,
     "Complexes file" = gsub(".+\\/(.+.rds)", "\\1", complexDbPath),
     "Complexes from species" = complexSpecies,
     "Custom complexes" = paste(names(customComplexes), collapse = ";"),
@@ -900,6 +900,8 @@ for (nm in names(testres$topsets)) {
 The table below provides autogenerated links to the UniProt and 
 AlphaFold pages (as well as selected organism-specific databases) for the 
 majority protein IDs corresponding to each feature in the data set. 
+The `pid` column represents the unique feature ID used by `einprot`, and 
+the `einprotLabel` column contains the user-defined feature labels.
 UniProt is a resource of protein sequence and functional information 
 hosted by EMBL-EBI, PIR and SIB. The AlphaFold Protein Structure Database, 
 developed by DeepMind and EMBL-EBI, provides open access to protein structure 
@@ -1044,12 +1046,12 @@ dev.off()
 
 # Correlation plot
 
-The plot below shows the pairwise correlation between all pairs of samples, 
-based on the `r aNames$assayImputed` assay. 
+The plot below shows the pairwise Pearson correlations between all pairs of 
+samples, based on the `r aNames$assayImputed` assay. 
 
 ```{r corrplot, fig.height = 9, fig.width = 10}
 plotassay <- assay(sce, aNames$assayImputed)
-ggplot(data = as.data.frame(cor(plotassay)) %>%
+ggplot(data = as.data.frame(cor(plotassay, method = "pearson")) %>%
            rownames_to_column("sample1") %>% 
            tidyr::pivot_longer(names_to = "sample2", values_to = "correlation",
                                -"sample1"), 

diff --git a/inst/extdata/process_MaxQuant_template.Rmd b/inst/extdata/process_MaxQuant_template.Rmd
@@ -141,7 +141,7 @@ settingsList <- list(
     "Min. protein score" = minScore,
     "Imputation method" = imputeMethod,
     "Assays(s) to use for exported values" = paste(assaysForExport, collapse = ", "), 
-    "Min. nbr valid values" = minNbrValidValues,
+    "Min. nbr valid values required for testing" = minNbrValidValues,
     "Model fit" = ifelse(singleFit, "Single (one model fit for all samples)", 
                          "Separate model fit for each comparison"),
     "Groups to merge" = paste(unlist(
@@ -153,7 +153,7 @@ settingsList <- list(
                           collapse = "; "),
     "Control group" = ctrlGroup,
     "Do all pairwise comparisons" = allPairwiseComparisons,
-    "Subtract baseline" = subtractBaseline,
+    "Batch correction via baseline subtraction" = subtractBaseline,
     "Baseline group" = baselineGroup,
     "Normalization method" = normMethod,
     "Spike features" = paste(spikeFeatures, collapse = ","),
@@ -167,8 +167,8 @@ settingsList <- list(
     "s0" = volcanoS0,
     "Features to always label in volcano plots" = paste(volcanoFeaturesToLabel,
                                                         collapse = ", "),
-    "Feature collections" = paste(includeFeatureCollections, collapse = "; "),
-    "Min size to keep feature set" = minSizeToKeepSet,
+    "Feature collections for enrichment testing" = paste(includeFeatureCollections, collapse = "; "),
+    "Minimal required size for feature sets" = minSizeToKeepSet,
     "Complexes file" = gsub(".+\\/(.+.rds)", "\\1", complexDbPath),
     "Complexes from species" = complexSpecies,
     "Custom complexes" = paste(names(customComplexes), collapse = ";"),
@@ -903,6 +903,8 @@ for (nm in names(testres$topsets)) {
 The table below provides autogenerated links to the UniProt and 
 AlphaFold pages (as well as selected organism-specific databases) for the 
 majority protein IDs corresponding to each feature in the data set. 
+The `pid` column represents the unique feature ID used by `einprot`, and 
+the `einprotLabel` column contains the user-defined feature labels.
 UniProt is a resource of protein sequence and functional information 
 hosted by EMBL-EBI, PIR and SIB. The AlphaFold Protein Structure Database, 
 developed by DeepMind and EMBL-EBI, provides open access to protein structure 
@@ -1047,12 +1049,12 @@ dev.off()
 
 # Correlation plot
 
-The plot below shows the pairwise correlation between all pairs of samples, 
-based on the `r aNames$assayImputed` assay. 
+The plot below shows the pairwise Pearson correlations between all pairs of 
+samples, based on the `r aNames$assayImputed` assay. 
 
 ```{r corrplot, fig.height = 9, fig.width = 10}
 plotassay <- assay(sce, aNames$assayImputed)
-ggplot(data = as.data.frame(cor(plotassay)) %>%
+ggplot(data = as.data.frame(cor(plotassay, method = "pearson")) %>%
            rownames_to_column("sample1") %>% 
            tidyr::pivot_longer(names_to = "sample2", values_to = "correlation",
                                -"sample1"), 

diff --git a/inst/extdata/process_PD_TMT_PTM_template.Rmd b/inst/extdata/process_PD_TMT_PTM_template.Rmd
@@ -96,15 +96,15 @@ makeTableFromList(list(
 settingsList <- list(
     "Assay to use for tests" = assayForTests,
     "Assay with imputation information" = assayImputation,
-    "Min. nbr valid values" = minNbrValidValues,
+    "Min. nbr valid values required for testing" = minNbrValidValues,
     "Model fit" = ifelse(singleFit, "Single (one model fit for all samples)", 
                          "Separate model fit for each comparison"),
     "Comparisons" = paste(unlist(lapply(comparisons, 
                                         function(x) paste(x, collapse = " vs "))),
                           collapse = "; "),
     "Control group" = ctrlGroup,
     "Do all pairwise comparisons" = allPairwiseComparisons,
-    "Subtract baseline" = subtractBaseline,
+    "Batch correction via baseline subtraction" = subtractBaseline,
     "Baseline group" = baselineGroup,
     "Statistical testing approach" = testType,
     "Minimal fold change (limma/treat)" = minlFC,
@@ -523,6 +523,8 @@ if (length(tests) > 1 && sum(colSums(tmpsign) > 0) > 1) {
 The table below provides autogenerated links to the UniProt and 
 AlphaFold pages (as well as selected organism-specific databases) for the 
 protein IDs corresponding to each feature in the data set. 
+The `pid` column represents the unique feature ID used by `einprot`, and 
+the `einprotLabel` column contains the user-defined feature labels.
 UniProt is a resource of protein sequence and functional information 
 hosted by EMBL-EBI, PIR and SIB. The AlphaFold Protein Structure Database, 
 developed by DeepMind and EMBL-EBI, provides open access to protein structure 

diff --git a/inst/extdata/process_PD_TMT_template.Rmd b/inst/extdata/process_PD_TMT_template.Rmd
@@ -152,7 +152,7 @@ settingsList <- list(
     "Only retain master proteins" = masterProteinsOnly,
     "Imputation method" = imputeMethod,
     "Assays(s) to use for exported values" = paste(assaysForExport, collapse = ", "), 
-    "Min. nbr valid values" = minNbrValidValues,
+    "Min. nbr valid values required for testing" = minNbrValidValues,
     "Model fit" = ifelse(singleFit, "Single (one model fit for all samples)", 
                          "Separate model fit for each comparison"),
     "Groups to merge" = paste(unlist(
@@ -164,7 +164,7 @@ settingsList <- list(
                           collapse = "; "),
     "Control group" = ctrlGroup,
     "Do all pairwise comparisons" = allPairwiseComparisons,
-    "Subtract baseline" = subtractBaseline,
+    "Batch correction via baseline subtraction" = subtractBaseline,
     "Baseline group" = baselineGroup,
     "Normalization method" = normMethod,
     "Spike features" = paste(spikeFeatures, collapse = ","),
@@ -178,8 +178,8 @@ settingsList <- list(
     "s0" = volcanoS0,
     "Features to always label in volcano plots" = paste(volcanoFeaturesToLabel,
                                                         collapse = ", "),
-    "Feature collections" = paste(includeFeatureCollections, collapse = "; "),
-    "Min size to keep feature set" = minSizeToKeepSet,
+    "Feature collections for enrichment testing" = paste(includeFeatureCollections, collapse = "; "),
+    "Minimal required size for feature sets" = minSizeToKeepSet,
     "Complexes file" = gsub(".+\\/(.+.rds)", "\\1", complexDbPath),
     "Complexes from species" = complexSpecies,
     "Custom complexes" = paste(names(customComplexes), collapse = ";"),
@@ -907,6 +907,8 @@ for (nm in names(testres$topsets)) {
 The table below provides autogenerated links to the UniProt and 
 AlphaFold pages (as well as selected organism-specific databases) for the 
 protein IDs corresponding to each feature in the data set. 
+The `pid` column represents the unique feature ID used by `einprot`, and 
+the `einprotLabel` column contains the user-defined feature labels.
 UniProt is a resource of protein sequence and functional information 
 hosted by EMBL-EBI, PIR and SIB. The AlphaFold Protein Structure Database, 
 developed by DeepMind and EMBL-EBI, provides open access to protein structure 
@@ -1036,12 +1038,12 @@ plot(hclust(sampledists, method = "ward.D2"), hang = -1, xlab = "", sub = "")
 
 # Correlation plot
 
-The plot below shows the pairwise correlation between all pairs of samples, 
-based on the `r aNames$assayImputed` assay. 
+The plot below shows the pairwise Pearson correlations between all pairs of 
+samples, based on the `r aNames$assayImputed` assay. 
 
 ```{r corrplot, fig.height = 9, fig.width = 10}
 plotassay <- assay(sce, aNames$assayImputed)
-ggplot(data = as.data.frame(cor(plotassay)) %>%
+ggplot(data = as.data.frame(cor(plotassay, method = "pearson")) %>%
            rownames_to_column("sample1") %>% 
            tidyr::pivot_longer(names_to = "sample2", values_to = "correlation",
                                -"sample1"),