Add examples, expand documentation

fmicompbio · Jul 2, 2023 · 27b698d · 27b698d
1 parent b2e4695
commit 27b698d
Show file tree

Hide file tree

Showing 88 changed files with 910 additions and 379 deletions.
diff --git a/R/addSampleAnnots.R b/R/addSampleAnnots.R
@@ -1,7 +1,7 @@
 #' Add sample annotations to SummarizedExperiment object
 #'
 #' Add sample annotations from an external annotation table to an existing
-#' `SummarizedExperiment` object.
+#' \code{SummarizedExperiment} object.
 #'
 #' @param sce A \code{SummarizedExperiment} object (or a derivative).
 #' @param sampleAnnot A \code{data.frame} with sample annotations. Must

diff --git a/R/defineAssayNames.R b/R/defineAssayNames.R
@@ -1,19 +1,19 @@
 #' Define assay names
 #'
 #' Starting from a base assay name and a few decisions about the workflow,
-#' define the names of assays that will be generated in the einprot workflow
-#' and included in the final \code{SingleCellExperiment} object.
+#' define the names of assays that will be generated in the \code{einprot}
+#' workflow and included in the final \code{SingleCellExperiment} object.
 #'
 #' @param aName Base assay name, typically obtained from
 #'     \code{importExperiment}.
 #' @param normMethod Character scalar, indicating the normalization method.
-#'     See \code{doNormalization} for available options. Set to `"none"` if
+#'     See \code{doNormalization} for available options. Set to \code{"none"} if
 #'     no between-sample normalization will be performed.
 #' @param doBatchCorr Logical scalar indicating whether or not batch correction
 #'     will be performed.
 #'
 #' @returns A list with assay names that will be used for assays created at
-#' different steps in the `einprot` workflows.
+#' different steps in the \code{einprot} workflows.
 #'
 #' @author Charlotte Soneson
 #' @export

diff --git a/R/doFilter.R b/R/doFilter.R
@@ -2,7 +2,7 @@
 #'
 #' Exclude features with 'Score' below \code{minScore}, 'Peptides' below
 #' \code{minPeptides}, or identified as either 'Reverse',
-#' 'Potential.contaminant' or 'Only.identified.by.site' by `MaxQuant`.
+#' 'Potential.contaminant' or 'Only.identified.by.site' by \code{MaxQuant}.
 #'
 #' @author Charlotte Soneson
 #' @export
@@ -21,6 +21,17 @@
 #'
 #' @returns A filtered object of the same type as \code{sce}.
 #'
+#' @examples
+#' sce <- importExperiment(inFile = system.file("extdata", "mq_example",
+#'                                              "1356_proteinGroups.txt",
+#'                                              package = "einprot"),
+#'                         iColPattern = "^LFQ.intensity.")$sce
+#'
+#' dim(sce)
+#' sce <- filterMaxQuant(sce = sce, minScore = 2, minPeptides = 2,
+#'                       plotUpset = TRUE)
+#' dim(sce)
+#'
 #' @importFrom SummarizedExperiment rowData
 #' @importFrom dplyr select mutate across
 #' @importFrom ComplexUpset upset
@@ -114,45 +125,45 @@ filterMaxQuant <- function(sce, minScore, minPeptides, plotUpset = TRUE,
 #' If \code{inputLevel} is "Proteins", exclude features with
 #' 'Score.Sequest.HT.Sequest.HT' below \code{minScore},
 #' 'Number.of.Peptides' below \code{minPeptides}, or identified as
-#' 'Contaminant' by ProteomeDiscoverer.
+#' 'Contaminant' by \code{ProteomeDiscoverer}.
 #' If \code{inputLevel} is "PeptideGroups", exclude features with
 #' 'Delta.Score.by.Search.Engine.Sequest.HT' below \code{minDeltaScore},
 #' 'Number.of.PSMs' below \code{minPSMs}, or identified as
-#' 'Contaminant' by `ProteomeDiscoverer`.
+#' 'Contaminant' by \code{ProteomeDiscoverer}.
 #'
 #' @author Charlotte Soneson
 #' @export
 #'
 #' @param sce A \code{SummarizedExperiment} object (or a derivative).
-#' @param inputLevel Either "Proteins" or "PeptideGroups", indicating the type
-#'     of features in \code{sce}.
+#' @param inputLevel Either \code{"Proteins"} or \code{"PeptideGroups"},
+#'     indicating the type of features in \code{sce}.
 #' @param minScore Numeric scalar, the minimum allowed value in the
 #'     'Score.Sequest.HT.Sequest.HT' column in order to retain the feature.
-#'     Only used if \code{inputLevel} is "Proteins".
+#'     Only used if \code{inputLevel} is \code{"Proteins"}.
 #' @param minPeptides Numeric scalar, the minimum allowed value in the
 #'     'Number.of.Peptides' column in order to retain the feature.
-#'     Only used if \code{inputLevel} is "Proteins".
+#'     Only used if \code{inputLevel} is \code{"Proteins"}.
 #' @param minDeltaScore Numeric scalar, the minimum allowed value in the
 #'     'Delta.Score.by.Search.Engine.Sequest.HT' column in order to retain the
-#'     feature. Only used if \code{inputLevel} is "PeptideGroups".
+#'     feature. Only used if \code{inputLevel} is \code{"PeptideGroups"}.
 #' @param minPSMs Numeric scalar, the minimum allowed value in the
 #'     'Number.of.PSMs' column in order to retain the feature.
-#'     Only used if \code{inputLevel} is "PeptideGroups".
+#'     Only used if \code{inputLevel} is \code{"PeptideGroups"}.
 #' @param masterProteinsOnly Logical scalar indicating whether only master
 #'     proteins (where the \code{Master} column value is
 #'     \code{IsMasterProtein}) should be retained.
 #' @param modificationsCol Character string pointing to a column containing
 #'     modification details. \code{excludeUnmodifiedPeptides} and
 #'     \code{keepModifications} will use information from this column. Only
-#'     used if \code{inputLevel} is "PeptideGroups".
+#'     used if \code{inputLevel} is \code{"PeptideGroups"}.
 #' @param excludeUnmodifiedPeptides Logical scalar, whether to filter out
 #'     peptides without modifications. Only used if \code{inputLevel} is
-#'     "PeptideGroups".
+#'     \code{"PeptideGroups"}.
 #' @param keepModifications Character string (or \code{NULL}) indicating
 #'     which modifications to retain in the analysis. Can be a regular
 #'     expression, which will be matched against the \code{modificationsCol}.
 #'     If \code{NULL} (the default), all rows are retained. Only used if
-#'     \code{inputLevel} is "PeptideGroups".
+#'     \code{inputLevel} is \code{"PeptideGroups"}.
 #' @param plotUpset Logical scalar, whether to generate an UpSet plot
 #'     detailing the reasons for features being filtered out. Only
 #'     generated if any feature is in fact filtered out.
@@ -162,6 +173,33 @@ filterMaxQuant <- function(sce, minScore, minPeptides, plotUpset = TRUE,
 #'
 #' @returns A filtered object of the same type as \code{sce}.
 #'
+#' @examples
+#' ## Proteins
+#' sce <- importExperiment(
+#'     inFile = system.file("extdata", "pdtmt_example",
+#'                          "Fig2_m23139_RTS_QC_varMods_Proteins.txt",
+#'                          package = "einprot"),
+#'     iColPattern = "^Abundance.F.+.Sample.")$sce
+#'
+#' dim(sce)
+#' sce <- filterPDTMT(sce = sce, inputLevel = "Proteins", minScore = 2,
+#'                    minPeptides = 2, plotUpset = TRUE)
+#' dim(sce)
+#'
+#' ## PeptideGroups
+#' sce <- importExperiment(
+#'     inFile = system.file("extdata", "pdtmt_example",
+#'                          "Fig2_m23139_RTS_QC_varMods_PeptideGroups.txt",
+#'                          package = "einprot"),
+#'     iColPattern = "^Abundance.F.+.Sample.")$sce
+#'
+#' dim(sce)
+#' sce <- filterPDTMT(sce = sce, inputLevel = "PeptideGroups",
+#'                    minPSMs = 2, plotUpset = TRUE, minDeltaScore = 0.2,
+#'                    modificationsCol = "Modifications.in.Master.Proteins",
+#'                    excludeUnmodifiedPeptides = TRUE)
+#' dim(sce)
+#'
 #' @importFrom SummarizedExperiment rowData
 #' @importFrom dplyr select mutate
 #' @importFrom ComplexUpset upset
@@ -363,9 +401,9 @@ filterPDTMT <- function(sce, inputLevel, minScore = 0, minPeptides = 0,
 #' Filter out features in FragPipe data
 #'
 #' Exclude features with 'Combined.Total.Peptides' below \code{minPeptides},
-#' or identified as either 'Reverse' (Protein name starting with
+#' or identified as either 'Reverse' (Protein name matching
 #' \code{revPattern}) or 'Potential.contaminant' (Protein name starting
-#' with `contam_`) by `FragPipe`.
+#' with \code{contam_}) by \code{FragPipe}.
 #'
 #' @author Charlotte Soneson
 #' @export
@@ -385,6 +423,18 @@ filterPDTMT <- function(sce, inputLevel, minScore = 0, minPeptides = 0,
 #'
 #' @returns A filtered object of the same type as \code{sce}.
 #'
+#' @examples
+#' sce <- importExperiment(inFile = system.file("extdata", "fp_example",
+#'                                              "combined_protein.tsv",
+#'                                              package = "einprot"),
+#'                         iColPattern = ".MaxLFQ.Intensity$")$sce
+#'
+#' dim(sce)
+#' sce <- filterFragPipe(sce = sce, minPeptides = 2,
+#'                       plotUpset = TRUE,
+#'                       revPattern = "^rev_")
+#' dim(sce)
+#'
 #' @importFrom SummarizedExperiment rowData
 #' @importFrom dplyr select mutate across
 #' @importFrom ComplexUpset upset

diff --git a/R/doImputation.R b/R/doImputation.R
@@ -1,16 +1,17 @@
 #' Perform imputation of NA values
 #'
-#' Perform imputation of missing values (represented by `NA`) in one assay in
-#' a `SummarizedExperiment`, and generate a new assay containing the complete
-#' data (including imputed values).
+#' Perform imputation of missing values (represented by \code{NA}) in one assay
+#' in a \code{SummarizedExperiment}, and generate a new assay containing the
+#' complete data (including imputed values).
 #'
 #' @param sce A \code{SummarizedExperiment} object (or a derivative).
 #' @param method Character scalar giving the imputation method. Currently,
-#'     `"MinProb"` (provided in the \code{MsCoreUtils} package) and
-#'     `"impSeqRob"` (provided in the \code{rrcovNA} package) are supported.
+#'     \code{"MinProb"} (provided in the \code{MsCoreUtils} package) and
+#'     \code{"impSeqRob"} (provided in the \code{rrcovNA} package) are
+#'     supported.
 #' @param assayName Character scalar giving the name of the assay in \code{sce}
 #'     to be imputed. The matrix should have missing values represented as
-#'     `NA`.
+#'     \code{NA}.
 #' @param imputedAssayName Character scalar providing the name that will be
 #'     given to the assay containing the imputed values.
 #'

diff --git a/R/doPCA.R b/R/doPCA.R
@@ -36,13 +36,15 @@
 #'
 #' @returns A list with the following components:
 #' \itemize{
-#'  \item{sce}{the input sce, expanded with the calculated PCs, in addition the
-#'  feature coefficients will be added to the `rowData`}
-#'  \item{plotcoord}{a list of `ggplot` objects containing coordinate plots for
-#'  the desired pairs of components}
-#'  \item{plotcombined}{a list of `ggplot` objects containing combined
-#'  coordinate, scree and coefficient plots for the desired pairs of components}
-#'  \item{plotpairs}{a `ggpairs` plot with all extracted components}
+#'  \item{\code{sce}}{ - the input sce, expanded with the calculated PCs. In
+#'  addition, the feature coefficients are added to the \code{rowData}.}
+#'  \item{\code{plotcoord}}{ - a list of \code{ggplot} objects containing
+#'  coordinate plots for the desired pairs of components.}
+#'  \item{\code{plotcombined}}{ - a list of \code{ggplot} objects containing
+#'  combined coordinate, scree and coefficient plots for the desired pairs of
+#'  components.}
+#'  \item{\code{plotpairs}}{ - a \code{ggpairs} plot with all extracted
+#'  components.}
 #' }
 #'
 #' @importFrom scater runPCA

diff --git a/R/fixFeatureIds.R b/R/fixFeatureIds.R
@@ -48,8 +48,9 @@ getNthId <- function(df, colName, N, separator = ";") {
 
 #' Combine multiple columns into a new column
 #'
-#' Combine values from multiple columns from a `data.frame` into a new column,
-#' typically representing an identifier used to represent or label features.
+#' Combine values from multiple columns from a \code{data.frame} into a new
+#' column, typically representing an identifier used to represent or label
+#' features.
 #'
 #' @export
 #' @author Charlotte Soneson
@@ -58,10 +59,10 @@ getNthId <- function(df, colName, N, separator = ";") {
 #' @param combineCols Character vector giving the names of the columns of
 #'     \code{df} that should be combined.
 #' @param combineWhen Character scalar indicating when to combine columns.
-#'     Must be either `"always"` (which always combines the columns),
-#'     `"nonunique"` (which only combines the columns if it's necessary to
-#'     obtain unique names), or `"missing"` (which uses subsequent columns if
-#'     all previous columns have missing values in a given position).
+#'     Must be either \code{"always"} (which always combines the columns),
+#'     \code{"nonunique"} (which only combines the columns if it's necessary to
+#'     obtain unique names), or \code{"missing"} (which uses subsequent columns
+#'     if all previous columns have missing values in a given position).
 #' @param splitSeparator Character scalar, character vector of length
 #'     equal to the length of \code{combineCols}, or \code{NULL}. If not
 #'     \code{NULL}, indicates the separator by which to split the entries in

diff --git a/R/getIntensityColumns.R b/R/getIntensityColumns.R
@@ -1,7 +1,9 @@
-#' Get column names from quantification file
+#' Get column names
 #'
-#' @param inFile The path to an input file (e.g. MaxQuant
-#'     peptideGroups.txt or ProteomeDiscoverer Proteins.txt).
+#' Utility function to retrieve column names from a quantification text file.
+#'
+#' @param inFile Path to a tab-delimited input text file (e.g. \code{MaxQuant}
+#'     proteinGroups.txt or \code{ProteomeDiscoverer} Proteins.txt).
 #'
 #' @export
 #' @author Charlotte Soneson
@@ -31,8 +33,8 @@ getColumnNames <- function(inFile) {
 #' define specific columns to retain or exclude. All column names in the
 #' file can be listed using the \code{getColumnNames} function.
 #'
-#' @param inFile The path to an input file (e.g. MaxQuant
-#'     peptideGroups.txt or ProteomeDiscoverer Proteins.txt).
+#' @param inFile Path to a tab-delimited input text file (e.g. \code{MaxQuant}
+#'     proteinGroups.txt or \code{ProteomeDiscoverer} Proteins.txt).
 #' @param iColPattern Character scalar defining a regular expression to
 #'     identify intensity columns.
 #' @param includeOnlySamples,excludeSamples Character vectors defining
@@ -54,7 +56,8 @@ getColumnNames <- function(inFile) {
 #' icols <- getIntensityColumns(system.file("extdata", "mq_example",
 #'                                          "1356_proteinGroups.txt",
 #'                                          package = "einprot"),
-#'                              iColPattern = "^LFQ\\.intensity\\.")
+#'                              iColPattern = "^LFQ\\.intensity\\.",
+#'                              excludeSamples = "Adnp")
 #' icols
 #'
 #' @importFrom utils read.delim

diff --git a/R/getSupportedSpecies.R b/R/getSupportedSpecies.R
@@ -1,11 +1,13 @@
-#' Get a list of species supported by einprot
+#' List supported species
+#'
+#' Get a list of species supported by \code{einprot}.
 #'
 #' @author Charlotte Soneson
 #' @export
 #'
-#' @returns A `data.frame` with three columns (`taxId`,
-#'     `species` and `speciesCommon`) for each of the species
-#'     supported by `einprot`.
+#' @returns A \code{data.frame} with three columns (\code{taxId},
+#'     \code{species} and \code{speciesCommon}) for each of the species
+#'     supported by \code{einprot}.
 #'
 #' @examples
 #' getSupportedSpecies()
@@ -25,8 +27,8 @@ getSupportedSpecies <- function() {
 #' Get species info
 #'
 #' Get the scientific species name, the common name and the taxonomic ID for
-#' any of the species supported by `einprot` (see \code{getSupportedSpecies()}
-#' for a list of supported species).
+#' any of the species supported by \code{einprot} (see
+#' \code{getSupportedSpecies()} for a list of supported species).
 #'
 #' @param species Character or numeric scalar, representing either a
 #'     scientific species ID, a common species name or a taxonomic ID for

diff --git a/R/getUniProtToGeneSymbolMapping.R b/R/getUniProtToGeneSymbolMapping.R
@@ -27,10 +27,10 @@
 
 #' Get mapping from UniProt IDs to another ID type
 #'
-#' Generate a data.frame with the mapping between UniProtIDs and another ID
-#' type.
+#' Generate a \code{data.frame} with the mapping between UniProt IDs and
+#' another ID type.
 #' The mapping is obtained from the UniProt ID mapping files (downloaded from
-#' https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/).
+#' \url{https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/}).
 #'
 #' @param species Either a taxonomy ID, a species ID or a common species name.
 #'     See \code{getSupportedSpecies()} for valid values.
@@ -47,6 +47,10 @@
 #'     duplicated values, if there is not a one-to-one mapping between
 #'     UniProtIDs and the other ID type.
 #'
+#' @examples
+#' df <- getUniProtToIDMapping("fission yeast", targetId = "Gene_Name")
+#' head(df)
+#'
 #' @importFrom readr read_tsv
 #' @importFrom tidyr pivot_wider unnest
 #' @importFrom dplyr filter rename all_of

diff --git a/R/importExperiment.R b/R/importExperiment.R
@@ -73,19 +73,22 @@
 
 #' Import an abundance file
 #'
-#' Import data from a quantification file (e.g. MaxQuant peptideGroups.txt,
-#' Proteome Discoverer Proteins.txt) into a `SingleCellExperiment` object.
+#' Import data from a quantification file (e.g. \code{MaxQuant}
+#' proteinGroups.txt, \code{Proteome Discoverer} Proteins.txt) into a
+#' \code{SingleCellExperiment} object.
 #' Typically sample-specific columns will be used to form assays, and other
-#' columns will be added as `rowData` columns.
+#' columns will be added as \code{rowData} columns.
 #'
-#' @param inFile The path to an input text file (e.g. MaxQuant
-#'     peptideGroups.txt, PD Proteins.txt or FragPipe combined_protein.tsv).
+#' @param inFile Path to a tab-delimited input text file (e.g. \code{MaxQuant}
+#'     proteinGroups.txt, \code{Proteome Discoverer} Proteins.txt or
+#'     \code{FragPipe} combined_protein.tsv).
 #' @param iColPattern Character scalar defining a regular expression to
-#'     identify sample columns. For MaxQuant output, this is typically
-#'     one of `"^iBAQ\\."`, `"^LFQ\\.intensity\\."` or `"^Intensity\\."`. For
-#'     PD, it is typically `"^Abundance\\."`, `"^Abundance\\.F[0-9]+\\."` or
-#'     `"^Abundance\\.F.+\\.Sample\\."`. For FragPipe,
-#'     it is typically `"\\.MaxLFQ\\.Intensity$"`. Columns matching the
+#'     identify sample columns. For \code{MaxQuant} output, this is typically
+#'     one of \code{"^iBAQ\\."}, \code{"^LFQ\\.intensity\\."} or
+#'     \code{"^Intensity\\."}. For \code{Proteome Discoverer}, it is typically
+#'     \code{"^Abundance\\."}, \code{"^Abundance\\.F[0-9]+\\."} or
+#'     \code{"^Abundance\\.F.+\\.Sample\\."}. For \code{FragPipe},
+#'     it is typically \code{"\\.MaxLFQ\\.Intensity$"}. Columns matching the
 #'     given pattern will form the first assay in the output object.
 #' @param includeOnlySamples,excludeSamples Character vectors defining
 #'     regular expressions to match against the extracted columns to

diff --git a/R/listComplexDBs.R b/R/listComplexDBs.R
@@ -1,5 +1,8 @@
 #' List available complex DBs
 #'
+#' \code{einprot} provides a built-in database of known complexes. This
+#' function lists available versions of this database.
+#'
 #' @param dbDir Character scalar pointing to the database directory to search
 #'     for complex DBs.
 #'