refactor: styler & linter

currocam · Jul 17, 2023 · edfc834 · edfc834
1 parent d0aeda3
commit edfc834
Show file tree

Hide file tree

Showing 37 changed files with 906 additions and 909 deletions.
diff --git a/.lintr b/.lintr
@@ -0,0 +1,2 @@
+linters: linters_with_defaults() # see vignette("lintr")
+encoding: "UTF-8"
diff --git a/R/HMMER.R b/R/HMMER.R
@@ -58,7 +58,7 @@ post_query <- function(query) {
 # Parses results from HMMER into a tibble.
 #
 # @param results list with results (@seealso HMMERutils::post_query())
-parse_results_into_tbl <- function(results) {
+parse_results_into_tbl <- function(results) { # nolint
     tibble::tibble(
         "algorithm" = purrr::pluck(results, "algo", .default = NA),
         "uuid" = purrr::pluck(results, "uuid", .default = NA),
@@ -67,7 +67,7 @@ parse_results_into_tbl <- function(results) {
     ) %>%
         tidyr::unnest_wider("stats", names_sep = ".") %>%
         tidyr::unnest_wider("hits", names_sep = ".") %>%
-        dplyr::mutate("hits.evalue" = fix_evalue_column(.[["hits.evalue"]]))
+        dplyr::mutate("hits.evalue" = fix_evalue_column(.[["hits.evalue"]])) # nolint
 }
 
 # Fix E-value column which sometimes can be a list, a character vector
@@ -91,7 +91,7 @@ fix_evalue_column <- function(column) {
 # - seqdb a string with seqdb (for phmmer, hmmsearch or jackhmmer)
 # - timeout_in_seconds an integer with the number of
 # seconds to wait before exiting.
-search_in_hmmer <- function(...) {
+search_in_hmmer <- function(...) { # nolint
     r <- params_into_query_list(...) %>%
         post_query()
     if (r$status != 200) {

diff --git a/R/add_physicochemical_properties_to_HMMER_tbl.R b/R/add_physicochemical_properties_to_HMMER_tbl.R
@@ -27,7 +27,7 @@
 #'  other proteins as receptors, to normalize it is divided by the number of
 #'  residues. A protein has high binding potential if the index value is
 #'  higher than 2.48.
-#' - hydrophobicity: GRAVY hydrophobicity index of an amino acids sequence 
+#' - hydrophobicity: GRAVY hydrophobicity index of an amino acids sequence
 #'   using KyteDoolittle hydrophobicity scale.
 #' - instaIndex: Guruprasad's instability index.
 #' This index predicts the stability of a protein based
@@ -54,9 +54,9 @@
 #'     colname = "hits.fullfasta"
 #' )
 #' @export
-#'
-add_physicochemical_properties_to_HMMER_tbl <- function(
-    data, colname = "hits.fullfasta") {
+#' @importFrom magrittr `%>%`
+
+add_physicochemical_properties_to_HMMER_tbl <- function(data, colname = "hits.fullfasta") { # nolint
     if (!requireNamespace("Peptides", quietly = TRUE)) {
         stop("Package \"Peptides\" must be installed to use this function.",
             call. = FALSE
@@ -88,7 +88,7 @@ add_physicochemical_properties_to_HMMER_tbl <- function(
         })
 }
 
-calculate_peptides <- function(y) {
+calculate_peptides <- function(y) { # nolint
     Peptides::aaComp(y) %>%
         purrr::map_dfr(~ {
             as.data.frame(.x) %>%

diff --git a/R/add_sequences_to_hmmer_tbl.R b/R/add_sequences_to_hmmer_tbl.R
@@ -12,8 +12,7 @@
 #'
 #' @export
 #'
-add_sequences_to_hmmer_tbl <- function(data, extension = "fullfasta",
-    max_times = 3) {
+add_sequences_to_hmmer_tbl <- function(data, extension = "fullfasta", max_times = 3) { # nolint
     stopifnot(any("uuid" %in% colnames(data)))
     stopifnot(any("hits.name" %in% colnames(data)))
     inner_function <- purrr::insistently(
@@ -32,14 +31,14 @@ add_sequences_to_hmmer_tbl <- function(data, extension = "fullfasta",
         dplyr::group_by(!!group_var) %>%
         dplyr::group_split() %>%
         purrr::map_dfr(inner_function) %>%
-        delete_na_rows
+        delete_na_rows()
 }
 
 delete_na_rows <- function(data) {
-    data[rowSums(is.na(data)) <= nrow(data),]
+    data[rowSums(is.na(data)) <= nrow(data), ]
 }
 
-add_AAStringSet_to_tbl <- function(fasta, data, extension) {
+add_AAStringSet_to_tbl <- function(fasta, data, extension) { # nolint
     col_name <- paste0("hits.", extension)
     x <- tibble::tibble("hits.name" = names(fasta))
     x[c(col_name)] <- as.character(fasta)

diff --git a/R/add_taxa_to_hmmer_tbl.R b/R/add_taxa_to_hmmer_tbl.R
@@ -1,4 +1,3 @@
-
 #' Add taxonomic information to a Data Frame obtained
 #'  from HMMER with a "hits.taxid" column.
 #'
@@ -20,8 +19,8 @@
 #' )
 #' @export
 #'
-add_taxa_to_hmmer_tbl <- function(data, mode = "remote", rank_vc = NULL) {
-    inner_function <- function(x) {
+add_taxa_to_hmmer_tbl <- function(data, mode = "remote", rank_vc = NULL) { # nolint
+    inner_function <- function(x) { # nolint
         annotate_with_NCBI_taxid(
             taxid = unique(x$hits.taxid),
             mode = mode, rank_vc = rank_vc

diff --git a/R/annotate_with_ncbi.R b/R/annotate_with_ncbi.R
@@ -15,7 +15,7 @@
 #' @examples
 #' annotate_with_NCBI_taxid(7955, mode = "remote")
 #' @export
-annotate_with_NCBI_taxid <- function(taxid, rank_vc = NULL, mode = "remote") {
+annotate_with_NCBI_taxid <- function(taxid, rank_vc = NULL, mode = "remote") { # nolint
     if (!requireNamespace("taxizedb", quietly = TRUE) && mode == "local") {
         stop(
             "Package \"taxizedb\" must be installed to use this function with a

diff --git a/R/data.R b/R/data.R
@@ -7,10 +7,10 @@
 #' \item{\code{algorithm }}{HMMER algorithm}
 #' \item{\code{uuid}}{unique hmmer identifier}
 #' \item{\code{stats.page}}{}
-#' \item{\code{stats.nhits}}{The number of hits found above reporting 
+#' \item{\code{stats.nhits}}{The number of hits found above reporting
 #' thresholds}
 #' \item{\code{stats.elapsed}}{}
-#' \item{\code{stats.Z}}{The number of sequences or models in the target 
+#' \item{\code{stats.Z}}{The number of sequences or models in the target
 #' database}
 #' \item{\code{stats.Z_setby}}{}
 #' \item{\code{stats.n_past_msv}}{}
@@ -36,7 +36,7 @@
 #' \item{\code{hits.ndom}}{Total number of domains identified in this sequence}
 #' \item{\code{hits.extlink}}{}
 #' \item{\code{hits.fullfasta}}{Protein sequences as a character vector.}
-#' \item{\code{hits.taxid}}{The NCBI taxonomy identifier of the target (if 
+#' \item{\code{hits.taxid}}{The NCBI taxonomy identifier of the target (if
 #' applicable)}
 #' \item{\code{hits.acc}}{Accession of the target}
 #' \item{\code{hits.taxlink}}{}
@@ -45,26 +45,26 @@
 #' \item{\code{hits.flags}}{}
 #' \item{\code{hits.nregions}}{Number of regions evaluated}
 #' \item{\code{hits.niseqs}}{}
-#' \item{\code{hits.name}}{Name of the target (sequence for phmmer/hmmsearch, 
+#' \item{\code{hits.name}}{Name of the target (sequence for phmmer/hmmsearch,
 #' HMM for hmmscan)}
 #' \item{\code{hits.species}}{The species name of the target (if applicable)}
-#' \item{\code{hits.score}}{Bit score of the sequence (all domains, without 
+#' \item{\code{hits.score}}{Bit score of the sequence (all domains, without
 #' correction)}
 #' \item{\code{hits.bias}}{}
 #' \item{\code{hits.sindex}}{}
-#' \item{\code{hits.nincluded}}{Number of domains satisfying inclusion 
+#' \item{\code{hits.nincluded}}{Number of domains satisfying inclusion
 #' thresholding}
 #' \item{\code{hits.domains}}{The domain or hit hash contains the details of the
 #'  match, in particular the alignment between the query and the target.}
 #' \item{\code{hits.pdbs}}{Array of pdb identifiers (with chain information)}
 #' \item{\code{hits.evalue}}{E-value of the score}
-#' \item{\code{hits.nreported}}{Number of domains satisfying reporting 
+#' \item{\code{hits.nreported}}{Number of domains satisfying reporting
 #' thresholding}
 #' \item{\code{hits.archindex}}{}
 #' \item{\code{hits.acc2}}{Secondary accession of the target}
 #' }
 #'
-#' For further details, see 
+#' For further details, see
 #' \url{https://hmmer-web-docs.readthedocs.io/en/latest/appendices.html}
 #'
 "phmmer_2abl"
@@ -78,10 +78,10 @@
 #' \item{\code{algorithm }}{HMMER algorithm}
 #' \item{\code{uuid}}{unique hmmer identifier}
 #' \item{\code{stats.page}}{}
-#' \item{\code{stats.nhits}}{The number of hits found above reporting 
+#' \item{\code{stats.nhits}}{The number of hits found above reporting
 #' thresholds}
 #' \item{\code{stats.elapsed}}{}
-#' \item{\code{stats.Z}}{The number of sequences or models in the target 
+#' \item{\code{stats.Z}}{The number of sequences or models in the target
 #' database}
 #' \item{\code{stats.Z_setby}}{}
 #' \item{\code{stats.n_past_msv}}{}
@@ -109,9 +109,9 @@
 #' \item{\code{hits.nreported}}{}
 #' \item{\code{hits.hindex}}{}
 #' \item{\code{hits.ndom}}{Total number of domains identified in this sequence}
-#' \item{\code{hits.name}}{Name of the target (sequence for phmmer/hmmsearch, 
+#' \item{\code{hits.name}}{Name of the target (sequence for phmmer/hmmsearch,
 #' HMM for hmmscan)}
-#' \item{\code{hits.score}}{Bit score of the sequence (all domains, without 
+#' \item{\code{hits.score}}{Bit score of the sequence (all domains, without
 #' correction)}
 #' \item{\code{hits.bias}}{}
 #' \item{\code{hits.domains}}{The domain or hit hash contains the details of the
@@ -120,7 +120,7 @@
 #' \item{\code{hits.nincluded}}{E-value of the score}
 #' }
 #'
-#' For further details, see 
+#' For further details, see
 #' \url{https://hmmer-web-docs.readthedocs.io/en/latest/appendices.html}
 #'
 "hmmscan_2abl"
diff --git a/R/extract_from_hmmer.R b/R/extract_from_hmmer.R
@@ -13,62 +13,62 @@
 #'     column = "hits.domains"
 #' )
 #' @export
-extract_from_hmmer <- function(data, column = "hits.domains") {
+extract_from_hmmer <- function(data, column = "hits.domains") { # nolint
     # Create copy of data
     data2 <- data.frame(data)
     # Save number of rows
-    n.rows <- nrow(data2)
+    n_rows <- nrow(data2)
     # Initialize list with new column
-    new.column <- list()
-    for (row in seq_len(n.rows)) {
-        new.column[[length(new.column) + 1]] <- list()
+    new_column <- list()
+    for (row in seq_len(n_rows)) {
+        new_column[[length(new_column) + 1]] <- list()
     }
 
     # Iterate over each row of dataframe
-    for (row in seq_len(n.rows)) {
+    for (row in seq_len(n_rows)) {
         # Calculate number of domains/sequences in actual row
-        n.elements <- length(data2[row, column][[1]])
+        n_elements <- length(data2[row, column][[1]])
 
         # If there is more than one domain/sequence, add them at the end of
         # dataframe
-        if (n.elements > 1) {
-            for (el in c(2:n.elements)) {
+        if (n_elements > 1) {
+            for (el in c(2:n_elements)) {
                 data2[nrow(data2) + 1, ] <- data2[row, ]
-                assigned.element <- data2[row, column][[1]][[el]]
+                assigned_element <- data2[row, column][[1]][[el]]
 
-                if (is.null(assigned.element)) {
-                    assigned.element <- NA
+                if (is.null(assigned_element)) {
+                    assigned_element <- NA
                 }
-                new.column[[length(new.column) + 1]] <- assigned.element
+                new_column[[length(new_column) + 1]] <- assigned_element
             }
         }
         # Access to actual row list
-        assigned.element <- data2[row, column][[1]][[1]]
-        if (is.null(assigned.element)) {
-            assigned.element <- NA
+        assigned_element <- data2[row, column][[1]][[1]]
+        if (is.null(assigned_element)) {
+            assigned_element <- NA
         }
-        new.column[[row]] <- assigned.element
+        new_column[[row]] <- assigned_element
     }
     # Substitute new_column by column name and unnest column list into
     # multiple columns
-    data2 <- data2 %>% bind_and_unnest(column, new.column)
+    data2 <- data2 %>% bind_and_unnest(column, new_column)
     data2
 }
 
 
-bind_and_unnest <- function(data, old.column, new.column) {
+bind_and_unnest <- function(data, old_column, new.column) { # nolint
     data2 <- data.frame(data)
     data2 <- cbind(data2, I(new.column))
 
     data2 <- data2 %>%
         dplyr::select(-c({
-            old.column
+            old_column
         })) %>%
-        dplyr::rename({{ old.column }} := new.column)
+        dplyr::rename({{ old_column }} := new.column)
 
-    if (old.column != "hits.pdbs") {
+    if (old_column != "hits.pdbs") {
         data2 <- data2 %>%
-            tidyr::unnest_wider({{ old.column }}, names_sep = ".")
+            tidyr::unnest_wider({{ old_column }}, names_sep = ".")
     }
 
     # Remove empty columns
@@ -78,16 +78,18 @@ bind_and_unnest <- function(data, old.column, new.column) {
     # Remove 'hits.' prefix from colnames
     colnames(data2) <- colnames(data2) %>%
         stringr::str_replace_all(
-            old.column,
-            stringr::str_remove(old.column, "hits.")
+            old_column,
+            stringr::str_remove(old_column, "hits.")
         )
 
     # Coerce some columns to numeric
-    if (old.column == "hits.domains"){
-        to_coerce <- c("domains.ievalue", "domains.bias", "domains.cevalue",
-        "domains.oasc")
+    if (old_column == "hits.domains") {
+        to_coerce <- c(
+            "domains.ievalue", "domains.bias", "domains.cevalue",
+            "domains.oasc"
+        )
 
-        data2[to_coerce] <- lapply(data2[to_coerce], as.numeric)  
+        data2[to_coerce] <- lapply(data2[to_coerce], as.numeric)
     }
 
     # Return new dataframe

diff --git a/R/filter_hmmer.R b/R/filter_hmmer.R
@@ -14,7 +14,7 @@
 #'     by = "hits.evalue"
 #' )
 #' @export
-filter_hmmer <- function(data, threshold = 0.0005, by = "hits.evalue") {
+filter_hmmer <- function(data, threshold = 0.0005, by = "hits.evalue") { # nolint
     data2 <- data.frame(data)
 
     # Extract type
@@ -39,18 +39,18 @@ extract_evalue_from_domains <- function(data, by = "ievalue") {
     new_evalue <- c()
 
     # Save number of rows
-    n.rows <- nrow(data2)
+    n_rows <- nrow(data2)
 
     # Iterate over all rows
-    for (row in seq_len(n.rows)) {
+    for (row in seq_len(n_rows)) {
         # Calculate number of domains in actual row
-        n.elements <- length(data2[row, "hits.domains"][[1]])
+        n_elements <- length(data2[row, "hits.domains"][[1]])
 
         lowest <- as.double(data2[row, "hits.domains"][[1]][[1]][by])
 
         # Iterate over each domain and keep lowest
-        if (n.elements > 1) {
-            for (el in seq_len(n.elements)) {
+        if (n_elements > 1) {
+            for (el in seq_len(n_elements)) {
                 element <- as.double(data2[row, "hits.domains"][[1]][[el]][by])
 
                 if (element < lowest) { # Keep lowest

diff --git a/R/hmmer_evalues_cleveland_dot_plot.R b/R/hmmer_evalues_cleveland_dot_plot.R
@@ -15,18 +15,19 @@
 #'     threshold = 0.001
 #' )
 #'
-hmmer_evalues_cleveland_dot_plot <- function(data,
-    threshold = 0.001) {
+hmmer_evalues_cleveland_dot_plot <- function(data, threshold = 0.001) { # nolint
     df <- data %>%
         extract_from_hmmer()
     df$domains.ievalue <- as.numeric(df$domains.ievalue)
     df <- df %>%
-        dplyr::group_by(.data$uuid, .data$hits.name, .data$hits.acc) %>%
-        dplyr::mutate("best.ievalue" = 
-        min(as.numeric(.data$domains.ievalue))) %>%
+        dplyr::group_by(.data$uuid, .data$hits.name, .data$hits.acc) %>% # nolint
+        dplyr::mutate(
+            "best.ievalue" =
+                min(as.numeric(.data$domains.ievalue))
+        ) %>%
         dplyr::ungroup()
     df %>%
-        dplyr::arrange(-log(.data$best.ievalue)) %>%
+        dplyr::arrange(-log(.data$best.ievalue)) %>% # nolint
         ggplot2::ggplot() +
         ggplot2::geom_segment(
             ggplot2::aes(