Skip to content

Commit

Permalink
version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
ubeattie authored and cran-robot committed Dec 5, 2023
0 parents commit 8c665b1
Show file tree
Hide file tree
Showing 51 changed files with 2,275 additions and 0 deletions.
57 changes: 57 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
Package: profrep
Title: Profile Repeatability
Version: 1.0.0
Authors@R: c(
person("Ursula K.", "Beattie",
email="ursula.beattie@tufts.edu",
role=c("cre", "aut", "cph"),
comment=c(ORCID="0000-0002-7131-3712")
),
person("David", "Harris",
email="drharris115@gmail.com",
role=c("aut", "cph")
),
person("L. Michael", "Romero",
email="michael.romero@tufts.edu",
role=c("aut", "cph"),
comment=c(ORCID="0000-0002-8854-8884")
),
person("J. Michael", "Reed",
email="michael.reed@tufts.edu",
role=c("aut", "cph"),
comment=c(ORCID="0000-0002-3571-2652")
),
person("Zachary R.", "Weaver",
email="zrweaver447@gmail.com",
role=c("aut", "cph"),
comment=c(ORCID="0000-0001-6314-0690")
)
)
Description: Calculates profile repeatability for replicate stress response
curves, or similar time-series data. Profile repeatability is an individual
repeatability metric that uses the variances at each timepoint, the maximum
variance, the number of crossings (lines that cross over each other), and
the number of replicates to compute the repeatability score.
For more information see Reed et al. (2019) <doi:10.1016/j.ygcen.2018.09.015>.
Depends: R (>= 2.10)
License: MIT + file LICENSE
URL: https://ubeattie.github.io/profrep/
BugReports: https://github.com/ubeattie/profrep/issues
Encoding: UTF-8
RoxygenNote: 7.2.3
Suggests: knitr, rmarkdown, testthat (>= 3.0.0)
Config/testthat/edition: 3
Imports: stats
LazyData: true
VignetteBuilder: knitr
NeedsCompilation: no
Packaged: 2023-12-01 20:50:46 UTC; cleanaccount
Author: Ursula K. Beattie [cre, aut, cph]
(<https://orcid.org/0000-0002-7131-3712>),
David Harris [aut, cph],
L. Michael Romero [aut, cph] (<https://orcid.org/0000-0002-8854-8884>),
J. Michael Reed [aut, cph] (<https://orcid.org/0000-0002-3571-2652>),
Zachary R. Weaver [aut, cph] (<https://orcid.org/0000-0001-6314-0690>)
Maintainer: Ursula K. Beattie <ursula.beattie@tufts.edu>
Repository: CRAN
Date/Publication: 2023-12-04 16:40:08 UTC
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2023
COPYRIGHT HOLDER: profrep authors
50 changes: 50 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
e6298f479f277bbe4522ec63cc3de286 *DESCRIPTION
c673bd4b5854835da51ddf9b6885ebc2 *LICENSE
34572eac501bc5100ad23dc98830af0c *NAMESPACE
8e51344e6eb12ecb2394afd845c8b3b2 *NEWS.md
1ca78358f97042fdb8f491ba0fc57861 *R/calculate_crossovers.R
576119294ad4eb059317f3cd504bae99 *R/clean_data.R
192656bb75e3666f3ad4367424fefcba *R/data.R
3acfb0709fbe2bef428ef8040ad7aeaf *R/do_ordering.R
2e8033940e5f5e800d851fcce734f318 *R/find_next_good_datapoint.R
0b203e9d2c4881782960fa67b703a6d1 *R/get_vars.R
8acd46b20b3211d4845d85cb9589f9da *R/math_helpers.R
43bd67968b1c1a40cda56c7469e1f026 *R/profrep.R
52147f37fce73f735b29e5de35f67693 *R/retrieve_good_data.R
b647f1598704d670b6be1c17cb4c8c56 *R/score_dfs.R
f7ddb49946273e29ecad6bbd1f4c4026 *R/score_individual_df.R
ecceae992e494864bbeda8ac08163299 *README.md
2213ddbc2f31d22f2d576c97aacc63ea *build/vignette.rds
6e92e02fe5e0c6bad0cadfd134d1838f *data/example_two_point_data.rda
0a29398ccd3773964931980954f69f4d *data/sparrow_repeatability_three_point.rda
bfe85f57e0bf875f0b0f40e40faef7ea *data/synthetic_data_four_point.rda
c16ea34db695b640bdfb46fcc766728e *inst/CITATION
8f483c5d47aff30c5f4045c5678da77a *inst/doc/profrep.R
096f0937466d622974f36edf21650d4d *inst/doc/profrep.Rmd
fa542f5e23c2ac5c724dce2031627267 *inst/doc/profrep.html
9d75fe6f389eccc0c05a90062104afc5 *man/calculate_crossovers.Rd
d63166952e259db0a2d8b701545b2481 *man/clean_data.Rd
e858adbe1e78c77d56c706d5b0deff8d *man/do_ordering.Rd
25bca58a5b0a6ac4bf34e858cc05dba4 *man/example_two_point_data.Rd
32cb276b6dcaf4f2afe9e06868a42cfa *man/figures/README-pressure-1.png
b27988176fca5a36829bb73a5442e914 *man/find_next_good_datapoint.Rd
82d548899046cedd9ac1c8cc8fd85512 *man/get_vars.Rd
4006b9244131e17d8ef06b742e2341be *man/profrep.Rd
c4489fbc9057aae1a890031d5b208ddf *man/retrieve_good_data.Rd
5b5335651cdbd2cafa97487fb47b4b7e *man/score_dfs.Rd
d5f65da02a92cbd56f352a3225a490a9 *man/score_individual_df.Rd
e272951c8e2d5206662a25cd78744881 *man/sigmoid.Rd
3970882116ae02715df5ccbbcc4f1051 *man/sparrow_repeatability_three_point.Rd
4b0ee5042ea9bce950f1dc828cab52d7 *man/synthetic_data_four_point.Rd
c8bdad69e0dcb2358b5765e3203a4bc6 *tests/testthat.R
f3a39141a83bd329b1f3e36de27a36eb *tests/testthat/test-calculate_crossovers.R
29d8c55a3b60c64b2ab023326503c1e9 *tests/testthat/test-clean_data.R
5448bbec379e9f4a4679412378d6992e *tests/testthat/test-do_ordering.R
770e8726889cb9c67fdba310e5da362d *tests/testthat/test-find_next_good_datapoint.R
6b4b9b9111dcf49fa85b8ca19b9d79db *tests/testthat/test-get_vars.R
ec9f3de3019bb3919d55266853a8880f *tests/testthat/test-math_helpers.R
976396153e4653c0701196ff42840011 *tests/testthat/test-profrep.R
e05c02d8b4d0e4cf29872a2fb038bed8 *tests/testthat/test-retrieve_good_data.R
5236d851dcc82c65b2730f76d0fc129c *tests/testthat/test-score_dfs.R
40bf69d9d539f62e04eca95337a0f942 *tests/testthat/test-score_individual_df.R
096f0937466d622974f36edf21650d4d *vignettes/profrep.Rmd
14 changes: 14 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Generated by roxygen2: do not edit by hand

export(calculate_crossovers)
export(clean_data)
export(do_ordering)
export(find_next_good_datapoint)
export(get_vars)
export(profrep)
export(retrieve_good_data)
export(score_dfs)
export(score_individual_df)
export(sigmoid)
importFrom(stats,na.omit)
importFrom(utils,tail)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# profrep 1.0.0

# profrep 1.0.0

* Initial CRAN submission.
59 changes: 59 additions & 0 deletions R/calculate_crossovers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' @title Calculate the Number of Crossovers
#'
#' @details
#' This function calculates the number of crossovers in a dataset by comparing
#' the values of replicates across multiple trials. It assumes that missing
#' values (NAs) have been interpolated using the `clean_data` function.
#'
#' @param individual_df A data frame containing the individual dataset.
#' @param n_trials The total number of trials in the dataset (the number of rows)
#' @param n_replicates The total number of replicates in each trial (the number of columns - 2)
#'
#' @return The number of crossovers detected in the dataset.
#'
#' @seealso \code{\link{clean_data}} for information on data cleaning.
#'
#' @examples
#' data <- matrix(
#' c(
#' 1, 60, 1, 2, 3, 4, 5, # No NA values
#' 1, 90, 9, NA, 4, NA, 2, # NA Values in row
#' 1, 120, 3, 6, NA, NA, 9 # Consecutive NA values
#' ),
#' nrow = 3,
#' byrow=TRUE
#' )
#' n_trials <- nrow(data)
#' n_replicates <- ncol(data) - 2
#' crossovers <- calculate_crossovers(data, n_trials, n_replicates)
#' cat("Number of crossovers:", crossovers, "\n")
#'
#' @export
calculate_crossovers <- function(individual_df, n_trials, n_replicates) {
indicators <- c()
clean_df <- clean_data(data=individual_df, n_trials=n_trials, n_replicates=n_replicates)

for (t in 1:n_trials) {
for (i in 1:(n_replicates - 1)) {
val <- clean_df[t, i + 2]
for (j in (i + 1):n_replicates) {
nxt <- -1
if (val >= clean_df[t, j + 2]) {
nxt <- 1
}
indicators <- c(indicators, nxt)
}
}
}
n_pairs = n_replicates * (n_replicates - 1) / 2
M <- matrix(indicators, n_pairs, n_trials)
n_crossings <- 0
for (t in 1:(n_trials-1)) {
for (i in 1:n_pairs) {
if (M[i,t] * M[i, t+1] == -1) {
n_crossings <- n_crossings + 1
}
}
}
return(n_crossings)
}
60 changes: 60 additions & 0 deletions R/clean_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#' @title Clean Data by Interpolating Missing Values
#'
#' @details
#' This function cleans a dataset by interpolating missing values in the replicate
#' columns of each row using neighboring values. If the data frame ends in null values
#' (the last columns are nulls), it will extrapolate from the last value. If the
#' first value is null, it will loop around and pull from the last replicate to
#' perform the interpolation between the last replicate and the second replicate.
#'
#' @param data A data frame containing the dataset to be cleaned.
#' @param n_trials The total number of rows in the dataset.
#' @param n_replicates The total number of replicate columns in each row.
#'
#' @return A cleaned data frame with missing values interpolated.
#'
#' @seealso \code{\link{find_next_good_datapoint}} for details on the interpolation process.
#'
#' @examples
#' my_data <- matrix(
#' c(
#' 1, 60, 1, 2, 3, 4, 5, # No NA values
#' 1, 90, 9, NA, 4, NA, 2, # NA Values in row
#' 1, 120, 3, 6, NA, NA, 9 # Consecutive NA values
#' ),
#' nrow = 3,
#' byrow=TRUE
#' )
#' cleaned_data <- clean_data(my_data, n_trials = 3, n_replicates = 5)
#' print(my_data)
#' print(cleaned_data)
#'
#' @importFrom stats na.omit
#' @importFrom utils tail
#'
#' @export
clean_data <- function(data, n_trials, n_replicates) {
missing_set <- c()

for (t in 1:n_trials) { # loops through rows
trial_row <- data[t, 3:ncol(data)] # get the row of replicate data only

for (i_rep in 1:n_replicates) { # loops through replicate columns
# replicate columns are really index 3 to the end of the data frame
if (is.na(trial_row[i_rep])) {
n_missing <- length(missing_set) + 1 # increase number missing
missing_set <- c(missing_set, t*(n_replicates -1) + i_rep) # add to missing set

interp_val <- find_next_good_datapoint(trial_row, i_rep, n_replicates)
# If the first value is null, need to pull the last replicate value.
if (i_rep == 1) {first_val = utils::tail(stats::na.omit(unlist(trial_row[1,])), 1)}
else {first_val = trial_row[i_rep - 1]}

replacement_val <- first_val + interp_val / 2
trial_row[i_rep] <- replacement_val
}
}
data[t, 3:ncol(data)] <- trial_row
}
return(data)
}
56 changes: 56 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#' Example Data: Two Point Data
#'
#' An example of data that one would perform profile repeatability on.
#' Consists of 9 individual animals, with corticosterone data taken at 2 timepoints (n_trials = 2), baseline (time = 3) and stress-induced (time = 30).
#' Then, there are 28 replicate columns.
#'
#' @format ## `example_two_point_data`
#' A dataframe with 10 rows and 30 columns:
#' \describe{
#' \item{Animal}{The animal name/unique identifier}
#' \item{Time}{The time of the measurement, in days.}
#' \item{SD.DR}{The name of the replicate column.}
#' }
#'
#' @source This data was extracted from Romero & Rich 2007 (Comp Biochem. Physiol. Part A Mol. Integr. Physiol. 147, 562-568. https://doi.org/10.1016/j.cbpa.2007.02.004)
"example_two_point_data"

#' Example Data: Sparrow Repeatability (3 Point Data)
#'
#' An example of data that one would perform profile repeatability on.
#' Consists of 12 individual animals, with corticosterone data taken at 3 times (n_trials = 3), baseline (time = 0) and two stress-induced (time = 15 and 30).
#' Then, there are 10 replicate columns. This example also shows what happens
#' when there are null data records for some individuals.
#'
#' @format ## `sparrow_repeatability_three_point`
#' A dataframe with 36 rows and 12 columns:
#' \describe{
#' \item{Animal}{The animal name/unique identifier}
#' \item{TIME}{The time of the measurement, in days}
#' \item{LD.500}{The name of the replicate column}
#' }
#'
#' @source This data was extracted from Rich & Romero 2001 (J. Comp. Physiol. Part B Biochem. Syst. Environ. Physiol. 171, 543-647. https://doi.org/10.1007/s003600100204)
"sparrow_repeatability_three_point"

#' Example Data: Synthetic 4-Point Data
#'
#' An example of data that one would perform profile repeatability on.
#' The data is synthetic data created for testing purposes and is designed to span a range of perceived repeatability scores.
#' Consists of 11 individual animals, with data taken at 4 times (n_trials = 4), baseline (time = 0) and three stress-induced (time = 15, 30, and 45).
#' Then, there are four replicate columns. Replicate column names refer to
#' sample tests performed on the animal.
#'
#' @format ## `synthetic_data_four_point`
#' A dataframe with 44 rows and 6 columns:
#' \describe{
#' \item{Animal}{The animal name/unique identifier}
#' \item{TIME}{The time of the measurement (unit not important)}
#' \item{A}{The (unimportant) name of a replicate column.}
#' \item{B}{The (unimportant) name of a replicate column.}
#' \item{C}{The (unimportant) name of a replicate column.}
#' \item{D}{The (unimportant) name of a replicate column.}
#' }
#'
#' @source Data created for testing purposes by Reed et al., 2019 (Gen. Comp. Endocrinol. 270, 1-9. https://doi.org/10.1016/j.ygcen.2018.09.015)
"synthetic_data_four_point"
74 changes: 74 additions & 0 deletions R/do_ordering.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#' @title Score and Order Data
#'
#' @details
#' Performs the ordering of input data by scoring each individual data frame.
#'
#' The main function of the package, this will send each individuals data out
#' for scoring. Then, when all scores are computed, it will order the result
#' data frame by score and assign a rank.
#'
#' Ranks are assigned with ties allowed - if N individuals have a tie, their rank
#' is averaged. For example, if the max score is 1, and two individuals have
#' that score, their rank is 1.5
#'
#' @param n_trials The number of rows an individual sample will have.
#' @param id_list The list of unique individual or sample names
#' @param df_list The list of data frames per unique individual
#' @param n_replicates The number of replicates in the study.
#' @param verbose A boolean parameter the defaults to FALSE. Determines whether messages are printed.
#' @param sort A boolean parameter that defaults to TRUE. If TRUE, sorts the returned data frame by score. If FALSE, returns the data in the individual order it was provided in
#'
#' @returns Returns a data frame of the results, in the following form:
#'
#' - Column 1: "individual" - the unique identifier of an individual or sample
#' - Column 2: "n_crossings" - the calculated number of crossings.
#' - Column 3: "max_variance" - the maximum of the variances of the replicate measurements at a single time for the individual or sample.
#' - Column 4: "ave_variance" - the average of the variances of the replicate measurements at a single time for the individual or sample.
#' - Column 5: "base_score" - the original, unnormalized profile repeatability score. Smaller numbers rank higher.
#' - Column 6: "final_score" - the base score, normalized by the sigmoid function. Constrained to be between 0 and 1. Scores closer to 1 rank higher.
#' - Column 7: "rank" - the calculated ranking of the individual or sample, against all other individuals or samples in the data set.
#'
#' @examples
#' df <- data.frame(
#' col_a = c('A', 'A', 'B', 'B'),
#' col_b = c(5, 15, 5, 15),
#' col_c = c(5, 10, 1, 2),
#' col_d = c(10, 15, 3, 4)
#' )
#' id_list <- unique(df[, 1])
#' individuals <- list()
#' for (i in 1:length(id_list)) {
#' individuals[[i]] <- df[df[, 1] == id_list[i], ]
#' }
#' ret_df <- do_ordering(n_trials=2, id_list=id_list, df_list=individuals, n_replicates=2)
#' print(ret_df)
#'
#' @export
do_ordering <- function(n_trials, id_list, df_list, n_replicates, verbose=FALSE, sort=TRUE) {
# Generate Scores
if (verbose) {message("Scoring each set of data per individual.")}
scores_df <- score_dfs(
id_list=id_list,
df_list=df_list,
n_replicates=n_replicates,
n_trials=n_trials,
verbose=verbose
)

# Order Scores and Individuals
if (sort) {
if (verbose) {message("Ordering by score.")}
ordered_df <- scores_df[order(scores_df$final_score, decreasing=TRUE), ]
}
else {ordered_df <- scores_df}

# need the minus to rank the highest score the highest
# average will make ties be the same decimal value
ordered_df$rank <- rank(-ordered_df$final_score, ties.method="average")

# Resets the index
rownames(ordered_df) <- NULL

return(ordered_df)
}

0 comments on commit 8c665b1

Please sign in to comment.