-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8c665b1
Showing
51 changed files
with
2,275 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
Package: profrep | ||
Title: Profile Repeatability | ||
Version: 1.0.0 | ||
Authors@R: c( | ||
person("Ursula K.", "Beattie", | ||
email="ursula.beattie@tufts.edu", | ||
role=c("cre", "aut", "cph"), | ||
comment=c(ORCID="0000-0002-7131-3712") | ||
), | ||
person("David", "Harris", | ||
email="drharris115@gmail.com", | ||
role=c("aut", "cph") | ||
), | ||
person("L. Michael", "Romero", | ||
email="michael.romero@tufts.edu", | ||
role=c("aut", "cph"), | ||
comment=c(ORCID="0000-0002-8854-8884") | ||
), | ||
person("J. Michael", "Reed", | ||
email="michael.reed@tufts.edu", | ||
role=c("aut", "cph"), | ||
comment=c(ORCID="0000-0002-3571-2652") | ||
), | ||
person("Zachary R.", "Weaver", | ||
email="zrweaver447@gmail.com", | ||
role=c("aut", "cph"), | ||
comment=c(ORCID="0000-0001-6314-0690") | ||
) | ||
) | ||
Description: Calculates profile repeatability for replicate stress response | ||
curves, or similar time-series data. Profile repeatability is an individual | ||
repeatability metric that uses the variances at each timepoint, the maximum | ||
variance, the number of crossings (lines that cross over each other), and | ||
the number of replicates to compute the repeatability score. | ||
For more information see Reed et al. (2019) <doi:10.1016/j.ygcen.2018.09.015>. | ||
Depends: R (>= 2.10) | ||
License: MIT + file LICENSE | ||
URL: https://ubeattie.github.io/profrep/ | ||
BugReports: https://github.com/ubeattie/profrep/issues | ||
Encoding: UTF-8 | ||
RoxygenNote: 7.2.3 | ||
Suggests: knitr, rmarkdown, testthat (>= 3.0.0) | ||
Config/testthat/edition: 3 | ||
Imports: stats | ||
LazyData: true | ||
VignetteBuilder: knitr | ||
NeedsCompilation: no | ||
Packaged: 2023-12-01 20:50:46 UTC; cleanaccount | ||
Author: Ursula K. Beattie [cre, aut, cph] | ||
(<https://orcid.org/0000-0002-7131-3712>), | ||
David Harris [aut, cph], | ||
L. Michael Romero [aut, cph] (<https://orcid.org/0000-0002-8854-8884>), | ||
J. Michael Reed [aut, cph] (<https://orcid.org/0000-0002-3571-2652>), | ||
Zachary R. Weaver [aut, cph] (<https://orcid.org/0000-0001-6314-0690>) | ||
Maintainer: Ursula K. Beattie <ursula.beattie@tufts.edu> | ||
Repository: CRAN | ||
Date/Publication: 2023-12-04 16:40:08 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2023 | ||
COPYRIGHT HOLDER: profrep authors |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
e6298f479f277bbe4522ec63cc3de286 *DESCRIPTION | ||
c673bd4b5854835da51ddf9b6885ebc2 *LICENSE | ||
34572eac501bc5100ad23dc98830af0c *NAMESPACE | ||
8e51344e6eb12ecb2394afd845c8b3b2 *NEWS.md | ||
1ca78358f97042fdb8f491ba0fc57861 *R/calculate_crossovers.R | ||
576119294ad4eb059317f3cd504bae99 *R/clean_data.R | ||
192656bb75e3666f3ad4367424fefcba *R/data.R | ||
3acfb0709fbe2bef428ef8040ad7aeaf *R/do_ordering.R | ||
2e8033940e5f5e800d851fcce734f318 *R/find_next_good_datapoint.R | ||
0b203e9d2c4881782960fa67b703a6d1 *R/get_vars.R | ||
8acd46b20b3211d4845d85cb9589f9da *R/math_helpers.R | ||
43bd67968b1c1a40cda56c7469e1f026 *R/profrep.R | ||
52147f37fce73f735b29e5de35f67693 *R/retrieve_good_data.R | ||
b647f1598704d670b6be1c17cb4c8c56 *R/score_dfs.R | ||
f7ddb49946273e29ecad6bbd1f4c4026 *R/score_individual_df.R | ||
ecceae992e494864bbeda8ac08163299 *README.md | ||
2213ddbc2f31d22f2d576c97aacc63ea *build/vignette.rds | ||
6e92e02fe5e0c6bad0cadfd134d1838f *data/example_two_point_data.rda | ||
0a29398ccd3773964931980954f69f4d *data/sparrow_repeatability_three_point.rda | ||
bfe85f57e0bf875f0b0f40e40faef7ea *data/synthetic_data_four_point.rda | ||
c16ea34db695b640bdfb46fcc766728e *inst/CITATION | ||
8f483c5d47aff30c5f4045c5678da77a *inst/doc/profrep.R | ||
096f0937466d622974f36edf21650d4d *inst/doc/profrep.Rmd | ||
fa542f5e23c2ac5c724dce2031627267 *inst/doc/profrep.html | ||
9d75fe6f389eccc0c05a90062104afc5 *man/calculate_crossovers.Rd | ||
d63166952e259db0a2d8b701545b2481 *man/clean_data.Rd | ||
e858adbe1e78c77d56c706d5b0deff8d *man/do_ordering.Rd | ||
25bca58a5b0a6ac4bf34e858cc05dba4 *man/example_two_point_data.Rd | ||
32cb276b6dcaf4f2afe9e06868a42cfa *man/figures/README-pressure-1.png | ||
b27988176fca5a36829bb73a5442e914 *man/find_next_good_datapoint.Rd | ||
82d548899046cedd9ac1c8cc8fd85512 *man/get_vars.Rd | ||
4006b9244131e17d8ef06b742e2341be *man/profrep.Rd | ||
c4489fbc9057aae1a890031d5b208ddf *man/retrieve_good_data.Rd | ||
5b5335651cdbd2cafa97487fb47b4b7e *man/score_dfs.Rd | ||
d5f65da02a92cbd56f352a3225a490a9 *man/score_individual_df.Rd | ||
e272951c8e2d5206662a25cd78744881 *man/sigmoid.Rd | ||
3970882116ae02715df5ccbbcc4f1051 *man/sparrow_repeatability_three_point.Rd | ||
4b0ee5042ea9bce950f1dc828cab52d7 *man/synthetic_data_four_point.Rd | ||
c8bdad69e0dcb2358b5765e3203a4bc6 *tests/testthat.R | ||
f3a39141a83bd329b1f3e36de27a36eb *tests/testthat/test-calculate_crossovers.R | ||
29d8c55a3b60c64b2ab023326503c1e9 *tests/testthat/test-clean_data.R | ||
5448bbec379e9f4a4679412378d6992e *tests/testthat/test-do_ordering.R | ||
770e8726889cb9c67fdba310e5da362d *tests/testthat/test-find_next_good_datapoint.R | ||
6b4b9b9111dcf49fa85b8ca19b9d79db *tests/testthat/test-get_vars.R | ||
ec9f3de3019bb3919d55266853a8880f *tests/testthat/test-math_helpers.R | ||
976396153e4653c0701196ff42840011 *tests/testthat/test-profrep.R | ||
e05c02d8b4d0e4cf29872a2fb038bed8 *tests/testthat/test-retrieve_good_data.R | ||
5236d851dcc82c65b2730f76d0fc129c *tests/testthat/test-score_dfs.R | ||
40bf69d9d539f62e04eca95337a0f942 *tests/testthat/test-score_individual_df.R | ||
096f0937466d622974f36edf21650d4d *vignettes/profrep.Rmd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(calculate_crossovers) | ||
export(clean_data) | ||
export(do_ordering) | ||
export(find_next_good_datapoint) | ||
export(get_vars) | ||
export(profrep) | ||
export(retrieve_good_data) | ||
export(score_dfs) | ||
export(score_individual_df) | ||
export(sigmoid) | ||
importFrom(stats,na.omit) | ||
importFrom(utils,tail) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# profrep 1.0.0 | ||
|
||
# profrep 1.0.0 | ||
|
||
* Initial CRAN submission. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#' @title Calculate the Number of Crossovers | ||
#' | ||
#' @details | ||
#' This function calculates the number of crossovers in a dataset by comparing | ||
#' the values of replicates across multiple trials. It assumes that missing | ||
#' values (NAs) have been interpolated using the `clean_data` function. | ||
#' | ||
#' @param individual_df A data frame containing the individual dataset. | ||
#' @param n_trials The total number of trials in the dataset (the number of rows) | ||
#' @param n_replicates The total number of replicates in each trial (the number of columns - 2) | ||
#' | ||
#' @return The number of crossovers detected in the dataset. | ||
#' | ||
#' @seealso \code{\link{clean_data}} for information on data cleaning. | ||
#' | ||
#' @examples | ||
#' data <- matrix( | ||
#' c( | ||
#' 1, 60, 1, 2, 3, 4, 5, # No NA values | ||
#' 1, 90, 9, NA, 4, NA, 2, # NA Values in row | ||
#' 1, 120, 3, 6, NA, NA, 9 # Consecutive NA values | ||
#' ), | ||
#' nrow = 3, | ||
#' byrow=TRUE | ||
#' ) | ||
#' n_trials <- nrow(data) | ||
#' n_replicates <- ncol(data) - 2 | ||
#' crossovers <- calculate_crossovers(data, n_trials, n_replicates) | ||
#' cat("Number of crossovers:", crossovers, "\n") | ||
#' | ||
#' @export | ||
calculate_crossovers <- function(individual_df, n_trials, n_replicates) { | ||
indicators <- c() | ||
clean_df <- clean_data(data=individual_df, n_trials=n_trials, n_replicates=n_replicates) | ||
|
||
for (t in 1:n_trials) { | ||
for (i in 1:(n_replicates - 1)) { | ||
val <- clean_df[t, i + 2] | ||
for (j in (i + 1):n_replicates) { | ||
nxt <- -1 | ||
if (val >= clean_df[t, j + 2]) { | ||
nxt <- 1 | ||
} | ||
indicators <- c(indicators, nxt) | ||
} | ||
} | ||
} | ||
n_pairs = n_replicates * (n_replicates - 1) / 2 | ||
M <- matrix(indicators, n_pairs, n_trials) | ||
n_crossings <- 0 | ||
for (t in 1:(n_trials-1)) { | ||
for (i in 1:n_pairs) { | ||
if (M[i,t] * M[i, t+1] == -1) { | ||
n_crossings <- n_crossings + 1 | ||
} | ||
} | ||
} | ||
return(n_crossings) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#' @title Clean Data by Interpolating Missing Values | ||
#' | ||
#' @details | ||
#' This function cleans a dataset by interpolating missing values in the replicate | ||
#' columns of each row using neighboring values. If the data frame ends in null values | ||
#' (the last columns are nulls), it will extrapolate from the last value. If the | ||
#' first value is null, it will loop around and pull from the last replicate to | ||
#' perform the interpolation between the last replicate and the second replicate. | ||
#' | ||
#' @param data A data frame containing the dataset to be cleaned. | ||
#' @param n_trials The total number of rows in the dataset. | ||
#' @param n_replicates The total number of replicate columns in each row. | ||
#' | ||
#' @return A cleaned data frame with missing values interpolated. | ||
#' | ||
#' @seealso \code{\link{find_next_good_datapoint}} for details on the interpolation process. | ||
#' | ||
#' @examples | ||
#' my_data <- matrix( | ||
#' c( | ||
#' 1, 60, 1, 2, 3, 4, 5, # No NA values | ||
#' 1, 90, 9, NA, 4, NA, 2, # NA Values in row | ||
#' 1, 120, 3, 6, NA, NA, 9 # Consecutive NA values | ||
#' ), | ||
#' nrow = 3, | ||
#' byrow=TRUE | ||
#' ) | ||
#' cleaned_data <- clean_data(my_data, n_trials = 3, n_replicates = 5) | ||
#' print(my_data) | ||
#' print(cleaned_data) | ||
#' | ||
#' @importFrom stats na.omit | ||
#' @importFrom utils tail | ||
#' | ||
#' @export | ||
clean_data <- function(data, n_trials, n_replicates) { | ||
missing_set <- c() | ||
|
||
for (t in 1:n_trials) { # loops through rows | ||
trial_row <- data[t, 3:ncol(data)] # get the row of replicate data only | ||
|
||
for (i_rep in 1:n_replicates) { # loops through replicate columns | ||
# replicate columns are really index 3 to the end of the data frame | ||
if (is.na(trial_row[i_rep])) { | ||
n_missing <- length(missing_set) + 1 # increase number missing | ||
missing_set <- c(missing_set, t*(n_replicates -1) + i_rep) # add to missing set | ||
|
||
interp_val <- find_next_good_datapoint(trial_row, i_rep, n_replicates) | ||
# If the first value is null, need to pull the last replicate value. | ||
if (i_rep == 1) {first_val = utils::tail(stats::na.omit(unlist(trial_row[1,])), 1)} | ||
else {first_val = trial_row[i_rep - 1]} | ||
|
||
replacement_val <- first_val + interp_val / 2 | ||
trial_row[i_rep] <- replacement_val | ||
} | ||
} | ||
data[t, 3:ncol(data)] <- trial_row | ||
} | ||
return(data) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#' Example Data: Two Point Data | ||
#' | ||
#' An example of data that one would perform profile repeatability on. | ||
#' Consists of 9 individual animals, with corticosterone data taken at 2 timepoints (n_trials = 2), baseline (time = 3) and stress-induced (time = 30). | ||
#' Then, there are 28 replicate columns. | ||
#' | ||
#' @format ## `example_two_point_data` | ||
#' A dataframe with 10 rows and 30 columns: | ||
#' \describe{ | ||
#' \item{Animal}{The animal name/unique identifier} | ||
#' \item{Time}{The time of the measurement, in days.} | ||
#' \item{SD.DR}{The name of the replicate column.} | ||
#' } | ||
#' | ||
#' @source This data was extracted from Romero & Rich 2007 (Comp Biochem. Physiol. Part A Mol. Integr. Physiol. 147, 562-568. https://doi.org/10.1016/j.cbpa.2007.02.004) | ||
"example_two_point_data" | ||
|
||
#' Example Data: Sparrow Repeatability (3 Point Data) | ||
#' | ||
#' An example of data that one would perform profile repeatability on. | ||
#' Consists of 12 individual animals, with corticosterone data taken at 3 times (n_trials = 3), baseline (time = 0) and two stress-induced (time = 15 and 30). | ||
#' Then, there are 10 replicate columns. This example also shows what happens | ||
#' when there are null data records for some individuals. | ||
#' | ||
#' @format ## `sparrow_repeatability_three_point` | ||
#' A dataframe with 36 rows and 12 columns: | ||
#' \describe{ | ||
#' \item{Animal}{The animal name/unique identifier} | ||
#' \item{TIME}{The time of the measurement, in days} | ||
#' \item{LD.500}{The name of the replicate column} | ||
#' } | ||
#' | ||
#' @source This data was extracted from Rich & Romero 2001 (J. Comp. Physiol. Part B Biochem. Syst. Environ. Physiol. 171, 543-647. https://doi.org/10.1007/s003600100204) | ||
"sparrow_repeatability_three_point" | ||
|
||
#' Example Data: Synthetic 4-Point Data | ||
#' | ||
#' An example of data that one would perform profile repeatability on. | ||
#' The data is synthetic data created for testing purposes and is designed to span a range of perceived repeatability scores. | ||
#' Consists of 11 individual animals, with data taken at 4 times (n_trials = 4), baseline (time = 0) and three stress-induced (time = 15, 30, and 45). | ||
#' Then, there are four replicate columns. Replicate column names refer to | ||
#' sample tests performed on the animal. | ||
#' | ||
#' @format ## `synthetic_data_four_point` | ||
#' A dataframe with 44 rows and 6 columns: | ||
#' \describe{ | ||
#' \item{Animal}{The animal name/unique identifier} | ||
#' \item{TIME}{The time of the measurement (unit not important)} | ||
#' \item{A}{The (unimportant) name of a replicate column.} | ||
#' \item{B}{The (unimportant) name of a replicate column.} | ||
#' \item{C}{The (unimportant) name of a replicate column.} | ||
#' \item{D}{The (unimportant) name of a replicate column.} | ||
#' } | ||
#' | ||
#' @source Data created for testing purposes by Reed et al., 2019 (Gen. Comp. Endocrinol. 270, 1-9. https://doi.org/10.1016/j.ygcen.2018.09.015) | ||
"synthetic_data_four_point" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#' @title Score and Order Data | ||
#' | ||
#' @details | ||
#' Performs the ordering of input data by scoring each individual data frame. | ||
#' | ||
#' The main function of the package, this will send each individuals data out | ||
#' for scoring. Then, when all scores are computed, it will order the result | ||
#' data frame by score and assign a rank. | ||
#' | ||
#' Ranks are assigned with ties allowed - if N individuals have a tie, their rank | ||
#' is averaged. For example, if the max score is 1, and two individuals have | ||
#' that score, their rank is 1.5 | ||
#' | ||
#' @param n_trials The number of rows an individual sample will have. | ||
#' @param id_list The list of unique individual or sample names | ||
#' @param df_list The list of data frames per unique individual | ||
#' @param n_replicates The number of replicates in the study. | ||
#' @param verbose A boolean parameter the defaults to FALSE. Determines whether messages are printed. | ||
#' @param sort A boolean parameter that defaults to TRUE. If TRUE, sorts the returned data frame by score. If FALSE, returns the data in the individual order it was provided in | ||
#' | ||
#' @returns Returns a data frame of the results, in the following form: | ||
#' | ||
#' - Column 1: "individual" - the unique identifier of an individual or sample | ||
#' - Column 2: "n_crossings" - the calculated number of crossings. | ||
#' - Column 3: "max_variance" - the maximum of the variances of the replicate measurements at a single time for the individual or sample. | ||
#' - Column 4: "ave_variance" - the average of the variances of the replicate measurements at a single time for the individual or sample. | ||
#' - Column 5: "base_score" - the original, unnormalized profile repeatability score. Smaller numbers rank higher. | ||
#' - Column 6: "final_score" - the base score, normalized by the sigmoid function. Constrained to be between 0 and 1. Scores closer to 1 rank higher. | ||
#' - Column 7: "rank" - the calculated ranking of the individual or sample, against all other individuals or samples in the data set. | ||
#' | ||
#' @examples | ||
#' df <- data.frame( | ||
#' col_a = c('A', 'A', 'B', 'B'), | ||
#' col_b = c(5, 15, 5, 15), | ||
#' col_c = c(5, 10, 1, 2), | ||
#' col_d = c(10, 15, 3, 4) | ||
#' ) | ||
#' id_list <- unique(df[, 1]) | ||
#' individuals <- list() | ||
#' for (i in 1:length(id_list)) { | ||
#' individuals[[i]] <- df[df[, 1] == id_list[i], ] | ||
#' } | ||
#' ret_df <- do_ordering(n_trials=2, id_list=id_list, df_list=individuals, n_replicates=2) | ||
#' print(ret_df) | ||
#' | ||
#' @export | ||
do_ordering <- function(n_trials, id_list, df_list, n_replicates, verbose=FALSE, sort=TRUE) { | ||
# Generate Scores | ||
if (verbose) {message("Scoring each set of data per individual.")} | ||
scores_df <- score_dfs( | ||
id_list=id_list, | ||
df_list=df_list, | ||
n_replicates=n_replicates, | ||
n_trials=n_trials, | ||
verbose=verbose | ||
) | ||
|
||
# Order Scores and Individuals | ||
if (sort) { | ||
if (verbose) {message("Ordering by score.")} | ||
ordered_df <- scores_df[order(scores_df$final_score, decreasing=TRUE), ] | ||
} | ||
else {ordered_df <- scores_df} | ||
|
||
# need the minus to rank the highest score the highest | ||
# average will make ties be the same decimal value | ||
ordered_df$rank <- rank(-ordered_df$final_score, ties.method="average") | ||
|
||
# Resets the index | ||
rownames(ordered_df) <- NULL | ||
|
||
return(ordered_df) | ||
} | ||
|
Oops, something went wrong.