-
Notifications
You must be signed in to change notification settings - Fork 1
/
make_phenotypes.R
127 lines (109 loc) · 5.45 KB
/
make_phenotypes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#' Generate phenotypes - Combine genetic values and plot errors
#'
#' Creates a data frame of phenotypes by combining genetic values with plot errors
#' generated with the function \link[FieldSimR]{field_trial_error}.
#' Requires genetic values generated with the functions \link[FieldSimR]{compsym_asr_output}
#' or \link[FieldSimR]{unstr_asr_output},
#' or any data frame matching the description below.
#'
#' @param gv.df A data frame of genetic values. Must contain the columns 'env', 'rep', genotype 'id',
#' and the genetic values for each trait.
#' @param error.df A data frame of plot errors. Must contain the columns 'env', 'block',
#' 'col', 'row', and the plot errors for each trait.
#' @param randomise When \code{TRUE} (default is \code{FALSE}), genotypes are randomly allocated to plots according to
#' a randomized complete block design (RCBD).\cr
#' \strong{Note:} Other experimental designs are being implemented and should be generated externally.
#' @param return.effects When \code{TRUE} (default is \code{FALSE}), a list is returned with additional
#' entries containing the genetic values and plot errors for each trait.
#'
#' @return A data frame with columns 'env', 'block', 'column', 'row', genotype 'id', and
#' the phenotypes for each trait. When \code{return.effects = TRUE}, a list is returned with additional
#' entries containing the genetic values and plot errors for each trait.
#'
#' @examples
#' # Generate phenotypes by combining the genetic values and plot errors provided
#' # in the two example data frames gv_df_unstr and error_df_bivar.
#'
#' pheno_df <- make_phenotypes(
#' gv.df = gv_df_unstr,
#' error.df = error_df_bivar,
#' randomise = TRUE,
#' return.effects = TRUE
#' )
#'
#' @export
make_phenotypes <- function(gv.df,
error.df,
randomise = FALSE,
return.effects = FALSE) {
if (inherits(gv.df, "list")) gv.df <- gv.df[[1]]
if (inherits(error.df, "list")) error.df <- error.df[[1]]
if (!is.data.frame(gv.df)) {
stop("'gv.df' must be a data frame")
}
if (!is.data.frame(error.df)) {
stop("'error.df' must be a data frame")
}
colnames(gv.df)[grep("env|rep|id", tolower(colnames(gv.df)))] <- tolower(colnames(gv.df))[grep("env|rep|id", tolower(colnames(gv.df)))]
colnames(error.df)[grep("env|block|col|row", tolower(colnames(error.df)))] <- tolower(colnames(error.df))[grep("env|block|col|row", tolower(colnames(error.df)))]
if (any(!c("env", "rep", "id") %in% colnames(gv.df))) {
stop("'gv.df' must contain the columns 'env', 'rep', 'id', and the genetic values for each trait")
}
colnames(error.df)[grep("column", colnames(error.df))] <- "col"
if (any(!c("env", "block", "col", "row") %in% colnames(error.df))) {
stop("'error.df' must contain the columns 'env', 'block', 'col', 'row', and the plot errors for each trait")
}
ntraits <- ncol(gv.df) - 3
if (ncol(error.df) - 4 != ntraits) {
stop("'gv.df' must contain the columns 'env', 'rep', 'id', and the genetic values for each trait.
'error.df' must contain the columns 'env', 'block', 'col', 'row', and the plot errors for each trait")
}
error.df$env <- factor(as.numeric(as.character(error.df$env)))
error.df$block <- factor(as.numeric(as.character(error.df$block)))
error.df$col <- factor(as.numeric(as.character(error.df$col)))
error.df$row <- factor(as.numeric(as.character(error.df$row)))
error.df <- error.df[order(error.df$env, error.df$block), ]
error.df <- unique(error.df)
gv.df$env <- factor(as.numeric(as.character(gv.df$env)))
gv.df$rep <- factor(as.numeric(as.character(gv.df$rep)))
gv.df$id <- factor(as.numeric(as.character(gv.df$id)))
gv.df <- gv.df[order(gv.df$env, gv.df$rep), ]
gv.df <- unique(gv.df)
if (nrow(gv.df) != nrow(error.df)) stop("number of rows in 'gv.df' and 'error.df' must match")
if (randomise) {
id <- unique(gv.df$id)
nblocks_total <- nrow(gv.df) / length(id)
gv.df$ord <- unlist((lapply(seq_len(nblocks_total), function(x) sample(id))))
gv.df <- gv.df[order(gv.df$env, gv.df$rep, gv.df$ord), ]
}
y <- error.df[, !(colnames(error.df) %in% c("env", "block", "col", "row"))] +
gv.df[, !(colnames(gv.df) %in% c("env", "rep", "id", "ord"))]
gv_env_names <- gv.df[["env"]]
error_env_names <- as.character(error.df[["env"]])
if (any(gv_env_names != error_env_names)) warning("'env' names in 'gv.df' and 'error.df' do not match, names in 'gv.df' will be used")
gv_df_names <- data.frame(env = gv_env_names)
error_df_names <- error.df[, c("block", "col", "row")]
ids <- factor(as.numeric(as.character(gv.df$id)))
pheno_df <- cbind(
gv_df_names,
error_df_names,
id = ids,
y
)
pheno_df <- pheno_df[order(pheno_df$env, pheno_df$col, pheno_df$row), ]
rownames(pheno_df) <- NULL
colnames(pheno_df) <- c("env", "block", "col", "row", "id", paste0("y.Trait", 1:ntraits))
if (return.effects) {
gv_ls <- as.list(as.data.frame(gv.df[, 4:(ntraits + 3)]))
error_ls <- as.list(as.data.frame(error.df[, 5:(ntraits + 4)]))
effects_df <- mapply(function(x, y) cbind(pheno_df[, 1:5], x, y), x = gv_ls, y = error_ls, SIMPLIFY = FALSE)
effects_df <- mapply(function(x, y) {
colnames(x)[6:7] <- c(paste0("gv.Trait", y), paste0("e.Trait", y))
x
}, x = effects_df, y = seq_len(ntraits), SIMPLIFY = FALSE)
list_names <- c("pheno.df", paste0("Trait", 1:ntraits))
pheno_df <- c(list(pheno_df), effects_df)
names(pheno_df) <- list_names
}
return(pheno_df)
}