/
mine_gmatrix.R
143 lines (140 loc) · 7.19 KB
/
mine_gmatrix.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#' Mine G-matrices
#'
#' Mine networks for establishing the link between genome and g-matrix. The
#' output from this function is required to run individual-based simulations
#' in the rest of the package. The key input to this function, 'gmatrix', is a
#' (square) covariance matrix, with each row and column representing a trait for
#' the individual-based model. This function will run an evolutionary algorithm
#' to try to find a network that produces traits with the covariance structure
#' of gmatrix from a set of random standard normal values. The network from loci
#' values to trait values goes through a number of linked nodes to achieve this,
#' and each generation tests the stress of the resulting network in terms of
#' expected squared deviation of trait covariances from the input gmatrix.
#' Simulations can take minutes to hours or longer, depending on parameters
#' chosen and the number of traits. See vignettes for a more comprehensive
#' explanation for what this function is doing.
#'
#'@param loci The number of loci for an individual. Simulations can allow for
#' both haploid and diploid individuals. Allele values at each loci affect trait
#' values through a network of intermediary nodes.
#'@param layers The number of hidden layers in the network linking loci to
#' traits.
#'@param indivs The number of individuals initialised in each generation of the
#' evolutionary algorithm to test among-individual trait correlations.
#' Individuals are initialised with allele values drawn from a standard normal
#' distribution.
#'@param npsize The size of the population of networks in each generation of the
#' evolutionary algorithm. Each network is a discrete individual in the
#' population.
#'@param mu_pr The probability that a value in the network will mutate in a
#' generation. Mutation events change the existing value by adding a new value
#' drawn from a normal distribution with a mean of 0 and standard deviation of
#' mu_sd.
#'@param mu_sd The standard deviation of the random normal value mean centered
#' at 0 that is added to the existing value of the network when a mutation event
#' occurs.
#'@param max_gen The maximum number of generations that the evolutionary
#' algorithm is allowed to run before terminating (regardless of how well the
#' evolved covariance structure matches the pre-specified gmatrix).
#'@param pr_cross The probability that a focal network in the population will
#' initiate a crossover of a subset of its values with a randomly selected
#' second network (note that any given network might therefore be included in
#' more than one crossover event in a generation). The size of the subset is
#' determined randomly.
#'@param sampleK During a round of selection, the number of random networks
#' chosen to compete in a tournament. A single generation will include as many
#' tournaments as necessary to create a new network population of size npsize.
#'@param chooseK During a round of selection tournament, the number of networks
#' within the sampleK random subset of the tournament that have the highest
#' fitness will be selected to populate the next generation of networks
#'@param term_cri The criteria for terminating the evolutionary algorithm. The
#' algorithm will terminate if a network is found in which the mean squared
#' deviation of the covariance matrix elements from gmatrix is less than
#' exp(term_crit).
#'@param sd_ini The standard deviation of initialised network values at the
#' start of the evolutionary algorithm. All network values are initialised by
#' randomly sampling from a normal distribution with a mean of 0 and a
#' standard deviation of sd_ini
#'@param use_cor Should the gmatrix be treated as a correlation matrix rather
#' than a covariance matrix when calculating fitness?
#'@param prnt_out Should the function print out progress showing the stress for
#' each generation
#'@param gmatrix The pre-specified trait covariance matrix. This will define
#' what the covariance will be between each trait when allele values are drawn
#' from a standard normal distribution.
#'@return A list of eight elements that includes the following: (1) A vector of
#'input parameters, (2) the pre-specified covariance matrix, (3) matrix defining
#'the effects of loci values on the first layer of the network, (4) a three
#'dimensional array link the first network layer to trait values, (5) a matrix
#'of the marginal effect of each locus on each trait, (6) the mined covariance
#'structure, (7) all network values to be inserted into individual genomes, and
#'(8) the log stress of the mined matrix against the pre-specified matrix.
#'@examples
#'gmt <- matrix(data = 0, nrow = 4, ncol = 4);
#'diag(gmt) <- 1;
#'mg <- mine_gmatrix(gmatrix = gmt, loci = 4, layers = 3, indivs = 100,
#' npsize = 100, max_gen = 2, prnt_out = FALSE);
#'@useDynLib resevol
#'@importFrom stats rnorm rpois runif rbinom
#'@importFrom utils read.csv write.csv
#'@export
mine_gmatrix <- function(loci = 18,
layers = 6,
indivs = 1000,
npsize = 2000,
mu_pr = 0.05,
mu_sd = 0.01,
max_gen = 1000,
pr_cross = 0.05,
sampleK = 40,
chooseK = 4,
term_cri = -5.3,
sd_ini = 0.1,
use_cor = FALSE,
prnt_out = TRUE,
gmatrix){
paras <- c(loci, layers, indivs, npsize, mu_pr, mu_sd, max_gen, pr_cross,
sampleK, chooseK, term_cri, sd_ini, use_cor, as.numeric(prnt_out)
);
if(loci < 2 | loci %% 1 > 0){
stop("ERROR: 'loci' needs to be an integer value above 1.")
}
if(layers < 1 | layers %% 1 > 0){
stop("ERROR: 'layers' needs to be an integer value above 0.")
}
if(indivs < 10 | indivs %% 1 > 0){
stop("ERROR: 'indivs' needs to be an integer value above 9.")
}
if(npsize < 10 | npsize %% 1 > 0){
stop("ERROR: 'npsize' needs to be an integer value above 9.")
}
if(mu_pr > 1 | mu_pr < 0){
stop("ERROR: 'mu_pr' must be a value between 0 and 1.")
}
if(mu_sd <= 0){
stop("ERROR: 'mu_sd' must be a value greater than 0.")
}
if(max_gen < 1 | max_gen %% 1 > 0){
stop("ERROR: 'max_gen' needs to be an integer value above 0.")
}
if(pr_cross > 1 | pr_cross < 0){
stop("ERROR: 'pr_cross' must be a value between 0 and 1.")
}
if(sampleK < 1 | sampleK %% 1 > 0){
stop("ERROR: 'sampleK' needs to be an integer value above 0.")
}
if(chooseK < 1 | chooseK %% 1 > 0){
stop("ERROR: 'chooseK' needs to be an integer value above 0.")
}
if(sd_ini <= 0){
stop("ERROR: 'sd_ini' must be a value greater than 0.")
}
if(dim(gmatrix)[1] != dim(gmatrix)[2]){
stop("ERROR: 'gmatrix' needs to be a square matrix");
}
GOUT <- run_mine_gmatrix(PARAS = paras, GMATRIX_c = gmatrix);
return(GOUT); # Returns the values to produce the desired g-matrix
}
run_mine_gmatrix <- function(PARAS, GMATRIX_c){
.Call("mine_gmatrix", PARAS, GMATRIX_c);
}