-
Notifications
You must be signed in to change notification settings - Fork 3
/
data_simulation.R
193 lines (181 loc) · 6.59 KB
/
data_simulation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#' Simulate data
#'
#' Generate data for given DAG. The flexible framework allows for different
#' distributions for source and child nodes. Default distributions are
#' negative binomial (with mean = 100 and 1/dispersion = 100), and poisson,
#' respectively.
#' @param graph Graph to simulate on
#' @param n Number of samples
#' @param dist_fun distribution function for nodes without parents
#' @param dist_args list of arguments for dist_fun
#' @param child_fun distribution function for nodes with parents
#' @param child_args list of arguments for child_fun
#' @param child_dep link_fun computes an output for the expression of
#' nodes without parents. this output is than used as input for child_fun.
#' child_dep defines the parameter (a a string) of child_fun, which is used for
#' the input. E.g., the link_fun is the identity and the child_fun is rnorm, we
#' usually set child_dep = "mean".
#' @param link_fun special link function for the negative binomial
#' distribution
#' @param link_args list of arguments for link_fun
#' @param pop_size numeric for the population size, e.g., pop_size=1000 adds
#' 1000-n random genes not in the graph
#' @param latent number of latent variables
#' @param latent_fun uniform "unif" or exponential "exp" distribution of latent
#' coefficients
#' @return graph
#' @export
#' @rdname simulate_data-methods
#' @importFrom methods is
#' @importFrom pcalg wgtMatrix
#' @importFrom MASS rnegbin
#' @examples
#' dag <- create_random_DAG(30, 0.2)
#' X <- simulate_data(dag)
setGeneric(
"simulate_data",
function(
graph, n = 100,
dist_fun = rnbinom, dist_args = list(mu = 1000, size = 100),
child_fun = rpois, child_args = list(), child_dep = "lambda",
link_fun = negative.binomial.special()$linkfun,
link_args = list(offset = 1),
pop_size = 0, latent = 0, latent_fun = "unif"
) {
standardGeneric("simulate_data")
},
package = "dce"
)
setOldClass("igraph")
#' @rdname simulate_data-methods
#' @importFrom igraph V
#' @importFrom naturalsort naturalorder
setMethod(
"simulate_data",
signature = signature(graph = "igraph"),
function(
graph, n = 100,
dist_fun = rnbinom, dist_args = list(mu = 1000, size = 100),
child_fun = rpois, child_args = list(), child_dep = "lambda",
link_fun = negative.binomial.special()$linkfun,
link_args = list(offset = 1),
pop_size = 0, latent = 0, latent_fun = "unif"
) {
mat <- as(igraph::as_adjacency_matrix(graph, attr = "weight"),
"matrix")
colnames(mat) <- rownames(mat) <- V(graph)
mat <- mat[naturalorder(rownames(mat)), naturalorder(colnames(mat))]
simulate_data(mat,
n, dist_fun, dist_args, child_fun, child_args, child_dep,
link_fun, link_args, pop_size, latent
)
}
)
#' @rdname simulate_data-methods
setMethod(
"simulate_data",
signature = signature(graph = "graphNEL"),
function(
graph, n = 100,
dist_fun = rnbinom, dist_args = list(mu = 1000, size = 100),
child_fun = rpois, child_args = list(), child_dep = "lambda",
link_fun = negative.binomial.special()$linkfun,
link_args = list(offset = 1),
pop_size = 0, latent = 0, latent_fun = "unif"
) {
a <- as(graph, "matrix")
a <- a[naturalorder(rownames(a)), naturalorder(colnames(a))]
simulate_data(
a,
n, dist_fun, dist_args, child_fun, child_args, child_dep,
link_fun, link_args, pop_size, latent
)
}
)
#' @rdname simulate_data-methods
#' @importFrom naturalsort naturalorder
setMethod(
"simulate_data",
signature = signature(graph = "matrix"),
function(
graph, n = 100,
dist_fun = rnbinom, dist_args = list(mu = 1000, size = 100),
child_fun = rpois, child_args = list(), child_dep = "lambda",
link_fun = negative.binomial.special()$linkfun,
link_args = list(offset = 1),
pop_size = 0, latent = 0, latent_fun = "unif"
) {
start <- 2
p <- dim(graph)[[1]]
if (latent > 0) {
if (latent_fun == "unif") {
H1 <- matrix(runif(p * latent, -1, 1), latent, p)
} else if (latent_fun == "exp") {
H1 <- matrix(exp(p * latent), latent, p)
}
H0 <- matrix(0, p + latent, latent)
graph <- cbind(H0, rbind(H1, graph))
start <- latent + 1
p <- dim(graph)[[1]]
}
if (pop_size < p) {
pop_size <- p
}
# sanity checks
stopifnot(p >= 2)
nonzero_idx <- which(graph != 0, arr.ind = TRUE)
if (
(nrow(nonzero_idx) > 0) &&
(
any(nonzero_idx[, 2] - nonzero_idx[, 1] < 0) ||
any(diag(graph) != 0)
)
) {
stop("Input DAG must be topologically ordered!")
}
# setup data
X <- matrix(do.call(dist_fun, c(n * p, dist_args)),
nrow = n, ncol = p)
colnames(X) <- colnames(graph)
# simulate data
for (j in seq(start, p)) {
ij <- seq_len(j - 1)
betas <- graph[ij, j]
if (any(betas != 0)) {
# current node has parents
mu <- do.call(link_fun, c(list(X[, ij, drop = FALSE] %*% betas),
link_args))
X[, j] <- unlist(lapply(mu, function(x) {
y_list <- c(1, child_args)
y_list[[child_dep]] <- x
y <- do.call(child_fun, y_list)
return(y)
}))
}
}
X <- X[, naturalorder(colnames(X))]
if (pop_size > p & latent == 0) {
Y <- matrix(do.call(dist_fun, c(n * (pop_size - p), dist_args)),
nrow = n, ncol = pop_size - p)
colnames(Y) <- paste0("n", (p + 1):pop_size)
X <- cbind(X, Y)
} else if (pop_size > p & latent > 0) {
H <- matrix(
runif(latent * (pop_size - p + latent), -1, 1),
latent, pop_size - p + latent
)
Y <- X[, seq_len(latent), drop = FALSE] %*% H
Y <- apply(Y, 2, function(x) {
y <- do.call(child_fun,
c(list(n, do.call(link_fun, c(x, link_args)))))
return(y)
})
colnames(Y) <- paste0("n", (p + 1 - latent):pop_size)
X <- cbind(X, Y)
}
if (latent > 0) {
X <- X[, -seq_len(latent), drop = FALSE]
}
return(X)
}
)