/
opt-loop-invariant.R
251 lines (223 loc) · 7.71 KB
/
opt-loop-invariant.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#' Optimizer: Loop-invariant Code Motion.
#'
#' Performs one loop-invariant code motion pass.
#' Carefully examine the results after running this function!
#'
#' @param texts A list of character vectors with the code to optimize.
#'
#' @examples
#' code <- paste(
#' "i <- 0",
#' "while (i < n) {",
#' " x <- y + z",
#' " a[i] <- 6 * i + x * x",
#' " i <- i + 1",
#' "}",
#' sep = "\n"
#' )
#' cat(opt_loop_invariant(list(code))$codes[[1]])
#' @export
#'
opt_loop_invariant <- function(texts) {
# todo: invariant subexpressions motion?
# while (i < n) { i <- (x * y) + 1 } is equivalent to
# is_1 <- (x * y); while (i < n) { i <- is_1 + 1 }
# todo: check that assigned vars that are moved, were present in parent env.
# if not, add an `if`.
# while (FALSE) { x <- 3 } is not equivalent to x <- 3; while (FALSE) { }
res <- list()
res$codes <- lapply(texts, li_one_file)
res
}
# Executes loop-invariant code motion on one file of code.
#
# @param text A character vector with code to optimize.
#
li_one_file <- function(text) {
pd <- parse_text(text)
res_pd <- pd[pd$parent < 0, ] # keep lines with just comments
new_pd <- pd[pd$parent >= 0, ] # keep lines with just comments
new_pd <- li_one_pd(new_pd)
res_pd <- rbind(res_pd, new_pd)
if (nrow(res_pd) > 0) {
res_pd <- res_pd[order(res_pd$pos_id), ]
}
deparse_data(res_pd)
}
# Executes loop-invariant code motion of a pd.
#
# @param pd A parsed data data.frame.
#
li_one_pd <- function(pd) {
res_pd <- pd
# get loops
# For the moment, remove `repeat` loop invariant
loop_parent_ids <- pd$parent[pd$token %in% c("FOR", "WHILE")] # %in% loops]
# remove loops that have function calls inside
loop_parent_ids <- loop_parent_ids[!sapply(loop_parent_ids, function(act_prnt) {
"SYMBOL_FUNCTION_CALL" %in% get_children(pd, act_prnt)$token
})]
# remove loops that have next or break calls inside
loop_parent_ids <- loop_parent_ids[!sapply(loop_parent_ids, function(act_prnt) {
any(c("BREAK", "NEXT") %in% get_children(pd, act_prnt)$token)
})]
# for each loop do the invariant code motion
for (loop_parent_id in loop_parent_ids) {
res_pd <- li_in_loop(res_pd, loop_parent_id)
}
res_pd
}
# Executes loop-invariant code motion in one loop.
#
# @param pd A parsed data data.frame.
# @param id A numeric indicating the node ID of the loop.
#
li_in_loop <- function(pd, id) {
lv_vars <- get_loop_variant_vars(pd, id)
# start visiting the loop body
visit_nodes <- utils::tail(pd$id[pd$parent == id], 1)
to_unloop_ids <- c()
while (length(visit_nodes) > 0) {
new_visit <- c()
for (act_parent in visit_nodes) {
act_pd <- get_children(pd, act_parent)
act_sblngs <- act_pd[act_pd$parent == act_parent, ]
if (act_sblngs$token[[1]] == "'{'" || "';'" %in% act_sblngs$token) {
new_visit <- c(new_visit, act_sblngs$id[!act_sblngs$terminal])
} else if (all(
act_pd$token %in%
c(ops, precedence_ops, constants, assigns, "expr", "SYMBOL")
)) {
if (!any(lv_vars %in% act_pd$text[act_pd$token == "SYMBOL"])) {
to_unloop_ids <- c(to_unloop_ids, act_parent)
}
}
}
visit_nodes <- new_visit
}
unloop_expr(pd, to_unloop_ids, id)
}
# Moves expressions that are inside a loop to outside of it.
#
# @param pd A parsed data data.frame.
# @param exprs_ids A numeric vector indicating the node IDs of the expressions.
# @param loop_id A numeric indicating the node ID of the parent loop.
#
unloop_expr <- function(pd, exprs_ids, loop_id) {
if (length(exprs_ids) == 0) {
return(pd)
}
res_pd <- remove_nodes(pd, exprs_ids)
loop_pd <- get_children(pd, loop_id)
exprs_pd <- get_children(loop_pd, exprs_ids)
exprs <- deparse_data(exprs_pd)
exprs <- sub("^\n*", "", exprs)
loop_token <- pd$token[pd$parent == loop_id][[1]]
if (loop_token == "WHILE") {
loop_cond_id <- pd$id[pd$parent == loop_id & pd$token == "expr"][[1]]
loop_cond <- sub(
"^\n*", "",
deparse_data(get_children(pd, loop_cond_id))
)
new_expr <- paste0("if (", loop_cond, ") {\n", exprs, "}")
} else if (loop_token == "FOR") {
loop_cond_id <- pd$id[pd$parent == loop_id & pd$token == "forcond"][[1]]
loop_cond_id <- pd$id[pd$parent == loop_cond_id & pd$token == "expr"]
loop_cond <- sub(
"^\n*", "",
deparse_data(get_children(pd, loop_cond_id))
)
new_expr <- paste0("if (length(", loop_cond, ") > 0) {\n", exprs, "}")
}
new_expr_pd <- parse_text(new_expr)
new_expr_pd$prev_spaces[new_expr_pd$terminal][[1]] <-
loop_pd$prev_spaces[loop_pd$terminal][[1]]
new_expr_pd$line1[new_expr_pd$terminal][[1]] <-
loop_pd$line1[loop_pd$terminal][[1]]
new_expr_pd$prev_spaces[nrow(new_expr_pd)] <-
loop_pd$prev_spaces[loop_pd$terminal][[1]]
new_expr_pd$next_lines[nrow(new_expr_pd)] <- 1
if (any(exprs_ids %in% utils::tail(pd$id[pd$parent == loop_id]))) {
# if we are removing the entire loop body, then remove the loop
res_pd <- remove_nodes(res_pd, loop_id)
}
rbind(
res_pd,
replace_pd(get_children(loop_pd, loop_id), new_expr_pd)
)
}
# Returns which variables vary depending on loop execution.
#
# @param pd A parsed data data.frame.
# @param id A numeric indicating the node ID of the loop.
#
get_loop_variant_vars <- function(pd, id) {
act_pd <- get_children(pd, id)
act_sblngs <- act_pd[act_pd$parent == id, ]
assigns_ids <- pd$parent[pd$token %in% assigns]
# remove function definitions
act_pd <- remove_nodes(
act_pd,
act_pd$id[act_pd$parent == act_pd$parent[act_pd$token == "FUNCTION"]]
)
lv_vars_ids <- c()
# get for condition's IN vars
# FOR '(' forcond ')' ; where forcond ~> SYMBOL IN expr
lv_vars_ids <- c(lv_vars_ids, act_pd$id[which(act_pd$token == "IN") - 1])
# get updated vars, e.g., x <- x + 1
lv_vars_ids <- c(lv_vars_ids, get_updated_vars_ids(act_pd))
old_lv_vars_ids <- c()
while (length(lv_vars_ids) != length(old_lv_vars_ids)) {
old_lv_vars_ids <- lv_vars_ids
lv_vars <- pd$text[pd$id %in% lv_vars_ids]
for (act_id in assigns_ids) {
used_pd <- get_children(act_pd, get_assigned_exprs_ids(act_pd, act_id))
if (any(used_pd$text[used_pd$token == "SYMBOL"] %in% lv_vars)) {
lv_vars_ids <- c(lv_vars_ids, get_assigned_vars_ids(act_pd, act_id))
}
}
lv_vars_ids <- unique(lv_vars_ids)
}
unique(act_pd$text[act_pd$id %in% lv_vars_ids])
}
# Returns the node ids of all vars that are being updated, e.g., x <- x + 1.
#
# @param pd A parsed data data.frame.
#
get_updated_vars_ids <- function(pd) {
assigned_ids <- get_assigned_vars_ids(pd, get_roots(pd)$id)
assigns_ids <- pd$parent[pd$token %in% assigns]
assigned_ids[sapply(assigned_ids, function(act_id) {
ancestors_ids <- get_ancestors(pd, act_id)
ass_prnt_id <- intersect(assigns_ids, ancestors_ids)
ass_expr_pd <- get_children(pd, get_assigned_exprs_ids(pd, ass_prnt_id))
pd$text[pd$id == act_id] %in%
ass_expr_pd$text[ass_expr_pd$token == "SYMBOL"] &&
sum(ass_expr_pd$terminal) > 1
})]
}
# Returns the ids of the pd exprs that are being assigned.
#
# @param pd A parsed data data.frame.
# @param id A numeric indicating the node ID to find assigns.
#
get_assigned_exprs_ids <- function(pd, id) {
act_pd <- get_children(pd, id)
# get parents of <- <<- -> ->> and =
assign_exprs_prnts <- act_pd[
act_pd$token %in% assigns & act_pd$text != ":=",
"parent"
]
# get the assigned expr pd id
sapply(assign_exprs_prnts, function(assign_exprs_prnt) {
aux <- act_pd[act_pd$parent == assign_exprs_prnt, ]
while (any(assigns %in% aux$token)) {
if (aux$token[[2]] == "RIGHT_ASSIGN") {
aux <- act_pd[act_pd$parent == aux$id[[1]], ]
} else {
aux <- act_pd[act_pd$parent == aux$id[[3]], ]
}
}
unique(aux$parent)
})
}