/
compileSApply.R
128 lines (113 loc) · 4.03 KB
/
compileSApply.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
getSApplyType =
# returns NULL if not a case we would rewrite an sapply()
function(call, env, funName = as.character(call[[1]])) {
if(funName == "sapply" && isSEXPType(type <- getDataType(call[[2]], env)) )
type
else
NULL
}
rewriteSApply =
#
# This rewrites a call to sapply() into a for loop, allocating the space for the answer, etc.
# This is used when the type of the first argument to the sapply() call is a SEXP and not
# a native array/pointer.
#
# The approach below doesn't compile the code directly, but
# rewrites the code so that we can use the existing facilities to compile it.
#
# See fgets.Rdb
#
# For now, ignore simplify and USE.NAMES. Assume they aren't there.
#
# If we know the types of the vector and the return type of the function,
# we can insert them now. Otherwise, we could use special functions that our
# "compiler" will know to replace
#
# When we compile this we need to know that INTGER() maps to int *, etc.
# When we find these globals before we generate code, we need to register them.
#
# ty = getDataType(sym, env)
#
function(call, vecType, returnType, addReturn = TRUE, env = NULL, ir = NULL, ...)
{
X = call[[2]]
# get length of the R vector.
len = quote(n <- Rf_length(x))
len[[3]][[2]] = X # assume just a symbol
# get the raw data for the elements. Doesn't work for character(). Need to call GET_STRING_ELT() within the loop.
els = quote(els <- foo(x))
els[[3]][[2]] = X
els[[3]][[1]] = as.name(getSEXPDataAccessor(vecType)) # get the INTEGER, REAL, etc. for the type
# allocate answer
alloc = quote(r_ans <- Rf_allocVector(sexpEnum, n))
alloc[[3]][[2]] = sexpTypeNum = getSEXPTypeNum(returnType) # get INTSXP, REALSXP, etc.
# create the instruction to get the element
funCall = quote(tmp <- f(el))
funCall[[3]][[1]] = call[[3]]
if(length(call) > 3)
funCall[[3]][seq(3, length = length(call) - 3)] = call[-(1:3)]
loop =
quote(for(i in 1:n) {
el = els[i]
x
r_ans[i] = tmp # leave to the compiler make sense of this.
})
loop[[4]][[3]] = funCall
# Add the type for r_ans to the compiler's known types
# This might indicate that we have created that variable already
# So we may need to maintain a list of the variables we have explicitly
# allocated separately from the type information that we know ahead of time.
if(!is.null(env))
env$.types$r_ans = getSEXPType(names(sexpTypeNum))
ans = c(len,
els,
alloc,
quote(Rf_protect(r_ans)),
loop,
quote(Rf_unprotect(1L)),
if(addReturn)
quote(return(r_ans))
)
}
getSEXPDataAccessor =
function(type)
{
if(is(type, "SEXPType"))
return(switch(class(type),
REALSXPType = "REAL",
INTSXPType = "INTEGER",
LGLSXPType = "LOGICAL",
stop("no accessor for any other type")))
else
stop("cannot determine type of SEXP")
# the following doesn't make sense anymore as we use the same pointer for all the types and only distinguish the SEXPs by the R class.
if(sameType(type, getSEXPType("INT")))
"INTEGER"
else if(sameType(type, getSEXPType("LGL")))
"INTEGER"
else if(sameType(type, getSEXPType("REAL")))
"REAL"
else
stop("problem getting R data accessor routine name")
}
# Should borrow from Rllvm.
STRSXP = 16L
LGLSXP = 9L
REALSXP = 14L
INTSXP = 13L
ANYSXP = 18L
CHARSXP = 9L
getSEXPTypeNum =
function(type)
{
if(sameType(type, getSEXPType("STR")) || sameType(type, StringType))
c(STR = STRSXP)
else if(sameType(type, getSEXPType("REAL")) || sameType(type, DoubleType))
c(REAL = REALSXP)
else if(sameType(type, getSEXPType("LGL")))
c(LGL = LGLSXP)
else if(sameType(type, getSEXPType("INT")) || sameType(type, Int32Type))
c(INT = INTSXP)
else
stop("don't know what SEXP type corresponds to this type")
}