-
Notifications
You must be signed in to change notification settings - Fork 35
/
infer.lisp
486 lines (462 loc) · 20.4 KB
/
infer.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
;; JavaScript type-inferrer. Takes an AST as parse-js produces them
;; and tags the elements in that AST with types. Code isn't very
;; pretty, and is too messy to prove correct in even the slightest
;; way, so testing will have to show whether it works for all cases.
;;
;; Interface:
;; infer-types (ast)
;; Tags the given AST.
;; ast-type (ast)
;; Returns the type (if any) of the given AST node.
(in-package :cl-js)
;; A type-cell, an updatable structure that is used to track the
;; widest type of something.
(defstruct tc
tp ;; Current type of the cell. Any of T, nil, :null, :undefined,
;; :object, :number, :integer, :boolean, :string. T means this
;; could be any type, nil means no information is available yet.
rels ;; Relations this tc has to other tcs. Resolved by resolve-tc
deps) ;; Used internally by resolve-tc when solving cyclic relations
(defun tc (tp &rest rels)
(make-tc :tp tp :rels rels))
;; Function type, used to deduce return types and argument types in a
;; few simple situations (direct call to function literal, local
;; functions that don't escape). args is a list of tcs, returns a
;; single tc.
(defstruct ft args returns)
;; Takes two types (as in the tp slot of a type cell), and combines
;; them into a single type.
(defun combine-types (tp1 tp2)
(cond ((not tp1) tp2) ((not tp2) tp1)
((eq tp1 tp2) tp1)
((and (member tp1 '(:number :integer)) (member tp2 '(:number :integer))) :number)
(t t)))
;; Add a possible type to a type-cell.
(defun add-type (tc tp)
(setf (tc-tp tc) (combine-types tp (tc-tp tc))))
;; Make a type-cell depend on another cell. We don't just do (add-type
;; tc1 (tc-tp tc2)), since tc2 might be updated down the line, so the
;; adding happens at resolution time.
(defun link-tc (tc1 tc2)
(push (list :and tc2) (tc-rels tc1)))
;; Environments are a list of scopes. Scopes are either :with (for a
;; with(x){} scope), or lists of variables. Each variable is a (init
;; delayed-functions "name" tc) list. Any change requires a new
;; environment to be consed up, since they are shared.
;; They are used mostly to track variable initialization. In JS, a
;; variable always starts out undefined, but is usually used only
;; after assignment. Environments are threaded through the infer
;; methods in such a way that each method receives an environment that
;; passed through all code paths that could possibly be executed
;; before it.
;; Delayed functions are function definitions that should be inferred
;; once the variable is actually being used. This is done to prevent
;; the function from being inferred in an environment where all
;; variables are uninitialized. This may cause function bodies to be
;; inferred multiple times, but this is harmless.
;; Called when exiting a scope.
(defun pop-scope (env) (cdr env))
;; Update the mutable fields of a var in an environment, consing as
;; little as possible.
(defun update-env (env var init delayed-function)
(labels ((iter-scope (scope)
(let ((found (and (not (eq (car scope) :with)) (find var (car scope)))))
(if found
(cons (iter-var (car scope)) (cdr scope))
(cons (car scope) (iter-scope (cdr scope))))))
(iter-var (list)
(if (eq (car list) var)
(cons (list* init delayed-function (cddr var)) (cdr list))
(cons (car list) (iter-var (cdr list))))))
(iter-scope env)))
;; Mark all visible locals as potentially containing any type.
(defun ruin-env-for-eval (env)
(let ((vars-seen ()))
(dolist (scope env)
(unless (eq scope :with)
(dolist (var scope)
(unless (member (third var) vars-seen :test #'string=)
(push (third var) vars-seen)
(add-type (var-tc var) t)))))))
;; Lookup a variable definition in the current env. Returns a second
;; value of T if a with scope was passed before finding the variable.
(defun find-in-env (env name)
(let ((passed-with nil))
(dolist (scope env)
(if (eq scope :with)
(setf passed-with t)
(dolist (var scope)
(when (string= (third var) name)
(return-from find-in-env (values var passed-with))))))))
;; Accessors for environment var lists.
(defun var-tc (var) (fourth var))
(defun var-init (var) (first var))
(defun var-delayed-function (var) (second var))
;; Merge two environments (for example, after an if/else branch). The
;; arguments should have the same shape, with only different values
;; for init and delayed-functions slots
(defun merge-env (env1 env2)
(when (eq env1 env2) (return-from merge-env env1))
(loop :for scope1 :in env1 :for scope2 :in env2
:if (eq scope1 :with) :collect ':with :else :collect
(loop :for var1 :in scope1 :for var2 :in scope2 :collect
(if (and (eq (var-init var1) (var-init var2))
(equal (var-delayed-function var1) (var-delayed-function var2)))
var1
(list* (and (var-init var1) (var-init var2))
(or (var-delayed-function var1) (var-delayed-function var2))
(cddr var1))))))
;; Assign a type to a local variable. type can be a type cell or a
;; type symbol. Returns a new environment in which the variable will
;; be marked as initialized.
(defun assign (name type env)
(multiple-value-bind (found passed-with) (find-in-env env name)
(when found
(etypecase type
(tc (link-tc (var-tc found) type))
(symbol (add-type (var-tc found) type)))
(let ((initialized (or (and (var-init found) t) (not passed-with))))
(when (or (not (eq (var-init found) initialized))
(var-delayed-function found))
(setf env (update-env env found initialized ())))))
env))
;; Compute the various type-cell relations.
(defun compute-rel (type a b)
(ecase type
(:and a) ;; used by link-tc
(:+ ;; the interesting effects of the + operator
(cond ((or (eq a :string) (eq b :string)) :string)
((or (member a '(t :object)) (member b '(t :object))) t)
((and (eq a :integer) (eq b :integer)) :integer)
((or (not a) (not b)) nil) ;; Will be recomputed later
(t :number)))
(:either (combine-types a b))
(:maybe-int ;; result of other numeric operations
(cond ((eq b :none) (case a (:integer :integer) ((nil) nil) (t :number)))
((and (eq a :integer) (eq b :integer)) :integer)
((or (not a) (not b)) nil)
(t :number)))))
;; 'Resolve' a type-cell, computing its final type from its current
;; type and its relations. Done in a second pass after inference has
;; completed.
(defun resolve-tc (tc)
;; Solves a system of dependencies by iterating until no more
;; changes are produced. Since types always become looser, never
;; stricter, this terminates.
(labels ((apply-rel (tc rel)
(let* ((result (compute-rel (car rel) (resolve-tc (second rel))
(if (third rel) (resolve-tc (third rel)) :none)))
(combined (combine-types result (tc-tp tc))))
(unless (eq combined (tc-tp tc))
(setf (tc-tp tc) combined)
(loop :for (tc . rel) :in (tc-deps tc) :do
(apply-rel tc rel))))))
(when (eq (tc-tp tc) t) (return-from resolve-tc t))
(let ((rels (or (tc-rels tc) (return-from resolve-tc (tc-tp tc)))))
(setf (tc-rels tc) nil)
(loop :for rel :in rels :while (not (eq (tc-tp tc) t)) :do
(push (cons tc rel) (tc-deps (second rel)))
(unless (or (not (third rel)) (eq (third rel) (second rel)))
(push (cons tc rel) (tc-deps (third rel))))
(apply-rel tc rel))
(tc-tp tc))))
(defstruct typing val) ;; Used in returned ast
(defun resolve-tcs (ast)
(labels ((resolve (val)
(typecase val
(cons (map-into val #'resolve val))
(tc (make-typing :val (or (resolve-tc val) t)))
(t val))))
(resolve ast)))
;; API to the inferrer. Tags the given ast.
(defun infer-types (ast)
(infer ast ())
(resolve-tcs ast))
;; Access the type of a tagged AST element.
(defun ast-type (ast)
(let ((maybe (car (last ast))))
(and (typing-p maybe) (typing-val maybe))))
(defun num-type (type)
(or (eq type :number) (eq type :integer)))
;; This is later defined for all possible elements in the AST. Returns
;; (values env [tc] [ft]), where only expressions should return a tc,
;; and only things that return a function that we are trying to
;; statically analyse should return an ft.
(defgeneric apply-infer-rule (ast-tag ast-args env))
(defmacro definfer ((type &rest args) &body body)
(let ((form-arg (gensym)))
`(defmethod apply-infer-rule ((,(gensym) (eql ,type)) ,form-arg env)
(destructuring-bind (,@args &rest ,form-arg) ,form-arg
(declare (ignore ,form-arg))
,@body))))
;; Call the apply-infer-rule method for the ast element, and then tags
;; the ast with the tc for that expression, if a tc was returned.
(defun infer (form env &optional context)
(multiple-value-bind (env tc ftype) (apply-infer-rule (car form) (cdr form) env)
;; If an ftype is used in a non-call context, we can no longer
;; guarantee how it will be called, so the argtypes have to be
;; cleared. This is a bit of a hack---the whole function analysis
;; is, in fact, a hack.
(when (and ftype (not (eq context :call)))
(dolist (arg (ft-args ftype)) (add-type arg t))
(setf ftype nil))
;; Tag the AST list by adding an extra element (if it doesn't
;; exist yet, since sometimes inner functions are inferred
;; multiple times).
(when tc
(let ((last (last form)))
(unless (tc-p (car last))
(setf (cdr last) (list tc)))))
(values env tc ftype)))
;; Used to hold the (arg-tcs . return-tc) for the current function,
;; since some of the methods need direct access to those.
(defparameter *function-tcs* nil)
;; See if a function may fall off its end without returning, since
;; that is relevant for the return type we assign to it. This isn't
;; always correct (there are many complicated ways in which a function
;; can guarantee returning), but works for basic cases.
(defun may-fall-off (fbody)
(labels ((see-body (stats)
(see (car (last stats))))
(see (stat)
(case (car stat)
((nil :for :for-in :do :while :stat :break
:continue :defun :var :switch) t)
((:return :throw) nil)
(:if (or (not (fourth stat)) (see (third stat)) (see (fourth stat))))
(:with (see (third stat)))
(:block (see-body (second stat)))
(:try (and (or (not (fourth stat)) (see (fourth stat)))
(or (not (third stat)) (see (second stat)) (see (third stat))))))))
(see-body fbody)))
;; Sets up the scope for a function, and infers it in this scope.
(defun infer-func (fname args body env)
(let ((locals (find-locals body `("this" "arguments" ,@args
,@(and fname (list fname)))))
(ret-tc (tc ()))
(arg-tcs (loop :repeat (length args) :collect (tc ()))))
(multiple-value-bind (defuns body) (split-out-defuns body)
(when (may-fall-off body) (add-type ret-tc :undefined))
(let* ((*function-tcs* (cons arg-tcs ret-tc))
tmp
(ft (make-ft :args arg-tcs :returns ret-tc))
;; The new scope list
(sc (loop :for name :in locals :collect
(cond ((string= name "this") (list t nil name (tc :object)))
;; defuns get some magic to store them as delayed functions,
;; and to be able to keep their ftype
((setf tmp (find name defuns :key #'second :test #'string=))
(list (list nil) (cons :function (cdr tmp)) name (tc :object)))
((equal name fname)
(list (list ft) nil name (tc :object)))
;; arguments
((setf tmp (position name args :test #'string=))
(list t nil name (nth tmp arg-tcs)))
((string= name "arguments") (list t nil name (tc :object)))
(t (list nil nil name (tc ()))))))
(env (cons sc env)))
(dolist (stat body) (setf env (infer stat env)))
(values (pop-scope env) ft)))))
;; Note that definfer automatically adds an env parameter to each of
;; these methods.
(definfer (:function name args body)
(multiple-value-bind (env ftype) (infer-func name args body env)
(values env (tc :object) ftype)))
;; This is only invoked for defuns that are not at the top-level of a
;; function. Those are hoisted to the top of the function, and handled
;; more cleverly.
(definfer (:defun name args body)
(setf env (assign name :object env))
(multiple-value-bind (env ftype) (infer-func nil args body env)
(dolist (tc (ft-args ftype)) (add-type tc t))
env))
(definfer (:atom atom)
(values env (case atom
((:true :false) (tc :boolean))
(:null (tc :null))
(t (tc t)))))
(definfer (:object props)
(loop :for (nil . val) :in props :do
(setf env (infer (if (member (car val) '(:get :set)) (cdr val) val) env)))
(values env (tc :object)))
(definfer (:regexp)
(values env (tc :object)))
(definfer (:label name form)
(declare (ignore name))
(values (infer form env)))
(definfer (:var bindings)
(loop :for (name . val) :in bindings :do
(when val
(multiple-value-bind (env1 tc) (infer val env)
(setf env (assign name tc env1)))))
env)
(definfer (:name name)
;; Stuff like arguments[1] = null can mess with argument types, so
;; we clear those at the first sign of trouble.
(when (string= name "arguments")
(dolist (tc (car *function-tcs*)) (add-type tc t)))
(multiple-value-bind (var passed-with) (find-in-env env name)
(if var
;; For local variables, stuff gets complicated...
(let* ((update nil)
(init (var-init var))
;; For locals that hold a function, the init value is a
;; cons, and its car is the ft structure for this
;; function. Ergh.
(ftype (and (consp init) (car init))))
;; If this is not initialized yet, it is used before
;; initialization, and :undefined is added to the type.
(unless init
(add-type (var-tc var) :undefined)
(setf update t))
;; When a delayed function is present, infer it in this
;; environment. Also, if we don't have an ftype yet, use the
;; one returned by this infer call.
(when (var-delayed-function var)
(multiple-value-bind (env1 tc ftype1)
(infer (var-delayed-function var) env :call)
(declare (ignore tc))
(setf env env1)
(unless ftype (setf ftype (setf (car init) ftype1))))
(setf update t))
;; Update the env if anything was changed.
(values (if update (update-env env var init nil) env)
;; If the lookup passed a with environment, we can't
;; say anything about the type.
(if passed-with (tc t) (var-tc var))
ftype))
;; Global variables are unknowable.
(values env (tc t)))))
(definfer (:num num)
(values env (tc (if (typep num 'fixnum) :integer :number))))
(definfer (:toplevel body)
(dolist (stat body) (setf env (infer stat env)))
env)
(definfer (:assign op place val)
(unless (eq (car place) :name) (setf env (infer place env)))
(multiple-value-bind (env tc)
(infer (if (eq op t) val `(:binary ,op ,place ,val)) env)
(when (eq (car place) :name)
(setf env (assign (second place) tc env)))
(values env tc)))
(definfer (:stat form)
(values (infer form env)))
(definfer (:string)
(values env (tc :string)))
(definfer (:return value)
(multiple-value-bind (env tc) (if value (infer value env) env)
(when *function-tcs*
(if tc
(link-tc (cdr *function-tcs*) tc)
(add-type (cdr *function-tcs*) :undefined)))
env))
(definfer (:for init cond step body)
(when init (setf env (infer init env)))
(let ((env2 env))
(when cond (setf env (infer cond env)))
(when step (setf env (infer step env)))
(merge-env env2 (infer body env))))
(definfer (:while cond body)
(merge-env env (infer body (infer cond env))))
(definfer (:do cond body)
(merge-env env (infer cond (infer body env))))
(definfer (:for-in init lhs obj body)
(when init (setf env (infer init env)))
(setf env (infer obj env))
(when (eq (car lhs) :name)
(setf env (assign (second lhs) :string env)))
(merge-env env (infer body env)))
(definfer (:switch expr cases)
(setf env (infer expr env))
(loop :for (val . body) :in cases :do
(when val (setf env (infer val env)))
(dolist (stat body)
(setf env (infer stat env))))
env)
(definfer (:if test then else)
(setf env (infer test env))
(merge-env (infer then env) (if else (infer else env) env)))
(definfer (:conditional test then else)
(setf env (infer test env))
(multiple-value-bind (env1 tc1) (infer then env)
(multiple-value-bind (env2 tc2) (infer else env)
(let ((tc (tc ())))
(link-tc tc tc1) (link-tc tc tc2)
(values (merge-env env1 env2) tc)))))
(definfer (:try body catch finally)
(let ((catch-env (if catch
(let ((env (cons `((t nil ,(car catch) ,(tc t))) env)))
(pop-scope (infer (cdr catch) env)))
env)))
(setf env (merge-env (infer body env) catch-env))
(if finally (infer finally env) env)))
(definfer (:throw expr)
(values (infer expr env)))
(definfer (:with obj body)
(pop-scope (infer body (infer obj (cons :with env)))))
(definfer (:new func args)
(setf env (infer func env))
(dolist (arg args)
(setf env (infer arg env)))
(values env (tc :object)))
(definfer (:call func args)
(if (ast-is-eval-var func)
;; Eval might screw up every visible local variable.
(progn (ruin-env-for-eval env) (values env (tc t)))
(multiple-value-bind (env ftc ftype) (infer func env :call)
(declare (ignore ftc))
;; Infer the arguments, linking the arg type-cells for this
;; function (if any) to the results.
(loop :for arg-tc := (and ftype (ft-args ftype)) :then (cdr arg-tc)
:for arg :in args :do
(multiple-value-bind (env1 tc) (infer arg env)
(setf env env1)
(when arg-tc (link-tc (car arg-tc) tc)))
:finally (dolist (tc arg-tc) (add-type tc :undefined)))
(values env (if ftype (ft-returns ftype) (tc t))))))
(definfer (:binary op lhs rhs)
(multiple-value-bind (env lhst) (infer lhs env)
(multiple-value-bind (env rhst) (infer rhs env)
(values env
(case op
(:+ (tc nil `(:+ ,lhst ,rhst)))
((:== :=== :!= :!== :instanceof :in :< :> :<= :>=) (tc :boolean))
((:^ :& :|\|| :>> :<< :>>>) (tc :integer))
((:&& :|\|\||) (tc nil `(:either ,lhst ,rhst)))
((:- :* :%) (tc nil `(:maybe-int ,lhst ,rhst)))
(:/ (tc :number)))))))
(definfer (:unary-prefix op place)
(multiple-value-bind (env argt) (infer place env)
(ecase op
(:typeof (values env (tc :string)))
(:void (values env (tc :undefined)))
((:delete :!) (values env (tc :boolean)))
((:-- :++) (let ((tc (tc nil `(:maybe-int ,argt))))
(when (eq (car place) :name)
(setf env (assign (second place) tc env)))
(values env tc)))
(:+ (values env (tc nil `(:maybe-int ,argt))))
(:- (values env (tc :number)))
(:~ (values env (tc :integer))))))
(definfer (:unary-postfix op place)
(declare (ignore op)) ;; always :++ or :--
(multiple-value-bind (env argt) (infer place env)
(when (eq (car place) :name)
(setf env (assign (second place) (tc nil `(:maybe-int ,argt)) env)))
(values env argt)))
(definfer (:array elems)
(dolist (elem elems) (when elem (setf env (infer elem env))))
(values env (tc :object)))
(definfer (:block forms)
(dolist (stat forms) (setf env (infer stat env)))
env)
(definfer (:seq form1 result)
(infer result (infer form1 env)))
(definfer (:dot obj)
(values (infer obj env) (tc t)))
(definfer (:sub obj attr)
(values (infer attr (infer obj env)) (tc t)))
(definfer (:break)
env)
(definfer (:continue)
env)