Skip to content

Commit

Permalink
1.0.18.25: tweak stack allocation on x86 and x86-64
Browse files Browse the repository at this point in the history
 * Use MAYBE-PSEUDO-ATOMIC in the LIST-OR-LIST* VOP: stack allocation
   doesn't need PA.

 * When using STACK-ALLOCATE-P parameter with ALLOCATION, also pass in
   the lowtag. This allows us to generate

     LEA REG [STACK_REG+LOWTAG]

   instead of

     MOV REG STACK_REG
     LEA REG [REG+LOWTAG]

   for stack allocation & tagging.

   On x86-64 can use the same trick in the inline path for heap
   allocation as well.
  • Loading branch information
nikodemus committed Jul 19, 2008
1 parent ca09462 commit 1fc851c
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 59 deletions.
2 changes: 2 additions & 0 deletions NEWS
@@ -1,5 +1,7 @@
;;;; -*- coding: utf-8; -*-
changes in sbcl-1.0.19 relative to 1.0.18:
* optimization: stack allocation is slightly more efficient on x86
and x86-64.
* bug fix: compiler no longer makes erronous assumptions in the
presense of non-foldable SATISFIES types.
* bug fix: stack analysis missed cleanups of dynamic-extent
Expand Down
25 changes: 11 additions & 14 deletions src/compiler/x86-64/alloc.lisp
Expand Up @@ -44,13 +44,12 @@
(move temp ,tn)
temp))))
(storew reg ,list ,slot list-pointer-lowtag))))
(let ((cons-cells (if star (1- num) num)))
(pseudo-atomic
(let ((cons-cells (if star (1- num) num))
(stack-allocate-p (awhen (sb!c::node-lvar node)
(sb!c::lvar-dynamic-extent it))))
(maybe-pseudo-atomic stack-allocate-p
(allocation res (* (pad-data-block cons-size) cons-cells) node
(awhen (sb!c::node-lvar node)
(sb!c::lvar-dynamic-extent it)))
(inst lea res
(make-ea :byte :base res :disp list-pointer-lowtag))
stack-allocate-p list-pointer-lowtag)
(move ptr res)
(dotimes (i (1- cons-cells))
(store-car (tn-ref-tn things) ptr)
Expand Down Expand Up @@ -119,11 +118,11 @@
;; FIXME: It would be good to check for stack overflow here.
(move ecx words)
(inst shr ecx n-fixnum-tag-bits)
(allocation result result node t)
(allocation result result node t other-pointer-lowtag)
(inst cld)
(inst lea res
(make-ea :byte :base result :disp (* vector-data-offset n-word-bytes)))
(inst lea result (make-ea :byte :base result :disp other-pointer-lowtag))
(make-ea :byte :base result :disp (- (* vector-data-offset n-word-bytes)
other-pointer-lowtag)))
(storew type result 0 other-pointer-lowtag)
(storew length result vector-length-slot other-pointer-lowtag)
(zeroize zero)
Expand Down Expand Up @@ -215,9 +214,8 @@
(:generator 10
(maybe-pseudo-atomic stack-allocate-p
(let ((size (+ length closure-info-offset)))
(allocation result (pad-data-block size) node stack-allocate-p)
(inst lea result
(make-ea :byte :base result :disp fun-pointer-lowtag))
(allocation result (pad-data-block size) node stack-allocate-p
fun-pointer-lowtag)
(storew (logior (ash (1- size) n-widetag-bits) closure-header-widetag)
result 0 fun-pointer-lowtag))
(loadw temp function closure-fun-slot fun-pointer-lowtag)
Expand Down Expand Up @@ -256,8 +254,7 @@
(:node-var node)
(:generator 50
(maybe-pseudo-atomic stack-allocate-p
(allocation result (pad-data-block words) node stack-allocate-p)
(inst lea result (make-ea :byte :base result :disp lowtag))
(allocation result (pad-data-block words) node stack-allocate-p lowtag)
(when type
(storew (logior (ash (1- words) n-widetag-bits) type)
result
Expand Down
3 changes: 1 addition & 2 deletions src/compiler/x86-64/call.lisp
Expand Up @@ -1298,8 +1298,7 @@
(inst jrcxz done)
(inst lea dst (make-ea :qword :base rcx :index rcx))
(maybe-pseudo-atomic stack-allocate-p
(allocation dst dst node stack-allocate-p)
(inst lea dst (make-ea :byte :base dst :disp list-pointer-lowtag))
(allocation dst dst node stack-allocate-p list-pointer-lowtag)
(inst shr rcx (1- n-lowtag-bits))
;; Set decrement mode (successive args at lower addresses)
(inst std)
Expand Down
34 changes: 18 additions & 16 deletions src/compiler/x86-64/macros.lisp
Expand Up @@ -138,30 +138,31 @@
;;; node-var then it is used to make an appropriate speed vs size
;;; decision.

(defun allocation-dynamic-extent (alloc-tn size)
(defun allocation-dynamic-extent (alloc-tn size lowtag)
(inst sub rsp-tn size)
;; see comment in x86/macros.lisp implementation of this
(inst and rsp-tn #.(lognot lowtag-mask))
(aver (not (location= alloc-tn rsp-tn)))
(inst mov alloc-tn rsp-tn)
(inst lea alloc-tn (make-ea :byte :base rsp-tn :disp lowtag))
(values))

;;; This macro should only be used inside a pseudo-atomic section,
;;; which should also cover subsequent initialization of the
;;; object.
(defun allocation-tramp (alloc-tn size &optional ignored)
(declare (ignore ignored))
(defun allocation-tramp (alloc-tn size lowtag)
(inst push size)
(inst lea temp-reg-tn (make-ea :qword
:disp (make-fixup "alloc_tramp" :foreign)))
(inst call temp-reg-tn)
(inst pop alloc-tn)
(when lowtag
(inst lea alloc-tn (make-ea :byte :base alloc-tn :disp lowtag)))
(values))

(defun allocation (alloc-tn size &optional ignored dynamic-extent)
(defun allocation (alloc-tn size &optional ignored dynamic-extent lowtag)
(declare (ignore ignored))
(when dynamic-extent
(allocation-dynamic-extent alloc-tn size)
(allocation-dynamic-extent alloc-tn size lowtag)
(return-from allocation (values)))
(let ((NOT-INLINE (gen-label))
(DONE (gen-label))
Expand All @@ -188,7 +189,7 @@
:scale 1 :disp
(make-fixup "boxed_region" :foreign 8))))
(cond (in-elsewhere
(allocation-tramp alloc-tn size))
(allocation-tramp alloc-tn size lowtag))
(t
(inst mov temp-reg-tn free-pointer)
(if (tn-p size)
Expand All @@ -201,17 +202,19 @@
(inst cmp end-addr alloc-tn)
(inst jmp :be NOT-INLINE)
(inst mov free-pointer alloc-tn)
(inst mov alloc-tn temp-reg-tn)
(if lowtag
(inst lea alloc-tn (make-ea :byte :base temp-reg-tn :disp lowtag))
(inst mov alloc-tn temp-reg-tn))
(emit-label DONE)
(assemble (*elsewhere*)
(emit-label NOT-INLINE)
(cond ((numberp size)
(allocation-tramp alloc-tn size))
(allocation-tramp alloc-tn size lowtag))
(t
(inst sub alloc-tn free-pointer)
(allocation-tramp alloc-tn alloc-tn)))
(inst jmp DONE))
(values)))))
(allocation-tramp alloc-tn alloc-tn lowtag)))
(inst jmp DONE))))
(values)))

;;; Allocate an other-pointer object of fixed SIZE with a single word
;;; header having the specified WIDETAG value. The result is placed in
Expand All @@ -222,11 +225,10 @@
(bug "empty &body in WITH-FIXED-ALLOCATION"))
(once-only ((result-tn result-tn) (size size) (stack-allocate-p stack-allocate-p))
`(maybe-pseudo-atomic ,stack-allocate-p
(allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p)
(allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p
other-pointer-lowtag)
(storew (logior (ash (1- ,size) n-widetag-bits) ,widetag)
,result-tn)
(inst lea ,result-tn
(make-ea :qword :base ,result-tn :disp other-pointer-lowtag))
,result-tn 0 other-pointer-lowtag)
,@forms)))

;;;; error code
Expand Down
24 changes: 11 additions & 13 deletions src/compiler/x86/alloc.lisp
Expand Up @@ -44,12 +44,12 @@
(move temp ,tn)
temp))))
(storew reg ,list ,slot list-pointer-lowtag))))
(let ((cons-cells (if star (1- num) num)))
(pseudo-atomic
(let ((cons-cells (if star (1- num) num))
(stack-allocate-p (awhen (sb!c::node-lvar node)
(sb!c::lvar-dynamic-extent it))))
(maybe-pseudo-atomic stack-allocate-p
(allocation res (* (pad-data-block cons-size) cons-cells) node
(awhen (sb!c::node-lvar node) (sb!c::lvar-dynamic-extent it)))
(inst lea res
(make-ea :byte :base res :disp list-pointer-lowtag))
stack-allocate-p list-pointer-lowtag)
(move ptr res)
(dotimes (i (1- cons-cells))
(store-car (tn-ref-tn things) ptr)
Expand Down Expand Up @@ -143,11 +143,11 @@
;; FIXME: It would be good to check for stack overflow here.
(move ecx words)
(inst shr ecx n-fixnum-tag-bits)
(allocation result result node t)
(allocation result result node t other-pointer-lowtag)
(inst cld)
(inst lea res
(make-ea :byte :base result :disp (* vector-data-offset n-word-bytes)))
(inst lea result (make-ea :byte :base result :disp other-pointer-lowtag))
(make-ea :byte :base result :disp (- (* vector-data-offset n-word-bytes)
other-pointer-lowtag)))
(sc-case type
(immediate
(aver (typep (tn-value type) '(unsigned-byte 8)))
Expand Down Expand Up @@ -245,9 +245,8 @@
(maybe-pseudo-atomic stack-allocate-p
(let ((size (+ length closure-info-offset)))
(allocation result (pad-data-block size) node
stack-allocate-p)
(inst lea result
(make-ea :byte :base result :disp fun-pointer-lowtag))
stack-allocate-p
fun-pointer-lowtag)
(storew (logior (ash (1- size) n-widetag-bits) closure-header-widetag)
result 0 fun-pointer-lowtag))
(loadw temp function closure-fun-slot fun-pointer-lowtag)
Expand Down Expand Up @@ -306,8 +305,7 @@
(aver (null type))
(inst call (make-fixup dst :assembly-routine)))
(maybe-pseudo-atomic stack-allocate-p
(allocation result (pad-data-block words) node stack-allocate-p)
(inst lea result (make-ea :byte :base result :disp lowtag))
(allocation result (pad-data-block words) node stack-allocate-p lowtag)
(when type
(storew (logior (ash (1- words) n-widetag-bits) type)
result
Expand Down
3 changes: 1 addition & 2 deletions src/compiler/x86/call.lisp
Expand Up @@ -1354,8 +1354,7 @@
(inst jecxz done)
(inst lea dst (make-ea :dword :base ecx :index ecx))
(maybe-pseudo-atomic stack-allocate-p
(allocation dst dst node stack-allocate-p)
(inst lea dst (make-ea :byte :base dst :disp list-pointer-lowtag))
(allocation dst dst node stack-allocate-p list-pointer-lowtag)
(inst shr ecx 2)
;; Set decrement mode (successive args at lower addresses)
(inst std)
Expand Down
25 changes: 14 additions & 11 deletions src/compiler/x86/macros.lisp
Expand Up @@ -177,7 +177,7 @@
;;; the duration. Now we have pseudoatomic there's no need for that
;;; overhead.

(defun allocation-dynamic-extent (alloc-tn size)
(defun allocation-dynamic-extent (alloc-tn size lowtag)
(inst sub esp-tn size)
;; FIXME: SIZE _should_ be double-word aligned (suggested but
;; unfortunately not enforced by PAD-DATA-BLOCK and
Expand All @@ -187,7 +187,7 @@
;; 2004-03-30
(inst and esp-tn (lognot lowtag-mask))
(aver (not (location= alloc-tn esp-tn)))
(inst mov alloc-tn esp-tn)
(inst lea alloc-tn (make-ea :byte :base esp-tn :disp lowtag))
(values))

(defun allocation-notinline (alloc-tn size)
Expand Down Expand Up @@ -269,12 +269,16 @@

;;; (FIXME: so why aren't we asserting this?)

(defun allocation (alloc-tn size &optional inline dynamic-extent)
(defun allocation (alloc-tn size &optional inline dynamic-extent lowtag)
(cond
(dynamic-extent (allocation-dynamic-extent alloc-tn size))
(dynamic-extent
(allocation-dynamic-extent alloc-tn size lowtag))
((or (null inline) (policy inline (>= speed space)))
(allocation-inline alloc-tn size))
(t (allocation-notinline alloc-tn size)))
(t
(allocation-notinline alloc-tn size)))
(when (and lowtag (not dynamic-extent))
(inst lea alloc-tn (make-ea :byte :base alloc-tn :disp lowtag)))
(values))

;;; Allocate an other-pointer object of fixed SIZE with a single word
Expand All @@ -286,12 +290,11 @@
(bug "empty &body in WITH-FIXED-ALLOCATION"))
(once-only ((result-tn result-tn) (size size) (stack-allocate-p stack-allocate-p))
`(maybe-pseudo-atomic ,stack-allocate-p
(allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p)
(storew (logior (ash (1- ,size) n-widetag-bits) ,widetag)
,result-tn)
(inst lea ,result-tn
(make-ea :byte :base ,result-tn :disp other-pointer-lowtag))
,@forms)))
(allocation ,result-tn (pad-data-block ,size) ,inline ,stack-allocate-p
other-pointer-lowtag)
(storew (logior (ash (1- ,size) n-widetag-bits) ,widetag)
,result-tn 0 other-pointer-lowtag)
,@forms)))

;;;; error code
(defun emit-error-break (vop kind code values)
Expand Down
2 changes: 1 addition & 1 deletion version.lisp-expr
Expand Up @@ -17,4 +17,4 @@
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
"1.0.18.24"
"1.0.18.25"

0 comments on commit 1fc851c

Please sign in to comment.