Permalink
Browse files

Port WITH-TLS-EA and other remaining FS prefix uses to Windows.

- current-thread-offset-sap

- emit-single-step-test

- Allocation routines

- Disable PSEUDO-ATOMIC on threaded Windows entirely instead of
  changing TLS uses of pseudo-atomic-bits.  We would need a
  temporary register for those changes, and Windows threading is all
  safepoint-based, without the need to support asynchronous signals.

Thanks to Dmitry Kalyanov and Anton Kovalenko.
  • Loading branch information...
1 parent e6d83d2 commit 1dd3616e9eadaba9f1ca86b72d64551fbd75f399 @lichtblau lichtblau committed Jul 13, 2011
@@ -1465,10 +1465,12 @@
;; register on -SB-THREAD.
#!+sb-thread
(progn
- (with-tls-ea (EA :base :unused
+ #!+win32 (inst push eax-tn)
+ (with-tls-ea (EA :base #!+win32 eax-tn #!-win32 :unused
:disp-type :constant
:disp (* thread-stepping-slot n-word-bytes))
- (inst cmp EA nil-value :maybe-fs)))
+ (inst cmp EA nil-value :maybe-fs))
+ #!+win32 (inst pop eax-tn))
#!-sb-thread
(inst cmp (make-ea-for-symbol-value sb!impl::*stepping*)
nil-value))
@@ -221,8 +221,15 @@
:foreign))))
(defun allocation-inline (alloc-tn size)
- (let ((ok (gen-label))
+ (let* ((ok (gen-label)) ;reindent after merging
(done (gen-label))
+ #!+(and sb-thread win32)
+ (scratch-tns (loop for my-tn in `(,eax-tn ,ebx-tn ,edx-tn ,ecx-tn)
+ when (and (not (location= alloc-tn my-tn))
+ (or (not (tn-p size))
+ (not (location= size my-tn))))
+ collect my-tn))
+ (tls-prefix #!+sb-thread :fs #!-sb-thread nil)
(free-pointer
(make-ea :dword :disp
#!+sb-thread (* n-word-bytes thread-alloc-region-slot)
@@ -232,11 +239,23 @@
(make-ea :dword :disp
#!+sb-thread (* n-word-bytes (1+ thread-alloc-region-slot))
#!-sb-thread (make-fixup "boxed_region" :foreign 4)
- :scale 1))) ; thread->alloc_region.end_addr
+ :scale 1)) ; thread->alloc_region.end_addr
+ #!+(and sb-thread win32) (scratch-tn (pop scratch-tns))
+ #!+(and sb-thread win32) (swap-tn (pop scratch-tns)))
(unless (and (tn-p size) (location= alloc-tn size))
(inst mov alloc-tn size))
- (inst add alloc-tn free-pointer #!+sb-thread :fs)
- (inst cmp alloc-tn end-addr #!+sb-thread :fs)
+ #!+(and sb-thread win32)
+ (progn
+ (inst push scratch-tn)
+ (inst push swap-tn)
+ (inst mov scratch-tn
+ (make-ea :dword :disp
+ +win32-tib-arbitrary-field-offset+) tls-prefix)
+ (setf (ea-base free-pointer) scratch-tn
+ (ea-base end-addr) scratch-tn
+ tls-prefix nil))
+ (inst add alloc-tn free-pointer tls-prefix)
+ (inst cmp alloc-tn end-addr tls-prefix)
(inst jmp :be ok)
(let ((dst (ecase (tn-offset alloc-tn)
(#.eax-offset "alloc_overflow_eax")
@@ -251,15 +270,26 @@
;; Swap ALLOC-TN and FREE-POINTER
(cond ((and (tn-p size) (location= alloc-tn size))
;; XCHG is extremely slow, use the xor swap trick
- (inst xor alloc-tn free-pointer #!+sb-thread :fs)
- (inst xor free-pointer alloc-tn #!+sb-thread :fs)
- (inst xor alloc-tn free-pointer #!+sb-thread :fs))
+ #!-(and sb-thread win32)
+ (progn
+ (inst xor alloc-tn free-pointer tls-prefix)
+ (inst xor free-pointer alloc-tn tls-prefix)
+ (inst xor alloc-tn free-pointer tls-prefix))
+ #!+(and sb-thread win32)
+ (progn
+ (inst mov swap-tn free-pointer tls-prefix)
+ (inst mov free-pointer alloc-tn tls-prefix)
+ (inst mov alloc-tn swap-tn)))
(t
;; It's easier if SIZE is still available.
- (inst mov free-pointer alloc-tn #!+sb-thread :fs)
+ (inst mov free-pointer alloc-tn tls-prefix)
(inst sub alloc-tn size)))
- (emit-label done))
- (values))
+ (emit-label done)
+ #!+(and sb-thread win32)
+ (progn
+ (inst pop swap-tn)
+ (inst pop scratch-tn))
+ (values)))
;;; Emit code to allocate an object with a size in bytes given by
@@ -273,6 +303,7 @@
;;; (FIXME: so why aren't we asserting this?)
(defun allocation (alloc-tn size &optional inline dynamic-extent lowtag)
+ (declare (ignorable inline))
(cond
(dynamic-extent
(allocation-dynamic-extent alloc-tn size lowtag))
@@ -366,6 +397,9 @@
;;; pa section.
#!+sb-thread
(defmacro %clear-pseudo-atomic ()
+ #!+win32
+ `(progn)
+ #!-win32
'(inst mov (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot)) 0 :fs))
#!+sb-safepoint
@@ -375,6 +409,9 @@
#!+sb-thread
(defmacro pseudo-atomic (&rest forms)
+ #!+win32
+ `(progn ,@forms (emit-safepoint))
+ #!-win32
(with-unique-names (label)
`(let ((,label (gen-label)))
(inst mov (make-ea :dword :disp (* 4 thread-pseudo-atomic-bits-slot))
@@ -624,6 +661,7 @@ collection."
The value of the BASE register is undefined following the macro invocation."
(check-type base-already-live-p boolean)
(check-type disp-type (member :index :constant))
+ #!-(and win32 sb-thread)
(let ((body (subst :fs :maybe-fs body)))
(ecase disp-type
(:constant
@@ -641,4 +679,20 @@ collection."
(inst ,(if base-already-live-p 'add 'mov) ,base ,disp)
,@(subst `(make-ea :dword :base ,base)
ea-var
- body))))))
+ body)))))
+ #!+(and win32 sb-thread)
+ ;; goes through a temporary register to add the thread address into it
+ (multiple-value-bind (constant-disp ea-disp)
+ (ecase disp-type
+ (:constant (values disp nil))
+ (:index (values 0 disp)))
+ `(progn
+ ,@(when ea-disp
+ `((inst ,(if base-already-live-p 'add 'mov) ,base ,ea-disp)))
+ (inst ,(if (or base-already-live-p ea-disp) 'add 'mov)
+ ,base
+ (make-ea :dword :disp +win32-tib-arbitrary-field-offset+)
+ :fs)
+ ,@(subst `(make-ea :dword :base ,base :disp ,constant-disp)
+ ea-var
+ (subst nil :maybe-fs body)))))
@@ -383,3 +383,6 @@
;;; FIXME: Is this used? Delete it or document it.
;;; cf the sparc PARMS.LISP
(defparameter *assembly-unit-length* 8)
+
+#!+win32
+(defconstant +win32-tib-arbitrary-field-offset+ #.(+ #xE10 (* 4 63)))
@@ -270,10 +270,19 @@
(:results (sap :scs (sap-reg)))
(:result-types system-area-pointer)
(:translate current-thread-offset-sap)
- (:args (n :scs (unsigned-reg) :target sap))
+ (:args (n :scs (unsigned-reg)
+ #!+win32 #!+win32 :to :save
+ #!-win32 #!-win32 :target sap))
(:arg-types unsigned-num)
(:policy :fast-safe)
(:generator 2
+ #!+win32
+ (progn
+ ;; Note that SAP conflicts with N in this case, hence the reader
+ ;; conditionals above.
+ (inst mov sap (make-ea :dword :disp +win32-tib-arbitrary-field-offset+) :fs)
+ (inst mov sap (make-ea :dword :base sap :disp 0 :index n :scale 4)))
+ #!-win32
(inst mov sap (make-ea :dword :disp 0 :index n :scale 4) :fs)))
(define-vop (halt)
View
@@ -1255,10 +1255,12 @@ gc_heap_exhausted_error_or_lose (long available, long requested)
else {
/* FIXME: assert free_pages_lock held */
(void)thread_mutex_unlock(&free_pages_lock);
+#if !(defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD))
gc_assert(get_pseudo_atomic_atomic(thread));
clear_pseudo_atomic_atomic(thread);
if (get_pseudo_atomic_interrupted(thread))
do_pending_interrupt();
+#endif
/* Another issue is that signalling HEAP-EXHAUSTED error leads
* to running user code at arbitrary places, even in a
* WITHOUT-INTERRUPTS which may lead to a deadlock without
@@ -4181,8 +4183,10 @@ general_alloc_internal(long nbytes, int page_type_flag, struct alloc_region *reg
gc_assert((((unsigned long)region->free_pointer & LOWTAG_MASK) == 0)
&& ((nbytes & LOWTAG_MASK) == 0));
+#if !(defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD))
/* Must be inside a PA section. */
gc_assert(get_pseudo_atomic_atomic(thread));
+#endif
if (nbytes > large_allocation)
large_allocation = nbytes;
@@ -4284,7 +4288,9 @@ general_alloc(long nbytes, int page_type_flag)
lispobj *
alloc(long nbytes)
{
+#if !(defined(LISP_FEATURE_WIN32) && defined(LISP_FEATURE_SB_THREAD))
gc_assert(get_pseudo_atomic_atomic(arch_os_get_current_thread()));
+#endif
return general_alloc(nbytes, BOXED_PAGE_FLAG);
}
View
@@ -712,10 +712,18 @@ DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16)
#define START_REGION GNAME(boxed_region)
#endif
-#define ALLOC_OVERFLOW(size) \
- /* Calculate the size for the allocation. */ \
- subl START_REGION,size; \
- ALLOC(size)
+#if defined(LISP_FEATURE_SB_THREAD) && defined(LISP_FEATURE_WIN32)
+#define ALLOC_OVERFLOW(size,scratch) \
+ movl SBCL_THREAD_BASE_EA, scratch; \
+ /* Calculate the size for the allocation. */ \
+ subl THREAD_ALLOC_REGION_OFFSET(scratch),size; \
+ ALLOC(size)
+#else
+#define ALLOC_OVERFLOW(size,scratch) \
+ /* Calculate the size for the allocation. */ \
+ subl START_REGION,size; \
+ ALLOC(size)
+#endif
/* This routine handles an overflow with eax=crfp+size. So the
size=eax-crfp. */
@@ -725,7 +733,7 @@ DEFINE_ALLOC_TO_REG(alloc_16_to_edi,%edi,$16)
GNAME(alloc_overflow_eax):
pushl %ecx # Save ecx
pushl %edx # Save edx
- ALLOC_OVERFLOW(%eax)
+ ALLOC_OVERFLOW(%eax,%edx)
popl %edx # Restore edx.
popl %ecx # Restore ecx.
ret
@@ -737,7 +745,7 @@ GNAME(alloc_overflow_eax):
GNAME(alloc_overflow_ecx):
pushl %eax # Save eax
pushl %edx # Save edx
- ALLOC_OVERFLOW(%ecx)
+ ALLOC_OVERFLOW(%ecx,%edx)
movl %eax,%ecx # setup the destination.
popl %edx # Restore edx.
popl %eax # Restore eax.
@@ -750,7 +758,7 @@ GNAME(alloc_overflow_ecx):
GNAME(alloc_overflow_edx):
pushl %eax # Save eax
pushl %ecx # Save ecx
- ALLOC_OVERFLOW(%edx)
+ ALLOC_OVERFLOW(%edx,%ecx)
movl %eax,%edx # setup the destination.
popl %ecx # Restore ecx.
popl %eax # Restore eax.
@@ -766,7 +774,7 @@ GNAME(alloc_overflow_ebx):
pushl %eax # Save eax
pushl %ecx # Save ecx
pushl %edx # Save edx
- ALLOC_OVERFLOW(%ebx)
+ ALLOC_OVERFLOW(%ebx,%edx)
movl %eax,%ebx # setup the destination.
popl %edx # Restore edx.
popl %ecx # Restore ecx.
@@ -783,7 +791,7 @@ GNAME(alloc_overflow_esi):
pushl %eax # Save eax
pushl %ecx # Save ecx
pushl %edx # Save edx
- ALLOC_OVERFLOW(%esi)
+ ALLOC_OVERFLOW(%esi,%edx)
movl %eax,%esi # setup the destination.
popl %edx # Restore edx.
popl %ecx # Restore ecx.
@@ -798,7 +806,7 @@ GNAME(alloc_overflow_edi):
pushl %eax # Save eax
pushl %ecx # Save ecx
pushl %edx # Save edx
- ALLOC_OVERFLOW(%edi)
+ ALLOC_OVERFLOW(%edi,%edx)
movl %eax,%edi # setup the destination.
popl %edx # Restore edx.
popl %ecx # Restore ecx.

0 comments on commit 1dd3616

Please sign in to comment.