Skip to content

Commit

Permalink
1.0.16.35: improved TIME output
Browse files Browse the repository at this point in the history
 * Print measured times using fixed-width decimal output with the
   measured precision, instead of converting to floats for printing.

 * Report processor cycle counts on x86 and x86-64.
   ** Since Intel doesn't seem to consider it necessary to issue a
      CPUID both before and after RDTSC, maybe we don't need to do
      that either.
   ** New feature, :CYCLE-COUNTER, for platforms that implement
      SB-VM::%READ-CYCLE-COUNTER.

 * Instead of reporting %EVAL calls, report "interpreted forms", which means
   both %EVAL and SIMPLE-EVAL-IN-LEXENV.

 * Report "lambdas converted" for the compiler, not counting TL-XEPs.

 * Report CPU percentage (computed from real and run time.)

 * Report total run time separately. Condence run time output slightly
   by reporting total, user, and system on the same line.

 * Report non-GC time as well.

 * Condence output by omitting page faults, converted lambdas, and
   interpreted forms when they are zero.
  • Loading branch information
nikodemus committed May 17, 2008
1 parent 496071a commit 9b1fade
Show file tree
Hide file tree
Showing 12 changed files with 174 additions and 60 deletions.
10 changes: 10 additions & 0 deletions NEWS
Expand Up @@ -6,6 +6,16 @@ changes in sbcl-1.0.17 relative to 1.0.16:
use this feature in the meanwhile.
* new feature: runtime argument --control-stack-size can be used to
adjust thread default control stack size.
* enhancement: improved TIME output
** all times are reported using the measured accuracy (milliseconds
for real and GC times, microseconds for everything else.)
** processor cycle counts on x86 and x86-64.
** interpreted forms are counted for both evaluator modes.
** number of lambdas converted by the compiler is reported.
** CPU percentage report (computed from real and total run time.)
** more comprehensive run time reporting, using a condenced format
** interperted form, lambda, and page fault counts are omitted
when zero.
* optimization: ADJOIN and PUSHNEW are upto ~70% faster in normal
SPEED policies.
* optimization: APPEND is upto ~10% faster in normal SPEED policies.
Expand Down
4 changes: 2 additions & 2 deletions make-config.sh
Expand Up @@ -283,7 +283,7 @@ cd "$original_dir"
if [ "$sbcl_arch" = "x86" ]; then
printf ' :gencgc :stack-grows-downward-not-upward :c-stack-is-control-stack' >> $ltf
printf ' :compare-and-swap-vops :unwind-to-frame-and-call-vop' >> $ltf
printf ' :stack-allocatable-closures :alien-callbacks' >> $ltf
printf ' :stack-allocatable-closures :alien-callbacks :cycle-counter' >> $ltf
case "$sbcl_os" in
linux | freebsd | netbsd | openbsd | sunos | darwin | win32)
printf ' :linkage-table' >> $ltf
Expand All @@ -296,7 +296,7 @@ if [ "$sbcl_arch" = "x86" ]; then
elif [ "$sbcl_arch" = "x86-64" ]; then
printf ' :gencgc :stack-grows-downward-not-upward :c-stack-is-control-stack :linkage-table' >> $ltf
printf ' :compare-and-swap-vops :unwind-to-frame-and-call-vop' >> $ltf
printf ' :stack-allocatable-closures :alien-callbacks' >> $ltf
printf ' :stack-allocatable-closures :alien-callbacks :cycle-counter' >> $ltf
elif [ "$sbcl_arch" = "mips" ]; then
printf ' :linkage-table' >> $ltf
printf ' :stack-allocatable-closures' >> $ltf
Expand Down
8 changes: 5 additions & 3 deletions package-data-list.lisp-expr
Expand Up @@ -1256,7 +1256,9 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
"%WITH-ARRAY-DATA/FP"
"%WITH-ARRAY-DATA-MACRO"
"*CURRENT-LEVEL-IN-PRINT*"
"*EMPTY-TYPE*" "*GC-INHIBIT*" "*GC-PENDING*"
"*EMPTY-TYPE*"
"*EVAL-CALLS*"
"*GC-INHIBIT*" "*GC-PENDING*"
#!+sb-thread "*STOP-FOR-GC-PENDING*"
"*UNIVERSAL-TYPE*"
"*UNIVERSAL-FUN-TYPE*" "*UNPARSE-FUN-TYPE-SIMPLIFY*"
Expand Down Expand Up @@ -2549,6 +2551,7 @@ structure representations"
:name "SB!EVAL"
:doc "internal: the evaluator implementation used to execute code without compiling it."
:use ("CL" "SB!KERNEL" "SB!EXT")
:reexport ("*EVAL-CALLS*")
:export ("INTERPRETED-FUNCTION"
"INTERPRETED-FUNCTION-P"
"INTERPRETED-FUNCTION-NAME"
Expand All @@ -2561,8 +2564,7 @@ structure representations"
"EVAL-IN-NATIVE-ENVIRONMENT"
"PREPARE-FOR-COMPILE"
"COUNT-EVAL-CALLS"
"*EVAL-LEVEL*"
"*EVAL-CALLS*"))
"*EVAL-LEVEL*"))

#!+win32
#s(sb-cold:package-data
Expand Down
3 changes: 1 addition & 2 deletions src/code/cold-init.lisp
Expand Up @@ -108,8 +108,7 @@
sb!kernel::*gc-epoch* (cons nil nil))

;; I'm not sure where eval is first called, so I put this first.
#!+sb-eval
(show-and-call sb!eval::!full-eval-cold-init)
(show-and-call !eval-cold-init)

(show-and-call thread-init-or-reinit)
(show-and-call !typecheckfuns-cold-init)
Expand Down
7 changes: 0 additions & 7 deletions src/code/early-full-eval.lisp
Expand Up @@ -12,15 +12,8 @@
(in-package "SB!EVAL")

(defparameter *eval-level* -1)
(defparameter *eval-calls* 0)
(defparameter *eval-verbose* nil)

(defun !full-eval-cold-init ()
(setf *eval-level* -1
*eval-calls* 0
*eval-verbose* nil
*evaluator-mode* :compile))

;; !defstruct-with-alternate-metaclass is unslammable and the
;; RECOMPILE restart doesn't work on it. This is the main reason why
;; this stuff is split out into its own file. Also, it lets the
Expand Down
10 changes: 10 additions & 0 deletions src/code/eval.lisp
Expand Up @@ -11,6 +11,15 @@

(in-package "SB!IMPL")

(defparameter *eval-calls* 0)

(defun !eval-cold-init ()
(setf *eval-calls* 0
*evaluator-mode* :compile)
#!+sb-eval
(setf sb!eval::*eval-level* -1
sb!eval::*eval-verbose* nil))

;;; general case of EVAL (except in that it can't handle toplevel
;;; EVAL-WHEN magic properly): Delegate to #'COMPILE.
(defun %simple-eval (expr lexenv)
Expand Down Expand Up @@ -93,6 +102,7 @@
(defun simple-eval-in-lexenv (original-exp lexenv)
(declare (optimize (safety 1)))
;; (aver (lexenv-simple-p lexenv))
(incf *eval-calls*)
(handler-bind
((sb!c:compiler-error
(lambda (c)
Expand Down
163 changes: 130 additions & 33 deletions src/code/time.lisp
Expand Up @@ -265,14 +265,93 @@ format."

(defmacro time (form)
#!+sb-doc
"Execute FORM and print timing information on *TRACE-OUTPUT*."
"Execute FORM and print timing information on *TRACE-OUTPUT*.
On some hardware platforms estimated processor cycle counts are
included in this output; this number is slightly inflated, since it
includes the pipeline involved in reading the cycle counter --
executing \(TIME NIL) a few times will give you an idea of the
overhead, and its variance. The cycle counters are also per processor,
not per thread: if multiple threads are running on the same processor,
the reported counts will include cycles taken up by all threads
running on the processor where TIME was executed. Furthermore, if the
operating system migrates the thread to another processor between
reads of the cycle counter, the results will be completely bogus.
Finally, the counter is cycle counter, incremented by the hardware
even when the process is halted -- which is to say that cycles pass
normally during operations like SLEEP."
`(%time (lambda () ,form)))

;;; Return all the data that we want TIME to report.
(defun time-get-sys-info ()
(multiple-value-bind (user sys faults) (sb!sys:get-system-info)
(values user sys faults (get-bytes-consed))))


(defun elapsed-cycles (h0 l0 h1 l1)
(declare (ignorable h0 l0 h1 l1))
#!+cycle-counter
(+ (ash (- h1 h0) 32)
(- l1 l0))
#!-cycle-counter
nil)
(declaim (inline read-cycle-counter))
(defun read-cycle-counter ()
#!+cycle-counter
(sb!vm::%read-cycle-counter)
#!-cycle-counter
(values 0 0))

;;; This is so that we don't have to worry about the vagaries of
;;; floating point printing, or about conversions to floats dropping
;;; or introducing decimals, which are liable to imply wrong precision.
(defun format-microseconds (stream usec &optional colonp atp)
(declare (ignore colonp))
(%format-decimal stream usec 6)
(unless atp
(write-string " seconds" stream)))

(defun format-milliseconds (stream usec &optional colonp atp)
(declare (ignore colonp))
(%format-decimal stream usec 3)
(unless atp
(write-string " seconds" stream)))

(defun %format-decimal (stream number power)
(declare (stream stream)
(integer number power))
(when (minusp number)
(write-char #\- stream)
(setf number (- number)))
(let ((scale (expt 10 power)))
(flet ((%fraction (fraction)
(let ((scaled (* 10 fraction)))
(loop while (< scaled scale)
do (write-char #\0 stream)
(setf scaled (* scaled 10))))
(format stream "~D" fraction))
(%zeroes ()
(let ((scaled (/ scale 10)))
(write-char #\0 stream)
(loop while (> scaled 1)
do (write-char #\0 stream)
(setf scaled (/ scaled 10))))))
(cond ((zerop number)
(write-string "0." stream)
(%zeroes))
((< number scale)
(write-string "0." stream)
(%fraction number))
((= number scale)
(write-string "1." stream)
(%zeroes))
((> number scale)
(multiple-value-bind (whole fraction) (floor number scale)
(format stream "~D." whole)
(%fraction fraction))))))

nil)

;;; The guts of the TIME macro. Compute overheads, run the (compiled)
;;; function, report the times.
(defun %time (fun)
Expand Down Expand Up @@ -316,35 +395,53 @@ format."
(old-run-utime old-run-stime old-page-faults old-bytes-consed)
(time-get-sys-info))
(setq old-real-time (get-internal-real-time))
(let ((start-gc-run-time *gc-run-time*)
#!+sb-eval (sb!eval:*eval-calls* 0))
(declare #!+sb-eval (special sb!eval:*eval-calls*))
(multiple-value-prog1
;; Execute the form and return its values.
(funcall fun)
(multiple-value-setq
(new-run-utime new-run-stime new-page-faults new-bytes-consed)
(time-get-sys-info))
(setq new-real-time (- (get-internal-real-time) real-time-overhead))
(let ((gc-run-time (max (- *gc-run-time* start-gc-run-time) 0)))
(format *trace-output*
"~&Evaluation took:~% ~
~S second~:P of real time~% ~
~S second~:P of user run time~% ~
~S second~:P of system run time~% ~
~@[[Run times include ~S second~:P GC run time.]~% ~]~
~@[~S call~:P to %EVAL~% ~]~
~S page fault~:P and~% ~
~:D bytes consed.~%"
(max (/ (- new-real-time old-real-time)
(float sb!xc:internal-time-units-per-second))
0.0)
(max (/ (- new-run-utime old-run-utime) 1000000.0) 0.0)
(max (/ (- new-run-stime old-run-stime) 1000000.0) 0.0)
(unless (zerop gc-run-time)
(/ (float gc-run-time)
(float sb!xc:internal-time-units-per-second)))
#!+sb-eval sb!eval:*eval-calls* #!-sb-eval nil
(max (- new-page-faults old-page-faults) 0)
(max (- new-bytes-consed old-bytes-consed) 0)))))))

(let ((start-gc-internal-run-time *gc-run-time*)
(*eval-calls* 0)
(sb!c::*lambda-conversions* 0))
(declare (special *eval-calls* sb!c::*lambda-conversions*))
(multiple-value-bind (h0 l0) (read-cycle-counter)
(multiple-value-prog1
;; Execute the form and return its values.
(funcall fun)
(multiple-value-bind (h1 l1) (read-cycle-counter)
(let ((stop-gc-internal-run-time *gc-run-time*))
(multiple-value-setq
(new-run-utime new-run-stime new-page-faults new-bytes-consed)
(time-get-sys-info))
(setq new-real-time (- (get-internal-real-time) real-time-overhead))
(let* ((gc-internal-run-time (max (- stop-gc-internal-run-time start-gc-internal-run-time) 0))
(real-time (max (- new-real-time old-real-time) 0))
(user-run-time (max (- new-run-utime old-run-utime) 0))
(system-run-time (max (- new-run-stime old-run-stime) 0))
(total-run-time (+ user-run-time system-run-time))
(cycles (elapsed-cycles h0 l0 h1 l1))
(page-faults (max (- new-page-faults old-page-faults) 0)))
(format *trace-output*
"~&Evaluation took:~%~
~@< ~@;~/sb-impl::format-milliseconds/ of real time~%~
~/sb-impl::format-microseconds/ of total run time ~
(~@/sb-impl::format-microseconds/ user, ~@/sb-impl::format-microseconds/ system)~%~
~[[ Run times consist of ~/sb-impl::format-milliseconds/ GC time, ~
and ~/sb-impl::format-milliseconds/ non-GC time. ]~%~;~2*~]~
~,2F% CPU~%~
~@[~:D form~:P interpreted~%~]~
~@[~:D lambda~:P converted~%~]~
~@[~:D processor cycles~%~]~
~@[~:D page fault~:P~%~]~
~:D bytes consed~:>~%"
real-time
total-run-time
user-run-time
system-run-time
(if (zerop gc-internal-run-time) 1 0)
gc-internal-run-time
;; Round up so we don't mislead by saying 0.0 seconds of non-GC time...
(- (ceiling total-run-time 1000) gc-internal-run-time)
(if (zerop real-time)
100.0
(float (* 100 (/ (round total-run-time 1000) real-time))))
(unless (zerop *eval-calls*) *eval-calls*)
(unless (zerop sb!c::*lambda-conversions*) sb!c::*lambda-conversions*)
cycles
(unless (zerop page-faults) page-faults)
(max (- new-bytes-consed old-bytes-consed) 0))))))))))
1 change: 1 addition & 0 deletions src/compiler/early-c.lisp
Expand Up @@ -123,6 +123,7 @@
(defvar *trace-table*)
(defvar *undefined-warnings*)
(defvar *warnings-p*)
(defvar *lambda-conversions*)

;;; This lock is seized in the compiler, and related areas: the
;;; compiler is not presently thread-safe
Expand Down
6 changes: 6 additions & 0 deletions src/compiler/ir1tran-lambda.lisp
Expand Up @@ -933,6 +933,12 @@
:debug-name debug-name))))
(setf (functional-inline-expansion res) form)
(setf (functional-arg-documentation res) (cadr form))
(when (boundp '*lambda-conversions*)
;; KLUDGE: Not counting TL-XEPs is a lie, of course, but
;; keeps things less confusing to users of TIME, where this
;; count gets used.
(unless (and (consp debug-name) (eq 'tl-xep (car debug-name)))
(incf *lambda-conversions*)))
res))))

(defun wrap-forms-in-debug-catch (forms)
Expand Down
10 changes: 4 additions & 6 deletions src/compiler/x86-64/system.lisp
Expand Up @@ -336,14 +336,12 @@
(:result-types unsigned-num unsigned-num)
(:generator 5
(zeroize eax)
;; Intel docs seem quite consistent on only using CPUID before RDTSC,
;; not both before and after. Go figure.
(inst cpuid)
(inst rdtsc)
(inst push edx)
(inst push eax)
(zeroize eax)
(inst cpuid)
(inst pop lo)
(inst pop hi)))
(move lo eax)
(move hi edx)))

(defmacro with-cycle-counter (&body body)
"Returns the primary value of BODY as the primary value, and the
Expand Down
10 changes: 4 additions & 6 deletions src/compiler/x86/system.lisp
Expand Up @@ -330,14 +330,12 @@
(:result-types unsigned-num unsigned-num)
(:generator 5
(inst xor eax eax)
;; Intel docs seem quite consistent on only using CPUID before RDTSC,
;; not both before and after. Go figure.
(inst cpuid)
(inst rdtsc)
(inst push edx)
(inst push eax)
(inst xor eax eax)
(inst cpuid)
(inst pop lo)
(inst pop hi)))
(move lo eax)
(move hi edx)))

(defmacro with-cycle-counter (&body body)
"Returns the primary value of BODY as the primary value, and the
Expand Down
2 changes: 1 addition & 1 deletion version.lisp-expr
Expand Up @@ -17,4 +17,4 @@
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
"1.0.16.34"
"1.0.16.35"

0 comments on commit 9b1fade

Please sign in to comment.