Skip to content

Commit

Permalink
0.8.18.25:
Browse files Browse the repository at this point in the history
	Improvements to the statistical profiler.
        * Add support for x86-64.
        * Time spent in foreign functions was previously attributed to
          "elsewhere". Now reported individually for all of them, and
          "foreign function __open", "foreign function sin", etc.
        * Walk the call stack to a depth of 8 instead of relying only
          on the PC and RA from os-context-t (currently only on x86/x86-64).
          Thus instead of just knowing that a lot of time is being spent
          on (for example) bignum operations, we might find out where
	  the bignum operations are being initiated.
        * Add a column for accrued time to the flat report.
        * Add kludgy workaround for invalid frame-pointers in the
          os-context-t structure causing segfaults.
  • Loading branch information
jsnell committed Jan 11, 2005
1 parent 0faed7c commit 2f595e9
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 67 deletions.
4 changes: 4 additions & 0 deletions NEWS
Expand Up @@ -5,6 +5,10 @@ changes in sbcl-0.8.19 relative to sbcl-0.8.18:
* enhancement: saving cores with foreign code loaded is now
supported on ppc/Darwin in addition to the previously supported
platforms.
* enhancement: the statistical profiler now walks deeper into the
call stack for more meaningful call-graphs and accrued time
reports (x86/x86-64 only). It also now reports time spent in
foreign functions.
* bug fix: invalid :DEFAULT-INITARGS are detected in compiled calls
to MAKE-INSTANCE.
* bug fix: defaulted initargs are passed to INITIALIZE-INSTANCE and
Expand Down
174 changes: 108 additions & 66 deletions contrib/sb-sprof/sb-sprof.lisp
Expand Up @@ -508,7 +508,9 @@
"Default number of samples taken.")
(declaim (type sb-impl::index *max-samples*))

(defconstant +sample-size+ 2)
(defconstant +sample-size+
#+(or x86 x86-64) 8
#-(or x86 x86-64) 2)

(defvar *samples* nil)
(declaim (type (or null (vector address)) *samples*))
Expand Down Expand Up @@ -536,7 +538,7 @@

(defun show-progress (format-string &rest args)
(when *show-progress*
(apply #'format t format-string args)
(apply #'format t format-string args)
(finish-output)))

(defun start-sampling ()
Expand All @@ -552,16 +554,15 @@
`(let ((*sampling* ,on))
,@body))

(defun sort-samples (&key (key :pc))
(defun sort-samples (key-offset)
"Sort *Samples* using comparison Test. Key must be one of
:Pc or :Return-Pc for sorting by pc or return pc."
(declare (type (member :pc :return-pc) key))
(when (plusp *samples-index*)
(qsort *samples*
:from 0
:to (- *samples-index* +sample-size+)
:element-size +sample-size+
:key-offset (if (eq key :pc) 0 1))))
:key-offset key-offset)))

(defun record (pc)
(declare (type address pc))
Expand All @@ -570,23 +571,44 @@

;;; SIGPROF handler. Record current PC and return address in
;;; *SAMPLES*.
#+x86
#+(or x86 x86-64)
(defun sigprof-handler (signal code scp)
(declare (ignore signal code) (type system-area-pointer scp))
(when (and *sampling*
(< *samples-index* (length *samples*)))
(sb-sys:without-gcing
(with-alien ((scp (* os-context-t) :local scp))
(locally (declare (optimize (inhibit-warnings 2)))
(let* ((pc-ptr (sb-vm:context-pc scp))
(fp (sb-vm::context-register scp #.sb-vm::ebp-offset))
(ra (sap-ref-word (int-sap fp)
(- (* (1+ sb-vm::return-pc-save-offset)
sb-vm::n-word-bytes)))))
(record (sap-int pc-ptr))
(record ra)))))))

#-x86
(locally (declare (optimize (inhibit-warnings 2)))
(with-alien ((scp (* os-context-t) :local scp))
;; For some reason completely bogus small values for the
;; frame pointer are returned every now and then, leading
;; to segfaults. Try to avoid these cases.
;;
;; FIXME: Do a more thorough sanity check on ebp, or figure
;; out why this is happening.
;; -- JES, 2005-01-11
(when (< (sb-vm::context-register scp #.sb-vm::ebp-offset)
4096)
(dotimes (i +sample-size+)
(record 0))
(return-from sigprof-handler nil))
(let* ((pc-ptr (sb-vm:context-pc scp))
(fp (sb-vm::context-register scp #.sb-vm::ebp-offset)))
(record (sap-int pc-ptr))
(let ((fp (int-sap fp))
ra)
(dotimes (i (1- +sample-size+))
(cond (fp
(setf (values ra fp)
(sb-di::x86-call-context fp :depth i))
(record (if ra
(sap-int ra)
0)))
(t
(record 0)))))))))))

;; FIXME: On non-x86 platforms we don't yet walk the call stack deeper
;; than one level.
#-(or x86 x86-64)
(defun sigprof-handler (signal code scp)
(declare (ignore signal code))
(when (and *sampling*
Expand All @@ -596,7 +618,7 @@
(locally (declare (optimize (inhibit-warnings 2)))
(let* ((pc-ptr (sb-vm:context-pc scp))
(fp (sb-vm::context-register scp #.sb-vm::cfp-offset))
(ra (sap-ref-word
(ra (sap-ref-word
(int-sap fp)
(* sb-vm::lra-save-offset sb-vm::n-word-bytes))))
(record (sap-int pc-ptr))
Expand Down Expand Up @@ -634,13 +656,9 @@
*dynamic-space-code-info*))))
(map-dynamic-space-code #'record-address)))

;;; Adjust pcs or return-pcs in *SAMPLES* for address changes of
;;; dynamic-space code objects. KEY being :PC means adjust pcs.
(defun adjust-samples (key)
(declare (type (member :pc :return-pc) key))
(sort-samples :key key)
(let ((sidx 0)
(offset (if (eq key :pc) 0 1)))
(defun adjust-samples (offset)
(sort-samples offset)
(let ((sidx 0))
(declare (type sb-impl::index sidx))
(dolist (info *dynamic-space-code-info*)
(unless (= (dyninfo-new-start info) (dyninfo-start info))
Expand Down Expand Up @@ -672,8 +690,8 @@
(setf (dyninfo-new-start info)
(code-start (dyninfo-code info))))
(progn
(adjust-samples :pc)
(adjust-samples :return-pc))
(dotimes (i +sample-size+)
(adjust-samples i)))
(dolist (info *dynamic-space-code-info*)
(let ((size (- (dyninfo-end info) (dyninfo-start info))))
(setf (dyninfo-start info) (dyninfo-new-start info))
Expand Down Expand Up @@ -800,8 +818,10 @@
(%make-node :name name
:start-pc (+ start-pc start-offset)
:end-pc (+ start-pc end-offset))))
(t
(%make-node :name (sb-di::debug-fun-name info)))))
(sb-di::debug-fun
(%make-node :name (sb-di::debug-fun-name info)))
(t
(%make-node :name (coerce info 'string)))))

;;; Return something serving as debug info for address PC. If we can
;;; get something from SB-DI:DEBUG-FUNCTION-FROM-PC, return that.
Expand All @@ -810,17 +830,23 @@
(defun debug-info (pc)
(declare (type address pc))
(let ((ptr (sb-di::component-ptr-from-pc (int-sap pc))))
(unless (sap= ptr (int-sap 0))
(let* ((code (sb-di::component-from-component-ptr ptr))
(code-header-len (* (sb-kernel:get-header-data code)
sb-vm:n-word-bytes))
(pc-offset (- pc
(- (sb-kernel:get-lisp-obj-address code)
sb-vm:other-pointer-lowtag)
code-header-len))
(df (ignore-errors (sb-di::debug-fun-from-pc code
pc-offset))))
(or df code)))))
(cond ((sap= ptr (int-sap 0))
(let ((name (foreign-symbol-in-address (int-sap pc))))
(when name
(format nil "foreign function ~a" name))))
(t
(let* ((code (sb-di::component-from-component-ptr ptr))
(code-header-len (* (sb-kernel:get-header-data code)
sb-vm:n-word-bytes))
(pc-offset (- pc
(- (sb-kernel:get-lisp-obj-address code)
sb-vm:other-pointer-lowtag)
code-header-len))
(df (ignore-errors (sb-di::debug-fun-from-pc code
pc-offset))))
(or df
code))))))


;;; One function can have more than one COMPILED-DEBUG-FUNCTION with
;;; the same name. Reduce the number of calls to Debug-Info by first
Expand Down Expand Up @@ -877,10 +903,11 @@
collect node))

;;; Value is a CALL-GRAPH for the current contents of *SAMPLES*.
(defun make-call-graph-1 ()
(let ((elsewhere-count 0))
(defun make-call-graph-1 (depth)
(let ((elsewhere-count 0)
visited-nodes)
(with-lookup-tables ()
(loop for i below *samples-index* by +sample-size+
(loop for i below (1- *samples-index*) ;; by +sample-size+
as pc = (aref *samples* i)
as return-pc = (aref *samples* (1+ i))
as callee = (lookup-node pc)
Expand All @@ -889,22 +916,33 @@
(let ((caller (lookup-node return-pc)))
(when caller
caller)))
when (and *show-progress* (plusp i)) do
do
(when (and *show-progress* (plusp i))
(cond ((zerop (mod i 1000))
(show-progress "~d" i))
((zerop (mod i 100))
(show-progress ".")))
if callee do
(incf (node-count callee))
else do
(incf elsewhere-count)
when (and callee caller) do
(let ((call (find callee (node-edges caller)
:key #'call-vertex)))
(pushnew caller (node-callers callee))
(if call
(incf (call-count call))
(push (make-call callee) (node-edges caller)))))
(show-progress "."))))
(when (< (mod i +sample-size+) depth)
(when (= (mod i +sample-size+) 0)
(setf visited-nodes nil)
(cond (callee
(incf (node-accrued-count callee))
(incf (node-count callee)))
(t
(incf elsewhere-count))))
(when callee
(push callee visited-nodes))
(when caller
(unless (member caller visited-nodes)
(incf (node-accrued-count caller)))
(when callee
(let ((call (find callee (node-edges caller)
:key #'call-vertex)))
(pushnew caller (node-callers callee))
(if call
(unless (member caller visited-nodes)
(incf (call-count call)))
(push (make-call callee) (node-edges caller))))))))
(let ((sorted-nodes (sort (collect-nodes) #'> :key #'node-count)))
(loop for node in sorted-nodes and i from 1 do
(setf (node-index node) i))
Expand Down Expand Up @@ -944,16 +982,16 @@
;;; *SAMPLES*. The result contain a list of nodes sorted by self-time
;;; in the FLAT-NODES slot, and a dag in VERTICES, with call cycles
;;; reduced to CYCLE structures.
(defun make-call-graph ()
(defun make-call-graph (depth)
(stop-profiling)
(show-progress "~&Computing call graph ")
(let ((call-graph (without-gcing (make-call-graph-1))))
(let ((call-graph (without-gcing (make-call-graph-1 depth))))
(setf (call-graph-flat-nodes call-graph)
(copy-list (graph-vertices call-graph)))
(show-progress "~&Finding cycles")
(reduce-call-graph call-graph)
(show-progress "~&Propagating counts")
(compute-accrued-counts call-graph)
#+nil (compute-accrued-counts call-graph)
call-graph))


Expand Down Expand Up @@ -991,8 +1029,8 @@
0)))
(when print-header
(print-call-graph-header call-graph))
(format t "~& Self Total~%")
(format t "~& Nr Count % Count % Function~%")
(format t "~& Self Cumul Total~%")
(format t "~& Nr Count % Count % Count % Function~%")
(print-separator)
(let ((elsewhere-count (call-graph-elsewhere-count call-graph))
(i 0))
Expand All @@ -1001,16 +1039,21 @@
(< (node-count node) min-count))
(return))
(let* ((count (node-count node))
(percent (samples-percent call-graph count)))
(percent (samples-percent call-graph count))
(accrued-count (node-accrued-count node))
(accrued-percent (samples-percent call-graph accrued-count)))
(incf total-count count)
(incf total-percent percent)
(format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~s~%"
(format t "~&~4d ~6d ~5,1f ~6d ~5,1f ~6d ~5,1f ~s~%"
(node-index node)
count
percent
accrued-count
accrued-percent
total-count
total-percent
(node-name node))))
(node-name node))
(finish-output)))
(print-separator)
(format t "~& ~6d ~5,1f elsewhere~%"
elsewhere-count
Expand Down Expand Up @@ -1110,8 +1153,7 @@
Value of this function is a Call-Graph object representing the
resulting call-graph."
(declare (type report-type type))
(let ((graph (or call-graph (make-call-graph))))
(let ((graph (or call-graph (make-call-graph (1- +sample-size+)))))
(ecase type
(:flat
(print-flat graph :stream stream :max max :min-percent min-percent))
Expand Down
2 changes: 1 addition & 1 deletion version.lisp-expr
Expand Up @@ -17,4 +17,4 @@
;;; checkins which aren't released. (And occasionally for internal
;;; versions, especially for internal versions off the main CVS
;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
"0.8.18.24"
"0.8.18.25"

0 comments on commit 2f595e9

Please sign in to comment.