Permalink
Browse files

Fully reindent all code in Emacs.

Some custom macro indentation rules are added to formula.el
Apparently, &rest is broken in nested lists, so some of them
use long sequences of fixed numbers to work around it.
  • Loading branch information...
1 parent 3987995 commit 36f71ce4391a5e693e935ffd8b79b206ece5f410 @angavrilov committed Sep 3, 2009
View
110 compute-macros.lisp
@@ -1,80 +1,80 @@
-;;;; kate: indent-width 4; replace-tabs yes; space-indent on;
+;;; -*- mode:lisp; indent-tabs-mode: nil; -*-
(in-package fast-compute)
(defvar *compute-with-cuda* t)
(defmacro error-fallback (message fallback &body code)
- `(block handle
- (handler-bind ((condition
- #'(lambda (cond)
- (format t ,message cond)
- (return-from handle ,fallback))))
- ,@code)))
+ "Args: (message fallback &body code)"
+ `(block handle
+ (handler-bind
+ ((condition #'(lambda (cond)
+ (format t ,message cond)
+ (return-from handle ,fallback))))
+ ,@code)))
#-cuda
(defmacro compute (&whole original &rest args)
- (apply #'do-make-lisp-compute original args))
+ (apply #'do-make-lisp-compute original args))
#+cuda
(defmacro compute (&whole original &rest args)
- (case *compute-with-cuda*
- ((nil)
- (apply #'do-make-lisp-compute original args))
- (:force
- (error-fallback
- "CUDA compilation failed:~% ~A~%"
- (apply #'do-make-lisp-compute original args)
- (apply #'do-make-cuda-compute original args)))
- (otherwise
- (let ((lisp-code
- (apply #'do-make-lisp-compute original args)))
- (error-fallback
- "CUDA compilation failed:~% ~A~%"
- lisp-code
- `(if (cuda:valid-context-p)
- ,(apply #'do-make-cuda-compute original args)
- ,lisp-code))))))
+ (case *compute-with-cuda*
+ ((nil)
+ (apply #'do-make-lisp-compute original args))
+ (:force
+ (error-fallback
+ "CUDA compilation failed:~% ~A~%"
+ (apply #'do-make-lisp-compute original args)
+ (apply #'do-make-cuda-compute original args)))
+ (otherwise
+ (let ((lisp-code (apply #'do-make-lisp-compute original args)))
+ (error-fallback
+ "CUDA compilation failed:~% ~A~%"
+ lisp-code
+ `(if (cuda:valid-context-p)
+ ,(apply #'do-make-cuda-compute original args)
+ ,lisp-code))))))
#-cuda
(define-compiler-macro compute (&whole original &rest args)
- (error-fallback
- "~%Fast C compilation failed:~% ~A~%Reverting to ordinary lisp.~%"
- original
- (apply #'do-make-c-compute original args)))
+ (error-fallback
+ "~%Fast C compilation failed:~% ~A~%Reverting to ordinary lisp.~%"
+ original
+ (apply #'do-make-c-compute original args)))
#+cuda
(define-compiler-macro compute (&whole original &rest args)
- (case *compute-with-cuda*
- ((nil)
- (error-fallback
- "~%Fast C compilation failed:~% ~A~%Reverting to ordinary lisp.~%"
- original
- (apply #'do-make-c-compute original args)))
- (:force
+ (case *compute-with-cuda*
+ ((nil)
+ (error-fallback
+ "~%Fast C compilation failed:~% ~A~%Reverting to ordinary lisp.~%"
+ original
+ (apply #'do-make-c-compute original args)))
+ (:force
+ (error-fallback
+ "CUDA compilation failed:~% ~A~%"
+ (apply #'do-make-lisp-compute original args)
+ (apply #'do-make-cuda-compute original args)))
+ (otherwise
+ (let ((lisp-code
(error-fallback
- "CUDA compilation failed:~% ~A~%"
- (apply #'do-make-lisp-compute original args)
- (apply #'do-make-cuda-compute original args)))
- (otherwise
- (let ((lisp-code
- (error-fallback
- "~%Fast C compilation failed:~% ~A~%Reverting to ordinary lisp.~%"
- (apply #'do-make-lisp-compute original args)
- (apply #'do-make-c-compute original args))))
- (error-fallback
- "CUDA compilation failed:~% ~A~%"
- lisp-code
- `(if (cuda:valid-context-p)
- ,(apply #'do-make-cuda-compute original args)
- ,lisp-code))))))
+ "~%Fast C compilation failed:~% ~A~%Reverting to ordinary lisp.~%"
+ (apply #'do-make-lisp-compute original args)
+ (apply #'do-make-c-compute original args))))
+ (error-fallback
+ "CUDA compilation failed:~% ~A~%"
+ lisp-code
+ `(if (cuda:valid-context-p)
+ ,(apply #'do-make-cuda-compute original args)
+ ,lisp-code))))))
#-cuda
(defmacro compute-batch (&body code)
- `(progn ,@code))
+ `(progn ,@code))
#+cuda
(defmacro compute-batch (&body code)
- `(unwind-protect
- (cuda:with-async ,@code)
- (cuda:synchronize)))
+ `(unwind-protect
+ (cuda:with-async ,@code)
+ (cuda:synchronize)))
View
73 compute-pkg.lisp
@@ -1,40 +1,37 @@
-;;;; kate: indent-width 4; replace-tabs yes; space-indent on;
+;;; -*- mode:lisp; indent-tabs-mode: nil; -*-
(defpackage fast-compute
- (:documentation "Fast array computation library")
- (:use "COMMON-LISP" "CL-MATCH" "ALEXANDRIA"
- "FSET" "GMAP" "NEW-LET" "LEXICAL-CONTEXTS")
- (:export
- "MULTIVALUE" "MULTIVALUE-DATA" "MULTIVALUE-SYNC"
- "DEF-MULTIVALUE" "COPY-MULTIVALUE" "DEF-MULTIVALUE-MACRO"
- "ALLOC-MULTIVALUES" "WITH-LOCAL-MULTIVALUES"
- "IREF" "ENABLE-EXPR-QUOTES" "LOOP-INDEXES"
- "*CURRENT-COMPUTE*" "COMPUTE"
- "LETV" "CALC" "_GRP"
- "SET-COMPUTE-THREAD-COUNT" "PARALLEL-LOOP"
- "*COMPUTE-WITH-CUDA*"
- "DUMP-ARRAY" "RESTORE-ARRAY"
- "ALLOW-DENORMALIZED-FLOATS"
- "COMPUTE-BATCH"
- )
- (:import-from "STANDARD-CL"
- "USE-STD-READTABLE" "DO-HASHTABLE"
- "SPLIT-LIST" "SUM" "WHILE" "UNTIL")
- (:shadowing-import-from "COMMON-LISP" "LET" "COND" "LAST")
- (:shadowing-import-from "FSET"
- ;; Shadowed type/constructor names
- "SET" "MAP"
- ;; Alexandria conflicts
- "REMOVEF" "UNIONF" "COMPOSE"
- ;; Shadowed set operations
- "UNION" "INTERSECTION" "SET-DIFFERENCE" "COMPLEMENT"
- ;; Shadowed sequence operations
- "FIRST" "SUBSEQ" "REVERSE" "SORT" "STABLE-SORT"
- "REDUCE"
- "FIND" "FIND-IF" "FIND-IF-NOT"
- "COUNT" "COUNT-IF" "COUNT-IF-NOT"
- "POSITION" "POSITION-IF" "POSITION-IF-NOT"
- "REMOVE" "REMOVE-IF" "REMOVE-IF-NOT"
- "SUBSTITUTE" "SUBSTITUTE-IF" "SUBSTITUTE-IF-NOT"
- "SOME" "EVERY" "NOTANY" "NOTEVERY"
- ))
+ (:documentation "Fast array computation library")
+ (:use "COMMON-LISP" "CL-MATCH" "ALEXANDRIA"
+ "FSET" "GMAP" "NEW-LET" "LEXICAL-CONTEXTS")
+ (:export "MULTIVALUE" "MULTIVALUE-DATA" "MULTIVALUE-SYNC"
+ "DEF-MULTIVALUE" "COPY-MULTIVALUE" "DEF-MULTIVALUE-MACRO"
+ "ALLOC-MULTIVALUES" "WITH-LOCAL-MULTIVALUES"
+ "IREF" "ENABLE-EXPR-QUOTES" "LOOP-INDEXES"
+ "*CURRENT-COMPUTE*" "COMPUTE"
+ "LETV" "CALC" "_GRP"
+ "SET-COMPUTE-THREAD-COUNT" "PARALLEL-LOOP"
+ "*COMPUTE-WITH-CUDA*"
+ "DUMP-ARRAY" "RESTORE-ARRAY"
+ "ALLOW-DENORMALIZED-FLOATS"
+ "COMPUTE-BATCH")
+ (:import-from "STANDARD-CL"
+ "USE-STD-READTABLE" "DO-HASHTABLE"
+ "SPLIT-LIST" "SUM" "WHILE" "UNTIL")
+ (:shadowing-import-from "COMMON-LISP" "LET" "COND" "LAST")
+ (:shadowing-import-from "FSET"
+ ;; Shadowed type/constructor names
+ "SET" "MAP"
+ ;; Alexandria conflicts
+ "REMOVEF" "UNIONF" "COMPOSE"
+ ;; Shadowed set operations
+ "UNION" "INTERSECTION" "SET-DIFFERENCE" "COMPLEMENT"
+ ;; Shadowed sequence operations
+ "FIRST" "SUBSEQ" "REVERSE" "SORT" "STABLE-SORT"
+ "REDUCE"
+ "FIND" "FIND-IF" "FIND-IF-NOT"
+ "COUNT" "COUNT-IF" "COUNT-IF-NOT"
+ "POSITION" "POSITION-IF" "POSITION-IF-NOT"
+ "REMOVE" "REMOVE-IF" "REMOVE-IF-NOT"
+ "SUBSTITUTE" "SUBSTITUTE-IF" "SUBSTITUTE-IF-NOT"
+ "SOME" "EVERY" "NOTANY" "NOTEVERY"))
View
237 cuda/compiler-api.lisp
@@ -1,65 +1,61 @@
-;;;; kate: indent-width 4; replace-tabs yes; space-indent on;
+;;; -*- mode:lisp; indent-tabs-mode: nil; -*-
(in-package cuda)
(defun translate-args (func-var args)
- (let ((arg-strings nil)
- (arg-forms nil)
- (offset 0))
- (dolist (arg args)
- (destructuring-bind
- (atype aname aform) arg
- (labels
- ((add-arg (size type-str set-form &key (align size))
- (setf offset
- (logand (+ offset align -1) (lognot (- align 1))))
- (push
- `(,set-form ,func-var ,offset ,aform)
- arg-forms)
- (push
- (format nil "~A ~A" type-str aname)
- arg-strings)
- (incf offset size)))
- (case atype
- ((:char :byte)
- (add-arg 1 "char" 'param-set-int))
- ((:unsigned-char :unsigned-byte)
- (add-arg 1 "unsigned char" 'param-set-uint))
- (:int
- (add-arg 4 "int" 'param-set-int))
- (:unsigned-int
- (add-arg 4 "unsigned int" 'param-set-uint))
- (:float
- (add-arg 4 "float" 'param-set-float))
- (:double
- (add-arg 8 "double" 'param-set-double))
- (:float-ptr
- (add-arg +ptr-size+ "float*" 'param-set-ptr))
- (:double-ptr
- (add-arg +ptr-size+ "double*" 'param-set-ptr))
- (otherwise
- (error "Unsupported kernel parameter type: ~A" atype))))))
- (values
- (nreverse arg-strings)
+ (let ((arg-strings nil)
+ (arg-forms nil)
+ (offset 0))
+ (dolist (arg args)
+ (destructuring-bind (atype aname aform) arg
+ (labels
+ ((add-arg (size type-str set-form &key (align size))
+ (setf offset
+ (logand (+ offset align -1) (lognot (- align 1))))
+ (push `(,set-form ,func-var ,offset ,aform)
+ arg-forms)
+ (push (format nil "~A ~A" type-str aname)
+ arg-strings)
+ (incf offset size)))
+ (case atype
+ ((:char :byte)
+ (add-arg 1 "char" 'param-set-int))
+ ((:unsigned-char :unsigned-byte)
+ (add-arg 1 "unsigned char" 'param-set-uint))
+ (:int
+ (add-arg 4 "int" 'param-set-int))
+ (:unsigned-int
+ (add-arg 4 "unsigned int" 'param-set-uint))
+ (:float
+ (add-arg 4 "float" 'param-set-float))
+ (:double
+ (add-arg 8 "double" 'param-set-double))
+ (:float-ptr
+ (add-arg +ptr-size+ "float*" 'param-set-ptr))
+ (:double-ptr
+ (add-arg +ptr-size+ "double*" 'param-set-ptr))
+ (otherwise
+ (error "Unsupported kernel parameter type: ~A" atype))))))
+ (values (nreverse arg-strings)
(nreverse arg-forms)
offset)))
(defun get-texture-name (spec)
- (third spec))
+ (third spec))
(defun get-texture-decl (spec)
- (destructuring-bind
+ (destructuring-bind
(vtype dim name arg) spec
- (assert (eql vtype :float))
- (assert (or (eql dim 1) (eql dim 2)))
- (format nil "texture<float,~A> ~A" dim name)))
+ (assert (eql vtype :float))
+ (assert (or (eql dim 1) (eql dim 2)))
+ (format nil "texture<float,~A> ~A" dim name)))
(defun get-texture-assn (fun var idx spec)
- (destructuring-bind
+ (destructuring-bind
(vtype dim name arg) spec
- (ecase dim
- (1 `(param-set-texture-1d ,fun (svref ,var ,idx) ,arg))
- (2 `(param-set-texture-2d ,fun (svref ,var ,idx) ,arg)))))
+ (ecase dim
+ (1 `(param-set-texture-1d ,fun (svref ,var ,idx) ,arg))
+ (2 `(param-set-texture-2d ,fun (svref ,var ,idx) ,arg)))))
(defparameter *compiled-cache* (make-hash-table :test #'equal))
@@ -70,82 +66,79 @@
(defvar *print-kernel-code* nil)
(defun do-compile-kernel (code)
- (let* ((tmpname (ext:mkstemp #P"TMP:CUDAKERNEL"))
- (srcname (make-pathname :type "cu" :defaults tmpname))
- (outname (make-pathname
- :type (if *nvcc-cubin* "cubin" "ptx")
- :defaults tmpname))
- (cmd (format nil
- "~A ~A ~A -m~A --output-file=~A ~A"
- *nvcc*
- (if *nvcc-cubin* "--cubin" "--ptx")
- *nvcc-flags*
- (* +ptr-size+ 8)
- outname srcname)))
- (with-open-file (src srcname :direction :output
- :if-exists :supersede)
- (write-string code src))
- (when *print-kernel-code*
- (format t "Compiling:~%~A" code))
- (unwind-protect
- (progn
- (format t "Running command:~% ~A~%" cmd)
- (let ((rv (ext:system cmd)))
- (unless (= rv 0)
- (error "Compilation failed: ~A~%" rv)))
- (with-open-file (out outname)
- (let ((buffer (make-string (file-length out)
- :element-type 'base-char)))
- (read-sequence buffer out)
- (when *print-kernel-code*
- (format t "Result:~%~A" buffer))
- buffer)))
- (when (probe-file srcname)
- (delete-file srcname))
- (when (probe-file outname)
- (delete-file outname)))))
+ (let* ((tmpname (ext:mkstemp #P"TMP:CUDAKERNEL"))
+ (srcname (make-pathname :type "cu" :defaults tmpname))
+ (outname (make-pathname :type (if *nvcc-cubin* "cubin" "ptx")
+ :defaults tmpname))
+ (cmd (format nil "~A ~A ~A -m~A --output-file=~A ~A"
+ *nvcc*
+ (if *nvcc-cubin* "--cubin" "--ptx")
+ *nvcc-flags*
+ (* +ptr-size+ 8)
+ outname srcname)))
+ (with-open-file (src srcname :direction :output
+ :if-exists :supersede)
+ (write-string code src))
+ (when *print-kernel-code*
+ (format t "Compiling:~%~A" code))
+ (unwind-protect
+ (progn
+ (format t "Running command:~% ~A~%" cmd)
+ (let ((rv (ext:system cmd)))
+ (unless (= rv 0)
+ (error "Compilation failed: ~A~%" rv)))
+ (with-open-file (out outname)
+ (let ((buffer (make-string (file-length out)
+ :element-type 'base-char)))
+ (read-sequence buffer out)
+ (when *print-kernel-code*
+ (format t "Result:~%~A" buffer))
+ buffer)))
+ (when (probe-file srcname)
+ (delete-file srcname))
+ (when (probe-file outname)
+ (delete-file outname)))))
(defun compile-kernel (code)
- (let ((cached-code (gethash code *compiled-cache*)))
- (if cached-code cached-code
- (setf (gethash code *compiled-cache*)
- (do-compile-kernel code)))))
+ (let ((cached-code (gethash code *compiled-cache*)))
+ (if cached-code cached-code
+ (setf (gethash code *compiled-cache*)
+ (do-compile-kernel code)))))
(defmacro kernel (args code &key
- (grid-size '(1 1)) (block-size '(1 1 1))
- (name "kernel_func") (max-registers nil)
- (textures nil))
- (assert (= (length block-size) 3)
- (block-size) "Bad block size spec: ~A" block-size)
- (assert (= (length grid-size) 2)
- (grid-size) "Bad grid size spec: ~A" grid-size)
- (let* ((func-var (gensym))
- (grp-var (gensym)))
- (multiple-value-bind
- (arg-strings arg-forms arg-size)
- (translate-args func-var args)
- (let* ((full-code
- (format nil
- "~{~A;~%~}extern \"C\" __global__ __device__
+ (grid-size '(1 1)) (block-size '(1 1 1))
+ (name "kernel_func") (max-registers nil)
+ (textures nil))
+ (assert (= (length block-size) 3)
+ (block-size) "Bad block size spec: ~A" block-size)
+ (assert (= (length grid-size) 2)
+ (grid-size) "Bad grid size spec: ~A" grid-size)
+ (let* ((func-var (gensym))
+ (grp-var (gensym)))
+ (multiple-value-bind (arg-strings arg-forms arg-size)
+ (translate-args func-var args)
+ (let* ((full-code
+ (format nil
+ "~{~A;~%~}extern \"C\" __global__ __device__
void ~A(~{~A~^, ~}) {~%~A~%}~%"
- (mapcar #'get-texture-decl textures)
- name arg-strings code))
- (compiled-code (compile-kernel full-code))
- (texs (if textures
- (list :textures
- (mapcar #'get-texture-name textures))))
- (args (if max-registers
- (list* :max-registers max-registers texs)
- texs))
- (load-spec `(load-kernel '(,name ,compiled-code ,@args)))
- (letspec (if textures
- `((,grp-var (the vector ,load-spec))
- (,func-var (svref ,grp-var 0)))
- `((,func-var ,load-spec)))))
- `(let* ,letspec
- (declare (optimize (safety 1) (debug 0)))
- ,@arg-forms
- ,@(loop for tex in textures
- for idx from 1
- collect (get-texture-assn func-var grp-var idx tex))
- (launch-kernel ,func-var ,arg-size ,@block-size ,@grid-size))))))
+ (mapcar #'get-texture-decl textures)
+ name arg-strings code))
+ (compiled-code (compile-kernel full-code))
+ (texs (if textures
+ (list :textures
+ (mapcar #'get-texture-name textures))))
+ (args (if max-registers
+ (list* :max-registers max-registers texs)
+ texs))
+ (load-spec `(load-kernel '(,name ,compiled-code ,@args)))
+ (letspec (if textures
+ `((,grp-var (the vector ,load-spec))
+ (,func-var (svref ,grp-var 0)))
+ `((,func-var ,load-spec)))))
+ `(let* ,letspec
+ (declare (optimize (safety 1) (debug 0)))
+ ,@arg-forms
+ ,@(loop for tex in textures
+ for idx from 1
+ collect (get-texture-assn func-var grp-var idx tex))
+ (launch-kernel ,func-var ,arg-size ,@block-size ,@grid-size))))))
View
920 cuda/driver-api.lisp
@@ -1,19 +1,17 @@
-;;;; kate: indent-width 4; replace-tabs yes; space-indent on;
+;;; -*- mode:lisp; indent-tabs-mode: nil; -*-
(defpackage cuda
- (:documentation "Interface to the NVidia CUDA driver")
- (:use "COMMON-LISP")
- (:export
- "+DEVICE-COUNT+" "GET-CAPS"
- "*CURRENT-CONTEXT*" "VALID-CONTEXT-P"
- "CREATE-CONTEXT" "DESTROY-CONTEXT"
- "CREATE-LINEAR-BUFFER" "DESTROY-LINEAR-BUFFER"
- "VALID-LINEAR-BUFFER-P"
- "LINEAR-SIZE" "LINEAR-EXTENT" "LINEAR-PITCH" "LINEAR-PITCHED-P"
- "CREATE-LINEAR-FOR-ARRAY" "COPY-LINEAR-FOR-ARRAY"
- "KERNEL" "DISCARD-CODE-CACHE"
- "*LAUNCH-ASYNC*" "WITH-ASYNC" "SYNCHRONIZE"
- ))
+ (:documentation "Interface to the NVidia CUDA driver")
+ (:use "COMMON-LISP")
+ (:export "+DEVICE-COUNT+" "GET-CAPS"
+ "*CURRENT-CONTEXT*" "VALID-CONTEXT-P"
+ "CREATE-CONTEXT" "DESTROY-CONTEXT"
+ "CREATE-LINEAR-BUFFER" "DESTROY-LINEAR-BUFFER"
+ "VALID-LINEAR-BUFFER-P"
+ "LINEAR-SIZE" "LINEAR-EXTENT" "LINEAR-PITCH" "LINEAR-PITCHED-P"
+ "CREATE-LINEAR-FOR-ARRAY" "COPY-LINEAR-FOR-ARRAY"
+ "KERNEL" "DISCARD-CODE-CACHE"
+ "*LAUNCH-ASYNC*" "WITH-ASYNC" "SYNCHRONIZE"))
(in-package cuda)
@@ -108,125 +106,122 @@
(ffi:clines "
#define FOREIGNP(objp) ((IMMEDIATE(objp) == 0) && ((objp)->d.t == t_foreign))
#define FOREIGN_WITH_TAGP(objp,tagv) (FOREIGNP(objp) && ((objp)->foreign.tag == tagv))
- ")
+ ")
(defmacro def-foreign-handle (typename destroy-f valid-f ctype free-code)
- `(progn
- (ffi:def-foreign-type ,typename :void)
-
- (defun ,destroy-f (handle)
- (check-ffi-type handle ,typename)
- (ffi:c-inline (handle) (:object) :void
- ,(format nil "{
- ~A ptr = ecl_foreign_data_pointer_safe(#0);
- if (ptr) { ~A; }
- (#0)->foreign.data = NULL;
- }" ctype free-code)))
-
- (defun ,valid-f (handle)
- (declare (optimize (safety 0) (debug 0)))
- (ffi:c-inline (handle ',typename) (:object :object) :object
- "((FOREIGN_WITH_TAGP(#0,#1) && ((#0)->foreign.data != NULL))
- ? Ct : Cnil)"
- :one-liner t))))
+ `(progn
+ (ffi:def-foreign-type ,typename :void)
+
+ (defun ,destroy-f (handle)
+ (check-ffi-type handle ,typename)
+ (ffi:c-inline (handle) (:object) :void
+ ,(format nil "{
+ ~A ptr = ecl_foreign_data_pointer_safe(#0);
+ if (ptr) { ~A; }
+ (#0)->foreign.data = NULL;
+ }" ctype free-code)))
+
+ (defun ,valid-f (handle)
+ (declare (optimize (safety 0) (debug 0)))
+ (ffi:c-inline (handle ',typename) (:object :object) :object
+ "((FOREIGN_WITH_TAGP(#0,#1) && ((#0)->foreign.data != NULL)) ? Ct : Cnil)"
+ :one-liner t))))
(defmacro check-ffi-type (var typespec)
- `(progn
- (check-type ,var si:foreign-data)
- (assert (eql (si:foreign-data-tag ,var) ',typespec)
- (,var)
- "Type mismatch: ~A is not a wrapped foreign ~A" ,var ',typespec)))
+ `(progn
+ (check-type ,var si:foreign-data)
+ (assert (eql (si:foreign-data-tag ,var) ',typespec)
+ (,var) "Type mismatch: ~A is not a wrapped foreign ~A" ,var ',typespec)))
(defmacro discard-ffi-handle (handle)
- (check-type handle si:foreign-data)
- (ffi:c-inline (handle) (:object) :void
- "(#0)->foreign.data = NULL;"))
+ (check-type handle si:foreign-data)
+ (ffi:c-inline (handle) (:object) :void
+ "(#0)->foreign.data = NULL;"))
;;; Driver initialization
(defvar *initialized* nil)
(unless *initialized*
- (ffi:c-inline () () :void "check_error(cuInit(0));")
- (setf *initialized* t))
+ (ffi:c-inline () () :void "check_error(cuInit(0));")
+ (setf *initialized* t))
(pushnew :cuda *features*)
;;; Device count
(defun get-device-count ()
- (ffi:c-inline () () :int "{
- int major, minor, count;
- check_error(cuDeviceGetCount(&count));
-
- if (count > 0) {
- check_error(cuDeviceComputeCapability(&major, &minor, 0));
- if (major == 9999 && minor == 9999)
- count = 0;
- }
+ (ffi:c-inline () () :int "{
+ int major, minor, count;
+ check_error(cuDeviceGetCount(&count));
+
+ if (count > 0) {
+ check_error(cuDeviceComputeCapability(&major, &minor, 0));
+ if (major == 9999 && minor == 9999)
+ count = 0;
+ }
- @(return) = count;
- }"))
+ @(return) = count;
+ }"))
(defparameter +device-count+ (get-device-count))
;;; Device capabilities
(defstruct (capabilities (:conc-name caps-))
- revision name memory mp-count const-memory shared-memory reg-count warp-size
- max-threads tex-alignment has-overlap has-mapping has-timeout)
+ revision name memory mp-count const-memory shared-memory reg-count warp-size
+ max-threads tex-alignment has-overlap has-mapping has-timeout)
(defun get-caps (device)
- (multiple-value-bind
+ (multiple-value-bind
(revision name memory mp-count const-memory shared-memory reg-count warp-size
- max-threads tex-alignment has-overlap has-timeout has-mapping)
- (ffi:c-inline
- (device) (:int)
- (values
- :object :object :int :int :int :int :int
- :int :int :int :int :int :int)
- "{
- int major, minor, tmp;
- CUdevprop props;
- int dev = #0;
- char name[256];
-
- check_error(cuDeviceComputeCapability(&major, &minor, dev));
- @(return 0) = ecl_cons(ecl_make_integer(major), ecl_make_integer(minor));
-
- check_error(cuDeviceGetName(name, 256, dev));
- @(return 1) = make_base_string_copy(name);
-
- check_error(cuDeviceTotalMem(&tmp, dev));
- @(return 2) = tmp;
-
- check_error(cuDeviceGetAttribute(&tmp, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev));
- @(return 3) = tmp;
-
- check_error(cuDeviceGetProperties(&props, dev));
- @(return 4) = props.totalConstantMemory;
- @(return 5) = props.sharedMemPerBlock;
- @(return 6) = props.regsPerBlock;
- @(return 7) = props.SIMDWidth;
- @(return 8) = props.maxThreadsPerBlock;
- @(return 9) = props.textureAlign;
-
- check_error(cuDeviceGetAttribute(&tmp, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, dev));
- @(return 10) = tmp;
-
- check_error(cuDeviceGetAttribute(&tmp, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, dev));
- @(return 11) = tmp;
-
- check_error(cuDeviceGetAttribute(&tmp, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
- @(return 12) = tmp;
- }")
- (make-capabilities
- :revision revision :name name :memory memory :mp-count mp-count
- :const-memory const-memory :shared-memory shared-memory :reg-count reg-count
- :warp-size warp-size :tex-alignment tex-alignment :has-overlap (/= 0 has-overlap)
- :has-mapping (/= 0 has-mapping) :has-timeout (/= 0 has-timeout) :max-threads max-threads)))
+ max-threads tex-alignment has-overlap has-timeout has-mapping)
+ (ffi:c-inline
+ (device) (:int)
+ (values :object :object :int :int :int :int :int
+ :int :int :int :int :int :int)
+ "{
+ int major, minor, tmp;
+ CUdevprop props;
+ int dev = #0;
+ char name[256];
+
+ check_error(cuDeviceComputeCapability(&major, &minor, dev));
+ @(return 0) = ecl_cons(ecl_make_integer(major), ecl_make_integer(minor));
+
+ check_error(cuDeviceGetName(name, 256, dev));
+ @(return 1) = make_base_string_copy(name);
+
+ check_error(cuDeviceTotalMem(&tmp, dev));
+ @(return 2) = tmp;
+
+ check_error(cuDeviceGetAttribute(&tmp, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev));
+ @(return 3) = tmp;
+
+ check_error(cuDeviceGetProperties(&props, dev));
+ @(return 4) = props.totalConstantMemory;
+ @(return 5) = props.sharedMemPerBlock;
+ @(return 6) = props.regsPerBlock;
+ @(return 7) = props.SIMDWidth;
+ @(return 8) = props.maxThreadsPerBlock;
+ @(return 9) = props.textureAlign;
+
+ check_error(cuDeviceGetAttribute(&tmp, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, dev));
+ @(return 10) = tmp;
+
+ check_error(cuDeviceGetAttribute(&tmp, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, dev));
+ @(return 11) = tmp;
+
+ check_error(cuDeviceGetAttribute(&tmp, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
+ @(return 12) = tmp;
+ }")
+ (make-capabilities :revision revision :name name :memory memory :mp-count mp-count
+ :const-memory const-memory :shared-memory shared-memory
+ :reg-count reg-count :warp-size warp-size :tex-alignment tex-alignment
+ :has-overlap (/= 0 has-overlap) :has-mapping (/= 0 has-mapping)
+ :has-timeout (/= 0 has-timeout) :max-threads max-threads)))
;;; CUDA streams
@@ -235,14 +230,15 @@
"CUstream" "check_error(cuStreamDestroy(ptr))")
(defun create-stream ()
- (ffi:c-inline
- ('stream-pointer)
- (:object)
- :object "{
- CUstream str;
- check_error(cuStreamCreate(&str,0));
- @(return) = ecl_make_foreign_data(#0, 0, str);
- }"))
+ (ffi:c-inline
+ ('stream-pointer)
+ (:object)
+ :object
+ "{
+ CUstream str;
+ check_error(cuStreamCreate(&str,0));
+ @(return) = ecl_make_foreign_data(#0, 0, str);
+ }"))
;;; CUDA context management
@@ -253,68 +249,66 @@
(defvar *current-context* nil)
(defstruct context
- (device (error "Device required") :read-only t)
- (handle (error "Handle required") :read-only t)
- (device-caps (error "Caps required") :read-only t)
- (async-stream (create-stream) :read-only t)
- (linear-buffers nil)
- (module-cache (make-hash-table :test #'equal))
- (kernel-cache (make-hash-table :test #'eq)))
+ (device (error "Device required") :read-only t)
+ (handle (error "Handle required") :read-only t)
+ (device-caps (error "Caps required") :read-only t)
+ (async-stream (create-stream) :read-only t)
+ (linear-buffers nil)
+ (module-cache (make-hash-table :test #'equal))
+ (kernel-cache (make-hash-table :test #'eq)))
(defun valid-context-p (&optional (context *current-context*))
- (and (typep context 'context)
- (valid-context-handle-p (context-handle context))))
+ (and (typep context 'context)
+ (valid-context-handle-p (context-handle context))))
(defun destroy-context (&optional (context *current-context*))
- (destroy-context-handle (context-handle context))
- (dolist (item (context-linear-buffers context))
- (discard-linear-buffer item))
- (setf (context-linear-buffers context) nil)
- (clrhash (context-module-cache context))
- (clrhash (context-kernel-cache context))
- (ext:set-finalizer context nil)
- (when (eql context *current-context*)
- (setf *current-context* nil)))
+ (destroy-context-handle (context-handle context))
+ (dolist (item (context-linear-buffers context))
+ (discard-linear-buffer item))
+ (setf (context-linear-buffers context) nil)
+ (clrhash (context-module-cache context))
+ (clrhash (context-kernel-cache context))
+ (ext:set-finalizer context nil)
+ (when (eql context *current-context*)
+ (setf *current-context* nil)))
(defun create-context (device &key sync-mode with-mapping)
- (assert (not (valid-context-p)))
- (let* ((map-flag (if with-mapping 1 0))
- (sync-flag (case sync-mode
- ((nil) 0) (:auto 0) (:spin 1) (:yield 2) (:block 3)
- (t (error "Invalid sync mode: ~A" sync-mode))))
- (handle
- (ffi:c-inline
- (device map-flag sync-flag 'context-pointer)
- (:int :int :int :object)
- :object "{
- CUcontext ctx;
- int flags = 0, dev = #0;
- if (#1)
- flags |= CU_CTX_MAP_HOST;
- switch (#2) {
- case 0: flags |= CU_CTX_SCHED_AUTO; break;
- case 1: flags |= CU_CTX_SCHED_SPIN; break;
- case 2: flags |= CU_CTX_SCHED_YIELD; break;
- case 3: flags |= CU_CTX_BLOCKING_SYNC; break;
- }
- check_error(cuCtxCreate(&ctx, flags, dev));
- @(return) = ecl_make_foreign_data(#3, 0, ctx);
- }"))
- (context
- (make-context
- :device device :handle handle
- :device-caps (get-caps device))))
- (ext:set-finalizer context #'destroy-context)
- (setf *current-context* context)))
+ (assert (not (valid-context-p)))
+ (let* ((map-flag (if with-mapping 1 0))
+ (sync-flag (case sync-mode
+ ((nil) 0) (:auto 0) (:spin 1) (:yield 2) (:block 3)
+ (t (error "Invalid sync mode: ~A" sync-mode))))
+ (handle
+ (ffi:c-inline
+ (device map-flag sync-flag 'context-pointer)
+ (:int :int :int :object)
+ :object "{
+ CUcontext ctx;
+ int flags = 0, dev = #0;
+ if (#1)
+ flags |= CU_CTX_MAP_HOST;
+ switch (#2) {
+ case 0: flags |= CU_CTX_SCHED_AUTO; break;
+ case 1: flags |= CU_CTX_SCHED_SPIN; break;
+ case 2: flags |= CU_CTX_SCHED_YIELD; break;
+ case 3: flags |= CU_CTX_BLOCKING_SYNC; break;
+ }
+ check_error(cuCtxCreate(&ctx, flags, dev));
+ @(return) = ecl_make_foreign_data(#3, 0, ctx);
+ }"))
+ (context (make-context :device device :handle handle
+ :device-caps (get-caps device))))
+ (ext:set-finalizer context #'destroy-context)
+ (setf *current-context* context)))
;;; Linear buffer management
(ffi:def-struct linear-buffer
- (width :unsigned-int)
- (height :unsigned-int)
- (pitch :unsigned-int)
- (device-ptr :unsigned-int))
+ (width :unsigned-int)
+ (height :unsigned-int)
+ (pitch :unsigned-int)
+ (device-ptr :unsigned-int))
(ffi:clines "
typedef struct {
@@ -326,112 +320,114 @@
")
(defun valid-linear-buffer-p (handle)
- (declare (optimize (safety 0) (debug 0)))
- (ffi:c-inline (handle 'linear-buffer) (:object :object) :object
- "((FOREIGN_WITH_TAGP(#0,#1) &&
- (((LinearBuffer*)((#0)->foreign.data))->device_ptr != NULL))
- ? Ct : Cnil)"
- :one-liner t))
+ (declare (optimize (safety 0) (debug 0)))
+ (ffi:c-inline (handle 'linear-buffer) (:object :object) :object
+ "((FOREIGN_WITH_TAGP(#0,#1) &&
+ (((LinearBuffer*)((#0)->foreign.data))->device_ptr != NULL))
+ ? Ct : Cnil)"
+ :one-liner t))
(defun free-linear-buffer (buffer)
- (check-ffi-type buffer linear-buffer)
- (ffi:c-inline (buffer) (:object) :void "{
- LinearBuffer *pbuf = ecl_foreign_data_pointer_safe(#0);
- if (pbuf->device_ptr)
- check_error(cuMemFree(pbuf->device_ptr));
- pbuf->device_ptr = NULL;
- }"))
+ (check-ffi-type buffer linear-buffer)
+ (ffi:c-inline (buffer) (:object) :void "{
+ LinearBuffer *pbuf = ecl_foreign_data_pointer_safe(#0);
+ if (pbuf->device_ptr)
+ check_error(cuMemFree(pbuf->device_ptr));
+ pbuf->device_ptr = NULL;
+ }"))
(defun discard-linear-buffer (buffer)
- (check-ffi-type buffer linear-buffer)
- (ffi:c-inline (buffer) (:object) :void "{
- LinearBuffer *pbuf = ecl_foreign_data_pointer_safe(#0);
- pbuf->device_ptr = NULL;
- }"))
+ (check-ffi-type buffer linear-buffer)
+ (ffi:c-inline (buffer) (:object) :void "{
+ LinearBuffer *pbuf = ecl_foreign_data_pointer_safe(#0);
+ pbuf->device_ptr = NULL;
+ }"))
(defun destroy-linear-buffer (buffer)
- (check-ffi-type buffer linear-buffer)
- (when (valid-linear-buffer-p buffer)
- (prog2
- (assert (and (valid-context-p)
- (find buffer
- (context-linear-buffers *current-context*))))
- (free-linear-buffer buffer)
- (setf (context-linear-buffers *current-context*)
- (delete buffer
+ (check-ffi-type buffer linear-buffer)
+ (when (valid-linear-buffer-p buffer)
+ (prog2
+ (assert (and (valid-context-p)
+ (find buffer
+ (context-linear-buffers *current-context*))))
+ (free-linear-buffer buffer)
+ (setf (context-linear-buffers *current-context*)
+ (delete buffer
(context-linear-buffers *current-context*))))))
(defun create-linear-buffer (width &optional (height 1) &key pitched)
- (assert (valid-context-p))
- (let* ((buffer
- (ffi:c-inline
- (width height (or pitched 0) 'linear-buffer)
- (:int :int :int :object)
- :object "{
- cl_object buf = ecl_allocate_foreign_data(#3,sizeof(LinearBuffer));
- LinearBuffer *pbuf = buf->foreign.data;
- pbuf->width = #0;
- pbuf->height = #1;
- if (#2 > 0 && pbuf->height > 1) {
- check_error(cuMemAllocPitch(&pbuf->device_ptr, &pbuf->pitch,
- pbuf->width, pbuf->height, #2));
- } else {
- pbuf->pitch = pbuf->width;
- check_error(cuMemAlloc(&pbuf->device_ptr, pbuf->width*pbuf->height));
- }
- @(return) = buf;
- }")))
- (push buffer (context-linear-buffers *current-context*))
- buffer))
+ (assert (valid-context-p))
+ (let* ((buffer
+ (ffi:c-inline
+ (width height (or pitched 0) 'linear-buffer)
+ (:int :int :int :object)
+ :object
+ "{
+ cl_object buf = ecl_allocate_foreign_data(#3,sizeof(LinearBuffer));
+ LinearBuffer *pbuf = buf->foreign.data;
+ pbuf->width = #0;
+ pbuf->height = #1;
+ if (#2 > 0 && pbuf->height > 1) {
+ check_error(cuMemAllocPitch(&pbuf->device_ptr, &pbuf->pitch,
+ pbuf->width, pbuf->height, #2));
+ } else {
+ pbuf->pitch = pbuf->width;
+ check_error(cuMemAlloc(&pbuf->device_ptr, pbuf->width*pbuf->height));
+ }
+ @(return) = buf;
+ }")))
+ (push buffer (context-linear-buffers *current-context*))
+ buffer))
(defun linear-size (buffer)
- (* (ffi:get-slot-value buffer 'linear-buffer 'width)
- (ffi:get-slot-value buffer 'linear-buffer 'height)))
+ (* (ffi:get-slot-value buffer 'linear-buffer 'width)
+ (ffi:get-slot-value buffer 'linear-buffer 'height)))
(defun linear-extent (buffer)
- (* (ffi:get-slot-value buffer 'linear-buffer 'pitch)
- (ffi:get-slot-value buffer 'linear-buffer 'height)))
+ (* (ffi:get-slot-value buffer 'linear-buffer 'pitch)
+ (ffi:get-slot-value buffer 'linear-buffer 'height)))
(declaim (ftype (function (t) fixnum) linear-pitch))
(defun linear-pitch (buffer)
- (declare (optimize (safety 0) (debug 0)))
- (ffi:c-inline
- (buffer 'linear-buffer)
- (:object :object)
- :int "{
- if (!FOREIGN_WITH_TAGP(#0,#1)) FEerror(\"Not a linear buffer: ~A\",1,#0);
- LinearBuffer *buf = (#0)->foreign.data;
- @(return) = buf->pitch;
- }"))
+ (declare (optimize (safety 0) (debug 0)))
+ (ffi:c-inline
+ (buffer 'linear-buffer)
+ (:object :object)
+ :int
+ "{
+ if (!FOREIGN_WITH_TAGP(#0,#1)) FEerror(\"Not a linear buffer: ~A\",1,#0);
+ LinearBuffer *buf = (#0)->foreign.data;
+ @(return) = buf->pitch;
+ }"))
(defun linear-pitched-p (buffer)
- (/= (ffi:get-slot-value buffer 'linear-buffer 'pitch)
- (ffi:get-slot-value buffer 'linear-buffer 'width)))
+ (/= (ffi:get-slot-value buffer 'linear-buffer 'pitch)
+ (ffi:get-slot-value buffer 'linear-buffer 'width)))
;; Linear buffers for Lisp arrays
(defun array-element-size (arr)
- (let ((tname (array-element-type arr)))
- (case tname
- (single-float 4)
- (double-float 8)
- (otherwise
- (error "Unsupported element type: ~A" tname)))))
+ (let ((tname (array-element-type arr)))
+ (case tname
+ (single-float 4)
+ (double-float 8)
+ (otherwise
+ (error "Unsupported element type: ~A" tname)))))
(defun create-linear-for-array (arr)
- (let* ((item-size (array-element-size arr))
- (dims (reverse (array-dimensions arr)))
- (width (* item-size (car dims)))
- (height (reduce #'* (cdr dims))))
- (create-linear-buffer width height :pitched item-size)))
+ (let* ((item-size (array-element-size arr))
+ (dims (reverse (array-dimensions arr)))
+ (width (* item-size (car dims)))
+ (height (reduce #'* (cdr dims))))
+ (create-linear-buffer width height :pitched item-size)))
(defun copy-linear-for-array (buffer arr &key from-device)
- (check-ffi-type buffer linear-buffer)
- (ffi:c-inline
- (buffer arr (if from-device 1 0))
- (:object :object :int)
- :void "{
+ (check-ffi-type buffer linear-buffer)
+ (ffi:c-inline
+ (buffer arr (if from-device 1 0))
+ (:object :object :int)
+ :void "{
LinearBuffer *pbuf = ecl_foreign_data_pointer_safe(#0);
cl_object arr = #1;
void *data;
@@ -513,262 +509,266 @@
"CUtexref" "")
(defun load-module (code max-registers)
- (check-type code base-string)
- (assert (valid-context-p))
- (let* ((cache (context-module-cache *current-context*))
- (tag (list code max-registers))
- (handle (gethash tag cache)))
- (if (valid-module-handle-p handle)
- handle
- (let ((new-handle
- (ffi:c-inline
- (code 'module-pointer max-registers)
- (:object :object :int)
- :object "{
- CUmodule mod;
- int max_regs = #2;
- CUjit_option options[] = { CU_JIT_MAX_REGISTERS };
- void *opt_vals[] = { (void*)max_regs };
- int fpstate = fedisableexcept(FE_ALL_EXCEPT);
- CUresult res = cuModuleLoadDataEx(
- &mod, #0->base_string.self,
- 1, &options, &opt_vals
- );
- feenableexcept(fpstate);
- check_error(res);
- @(return) = ecl_make_foreign_data(#1, 0, mod);
- }")))
- (setf (gethash tag cache) new-handle)))))
+ (check-type code base-string)
+ (assert (valid-context-p))
+ (let* ((cache (context-module-cache *current-context*))
+ (tag (list code max-registers))
+ (handle (gethash tag cache)))
+ (if (valid-module-handle-p handle)
+ handle
+ (let ((new-handle
+ (ffi:c-inline
+ (code 'module-pointer max-registers)
+ (:object :object :int)
+ :object
+ "{
+ CUmodule mod;
+ int max_regs = #2;
+ CUjit_option options[] = { CU_JIT_MAX_REGISTERS };
+ void *opt_vals[] = { (void*)max_regs };
+ int fpstate = fedisableexcept(FE_ALL_EXCEPT);
+ CUresult res = cuModuleLoadDataEx(
+ &mod, #0->base_string.self,
+ 1, &options, &opt_vals
+ );
+ feenableexcept(fpstate);
+ check_error(res);
+ @(return) = ecl_make_foreign_data(#1, 0, mod);
+ }")))
+ (setf (gethash tag cache) new-handle)))))
(defun module-get-function (module name)
- (check-type name base-string)
- (ffi:c-inline
- (name module 'function-pointer)
- (:object :object :object)
- :object "{
- CUmodule mod = ecl_foreign_data_pointer_safe(#1);
- CUfunction fun;
- check_error(cuModuleGetFunction(&fun, mod, #0->base_string.self));
- @(return) = ecl_make_foreign_data(#2, 0, fun);
- }"))
+ (check-type name base-string)
+ (ffi:c-inline
+ (name module 'function-pointer)
+ (:object :object :object)
+ :object
+ "{
+ CUmodule mod = ecl_foreign_data_pointer_safe(#1);
+ CUfunction fun;
+ check_error(cuModuleGetFunction(&fun, mod, #0->base_string.self));
+ @(return) = ecl_make_foreign_data(#2, 0, fun);
+ }"))
(defun module-get-texture (module name)
- (check-type name base-string)
- (ffi:c-inline
- (name module 'texture-pointer)
- (:object :object :object)
- :object "{
- CUmodule mod = ecl_foreign_data_pointer_safe(#1);
- CUtexref tex;
- check_error(cuModuleGetTexRef(&tex, mod, #0->base_string.self));
- @(return) = ecl_make_foreign_data(#2, 0, tex);
- }"))
+ (check-type name base-string)
+ (ffi:c-inline
+ (name module 'texture-pointer)
+ (:object :object :object)
+ :object
+ "{
+ CUmodule mod = ecl_foreign_data_pointer_safe(#1);
+ CUtexref tex;
+ check_error(cuModuleGetTexRef(&tex, mod, #0->base_string.self));
+ @(return) = ecl_make_foreign_data(#2, 0, tex);
+ }"))
(defun function-info (func)
- (check-ffi-type func function-pointer)
- (ffi:c-inline
- (func) (:object)
- (values :int :int :int :int)
- "{
- CUfunction fun = #0->foreign.data;
- int tmp;
- check_error(cuFuncGetAttribute(&tmp, CU_FUNC_ATTRIBUTE_NUM_REGS, fun));
- @(return 0) = tmp;
- check_error(cuFuncGetAttribute(&tmp, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, fun));
- @(return 1) = tmp;
- check_error(cuFuncGetAttribute(&tmp, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, fun));
- @(return 2) = tmp;
- check_error(cuFuncGetAttribute(&tmp, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, fun));
- @(return 3) = tmp;
- }"))
+ (check-ffi-type func function-pointer)
+ (ffi:c-inline
+ (func) (:object)
+ (values :int :int :int :int)
+ "{
+ CUfunction fun = #0->foreign.data;
+ int tmp;
+ check_error(cuFuncGetAttribute(&tmp, CU_FUNC_ATTRIBUTE_NUM_REGS, fun));
+ @(return 0) = tmp;
+ check_error(cuFuncGetAttribute(&tmp, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, fun));
+ @(return 1) = tmp;
+ check_error(cuFuncGetAttribute(&tmp, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, fun));
+ @(return 2) = tmp;
+ check_error(cuFuncGetAttribute(&tmp, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, fun));
+ @(return 3) = tmp;
+ }"))
(defun print-function-info (name func)
- (multiple-value-bind
- (regs local shared max-threads)
- (function-info func)
- (let* ((caps (context-device-caps *current-context*))
- (max-rt (floor (caps-reg-count caps) regs))
- (max-t (min max-threads max-rt))
- (max-b (floor (caps-shared-memory caps) shared))
- (max-w (/ max-t (caps-warp-size caps))))
- (format t "Loaded ~A: ~A regs, ~A shared; max ~A blocks/~A~@[(~A)~] threads/~A warps.~%"
- name regs shared max-b max-t
- (if (> max-rt max-t) max-rt) max-w)
- (when (< max-w 6)
- (format t "!!! SLOW: Low occupancy !!!~%"))
- (when (> local 0)
- (format t "!!! SLOW: ~A bytes (~A words) of uncached local memory !!!~%"
- local (/ local 4))))))
+ (multiple-value-bind (regs local shared max-threads)
+ (function-info func)
+ (let* ((caps (context-device-caps *current-context*))
+ (max-rt (floor (caps-reg-count caps) regs))
+ (max-t (min max-threads max-rt))
+ (max-b (floor (caps-shared-memory caps) shared))
+ (max-w (/ max-t (caps-warp-size caps))))
+ (format t "Loaded ~A: ~A regs, ~A shared; max ~A blocks/~A~@[(~A)~] threads/~A warps.~%"
+ name regs shared max-b max-t
+ (if (> max-rt max-t) max-rt) max-w)
+ (when (< max-w 6)
+ (format t "!!! SLOW: Low occupancy !!!~%"))
+ (when (> local 0)
+ (format t "!!! SLOW: ~A bytes (~A words) of uncached local memory !!!~%"
+ local (/ local 4))))))
(defvar *log-kernel-loads* t)
(defun load-kernel (key)
- (declare (optimize (safety 1) (debug 0)))
- (let* ((cache (context-kernel-cache *current-context*))
- (handle (gethash key cache)))
- (if handle handle
- (destructuring-bind
- (func-name code &key
- (max-registers 64)
- (textures nil has-tex-p)) key
- (let* ((module (load-module code max-registers))
- (new-handle (module-get-function module func-name))
- (tex-refs (mapcar
- #'(lambda (name) (module-get-texture module name))
- textures)))
- (when *log-kernel-loads*
- (print-function-info func-name new-handle))
- (setf (gethash key cache)
- (if has-tex-p
- (make-array
- (1+ (length tex-refs))
- :initial-contents
- (list* new-handle tex-refs))
- new-handle)))))))
+ (declare (optimize (safety 1) (debug 0)))
+ (let* ((cache (context-kernel-cache *current-context*))
+ (handle (gethash key cache)))
+ (if handle handle
+ (destructuring-bind (func-name code &key
+ (max-registers 64)
+ (textures nil has-tex-p))
+ key
+ (let* ((module (load-module code max-registers))
+ (new-handle (module-get-function module func-name))
+ (tex-refs (mapcar #'(lambda (name)
+ (module-get-texture module name))
+ textures)))
+ (when *log-kernel-loads*
+ (print-function-info func-name new-handle))
+ (setf (gethash key cache)
+ (if has-tex-p
+ (make-array (1+ (length tex-refs))
+ :initial-contents
+ (list* new-handle tex-refs))
+ new-handle)))))))
(defun discard-code-cache ()
- (when (valid-context-p)
- (clrhash (context-kernel-cache *current-context*))
- (maphash #'(lambda (code handle)
- (destroy-module-handle handle))
- (context-module-cache *current-context*))
- (clrhash (context-module-cache *current-context*))
- nil))
+ (when (valid-context-p)
+ (clrhash (context-kernel-cache *current-context*))
+ (maphash #'(lambda (code handle)
+ (destroy-module-handle handle))
+ (context-module-cache *current-context*))
+ (clrhash (context-module-cache *current-context*))
+ nil))
;;; Kernel parameter specification
(defconstant +ptr-size+ 4)
(defmacro make-param-set-simple (name ltype ctype cmd)
- `(defun ,name (fhandle offset value)
- (declare (optimize (safety 0) (debug 0)))
- (ffi:c-inline
- (fhandle offset value 'function-pointer)
- (:object :int ,ltype :object)
- :void
- ,(format nil "{
- if (!FOREIGN_WITH_TAGP(#0,#3)) FEerror(\"Not a function handle\",1,#0);
- {
- CUfunction fun = #0->foreign.data;
- int ofs = #1;
- ~A val = #2;
- check_error(~A);
- }
- }" ctype cmd))))
-
-(make-param-set-simple
- param-set-int :int "int" "cuParamSeti(fun,ofs,val)")
-
-(make-param-set-simple
- param-set-uint :unsigned-int "unsigned int" "cuParamSeti(fun,ofs,val)")
-
-(make-param-set-simple
- param-set-float :float "float" "cuParamSetf(fun,ofs,val)")
-
-(make-param-set-simple
- param-set-double :double "double" "cuParamSetv(fun,ofs,&val,sizeof(val))")
-
-(defun param-set-ptr (fhandle offset value)
- (declare (optimize (safety 0) (debug 0)))
- (ffi:c-inline
- (fhandle offset value 'function-pointer 'linear-buffer)
- (:object :int :object :object :object)
- :void "{
+ `(defun ,name (fhandle offset value)
+ (declare (optimize (safety 0) (debug 0)))
+ (ffi:c-inline
+ (fhandle offset value 'function-pointer)
+ (:object :int ,ltype :object)
+ :void
+ ,(format nil "{
if (!FOREIGN_WITH_TAGP(#0,#3)) FEerror(\"Not a function handle\",1,#0);
- if (!FOREIGN_WITH_TAGP(#2,#4)) FEerror(\"Not a linear buffer\",1,#2);
{
CUfunction fun = #0->foreign.data;
- LinearBuffer *pbuf = #2->foreign.data;
- if (!pbuf->device_ptr) FEerror(\"Linear buffer not allocated.\",0);
- check_error(cuParamSeti(fun,#1,pbuf->device_ptr));
+ int ofs = #1;
+ ~A val = #2;
+ check_error(~A);
}
- }"))
+ }" ctype cmd))))
+
+(make-param-set-simple param-set-int :int
+ "int" "cuParamSeti(fun,ofs,val)")
+
+(make-param-set-simple param-set-uint :unsigned-int
+ "unsigned int" "cuParamSeti(fun,ofs,val)")
+
+(make-param-set-simple param-set-float :float
+ "float" "cuParamSetf(fun,ofs,val)")
+
+(make-param-set-simple param-set-double :double
+ "double" "cuParamSetv(fun,ofs,&val,sizeof(val))")
+
+(defun param-set-ptr (fhandle offset value)
+ (declare (optimize (safety 0) (debug 0)))
+ (ffi:c-inline
+ (fhandle offset value 'function-pointer 'linear-buffer)
+ (:object :int :object :object :object)
+ :void
+ "{
+ if (!FOREIGN_WITH_TAGP(#0,#3)) FEerror(\"Not a function handle\",1,#0);
+ if (!FOREIGN_WITH_TAGP(#2,#4)) FEerror(\"Not a linear buffer\",1,#2);
+ {
+ CUfunction fun = #0->foreign.data;
+ LinearBuffer *pbuf = #2->foreign.data;
+ if (!pbuf->device_ptr) FEerror(\"Linear buffer not allocated.\",0);
+ check_error(cuParamSeti(fun,#1,pbuf->device_ptr));
+ }
+ }"))
(defun param-set-texture-1d (fhandle thandle value)
- (declare (optimize (safety 0) (debug 0)))
- (ffi:c-inline
- (fhandle thandle value
- 'function-pointer 'texture-pointer 'linear-buffer)
- (:object :object :object :object :object :object)
- :void "{
- if (!FOREIGN_WITH_TAGP(#0,#3)) FEerror(\"Not a function handle\",1,#0);
- if (!FOREIGN_WITH_TAGP(#1,#4)) FEerror(\"Not a texture handle\",1,#1);
- if (!FOREIGN_WITH_TAGP(#2,#5)) FEerror(\"Not a linear buffer\",1,#2);
- {
- CUfunction fun = #0->foreign.data;
- CUtexref tex = #1->foreign.data;
- LinearBuffer *pbuf = #2->foreign.data;
- if (!pbuf->device_ptr) FEerror(\"Linear buffer not allocated.\",0);
- if (pbuf->height != 1) FEerror(\"2D linear buffer in 1D texture.\",0);
- check_error(cuTexRefSetAddress(NULL,tex,pbuf->device_ptr,pbuf->width));
- check_error(cuParamSetTexRef(fun,CU_PARAM_TR_DEFAULT,tex));
- }
- }"))
+ (declare (optimize (safety 0) (debug 0)))
+ (ffi:c-inline
+ (fhandle thandle value
+ 'function-pointer 'texture-pointer 'linear-buffer)
+ (:object :object :object :object :object :object)
+ :void
+ "{
+ if (!FOREIGN_WITH_TAGP(#0,#3)) FEerror(\"Not a function handle\",1,#0);
+ if (!FOREIGN_WITH_TAGP(#1,#4)) FEerror(\"Not a texture handle\",1,#1);
+ if (!FOREIGN_WITH_TAGP(#2,#5)) FEerror(\"Not a linear buffer\",1,#2);
+ {
+ CUfunction fun = #0->foreign.data;
+ CUtexref tex = #1->foreign.data;
+ LinearBuffer *pbuf = #2->foreign.data;
+ if (!pbuf->device_ptr) FEerror(\"Linear buffer not allocated.\",0);
+ if (pbuf->height != 1) FEerror(\"2D linear buffer in 1D texture.\",0);
+ /*check_error(cuTexRefSetFormat(tex,CU_AD_FORMAT_FLOAT,1));*/
+ check_error(cuTexRefSetAddress(NULL,tex,pbuf->device_ptr,pbuf->width));
+ check_error(cuParamSetTexRef(fun,CU_PARAM_TR_DEFAULT,tex));
+ }
+ }"))
(defun param-set-texture-2d (fhandle thandle value)
- (declare (optimize (safety 0) (debug 0)))
- (ffi:c-inline
- (fhandle thandle value
- 'function-pointer 'texture-pointer 'linear-buffer)
- (:object :object :object :object :object :object)
- :void "{
- if (!FOREIGN_WITH_TAGP(#0,#3)) FEerror(\"Not a function handle\",1,#0);
- if (!FOREIGN_WITH_TAGP(#1,#4)) FEerror(\"Not a texture handle\",1,#1);
- if (!FOREIGN_WITH_TAGP(#2,#5)) FEerror(\"Not a linear buffer\",1,#2);
- {
- CUfunction fun = #0->foreign.data;
- CUtexref tex = #1->foreign.data;
- LinearBuffer *pbuf = #2->foreign.data;
- CUDA_ARRAY_DESCRIPTOR desc;
- if (!pbuf->device_ptr) FEerror(\"Linear buffer not allocated.\",0);
- desc.Format = CU_AD_FORMAT_FLOAT;
- desc.Height = pbuf->height;
- desc.NumChannels = 1;
- desc.Width = pbuf->width/4;
- check_error(cuTexRefSetAddress2D(tex,&desc,pbuf->device_ptr,pbuf->pitch));
- check_error(cuParamSetTexRef(fun,CU_PARAM_TR_DEFAULT,tex));
- }
- }"))
+ (declare (optimize (safety 0) (debug 0)))
+ (ffi:c-inline
+ (fhandle thandle value
+ 'function-pointer 'texture-pointer 'linear-buffer)
+ (:object :object :object :object :object :object)
+ :void
+ "{
+ if (!FOREIGN_WITH_TAGP(#0,#3)) FEerror(\"Not a function handle\",1,#0);
+ if (!FOREIGN_WITH_TAGP(#1,#4)) FEerror(\"Not a texture handle\",1,#1);
+ if (!FOREIGN_WITH_TAGP(#2,#5)) FEerror(\"Not a linear buffer\",1,#2);
+ {
+ CUfunction fun = #0->foreign.data;
+ CUtexref tex = #1->foreign.data;
+ LinearBuffer *pbuf = #2->foreign.data;
+ CUDA_ARRAY_DESCRIPTOR desc;
+ if (!pbuf->device_ptr) FEerror(\"Linear buffer not allocated.\",0);
+ desc.Format = CU_AD_FORMAT_FLOAT;
+ desc.Height = pbuf->height;
+ desc.NumChannels = 1;
+ desc.Width = pbuf->width/4;
+ check_error(cuTexRefSetAddress2D(tex,&desc,pbuf->device_ptr,pbuf->pitch));
+ check_error(cuParamSetTexRef(fun,CU_PARAM_TR_DEFAULT,tex));
+ }
+ }"))
;;; Kernel launch
(defparameter *launch-async* nil)
(defun launch-kernel (fhandle arg-size blkx blky blkz grdx grdy)
- (declare (optimize (safety 0) (debug 0)))
- (ffi:c-inline
- (fhandle 'function-pointer arg-size blkx blky blkz grdx grdy
- *launch-async* 'stream-pointer)
- (:object :object :int :int :int :int :int :int
- :object :object)
- :void "{
- if (!FOREIGN_WITH_TAGP(#0,#1)) FEerror(\"Not a function handle\",1,#0);
- {
- cl_object sref = #8;
- CUfunction fun = #0->foreign.data;
- check_error(cuParamSetSize(fun,#2));
- check_error(cuFuncSetBlockShape(fun,#3,#4,#5));
- if (sref == Cnil)
- check_error(cuLaunchGrid(fun,#6,#7));
- else if (sref == Ct)
- check_error(cuLaunchGridAsync(fun,#6,#7,0));
- else {
- if (!FOREIGN_WITH_TAGP(sref,#9)) FEerror(\"Not a stream handle\",1,sref);
- CUstream str = sref->foreign.data;
- check_error(cuLaunchGridAsync(fun,#6,#7,str));
- }
+ (declare (optimize (safety 0) (debug 0)))
+ (ffi:c-inline
+ (fhandle 'function-pointer arg-size blkx blky blkz grdx grdy
+ *launch-async* 'stream-pointer)
+ (:object :object :int :int :int :int :int :int
+ :object :object)
+ :void
+ "{
+ if (!FOREIGN_WITH_TAGP(#0,#1)) FEerror(\"Not a function handle\",1,#0);
+ {
+ cl_object sref = #8;
+ CUfunction fun = #0->foreign.data;
+ check_error(cuParamSetSize(fun,#2));
+ check_error(cuFuncSetBlockShape(fun,#3,#4,#5));
+ if (sref == Cnil)
+ check_error(cuLaunchGrid(fun,#6,#7));
+ else if (sref == Ct)
+ check_error(cuLaunchGridAsync(fun,#6,#7,0));
+ else {
+ if (!FOREIGN_WITH_TAGP(sref,#9)) FEerror(\"Not a stream handle\",1,sref);
+ CUstream str = sref->foreign.data;
+ check_error(cuLaunchGridAsync(fun,#6,#7,str));
}
- }"))
+ }
+ }"))
(defmacro with-async (&body code)
- `(let ((*launch-async*
- (context-async-stream *current-context*)))
- ,@code))
+ `(let ((*launch-async* (context-async-stream *current-context*)))
+ ,@code))
(defun synchronize ()
- (ffi:c-inline
- () ()
- :void "{
- check_error(cuCtxSynchronize());
- }"))
+ (ffi:c-inline
+ () ()
+ :void
+ "{ check_error(cuCtxSynchronize()); }"))
View
60 expr/canonify.lisp
@@ -1,4 +1,4 @@
-;;;; kate: indent-width 4; replace-tabs yes; space-indent on;
+;;; -*- mode:lisp; indent-tabs-mode: nil; -*-
(in-package fast-compute)
@@ -25,10 +25,10 @@
(canonic-expr-id expr) (canonic-expr-node expr)))
(defmethod compare ((e1 canonic-expr) (e2 canonic-expr))
- (let ((cv (compare-slots e1 e2 #'canonic-expr-id)))
- (if (and (eql cv :equal) (not (eql e1 e2)))
- :unequal
- cv)))
+ (let ((cv (compare-slots e1 e2 #'canonic-expr-id)))
+ (if (and (eql cv :equal) (not (eql e1 e2)))
+ :unequal
+ cv)))
(define-cross-type-compare-methods canonic-expr)
@@ -88,7 +88,7 @@
(stable-sort (mapcar #'lookup args)
#'less?))))
(_
- (mapcar #'lookup-unwrap expr)))))
+ (mapcar #'lookup-unwrap expr)))))
(lookup expr))))
(defun canonify-tree (expr &key (cache *canonify-cache*))
@@ -124,31 +124,31 @@
(lookup-canonic expr2)))
(defun common-sublist (list1 list2)
- (cond
- ((null list1) nil)
- ((null list2) nil)
- ((eql (car list1) (car list2))
- (cons (car list1)
- (common-sublist (cdr list1) (cdr list2))))
- ((canonify-compare (car list1) (car list2))
- (common-sublist (cdr list1) list2))
- ((canonify-compare (car list2) (car list1))
- (common-sublist (cdr list1) list2))
- (t
- (common-sublist (cdr list1) (cdr list2)))))
+ (cond
+ ((null list1) nil)
+ ((null list2) nil)
+ ((eql (car list1) (car list2))
+ (cons (car list1)
+ (common-sublist (cdr list1) (cdr list2))))
+ ((canonify-compare (car list1) (car list2))
+ (common-sublist (cdr list1) list2))
+ ((canonify-compare (car list2) (car list1))
+ (common-sublist (cdr list1) list2))
+ (t
+ (common-sublist (cdr list1) (cdr list2)))))
(defun subtract-list (list1 list2)
- (cond
- ((null list2) list1)
- ((null list1)
- (error "Cannot subtract ~A from NIL" list2))
- ((eql (car list1) (car list2))
- (subtract-list (cdr list1) (cdr list2)))
- ((canonify-compare (car list1) (car list2))
- (cons (car list1)
- (subtract-list (cdr list1) list2)))
- (t
- (error "Cannot subtract ~A from ~A" list2 list1))))
+ (cond
+ ((null list2) list1)
+ ((null list1)
+ (error "Cannot subtract ~A from NIL" list2))
+ ((eql (car list1) (car list2))
+ (subtract-list (cdr list1) (cdr list2)))
+ ((canonify-compare (car list1) (car list2))
+ (cons (car list1)
+ (subtract-list (cdr list1) list2)))
+ (t
+ (error "Cannot subtract ~A from ~A" list2 list1))))
;;; Helper macros
@@ -167,7 +167,7 @@
(labels ((recurse (expr)
(use-cache (expr memo)
(multiple-value-bind (subs-res final) (funcall engine expr)
- (if final ; Cut off recursion
+ (if final ; Cut off recursion
(or subs-res (lookup-canonic expr))
(let* ((new-expr (if subs-res
(canonic-expr-force-unwrap subs-res)
View
317 expr/cse.lisp
@@ -1,176 +1,175 @@
-;;;; kate: indent-width 4; replace-tabs yes; space-indent on;
+;;; -*- mode:lisp; indent-tabs-mode: nil; -*-
(in-package fast-compute)
(defun count-subexprs-rec (expr cnt-table)
- ;; Increment the counter, and avoid walking trees twice
- (when (> (incf-nil (gethash expr cnt-table)) 1)
- (return-from count-subexprs-rec nil))
- (match expr
- ((type atom var)
- nil)
- (`(index ,@_)
- nil)
- (`(declare ,@_)
- nil)
- (`(multivalue-data ,@_)
- nil)
- (`(loop-range ,(ranging-spec _ :min minv :max maxv) ,@body)
- (count-subexprs-rec minv cnt-table)
- (count-subexprs-rec maxv cnt-table)
- (dolist (item body)
- (count-subexprs-rec item cnt-table)))
- (`(temporary ,_ ,dims ,@_)
- (dolist (titem dims)
- (count-subexprs-rec titem cnt-table)))
- (`(setf ,target ,val)
- (dolist (titem (cdr target))
- (count-subexprs-rec titem cnt-table))
- (count-subexprs-rec val cnt-table))
- (`(safety-check ,checks ,@body)
- (dolist (check checks)
- (count-subexprs-rec (first check) cnt-table))
- (dolist (item body)
- (count-subexprs-rec item cnt-table)))
- (_
- (dolist (item (cdr expr))
- (count-subexprs-rec item cnt-table)))))
+ ;; Increment the counter, and avoid walking trees twice
+ (when (> (incf-nil (gethash expr cnt-table)) 1)
+ (return-from count-subexprs-rec nil))
+ (match expr
+ ((type atom var)
+ nil)
+ (`(index ,@_)
+ nil)
+ (`(declare ,@_)
+ nil)
+ (`(multivalue-data ,@_)
+ nil)
+ (`(loop-range ,(ranging-spec _ :min minv :max maxv) ,@body)
+ (count-subexprs-rec minv cnt-table)
+ (count-subexprs-rec maxv cnt-table)
+ (dolist (item body)
+ (count-subexprs-rec item cnt-table)))
+ (`(temporary ,_ ,dims ,@_)
+ (dolist (titem dims)
+ (count-subexprs-rec titem cnt-table)))
+ (`(setf ,target ,val)
+ (dolist (titem (cdr target))
+ (count-subexprs-rec titem cnt-table))
+ (count-subexprs-rec val cnt-table))
+ (`(safety-check ,checks ,@body)
+ (dolist (check checks)
+ (count-subexprs-rec (first check) cnt-table))
+ (dolist (item body)
+ (count-subexprs-rec item cnt-table)))
+ (_
+ (dolist (item (cdr expr))
+ (count-subexprs-rec item cnt-table)))))
(defun count-subexprs (expr)
- (let ((table (make-hash-table :test #'equal)))
- (count-subexprs-rec expr table)
- table))
+ (let ((table (make-hash-table :test #'equal)))
+ (count-subexprs-rec expr table)
+ table))
(defun build-factor-table (cnt-table pull-symbols)
- (let ((fct-table (make-hash-table :test #'equal)))
- (maphash
- #'(lambda (expr cnt)
- ;; Factor common subexpressions
- (when (> cnt 1)
- (setf (gethash expr fct-table) t))
- ;; Factor symbols
- (when (and pull-symbols (symbolp expr))
- (setf (gethash expr fct-table) t))
- ;; Factor temporaries and arrays
- (ifmatch `(,(or 'temporary 'multivalue-data) ,@_) expr
- (setf (gethash expr fct-table) t))
- ;; Factor references used on the lhs of an
- ;; assignment, and also somewhere else
- (when (and (consp expr)
- (eql (car expr) 'setf)
- (>= (or (gethash (second expr) cnt-table) 0) 1))
- (setf (gethash (second expr) fct-table) t))
- ;; Factor loop-invariant subexpressions
- (when (and (consp expr)
+ (let ((fct-table (make-hash-table :test #'equal)))
+ (maphash #'(lambda (expr cnt)
+ ;; Factor common subexpressions
+ (when (> cnt 1)
+ (setf (gethash expr fct-table) t))
+ ;; Factor symbols
+ (when (and pull-symbols (symbolp expr))
+ (setf (gethash expr fct-table) t))
+ ;; Factor temporaries and arrays
+ (ifmatch `(,(or 'temporary 'multivalue-data) ,@_) expr
+ (setf (gethash expr fct-table) t))
+ ;; Factor references used on the lhs of an
+ ;; assignment, and also somewhere else
+ (when (and (consp expr)
+ (eql (car expr) 'setf)
+ (>= (or (gethash (second expr) cnt-table) 0) 1))
+ (setf (gethash (second expr) fct-table) t))
+ ;; Factor loop-invariant subexpressions
+ (when (and (consp expr)
(not (find (car expr)
- '(index safety-check loop-range setf))))
- (let ((level (min-loop-level expr)))
- (dolist (sub (cdr expr))
- (unless (eql level (min-loop-level sub))
- (setf (gethash sub fct-table) t))))))
- cnt-table)
- (maphash
- #'(lambda (expr flag)
- ;; Don't factor constants and loop vars
- (when (match expr
- ((type number _) t)
- ((type string _) t)
- ('nil t)
- (`(_grp ,@_) t)
- ((type symbol var) (not pull-symbols))
- ((ranging-spec _ :loop-level level) level)
- (_ nil))
- (remhash expr fct-table)))
- fct-table)
- fct-table))
+ '(index safety-check loop-range setf))))
+ (let ((level (min-loop-level expr)))
+ (dolist (sub (cdr expr))
+ (unless (eql level (min-loop-level sub))
+ (setf (gethash sub fct-table) t))))))
+ cnt-table)
+ (maphash #'(lambda (expr flag)
+ ;; Don't factor constants and loop vars
+ (when (match expr
+ ((type number _) t)
+ ((type string _) t)
+ ('nil t)
+ (`(_grp ,@_) t)
+ ((type symbol var) (not pull-symbols))
+ ((ranging-spec _ :loop-level level) level)
+ (_ nil))
+ (remhash expr fct-table)))
+ fct-table)
+ fct-table))
(defun factor-vars-dumb (expr fct-table cur-level var-list nil-list)
- (if (or (atom expr) (find (car expr) '(index)))
- expr
- (mapcar-save-old
- #'(lambda (x) (factor-vars-rec x fct-table cur-level var-list nil-list))
- expr)))
+ (if (or (atom expr) (find (car expr) '(index)))
+ expr
+ (mapcar-save-old #'(lambda (x)
+ (factor-vars-rec x fct-table
+ cur-level var-list nil-list))
+ expr)))
(defun factor-vars-rec (expr fct-table cur-level var-list nil-list)
- (match expr
- (`(declare ,@_) expr)
- (`(quote ,@_) expr)
- (`(loop-range ,range ,@body)
- (let* ((range-info (ranging-info range))
- (level (range-loop-level range-info))
- (level-gap (if cur-level (- cur-level level 1) 0))
- (pad-list (loop for i from 1 to level-gap collect nil))
- (vlist (cons nil (nconc pad-list var-list)))
- (nbody (factor-vars-rec body fct-table level vlist nil-list)))
- (unless (or (null cur-level) (< level cur-level))
- (error "Invalid loop nesting: ~A at level ~A" expr cur-level))
- ;; Factor the loop range args
- (setf (range-min range-info)
- (factor-vars-rec (range-min range-info)
- fct-table cur-level var-list nil-list))
- (setf (range-max range-info)
- (factor-vars-rec (range-max range-info)
- fct-table cur-level var-list nil-list))
- ;; Pop the substitutions
- (dolist (subs (car vlist))
- (setf (gethash
- (get (first subs) 'full-expr)
- fct-table)
- t))
- ;; Verify that the gap is empty
- (loop for i from 1 to level-gap
- do (unless (null (nth i vlist))
- (error "Invalid loop nesting: ~A of level ~A in gap ~A to ~A"
- (nth i vlist) (+ level i) level cur-level)))
- ;; Wrap with let if needed
- (if (car vlist)
- `(loop-range ,range (let* ,(nreverse (car vlist)) ,@nbody))
- `(loop-range ,range ,@nbody))))
- (`(setf ,target ,val)
- `(setf ,(factor-vars-dumb target fct-table cur-level var-list nil-list)
- ,(optimize-tree
- (factor-vars-rec val fct-table cur-level var-list nil-list))))
- (`(safety-check ,checks ,@body)
- (cons-save-old expr 'safety-check
- (cons-save-old (cdr expr)
- (mapcar-save-old
- #'(lambda (check)
- (cons-save-old check
- (factor-vars-rec (car check)
- fct-table cur-level var-list nil-list)
- (cdr check)))
- checks)
- (factor-vars-dumb body
- fct-table cur-level var-list nil-list))))
- (_
- (let ((factor (gethash expr fct-table)))
- (cond
- ((eql factor nil)
- (factor-vars-dumb expr fct-table cur-level var-list nil-list))
- ((eql factor t)
- (let* ((sym (get-new-symbol))
- (nexpr (factor-vars-dumb expr fct-table cur-level var-list nil-list))
- (level (min-loop-level expr))
- (expr-pair (list sym (optimize-tree nexpr))))
- (setf (get sym 'let-clause) expr-pair)
- (setf (get sym 'full-expr) expr)
- (setf (get sym 'loop-level) level)
- (cond
- ((eql level nil)
- (push expr-pair (car nil-list)))
- ((and cur-level (>= level cur-level))
- (push expr-pair (nth (- level cur-level) var-list)))
- (t
- (error "Invalid level ~A at current ~A" level cur-level)))
- (setf (gethash expr fct-table) sym)))
- (t factor))))))
+ (match expr
+ (`(declare ,@_) expr)
+ (`(quote ,@_) expr)
+ (`(loop-range ,range ,@body)
+ (let* ((range-info (ranging-info range))
+ (level (range-loop-level range-info))
+ (level-gap (if cur-level (- cur-level level 1) 0))
+ (pad-list (loop for i from 1 to level-gap collect nil))
+ (vlist (cons nil (nconc pad-list var-list)))
+ (nbody (factor-vars-rec body fct-table level vlist nil-list)))
+ (unless (or (null cur-level) (< level cur-level))
+ (error "Invalid loop nesting: ~A at level ~A" expr cur-level))
+ ;; Factor the loop range args
+ (setf (range-min range-info)
+ (factor-vars-rec (range-min range-info)
+ fct-table cur-level var-list nil-list))
+ (setf (range-max range-info)
+ (factor-vars-rec (range-max range-info)
+ fct-table cur-level var-list nil-list))
+ ;; Pop the substitutions
+ (dolist (subs (car vlist))
+ (setf (gethash
+ (get (first subs) 'full-expr)
+ fct-table)
+ t))
+ ;; Verify that the gap is empty
+ (loop for i from 1 to level-gap
+ do (unless (null (nth i vlist))
+ (error "Invalid loop nesting: ~A of level ~A in gap ~A to ~A"
+ (nth i vlist) (+ level i) level cur-level)))
+ ;; Wrap with let if needed
+ (if (car vlist)
+ `(loop-range ,range (let* ,(nreverse (car vlist)) ,@nbody))
+ `(loop-range ,range ,@nbody))))
+ (`(setf ,target ,val)
+ `(setf ,(factor-vars-dumb target fct-table cur-level var-list nil-list)
+ ,(optimize-tree
+ (factor-vars-rec val fct-table cur-level var-list nil-list))))
+ (`(safety-check ,checks ,@body)
+ (cons-save-old expr 'safety-check
+ (cons-save-old (cdr expr)
+ (mapcar-save-old
+ #'(lambda (check)
+ (cons-save-old check
+ (factor-vars-rec (car check)
+ fct-table cur-level var-list nil-list)
+ (cdr check)))
+ checks)
+ (factor-vars-dumb body
+ fct-table cur-level var-list nil-list))))
+ (_
+ (let ((factor (gethash expr fct-table)))
+ (cond
+ ((eql factor nil)
+ (factor-vars-dumb expr fct-table cur-level var-list nil-list))
+ ((eql factor t)
+ (let* ((sym (get-new-symbol))
+ (nexpr (factor-vars-dumb expr fct-table cur-level var-list nil-list))
+ (level (min-loop-level expr))
+ (expr-pair (list sym (optimize-tree nexpr))))
+ (setf (get sym 'let-clause) expr-pair)
+ (setf (get sym 'full-expr) expr)
+ (setf (get sym 'loop-level) level)
+ (cond
+ ((eql level nil)
+ (push expr-pair (car nil-list)))
+ ((and cur-level (>= level cur-level))
+ (push expr-pair (nth (- level cur-level) var-list)))
+ (t
+ (error "Invalid level ~A at current ~A" level cur-level)))
+ (setf (gethash expr fct-table) sym)))
+ (t factor))))))
(defun factor-vars (expr fct-table)
- (let* ((nil-list (list nil))
- (nexpr (factor-vars-rec expr fct-table nil nil nil-list)))
- (if (car nil-list)
- `(let* ,(nreverse (car nil-list)) ,nexpr)
- nexpr)))
+ (let* ((nil-list (list nil))
+ (nexpr (factor-vars-rec expr fct-table nil nil nil-list)))
+ (if (car nil-list)
+ `(let* ,(nreverse (car nil-list)) ,nexpr)
+ nexpr)))
(defun code-motion (expr &key pull-symbols)
- (factor-vars expr (build-factor-table (count-subexprs expr) pull-symbols)))
+ (factor-vars expr (build-factor-table (count-subexprs expr) pull-symbols)))
View
116 expr/form-defs.lisp
@@ -1,4 +1,4 @@
-;;;; kate: indent-width 4; replace-tabs yes; space-indent on;
+;;; -*- mode:lisp; indent-tabs-mode: nil; -*-
(in-package fast-compute)
@@ -48,7 +48,7 @@
;;; Array dimension macro
(defmacro arr-dim (arr idx rank)
- `(array-dimension ,arr ,idx))
+ `(array-dimension ,arr ,idx))
;;; Expression flattening barrier
@@ -57,31 +57,31 @@
;;; Temporary buffer
(defmacro temporary (name dims level &optional mode)
- (if (null dims)
- 0.0
- `(the (array single-float)
- (make-array (list ,@dims)
- :element-type 'single-float
- :initial-element 0.0))))
+ (if (null dims)
+ 0.0
+ `(the (array single-float)
+ (make-array (list ,@dims)
+ :element-type 'single-float
+ :initial-element 0.0))))
(defmacro tmp-ref (temp &rest dims)
- (if (null dims)
- temp
- `(aref ,temp ,@dims)))
+ (if (null dims)
+ temp
+ `(aref ,temp ,@dims)))
;;; Annotated range loop
(defmacro loop-range (rangespec &body code)
- (letmatch (ranging-spec var minv maxv stepv) rangespec
- (if (> stepv 0)
- `(do ((,var ,minv (+ ,var ,stepv)))
- ((> ,var ,maxv) nil)
- (declare (type fixnum ,var))
- ,@code)
- `(do ((,var ,maxv (- ,var ,(- stepv))))
- ((< ,var ,minv) nil)
- (declare (type fixnum ,var))
- ,@code))))
+ (letmatch (ranging-spec var minv maxv stepv) rangespec
+ (if (> stepv 0)
+ `(do ((,var ,minv (+ ,var ,stepv)))
+ ((> ,var ,maxv) nil)
+ (declare (type fixnum ,var))
+ ,@code)
+ `(do ((,var ,maxv (- ,var ,(- stepv))))
+ ((< ,var ,minv) nil)
+ (declare (type fixnum ,var))
+ ,@code))))
;;; Range annotation helpers
@@ -115,27 +115,27 @@
;;; Factored expression helpers
(defun get-full-expr (expr)
- (cond
- ((symbolp expr)
- (or (get expr 'full-expr) expr))
- ((consp expr)
- (mapcar-save-old #'get-full-expr expr))
- (t
- expr)))
+ (cond
+ ((symbolp expr)
+ (or (get expr 'full-expr) expr))
+ ((consp expr)
+ (mapcar-save-old #'get-full-expr expr))
+ (t
+ expr)))
(defun unwrap-factored (expr)
- (let ((full-expr (if (symbolp expr) (get expr 'let-clause))))
- (or (cadr full-expr) expr)))
+ (let ((full-expr (if (symbolp expr) (get expr 'let-clause))))
+ (or (cadr full-expr) expr)))
(defun recurse-factored (fun expr &rest args)
- (apply fun (unwrap-factored expr) args))
+ (apply fun (unwrap-factored expr) args))
;;; Fixed index expression predicate
(defun index-expr-p (expr)
- (or (numberp expr)
- (and (consp expr)
- (find (car expr)
+ (or (numberp expr)
+ (and (consp expr)
+ (find (car expr)
'(+ - * / 1+ 1- floor ceiling mod rem truncate index)))))
;;; Tree walker for skipping structure
@@ -166,38 +166,38 @@
(mapcar-save-old func dims)
tail))
(_
- (mapcar-save-old func expr))))
+ (mapcar-save-old func expr))))
(defun apply-skipping-structure (fun expr args)
- (match expr
- (`(progn ,@rest)
- (dolist (item rest)
- (apply-skipping-structure fun item args)))
- (`(,(or 'let 'let* 'symbol-macrolet 'loop-range) ,_ ,@rest)
- (dolist (item rest)
- (apply-skipping-structure fun item args)))
- (`(safety-check ,checks ,@rest)
- (dolist (item checks)
- (apply-skipping-structure fun (first item) args))
- (dolist (item rest)
- (apply-skipping-structure fun item args)))
- (`(setf ,_ ,_)
- (apply fun expr args))
- (`(declare ,@_) nil)
- (_
-; (format t "Unknown structure statement: ~A" expr)
- (apply fun expr args))))
+ (match expr
+ (`(progn ,@rest)
+ (dolist (item rest)
+ (apply-skipping-structure fun item args)))
+ (`(,(or 'let 'let* 'symbol-macrolet 'loop-range) ,_ ,@rest)
+ (dolist (item rest)
+ (apply-skipping-structure fun item args)))
+ (`(safety-check ,checks ,@rest)
+ (dolist (item checks)
+ (apply-skipping-structure fun (first item) args))
+ (dolist (item rest)
+ (apply-skipping-structure fun item args)))
+ (`(setf ,_ ,_)
+ (apply fun expr args))
+ (`(declare ,@_) nil)
+ (_
+ ;; (format t "Unknown structure statement: ~A" expr)
+ (apply fun expr args))))
;;; Misc
(defun range-band-master (range)
- (let ((idx (second range)))
- (or (get idx 'band-master)
- idx)))
+ (let ((idx (second range)))
+ (or (get idx 'band-master)
+ idx)))
(defun prepend-loop-item (rloop entry)
- (setf (cddr rloop)
+ (setf (cddr rloop)
(cons entry (cddr rloop))))
(defun append-loop-item (rloop entry)
- (nconc rloop (list entry)))
+ (nconc rloop (list entry)))
View
227 expr/let-utils.lisp
@@ -1,142 +1,137 @@
-;;;; kate: indent-width 4; replace-tabs yes; space-indent on;
+;;; -*- mode:lisp; indent-tabs-mode: nil; -*-
(in-package fast-compute)
(defun replace-let (let-data replace-tbl)
- (let ((new-defs (mapcar-save-old
- #'(lambda (item)
- (cons-save-old item
- (car item)
- (replace-unquoted (cdr item) replace-tbl)))
- (car let-data)))
- (new-table (set-difference replace-tbl (car let-data) :key #'car)))
- (cons-save-old let-data
- new-defs
- (mapcar-save-old
- #'(lambda (subexpr) (replace-unquoted subexpr new-table))
- (cdr let-data)))))
+ (let ((new-defs (mapcar-save-old
+ #'(lambda (item)
+ (cons-save-old item
+ (car item)
+ (replace-unquoted (cdr item) replace-tbl)))
+ (car let-data)))
+ (new-table (set-difference replace-tbl (car let-data) :key #'car)))
+ (cons-save-old let-data
+ new-defs
+ (mapcar-save-old #'(lambda (subexpr)
+ (replace-unquoted subexpr new-table))
+ (cdr let-data)))))
(defun replace-let* (let-data replace-tbl)
- (let* ((new-table replace-tbl)
- (new-defs (mapcar-save-old
- #'(lambda (item)
- (let ((newv (replace-unquoted (cdr item) new-table)))
- (setf new-table
- (remove (car item) new-table :key #'car))
- (cons-save-old item (car item) newv)))
- (car let-data))))
- (cons-save-old let-data
- new-defs
- (mapcar-save-old
- #'(lambda (subexpr) (replace-unquoted subexpr new-table))
- (cdr let-data)))))
+ (let* ((new-table replace-tbl)
+ (new-defs (mapcar-save-old
+ #'(lambda (item)
+ (let ((newv (replace-unquoted (cdr item) new-table)))
+ (setf new-table
+ (remove (car item) new-table :key #'car))
+ (cons-save-old item (car item) newv)))
+ (car let-data))))
+ (cons-save-old let-data
+ new-defs
+ (mapcar-save-old #'(lambda (subexpr)
+ (replace-unquoted subexpr new-table))
+ (cdr let-data)))))
(defun replace-unquoted (expr replace-tbl)
- (let ((target (cdr (assoc expr replace-tbl))))
- (cond
- (target target)
- ((atom expr) expr)
- ((null replace-tbl) expr)
- ((eql (first expr) 'quote) expr)
- ((eql (first expr) 'let)
- (cons-save-old expr
- 'let (replace-let (cdr expr) replace-tbl)))
- ((eql (first expr) 'symbol-macrolet)
- (cons-save-old expr
- 'symbol-macrolet (replace-let (cdr expr) replace-tbl)))
- ((eql (first expr) 'let*)
- (cons-save-old expr
- 'let* (replace-let* (cdr expr) replace-tbl)))
- (t (cons-save-old expr
- (replace-unquoted (car expr) replace-tbl)
- (replace-unquoted (cdr expr) replace-tbl))))))
+ (let ((target (cdr (assoc expr replace-tbl))))
+ (cond
+ (target target)
+ ((atom expr) expr)
+ ((null replace-tbl) expr)
+ ((eql (first expr) 'quote) expr)
+ ((eql (first expr) 'let)
+ (cons-save-old expr
+ 'let (replace-let (cdr expr) replace-tbl)))
+ ((eql (first expr) 'symbol-macrolet)
+ (cons-save-old expr
+ 'symbol-macrolet (replace-let (cdr expr) replace-tbl)))
+ ((eql (first expr) 'let*)
+ (cons-save-old expr
+ 'let* (replace-let* (cdr expr) replace-tbl)))
+ (t (cons-save-old expr
+ (replace-unquoted (car expr) replace-tbl)
+ (replace-unquoted (cdr expr) replace-tbl))))))
(defun wrap-progn (code)
- (if (cdr code) `(progn ,@code) (car code)))
+ (if (cdr code) `(progn ,@code) (car code)))
(defun wrap-progn-filter (code)
- (wrap-progn
- (mapcan
- #'(lambda (expr)
- (match expr
- ('nil nil)
- (`(declare ,@_) nil)
- (`(progn ,@code) code)
- (_ (list expr))))
- code)))
+ (wrap-progn
+ (mapcan #'(lambda (expr)
+ (match expr
+ ('nil nil)
+ (`(declare ,@_) nil)
+ (`(progn ,@code) code)
+ (_ (list expr))))
+ code)))
(defun convert-letv-exprs (exprs &key pull-last)
- (let* ((elst (if (and (consp exprs) (eql (car exprs) 'progn))
- (cdr exprs)
- (list exprs)))
- (nonull (remove-if-not #'identity elst))
- (body (if pull-last (last nonull) nil))
- (noblk (if pull-last (butlast nonull) nonull))
- (items (mapcar
- #'(lambda (expr)
- (match expr
- (`(setf ,(type symbol var) ,vexpr)
- (list var vexpr))
- (_