Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 424 lines (352 sloc) 15.587 kb
eaf2f0d Initial commit.
Nathan Froyd authored
1 ;;;; common.lisp -- efficient implementations of mod32 arithmetic and macros
2
3 ;;; Functions in this file are intended to be fast
4 (in-package :crypto)
5
6 (defmacro defconst (name value)
7 `(defconstant ,name
8 (if (boundp ',name)
9 (symbol-value ',name)
10 ,value)))
11
12 ;;; CMUCL and SBCL both have an internal type for this, but we'd like to
13 ;;; be portable, so we define our own.
14
15 (deftype index () '(mod #.array-dimension-limit))
16 (deftype index+1 () `(mod ,(1+ array-dimension-limit)))
17
18 ;;; We write something like this all over the place.
19
20 (deftype simple-octet-vector (&optional length)
21 (let ((length (or length '*)))
22 `(simple-array (unsigned-byte 8) (,length))))
23
24
25 ;;; a global specification of optimization settings
26
27 (eval-when (:compile-toplevel :load-toplevel :execute)
28 (defun burn-baby-burn ()
29 '(optimize (speed 3) (safety 0) (space 0)
30 (debug 0) (compilation-speed 0)))
31
32 (defun hold-me-back ()
33 '(declare (optimize (speed 3) (space 0) (compilation-speed 0)
34 #-cmu (safety 1) #-cmu (debug 1)
35 #+cmu (safety 0) #+cmu (debug 0))
36 #+cmu (ext:optimize-interface (safety 1) (debug 1))))
37 ) ; EVAL-WHEN
38
39
40 ;;; extracting individual bytes from integers
41
42 ;;; We used to declare these functions with much stricter types (e.g.
43 ;;; (UNSIGNED-BYTE 32) as the lone argument), but we need to access
44 ;;; bytes of both 32-bit and 64-bit words and the types would just get
45 ;;; in our way. We declare these functions as inline; a good Common
46 ;;; Lisp compiler should be able to generate efficient code from the
47 ;;; declarations at the point of the call.
48
49 ;;; These functions are named according to big-endian conventions. The
50 ;;; comment is here because I always forget and need to be reminded.
51 #.(loop for i from 1 to 8
ce2fbc4 Bob Uhl Fixed bug with string case
eadmund authored
52 collect (let ((name (read-from-string (format nil "~:R-~A" i '#:byte))))
eaf2f0d Initial commit.
Nathan Froyd authored
53 `(progn
54 (declaim (inline ,name))
55 (declaim (ftype (function (unsigned-byte) (unsigned-byte 8)) ,name))
56 (defun ,name (ub)
57 (declare (type unsigned-byte ub))
58 (ldb (byte 8 ,(* 8 (1- i))) ub)))) into forms
59 finally (return `(progn ,@forms)))
60
61
62 ;;; fetching/storing appropriately-sized integers from octet vectors
63
64 (eval-when (:compile-toplevel :load-toplevel :execute)
65 (defun ubref-fun-name (bitsize big-endian-p)
b6461b4 Nathan Froyd add dependency on nibbles for faster N-bit accesses
authored
66 (nibbles::byte-ref-fun-name bitsize nil big-endian-p))
eaf2f0d Initial commit.
Nathan Froyd authored
67 ) ; EVAL-WHEN
68
69
70 ;;; efficient 32-bit arithmetic, which a lot of algorithms require
71
d34b2b8 Nathan Froyd dial back the inlining for non-mod-wordsize implementations
authored
72 (declaim #+ironclad-fast-mod32-arithmetic (inline mod32+)
eaf2f0d Initial commit.
Nathan Froyd authored
73 (ftype (function ((unsigned-byte 32) (unsigned-byte 32)) (unsigned-byte 32)) mod32+))
74 (defun mod32+ (a b)
75 (declare (type (unsigned-byte 32) a b))
76 (ldb (byte 32 0) (+ a b)))
77
78 #+cmu
79 (define-compiler-macro mod32+ (a b)
80 `(ext:truly-the (unsigned-byte 32) (+ ,a ,b)))
81
82 #+sbcl
83 (define-compiler-macro mod32+ (a b)
84 `(ldb (byte 32 0) (+ ,a ,b)))
85
86 ;;; mostly needed for CAST*
d34b2b8 Nathan Froyd dial back the inlining for non-mod-wordsize implementations
authored
87 (declaim #+ironclad-fast-mod32-arithmetic (inline mod32-)
eaf2f0d Initial commit.
Nathan Froyd authored
88 (ftype (function ((unsigned-byte 32) (unsigned-byte 32)) (unsigned-byte 32)) mod32-))
89
90 (defun mod32- (a b)
91 (declare (type (unsigned-byte 32) a b))
92 (ldb (byte 32 0) (- a b)))
93
94 #+cmu
95 (define-compiler-macro mod32- (a b)
96 `(ext:truly-the (unsigned-byte 32) (- ,a ,b)))
97
98 #+sbcl
99 (define-compiler-macro mod32- (a b)
100 `(ldb (byte 32 0) (- ,a ,b)))
101
102 ;;; mostly needed for RC6
d34b2b8 Nathan Froyd dial back the inlining for non-mod-wordsize implementations
authored
103 (declaim #+ironclad-fast-mod32-arithmetic (inline mod32*)
eaf2f0d Initial commit.
Nathan Froyd authored
104 (ftype (function ((unsigned-byte 32) (unsigned-byte 32)) (unsigned-byte 32)) mod32*))
105
106 (defun mod32* (a b)
107 (declare (type (unsigned-byte 32) a b))
108 (ldb (byte 32 0) (* a b)))
109
110 #+cmu
111 (define-compiler-macro mod32* (a b)
112 `(ext:truly-the (unsigned-byte 32) (* ,a ,b)))
113
114 #+sbcl
115 (define-compiler-macro mod32* (a b)
116 `(ldb (byte 32 0) (* ,a ,b)))
117
d34b2b8 Nathan Froyd dial back the inlining for non-mod-wordsize implementations
authored
118 (declaim #+ironclad-fast-mod32-arithmetic (inline mod32ash)
eaf2f0d Initial commit.
Nathan Froyd authored
119 (ftype (function ((unsigned-byte 32) (integer -31 31)) (unsigned-byte 32)) mod32ash))
120
121 (defun mod32ash (num count)
122 (declare (type (unsigned-byte 32) num))
123 (declare (type (integer -31 31) count))
124 (ldb (byte 32 0) (ash num count)))
125
126 #+sbcl
127 (define-compiler-macro mod32ash (num count)
128 ;; work around SBCL optimizing bug as described by APD:
129 ;; http://www.caddr.com/macho/archives/sbcl-devel/2004-8/3877.html
130 `(logand #xffffffff (ash ,num ,count)))
131
d34b2b8 Nathan Froyd dial back the inlining for non-mod-wordsize implementations
authored
132 (declaim #+ironclad-fast-mod32-arithmetic (inline mod32lognot)
eaf2f0d Initial commit.
Nathan Froyd authored
133 (ftype (function ((unsigned-byte 32)) (unsigned-byte 32)) mod32lognot))
134
135 (defun mod32lognot (num)
136 (ldb (byte 32 0) (lognot num)))
137
138 #+sbcl
139 (define-compiler-macro mod32lognot (num)
140 `(ldb (byte 32 0) (lognot ,num)))
141
d34b2b8 Nathan Froyd dial back the inlining for non-mod-wordsize implementations
authored
142 (declaim #+ironclad-fast-mod32-arithmetic (inline rol32 ror32)
eaf2f0d Initial commit.
Nathan Froyd authored
143 (ftype (function ((unsigned-byte 32) (unsigned-byte 5)) (unsigned-byte 32)) rol32 ror32))
144
145 (defun rol32 (a s)
146 (declare (type (unsigned-byte 32) a) (type (integer 0 32) s))
147 #+cmu
148 (kernel:32bit-logical-or #+little-endian (kernel:shift-towards-end a s)
149 #+big-endian (kernel:shift-towards-start a s)
150 (ash a (- s 32)))
151 #+sbcl
152 (sb-rotate-byte:rotate-byte s (byte 32 0) a)
153 #-(or sbcl cmu)
154 (logior (ldb (byte 32 0) (ash a s)) (ash a (- s 32))))
155
156 (defun ror32 (a s)
157 (declare (type (unsigned-byte 32) a) (type (integer 0 32) s))
158 #+sbcl
159 (sb-rotate-byte:rotate-byte (- s) (byte 32 0) a)
160 #-sbcl
161 (rol32 a (- 32 s)))
162
b5727b9 Nathan Froyd add 64-bit Tiger support for x86-64 SBCL
authored
163 (declaim #+ironclad-fast-mod64-arithmetic (inline mod64+ mod64- mod64*)
eaf2f0d Initial commit.
Nathan Froyd authored
164 (ftype (function ((unsigned-byte 64) (unsigned-byte 64)) (unsigned-byte 64)) mod64+))
165 (defun mod64+ (a b)
166 (declare (type (unsigned-byte 64) a b))
167 (ldb (byte 64 0) (+ a b)))
168
169 #+sbcl
170 (define-compiler-macro mod64+ (a b)
171 `(ldb (byte 64 0) (+ ,a ,b)))
172
b5727b9 Nathan Froyd add 64-bit Tiger support for x86-64 SBCL
authored
173 (defun mod64- (a b)
174 (declare (type (unsigned-byte 64) a b))
175 (ldb (byte 64 0) (- a b)))
176
177 #+sbcl
178 (define-compiler-macro mod64- (a b)
179 `(ldb (byte 64 0) (- ,a ,b)))
180
181 (defun mod64* (a b)
182 (declare (type (unsigned-byte 64) a b))
183 (ldb (byte 64 0) (* a b)))
184
185 #+sbcl
186 (define-compiler-macro mod64* (a b)
187 `(ldb (byte 64 0) (* ,a ,b)))
188
d34b2b8 Nathan Froyd dial back the inlining for non-mod-wordsize implementations
authored
189 (declaim #+ironclad-fast-mod64-arithmetic (inline rol64 ror64)
eaf2f0d Initial commit.
Nathan Froyd authored
190 (ftype (function ((unsigned-byte 64) (unsigned-byte 6)) (unsigned-byte 64)) rol64 ror64))
191
b5727b9 Nathan Froyd add 64-bit Tiger support for x86-64 SBCL
authored
192 (declaim #+ironclad-fast-mod64-arithmetic (inline mod64ash)
193 (ftype (function ((unsigned-byte 64) (integer -63 63)) (unsigned-byte 64)) mod64ash))
194
195 (defun mod64ash (num count)
196 (declare (type (unsigned-byte 64) num))
197 (declare (type (integer -63 63) count))
198 (ldb (byte 64 0) (ash num count)))
199
200 #+sbcl
201 (define-compiler-macro mod64ash (num count)
202 ;; work around SBCL optimizing bug as described by APD:
203 ;; http://www.caddr.com/macho/archives/sbcl-devel/2004-8/3877.html
204 `(logand #xffffffffffffffff (ash ,num ,count)))
205
206 (declaim #+ironclad-fast-mod64-arithmetic (inline mod64lognot)
207 (ftype (function ((unsigned-byte 64)) (unsigned-byte 64)) mod64lognot))
208
209 (defun mod64lognot (num)
210 (ldb (byte 64 0) (lognot num)))
211
212 #+sbcl
213 (define-compiler-macro mod64lognot (num)
214 `(ldb (byte 64 0) (lognot ,num)))
215
229ddfe Nathan Froyd make 64-bit rotates use sb-rotate-byte on x86-64 sbcl
authored
216 (declaim #+ironclad-fast-mod64-arithmetic (inline rol64 ror64)
217 (ftype (function ((unsigned-byte 64) (unsigned-byte 6)) (unsigned-byte 64)) rol64 ror64))
218
eaf2f0d Initial commit.
Nathan Froyd authored
219 (defun rol64 (a s)
220 (declare (type (unsigned-byte 64) a) (type (integer 0 64) s))
229ddfe Nathan Froyd make 64-bit rotates use sb-rotate-byte on x86-64 sbcl
authored
221 #+(and sbcl ironclad-fast-mod64-arithmetic)
222 (sb-rotate-byte:rotate-byte s (byte 64 0) a)
223 #-(and sbcl ironclad-fast-mod64-arithmetic)
eaf2f0d Initial commit.
Nathan Froyd authored
224 (logior (ldb (byte 64 0) (ash a s)) (ash a (- s 64))))
225
226 (defun ror64 (a s)
227 (declare (type (unsigned-byte 64) a) (type (integer 0 64) s))
229ddfe Nathan Froyd make 64-bit rotates use sb-rotate-byte on x86-64 sbcl
authored
228 #+(and sbcl ironclad-fast-mod64-arithmetic)
229 (sb-rotate-byte:rotate-byte (- s) (byte 64 0) a)
230 #-(and sbcl ironclad-fast-mod64-arithmetic)
eaf2f0d Initial commit.
Nathan Froyd authored
231 (rol64 a (- 64 s)))
232
233
234 ;;; 64-bit utilities
235
d34b2b8 Nathan Froyd dial back the inlining for non-mod-wordsize implementations
authored
236 (declaim #+ironclad-fast-mod32-arithmetic
237 (inline %add-with-carry %subtract-with-borrow))
eaf2f0d Initial commit.
Nathan Froyd authored
238
239 ;;; The names are taken from sbcl and cmucl's bignum routines.
240 ;;; Naturally, they work the same way (which means %SUBTRACT-WITH-BORROW
241 ;;; is a little weird).
242 (defun %add-with-carry (x y carry)
243 (declare (type (unsigned-byte 32) x y)
244 (type (mod 2) carry))
245 #+(and sbcl 32-bit)
246 (sb-bignum:%add-with-carry x y carry)
247 #+(and cmucl 32-bit)
248 (bignum:%add-with-carry x y carry)
249 #-(or (and sbcl 32-bit)
250 (and cmucl 32-bit))
251 (let* ((temp (mod32+ x y))
252 (temp-carry (if (< temp x) 1 0))
253 (result (mod32+ temp carry)))
254 (values result (logior temp-carry (if (< result temp) 1 0)))))
255
256 (defun %subtract-with-borrow (x y borrow)
257 (declare (type (unsigned-byte 32) x y)
258 (type (mod 2) borrow))
259 #+(and sbcl 32-bit)
260 (sb-bignum:%subtract-with-borrow x y borrow)
261 #+(and cmucl 32-bit)
262 (bignum:%subtract-with-borrow x y borrow)
263 #-(or (and sbcl 32-bit)
264 (and cmucl 32-bit))
265 (let ((temp (mod32- x y)))
266 (cond
267 ((zerop borrow)
268 (values (mod32- temp 1) (if (< y x) 1 0)))
269 (t
270 (values temp (logxor (if (< x y) 1 0) 1))))))
271
272 ;;; efficient 8-byte -> 32-byte buffer copy routines, mostly used by
273 ;;; the hash functions. we provide big-endian and little-endian
274 ;;; versions.
275
276 (declaim (inline fill-block-le-ub8 fill-block-be-ub8))
277
278 (declaim (inline copy-to-buffer))
279 (defun copy-to-buffer (from from-offset count buffer buffer-offset)
280 "Copy a partial segment from input vector from starting at
281 from-offset and copying count elements into the 64 byte buffer
282 starting at buffer-offset."
283 (declare (type index from-offset)
284 (type (integer 0 127) count buffer-offset)
285 (type simple-octet-vector from)
286 (type simple-octet-vector buffer)
287 #.(burn-baby-burn))
288 #+cmu
289 (kernel:bit-bash-copy
290 from (+ (* vm:vector-data-offset vm:word-bits) (* from-offset vm:byte-bits))
291 buffer (+ (* vm:vector-data-offset vm:word-bits)
292 (* buffer-offset vm:byte-bits))
293 (* count vm:byte-bits))
294 #+sbcl
295 (sb-kernel:ub8-bash-copy from from-offset buffer buffer-offset count)
296 #-(or cmu sbcl)
297 (loop for buffer-index of-type (integer 0 64) from buffer-offset
298 for from-index of-type fixnum from from-offset
299 below (+ from-offset count)
300 do
301 (setf (aref buffer buffer-index) (aref from from-index))))
302
303 (defun fill-block-ub8-le (block buffer offset)
304 "Convert a complete 64 (UNSIGNED-BYTE 8) input BUFFER starting from
305 OFFSET into the given (UNSIGNED-BYTE 32) BLOCK."
306 (declare (type (integer 0 #.(- array-dimension-limit 64)) offset)
307 (type (simple-array (unsigned-byte 32) (16)) block)
308 (type simple-octet-vector buffer))
309 #+(and :cmu :little-endian)
310 (kernel:bit-bash-copy
311 buffer (+ (* vm:vector-data-offset vm:word-bits) (* offset vm:byte-bits))
312 block (* vm:vector-data-offset vm:word-bits)
313 (* 64 vm:byte-bits))
314 #+(and :sbcl :little-endian)
315 (sb-kernel:ub8-bash-copy buffer offset block 0 64)
316 #-(or (and :sbcl :little-endian) (and :cmu :little-endian))
317 (loop for i of-type (integer 0 16) from 0
318 for j of-type (integer 0 #.array-dimension-limit)
319 from offset to (+ offset 63) by 4
320 do
b6461b4 Nathan Froyd add dependency on nibbles for faster N-bit accesses
authored
321 (setf (aref block i) (nibbles:ub32ref/le buffer j))))
eaf2f0d Initial commit.
Nathan Froyd authored
322
323 (defun fill-block-ub8-be (block buffer offset)
324 "Convert a complete 64 (unsigned-byte 8) input vector segment
325 starting from offset into the given 16 word SHA1 block. Calling this function
326 without subsequently calling EXPAND-BLOCK results in undefined behavior."
327 (declare (type (integer 0 #.(- array-dimension-limit 64)) offset)
328 (type (simple-array (unsigned-byte 32) (*)) block)
329 (type simple-octet-vector buffer))
330 ;; convert to 32-bit words
331 #+(and :cmu :big-endian)
332 (kernel:bit-bash-copy
333 buffer (+ (* vm:vector-data-offset vm:word-bits)
334 (* offset vm:byte-bits))
335 block (* vm:vector-data-offset vm:word-bits)
336 (* 64 vm:byte-bits))
337 #+(and :sbcl :big-endian)
338 (sb-kernel:ub8-bash-copy buffer offset block 0 64)
339 #-(or (and :sbcl :big-endian) (and :cmu :big-endian))
340 (loop for i of-type (integer 0 16) from 0
341 for j of-type (integer 0 #.array-dimension-limit)
342 from offset to (+ offset 63) by 4
b6461b4 Nathan Froyd add dependency on nibbles for faster N-bit accesses
authored
343 do (setf (aref block i) (nibbles:ub32ref/be buffer j)))
344 (values))
eaf2f0d Initial commit.
Nathan Froyd authored
345
b5727b9 Nathan Froyd add 64-bit Tiger support for x86-64 SBCL
authored
346 (defun fill-block-ub8-le/64 (block buffer offset)
347 "Convert a complete 128 (unsigned-byte 8) input vector segment
348 starting from offset into the given 16 qword SHA1 block. Calling this
349 function without subsequently calling EXPAND-BLOCK results in undefined
350 behavior."
351 (declare (type (integer 0 #.(- array-dimension-limit 64)) offset)
352 (type (simple-array (unsigned-byte 64) (*)) block)
353 (type simple-octet-vector buffer)
354 #.(burn-baby-burn))
355 ;; convert to 64-bit words
356 #+(and :cmu :little-endian :64-bit)
357 (kernel:bit-bash-copy
358 buffer (+ (* vm:vector-data-offset vm:word-bits)
359 (* offset vm:byte-bits))
360 block (* vm:vector-data-offset vm:word-bits)
361 (* 64 vm:byte-bits))
362 #+(and :sbcl :little-endian :64-bit)
363 (sb-kernel:ub8-bash-copy buffer offset block 0 64)
364 #-(or (and :sbcl :little-endian :64-bit) (and :cmu :little-endian :64-bit))
365 (loop for i of-type (integer 0 8) from 0
366 for j of-type (integer 0 #.array-dimension-limit)
367 from offset to (+ offset 63) by 8
b6461b4 Nathan Froyd add dependency on nibbles for faster N-bit accesses
authored
368 do (setf (aref block i) (nibbles:ub64ref/le buffer j))))
b5727b9 Nathan Froyd add 64-bit Tiger support for x86-64 SBCL
authored
369
eaf2f0d Initial commit.
Nathan Froyd authored
370 (defun fill-block-ub8-be/64 (block buffer offset)
371 "Convert a complete 128 (unsigned-byte 8) input vector segment
372 starting from offset into the given 16 qword SHA1 block. Calling this
373 function without subsequently calling EXPAND-BLOCK results in undefined
374 behavior."
375 (declare (type (integer 0 #.(- array-dimension-limit 128)) offset)
376 (type (simple-array (unsigned-byte 64) (*)) block)
377 (type simple-octet-vector buffer)
378 #.(burn-baby-burn))
929b01a Nathan Froyd fix errors on 32-bit big-endian CMUCL and SBCL
authored
379 ;; convert to 64-bit words
380 #+(and :cmu :big-endian :64-bit)
eaf2f0d Initial commit.
Nathan Froyd authored
381 (kernel:bit-bash-copy
382 buffer (+ (* vm:vector-data-offset vm:word-bits)
383 (* offset vm:byte-bits))
384 block (* vm:vector-data-offset vm:word-bits)
385 (* 128 vm:byte-bits))
929b01a Nathan Froyd fix errors on 32-bit big-endian CMUCL and SBCL
authored
386 #+(and :sbcl :big-endian :64-bit)
eaf2f0d Initial commit.
Nathan Froyd authored
387 (sb-kernel:ub8-bash-copy buffer offset block 0 128)
388 #-(or (and :sbcl :big-endian) (and :cmu :big-endian))
389 (loop for i of-type (integer 0 16) from 0
390 for j of-type (integer 0 #.array-dimension-limit)
391 from offset to (+ offset 127) by 8
b6461b4 Nathan Froyd add dependency on nibbles for faster N-bit accesses
authored
392 do (setf (aref block i) (nibbles:ub64ref/be buffer j))))
28aa4f7 Nathan Froyd move XOR-BLOCK earlier in the compilation process
authored
393
394 (declaim (inline xor-block))
395 (defun xor-block (block-length input-block1 input-block2 input-block2-start
396 output-block output-block-start)
397 (declare (type (simple-array (unsigned-byte 8) (*)) input-block1 input-block2 output-block))
398 (declare (type index block-length input-block2-start output-block-start))
399 ;; this could be made more efficient by doing things in a word-wise fashion.
400 ;; of course, then we'd have to deal with fun things like boundary
401 ;; conditions and such like. maybe we could just win by unrolling the
402 ;; loop a bit. BLOCK-LENGTH should be a constant in all calls to this
403 ;; function; maybe a compiler macro would work well.
404 (dotimes (i block-length)
405 (setf (aref output-block (+ output-block-start i))
406 (logxor (aref input-block1 i)
407 (aref input-block2 (+ input-block2-start i))))))
eaf2f0d Initial commit.
Nathan Froyd authored
408
409 ;;; a few functions that are useful during compilation
410
411 (defun make-circular-list (&rest elements)
412 (let ((list (copy-seq elements)))
413 (setf (cdr (last list)) list)))
414
415 ;;; SUBSEQ is defined to error on circular lists, so we define our own
416 (defun circular-list-subseq (list start end)
417 (let* ((length (- end start))
418 (subseq (make-list length)))
419 (do ((i 0 (1+ i))
420 (list (nthcdr start list) (cdr list))
421 (xsubseq subseq (cdr xsubseq)))
422 ((>= i length) subseq)
ce2fbc4 Bob Uhl Fixed bug with string case
eadmund authored
423 (setf (first xsubseq) (first list)))))
Something went wrong with that request. Please try again.