  *
  * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
  * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ * Copyright 2024 Google LLC
  */
 
 #include <linux/linkage.h>
 	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
 	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
 
-.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
-.align 16
-.Laegis128_counter:
-	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
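+/*
+ * 16 bytes of 0xff followed by 16 bytes of 0x00, so that a 16-byte load
+ * from .Lzeropad_mask + 16 - LEN (for 1 <= LEN <= 15) yields a mask of
+ * LEN 0xff bytes followed by zero bytes.  aegis128_aesni_dec_tail uses
+ * this to zero the decrypted bytes beyond the message length.
+ */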
+.section .rodata.cst32.zeropad_mask, "aM", @progbits, 32
+.align 32
+.Lzeropad_mask:
+	.octa 0xffffffffffffffffffffffffffffffff
+	.octa 0
 
 .text
 
 .endm
 
 /*
- * __load_partial: internal ABI
- * input:
- *	LEN - bytes
- *	SRC - src
- * output:
- *	MSG - message block
- * changed:
- *	T0
- *	%r8
- *	%r9
+ * Load 1 <= LEN (%ecx) <= 15 bytes from the pointer SRC into the xmm register
+ * MSG and zeroize any remaining bytes.  Clobbers %rax, %rcx, and %r8.
  */
-SYM_FUNC_START_LOCAL(__load_partial)
-	.set LEN, %ecx
-	.set SRC, %rsi
-	xor %r9d, %r9d
-	pxor MSG, MSG
-
-	mov LEN, %r8d
-	and $0x1, %r8
-	jz .Lld_partial_1
-
-	mov LEN, %r8d
-	and $0x1E, %r8
-	add SRC, %r8
-	mov (%r8), %r9b
-
-.Lld_partial_1:
-	mov LEN, %r8d
-	and $0x2, %r8
-	jz .Lld_partial_2
-
-	mov LEN, %r8d
-	and $0x1C, %r8
-	add SRC, %r8
-	shl $0x10, %r9
-	mov (%r8), %r9w
-
-.Lld_partial_2:
-	mov LEN, %r8d
-	and $0x4, %r8
-	jz .Lld_partial_4
-
-	mov LEN, %r8d
-	and $0x18, %r8
-	add SRC, %r8
-	shl $32, %r9
-	mov (%r8), %r8d
-	xor %r8, %r9
-
-.Lld_partial_4:
-	movq %r9, MSG
-
-	mov LEN, %r8d
-	and $0x8, %r8
-	jz .Lld_partial_8
-
-	mov LEN, %r8d
-	and $0x10, %r8
-	add SRC, %r8
-	pslldq $8, MSG
-	movq (%r8), T0
-	pxor T0, MSG
-
-.Lld_partial_8:
-	RET
-SYM_FUNC_END(__load_partial)
+.macro load_partial
+	sub		$8, %ecx		/* LEN - 8 */
+	jle		.Lle8\@
+
+	/* Load 9 <= LEN <= 15 bytes: */
+	movq		(SRC), MSG		/* Load first 8 bytes */
+	mov		(SRC, %rcx), %rax	/* Load last 8 bytes */
+	neg		%ecx
+	shl		$3, %ecx
+	shr		%cl, %rax		/* Discard overlapping bytes */
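+	/*
+	 * The shift count is taken mod 64, i.e. it is (16 - LEN) * 8.  E.g.
+	 * for LEN = 11 the second load fetched bytes 3-10, and the shift by
+	 * 40 drops bytes 3-7, which the first load already covered.
+	 */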
+	pinsrq		$1, %rax, MSG
+	jmp		.Ldone\@
+
+.Lle8\@:
+	add		$4, %ecx		/* LEN - 4 */
+	jl		.Llt4\@
+
+	/* Load 4 <= LEN <= 8 bytes: */
+	mov		(SRC), %eax		/* Load first 4 bytes */
+	mov		(SRC, %rcx), %r8d	/* Load last 4 bytes */
+	jmp		.Lcombine\@
+
+.Llt4\@:
+	/* Load 1 <= LEN <= 3 bytes: */
+	add		$2, %ecx		/* LEN - 2 */
+	movzbl		(SRC), %eax		/* Load first byte */
+	jl		.Lmovq\@
+	movzwl		(SRC, %rcx), %r8d	/* Load last 2 bytes */
+.Lcombine\@:
+	shl		$3, %ecx
+	shl		%cl, %r8
+	or		%r8, %rax		/* Combine the two parts */
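+	/* Any bytes the two parts overlap on are identical, so OR is safe. */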
+.Lmovq\@:
+	movq		%rax, MSG
+.Ldone\@:
+.endm
 
 /*
- * __store_partial: internal ABI
- * input:
- *	LEN - bytes
- *	DST - dst
- * output:
- *	T0 - message block
- * changed:
- *	%r8
- *	%r9
- *	%r10
+ * Store 1 <= LEN (%ecx) <= 15 bytes from the xmm register \msg to the pointer
+ * DST.  Clobbers %rax, %rcx, and %r8.
  */
-SYM_FUNC_START_LOCAL(__store_partial)
-	.set LEN, %ecx
-	.set DST, %rdx
-	mov LEN, %r8d
-	mov DST, %r9
-
-	movq T0, %r10
-
-	cmp $8, %r8
-	jl .Lst_partial_8
-
-	mov %r10, (%r9)
-	psrldq $8, T0
-	movq T0, %r10
-
-	sub $8, %r8
-	add $8, %r9
-
-.Lst_partial_8:
-	cmp $4, %r8
-	jl .Lst_partial_4
-
-	mov %r10d, (%r9)
-	shr $32, %r10
-
-	sub $4, %r8
-	add $4, %r9
-
-.Lst_partial_4:
-	cmp $2, %r8
-	jl .Lst_partial_2
-
-	mov %r10w, (%r9)
-	shr $0x10, %r10
-
-	sub $2, %r8
-	add $2, %r9
-
-.Lst_partial_2:
-	cmp $1, %r8
-	jl .Lst_partial_1
-
-	mov %r10b, (%r9)
-
-.Lst_partial_1:
-	RET
-SYM_FUNC_END(__store_partial)
+.macro store_partial msg
+	sub		$8, %ecx		/* LEN - 8 */
+	jl		.Llt8\@
+
+	/* Store 8 <= LEN <= 15 bytes: */
+	pextrq		$1, \msg, %rax
+	mov		%ecx, %r8d
+	shl		$3, %ecx
+	ror		%cl, %rax
+	mov		%rax, (DST, %r8)	/* Store last LEN - 8 bytes */
+	movq		\msg, (DST)		/* Store first 8 bytes */
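+	/*
+	 * The two stores overlap for LEN < 16: the rotate lines up bytes
+	 * 8..LEN-1 of \msg at DST + 8, and storing the first 8 bytes last
+	 * overwrites whatever wrapped around below DST + 8.
+	 */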
+	jmp		.Ldone\@
+
+.Llt8\@:
+	add		$4, %ecx		/* LEN - 4 */
+	jl		.Llt4\@
+
+	/* Store 4 <= LEN <= 7 bytes: */
+	pextrd		$1, \msg, %eax
+	mov		%ecx, %r8d
+	shl		$3, %ecx
+	ror		%cl, %eax
+	mov		%eax, (DST, %r8)	/* Store last LEN - 4 bytes */
+	movd		\msg, (DST)		/* Store first 4 bytes */
+	jmp		.Ldone\@
+
+.Llt4\@:
+	/* Store 1 <= LEN <= 3 bytes: */
+	pextrb		$0, \msg, 0(DST)
+	cmp		$-2, %ecx		/* LEN - 4 == -2, i.e. LEN == 2? */
+	jl		.Ldone\@
+	pextrb		$1, \msg, 1(DST)
+	je		.Ldone\@
+	pextrb		$2, \msg, 2(DST)
+.Ldone\@:
+.endm
 
 /*
  * void aegis128_aesni_init(struct aegis_state *state,
@@ -453,7 +408,7 @@ SYM_FUNC_START(aegis128_aesni_enc_tail)
 	.set STATEP, %rdi
 	.set SRC, %rsi
 	.set DST, %rdx
-	.set LEN, %ecx
+	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */
 	FRAME_BEGIN
 
 	/* load the state: */
@@ -464,7 +419,8 @@ SYM_FUNC_START(aegis128_aesni_enc_tail)
 	movdqu 0x40(STATEP), STATE4
 
 	/* encrypt message: */
-	call __load_partial
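+	/* Save LEN; load_partial and store_partial clobber %ecx: */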
+	mov LEN, %r9d
+	load_partial
 
 	movdqa MSG, T0
 	pxor STATE1, T0
@@ -473,7 +429,8 @@ SYM_FUNC_START(aegis128_aesni_enc_tail)
 	pand STATE3, T1
 	pxor T1, T0
 
-	call __store_partial
+	mov %r9d, LEN
+	store_partial T0
 
 	aegis128_update
 	pxor MSG, STATE4
@@ -598,7 +555,7 @@ SYM_FUNC_START(aegis128_aesni_dec_tail)
 	.set STATEP, %rdi
 	.set SRC, %rsi
 	.set DST, %rdx
-	.set LEN, %ecx
+	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */
 	FRAME_BEGIN
 
 	/* load the state: */
@@ -609,25 +566,22 @@ SYM_FUNC_START(aegis128_aesni_dec_tail)
 	movdqu 0x40(STATEP), STATE4
 
 	/* decrypt message: */
-	call __load_partial
+	mov LEN, %r9d
+	load_partial
 
 	pxor STATE1, MSG
 	pxor STATE4, MSG
 	movdqa STATE2, T1
 	pand STATE3, T1
 	pxor T1, MSG
 
-	movdqa MSG, T0
-	call __store_partial
+	mov %r9d, LEN
+	store_partial MSG
 
 	/* mask with byte count: */
-	movd LEN, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	movdqa .Laegis128_counter(%rip), T1
-	pcmpgtb T1, T0
+	lea .Lzeropad_mask+16(%rip), %rax
+	sub %r9, %rax
+	movdqu (%rax), T0
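+	/*
+	 * %r9 still holds LEN, so T0 is LEN 0xff bytes followed by zero
+	 * bytes; the pand below clears the bytes past the message length.
+	 */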
 	pand T0, MSG
 
 	aegis128_update