core: crypto: cleanup and fix CE accelerated AES CTR
There is a problem with how the counter is incremented in our AArch32
implementation of ce_aes_ctr_encrypt(). When 3 or more 16-byte blocks of
data are processed, the counter is incremented one time too many and
invalid data is produced as a result [1].

More generally, the way the counter is handled is quite convoluted. It is
incremented:
- In the generic LibTomCrypt code in ctr_encrypt_sub(),
- In the Crypto Extension glue layer in aes_ctr_encrypt_nblocks(),
- In the CE accelerated assembly code in ce_aes_ctr_encrypt().
We can easily get rid of the second one. We can also avoid always calling
the non-accelerated function on the first block of data.

This commit simplifies the C code to reflect the following rules:
- The core encryption functions (accelerated or not) should use the
counter value as is to process the first block of data,
- They should increment it for each block that is processed and return it
as an output parameter.

The AArch32 and AArch64 CE assembler implementations are updated to the
latest available in the upstream Linux kernel (v4.17-rc7), thus
incorporating further improvements/simplifications by Ard Biesheuvel.
These functions handle the counter as described above so they fit our use
case perfectly.

Fixes: [1] OP-TEE#2305
CC: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
Tested-by: Jerome Forissier <jerome.forissier@linaro.org> (HiKey960, 32/64, CE/no CE)
Acked-by: Jens Wiklander <jens.wiklander@linaro.org>
[jf: pick aaec75e]
Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
jforissier committed Sep 12, 2019
1 parent 3f4d78d commit c54b634
Showing 4 changed files with 124 additions and 163 deletions.
13 changes: 0 additions & 13 deletions core/lib/libtomcrypt/src/ciphers/aes_armv8a_ce.c
@@ -279,18 +279,6 @@ static int aes_cbc_decrypt_nblocks(const unsigned char *ct, unsigned char *pt,
return CRYPT_OK;
}

/* Increment 128-bit counter */
static void increment_ctr(unsigned char *val)
{
int i;

for (i = 15; i >= 0; i--) {
val[i] = (val[i] + 1) & 0xff;
if (val[i])
break;
}
}

static int aes_ctr_encrypt_nblocks(const unsigned char *pt, unsigned char *ct,
unsigned long blocks, unsigned char *IV,
int mode, symmetric_key *skey)
@@ -312,7 +300,6 @@ static int aes_ctr_encrypt_nblocks(const unsigned char *pt, unsigned char *ct,
Nr = skey->rijndael.Nr;
rk = (u8 *)skey->rijndael.eK;

increment_ctr(IV);
tomcrypt_arm_neon_enable(&state);
ce_aes_ctr_encrypt(ct, pt, rk, Nr, blocks, IV, 1);
tomcrypt_arm_neon_disable(&state);
17 changes: 4 additions & 13 deletions core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a32.S
@@ -276,11 +276,6 @@ ce_aes_cbc_decrypt:
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 ctr[])
*/
/*
* Note: ctr is not incremented before first block is encrypted. It is
* not incremented either after the last block is done. The value used
* for the last block is copied on return.
*/
.global ce_aes_ctr_encrypt
.type ce_aes_ctr_encrypt, %function
ce_aes_ctr_encrypt:
@@ -290,8 +285,7 @@ ce_aes_ctr_encrypt:
prepare_key r2, r3
vmov r6, s27 @ keep swabbed ctr in r6
rev r6, r6
sub r7, r4, #1 @ times ctr will be incremented
cmn r6, r7 @ 32 bit overflow?
cmn r6, r4 @ 32 bit overflow?
bcs .Lctrloop
.Lctrloop3x:
subs r4, r4, #3
@@ -324,11 +318,10 @@ ce_aes_ctr_encrypt:
vmov q0, q6
bl aes_encrypt
subs r4, r4, #1
bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
bmi .Lctrtailblock @ blocks < 0 means tail block
vld1.8 {q3}, [r1]!
veor q3, q0, q3
vst1.8 {q3}, [r0]!
beq .Lctrout

adds r6, r6, #1 @ increment BE ctr
rev ip, r6
@@ -340,10 +333,8 @@ ce_aes_ctr_encrypt:
vst1.8 {q6}, [r5]
pop {r4-r6, pc}

.Lctrhalfblock:
vld1.8 {d1}, [r1]
veor d0, d0, d1
vst1.8 {d0}, [r0]
.Lctrtailblock:
vst1.8 {q0}, [r0, :64] @ return just the key stream
pop {r4-r6, pc}

.Lctrcarry:
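
The new overflow test can be modeled in C as follows (a sketch assuming
r6 holds the byte-swapped, i.e. native-order, low 32-bit counter word
and r4 the block count; not part of the commit):

#include <stdint.h>

/*
 * 'cmn r6, r4' computes r6 + r4 and sets the carry flag on unsigned
 * overflow. Since the counter is now incremented once per block,
 * blocks times in total, the low 32 bits wrap exactly when
 * ctr_lo + blocks exceeds UINT32_MAX; in that case the code takes the
 * one-block-at-a-time loop, whose .Lctrcarry path propagates the
 * carry into the upper half of the counter. This is also why the old
 * 'sub r7, r4, #1' adjustment could be dropped.
 */
static int low32_would_wrap(uint32_t ctr_lo, uint32_t blocks)
{
	return (uint64_t)ctr_lo + blocks > UINT32_MAX;
}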
166 changes: 69 additions & 97 deletions core/lib/libtomcrypt/src/ciphers/aes_modes_armv8a_ce_a64.S
@@ -413,108 +413,80 @@ ENDPROC(ce_aes_cbc_decrypt)
*/

ENTRY(ce_aes_ctr_encrypt)
FRAME_PUSH
mov x9, x5 /* save ctr pointer */
cbnz w6, .Lctrfirst /* 1st time around? */
umov x5, v4.d[1] /* keep swabbed ctr in reg */
rev x5, x5
#if INTERLEAVE >= 2
cmn w5, w4 /* 32 bit overflow? */
bcs .Lctrinc
add x5, x5, #1 /* increment BE ctr */
b .LctrincNx
#else
b .Lctrinc
#endif
.Lctrfirst:
enc_prepare w3, x2, x6
ld1 {v4.16b}, [x5]
umov x5, v4.d[1] /* keep swabbed ctr in reg */
rev x5, x5
#if INTERLEAVE >= 2
cmn w5, w4 /* 32 bit overflow? */
bcs .Lctrloop
stp x29, x30, [sp, #-16]!
mov x29, sp

enc_prepare w3, x2, x6
ld1 {v4.16b}, [x5]

umov x6, v4.d[1] /* keep swabbed ctr in reg */
rev x6, x6
cmn w6, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
subs w4, w4, #INTERLEAVE
bmi .Lctr1x
#if INTERLEAVE == 2
mov v0.8b, v4.8b
mov v1.8b, v4.8b
rev x7, x5
add x5, x5, #1
ins v0.d[1], x7
rev x7, x5
add x5, x5, #1
ins v1.d[1], x7
ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
do_encrypt_block2x
eor v0.16b, v0.16b, v2.16b
eor v1.16b, v1.16b, v3.16b
st1 {v0.16b-v1.16b}, [x0], #32
#else
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
dup v7.4s, w5
mov v0.16b, v4.16b
add v7.4s, v7.4s, v8.4s
mov v1.16b, v4.16b
rev32 v8.16b, v7.16b
mov v2.16b, v4.16b
mov v3.16b, v4.16b
mov v1.s[3], v8.s[0]
mov v2.s[3], v8.s[1]
mov v3.s[3], v8.s[2]
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
do_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
st1 {v0.16b-v3.16b}, [x0], #64
add x5, x5, #INTERLEAVE
#endif
cbz w4, .LctroutNx
.LctrincNx:
rev x7, x5
ins v4.d[1], x7
b .LctrloopNx
.LctroutNx:
sub x5, x5, #1
rev x7, x5
ins v4.d[1], x7
b .Lctrout
subs w4, w4, #4
bmi .Lctr1x
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
dup v7.4s, w6
mov v0.16b, v4.16b
add v7.4s, v7.4s, v8.4s
mov v1.16b, v4.16b
rev32 v8.16b, v7.16b
mov v2.16b, v4.16b
mov v3.16b, v4.16b
mov v1.s[3], v8.s[0]
mov v2.s[3], v8.s[1]
mov v3.s[3], v8.s[2]
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
bl aes_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
st1 {v0.16b-v3.16b}, [x0], #64
add x6, x6, #4
rev x7, x6
ins v4.d[1], x7
cbz w4, .Lctrout
b .LctrloopNx
.Lctr1x:
adds w4, w4, #INTERLEAVE
beq .Lctrout
#endif
adds w4, w4, #4
beq .Lctrout
.Lctrloop:
mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x6, w7
subs w4, w4, #1
bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
beq .Lctrout
.Lctrinc:
adds x5, x5, #1 /* increment BE ctr */
rev x7, x5
ins v4.d[1], x7
bcc .Lctrloop /* no overflow? */
umov x7, v4.d[0] /* load upper word of ctr */
rev x7, x7 /* ... to handle the carry */
add x7, x7, #1
rev x7, x7
ins v4.d[0], x7
b .Lctrloop
.Lctrhalfblock:
ld1 {v3.8b}, [x1]
eor v3.8b, v0.8b, v3.8b
st1 {v3.8b}, [x0]
mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7

adds x6, x6, #1 /* increment BE ctr */
rev x7, x6
ins v4.d[1], x7
bcs .Lctrcarry /* overflow? */

.Lctrcarrydone:
subs w4, w4, #1
bmi .Lctrtailblock /* blocks <0 means tail block */
ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
bne .Lctrloop

.Lctrout:
st1 {v4.16b}, [x9] /* save ctr for next call */
FRAME_POP
st1 {v4.16b}, [x5] /* return next CTR value */
ldp x29, x30, [sp], #16
ret

.Lctrtailblock:
st1 {v0.16b}, [x0]
ldp x29, x30, [sp], #16
ret

.Lctrcarry:
umov x7, v4.d[0] /* load upper word of ctr */
rev x7, x7 /* ... to handle the carry */
add x7, x7, #1
rev x7, x7
ins v4.d[0], x7
b .Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)
.ltorg

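The counter derivation in the 4x interleaved path above can be modeled
in C (a sketch, not part of the commit; lane 3 of the addend vector,
value 0, is unused):

#include <stdint.h>

/*
 * ctr_lo is the native-order low 32 bits of the big-endian counter
 * (w6 in the assembly). Block 0 encrypts the counter as is; blocks
 * 1..3 get ctr_lo + 1 .. ctr_lo + 3, byte-swapped back to big endian
 * and inserted into the last 32-bit lane of their counter block.
 * A carry out of the low 32 bits cannot occur here: the 'cmn w6, w4'
 * pre-check routes wrapping cases to the scalar .Lctrloop, which uses
 * 64-bit arithmetic and handles carry into the upper half in
 * .Lctrcarry.
 */
static void derive_ctr_tails(uint32_t ctr_lo, uint32_t tail_be[3])
{
	int i;

	for (i = 0; i < 3; i++)
		tail_be[i] = __builtin_bswap32(ctr_lo + 1 + i);
}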
91 changes: 51 additions & 40 deletions core/lib/libtomcrypt/src/modes/ctr/ctr_encrypt.c
@@ -17,6 +17,26 @@

#ifdef LTC_CTR_MODE

static void ctr_increment_counter(symmetric_CTR *ctr)
{
int x;

if (ctr->mode == CTR_COUNTER_LITTLE_ENDIAN) {
for (x = 0; x < ctr->ctrlen; x++) {
ctr->ctr[x] = (ctr->ctr[x] + 1) & 0xff;
if (ctr->ctr[x])
return;
}
} else {
for (x = ctr->blocklen - 1; x >= ctr->ctrlen; x--) {
ctr->ctr[x] = (ctr->ctr[x] + 1) & 0xff;
if (ctr->ctr[x]) {
return;
}
}
}
}

/**
CTR encrypt software implementation
@param pt Plaintext
@@ -27,35 +47,16 @@
*/
static int _ctr_encrypt(const unsigned char *pt, unsigned char *ct, unsigned long len, symmetric_CTR *ctr)
{
int x, err;
int err;

while (len) {
/* is the pad empty? */
if (ctr->padlen == ctr->blocklen) {
/* increment counter */
if (ctr->mode == CTR_COUNTER_LITTLE_ENDIAN) {
/* little-endian */
for (x = 0; x < ctr->ctrlen; x++) {
ctr->ctr[x] = (ctr->ctr[x] + (unsigned char)1) & (unsigned char)255;
if (ctr->ctr[x] != (unsigned char)0) {
break;
}
}
} else {
/* big-endian */
for (x = ctr->blocklen-1; x >= ctr->ctrlen; x--) {
ctr->ctr[x] = (ctr->ctr[x] + (unsigned char)1) & (unsigned char)255;
if (ctr->ctr[x] != (unsigned char)0) {
break;
}
}
}

/* encrypt it */
if ((err = cipher_descriptor[ctr->cipher]->ecb_encrypt(ctr->ctr, ctr->pad, &ctr->key)) != CRYPT_OK) {
return err;
}
ctr->padlen = 0;
/* encrypt counter into pad */
if ((err = cipher_descriptor[ctr->cipher]->ecb_encrypt(ctr->ctr, ctr->pad, &ctr->key)) != CRYPT_OK) {
return err;
}
ctr->padlen = 0;
}
#ifdef LTC_FAST
if ((ctr->padlen == 0) && (len >= (unsigned long)ctr->blocklen)) {
@@ -72,6 +73,11 @@ static int _ctr_encrypt(const unsigned char *pt, unsigned char *ct, unsigned lon
#endif
*ct++ = *pt++ ^ ctr->pad[ctr->padlen++];
--len;

/* done with one full block? if so, set counter for next block. */
if (ctr->padlen == ctr->blocklen) {
ctr_increment_counter(ctr);
}
}
return CRYPT_OK;
}
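
Because the counter now always corresponds to the next block to be
produced, whether it was last advanced by _ctr_encrypt() or by the
accelerator, a CTR stream may be split at arbitrary byte boundaries.
A hypothetical usage sketch (the split sizes are arbitrary):

/* Encrypting in two pieces must produce the same output as one call. */
ctr_encrypt(pt, ct, 20, &ctr);
ctr_encrypt(pt + 20, ct + 20, 44, &ctr);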
@@ -86,7 +92,8 @@ static int _ctr_encrypt(const unsigned char *pt, unsigned char *ct, unsigned lon
*/
int ctr_encrypt(const unsigned char *pt, unsigned char *ct, unsigned long len, symmetric_CTR *ctr)
{
int err, fr;
unsigned long incr;
int err;

LTC_ARGCHK(pt != NULL);
LTC_ARGCHK(ct != NULL);
@@ -108,25 +115,29 @@ int ctr_encrypt(const unsigned char *pt, unsigned char *ct, unsigned long len, s
}
#endif

/* handle acceleration only if pad is empty, accelerator is present and length is >= a block size */
if ((cipher_descriptor[ctr->cipher]->accel_ctr_encrypt != NULL) && (len >= (unsigned long)ctr->blocklen)) {
if (ctr->padlen < ctr->blocklen) {
fr = ctr->blocklen - ctr->padlen;
if ((err = _ctr_encrypt(pt, ct, fr, ctr)) != CRYPT_OK) {
return err;
}
pt += fr;
ct += fr;
len -= fr;
}

if (len >= (unsigned long)ctr->blocklen) {
if (cipher_descriptor[ctr->cipher]->accel_ctr_encrypt != NULL ) {
/* handle acceleration only if not in the middle of a block, accelerator is present and length is >= a block size */
if ((ctr->padlen == 0 || ctr->padlen == ctr->blocklen) && len >= (unsigned long)ctr->blocklen) {
if ((err = cipher_descriptor[ctr->cipher]->accel_ctr_encrypt(pt, ct, len/ctr->blocklen, ctr->ctr, ctr->mode, &ctr->key)) != CRYPT_OK) {
return err;
return err;
}
pt += (len / ctr->blocklen) * ctr->blocklen;
ct += (len / ctr->blocklen) * ctr->blocklen;
len %= ctr->blocklen;
/* counter was changed by accelerator so mark pad empty (will need updating in _ctr_encrypt()) */
ctr->padlen = ctr->blocklen;
}

/* try to re-synchronize on a block boundary for maximum use of acceleration */
incr = ctr->blocklen - ctr->padlen;
if (len >= incr + (unsigned long)ctr->blocklen) {
if ((err = _ctr_encrypt(pt, ct, incr, ctr)) != CRYPT_OK) {
return err;
}
pt += incr;
ct += incr;
len -= incr;
return ctr_encrypt(pt, ct, len, ctr);
}
}

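A worked example of the re-synchronization path, with illustrative
numbers: for blocklen = 16, padlen = 5 and len = 43, incr = 16 - 5 = 11,
so the software path first consumes the 11 bytes that complete the
current block; the recursive call then starts block-aligned with 32
bytes, all of which the accelerator can process.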
