Improve AES-NI performance for AES-XTS:
- Operate on uint64_t types when doing XORing, etc. instead of uint8_t
  (a standalone sketch of this word-wise approach follows this list).
- Don't bzero() temporary block for every AES block. Do it once for entire
  data block.
- AES-NI is available only on little endian architectures. Simplify code
  that takes block number from IV.
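
For illustration, here is a minimal userspace sketch of the word-at-a-time
tweak update described above. The function name and the main() driver are
made up for this sketch, and, like the kernel code, it assumes a
little-endian host:

	#include <stdint.h>
	#include <stdio.h>

	#define	AES_XTS_ALPHA	0x87	/* GF(2^128) generator polynomial */

	/*
	 * Treat the 128-bit XTS tweak as two little-endian uint64_t words,
	 * so multiplying by the GF(2^128) generator costs one carry between
	 * words instead of a 16-iteration byte loop.
	 */
	static void
	xts_tweak_exponentiate(uint64_t tweak[2])
	{
		int carry;

		/* Remember the bit shifted out of the low word. */
		carry = ((tweak[0] & 0x8000000000000000ULL) > 0);
		tweak[0] <<= 1;
		/* A bit shifted out of the high word folds back as the polynomial. */
		if (tweak[1] & 0x8000000000000000ULL) {
			uint8_t *twk = (uint8_t *)tweak;

			twk[0] ^= AES_XTS_ALPHA;
		}
		tweak[1] <<= 1;
		if (carry)
			tweak[1] |= 1;
	}

	int
	main(void)
	{
		uint64_t tweak[2] = { 0x8000000000000000ULL, 0 };

		xts_tweak_exponentiate(tweak);
		/* The carry moves into the high word; prints "0 1". */
		printf("%jx %jx\n", (uintmax_t)tweak[0], (uintmax_t)tweak[1]);
		return (0);
	}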

Benchmarks:

Memory-backed md(4) device, software AES-XTS, 4kB sector:

	# dd if=/dev/md0.eli bs=1m
	59.61MB/s

Memory-backed md(4) device, old AES-NI AES-XTS, 4kB sector:

	# dd if=/dev/md0.eli bs=1m
	97.29MB/s

Memory-backed md(4) device, new AES-NI AES-XTS, 4kB sector:

	# dd if=/dev/md0.eli bs=1m
	221.26MB/s

127% performance improvement between old and new code.

Harddisk, raw speed:

	# dd if=/dev/ada0 bs=1m
	137.63MB/s

Harddisk, software AES-XTS, 4kB sector:

	# dd if=/dev/ada0.eli bs=1m
	47.83MB/s (34% of raw disk speed)

Harddisk, old AES-NI AES-XTS, 4kB sector:

	# dd if=/dev/ada0.eli bs=1m
	68.33MB/s (49% of raw disk speed)

Harddisk, new AES-NI AES-XTS, 4kB sector:

	# dd if=/dev/ada0.eli bs=1m
	108.35MB/s (78% of raw disk speed)

58% performance improvement between old and new code.

As a side note, GELI with AES-NI using AES-CBC can achieve native disk speed.

MFC after:	3 days
pjd authored and pjd committed Oct 27, 2011
1 parent efcbafc commit d01d359
Showing 1 changed file with 27 additions and 26 deletions.
sys/crypto/aesni/aesni_wrap.c (27 additions, 26 deletions)

@@ -87,66 +87,67 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
 #define	AES_XTS_ALPHA	0x87	/* GF(2^128) generator polynomial */
 
 static void
-aesni_crypt_xts_block(int rounds, const void *key_schedule, uint8_t *tweak,
-    const uint8_t *from, uint8_t *to, int do_encrypt)
+aesni_crypt_xts_block(int rounds, const void *key_schedule, uint64_t *tweak,
+    const uint64_t *from, uint64_t *to, uint64_t *block, int do_encrypt)
 {
-	uint8_t block[AES_XTS_BLOCKSIZE];
-	u_int i, carry_in, carry_out;
+	int carry;
 
-	for (i = 0; i < AES_XTS_BLOCKSIZE; i++)
-		block[i] = from[i] ^ tweak[i];
+	block[0] = from[0] ^ tweak[0];
+	block[1] = from[1] ^ tweak[1];
 
 	if (do_encrypt)
-		aesni_enc(rounds - 1, key_schedule, block, to, NULL);
+		aesni_enc(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL);
 	else
-		aesni_dec(rounds - 1, key_schedule, block, to, NULL);
+		aesni_dec(rounds - 1, key_schedule, (uint8_t *)block, (uint8_t *)to, NULL);
 
-	for (i = 0; i < AES_XTS_BLOCKSIZE; i++)
-		to[i] ^= tweak[i];
+	to[0] ^= tweak[0];
+	to[1] ^= tweak[1];
 
 	/* Exponentiate tweak. */
-	carry_in = 0;
-	for (i = 0; i < AES_XTS_BLOCKSIZE; i++) {
-		carry_out = tweak[i] & 0x80;
-		tweak[i] = (tweak[i] << 1) | (carry_in ? 1 : 0);
-		carry_in = carry_out;
+	carry = ((tweak[0] & 0x8000000000000000ULL) > 0);
+	tweak[0] <<= 1;
+	if (tweak[1] & 0x8000000000000000ULL) {
+		uint8_t *twk = (uint8_t *)tweak;
+
+		twk[0] ^= AES_XTS_ALPHA;
 	}
-	if (carry_in)
-		tweak[0] ^= AES_XTS_ALPHA;
-	bzero(block, sizeof(block));
+	tweak[1] <<= 1;
+	if (carry)
+		tweak[1] |= 1;
 }
 
 static void
 aesni_crypt_xts(int rounds, const void *data_schedule,
     const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
     const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
 {
+	uint64_t block[AES_XTS_BLOCKSIZE / 8];
 	uint8_t tweak[AES_XTS_BLOCKSIZE];
-	uint64_t blocknum;
 	size_t i;
 
 	/*
 	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
 	 * of a 64-bit block number which we allow to be passed in directly.
 	 */
-	bcopy(iv, &blocknum, AES_XTS_IVSIZE);
-	for (i = 0; i < AES_XTS_IVSIZE; i++) {
-		tweak[i] = blocknum & 0xff;
-		blocknum >>= 8;
-	}
+#if BYTE_ORDER == LITTLE_ENDIAN
+	bcopy(iv, tweak, AES_XTS_IVSIZE);
+	/* Last 64 bits of IV are always zero. */
+	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
+#else
+#error Only LITTLE_ENDIAN architectures are supported.
+#endif
 	aesni_enc(rounds - 1, tweak_schedule, tweak, tweak, NULL);
 
 	len /= AES_XTS_BLOCKSIZE;
 	for (i = 0; i < len; i++) {
-		aesni_crypt_xts_block(rounds, data_schedule, tweak, from, to,
-		    do_encrypt);
+		aesni_crypt_xts_block(rounds, data_schedule, (uint64_t *)tweak,
+		    (const uint64_t *)from, (uint64_t *)to, block, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
 		to += AES_XTS_BLOCKSIZE;
 	}
 
 	bzero(tweak, sizeof(tweak));
+	bzero(block, sizeof(block));
 }
 
 static void
[...]
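
As a quick sanity check of the rewrite, the old byte-at-a-time tweak update
and the new two-word one can be run side by side on random tweaks; on a
little-endian host they must produce identical results. This is a
hypothetical userspace harness, not part of the commit:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define	AES_XTS_BLOCKSIZE	16
	#define	AES_XTS_ALPHA		0x87

	/* Old approach: shift 16 bytes left by one bit, byte by byte. */
	static void
	tweak_exp_bytes(uint8_t tweak[AES_XTS_BLOCKSIZE])
	{
		unsigned int i, carry_in, carry_out;

		carry_in = 0;
		for (i = 0; i < AES_XTS_BLOCKSIZE; i++) {
			carry_out = tweak[i] & 0x80;
			tweak[i] = (tweak[i] << 1) | (carry_in ? 1 : 0);
			carry_in = carry_out;
		}
		if (carry_in)
			tweak[0] ^= AES_XTS_ALPHA;
	}

	/* New approach: two 64-bit words and a single carry. */
	static void
	tweak_exp_words(uint64_t tweak[2])
	{
		int carry;

		carry = ((tweak[0] & 0x8000000000000000ULL) > 0);
		tweak[0] <<= 1;
		if (tweak[1] & 0x8000000000000000ULL)
			((uint8_t *)tweak)[0] ^= AES_XTS_ALPHA;
		tweak[1] <<= 1;
		if (carry)
			tweak[1] |= 1;
	}

	int
	main(void)
	{
		uint8_t a[AES_XTS_BLOCKSIZE];
		uint64_t b[2];
		int i, j;

		srandom(42);
		for (i = 0; i < 1000; i++) {
			for (j = 0; j < AES_XTS_BLOCKSIZE; j++)
				a[j] = random() & 0xff;
			memcpy(b, a, sizeof(a));
			tweak_exp_bytes(a);
			tweak_exp_words(b);
			if (memcmp(a, b, sizeof(a)) != 0) {
				printf("mismatch at iteration %d\n", i);
				return (1);
			}
		}
		printf("byte-wise and word-wise tweak updates agree\n");
		return (0);
	}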
