@@ -52,11 +52,6 @@ typedef uint128_t p224_widelimb;
// A field element, held as an array of four |p224_limb|s.
typedef p224_limb p224_felem[4];
// A wide value, held as an array of seven |p224_widelimb|s.
// NOTE(review): presumably the unreduced result of multiplying two
// |p224_felem|s — confirm against the multiplication routines.
typedef p224_widelimb p224_widefelem[7];

// Field element represented as a byte array. 28*8 = 224 bits is also the
// group order size for the elliptic curve, and we also use this type for
// scalars for point multiplication.
typedef uint8_t p224_felem_bytearray[28];

// Precomputed multiples of the standard generator
// Points are given in coordinates (X, Y, Z) where Z normally is 1
// (0 for the point at infinity).
@@ -180,31 +175,16 @@ static const p224_felem g_p224_pre_comp[2][16][3] = {
{0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
{1, 0, 0, 0}}}};

// p224_load_u64 reads eight bytes from |in| and returns them as a
// little-endian 64-bit integer, i.e. |in[0]| becomes the least-significant
// byte of the result.
//
// The value is assembled byte by byte rather than copied wholesale so that the
// result does not depend on the host's byte order. This matches the byte
// layout produced by |p224_felem_to_bin28|, which emits each limb
// least-significant byte first.
static uint64_t p224_load_u64(const uint8_t in[8]) {
  uint64_t ret = 0;
  for (size_t i = 0; i < 8; i++) {
    // Widen before shifting so the shift is performed on 64 bits.
    ret |= (uint64_t)in[i] << (8 * i);
  }
  return ret;
}

// Helper functions to convert field elements to/from internal representation
// p224_bin28_to_felem unpacks the 28 bytes at |in| into the four limbs of
// |out|, seven bytes (56 bits) per limb, lowest limb first.
static void p224_bin28_to_felem(p224_felem out, const uint8_t in[28]) {
  // Keeps the low 56 bits of a 64-bit load, discarding the eighth byte that
  // belongs to the next limb.
  const uint64_t kLow56 = 0x00ffffffffffffff;

  // Limbs 0, 1 and 2 begin at byte offsets 0, 7 and 14.
  for (size_t limb = 0; limb < 3; limb++) {
    out[limb] = p224_load_u64(in + 7 * limb) & kLow56;
  }

  // An eight-byte load at offset 21 would run past the 28-byte input, so the
  // final limb is loaded one byte early and the extra low byte is shifted out.
  out[3] = p224_load_u64(in + 20) >> 8;
}

// p224_felem_to_bin28 writes the four limbs of |in| to the 28 bytes at |out|.
// Each limb contributes its low seven bytes, least-significant byte first,
// with limb 0 occupying |out[0..6]| and limb 3 occupying |out[21..27]|.
static void p224_felem_to_bin28(uint8_t out[28], const p224_felem in) {
  for (size_t limb = 0; limb < 4; limb++) {
    for (size_t byte = 0; byte < 7; byte++) {
      // Truncation to eight bits selects the requested byte of the limb.
      out[7 * limb + byte] = (uint8_t)(in[limb] >> (8 * byte));
    }
  }
}

// p224_generic_to_felem converts the generic field element |in| into the
// four-limb |p224_felem| representation, storing the result in |out|.
static void p224_generic_to_felem(p224_felem out, const EC_FELEM *in) {
  // Selects the low 56 bits, the width of one |p224_felem| limb.
  const uint64_t kLow56 = 0x00ffffffffffffff;

  p224_bin28_to_felem(out, in->bytes);
  // NOTE(review): every limb written by the call above is reassigned below, so
  // the byte-wise conversion looks superseded — confirm before removing it.

  // |p224_felem|'s minimal representation uses four 56-bit words, whereas
  // |EC_FELEM| uses four 64-bit words (the top-most of which only has 32
  // bits). Each limb therefore straddles two adjacent input words.
  out[0] = kLow56 & in->words[0];
  out[1] = kLow56 & ((in->words[1] << 8) | (in->words[0] >> 56));
  out[2] = kLow56 & ((in->words[2] << 16) | (in->words[1] >> 48));
  out[3] = kLow56 & ((in->words[3] << 24) | (in->words[2] >> 40));
}

// Requires 0 <= in < 2*p (always call p224_felem_reduce first)
@@ -256,9 +236,12 @@ static void p224_felem_to_generic(EC_FELEM *out, const p224_felem in) {
tmp2[2] = tmp[2];
tmp2[3] = tmp[3];

p224_felem_to_bin28(out->bytes, tmp2);
// 224 is not a multiple of 64, so zero the remaining bytes.
OPENSSL_memset(out->bytes + 28, 0, 32 - 28);
// |p224_felem|'s minimal representation uses four 56-bit words. |EC_FELEM|
// uses four 64-bit words. (The top-most word only has 32 bits.)
out->words[0] = tmp2[0] | (tmp2[1] << 56);
out->words[1] = (tmp2[1] >> 8) | (tmp2[2] << 48);
out->words[2] = (tmp2[2] >> 16) | (tmp2[3] << 40);
out->words[3] = tmp2[3] >> 24;
}


@@ -865,12 +848,13 @@ static void p224_select_point(const uint64_t idx, size_t size,
}
}

// p224_get_bit returns the |i|th bit in |in|
static crypto_word_t p224_get_bit(const p224_felem_bytearray in, size_t i) {
// p224_get_bit returns the |i|th bit in |in|.
static crypto_word_t p224_get_bit(const EC_SCALAR *in, size_t i) {
if (i >= 224) {
return 0;
}
return (in[i >> 3] >> (i & 7)) & 1;
static_assert(sizeof(in->words[0]) == 8, "BN_ULONG is not 64-bit");
return (in->words[i >> 6] >> (i & 63)) & 1;
}

// Takes the Jacobian coordinates (X, Y, Z) of a point and returns
@@ -977,12 +961,12 @@ static void ec_GFp_nistp224_point_mul(const EC_GROUP *group, EC_RAW_POINT *r,

// Add every 5 doublings.
if (i % 5 == 0) {
crypto_word_t bits = p224_get_bit(scalar->bytes, i + 4) << 5;
bits |= p224_get_bit(scalar->bytes, i + 3) << 4;
bits |= p224_get_bit(scalar->bytes, i + 2) << 3;
bits |= p224_get_bit(scalar->bytes, i + 1) << 2;
bits |= p224_get_bit(scalar->bytes, i) << 1;
bits |= p224_get_bit(scalar->bytes, i - 1);
crypto_word_t bits = p224_get_bit(scalar, i + 4) << 5;
bits |= p224_get_bit(scalar, i + 3) << 4;
bits |= p224_get_bit(scalar, i + 2) << 3;
bits |= p224_get_bit(scalar, i + 1) << 2;
bits |= p224_get_bit(scalar, i) << 1;
bits |= p224_get_bit(scalar, i - 1);
crypto_word_t sign, digit;
ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

@@ -1022,10 +1006,10 @@ static void ec_GFp_nistp224_point_mul_base(const EC_GROUP *group,
}

// First, look 28 bits upwards.
crypto_word_t bits = p224_get_bit(scalar->bytes, i + 196) << 3;
bits |= p224_get_bit(scalar->bytes, i + 140) << 2;
bits |= p224_get_bit(scalar->bytes, i + 84) << 1;
bits |= p224_get_bit(scalar->bytes, i + 28);
crypto_word_t bits = p224_get_bit(scalar, i + 196) << 3;
bits |= p224_get_bit(scalar, i + 140) << 2;
bits |= p224_get_bit(scalar, i + 84) << 1;
bits |= p224_get_bit(scalar, i + 28);
// Select the point to add, in constant time.
p224_select_point(bits, 16, g_p224_pre_comp[1], tmp);

@@ -1038,10 +1022,10 @@ static void ec_GFp_nistp224_point_mul_base(const EC_GROUP *group,
}

// Second, look at the current position.
bits = p224_get_bit(scalar->bytes, i + 168) << 3;
bits |= p224_get_bit(scalar->bytes, i + 112) << 2;
bits |= p224_get_bit(scalar->bytes, i + 56) << 1;
bits |= p224_get_bit(scalar->bytes, i);
bits = p224_get_bit(scalar, i + 168) << 3;
bits |= p224_get_bit(scalar, i + 112) << 2;
bits |= p224_get_bit(scalar, i + 56) << 1;
bits |= p224_get_bit(scalar, i);
// Select the point to add, in constant time.
p224_select_point(bits, 16, g_p224_pre_comp[0], tmp);
p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
@@ -1080,10 +1064,10 @@ static void ec_GFp_nistp224_point_mul_public(const EC_GROUP *group,
// Add multiples of the generator.
if (i <= 27) {
// First, look 28 bits upwards.
crypto_word_t bits = p224_get_bit(g_scalar->bytes, i + 196) << 3;
bits |= p224_get_bit(g_scalar->bytes, i + 140) << 2;
bits |= p224_get_bit(g_scalar->bytes, i + 84) << 1;
bits |= p224_get_bit(g_scalar->bytes, i + 28);
crypto_word_t bits = p224_get_bit(g_scalar, i + 196) << 3;
bits |= p224_get_bit(g_scalar, i + 140) << 2;
bits |= p224_get_bit(g_scalar, i + 84) << 1;
bits |= p224_get_bit(g_scalar, i + 28);

size_t index = (size_t)bits;
p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
@@ -1092,10 +1076,10 @@ static void ec_GFp_nistp224_point_mul_public(const EC_GROUP *group,
assert(!skip);

// Second, look at the current position.
bits = p224_get_bit(g_scalar->bytes, i + 168) << 3;
bits |= p224_get_bit(g_scalar->bytes, i + 112) << 2;
bits |= p224_get_bit(g_scalar->bytes, i + 56) << 1;
bits |= p224_get_bit(g_scalar->bytes, i);
bits = p224_get_bit(g_scalar, i + 168) << 3;
bits |= p224_get_bit(g_scalar, i + 112) << 2;
bits |= p224_get_bit(g_scalar, i + 56) << 1;
bits |= p224_get_bit(g_scalar, i);
index = (size_t)bits;
p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
g_p224_pre_comp[0][index][0], g_p224_pre_comp[0][index][1],
@@ -1104,12 +1088,12 @@ static void ec_GFp_nistp224_point_mul_public(const EC_GROUP *group,

// Incorporate |p_scalar| every 5 doublings.
if (i % 5 == 0) {
crypto_word_t bits = p224_get_bit(p_scalar->bytes, i + 4) << 5;
bits |= p224_get_bit(p_scalar->bytes, i + 3) << 4;
bits |= p224_get_bit(p_scalar->bytes, i + 2) << 3;
bits |= p224_get_bit(p_scalar->bytes, i + 1) << 2;
bits |= p224_get_bit(p_scalar->bytes, i) << 1;
bits |= p224_get_bit(p_scalar->bytes, i - 1);
crypto_word_t bits = p224_get_bit(p_scalar, i + 4) << 5;
bits |= p224_get_bit(p_scalar, i + 3) << 4;
bits |= p224_get_bit(p_scalar, i + 2) << 3;
bits |= p224_get_bit(p_scalar, i + 1) << 2;
bits |= p224_get_bit(p_scalar, i) << 1;
bits |= p224_get_bit(p_scalar, i - 1);
crypto_word_t sign, digit;
ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

@@ -201,7 +201,7 @@ static void ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
// ~1599 ((96 * 16) + 63) bytes of stack space.
alignas(64) P256_POINT table[16];
uint8_t p_str[33];
OPENSSL_memcpy(p_str, p_scalar->bytes, 32);
OPENSSL_memcpy(p_str, p_scalar->words, 32);
p_str[32] = 0;

// table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
@@ -321,7 +321,7 @@ static void ecp_nistz256_point_mul_base(const EC_GROUP *group, EC_RAW_POINT *r,
alignas(32) p256_point_union_t t, p;

uint8_t p_str[33];
OPENSSL_memcpy(p_str, scalar->bytes, 32);
OPENSSL_memcpy(p_str, scalar->words, 32);
p_str[32] = 0;

// First window
@@ -366,7 +366,7 @@ static void ecp_nistz256_points_mul_public(const EC_GROUP *group,

alignas(32) p256_point_union_t t, p;
uint8_t p_str[33];
OPENSSL_memcpy(p_str, g_scalar->bytes, 32);
OPENSSL_memcpy(p_str, g_scalar->words, 32);
p_str[32] = 0;

// First window
@@ -149,8 +149,8 @@ TEST(P256_NistzTest, BEEU) {
EXPECT_TRUE(bn_less_than_words(out, order_words, P256_LIMBS));

// Calculate out*in and confirm that it equals one, modulo the order.
OPENSSL_memcpy(in_scalar.bytes, in, sizeof(in));
OPENSSL_memcpy(out_scalar.bytes, out, sizeof(out));
OPENSSL_memcpy(in_scalar.words, in, sizeof(in));
OPENSSL_memcpy(out_scalar.words, out, sizeof(out));
ec_scalar_to_montgomery(group.get(), &in_scalar, &in_scalar);
ec_scalar_to_montgomery(group.get(), &out_scalar, &out_scalar);
ec_scalar_mul_montgomery(group.get(), &result, &in_scalar, &out_scalar);
@@ -81,17 +81,22 @@ static void fiat_p256_cmovznz(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
fiat_p256_selectznz(out, !!t, z, nz);
}

// fiat_p256_from_words copies the 32 bytes of |in| into |out| unchanged.
static void fiat_p256_from_words(fiat_p256_felem out,
                                 const BN_ULONG in[32 / sizeof(BN_ULONG)]) {
  // 256 bits, expressed in bytes.
  const size_t kNumBytes = 32;

  // |BN_ULONG| and |fiat_p256_limb_t| are usually the same type. They differ
  // on 64-bit platforms that lack |uint128_t|, but a plain byte copy is still
  // correct there because, on little-endian systems, |uint64_t[4]| and
  // |uint32_t[8]| share the same memory layout.
  OPENSSL_memcpy(out, in, kNumBytes);
}

// fiat_p256_from_generic fills |out| from the generic field element |in|.
//
// NOTE(review): |fiat_p256_from_words| copies 32 bytes into |out|, so it
// presumably replaces whatever |fiat_p256_from_bytes| stored there — confirm
// whether the byte-wise call is still required.
static void fiat_p256_from_generic(fiat_p256_felem out, const EC_FELEM *in) {
fiat_p256_from_bytes(out, in->bytes);
fiat_p256_from_words(out, in->words);
}

// fiat_p256_to_generic stores the field element |in| into the generic
// representation |out|.
static void fiat_p256_to_generic(EC_FELEM *out, const fiat_p256_felem in) {
// This works because 256 is a multiple of 64, so there are no excess bytes to
// zero when rounding up to |BN_ULONG|s.
OPENSSL_STATIC_ASSERT(
256 / 8 == sizeof(BN_ULONG) * ((256 + BN_BITS2 - 1) / BN_BITS2),
"fiat_p256_to_bytes leaves bytes uninitialized");
// NOTE(review): |out| is written twice, once through |bytes| and once through
// |words|. If those members alias, the first write is presumably redundant —
// confirm against the definition of |EC_FELEM|.
fiat_p256_to_bytes(out->bytes, in);
// See |fiat_p256_from_words|: the limbs are copied as 32 raw bytes.
OPENSSL_memcpy(out->words, in, 32);
}

// fiat_p256_inv_square calculates |out| = |in|^{-2}
@@ -394,12 +399,18 @@ static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size,
}
}

// fiat_p256_get_bit returns the |i|th bit in |in|
static crypto_word_t fiat_p256_get_bit(const uint8_t *in, int i) {
// fiat_p256_get_bit returns the |i|th bit in |in|.
static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) {
if (i < 0 || i >= 256) {
return 0;
}
return (in[i >> 3] >> (i & 7)) & 1;
#if defined(OPENSSL_64_BIT)
static_assert(sizeof(BN_ULONG) == 8, "BN_ULONG was not 64-bit");
return (in->words[i >> 6] >> (i & 63)) & 1;
#else
static_assert(sizeof(BN_ULONG) == 4, "BN_ULONG was not 32-bit");
return (in->words[i >> 5] >> (i & 31)) & 1;
#endif
}

// OPENSSL EC_METHOD FUNCTIONS
@@ -500,12 +511,12 @@ static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_RAW_POINT *r,

// do other additions every 5 doublings
if (i % 5 == 0) {
crypto_word_t bits = fiat_p256_get_bit(scalar->bytes, i + 4) << 5;
bits |= fiat_p256_get_bit(scalar->bytes, i + 3) << 4;
bits |= fiat_p256_get_bit(scalar->bytes, i + 2) << 3;
bits |= fiat_p256_get_bit(scalar->bytes, i + 1) << 2;
bits |= fiat_p256_get_bit(scalar->bytes, i) << 1;
bits |= fiat_p256_get_bit(scalar->bytes, i - 1);
crypto_word_t bits = fiat_p256_get_bit(scalar, i + 4) << 5;
bits |= fiat_p256_get_bit(scalar, i + 3) << 4;
bits |= fiat_p256_get_bit(scalar, i + 2) << 3;
bits |= fiat_p256_get_bit(scalar, i + 1) << 2;
bits |= fiat_p256_get_bit(scalar, i) << 1;
bits |= fiat_p256_get_bit(scalar, i - 1);
crypto_word_t sign, digit;
ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

@@ -545,10 +556,10 @@ static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
}

// First, look 32 bits upwards.
crypto_word_t bits = fiat_p256_get_bit(scalar->bytes, i + 224) << 3;
bits |= fiat_p256_get_bit(scalar->bytes, i + 160) << 2;
bits |= fiat_p256_get_bit(scalar->bytes, i + 96) << 1;
bits |= fiat_p256_get_bit(scalar->bytes, i + 32);
crypto_word_t bits = fiat_p256_get_bit(scalar, i + 224) << 3;
bits |= fiat_p256_get_bit(scalar, i + 160) << 2;
bits |= fiat_p256_get_bit(scalar, i + 96) << 1;
bits |= fiat_p256_get_bit(scalar, i + 32);
// Select the point to add, in constant time.
fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
fiat_p256_g_pre_comp[1], tmp);
@@ -564,10 +575,10 @@ static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group,
}

// Second, look at the current position.
bits = fiat_p256_get_bit(scalar->bytes, i + 192) << 3;
bits |= fiat_p256_get_bit(scalar->bytes, i + 128) << 2;
bits |= fiat_p256_get_bit(scalar->bytes, i + 64) << 1;
bits |= fiat_p256_get_bit(scalar->bytes, i);
bits = fiat_p256_get_bit(scalar, i + 192) << 3;
bits |= fiat_p256_get_bit(scalar, i + 128) << 2;
bits |= fiat_p256_get_bit(scalar, i + 64) << 1;
bits |= fiat_p256_get_bit(scalar, i);
// Select the point to add, in constant time.
fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
fiat_p256_g_pre_comp[0], tmp);
@@ -617,10 +628,10 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
// constant-time lookup.
if (i <= 31) {
// First, look 32 bits upwards.
crypto_word_t bits = fiat_p256_get_bit(g_scalar->bytes, i + 224) << 3;
bits |= fiat_p256_get_bit(g_scalar->bytes, i + 160) << 2;
bits |= fiat_p256_get_bit(g_scalar->bytes, i + 96) << 1;
bits |= fiat_p256_get_bit(g_scalar->bytes, i + 32);
crypto_word_t bits = fiat_p256_get_bit(g_scalar, i + 224) << 3;
bits |= fiat_p256_get_bit(g_scalar, i + 160) << 2;
bits |= fiat_p256_get_bit(g_scalar, i + 96) << 1;
bits |= fiat_p256_get_bit(g_scalar, i + 32);
if (bits != 0) {
size_t index = (size_t)(bits - 1);
fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
@@ -631,10 +642,10 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
}

// Second, look at the current position.
bits = fiat_p256_get_bit(g_scalar->bytes, i + 192) << 3;
bits |= fiat_p256_get_bit(g_scalar->bytes, i + 128) << 2;
bits |= fiat_p256_get_bit(g_scalar->bytes, i + 64) << 1;
bits |= fiat_p256_get_bit(g_scalar->bytes, i);
bits = fiat_p256_get_bit(g_scalar, i + 192) << 3;
bits |= fiat_p256_get_bit(g_scalar, i + 128) << 2;
bits |= fiat_p256_get_bit(g_scalar, i + 64) << 1;
bits |= fiat_p256_get_bit(g_scalar, i);
if (bits != 0) {
size_t index = (size_t)(bits - 1);
fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
@@ -687,7 +698,7 @@ static int ec_GFp_nistp256_cmp_x_coordinate(const EC_GROUP *group,
fiat_p256_mul(Z2_mont, Z2_mont, Z2_mont);

fiat_p256_felem r_Z2;
fiat_p256_from_bytes(r_Z2, r->bytes); // r < order < p, so this is valid.
fiat_p256_from_words(r_Z2, r->words); // r < order < p, so this is valid.
fiat_p256_mul(r_Z2, r_Z2, Z2_mont);

fiat_p256_felem X;
@@ -54,9 +54,7 @@ int ec_random_nonzero_scalar(const EC_GROUP *group, EC_SCALAR *out,
// ec_scalar_to_bytes serializes |in| into |out| and sets |*out_len| to the
// number of bytes written, which is |BN_num_bytes| of the group order. |out|
// must have room for that many bytes.
void ec_scalar_to_bytes(const EC_GROUP *group, uint8_t *out, size_t *out_len,
                        const EC_SCALAR *in) {
  const size_t num_bytes = BN_num_bytes(&group->order);

  // Copy |in->bytes| in reverse, so that |in->bytes[0]| lands in the final
  // output position.
  for (size_t remaining = num_bytes; remaining > 0; remaining--) {
    out[remaining - 1] = in->bytes[num_bytes - remaining];
  }

  // NOTE(review): presumably this rewrites the same |num_bytes| of |out| from
  // |in->words| in big-endian order, as the helper's name suggests — confirm
  // whether the reversal loop above is still needed.
  bn_words_to_big_endian(out, num_bytes, in->words, group->order.width);

  *out_len = num_bytes;
}

@@ -67,11 +65,7 @@ int ec_scalar_from_bytes(const EC_GROUP *group, EC_SCALAR *out,
return 0;
}

OPENSSL_memset(out, 0, sizeof(EC_SCALAR));

for (size_t i = 0; i < len; i++) {
out->bytes[i] = in[len - i - 1];
}
bn_big_endian_to_words(out->words, group->order.width, in, len);

if (!bn_less_than_words(out->words, group->order.d, group->order.width)) {
OPENSSL_PUT_ERROR(EC, EC_R_INVALID_SCALAR);
@@ -330,9 +330,7 @@ int ec_GFp_simple_cmp_x_coordinate(const EC_GROUP *group, const EC_RAW_POINT *p,
// ec_GFp_simple_felem_to_bytes serializes |in| into |out| and sets |*out_len|
// to the number of bytes written, which is |BN_num_bytes| of the group's
// field. |out| must have room for that many bytes.
void ec_GFp_simple_felem_to_bytes(const EC_GROUP *group, uint8_t *out,
                                  size_t *out_len, const EC_FELEM *in) {
  const size_t num_bytes = BN_num_bytes(&group->field);

  // Copy |in->bytes| in reverse, so that the last of the |num_bytes| input
  // bytes becomes the first output byte.
  for (size_t remaining = num_bytes; remaining > 0; remaining--) {
    out[num_bytes - remaining] = in->bytes[remaining - 1];
  }

  // NOTE(review): presumably this rewrites the same |num_bytes| of |out| from
  // |in->words| in big-endian order, as the helper's name suggests — confirm
  // whether the reversal loop above is still needed.
  bn_words_to_big_endian(out, num_bytes, in->words, group->field.width);

  *out_len = num_bytes;
}

@@ -343,10 +341,7 @@ int ec_GFp_simple_felem_from_bytes(const EC_GROUP *group, EC_FELEM *out,
return 0;
}

OPENSSL_memset(out, 0, sizeof(EC_FELEM));
for (size_t i = 0; i < len; i++) {
out->bytes[i] = in[len - 1 - i];
}
bn_big_endian_to_words(out->words, group->field.width, in, len);

if (!bn_less_than_words(out->words, group->field.d, group->field.width)) {
OPENSSL_PUT_ERROR(EC, EC_R_DECODE_ERROR);
@@ -78,10 +78,7 @@ static void digest_to_scalar(const EC_GROUP *group, EC_SCALAR *out,
if (digest_len > num_bytes) {
digest_len = num_bytes;
}
OPENSSL_memset(out, 0, sizeof(EC_SCALAR));
for (size_t i = 0; i < digest_len; i++) {
out->bytes[i] = digest[digest_len - 1 - i];
}
bn_big_endian_to_words(out->words, order->width, digest, digest_len);

// If it is still too long, truncate remaining bits with a shift.
if (8 * digest_len > num_bits) {
@@ -901,6 +901,18 @@ static inline void CRYPTO_store_word_le(void *out, crypto_word_t v) {
OPENSSL_memcpy(out, &v, sizeof(v));
}

// CRYPTO_load_word_be copies |sizeof(crypto_word_t)| bytes from |in|, which
// need not be aligned, and returns them with their byte order reversed.
//
// NOTE(review): the swap is unconditional, so the result is a big-endian load
// only when the host itself is little-endian — presumably the only supported
// configuration; confirm.
static inline crypto_word_t CRYPTO_load_word_be(const void *in) {
  crypto_word_t word;
  OPENSSL_memcpy(&word, in, sizeof(word));
#if !defined(OPENSSL_64_BIT)
  static_assert(sizeof(word) == 4, "crypto_word_t has unexpected size");
  word = CRYPTO_bswap4(word);
#else
  static_assert(sizeof(word) == 8, "crypto_word_t has unexpected size");
  word = CRYPTO_bswap8(word);
#endif
  return word;
}


// Bit rotation functions.
//
@@ -854,7 +854,7 @@ TEST_P(TrustTokenBadKeyTest, BadKey) {
&key->key.y1, &key->key.xs, &key->key.ys};

// Corrupt private key scalar.
scalars[corrupted_key()]->bytes[0] ^= 42;
scalars[corrupted_key()]->words[0] ^= 42;

size_t tokens_issued;
ASSERT_TRUE(TRUST_TOKEN_ISSUER_issue(