Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize ScalarMult with NAF #10

Merged
merged 2 commits into from
Feb 5, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,16 @@ func BenchmarkScalarBaseMult(b *testing.B) {
}
}

// BenchmarkScalarBaseMultLarge benchmarks the secp256k1 curve ScalarBaseMult
// function with abnormally large k values.
func BenchmarkScalarBaseMultLarge(b *testing.B) {
k := fromHex("d74bf844b0862475103d96a611cf2d898447e288d34b360bc885cb8ce7c005751111111011111110")
curve := btcec.S256()
for i := 0; i < b.N; i++ {
curve.ScalarBaseMult(k.Bytes())
}
}

// BenchmarkScalarMult benchmarks the secp256k1 curve ScalarMult function.
func BenchmarkScalarMult(b *testing.B) {
x := fromHex("34f9460f0e4f08393d192b3c5133a6ba099aa0ad9fd54ebccfacdfa239ff49c6")
Expand All @@ -68,6 +78,14 @@ func BenchmarkScalarMult(b *testing.B) {
}
}

// BenchmarkNAF measures how long btcec.NAF takes to convert a scalar given
// as 64 hex digits into its non-adjacent form.
func BenchmarkNAF(b *testing.B) {
	scalar := fromHex("d74bf844b0862475103d96a611cf2d898447e288d34b360bc885cb8ce7c00575")
	for iter := 0; iter < b.N; iter++ {
		// The byte conversion stays inside the loop so the measured work
		// per iteration is the same as before.
		btcec.NAF(scalar.Bytes())
	}
}

// BenchmarkSigVerify benchmarks how long it takes the secp256k1 curve to
// verify signatures.
func BenchmarkSigVerify(b *testing.B) {
Expand Down
281 changes: 256 additions & 25 deletions btcec.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,23 @@ var (
// interface from crypto/elliptic.
type KoblitzCurve struct {
*elliptic.CurveParams
q *big.Int
H int // cofactor of the curve.
q *big.Int
H int // cofactor of the curve.

// The next 6 values (lambda, beta, a1, b1, a2, b2) are used specifically
// for the endomorphism optimization in ScalarMult.

// lambda should fulfill lambda^3 = 1 mod N where N is the order of G.
lambda *big.Int
// beta should fulfill beta^3 = 1 mod P where P is the prime field of the
// curve. ScalarMult multiplies a point's x coordinate by beta to obtain
// the second point used alongside the split scalar.
beta *fieldVal
// a1, b1, a2 and b2 are the constants consumed by splitK. They are
// explained in detail in Guide to Elliptic Curve Cryptography
// (Hankerson, Menezes, Vanstone), Algorithm 3.74.
a1 *big.Int
b1 *big.Int
a2 *big.Int
b2 *big.Int
// byteSize is the curve's BitSize divided by 8. moduloReduce compares
// scalar lengths against it to decide whether to reduce modulo N.
byteSize int
// bytePoints is the lookup table ScalarBaseMult indexes by byte position
// and byte value; each entry holds the three field values passed to
// addJacobian as a point's coordinates.
bytePoints *[32][256][3]fieldVal
}

Expand Down Expand Up @@ -594,29 +609,224 @@ func (curve *KoblitzCurve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
return curve.fieldJacobianToBigAffine(fx3, fy3, fz3)
}

// splitK decomposes the big-endian scalar k into a balanced length-two
// representation (k1, k2), following Algorithm 3.74 of Guide to Elliptic
// Curve Cryptography (Hankerson, Menezes, Vanstone).
//
// It returns the magnitudes of k1 and k2 as big-endian byte slices followed
// by their signs (as reported by big.Int.Sign). The signs are returned
// separately because big.Int.Bytes drops them, and either value may be
// negative.
//
// Whatever c1 and c2 turn out to be, k = k1 + k2*lambda (mod n) still holds;
// that follows from how a1/b1/a2/b2 are derived. c1 and c2 are only chosen so
// that max(k1, k2) stays small.
func (curve *KoblitzCurve) splitK(k []byte) ([]byte, []byte, int, int) {
	// Everything here uses big.Int, which is slow. Should this become a
	// bottleneck, a fieldVal-like type over N (rather than P) could replace
	// it.
	scalar := new(big.Int).SetBytes(k)

	// Step 4: c1 = b2*k / n. The book rounds the quotient; rounding is not
	// required for correctness and is skipped because of its cost.
	c1 := new(big.Int).Mul(curve.b2, scalar)
	c1.Div(c1, curve.N)

	// Step 4: c2 = b1*k / n. The sign is reversed relative to the book to
	// save an operation, and rounding is skipped as for c1.
	c2 := new(big.Int).Mul(curve.b1, scalar)
	c2.Div(c2, curve.N)

	// Step 5: k1 = k - c1*a1 - c2*a2, with c2's reversed sign turning the
	// last subtraction into an addition.
	k1 := new(big.Int).Mul(c1, curve.a1)
	k1.Sub(scalar, k1)
	k1.Add(k1, new(big.Int).Mul(c2, curve.a2))

	// Step 5: k2 = -c1*b1 - c2*b2, again with c2's sign reversed.
	k2 := new(big.Int).Mul(c2, curve.b2)
	k2.Sub(k2, new(big.Int).Mul(c1, curve.b1))

	return k1.Bytes(), k2.Bytes(), k1.Sign(), k2.Sign()
}

// moduloReduce shrinks a scalar that is longer than curve.byteSize bytes by
// taking it modulo curve.N. Because G has order N, reducing the scalar this
// way does not change the resulting point. Scalars that already fit are
// returned unchanged (the same slice, not a copy).
func (curve *KoblitzCurve) moduloReduce(k []byte) []byte {
	// Nothing to do when k already fits within the curve's byte size.
	if len(k) <= curve.byteSize {
		return k
	}

	// Interpret k as a big-endian integer and reduce it modulo the order.
	reduced := new(big.Int).SetBytes(k)
	return reduced.Mod(reduced, curve.N).Bytes()
}

// NAF takes a positive integer k and returns the Non-Adjacent Form (NAF)
// as two byte slices. The first is where 1's should be. The second is where
// -1's should be.
// NAF is also convenient in that on average, only 1/3rd of its values are
// non-zero.
// The algorithm here is Algorithm 3.30 from Guide to Elliptic Curve Cryptography (ref above)
// Essentially, this makes it possible to minimize the number of operations
// since the resulting ints returned will be at least 50% 0's.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is no longer accurate.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be fixed. not sure why it's not outdated yet.

// NAF converts the non-negative big-endian integer k to Non-Adjacent Form.
// It returns two byte slices, each one byte longer than k: the first has a
// bit set wherever the NAF digit is +1, the second wherever it is -1. The
// value of k equals the first slice minus the second when both are read as
// big-endian integers.
func NAF(k []byte) ([]byte, []byte) {
	// A run of consecutive 1 bits is replaced by a -1 at the bottom of the
	// run and a +1 just above its top, using the identity
	//   2^n + 2^(n-1) + ... + 2^(n-k) = 2^(n+1) - 2^(n-k)
	// The +1 can land one bit above the top of k, which is why the outputs
	// carry an extra leading byte. Bits are visited from least to most
	// significant so that the pending carry is always known.
	size := len(k)
	posBits := make([]byte, size+1)
	negBits := make([]byte, size+1)

	carrying := false
	for idx := size - 1; idx >= 0; idx-- {
		octet := k[idx]
		for bit := uint(0); bit < 8; bit++ {
			mask := byte(1) << bit
			cur := octet&mask != 0

			// Look at the next more significant bit, which may live in
			// the preceding byte; above the top of k it is zero.
			var next bool
			switch {
			case bit < 7:
				next = octet&(mask<<1) != 0
			case idx > 0:
				next = k[idx-1]&1 != 0
			}

			switch {
			case carrying && cur:
				// Still inside a run of 1s: the digit is 0 and the
				// carry keeps propagating.
			case carrying && next:
				// A lone 0 between runs becomes 1 after the carry and
				// immediately starts a new run, so emit -1 and keep
				// carrying.
				negBits[idx+1] |= mask
			case carrying:
				// The run has ended: emit the +1 above it.
				posBits[idx+1] |= mask
				carrying = false
			case cur && next:
				// Start of at least two consecutive 1s: emit -1 and
				// begin carrying.
				negBits[idx+1] |= mask
				carrying = true
			case cur:
				// Isolated 1: emit it as-is.
				posBits[idx+1] |= mask
			}
		}
	}

	// A carry left over after the top bit spills into the extra byte.
	if carrying {
		posBits[0] = 1
	}

	return posBits, negBits
}

// ScalarMult returns k*(Bx, By) where k is a big endian integer.
// Part of the elliptic.Curve interface.
func (curve *KoblitzCurve) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big.Int) {
// This uses the left to right binary method for point multiplication:

// Point Q = ∞ (point at infinity).
qx, qy, qz := new(fieldVal), new(fieldVal), new(fieldVal)

// Point P = the point to multiply the scalar with.
px, py := curve.bigAffineToField(Bx, By)
pz := new(fieldVal).SetInt(1)
// decompose K into k1 and k2 in order to halve the number of EC ops
// see Algorithm 3.74 in Guide to Elliptical Curve Cryptography by
// Hankerson, et al.
k1, k2, signK1, signK2 := curve.splitK(curve.moduloReduce(k))

// The main equation here to remember is
// k * P = k1 * P + k2 * ϕ(P)
// P1 below is P in the equation, P2 below is ϕ(P) in the equation
p1x, p1y := curve.bigAffineToField(Bx, By)
// For NAF, we need the negative point
p1yNeg := new(fieldVal).NegateVal(p1y, 1)
p1z := new(fieldVal).SetInt(1)
// Note ϕ(x,y) = (βx,y), the Jacobian z coordinate is 1, so this math
// goes through.
p2x := new(fieldVal).Mul2(p1x, curve.beta)
p2y := new(fieldVal).Set(p1y)
// For NAF, we need the negative point
p2yNeg := new(fieldVal).NegateVal(p2y, 1)
p2z := new(fieldVal).SetInt(1)

// If k1 or k2 are negative, we flip the positive/negative values
if signK1 == -1 {
p1y, p1yNeg = p1yNeg, p1y
}
if signK2 == -1 {
p2y, p2yNeg = p2yNeg, p2y
}

// NAF versions of k1 and k2 should have a lot more zeros
// the Pos version of the bytes contain the +1's and the Neg versions
// contain the -1's
k1PosNAF, k1NegNAF := NAF(k1)
k2PosNAF, k2NegNAF := NAF(k2)
k1Len := len(k1PosNAF)
k2Len := len(k2PosNAF)

m := k1Len
if m < k2Len {
m = k2Len
}

// We add left-to-right using the NAF optimization. This is using
// algorithm 3.77 from Guide to Elliptical Curve Cryptography.
// This should be faster overall since there will be a lot more instances
// of 0, hence reducing the number of Jacobian additions at the cost
// of 1 possible extra doubling.
var k1BytePos, k1ByteNeg, k2BytePos, k2ByteNeg byte
for i := 0; i < m; i++ {
// Since we're going left-to-right, we need to pad the front with 0's
if i < m-k1Len {
k1BytePos = 0
k1ByteNeg = 0
} else {
k1BytePos = k1PosNAF[i-(m-k1Len)]
k1ByteNeg = k1NegNAF[i-(m-k1Len)]
}
if i < m-k2Len {
k2BytePos = 0
k2ByteNeg = 0
} else {
k2BytePos = k2PosNAF[i-(m-k2Len)]
k2ByteNeg = k2NegNAF[i-(m-k2Len)]
}

// Double and add as necessary depending on the bits set in the scalar.
for _, byteVal := range k {
for bitNum := 0; bitNum < 8; bitNum++ {
// Q = 2*Q
for j := 7; j >= 0; j-- {
// Q = 2 * Q
curve.doubleJacobian(qx, qy, qz, qx, qy, qz)
if byteVal&0x80 == 0x80 {
// Q = Q + P
curve.addJacobian(qx, qy, qz, px, py, pz, qx,
qy, qz)

if k1BytePos&0x80 == 0x80 {
curve.addJacobian(qx, qy, qz, p1x, p1y, p1z, qx, qy, qz)
} else if k1ByteNeg&0x80 == 0x80 {
curve.addJacobian(qx, qy, qz, p1x, p1yNeg, p1z, qx, qy, qz)
}

if k2BytePos&0x80 == 0x80 {
curve.addJacobian(qx, qy, qz, p2x, p2y, p2z, qx, qy, qz)
} else if k2ByteNeg&0x80 == 0x80 {
curve.addJacobian(qx, qy, qz, p2x, p2yNeg, p2z, qx, qy, qz)
}
byteVal <<= 1
k1BytePos <<= 1
k1ByteNeg <<= 1
k2BytePos <<= 1
k2ByteNeg <<= 1
}
}

Expand All @@ -628,14 +838,8 @@ func (curve *KoblitzCurve) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big
// big endian integer.
// Part of the elliptic.Curve interface.
func (curve *KoblitzCurve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
// Fall back to slower generic scalar point multiplication when the integer is
// larger than what can be used with the precomputed table which enables
// accelerated multiplication by the known fixed point.
if len(k) > len(curve.bytePoints) {
return curve.ScalarMult(curve.Gx, curve.Gy, k)
}

diff := len(curve.bytePoints) - len(k)
newK := curve.moduloReduce(k)
diff := len(curve.bytePoints) - len(newK)

// Point Q = ∞ (point at infinity).
qx, qy, qz := new(fieldVal), new(fieldVal), new(fieldVal)
Expand All @@ -645,7 +849,7 @@ func (curve *KoblitzCurve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
// expressing k in base-256 which it already sort of is.
// Each "digit" in the 8-bit window can be looked up using bytePoints
// and added together.
for i, byteVal := range k {
for i, byteVal := range newK {
point := curve.bytePoints[diff+i][byteVal]
curve.addJacobian(qx, qy, qz, &point[0], &point[1], &point[2], qx, qy, qz)
}
Expand Down Expand Up @@ -684,6 +888,33 @@ func initS256() {
if err := loadS256BytePoints(); err != nil {
panic(err)
}

// Next 6 constants are from Hal Finney's bitcointalk.org post:
// https://bitcointalk.org/index.php?topic=3238.msg45565#msg45565
// May he rest in peace.
// These have been independently verified by Dave Collins using
// an ecc math script.
secp256k1.lambda, _ = new(big.Int).SetString("5363AD4CC05C30E0A5261C028812645A122E22EA20816678DF02967C1B23BD72", 16)
secp256k1.beta = new(fieldVal).SetHex("7AE96A2B657C07106E64479EAC3434E99CF0497512F58995C1396C28719501EE")
secp256k1.a1, _ = new(big.Int).SetString("3086D221A7D46BCDE86C90E49284EB15", 16)
secp256k1.b1, _ = new(big.Int).SetString("-E4437ED6010E88286F547FA90ABFE4C3", 16)
secp256k1.a2, _ = new(big.Int).SetString("114CA50F7A8E2F3F657C1108D9D44CFD8", 16)
secp256k1.b2, _ = new(big.Int).SetString("3086D221A7D46BCDE86C90E49284EB15", 16)

// Cached for convenience since this value is used repeatedly.
secp256k1.byteSize = secp256k1.BitSize / 8

// Alternatively, we can use the parameters below, however, they seem
// to be about 8% slower.
// λ = AC9C52B33FA3CF1F5AD9E3FD77ED9BA4A880B9FC8EC739C2E0CFC810B51283CE
// β = 851695D49A83F8EF919BB86153CBCB16630FB68AED0A766A3EC693D68E6AFA40
// secp256k1.lambda, _ = new(big.Int).SetString("AC9C52B33FA3CF1F5AD9E3FD77ED9BA4A880B9FC8EC739C2E0CFC810B51283CE", 16)
// secp256k1.beta = new(fieldVal).SetHex("851695D49A83F8EF919BB86153CBCB16630FB68AED0A766A3EC693D68E6AFA40")
// secp256k1.a1, _ = new(big.Int).SetString("E4437ED6010E88286F547FA90ABFE4C3", 16)
// secp256k1.b1, _ = new(big.Int).SetString("-3086D221A7D46BCDE86C90E49284EB15", 16)
// secp256k1.a2, _ = new(big.Int).SetString("3086D221A7D46BCDE86C90E49284EB15", 16)
// secp256k1.b2, _ = new(big.Int).SetString("114CA50F7A8E2F3F657C1108D9D44CFD8", 16)

}

// S256 returns a Curve which implements secp256k1.
Expand Down
Loading