diff --git a/blocks.go b/blocks.go index e7befd0..817b215 100644 --- a/blocks.go +++ b/blocks.go @@ -1,4 +1,4 @@ -// +build !amd64 appengine gccgo +// +build !arm,!amd64 appengine gccgo package siphash diff --git a/blocks_arm.s b/blocks_arm.s new file mode 100644 index 0000000..bfcfd9b --- /dev/null +++ b/blocks_arm.s @@ -0,0 +1,144 @@ +#include "textflag.h" + +#define ROUND()\ + ADD.S R2,R0,R0;\ + ADC R3,R1,R1;\ + EOR R2<<13,R0,R8;\ + EOR R3>>19,R8,R8;\ + EOR R2>>19,R1,R11;\ + EOR R3<<13,R11,R11;\ + ADD.S R6,R4,R4;\ + ADC R7,R5,R5;\ + EOR R6<<16,R4,R2;\ + EOR R7>>16,R2,R2;\ + EOR R6>>16,R5,R3;\ + EOR R7<<16,R3,R3;\ + ADD.S R2,R1,R1;\ + ADC R3,R0,R0;\ + EOR R2<<21,R1,R6;\ + EOR R3>>11,R6,R6;\ + EOR R2>>11,R0,R7;\ + EOR R3<<21,R7,R7;\ + ADD.S R8,R4,R4;\ + ADC R11,R5,R5;\ + EOR R8<<17,R4,R2;\ + EOR R11>>15,R2,R2;\ + EOR R8>>15,R5,R3;\ + EOR R11<<17,R3,R3;\ + ADD.S R2,R1,R1;\ + ADC R3,R0,R0;\ + EOR R2<<13,R1,R8;\ + EOR R3>>19,R8,R8;\ + EOR R2>>19,R0,R11;\ + EOR R3<<13,R11,R11;\ + ADD.S R6,R5,R5;\ + ADC R7,R4,R4;\ + EOR R6<<16,R5,R2;\ + EOR R7>>16,R2,R2;\ + EOR R6>>16,R4,R3;\ + EOR R7<<16,R3,R3;\ + ADD.S R2,R0,R0;\ + ADC R3,R1,R1;\ + EOR R2<<21,R0,R6;\ + EOR R3>>11,R6,R6;\ + EOR R2>>11,R1,R7;\ + EOR R3<<21,R7,R7;\ + ADD.S R8,R5,R5;\ + ADC R11,R4,R4;\ + EOR R8<<17,R5,R2;\ + EOR R11>>15,R2,R2;\ + EOR R8>>15,R4,R3;\ + EOR R11<<17,R3,R3; + +// once(d *digest) +TEXT ·once(SB),NOSPLIT,$4-4 + MOVW d+0(FP),R8 + MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7] + MOVW 48(R8),R12 + MOVW 52(R8),R14 + EOR R12,R6,R6 + EOR R14,R7,R7 + ROUND() + EOR R12,R0,R0 + EOR R14,R1,R1 + MOVW d+0(FP),R8 + MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8) + RET + +// finalize(d *digest) uint64 +TEXT ·finalize(SB),NOSPLIT,$4-12 + MOVW d+0(FP),R8 + MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7] + MOVW 48(R8),R12 + MOVW 52(R8),R14 + EOR R12,R6,R6 + EOR R14,R7,R7 + ROUND() + EOR R12,R0,R0 + EOR R14,R1,R1 + EOR $255,R4 + ROUND() + ROUND() + EOR R2,R0,R0 + EOR R3,R1,R1 + EOR R6,R4,R4 + EOR R7,R5,R5 + EOR R4,R0,R0 + EOR R5,R1,R1 + 
MOVW R0,ret_lo+4(FP) + MOVW R1,ret_hi+8(FP) + RET + +// blocks(d *digest, data []uint8) +TEXT ·blocks(SB),NOSPLIT,$8-16 + MOVW R9,sav-8(SP) + MOVW d+0(FP),R8 + MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7] + MOVW p+4(FP),R9 + MOVW p_len+8(FP),R11 + ADD R9,R11,R11 + MOVW R11,endp-4(SP) + AND.S $3,R9,R8 + BNE blocksunaligned +blocksloop: + MOVM.IA.W (R9),[R12,R14] + EOR R12,R6,R6 + EOR R14,R7,R7 + ROUND() + EOR R12,R0,R0 + EOR R14,R1,R1 + MOVW endp-4(SP),R11 + CMP R11,R9 + BLO blocksloop + MOVW d+0(FP),R8 + MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8) + MOVW sav-8(SP),R9 + RET +blocksunaligned: + MOVBU (R9),R12 + MOVBU 1(R9),R11 + ORR R11<<8,R12,R12 + MOVBU 2(R9),R11 + ORR R11<<16,R12,R12 + MOVBU 3(R9),R11 + ORR R11<<24,R12,R12 + MOVBU 4(R9),R14 + MOVBU 5(R9),R11 + ORR R11<<8,R14,R14 + MOVBU 6(R9),R11 + ORR R11<<16,R14,R14 + MOVBU 7(R9),R11 + ORR R11<<24,R14,R14 + ADD $8,R9,R9 + EOR R12,R6,R6 + EOR R14,R7,R7 + ROUND() + EOR R12,R0,R0 + EOR R14,R1,R1 + MOVW endp-4(SP),R11 + CMP R11,R9 + BLO blocksunaligned + MOVW d+0(FP),R8 + MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8) + MOVW sav-8(SP),R9 + RET diff --git a/blocks_asm.go b/blocks_asm.go new file mode 100644 index 0000000..a238394 --- /dev/null +++ b/blocks_asm.go @@ -0,0 +1,21 @@ +// +build arm amd64,!appengine,!gccgo + +// Written in 2012 by Dmitry Chestnykh. +// +// To the extent possible under law, the author have dedicated all copyright +// and related and neighboring rights to this software to the public domain +// worldwide. This software is distributed without any warranty. 
+// http://creativecommons.org/publicdomain/zero/1.0/ + +// This file contains function declarations for use with assembly implementations of Hash() + +package siphash + +//go:noescape +func blocks(d *digest, p []uint8) + +//go:noescape +func finalize(d *digest) uint64 + +//go:noescape +func once(d *digest) diff --git a/hash.go b/hash.go index 44f2778..0de28d0 100644 --- a/hash.go +++ b/hash.go @@ -1,4 +1,4 @@ -// +build !amd64 appengine gccgo +// +build !arm,!amd64 appengine gccgo // Written in 2012 by Dmitry Chestnykh. // diff --git a/hash128.go b/hash128.go index b5dac64..634ce57 100644 --- a/hash128.go +++ b/hash128.go @@ -1,4 +1,4 @@ -// +build !amd64 appengine gccgo +// +build !arm,!amd64 appengine gccgo // Written in 2012 by Dmitry Chestnykh. // Modifications 2014 for 128-bit hash function by Damian Gryski. // diff --git a/hash_arm.go b/hash_arm.go new file mode 100644 index 0000000..428dd96 --- /dev/null +++ b/hash_arm.go @@ -0,0 +1,27 @@ +// +build arm + +package siphash + +// NB: The ARM implementation forgoes extra speed for Hash() +// and Hash128() by simply reusing the same blocks() implementation +// in assembly used by the streaming hash. + +func Hash(k0, k1 uint64, p []byte) uint64 { + var d digest + d.size = Size + d.k0 = k0 + d.k1 = k1 + d.Reset() + d.Write(p) + return d.Sum64() +} + +func Hash128(k0, k1 uint64, p []byte) (uint64, uint64) { + var d digest + d.size = Size128 + d.k0 = k0 + d.k1 = k1 + d.Reset() + d.Write(p) + return d.sum128() +} diff --git a/hash_asm.go b/hash_asm.go index 73c99ea..1437a2a 100644 --- a/hash_asm.go +++ b/hash_asm.go @@ -22,12 +22,3 @@ func Hash(k0, k1 uint64, b []byte) uint64 // Hash128 returns the 128-bit SipHash-2-4 of the given byte slice with two // 64-bit parts of 128-bit key: k0 and k1. 
func Hash128(k0, k1 uint64, b []byte) (uint64, uint64) - -//go:noescape -func blocks(d *digest, p []uint8) - -//go:noescape -func finalize(d *digest) uint64 - -//go:noescape -func once(d *digest) diff --git a/siphash_test.go b/siphash_test.go index 4e88d2d..843b051 100644 --- a/siphash_test.go +++ b/siphash_test.go @@ -347,16 +347,60 @@ func TestAlign(t *testing.T) { data := "0076a9143219adce9b6f0a21fd53cb17e2fd9b2b4fac40b388ac" k0 := uint64(316665572293978160) k1 := uint64(8573005253291875333) - want := uint64(16770526497674945769) + + want := []uint64{ + 16380727507974277821, + 16770526497674945769, + 11373998677292870540, + 10374222295991299613, + } + want128 := []uint64{ + 14802151199638645495, + 13251497035884452880, + 7034723853391616289, + 16742813562040528752, + 10468120447644272532, + 10941274532208162335, + 11293904790559355408, + 15432350433573653068, + } d, err := hex.DecodeString(data) if err != nil { t.Fatal(err) } - res := Hash(k0, k1, d[1:]) - if res != want { - t.Fatalf("Expected %v got %v", want, res) + var k [16]byte + binary.LittleEndian.PutUint64(k[0:], k0) + binary.LittleEndian.PutUint64(k[8:], k1) + + for i := range want { + res := Hash(k0, k1, d[i:]) + if res != want[i] { + t.Fatalf("Expected %v got %v", want[i], res) + } + reslo, reshi := Hash128(k0, k1, d[i:]) + if reslo != want128[i*2] { + t.Fatalf("Expected %v got %v", want128[i*2], reslo) + } + if reshi != want128[i*2+1] { + t.Fatalf("Expected %v got %v", want128[i*2+1], reshi) + } + dig := newDigest(Size, k[:]) + dig.Write(d[i:]) + res = dig.Sum64() + if res != want[i] { + t.Fatalf("Expected %v got %v", want[i], res) + } + dig128 := newDigest(Size128, k[:]) + dig128.Write(d[i:]) + reslo, reshi = dig128.sum128() + if reslo != want128[i*2] { + t.Fatalf("Expected %v got %v", want128[i*2], reslo) + } + if reshi != want128[i*2+1] { + t.Fatalf("Expected %v got %v", want128[i*2+1], reshi) + } } }