Skip to content

Commit

Permalink
inline permutations
Browse files Browse the repository at this point in the history
  • Loading branch information
qti3e committed Apr 16, 2024
1 parent bbb41a1 commit ca82907
Show file tree
Hide file tree
Showing 4 changed files with 306 additions and 35 deletions.
4 changes: 3 additions & 1 deletion bench.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { hash as rustWasmHash } from "./blake3-wasm/pkg/blake3_wasm.js";
import { sha256 } from "https://denopkg.com/chiefbiiko/sha256@v1.0.0/mod.ts";
import { hash as jsHashV0 } from "./js/v0.ts";
import { hash as jsHashV1 } from "./js/v1.ts";
import { hash as jsHashV2 } from "./js/v2.ts";
import { hash as latestHash } from "./js/latest.ts";

// Share the same input buffer across benchmarks.
Expand Down Expand Up @@ -45,4 +46,5 @@ bench("Sha256", sha256);
bench("Rust (wasm)", rustWasmHash);
bench("Js#01", jsHashV0);
bench("Js#02", jsHashV1);
bench("Js#03", latestHash);
bench("Js#03", jsHashV2);
bench("Js#04", latestHash);
11 changes: 11 additions & 0 deletions codegen/permute.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Message-word permutation that is applied to the schedule after each round.
const MSG_PERMUTATION = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8];

// Number of `round(...)` calls to generate.
const ROUND_COUNT = 7;

// Prints one generated `round` call for the given message-word order.
function emitRound(order) {
  const args = order.join(",");
  console.log(`round(state, m, [${args}]);`);
}

// Start from the identity order 0, ..., 15 and permute once per round, so
// each printed line carries the fully composed schedule for that round.
let numbers = Array.from({ length: MSG_PERMUTATION.length }, (_, idx) => idx);
for (let roundNo = 0; roundNo < ROUND_COUNT; roundNo += 1) {
  emitRound(numbers);
  const previous = numbers;
  numbers = MSG_PERMUTATION.map((source) => previous[source]);
}
51 changes: 17 additions & 34 deletions js/latest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ const IV = new Uint32Array([
0x1f83d9ab, 0x5be0cd19,
]) as W8;

const MSG_PERMUTATION = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8];

function rightRotate(word: Word, bits: number): Word {
return (word >>> bits) | (word << (32 - bits));
}
Expand All @@ -46,24 +44,17 @@ function g(
state[b] = rightRotate(state[b] ^ state[c], 7);
}

function round(state: W16, m: W16) {
function round(state: W16, m: W16, p: number[]) {
// Mix the columns.
g(state, 0, 4, 8, 12, m[0], m[1]);
g(state, 1, 5, 9, 13, m[2], m[3]);
g(state, 2, 6, 10, 14, m[4], m[5]);
g(state, 3, 7, 11, 15, m[6], m[7]);
g(state, 0, 4, 8, 12, m[p[0]], m[p[1]]);
g(state, 1, 5, 9, 13, m[p[2]], m[p[3]]);
g(state, 2, 6, 10, 14, m[p[4]], m[p[5]]);
g(state, 3, 7, 11, 15, m[p[6]], m[p[7]]);
// Mix the diagonals.
g(state, 0, 5, 10, 15, m[8], m[9]);
g(state, 1, 6, 11, 12, m[10], m[11]);
g(state, 2, 7, 8, 13, m[12], m[13]);
g(state, 3, 4, 9, 14, m[14], m[15]);
}

function permute(m: W16) {
const copy = new Uint32Array(m);
for (let i = 0; i < 16; ++i) {
m[i] = copy[MSG_PERMUTATION[i]];
}
g(state, 0, 5, 10, 15, m[p[8]], m[p[9]]);
g(state, 1, 6, 11, 12, m[p[10]], m[p[11]]);
g(state, 2, 7, 8, 13, m[p[12]], m[p[13]]);
g(state, 3, 4, 9, 14, m[p[14]], m[p[15]]);
}

function compress(
Expand Down Expand Up @@ -92,22 +83,14 @@ function compress(
flags,
]) as W16;

const block = new Uint32Array(block_words) as W16;

round(state, block); // round 1
permute(block);
round(state, block); // round 2
permute(block);
round(state, block); // round 3
permute(block);
round(state, block); // round 4
permute(block);
round(state, block); // round 5
permute(block);
round(state, block); // round 6
permute(block);
round(state, block); // round 7
permute(block);
const m = block_words;
round(state, m, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
round(state, m, [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]);
round(state, m, [3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1]);
round(state, m, [10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6]);
round(state, m, [12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4]);
round(state, m, [9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7]);
round(state, m, [11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13]);

for (let i = 0; i < 8; ++i) {
state[i] ^= state[i + 8];
Expand Down
275 changes: 275 additions & 0 deletions js/v2.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
// A 32-bit word, carried around as a plain JS number.
type Word = number;
// Sixteen 32-bit words: used for both a message block and the compression state.
type W16 = Uint32Array & { length: 16 };
// Eight 32-bit words: a chaining value (see `first8Words` / `IV`).
type W8 = Uint32Array & { length: 8 };
// Sixty-four raw bytes. NOTE(review): presumably one input block; confirm usage.
type Block = Uint8Array & { length: 64 };

// Size constants, in bytes. `hash` returns 32 bytes and consumes input in
// 64-byte blocks grouped into 1024-byte chunks (16 blocks per chunk).
// NOTE(review): KEY_LEN presumably sizes the key of a keyed mode; confirm.
const OUT_LEN = 32;
const KEY_LEN = 32;
const BLOCK_LEN = 64;
const CHUNK_LEN = 1024;

// Bit flags that are OR-ed together and passed to `compress` as `flags`.
// CHUNK_START / CHUNK_END mark the first / last block of a chunk.
const CHUNK_START = 1 << 0;
const CHUNK_END = 1 << 1;
// PARENT marks a compression that merges two child chaining values.
const PARENT = 1 << 2;
// ROOT marks the final compression, whose output becomes the digest.
const ROOT = 1 << 3;
// NOTE(review): the three flags below are presumably for keyed hashing and
// key derivation modes; confirm against the callers that use them.
const KEYED_HASH = 1 << 4;
const DERIVE_KEY_CONTEXT = 1 << 5;
const DERIVE_KEY_MATERIAL = 1 << 6;

// Initialization vector: `hash` uses it as the starting chaining value (key
// words), and `compress` copies its first four words into state[8..11].
const IV = new Uint32Array([
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c,
0x1f83d9ab, 0x5be0cd19,
]) as W8;

/**
 * Rotates the low 32 bits of `word` to the right by `bits` positions.
 *
 * The result is produced by a bitwise OR, so it is a signed 32-bit integer
 * (it may be negative when the top bit ends up set).
 *
 * @param word - The 32-bit value to rotate.
 * @param bits - Rotation distance in bits.
 * @returns The rotated value as a signed 32-bit integer.
 */
function rightRotate(word: Word, bits: number): Word {
  // Bits that stay in the word after shifting down (zero-filled from the top).
  const shiftedDown = word >>> bits;
  // Bits that fell off the bottom, moved back in at the top.
  const wrappedAround = word << (32 - bits);
  return shiftedDown | wrappedAround;
}

/**
 * Mixes four words of `state` in place, folding in two message words.
 *
 * The step runs twice: add `state[b]` and a message word into `state[a]`,
 * xor-rotate `state[d]`, add `state[d]` into `state[c]`, xor-rotate
 * `state[b]`. The first pass uses `mx` with rotations 16 and 12, the second
 * uses `my` with rotations 8 and 7. Additions are truncated to 32 bits.
 *
 * @param state - The 16-word state, updated in place.
 * @param a - Index of the first state word.
 * @param b - Index of the second state word.
 * @param c - Index of the third state word.
 * @param d - Index of the fourth state word.
 * @param mx - Message word mixed in during the first pass.
 * @param my - Message word mixed in during the second pass.
 */
function g(
  state: W16,
  a: number,
  b: number,
  c: number,
  d: number,
  mx: Word,
  my: Word,
) {
  // First pass: message word `mx`, rotations 16 and 12.
  const firstSum = (state[a] + state[b]) | 0;
  state[a] = (firstSum + mx) | 0;
  const firstD = state[d] ^ state[a];
  state[d] = rightRotate(firstD, 16);
  state[c] = (state[c] + state[d]) | 0;
  const firstB = state[b] ^ state[c];
  state[b] = rightRotate(firstB, 12);

  // Second pass: message word `my`, rotations 8 and 7.
  const secondSum = (state[a] + state[b]) | 0;
  state[a] = (secondSum + my) | 0;
  const secondD = state[d] ^ state[a];
  state[d] = rightRotate(secondD, 8);
  state[c] = (state[c] + state[d]) | 0;
  const secondB = state[b] ^ state[c];
  state[b] = rightRotate(secondB, 7);
}

/**
 * Runs one round over the 4x4 state: four column mixes followed by four
 * diagonal mixes, each consuming two message words.
 *
 * The message is never reordered; instead `p` lists, for each of the 16
 * message slots of this round, which word of `m` to read.
 *
 * @param state - The 16-word state, updated in place.
 * @param m - The 16 message words of the block (read only).
 * @param p - Message-word order for this round (16 indices into `m`).
 */
function round(state: W16, m: W16, p: number[]) {
  // Mix the columns: (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15).
  for (let col = 0; col < 4; ++col) {
    const slot = 2 * col;
    g(state, col, col + 4, col + 8, col + 12, m[p[slot]], m[p[slot + 1]]);
  }
  // Mix the diagonals: (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
  for (let col = 0; col < 4; ++col) {
    const slot = 8 + 2 * col;
    g(
      state,
      col,
      4 + ((col + 1) & 3),
      8 + ((col + 2) & 3),
      12 + ((col + 3) & 3),
      m[p[slot]],
      m[p[slot + 1]],
    );
  }
}

/**
 * Compresses one 16-word block into a fresh 16-word output state.
 *
 * The state is seeded with the 8-word chaining value, the first four words
 * of `IV`, the counter split into low and high 32-bit halves, the block
 * length and the flags. Seven rounds are then applied, each with its own
 * precomputed message-word order, and the two halves are folded together.
 *
 * @param chaining_value - Input chaining value (8 words, read only).
 * @param block_words - Message block (16 words, read only).
 * @param counter - Counter value; may exceed 32 bits.
 * @param block_len - Number of meaningful bytes in the block.
 * @param flags - Bitwise OR of the domain flags for this compression.
 * @returns A newly allocated 16-word state; its first 8 words form the next
 *   chaining value.
 */
function compress(
  chaining_value: W8,
  block_words: W16,
  counter: number,
  block_len: Word,
  flags: Word,
): W16 {
  const state = new Uint32Array(16) as W16;
  for (let i = 0; i < 8; ++i) {
    state[i] = chaining_value[i];
  }
  for (let i = 0; i < 4; ++i) {
    state[8 + i] = IV[i];
  }
  // Storing into the Uint32Array keeps only the low 32 bits of the counter.
  state[12] = counter;
  // High 32 bits of the counter.
  state[13] = (counter / 0x100000000) | 0;
  state[14] = block_len;
  state[15] = flags;

  // Each literal is the message permutation composed with itself k times, so
  // the block words themselves never need to be shuffled between rounds.
  round(state, block_words, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
  round(state, block_words, [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]);
  round(state, block_words, [3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1]);
  round(state, block_words, [10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6]);
  round(state, block_words, [12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4]);
  round(state, block_words, [9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7]);
  round(state, block_words, [11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13]);

  // Fold the upper half into the lower half, then feed the input chaining
  // value forward into the upper half.
  for (let lo = 0, hi = 8; lo < 8; ++lo, ++hi) {
    state[lo] ^= state[hi];
    state[hi] ^= chaining_value[lo];
  }

  return state;
}

/**
 * Extracts the chaining value (the first 8 words) from a compression output.
 *
 * `slice` already allocates a new, independent Uint32Array, so the source is
 * copied exactly once; later changes to `compression_output` do not affect
 * the returned value.
 *
 * @param compression_output - The 16-word result of `compress`.
 * @returns A new 8-word array holding words 0..7 of the input.
 */
function first8Words(compression_output: W16): W8 {
  return compression_output.slice(0, 8) as W8;
}

/**
 * Decodes the bytes of `array` from `offset` to its end as little-endian
 * 32-bit words into `words`, zero-padding whatever is left over.
 *
 * Complete 4-byte groups are decoded first. Every remaining entry of `words`
 * is then cleared, and a trailing partial group of 1 to 3 bytes is packed
 * into the next word with its upper bytes left as zero.
 *
 * @param array - Source bytes.
 * @param offset - Index of the first byte to read.
 * @param words - Destination; every entry is overwritten.
 */
function readLittleEndianWords(
  array: ArrayLike<number>,
  offset: number,
  words: Uint32Array,
) {
  const end = array.length;
  let wordIndex = 0;

  // Decode every complete group of four bytes.
  while (offset + 3 < end) {
    const b0 = array[offset];
    const b1 = array[offset + 1] << 8;
    const b2 = array[offset + 2] << 16;
    const b3 = array[offset + 3] << 24;
    words[wordIndex] = b0 | b1 | b2 | b3;
    wordIndex += 1;
    offset += 4;
  }

  // Clear everything that was not written above.
  words.fill(0, wordIndex);

  // Pack the trailing 1-3 bytes (if any) into the next word, lowest byte first.
  let shift = 0;
  while (offset < end) {
    words[wordIndex] |= array[offset] << shift;
    shift += 8;
    offset += 1;
  }
}

/**
 * Fills every entry of `words` with a little-endian 32-bit word decoded from
 * four consecutive bytes of `array`, starting at `offset`.
 *
 * No bounds check is made against `array`: the caller must guarantee that
 * `4 * words.length` bytes are available. Bytes read past the end contribute
 * zero bits.
 *
 * @param array - Source bytes.
 * @param offset - Index of the first byte to read.
 * @param words - Destination; every entry is overwritten.
 */
function readLittleEndianWordsFull(
  array: ArrayLike<number>,
  offset: number,
  words: Uint32Array,
) {
  let cursor = offset;
  for (let w = 0; w < words.length; w += 1) {
    const lowHalf = array[cursor] | (array[cursor + 1] << 8);
    const highHalf = (array[cursor + 2] << 16) | (array[cursor + 3] << 24);
    words[w] = lowHalf | highHalf;
    cursor += 4;
  }
}

/**
 * Computes the 32-byte digest of `input` (unkeyed mode, `IV` as key words).
 *
 * The input is processed in three phases:
 *  1. Every 1024-byte chunk that is known NOT to be the last one is
 *     compressed block by block, and its chaining value is pushed on a
 *     stack. Completed subtrees are merged eagerly into parent nodes.
 *  2. The last chunk (0 to 1024 bytes) has all blocks except its final one
 *     compressed.
 *  3. The final block is either the root itself (single-chunk input) or is
 *     finished as a normal chunk end, after which the stack is folded from
 *     the right and the last parent compression is flagged as ROOT.
 *
 * The digest bytes are written explicitly in little-endian order, so the
 * result does not depend on the byte order of the host.
 *
 * @param input - The bytes to hash; may be empty.
 * @returns A new 32-byte array holding the digest.
 */
export function hash(input: Uint8Array): Uint8Array {
  const flags = 0;
  const keyWords = IV;

  // The hasher state.
  const blockWords = new Uint32Array(16) as W16;
  const cvStack: W8[] = [];

  // Pops the two topmost chaining values and lays them out in `blockWords`
  // as left || right, ready for a parent compression.
  const loadParentBlock = (): void => {
    const rightChildCv = cvStack.pop()!;
    const leftChildCv = cvStack.pop()!;
    blockWords.set(leftChildCv, 0);
    blockWords.set(rightChildCv, 8);
  };

  // Replaces the two topmost chaining values with their parent's.
  const mergeTopTwo = (): void => {
    loadParentBlock();
    cvStack.push(
      first8Words(
        compress(keyWords, blockWords, 0, BLOCK_LEN, flags | PARENT),
      ),
    );
  };

  let chunkCounter = 0;
  let offset = 0;

  // Number of bytes we can process knowing there is more data after them:
  // every whole chunk except the last one. Plain arithmetic is used rather
  // than bitwise operators, which would truncate large lengths to 32 bits.
  const length = input.length;
  const take = Math.max(0, Math.floor((length - 1) / CHUNK_LEN) * CHUNK_LEN);

  while (offset < take) {
    let cv = keyWords;

    // A full chunk is exactly 16 blocks.
    for (let i = 0; i < 16; ++i, offset += BLOCK_LEN) {
      readLittleEndianWordsFull(input, offset, blockWords);

      cv = first8Words(
        compress(
          cv,
          blockWords,
          chunkCounter,
          BLOCK_LEN,
          flags | (i === 0 ? CHUNK_START : i === 15 ? CHUNK_END : 0),
        ),
      );
    }

    chunkCounter += 1;
    cvStack.push(cv);

    // One merge per trailing zero bit of the number of chunks seen so far:
    // each such bit corresponds to a subtree that has just been completed.
    for (
      let totalChunks = chunkCounter;
      (totalChunks & 1) === 0;
      totalChunks >>= 1
    ) {
      mergeTopTwo();
    }
  }

  // Last chunk. It holds between 0 and 1024 bytes; all of its blocks except
  // the final one are compressed here. For an empty input the expression
  // below evaluates to 0, so the loop is skipped.
  const remainingBytes = length - take;
  const fullBlocks = ((remainingBytes - 1) / BLOCK_LEN) | 0;

  let cv = keyWords;

  // `fullBlocks` is at most 15, so none of these blocks can be the chunk's
  // last one and CHUNK_END is never needed here.
  for (let i = 0; i < fullBlocks; ++i, offset += BLOCK_LEN) {
    readLittleEndianWordsFull(input, offset, blockWords);

    cv = first8Words(
      compress(
        cv,
        blockWords,
        chunkCounter,
        BLOCK_LEN,
        flags | (i === 0 ? CHUNK_START : 0),
      ),
    );
  }

  // Final (possibly partial, possibly empty) block of the last chunk. When
  // the chunk consists of this single block it is also the chunk's start.
  readLittleEndianWords(input, offset, blockWords);
  const lastBlockLen = length - offset;
  const lastBlockFlags =
    flags | CHUNK_END | (fullBlocks === 0 ? CHUNK_START : 0);

  let out: W16;

  if (cvStack.length === 0) {
    // Single-chunk input: the chunk's final block is the root node. No chunk
    // has been completed, so the chunk counter is still zero.
    out = compress(cv, blockWords, 0, lastBlockLen, lastBlockFlags | ROOT);
  } else {
    // Finish the last chunk like any other and push its chaining value.
    cvStack.push(
      first8Words(
        compress(cv, blockWords, chunkCounter, lastBlockLen, lastBlockFlags),
      ),
    );

    // Fold the stack from the right until only the root's children remain.
    while (cvStack.length > 2) {
      mergeTopTwo();
    }

    loadParentBlock();
    out = compress(keyWords, blockWords, 0, BLOCK_LEN, flags | PARENT | ROOT);
  }

  // Serialize the first eight output words as little-endian bytes.
  const digest = new Uint8Array(OUT_LEN);
  const view = new DataView(digest.buffer);
  for (let i = 0; i < OUT_LEN / 4; ++i) {
    view.setUint32(i * 4, out[i], true);
  }
  return digest;
}

0 comments on commit ca82907

Please sign in to comment.