Skip to content

Commit

Permalink
optimize for little endian systems
Browse files Browse the repository at this point in the history
  • Loading branch information
qti3e committed Apr 16, 2024
1 parent 568d62d commit c6bb4c3
Show file tree
Hide file tree
Showing 3 changed files with 485 additions and 12 deletions.
4 changes: 3 additions & 1 deletion bench.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { hash as jsHashV4 } from "./js/v4.ts";
import { hash as jsHashV5 } from "./js/v5.ts";
import { hash as jsHashV6 } from "./js/v6.ts";
import { hash as jsHashV7 } from "./js/v7.ts";
import { hash as jsHashV8 } from "./js/v8.ts";
import { hash as latestHash } from "./js/latest.ts";

// Share the same input buffer across benchmarks.
Expand Down Expand Up @@ -57,4 +58,5 @@ bench("Js#05", jsHashV4);
bench("Js#06", jsHashV5);
bench("Js#07", jsHashV6);
bench("Js#08", jsHashV7);
bench("Js#09", latestHash);
bench("Js#09", jsHashV8);
bench("Js#10", latestHash);
48 changes: 37 additions & 11 deletions js/latest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ const IV = new Uint32Array([
0x1f83d9ab, 0x5be0cd19,
]) as W8;

// Blake3 is little-endian friendly; since over 95% of devices running the client
// are little endian, we can skip the byte-swapping step on those systems.
const IsBigEndian = !new Uint8Array(new Uint32Array([1]).buffer)[0];

function compress(
cv: Uint32Array,
cvOffset: number,
Expand Down Expand Up @@ -260,6 +264,12 @@ function getCvStack(maxDepth: number) {
}

export function hash(input: Uint8Array): Uint8Array {
const inputWords = new Uint32Array(
input.buffer,
input.byteOffset,
input.byteLength >> 2,
);

const flags = 0;
const keyWords = IV;
const out = new Uint32Array(8);
Expand All @@ -279,13 +289,15 @@ export function hash(input: Uint8Array): Uint8Array {
cvStack.set(keyWords, cvStackPos);

for (let i = 0; i < 16; ++i, offset += 64) {
readLittleEndianWordsFull(input, offset, blockWords);
if (IsBigEndian) {
readLittleEndianWordsFull(input, offset, blockWords);
}

compress(
cvStack,
cvStackPos,
blockWords,
0,
IsBigEndian ? blockWords : inputWords,
IsBigEndian ? 0 : offset / 4,
cvStack,
cvStackPos,
true,
Expand Down Expand Up @@ -330,13 +342,15 @@ export function hash(input: Uint8Array): Uint8Array {
cvStack.set(keyWords, cvStackPos);

for (let i = 0; i < fullBlocks; ++i, offset += 64) {
readLittleEndianWordsFull(input, offset, blockWords);
if (IsBigEndian) {
readLittleEndianWordsFull(input, offset, blockWords);
}

compress(
cvStack,
cvStackPos,
blockWords,
0,
IsBigEndian ? blockWords : inputWords,
IsBigEndian ? 0 : offset / 4,
cvStack,
cvStackPos,
true,
Expand All @@ -350,14 +364,26 @@ export function hash(input: Uint8Array): Uint8Array {
// the stack and that this block needs to be finalized. And the other is the
// opposite, we have entries in the stack which we should merge.

readLittleEndianWords(input, offset, blockWords);
const lastBlockLen = length - offset;
let lastBlockWords = blockWords as Uint32Array;
let lastBlockWordsOffset = 0;
if (lastBlockLen == BLOCK_LEN) {
if (IsBigEndian) {
readLittleEndianWordsFull(input, offset, blockWords);
} else {
lastBlockWords = inputWords;
lastBlockWordsOffset = offset / 4;
}
} else {
readLittleEndianWords(input, offset, blockWords);
}

if (cvStackPos == 0) {
compress(
cvStack,
0,
blockWords,
0,
lastBlockWords,
lastBlockWordsOffset,
out,
0,
true,
Expand All @@ -369,8 +395,8 @@ export function hash(input: Uint8Array): Uint8Array {
compress(
cvStack,
cvStackPos,
blockWords,
0,
lastBlockWords,
lastBlockWordsOffset,
cvStack,
cvStackPos,
true,
Expand Down

0 comments on commit c6bb4c3

Please sign in to comment.