Skip to content

Commit

Permalink
optimize for little endian systems
Browse files Browse the repository at this point in the history
  • Loading branch information
qti3e committed Apr 16, 2024
1 parent 568d62d commit c6bb4c3
Show file tree
Hide file tree
Showing 3 changed files with 485 additions and 12 deletions.
4 changes: 3 additions & 1 deletion bench.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { hash as jsHashV4 } from "./js/v4.ts";
import { hash as jsHashV5 } from "./js/v5.ts";
import { hash as jsHashV6 } from "./js/v6.ts";
import { hash as jsHashV7 } from "./js/v7.ts";
import { hash as jsHashV8 } from "./js/v8.ts";
import { hash as latestHash } from "./js/latest.ts";

// Share the same input buffer across benchmarks.
Expand Down Expand Up @@ -57,4 +58,5 @@ bench("Js#05", jsHashV4);
bench("Js#06", jsHashV5);
bench("Js#07", jsHashV6);
bench("Js#08", jsHashV7);
bench("Js#09", latestHash);
bench("Js#09", jsHashV8);
bench("Js#10", latestHash);
48 changes: 37 additions & 11 deletions js/latest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ const IV = new Uint32Array([
0x1f83d9ab, 0x5be0cd19,
]) as W8;

// Blake3 is little-endian friendly; since over 95% of devices running the client
// are little endian, we can skip the byte-swapping step on those systems.
const IsBigEndian = !new Uint8Array(new Uint32Array([1]).buffer)[0];

function compress(
cv: Uint32Array,
cvOffset: number,
Expand Down Expand Up @@ -260,6 +264,12 @@ function getCvStack(maxDepth: number) {
}

export function hash(input: Uint8Array): Uint8Array {
const inputWords = new Uint32Array(
input.buffer,
input.byteOffset,
input.byteLength >> 2,
);

const flags = 0;
const keyWords = IV;
const out = new Uint32Array(8);
Expand All @@ -279,13 +289,15 @@ export function hash(input: Uint8Array): Uint8Array {
cvStack.set(keyWords, cvStackPos);

for (let i = 0; i < 16; ++i, offset += 64) {
readLittleEndianWordsFull(input, offset, blockWords);
if (IsBigEndian) {
readLittleEndianWordsFull(input, offset, blockWords);
}

compress(
cvStack,
cvStackPos,
blockWords,
0,
IsBigEndian ? blockWords : inputWords,
IsBigEndian ? 0 : offset / 4,
cvStack,
cvStackPos,
true,
Expand Down Expand Up @@ -330,13 +342,15 @@ export function hash(input: Uint8Array): Uint8Array {
cvStack.set(keyWords, cvStackPos);

for (let i = 0; i < fullBlocks; ++i, offset += 64) {
readLittleEndianWordsFull(input, offset, blockWords);
if (IsBigEndian) {
readLittleEndianWordsFull(input, offset, blockWords);
}

compress(
cvStack,
cvStackPos,
blockWords,
0,
IsBigEndian ? blockWords : inputWords,
IsBigEndian ? 0 : offset / 4,
cvStack,
cvStackPos,
true,
Expand All @@ -350,14 +364,26 @@ export function hash(input: Uint8Array): Uint8Array {
// the stack and that this block needs to be finalized. And the other is the
// opposite, we have entries in the stack which we should merge.

readLittleEndianWords(input, offset, blockWords);
const lastBlockLen = length - offset;
let lastBlockWords = blockWords as Uint32Array;
let lastBlockWordsOffset = 0;
if (lastBlockLen == BLOCK_LEN) {
if (IsBigEndian) {
readLittleEndianWordsFull(input, offset, blockWords);
} else {
lastBlockWords = inputWords;
lastBlockWordsOffset = offset / 4;
}
} else {
readLittleEndianWords(input, offset, blockWords);
}

if (cvStackPos == 0) {
compress(
cvStack,
0,
blockWords,
0,
lastBlockWords,
lastBlockWordsOffset,
out,
0,
true,
Expand All @@ -369,8 +395,8 @@ export function hash(input: Uint8Array): Uint8Array {
compress(
cvStack,
cvStackPos,
blockWords,
0,
lastBlockWords,
lastBlockWordsOffset,
cvStack,
cvStackPos,
true,
Expand Down

0 comments on commit c6bb4c3

Please sign in to comment.