Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[WTF] Add memchr16 on ARM64
https://bugs.webkit.org/show_bug.cgi?id=242568

Reviewed by Darin Adler.

This change adds a SIMD implementation of memchr16. We use ARM64 NEON intrinsics
to implement a faster memchr16, and leverage it in WTF::find and TypedArray indexOf/includes.

                                             ToT                     Patched

u16-string-index-of-1001-mid            0.6371+-0.0201     ^      0.4996+-0.0310        ^ definitely 1.2753x faster
u16-string-index-of-101-beg             0.4603+-0.0461            0.4518+-0.0241          might be 1.0186x faster
u16-string-index-of-11-end              0.4592+-0.0350     ?      0.4787+-0.0359        ? might be 1.0424x slower
u16-string-index-of-100001-end         33.0156+-0.6301     ^      6.4448+-0.0449        ^ definitely 5.1229x faster
uint16-array-index-of-large           665.2918+-2.5632     ^    112.1221+-1.7009        ^ definitely 5.9336x faster
u16-string-index-of-10000001-end     3261.2755+-5.9943     ^    622.5246+-1.2345        ^ definitely 5.2388x faster
u16-string-index-of-101-404             0.4759+-0.0301     ?      0.5007+-0.0583        ? might be 1.0523x slower
u16-string-index-of-10001-mid           2.1295+-0.0140     ^      0.8355+-0.0185        ^ definitely 2.5487x faster
u16-string-index-of-1000001-mid       168.4613+-3.2181     ^     35.4271+-0.0338        ^ definitely 4.7551x faster
uint16-array-index-of-small             0.4010+-0.0118            0.3834+-0.0151          might be 1.0461x faster
u16-string-index-of-101-end             0.4986+-0.0699            0.4568+-0.0184          might be 1.0914x faster
u16-string-index-of-11-beg              0.4497+-0.0382            0.4433+-0.0378          might be 1.0144x faster
u16-string-index-of-100001-beg          1.4841+-0.0359     ?      1.5071+-0.0142        ? might be 1.0155x slower
u16-string-index-of-10000001-404     3257.2980+-4.5640     ^    625.2343+-3.0194        ^ definitely 5.2097x faster
int16-array-index-of-large            665.0809+-3.2257     ^    112.5059+-2.9796        ^ definitely 5.9115x faster
u16-string-index-of-100001-404         33.4299+-1.1079     ^      6.5370+-0.2573        ^ definitely 5.1140x faster
u16-string-index-of-11-404              0.4853+-0.0453            0.4795+-0.0406          might be 1.0121x faster
int16-array-index-of-small              0.6745+-0.4688            0.3655+-0.0104          might be 1.8454x faster
u16-string-index-of-10000001-beg      112.7112+-0.9075     !    116.7191+-2.8648        ! definitely 1.0356x slower
u16-string-index-of-10000001-mid     1721.3761+-21.7927    ^    377.8264+-1.9241        ^ definitely 4.5560x faster
int16-array-index-of-medium            13.2849+-0.0279     ^      2.5942+-0.0163        ^ definitely 5.1211x faster
uint16-array-index-of-medium           13.4077+-0.1505     ^      2.5650+-0.0501        ^ definitely 5.2272x faster
u16-string-index-of-10001-end           3.6959+-0.0262     ^      1.0838+-0.0770        ^ definitely 3.4100x faster
u16-string-index-of-1000001-end       328.2864+-7.9285     ^     59.9328+-0.3556        ^ definitely 5.4776x faster
u16-string-index-of-1001-end            0.8068+-0.0256     ^      0.5374+-0.0365        ^ definitely 1.5014x faster
u16-string-index-of-11-mid              0.4565+-0.0454            0.4553+-0.0315
u16-string-index-of-100001-mid         17.3082+-0.4797     ^      3.9760+-0.0264        ^ definitely 4.3532x faster
u16-string-index-of-10001-beg           0.5774+-0.0328            0.5710+-0.0408          might be 1.0113x faster
u16-string-index-of-1000001-beg        10.9870+-0.3981     ?     11.0094+-0.0433        ?
u16-string-index-of-1001-404            0.8054+-0.0332     ^      0.5104+-0.0227        ^ definitely 1.5778x faster
u16-string-index-of-101-mid             0.4739+-0.0377            0.4641+-0.0256          might be 1.0212x faster
u16-string-index-of-1000001-404       326.1748+-5.7428     ^     60.2302+-1.2138        ^ definitely 5.4155x faster
u16-string-index-of-10001-404           3.6958+-0.0159     ^      1.0830+-0.0501        ^ definitely 3.4125x faster
u16-string-index-of-1001-beg            0.4613+-0.0361     ?      0.4815+-0.0357        ? might be 1.0438x slower

<geometric>                             7.7050+-0.0885     ^      3.5582+-0.0275        ^ definitely 2.1654x faster

* JSTests/microbenchmarks/int16-array-index-of-large.js: Added.
(test):
* JSTests/microbenchmarks/int16-array-index-of-medium.js: Added.
(test):
* JSTests/microbenchmarks/int16-array-index-of-small.js: Added.
(test):
* JSTests/microbenchmarks/u16-string-index-of-10000001-404.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-10000001-beg.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-10000001-end.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-10000001-mid.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-1000001-404.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-1000001-beg.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-1000001-end.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-1000001-mid.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-100001-404.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-100001-beg.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-100001-end.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-100001-mid.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-10001-404.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-10001-beg.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-10001-end.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-10001-mid.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-1001-404.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-1001-beg.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-1001-end.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-1001-mid.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-101-404.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-101-beg.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-101-end.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-101-mid.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-11-404.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-11-beg.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-11-end.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/u16-string-index-of-11-mid.js: Added.
(bench):
(target):
(forRepeatCount):
(all):
* JSTests/microbenchmarks/uint16-array-index-of-large.js: Added.
(test):
* JSTests/microbenchmarks/uint16-array-index-of-medium.js: Added.
(test):
* JSTests/microbenchmarks/uint16-array-index-of-small.js: Added.
(test):
* Source/JavaScriptCore/runtime/JSGenericTypedArrayViewPrototypeFunctions.h:
(JSC::genericTypedArrayViewProtoFuncIncludes):
(JSC::genericTypedArrayViewProtoFuncIndexOf):
* Source/WTF/WTF.xcodeproj/project.pbxproj:
* Source/WTF/wtf/CMakeLists.txt:
* Source/WTF/wtf/text/StringCommon.cpp: Added.
(WTF::memchr16):
* Source/WTF/wtf/text/StringCommon.h:
(WTF::find):

Canonical link: https://commits.webkit.org/252544@main
  • Loading branch information
Constellation committed Jul 17, 2022
1 parent 6545a6f commit 3942acb
Show file tree
Hide file tree
Showing 40 changed files with 1,975 additions and 13 deletions.
14 changes: 14 additions & 0 deletions JSTests/microbenchmarks/int16-array-index-of-large.js
@@ -0,0 +1,14 @@
// Benchmark fixture: an Int16Array of 20 << 20 (20,971,520) elements, all set
// to 1 except the final slot, which holds the 42 that indexOf searches for.
const size = 20 << 20;
const array = new Int16Array(size).fill(1);
array[size - 1] = 42;

/**
 * Calls array.indexOf(42) 1e2 times and adds up the indices it returns.
 *
 * @param {Int16Array} array - Array to search (anything with indexOf works).
 * @returns {number} Sum of the 1e2 indexOf results.
 */
function test(array) {
    let total = 0;
    let remaining = 1e2;
    while (remaining-- > 0)
        total += array.indexOf(42);
    return total;
}
// noInline is not declared in this file — presumably the JSC shell helper that
// keeps test() from being inlined; confirm against the benchmark harness.
noInline(test);
// Run the benchmark once on the array built above; the returned sum is discarded.
test(array);
14 changes: 14 additions & 0 deletions JSTests/microbenchmarks/int16-array-index-of-medium.js
@@ -0,0 +1,14 @@
// Benchmark fixture: a 4096-element Int16Array, all set to 1 except the final
// slot, which holds the 42 that indexOf searches for.
const size = 4096;
const array = new Int16Array(size).fill(1);
array[size - 1] = 42;

/**
 * Calls array.indexOf(42) 1e4 times and adds up the indices it returns.
 *
 * @param {Int16Array} array - Array to search (anything with indexOf works).
 * @returns {number} Sum of the 1e4 indexOf results.
 */
function test(array) {
    let total = 0;
    let remaining = 1e4;
    while (remaining-- > 0)
        total += array.indexOf(42);
    return total;
}
// noInline is not declared in this file — presumably the JSC shell helper that
// keeps test() from being inlined; confirm against the benchmark harness.
noInline(test);
// Run the benchmark once on the array built above; the returned sum is discarded.
test(array);
14 changes: 14 additions & 0 deletions JSTests/microbenchmarks/int16-array-index-of-small.js
@@ -0,0 +1,14 @@
// Benchmark fixture: a 16-element Int16Array, all set to 1 except the final
// slot, which holds the 42 that indexOf searches for.
const size = 16;
const array = new Int16Array(size).fill(1);
array[size - 1] = 42;

/**
 * Calls array.indexOf(42) 1e4 times and adds up the indices it returns.
 *
 * @param {Int16Array} array - Array to search (anything with indexOf works).
 * @returns {number} Sum of the 1e4 indexOf results.
 */
function test(array) {
    let total = 0;
    let remaining = 1e4;
    while (remaining-- > 0)
        total += array.indexOf(42);
    return total;
}
// noInline is not declared in this file — presumably the JSC shell helper that
// keeps test() from being inlined; confirm against the benchmark harness.
noInline(test);
// Run the benchmark once on the array built above; the returned sum is discarded.
test(array);
61 changes: 61 additions & 0 deletions JSTests/microbenchmarks/u16-string-index-of-10000001-404.js
@@ -0,0 +1,61 @@
/**
 * Invokes `func` 1000 times back to back.
 *
 * @param {string} string - Label for the run; accepted but never read here.
 * @param {Function} func - Callback to execute; it is called with no arguments.
 */
function bench(string, func)
{
    let remaining = 1000;
    while (remaining-- > 0)
        func();
}
// noInline is not declared in this file — presumably the JSC shell helper that
// keeps bench() from being inlined; confirm against the benchmark harness.
noInline(bench);

/**
 * Builds a flat string from "lalalalala" repeated `count` times, places the
 * search character according to `pos`, and runs `indexOf` on it through bench().
 *
 * @param {number} count - How many times "lalalalala" is repeated.
 * @param {number} pos - Placement of the search character: -1 = beginning,
 *     0 = middle, 1 = end, 2 = absent (the "not found" scenario).
 * @param {boolean} utf16 - When true, "ϧ" is appended to the base and U+0245 is
 *     the search character; otherwise "z" is the search character.
 * @returns {*} Whatever bench() returns.
 * @throws {RangeError} If `pos` is not one of -1, 0, 1 or 2. Previously such a
 *     value silently ran the "not found" scenario with an undefined label.
 */
function forRepeatCount(count, pos, utf16) {
    let base = "lalalalala".repeat(count);
    if (utf16)
        base += "ϧ"; // arbitrary utf-16

    const charToFind = !utf16 ? "z" : String.fromCodePoint(0x0245);
    const charKind = utf16 ? "UChar" : "LChar";

    let input = base;
    let label;
    switch (pos) {
    case -1:
        input = charToFind + base;
        label = `beg ${charKind}`;
        break;
    case 0: {
        const half = (base.length / 2) | 0;
        label = `mid ${charKind}`;
        input = base.substring(0, half) + charToFind + base.substring(half);
        break;
    }
    case 1:
        label = `end ${charKind}`;
        input = base + charToFind;
        break;
    // not found
    case 2:
        label = `404 ${charKind}`;
        break;
    default:
        throw new RangeError(`forRepeatCount: unsupported pos ${pos} (expected -1, 0, 1 or 2)`);
    }

    // force it to not be a rope
    input = input.split("").join("");

    function target() {
        input.indexOf(charToFind);
    }
    // noInline is not declared in this file — presumably the JSC shell helper; confirm.
    noInline(target);

    // Length is right-aligned to the width of "10,000,001", the largest size in this benchmark family.
    const paddedLength = new Intl.NumberFormat()
        .format(input.length)
        .padStart("10,000,001".length);
    return bench(`<${label}> [${paddedLength} chars] indexOf`, target);
}

/**
 * Runs the single scenario of this benchmark file: "lalalalala" repeated
 * 1,000,000 times with the search character absent (pos 2).
 *
 * @param {*} utf16 - Coerced to a boolean and forwarded to forRepeatCount.
 */
function all(utf16) {
    const repeatCount = 1000000;
    const notFound = 2;
    forRepeatCount(repeatCount, notFound, Boolean(utf16));
}
// Entry point: run the benchmark with utf16 = true.
all(true);
61 changes: 61 additions & 0 deletions JSTests/microbenchmarks/u16-string-index-of-10000001-beg.js
@@ -0,0 +1,61 @@
/**
 * Invokes `func` 1000 times back to back.
 *
 * @param {string} string - Label for the run; accepted but never read here.
 * @param {Function} func - Callback to execute; it is called with no arguments.
 */
function bench(string, func)
{
    let remaining = 1000;
    while (remaining-- > 0)
        func();
}
// noInline is not declared in this file — presumably the JSC shell helper that
// keeps bench() from being inlined; confirm against the benchmark harness.
noInline(bench);

/**
 * Builds a flat string from "lalalalala" repeated `count` times, places the
 * search character according to `pos`, and runs `indexOf` on it through bench().
 *
 * @param {number} count - How many times "lalalalala" is repeated.
 * @param {number} pos - Placement of the search character: -1 = beginning,
 *     0 = middle, 1 = end, 2 = absent (the "not found" scenario).
 * @param {boolean} utf16 - When true, "ϧ" is appended to the base and U+0245 is
 *     the search character; otherwise "z" is the search character.
 * @returns {*} Whatever bench() returns.
 * @throws {RangeError} If `pos` is not one of -1, 0, 1 or 2. Previously such a
 *     value silently ran the "not found" scenario with an undefined label.
 */
function forRepeatCount(count, pos, utf16) {
    let base = "lalalalala".repeat(count);
    if (utf16)
        base += "ϧ"; // arbitrary utf-16

    const charToFind = !utf16 ? "z" : String.fromCodePoint(0x0245);
    const charKind = utf16 ? "UChar" : "LChar";

    let input = base;
    let label;
    switch (pos) {
    case -1:
        input = charToFind + base;
        label = `beg ${charKind}`;
        break;
    case 0: {
        const half = (base.length / 2) | 0;
        label = `mid ${charKind}`;
        input = base.substring(0, half) + charToFind + base.substring(half);
        break;
    }
    case 1:
        label = `end ${charKind}`;
        input = base + charToFind;
        break;
    // not found
    case 2:
        label = `404 ${charKind}`;
        break;
    default:
        throw new RangeError(`forRepeatCount: unsupported pos ${pos} (expected -1, 0, 1 or 2)`);
    }

    // force it to not be a rope
    input = input.split("").join("");

    function target() {
        input.indexOf(charToFind);
    }
    // noInline is not declared in this file — presumably the JSC shell helper; confirm.
    noInline(target);

    // Length is right-aligned to the width of "10,000,001", the largest size in this benchmark family.
    const paddedLength = new Intl.NumberFormat()
        .format(input.length)
        .padStart("10,000,001".length);
    return bench(`<${label}> [${paddedLength} chars] indexOf`, target);
}

/**
 * Runs the single scenario of this benchmark file: "lalalalala" repeated
 * 1,000,000 times with the search character at the beginning (pos -1).
 *
 * @param {*} utf16 - Coerced to a boolean and forwarded to forRepeatCount.
 */
function all(utf16) {
    const repeatCount = 1000000;
    const atBeginning = -1;
    forRepeatCount(repeatCount, atBeginning, Boolean(utf16));
}
// Entry point: run the benchmark with utf16 = true.
all(true);
61 changes: 61 additions & 0 deletions JSTests/microbenchmarks/u16-string-index-of-10000001-end.js
@@ -0,0 +1,61 @@
/**
 * Invokes `func` 1000 times back to back.
 *
 * @param {string} string - Label for the run; accepted but never read here.
 * @param {Function} func - Callback to execute; it is called with no arguments.
 */
function bench(string, func)
{
    let remaining = 1000;
    while (remaining-- > 0)
        func();
}
// noInline is not declared in this file — presumably the JSC shell helper that
// keeps bench() from being inlined; confirm against the benchmark harness.
noInline(bench);

/**
 * Builds a flat string from "lalalalala" repeated `count` times, places the
 * search character according to `pos`, and runs `indexOf` on it through bench().
 *
 * @param {number} count - How many times "lalalalala" is repeated.
 * @param {number} pos - Placement of the search character: -1 = beginning,
 *     0 = middle, 1 = end, 2 = absent (the "not found" scenario).
 * @param {boolean} utf16 - When true, "ϧ" is appended to the base and U+0245 is
 *     the search character; otherwise "z" is the search character.
 * @returns {*} Whatever bench() returns.
 * @throws {RangeError} If `pos` is not one of -1, 0, 1 or 2. Previously such a
 *     value silently ran the "not found" scenario with an undefined label.
 */
function forRepeatCount(count, pos, utf16) {
    let base = "lalalalala".repeat(count);
    if (utf16)
        base += "ϧ"; // arbitrary utf-16

    const charToFind = !utf16 ? "z" : String.fromCodePoint(0x0245);
    const charKind = utf16 ? "UChar" : "LChar";

    let input = base;
    let label;
    switch (pos) {
    case -1:
        input = charToFind + base;
        label = `beg ${charKind}`;
        break;
    case 0: {
        const half = (base.length / 2) | 0;
        label = `mid ${charKind}`;
        input = base.substring(0, half) + charToFind + base.substring(half);
        break;
    }
    case 1:
        label = `end ${charKind}`;
        input = base + charToFind;
        break;
    // not found
    case 2:
        label = `404 ${charKind}`;
        break;
    default:
        throw new RangeError(`forRepeatCount: unsupported pos ${pos} (expected -1, 0, 1 or 2)`);
    }

    // force it to not be a rope
    input = input.split("").join("");

    function target() {
        input.indexOf(charToFind);
    }
    // noInline is not declared in this file — presumably the JSC shell helper; confirm.
    noInline(target);

    // Length is right-aligned to the width of "10,000,001", the largest size in this benchmark family.
    const paddedLength = new Intl.NumberFormat()
        .format(input.length)
        .padStart("10,000,001".length);
    return bench(`<${label}> [${paddedLength} chars] indexOf`, target);
}

/**
 * Runs the single scenario of this benchmark file: "lalalalala" repeated
 * 1,000,000 times with the search character at the end (pos 1).
 *
 * @param {*} utf16 - Coerced to a boolean and forwarded to forRepeatCount.
 */
function all(utf16) {
    const repeatCount = 1000000;
    const atEnd = 1;
    forRepeatCount(repeatCount, atEnd, Boolean(utf16));
}
// Entry point: run the benchmark with utf16 = true.
all(true);
61 changes: 61 additions & 0 deletions JSTests/microbenchmarks/u16-string-index-of-10000001-mid.js
@@ -0,0 +1,61 @@
/**
 * Invokes `func` 1000 times back to back.
 *
 * @param {string} string - Label for the run; accepted but never read here.
 * @param {Function} func - Callback to execute; it is called with no arguments.
 */
function bench(string, func)
{
    let remaining = 1000;
    while (remaining-- > 0)
        func();
}
// noInline is not declared in this file — presumably the JSC shell helper that
// keeps bench() from being inlined; confirm against the benchmark harness.
noInline(bench);

/**
 * Builds a flat string from "lalalalala" repeated `count` times, places the
 * search character according to `pos`, and runs `indexOf` on it through bench().
 *
 * @param {number} count - How many times "lalalalala" is repeated.
 * @param {number} pos - Placement of the search character: -1 = beginning,
 *     0 = middle, 1 = end, 2 = absent (the "not found" scenario).
 * @param {boolean} utf16 - When true, "ϧ" is appended to the base and U+0245 is
 *     the search character; otherwise "z" is the search character.
 * @returns {*} Whatever bench() returns.
 * @throws {RangeError} If `pos` is not one of -1, 0, 1 or 2. Previously such a
 *     value silently ran the "not found" scenario with an undefined label.
 */
function forRepeatCount(count, pos, utf16) {
    let base = "lalalalala".repeat(count);
    if (utf16)
        base += "ϧ"; // arbitrary utf-16

    const charToFind = !utf16 ? "z" : String.fromCodePoint(0x0245);
    const charKind = utf16 ? "UChar" : "LChar";

    let input = base;
    let label;
    switch (pos) {
    case -1:
        input = charToFind + base;
        label = `beg ${charKind}`;
        break;
    case 0: {
        const half = (base.length / 2) | 0;
        label = `mid ${charKind}`;
        input = base.substring(0, half) + charToFind + base.substring(half);
        break;
    }
    case 1:
        label = `end ${charKind}`;
        input = base + charToFind;
        break;
    // not found
    case 2:
        label = `404 ${charKind}`;
        break;
    default:
        throw new RangeError(`forRepeatCount: unsupported pos ${pos} (expected -1, 0, 1 or 2)`);
    }

    // force it to not be a rope
    input = input.split("").join("");

    function target() {
        input.indexOf(charToFind);
    }
    // noInline is not declared in this file — presumably the JSC shell helper; confirm.
    noInline(target);

    // Length is right-aligned to the width of "10,000,001", the largest size in this benchmark family.
    const paddedLength = new Intl.NumberFormat()
        .format(input.length)
        .padStart("10,000,001".length);
    return bench(`<${label}> [${paddedLength} chars] indexOf`, target);
}

/**
 * Runs the single scenario of this benchmark file: "lalalalala" repeated
 * 1,000,000 times with the search character in the middle (pos 0).
 *
 * @param {*} utf16 - Coerced to a boolean and forwarded to forRepeatCount.
 */
function all(utf16) {
    const repeatCount = 1000000;
    const inMiddle = 0;
    forRepeatCount(repeatCount, inMiddle, Boolean(utf16));
}
// Entry point: run the benchmark with utf16 = true.
all(true);
61 changes: 61 additions & 0 deletions JSTests/microbenchmarks/u16-string-index-of-1000001-404.js
@@ -0,0 +1,61 @@
/**
 * Invokes `func` 1000 times back to back.
 *
 * @param {string} string - Label for the run; accepted but never read here.
 * @param {Function} func - Callback to execute; it is called with no arguments.
 */
function bench(string, func)
{
    let remaining = 1000;
    while (remaining-- > 0)
        func();
}
// noInline is not declared in this file — presumably the JSC shell helper that
// keeps bench() from being inlined; confirm against the benchmark harness.
noInline(bench);

/**
 * Builds a flat string from "lalalalala" repeated `count` times, places the
 * search character according to `pos`, and runs `indexOf` on it through bench().
 *
 * @param {number} count - How many times "lalalalala" is repeated.
 * @param {number} pos - Placement of the search character: -1 = beginning,
 *     0 = middle, 1 = end, 2 = absent (the "not found" scenario).
 * @param {boolean} utf16 - When true, "ϧ" is appended to the base and U+0245 is
 *     the search character; otherwise "z" is the search character.
 * @returns {*} Whatever bench() returns.
 * @throws {RangeError} If `pos` is not one of -1, 0, 1 or 2. Previously such a
 *     value silently ran the "not found" scenario with an undefined label.
 */
function forRepeatCount(count, pos, utf16) {
    let base = "lalalalala".repeat(count);
    if (utf16)
        base += "ϧ"; // arbitrary utf-16

    const charToFind = !utf16 ? "z" : String.fromCodePoint(0x0245);
    const charKind = utf16 ? "UChar" : "LChar";

    let input = base;
    let label;
    switch (pos) {
    case -1:
        input = charToFind + base;
        label = `beg ${charKind}`;
        break;
    case 0: {
        const half = (base.length / 2) | 0;
        label = `mid ${charKind}`;
        input = base.substring(0, half) + charToFind + base.substring(half);
        break;
    }
    case 1:
        label = `end ${charKind}`;
        input = base + charToFind;
        break;
    // not found
    case 2:
        label = `404 ${charKind}`;
        break;
    default:
        throw new RangeError(`forRepeatCount: unsupported pos ${pos} (expected -1, 0, 1 or 2)`);
    }

    // force it to not be a rope
    input = input.split("").join("");

    function target() {
        input.indexOf(charToFind);
    }
    // noInline is not declared in this file — presumably the JSC shell helper; confirm.
    noInline(target);

    // Length is right-aligned to the width of "10,000,001", the largest size in this benchmark family.
    const paddedLength = new Intl.NumberFormat()
        .format(input.length)
        .padStart("10,000,001".length);
    return bench(`<${label}> [${paddedLength} chars] indexOf`, target);
}

/**
 * Runs the single scenario of this benchmark file: "lalalalala" repeated
 * 100,000 times with the search character absent (pos 2).
 *
 * @param {*} utf16 - Coerced to a boolean and forwarded to forRepeatCount.
 */
function all(utf16) {
    const repeatCount = 100000;
    const notFound = 2;
    forRepeatCount(repeatCount, notFound, Boolean(utf16));
}
// Entry point: run the benchmark with utf16 = true.
all(true);

0 comments on commit 3942acb

Please sign in to comment.