From 0ca04480b8d277b6553679bd31edd3d9937f4bc1 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 21:40:59 -0700 Subject: [PATCH 01/39] Make: Upgrade Semantic Release v24-beta2 https://github.com/semantic-release/release-notes-generator/issues/633#issuecomment-2130745309 --- package-ci.json | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/package-ci.json b/package-ci.json index db39178..76e5fab 100644 --- a/package-ci.json +++ b/package-ci.json @@ -2,10 +2,9 @@ "name": "simsimd-ci", "version": "1.0.0", "devDependencies": { - "@semantic-release/commit-analyzer": "11.1.0", - "@semantic-release/exec": "6.0.3", - "@semantic-release/git": "10.0.1", - "conventional-changelog-eslint": "3.0.9", - "semantic-release": "21.1.2" + "@semantic-release/exec": "^6.0.3", + "@semantic-release/git": "^10.0.1", + "conventional-changelog-eslint": "^3.0.9", + "semantic-release": "^24.0.0-beta.2" } } \ No newline at end of file From 6958d8b98e81d4117d03908a832472e7725f827f Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 21:47:42 -0700 Subject: [PATCH 02/39] Make: Replace `package-ci.json` with CI YAML --- .github/workflows/release.yml | 11 ++++++++++- package-ci.json | 10 ---------- 2 files changed, 10 insertions(+), 11 deletions(-) delete mode 100644 package-ci.json diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 993855e..f065c2e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -32,7 +32,16 @@ jobs: with: toolchain: stable override: true - - run: npm install --ignore-scripts --save-dev --prefix ./package-ci @semantic-release/exec @semantic-release/git conventional-changelog-eslint semantic-release && npx --prefix ./package-ci semantic-release + + # Semantic Release + - name: Clean npm cache + run: npm cache clean --force + - name: Remove node_modules and 
package-lock.json + run: rm -rf ./package-ci/node_modules ./package-ci/package-lock.json + - name: Install dependencies + run: npm install --ignore-scripts --save-dev --prefix ./package-ci @semantic-release/exec@6.0.3 @semantic-release/git@10.0.1 conventional-changelog-eslint@3.0.9 semantic-release@24.0.0-beta.2 + - name: Run semantic-release + run: npx --prefix ./package-ci semantic-release rebase: name: Rebase Dev. Branch diff --git a/package-ci.json b/package-ci.json deleted file mode 100644 index 76e5fab..0000000 --- a/package-ci.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "name": "simsimd-ci", - "version": "1.0.0", - "devDependencies": { - "@semantic-release/exec": "^6.0.3", - "@semantic-release/git": "^10.0.1", - "conventional-changelog-eslint": "^3.0.9", - "semantic-release": "^24.0.0-beta.2" - } -} \ No newline at end of file From 28c39af832d54dc7aadb7e71421b8b0688397b87 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 21:48:56 -0700 Subject: [PATCH 03/39] Make: `--legacy-peer-deps` --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f065c2e..7dd3759 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -39,7 +39,7 @@ jobs: - name: Remove node_modules and package-lock.json run: rm -rf ./package-ci/node_modules ./package-ci/package-lock.json - name: Install dependencies - run: npm install --ignore-scripts --save-dev --prefix ./package-ci @semantic-release/exec@6.0.3 @semantic-release/git@10.0.1 conventional-changelog-eslint@3.0.9 semantic-release@24.0.0-beta.2 + run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci - name: Run semantic-release run: npx --prefix ./package-ci semantic-release From 78b8bba209e9678714d8b37c4e72f3371a3abb15 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: 
Sat, 25 May 2024 21:51:19 -0700 Subject: [PATCH 04/39] Make: Versioned `--legacy-peer-deps` --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7dd3759..5fa7506 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -39,7 +39,7 @@ jobs: - name: Remove node_modules and package-lock.json run: rm -rf ./package-ci/node_modules ./package-ci/package-lock.json - name: Install dependencies - run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci + run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci @semantic-release/exec@6.0.3 @semantic-release/git@10.0.1 conventional-changelog-eslint@3.0.9 semantic-release@24.0.0-beta.2 - name: Run semantic-release run: npx --prefix ./package-ci semantic-release From 959b80b4479d113977e9c0c8745f67e19e88ec9a Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 21:55:40 -0700 Subject: [PATCH 05/39] Make: Revert `package-ci.json` --- .github/workflows/package-ci.json | 35 +++++++++++++++++++++++++++++++ .github/workflows/release.yml | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/package-ci.json diff --git a/.github/workflows/package-ci.json b/.github/workflows/package-ci.json new file mode 100644 index 0000000..242a070 --- /dev/null +++ b/.github/workflows/package-ci.json @@ -0,0 +1,35 @@ +{ + "name": "simsimd-ci", + "version": "1.0.0", + "devDependencies": { + "@semantic-release/exec": "^6.0.3", + "@semantic-release/git": "^10.0.1", + "conventional-changelog-eslint": "^3.0.9", + "semantic-release": "^24.0.0-beta.2" + }, + "release": { + "branches": [ + "main" + ], + "plugins": [ + "@semantic-release/commit-analyzer", + "@semantic-release/release-notes-generator", + "@semantic-release/changelog", + [ + "@semantic-release/exec", + { + 
"verifyConditionsCmd": "echo verifyConditions", + "analyzeCommitsCmd": "echo analyzeCommits", + "verifyReleaseCmd": "echo verifyRelease", + "generateNotesCmd": "echo generateNotes", + "prepareCmd": "echo prepare", + "publishCmd": "echo publish", + "successCmd": "echo success", + "failCmd": "echo fail" + } + ], + "@semantic-release/github", + "@semantic-release/git" + ] + } +} \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5fa7506..7dd3759 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -39,7 +39,7 @@ jobs: - name: Remove node_modules and package-lock.json run: rm -rf ./package-ci/node_modules ./package-ci/package-lock.json - name: Install dependencies - run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci @semantic-release/exec@6.0.3 @semantic-release/git@10.0.1 conventional-changelog-eslint@3.0.9 semantic-release@24.0.0-beta.2 + run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci - name: Run semantic-release run: npx --prefix ./package-ci semantic-release From 0c65bab0cf8e7c0f848a5919dbc8423532279219 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 21:56:20 -0700 Subject: [PATCH 06/39] Make: Move `package-ci.json` --- .github/workflows/package-ci.json => package-ci.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/package-ci.json => package-ci.json (100%) diff --git a/.github/workflows/package-ci.json b/package-ci.json similarity index 100% rename from .github/workflows/package-ci.json rename to package-ci.json From bbd3d6ef48f5253a0d7815b33a5aeb9c241558cf Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 21:58:15 -0700 Subject: [PATCH 07/39] Make: Hard-code `semantic-release` bin path --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7dd3759..68d8a0e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -41,7 +41,7 @@ jobs: - name: Install dependencies run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci - name: Run semantic-release - run: npx --prefix ./package-ci semantic-release + run: ./package-ci/node_modules/.bin/semantic-release rebase: name: Rebase Dev. Branch From 0c9e37469fba2a8e6b88dc0767e98070b345c1dd Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 21:59:25 -0700 Subject: [PATCH 08/39] Make: List installed packages --- .github/workflows/release.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 68d8a0e..6e1326d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,6 +40,8 @@ jobs: run: rm -rf ./package-ci/node_modules ./package-ci/package-lock.json - name: Install dependencies run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci + - name: List installed packages + run: ls -l ./package-ci/node_modules/.bin - name: Run semantic-release run: ./package-ci/node_modules/.bin/semantic-release From 7b4f42ef7b0465ea6a6177e7e011c9240ac0a020 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:05:26 -0700 Subject: [PATCH 09/39] Make: Move release settings --- package-ci.json => .github/workflows/package.json | 0 .github/workflows/release.yml | 10 ++-------- 2 files changed, 2 insertions(+), 8 deletions(-) rename package-ci.json => .github/workflows/package.json (100%) diff --git a/package-ci.json b/.github/workflows/package.json similarity index 100% rename from package-ci.json rename to .github/workflows/package.json diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 
6e1326d..c3b88fd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -34,16 +34,10 @@ jobs: override: true # Semantic Release - - name: Clean npm cache - run: npm cache clean --force - - name: Remove node_modules and package-lock.json - run: rm -rf ./package-ci/node_modules ./package-ci/package-lock.json - name: Install dependencies - run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci - - name: List installed packages - run: ls -l ./package-ci/node_modules/.bin + run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci/.github/workflows - name: Run semantic-release - run: ./package-ci/node_modules/.bin/semantic-release + run: npx --prefix ./package-ci/.github/workflows semantic-release rebase: name: Rebase Dev. Branch From 7685c0bfcf77bb2437440f3979aef56fc10e66ba Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:09:33 -0700 Subject: [PATCH 10/39] Make: Debug Semantic Release --- .github/workflows/release.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c3b88fd..49ab14c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -34,10 +34,18 @@ jobs: override: true # Semantic Release + - name: Clean npm cache + run: npm cache clean --force + - name: Remove node_modules and package-lock.json + run: rm -rf .github/workflows/node_modules .github/workflows/package-lock.json - name: Install dependencies - run: npm install --ignore-scripts --legacy-peer-deps --prefix ./package-ci/.github/workflows + run: npm install --ignore-scripts --legacy-peer-deps --prefix .github/workflows + - name: Verify installation + run: ls -l .github/workflows/node_modules + - name: Verify bin directory + run: ls -l .github/workflows/node_modules/.bin - name: Run semantic-release - run: npx --prefix ./package-ci/.github/workflows 
semantic-release + run: .github/workflows/node_modules/.bin/semantic-release rebase: name: Rebase Dev. Branch From 12b27dc6d0036b4662f835d9f161bbf4ec7dc421 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:11:52 -0700 Subject: [PATCH 11/39] Make: Move `.releaserc` --- .github/workflows/.releaserc | 87 ++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 .github/workflows/.releaserc diff --git a/.github/workflows/.releaserc b/.github/workflows/.releaserc new file mode 100644 index 0000000..874866a --- /dev/null +++ b/.github/workflows/.releaserc @@ -0,0 +1,87 @@ +{ + "branches": [ + "main" + ], + "debug": true, + "ci": true, + "dryRun": false, + "plugins": [ + [ + "@semantic-release/commit-analyzer", + { + "preset": "eslint", + "releaseRules": [ + { + "tag": "Break", + "release": "major" + }, + { + "tag": "Add", + "release": "minor" + }, + { + "tag": "Improve", + "release": "patch" + }, + { + "tag": "Make", + "release": "patch" + }, + { + "tag": "Refactor", + "release": false + } + ] + } + ], + [ + "@semantic-release/release-notes-generator", + { + "preset": "eslint", + "releaseRules": [ + { + "tag": "Break", + "release": "major" + }, + { + "tag": "Add", + "release": "minor" + }, + { + "tag": "Improve", + "release": "patch" + }, + { + "tag": "Make", + "release": "patch" + }, + { + "tag": "Refactor", + "release": false + } + ] + } + ], + "@semantic-release/github", + [ + "@semantic-release/exec", + { + "prepareCmd": "bash .github/workflows/update_version.sh '${nextRelease.version}'" + } + ], + [ + "@semantic-release/git", + { + "assets": [ + "VERSION", + "package.json", + "Cargo.toml", + "Cargo.lock", + "CMakeLists.txt", + "include/simsimd/simsimd.h" + ], + "message": "Build: Released ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" + } + ] + ] +} \ No newline at end of file From a98d40d5b844e94c0e87dcaf3c4cd8769c3adcd1 Mon Sep 17 00:00:00 2001 From: Ash 
Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:17:16 -0700 Subject: [PATCH 12/39] Make: Merge `.releaserc` into `package.json` --- .github/workflows/.releaserc | 87 ---------------------------------- .github/workflows/package.json | 75 +++++++++++++++++++++++++++-- .releaserc | 87 ---------------------------------- 3 files changed, 71 insertions(+), 178 deletions(-) delete mode 100644 .github/workflows/.releaserc delete mode 100644 .releaserc diff --git a/.github/workflows/.releaserc b/.github/workflows/.releaserc deleted file mode 100644 index 874866a..0000000 --- a/.github/workflows/.releaserc +++ /dev/null @@ -1,87 +0,0 @@ -{ - "branches": [ - "main" - ], - "debug": true, - "ci": true, - "dryRun": false, - "plugins": [ - [ - "@semantic-release/commit-analyzer", - { - "preset": "eslint", - "releaseRules": [ - { - "tag": "Break", - "release": "major" - }, - { - "tag": "Add", - "release": "minor" - }, - { - "tag": "Improve", - "release": "patch" - }, - { - "tag": "Make", - "release": "patch" - }, - { - "tag": "Refactor", - "release": false - } - ] - } - ], - [ - "@semantic-release/release-notes-generator", - { - "preset": "eslint", - "releaseRules": [ - { - "tag": "Break", - "release": "major" - }, - { - "tag": "Add", - "release": "minor" - }, - { - "tag": "Improve", - "release": "patch" - }, - { - "tag": "Make", - "release": "patch" - }, - { - "tag": "Refactor", - "release": false - } - ] - } - ], - "@semantic-release/github", - [ - "@semantic-release/exec", - { - "prepareCmd": "bash .github/workflows/update_version.sh '${nextRelease.version}'" - } - ], - [ - "@semantic-release/git", - { - "assets": [ - "VERSION", - "package.json", - "Cargo.toml", - "Cargo.lock", - "CMakeLists.txt", - "include/simsimd/simsimd.h" - ], - "message": "Build: Released ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" - } - ] - ] -} \ No newline at end of file diff --git a/.github/workflows/package.json b/.github/workflows/package.json index 
242a070..1f95990 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -12,8 +12,62 @@ "main" ], "plugins": [ - "@semantic-release/commit-analyzer", - "@semantic-release/release-notes-generator", + [ + "@semantic-release/commit-analyzer", + { + "preset": "eslint", + "releaseRules": [ + { + "tag": "Break", + "release": "major" + }, + { + "tag": "Add", + "release": "minor" + }, + { + "tag": "Improve", + "release": "patch" + }, + { + "tag": "Make", + "release": "patch" + }, + { + "tag": "Refactor", + "release": false + } + ] + } + ], + [ + "@semantic-release/release-notes-generator", + { + "preset": "eslint", + "releaseRules": [ + { + "tag": "Break", + "release": "major" + }, + { + "tag": "Add", + "release": "minor" + }, + { + "tag": "Improve", + "release": "patch" + }, + { + "tag": "Make", + "release": "patch" + }, + { + "tag": "Refactor", + "release": false + } + ] + } + ], "@semantic-release/changelog", [ "@semantic-release/exec", @@ -22,14 +76,27 @@ "analyzeCommitsCmd": "echo analyzeCommits", "verifyReleaseCmd": "echo verifyRelease", "generateNotesCmd": "echo generateNotes", - "prepareCmd": "echo prepare", + "prepareCmd": "bash .github/workflows/update_version.sh '${nextRelease.version}'", "publishCmd": "echo publish", "successCmd": "echo success", "failCmd": "echo fail" } ], "@semantic-release/github", - "@semantic-release/git" + [ + "@semantic-release/git", + { + "assets": [ + "VERSION", + "package.json", + "Cargo.toml", + "Cargo.lock", + "CMakeLists.txt", + "include/simsimd/simsimd.h" + ], + "message": "Build: Released ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" + } + ] ] } } \ No newline at end of file diff --git a/.releaserc b/.releaserc deleted file mode 100644 index 874866a..0000000 --- a/.releaserc +++ /dev/null @@ -1,87 +0,0 @@ -{ - "branches": [ - "main" - ], - "debug": true, - "ci": true, - "dryRun": false, - "plugins": [ - [ - "@semantic-release/commit-analyzer", - { - "preset": "eslint", - "releaseRules": [ 
- { - "tag": "Break", - "release": "major" - }, - { - "tag": "Add", - "release": "minor" - }, - { - "tag": "Improve", - "release": "patch" - }, - { - "tag": "Make", - "release": "patch" - }, - { - "tag": "Refactor", - "release": false - } - ] - } - ], - [ - "@semantic-release/release-notes-generator", - { - "preset": "eslint", - "releaseRules": [ - { - "tag": "Break", - "release": "major" - }, - { - "tag": "Add", - "release": "minor" - }, - { - "tag": "Improve", - "release": "patch" - }, - { - "tag": "Make", - "release": "patch" - }, - { - "tag": "Refactor", - "release": false - } - ] - } - ], - "@semantic-release/github", - [ - "@semantic-release/exec", - { - "prepareCmd": "bash .github/workflows/update_version.sh '${nextRelease.version}'" - } - ], - [ - "@semantic-release/git", - { - "assets": [ - "VERSION", - "package.json", - "Cargo.toml", - "Cargo.lock", - "CMakeLists.txt", - "include/simsimd/simsimd.h" - ], - "message": "Build: Released ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" - } - ] - ] -} \ No newline at end of file From f533249134be68cd2e835128770b991a92fe5613 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:19:55 -0700 Subject: [PATCH 13/39] Make: Add `debug`, `ci`, `dryRun` props --- .github/workflows/package.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/package.json b/.github/workflows/package.json index 1f95990..a6da098 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -8,6 +8,9 @@ "semantic-release": "^24.0.0-beta.2" }, "release": { + "debug": true, + "ci": true, + "dryRun": false, "branches": [ "main" ], From 48efc4b9f00c5f7195c165b6d6fbfd5a43ae9412 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:25:09 -0700 Subject: [PATCH 14/39] Make: Run via `npx` --- .github/workflows/package.json | 2 +- .github/workflows/release.yml | 5 ++++- 
2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package.json b/.github/workflows/package.json index a6da098..7820b32 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -72,6 +72,7 @@ } ], "@semantic-release/changelog", + "@semantic-release/github", [ "@semantic-release/exec", { @@ -85,7 +86,6 @@ "failCmd": "echo fail" } ], - "@semantic-release/github", [ "@semantic-release/git", { diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 49ab14c..a63adf6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -45,7 +45,10 @@ jobs: - name: Verify bin directory run: ls -l .github/workflows/node_modules/.bin - name: Run semantic-release - run: .github/workflows/node_modules/.bin/semantic-release + # We can directly run the semantic-release binary from the node_modules/.bin directory. + # but this may fail to fetch environment variables. + # run: .github/workflows/node_modules/.bin/semantic-release + run: npx --prefix .github/workflows semantic-release rebase: name: Rebase Dev. 
Branch From afa7b2a089f9308daa24cc07833456a483b40c52 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:31:05 -0700 Subject: [PATCH 15/39] Make: Try `private=true` to avoid NPM --- .github/workflows/package.json | 1 + .github/workflows/release.yml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package.json b/.github/workflows/package.json index 7820b32..f5629a6 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -11,6 +11,7 @@ "debug": true, "ci": true, "dryRun": false, + "private": true, "branches": [ "main" ], diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a63adf6..84ea882 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -47,8 +47,8 @@ jobs: - name: Run semantic-release # We can directly run the semantic-release binary from the node_modules/.bin directory. # but this may fail to fetch environment variables. - # run: .github/workflows/node_modules/.bin/semantic-release - run: npx --prefix .github/workflows semantic-release + # run: npx --prefix .github/workflows semantic-release + run: .github/workflows/node_modules/.bin/semantic-release rebase: name: Rebase Dev. 
Branch From 314a24085caa36e037eee50486f20de7c0d988aa Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:35:59 -0700 Subject: [PATCH 16/39] Make: Set `"npmPublish": false` --- .github/workflows/package.json | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/package.json b/.github/workflows/package.json index f5629a6..4c92cef 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -100,7 +100,13 @@ ], "message": "Build: Released ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" } - ] + ], + [ + "@semantic-release/npm", + { + "npmPublish": false + } + ], ] } } \ No newline at end of file From 9fab3e46771dd08fb9fdb8ec096eb44c763c2cfd Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:37:05 -0700 Subject: [PATCH 17/39] Fix: Remove JSON comma --- .github/workflows/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/package.json b/.github/workflows/package.json index 4c92cef..8ad972b 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -106,7 +106,7 @@ { "npmPublish": false } - ], + ] ] } } \ No newline at end of file From 8a60987dc4efa564fe928209db763041b822712a Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:42:01 -0700 Subject: [PATCH 18/39] Make: Revert NPM plugin --- .github/workflows/package.json | 6 ------ .github/workflows/release.yml | 3 --- 2 files changed, 9 deletions(-) diff --git a/.github/workflows/package.json b/.github/workflows/package.json index 8ad972b..f5629a6 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -100,12 +100,6 @@ ], "message": "Build: Released ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" } - ], - [ - "@semantic-release/npm", - { - 
"npmPublish": false - } ] ] } diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 84ea882..49ab14c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -45,9 +45,6 @@ jobs: - name: Verify bin directory run: ls -l .github/workflows/node_modules/.bin - name: Run semantic-release - # We can directly run the semantic-release binary from the node_modules/.bin directory. - # but this may fail to fetch environment variables. - # run: npx --prefix .github/workflows semantic-release run: .github/workflows/node_modules/.bin/semantic-release rebase: From 94645a61f8ac4d20fa4815fbbe236ee9e295d582 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:45:05 -0700 Subject: [PATCH 19/39] Make: Disable `ci` in SemVer --- .github/workflows/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/package.json b/.github/workflows/package.json index f5629a6..401a504 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -9,7 +9,7 @@ }, "release": { "debug": true, - "ci": true, + "ci": false, "dryRun": false, "private": true, "branches": [ From d1e0879f2f9225269bb88fddb7fe5c546002790b Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:49:16 -0700 Subject: [PATCH 20/39] Make: Prefixing semantic release path --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 49ab14c..1a2e603 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -45,7 +45,7 @@ jobs: - name: Verify bin directory run: ls -l .github/workflows/node_modules/.bin - name: Run semantic-release - run: .github/workflows/node_modules/.bin/semantic-release + run: npx --prefix .github/workflows semantic-release rebase: name: Rebase Dev. 
Branch From d2d172fe4fb3b3c5c430931870995d1598868821 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 25 May 2024 22:50:33 -0700 Subject: [PATCH 21/39] Make: Debug SemVer --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1a2e603..9407a1e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -45,7 +45,7 @@ jobs: - name: Verify bin directory run: ls -l .github/workflows/node_modules/.bin - name: Run semantic-release - run: npx --prefix .github/workflows semantic-release + run: npx --prefix .github/workflows semantic-release --no-ci --no-npm --debug rebase: name: Rebase Dev. Branch From bff55ccfef78c74aee89d9f591a46247add745c6 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 29 May 2024 19:42:19 +0000 Subject: [PATCH 22/39] Make: Upgrade `eslint` to `^6.0.0` https://github.com/semantic-release/release-notes-generator/issues/633#issuecomment-2132233221 --- .github/workflows/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/package.json b/.github/workflows/package.json index 401a504..739aa76 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -4,7 +4,7 @@ "devDependencies": { "@semantic-release/exec": "^6.0.3", "@semantic-release/git": "^10.0.1", - "conventional-changelog-eslint": "^3.0.9", + "conventional-changelog-eslint": "^6.0.0", "semantic-release": "^24.0.0-beta.2" }, "release": { From 2761100be8ad4ce7e2cc1eff7ce089864f5e4c5d Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 29 May 2024 20:00:41 +0000 Subject: [PATCH 23/39] Make: Bump side dependencies --- .github/workflows/package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/package.json 
b/.github/workflows/package.json index 739aa76..7942272 100644 --- a/.github/workflows/package.json +++ b/.github/workflows/package.json @@ -4,6 +4,7 @@ "devDependencies": { "@semantic-release/exec": "^6.0.3", "@semantic-release/git": "^10.0.1", + "@semantic-release/github": "^10.0.5", "conventional-changelog-eslint": "^6.0.0", "semantic-release": "^24.0.0-beta.2" }, From 32fb4833f4472edf3979761bb47552019cffba2a Mon Sep 17 00:00:00 2001 From: Ashot Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 10 Jun 2024 19:44:43 +0000 Subject: [PATCH 24/39] Improve: Align benchmark inputs This way, we can isolate the penalty of split-loads in benchmarks, which would affect cheaper-to-compute distance functions --- cpp/bench.cxx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/bench.cxx b/cpp/bench.cxx index 3dfe53f..1df1280 100644 --- a/cpp/bench.cxx +++ b/cpp/bench.cxx @@ -42,8 +42,8 @@ template struct vect using compressed16_t = unsigned short; static constexpr bool is_integral = datatype_ak == simsimd_datatype_i8_k || datatype_ak == simsimd_datatype_b8_k; - scalar_t a[dimensions_ak]{}; - scalar_t b[dimensions_ak]{}; + alignas(64) scalar_t a[dimensions_ak]{}; + alignas(64) scalar_t b[dimensions_ak]{}; std::size_t dimensions() const noexcept { return dimensions_ak; } std::size_t size_bytes() const noexcept { return dimensions_ak * sizeof(scalar_t); } @@ -134,7 +134,8 @@ void measure(bm::State& state, metric_at metric, metric_at baseline) { }; // Let's average the distance results over many pairs. - std::vector pairs(1024); + constexpr std::size_t pairs_count = 4; + std::vector pairs(pairs_count); for (auto& pair : pairs) pair.randomize(); @@ -147,7 +148,8 @@ void measure(bm::State& state, metric_at metric, metric_at baseline) { // The actual benchmarking loop. 
std::size_t iterations = 0; for (auto _ : state) - bm::DoNotOptimize((results_contender[iterations & 1023] = call_contender(pairs[iterations & 1023]))), + bm::DoNotOptimize((results_contender[iterations & (pairs_count - 1)] = + call_contender(pairs[iterations & (pairs_count - 1)]))), iterations++; // Measure the mean absolute delta and relative error. From d33fd8ae9bf223e50b6f3006cba5c2144faf2f2b Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 11 Jun 2024 01:48:35 +0000 Subject: [PATCH 25/39] Improve: Ignore Swift temporary files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 45fbcd5..0a67b1f 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ build/ dist/ javascript/dist bun.lockb +.build/ # for Google Benchmark compare.py From ba3b41395f25bc002d81c149a1c22bedc70a2eb4 Mon Sep 17 00:00:00 2001 From: Valery Mironov Date: Tue, 11 Jun 2024 04:57:17 +0300 Subject: [PATCH 26/39] Fix: Dynamic dispatch order on x86 (#137) Closes #136 Co-authored-by: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> --- include/simsimd/simsimd.h | 60 +++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/include/simsimd/simsimd.h b/include/simsimd/simsimd.h index 5c4e97a..5f8f98c 100644 --- a/include/simsimd/simsimd.h +++ b/include/simsimd/simsimd.h @@ -550,14 +550,6 @@ SIMSIMD_PUBLIC void simsimd_find_metric_punned( // default: break; } #endif -#if SIMSIMD_TARGET_HASWELL - if (viable & simsimd_cap_haswell_k) - switch (kind) { - case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f32c_haswell, *c = simsimd_cap_haswell_k; return; - case simsimd_metric_vdot_k: *m = (m_t)&simsimd_vdot_f32c_haswell, *c = simsimd_cap_haswell_k; return; - default: break; - } -#endif #if SIMSIMD_TARGET_SKYLAKE if (viable & simsimd_cap_skylake_k) switch (kind) { @@ -566,6 +558,14 @@ SIMSIMD_PUBLIC void simsimd_find_metric_punned( // default: break; 
} #endif +#if SIMSIMD_TARGET_HASWELL + if (viable & simsimd_cap_haswell_k) + switch (kind) { + case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f32c_haswell, *c = simsimd_cap_haswell_k; return; + case simsimd_metric_vdot_k: *m = (m_t)&simsimd_vdot_f32c_haswell, *c = simsimd_cap_haswell_k; return; + default: break; + } +#endif if (viable & simsimd_cap_serial_k) switch (kind) { @@ -622,14 +622,6 @@ SIMSIMD_PUBLIC void simsimd_find_metric_punned( // default: break; } #endif -#if SIMSIMD_TARGET_HASWELL - if (viable & simsimd_cap_haswell_k) - switch (kind) { - case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f16c_haswell, *c = simsimd_cap_haswell_k; return; - case simsimd_metric_vdot_k: *m = (m_t)&simsimd_vdot_f16c_haswell, *c = simsimd_cap_haswell_k; return; - default: break; - } -#endif #if SIMSIMD_TARGET_SAPPHIRE if (viable & simsimd_cap_sapphire_k) switch (kind) { @@ -638,6 +630,14 @@ SIMSIMD_PUBLIC void simsimd_find_metric_punned( // default: break; } #endif +#if SIMSIMD_TARGET_HASWELL + if (viable & simsimd_cap_haswell_k) + switch (kind) { + case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f16c_haswell, *c = simsimd_cap_haswell_k; return; + case simsimd_metric_vdot_k: *m = (m_t)&simsimd_vdot_f16c_haswell, *c = simsimd_cap_haswell_k; return; + default: break; + } +#endif if (viable & simsimd_cap_serial_k) switch (kind) { @@ -878,10 +878,10 @@ SIMSIMD_PUBLIC void simsimd_dot_f16(simsimd_f16_t const* a, simsimd_f16_t const* simsimd_dot_f16_sve(a, b, n, d); #elif SIMSIMD_TARGET_NEON simsimd_dot_f16_neon(a, b, n, d); -#elif SIMSIMD_TARGET_HASWELL - simsimd_dot_f16_haswell(a, b, n, d); #elif SIMSIMD_TARGET_SAPPHIRE simsimd_dot_f16_sapphire(a, b, n, d); +#elif SIMSIMD_TARGET_HASWELL + simsimd_dot_f16_haswell(a, b, n, d); #else simsimd_dot_f16_serial(a, b, n, d); #endif @@ -926,10 +926,10 @@ SIMSIMD_PUBLIC void simsimd_dot_f16c(simsimd_f16_t const* a, simsimd_f16_t const simsimd_dot_f16c_sve(a, b, n, d); #elif SIMSIMD_TARGET_NEON simsimd_dot_f16c_neon(a, b, n, d); 
-#elif SIMSIMD_TARGET_HASWELL - simsimd_dot_f16c_haswell(a, b, n, d); #elif SIMSIMD_TARGET_SAPPHIRE simsimd_dot_f16c_sapphire(a, b, n, d); +#elif SIMSIMD_TARGET_HASWELL + simsimd_dot_f16c_haswell(a, b, n, d); #else simsimd_dot_f16c_serial(a, b, n, d); #endif @@ -948,10 +948,10 @@ SIMSIMD_PUBLIC void simsimd_dot_f32c(simsimd_f32_t const* a, simsimd_f32_t const simsimd_dot_f32c_sve(a, b, n, d); #elif SIMSIMD_TARGET_NEON simsimd_dot_f32c_neon(a, b, n, d); -#elif SIMSIMD_TARGET_HASWELL - simsimd_dot_f32c_haswell(a, b, n, d); #elif SIMSIMD_TARGET_SKYLAKE simsimd_dot_f32c_skylake(a, b, n, d); +#elif SIMSIMD_TARGET_HASWELL + simsimd_dot_f32c_haswell(a, b, n, d); #else simsimd_dot_f32c_serial(a, b, n, d); #endif @@ -971,11 +971,11 @@ SIMSIMD_PUBLIC void simsimd_vdot_f16c(simsimd_f16_t const* a, simsimd_f16_t cons #if SIMSIMD_TARGET_SVE simsimd_vdot_f16c_sve(a, b, n, d); #elif SIMSIMD_TARGET_NEON - simsimd_vdot_f16c_neon(a, b, n, d); -#elif SIMSIMD_TARGET_HASWELL - simsimd_vdot_f16c_haswell(a, b, n, d); + simsimd_dot_f16c_neon(a, b, n, d); #elif SIMSIMD_TARGET_SAPPHIRE - simsimd_vdot_f16c_sapphire(a, b, n, d); + simsimd_dot_f16c_sapphire(a, b, n, d); +#elif SIMSIMD_TARGET_HASWELL + simsimd_dot_f16c_haswell(a, b, n, d); #else simsimd_vdot_f16c_serial(a, b, n, d); #endif @@ -989,11 +989,11 @@ SIMSIMD_PUBLIC void simsimd_vdot_f32c(simsimd_f32_t const* a, simsimd_f32_t cons #if SIMSIMD_TARGET_SVE simsimd_vdot_f32c_sve(a, b, n, d); #elif SIMSIMD_TARGET_NEON - simsimd_vdot_f32c_neon(a, b, n, d); -#elif SIMSIMD_TARGET_HASWELL - simsimd_vdot_f32c_haswell(a, b, n, d); + simsimd_dot_f32c_neon(a, b, n, d); #elif SIMSIMD_TARGET_SKYLAKE - simsimd_vdot_f32c_skylake(a, b, n, d); + simsimd_dot_f32c_skylake(a, b, n, d); +#elif SIMSIMD_TARGET_HASWELL + simsimd_dot_f32c_haswell(a, b, n, d); #else simsimd_vdot_f32c_serial(a, b, n, d); #endif From a16cedd9eca364bbb311261d095ff9dbc7980c00 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 19 
Jun 2024 20:35:14 -0700 Subject: [PATCH 27/39] Fix: Disable `bf16` for NEON SimSIMD needs a more fine-grained approach to versioning Arm kernels. Even at compilation time, AppleClang would fail to recognize many of the intrinsics and we need a way to avoid those sections. --- cpp/bench.cxx | 2 ++ include/simsimd/dot.h | 2 ++ include/simsimd/spatial.h | 3 ++- include/simsimd/types.h | 50 +++++++++++++++++++++++++++++---------- 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/cpp/bench.cxx b/cpp/bench.cxx index 1df1280..baa7e5d 100644 --- a/cpp/bench.cxx +++ b/cpp/bench.cxx @@ -276,9 +276,11 @@ int main(int argc, char** argv) { register_("kl_f16_neon", simsimd_kl_f16_neon, simsimd_kl_f16_accurate); register_("js_f16_neon", simsimd_js_f16_neon, simsimd_js_f16_accurate); +#if SIMSIMD_TARGET_NEON_BF16_IMPLEMENTED register_("dot_bf16_neon", simsimd_dot_bf16_neon, simsimd_dot_bf16_accurate); register_("cos_bf16_neon", simsimd_cos_bf16_neon, simsimd_cos_bf16_accurate); register_("l2sq_bf16_neon", simsimd_l2sq_bf16_neon, simsimd_l2sq_bf16_accurate); +#endif register_("dot_f32_neon", simsimd_dot_f32_neon, simsimd_dot_f32_accurate); register_("cos_f32_neon", simsimd_cos_f32_neon, simsimd_cos_f32_accurate); diff --git a/include/simsimd/dot.h b/include/simsimd/dot.h index 74015a1..6d33d16 100644 --- a/include/simsimd/dot.h +++ b/include/simsimd/dot.h @@ -467,6 +467,7 @@ SIMSIMD_PUBLIC void simsimd_vdot_f16c_neon(simsimd_f16_t const* a, simsimd_f16_t #pragma clang attribute pop #pragma GCC pop_options +#if SIMSIMD_TARGET_NEON_BF16_IMPLEMENTED #pragma GCC push_options #pragma GCC target("+simd+bf16") #pragma clang attribute push(__attribute__((target("+simd+bf16"))), apply_to = function) @@ -585,6 +586,7 @@ SIMSIMD_PUBLIC void simsimd_vdot_bf16c_neon(simsimd_bf16_t const* a, simsimd_bf1 #pragma clang attribute pop #pragma GCC pop_options +#endif #endif // SIMSIMD_TARGET_NEON #if SIMSIMD_TARGET_SVE diff --git a/include/simsimd/spatial.h b/include/simsimd/spatial.h 
index b8f3808..0b51298 100644 --- a/include/simsimd/spatial.h +++ b/include/simsimd/spatial.h @@ -298,6 +298,7 @@ SIMSIMD_PUBLIC void simsimd_cos_f16_neon(simsimd_f16_t const* a, simsimd_f16_t c *result = ab != 0 ? 1 - ab * a2_b2_arr[0] * a2_b2_arr[1] : 1; } +#if SIMSIMD_TARGET_NEON_BF16_IMPLEMENTED SIMSIMD_PUBLIC void simsimd_cos_bf16_neon(simsimd_bf16_t const* a, simsimd_bf16_t const* b, simsimd_size_t n, simsimd_distance_t* result) { float32x4_t ab_high_vec = vdupq_n_f32(0), ab_low_vec = vdupq_n_f32(0); @@ -388,7 +389,7 @@ SIMSIMD_PUBLIC void simsimd_l2sq_bf16_neon(simsimd_bf16_t const* a, simsimd_bf16 simsimd_f32_t sum = vaddvq_f32(vaddq_f32(sum_high_vec, sum_low_vec)); *result = sum; } - +#endif #pragma clang attribute pop #pragma GCC pop_options diff --git a/include/simsimd/types.h b/include/simsimd/types.h index c151144..c3a2523 100644 --- a/include/simsimd/types.h +++ b/include/simsimd/types.h @@ -203,7 +203,7 @@ typedef simsimd_f64_t simsimd_distance_t; /** * @brief Half-precision floating-point type. * - * - GCC or Clang on 64-bit ARM: `__fp16`, may require `-mfp16-format` option. + * - GCC or Clang on 64-bit Arm: `__fp16`, may require `-mfp16-format` option. * - GCC or Clang on 64-bit x86: `_Float16`. * - Default: `unsigned short`. */ @@ -228,29 +228,55 @@ typedef _Float16 simsimd_f16_t; typedef unsigned short simsimd_f16_t; #endif +#if !defined(SIMSIMD_NATIVE_BF16) || SIMSIMD_NATIVE_BF16 +/** + * @brief Half-precision brain-float type. + * + * - GCC or Clang on 64-bit Arm: `__bf16` + * - GCC or Clang on 64-bit x86: `_BFloat16`. + * - Default: `unsigned short`. + * + * @warning Apple Clang has a hard time with bf16.
+ * https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms + * https://forums.developer.apple.com/forums/thread/726201 + */ +#if (defined(__GNUC__) || defined(__clang__)) && (defined(__ARM_ARCH) || defined(__aarch64__)) && \ + (defined(__ARM_BF16_FORMAT_ALTERNATIVE)) #if !defined(SIMSIMD_NATIVE_BF16) #define SIMSIMD_NATIVE_BF16 1 #endif -// SIMSIMD_CAN_COMPILE_BF16 is set during build if __bf16 compiles -#if SIMSIMD_NATIVE_BF16 && defined(SIMSIMD_CAN_COMPILE_BF16) -typedef __bf16 simsimd_bf16_t; -#else +typedef __fp16 simsimd_bf16_t; +#elif ((defined(__GNUC__) || defined(__clang__)) && (defined(__x86_64__) || defined(__i386__)) && \ + (defined(__SSE2__) || defined(__AVX512F__))) +typedef _Float16 simsimd_bf16_t; +#if !defined(SIMSIMD_NATIVE_BF16) +#define SIMSIMD_NATIVE_BF16 1 +#endif +#else // Unknown compiler or architecture +#define SIMSIMD_NATIVE_BF16 0 +#endif // Unknown compiler or architecture +#endif // !SIMSIMD_NATIVE_BF16 + +#if !SIMSIMD_NATIVE_BF16 typedef unsigned short simsimd_bf16_t; #endif /** * @brief Alias for the half-precision floating-point type on Arm. - * Clang and GCC bring the `float16_t` symbol when you compile for Aarch64. - * MSVC lacks it, and it's `vld1_f16`-like intrinsics are in reality macros, - * that cast to 16-bit integers internally, instead of using floats. + * + * Clang and GCC bring the `float16_t` symbol when you compile for Aarch64. + * MSVC lacks it, and its `vld1_f16`-like intrinsics are in reality macros, + * that cast to 16-bit integers internally, instead of using floats. + * Some of those are defined as aliases, so we use `#define` preprocessor + * directives instead of `typedef` to avoid errors.
*/ #if SIMSIMD_TARGET_ARM #if defined(_MSC_VER) -typedef simsimd_f16_t simsimd_f16_for_arm_simd_t; -typedef simsimd_bf16_t simsimd_bf16_for_arm_simd_t; +#define simsimd_f16_for_arm_simd_t simsimd_f16_t +#define simsimd_bf16_for_arm_simd_t simsimd_bf16_t #else -typedef float16_t simsimd_f16_for_arm_simd_t; -typedef bfloat16_t simsimd_bf16_for_arm_simd_t; +#define simsimd_f16_for_arm_simd_t float16_t +#define simsimd_bf16_for_arm_simd_t bfloat16_t #endif #endif From 20d7a2973d9bc9f3d728f2facd9ad822340a9a6f Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 19 Jun 2024 20:45:24 -0700 Subject: [PATCH 28/39] Fix: Picking the right `bf16` decompressor Prior `bf16` tests in Python failed due to this inconsistency in `#ifndef`. --- include/simsimd/types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/simsimd/types.h b/include/simsimd/types.h index c3a2523..85c58f0 100644 --- a/include/simsimd/types.h +++ b/include/simsimd/types.h @@ -298,11 +298,13 @@ typedef unsigned short simsimd_bf16_t; * @brief Returns the value of the half-precision brain floating-point number, * potentially decompressed into single-precision. 
*/ -#ifdef SIMSIMD_NATIVE_BF16 +#ifndef SIMSIMD_UNCOMPRESS_BF16 +#if SIMSIMD_NATIVE_BF16 #define SIMSIMD_UNCOMPRESS_BF16(x) (SIMSIMD_IDENTIFY(x)) #else #define SIMSIMD_UNCOMPRESS_BF16(x) (simsimd_uncompress_bf16(x)) #endif +#endif typedef union { unsigned i; From b426d30f242bf75048475cd325c290f3efc66a2c Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 19 Jun 2024 20:49:50 -0700 Subject: [PATCH 29/39] Make: Bump to 4.4.0 --- .github/workflows/release.yml | 47 ++++++++++++++++++----------------- CMakeLists.txt | 2 +- Cargo.lock | 2 +- Cargo.toml | 2 +- VERSION | 2 +- package.json | 2 +- 6 files changed, 29 insertions(+), 28 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9407a1e..15a4dd0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,29 +23,30 @@ jobs: - uses: actions/checkout@v4 with: persist-credentials: false - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: 20 - - name: Set up Cargo - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - - # Semantic Release - - name: Clean npm cache - run: npm cache clean --force - - name: Remove node_modules and package-lock.json - run: rm -rf .github/workflows/node_modules .github/workflows/package-lock.json - - name: Install dependencies - run: npm install --ignore-scripts --legacy-peer-deps --prefix .github/workflows - - name: Verify installation - run: ls -l .github/workflows/node_modules - - name: Verify bin directory - run: ls -l .github/workflows/node_modules/.bin - - name: Run semantic-release - run: npx --prefix .github/workflows semantic-release --no-ci --no-npm --debug + + # Automatic versioning is a pain. 
+ # + # - name: Set up Node.js + # uses: actions/setup-node@v4 + # with: + # node-version: 20 + # - name: Set up Cargo + # uses: actions-rs/toolchain@v1 + # with: + # toolchain: stable + # override: true + # - name: Clean npm cache + # run: npm cache clean --force + # - name: Remove node_modules and package-lock.json + # run: rm -rf .github/workflows/node_modules .github/workflows/package-lock.json + # - name: Install dependencies + # run: npm install --ignore-scripts --legacy-peer-deps --prefix .github/workflows + # - name: Verify installation + # run: ls -l .github/workflows/node_modules + # - name: Verify bin directory + # run: ls -l .github/workflows/node_modules/.bin + # - name: Run semantic-release + # run: npx --prefix .github/workflows semantic-release --no-ci --no-npm --debug rebase: name: Rebase Dev. Branch diff --git a/CMakeLists.txt b/CMakeLists.txt index 255bc79..c6a755d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) project( simsimd - VERSION 4.3.1 + VERSION 4.4.0 LANGUAGES C CXX DESCRIPTION "Fastest SIMD-Accelerated Vector Similarity Functions for x86 and Arm" HOMEPAGE_URL "https://github.com/ashvardanian/simsimd" diff --git a/Cargo.lock b/Cargo.lock index ef784df..082d10d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -496,7 +496,7 @@ dependencies = [ [[package]] name = "simsimd" -version = "4.3.1" +version = "4.4.0" dependencies = [ "cc", "criterion", diff --git a/Cargo.toml b/Cargo.toml index 253d2b2..9f99d00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "simsimd" description = "Fastest SIMD-Accelerated Vector Similarity Functions for x86 and Arm" -version = "4.3.1" +version = "4.4.0" edition = "2021" license = "Apache-2.0" authors = ["Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>"] diff --git a/VERSION b/VERSION index f77856a..fdc6698 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.3.1 +4.4.0 diff --git a/package.json b/package.json index 
11b6c6e..c5749a3 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "simsimd", - "version": "4.3.1", + "version": "4.4.0", "description": "Fastest SIMD-Accelerated Vector Similarity Functions for x86 and Arm", "homepage": "https://github.com/ashvardanian/simsimd", "author": "Ash Vardanian", From 8e08ed9a02609872c414391847259a1f8d7b010c Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 29 Jun 2024 21:15:54 +0000 Subject: [PATCH 30/39] Fix: Missing `GCC push_options` --- include/simsimd/binary.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/simsimd/binary.h b/include/simsimd/binary.h index b664a21..55433a7 100644 --- a/include/simsimd/binary.h +++ b/include/simsimd/binary.h @@ -78,6 +78,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_serial(simsimd_b8_t const* a, simsimd_b8_ #if SIMSIMD_TARGET_ARM #if SIMSIMD_TARGET_NEON +#pragma GCC push_options #pragma GCC target("+simd") #pragma clang attribute push(__attribute__((target("+simd"))), apply_to = function) @@ -117,6 +118,7 @@ SIMSIMD_PUBLIC void simsimd_jaccard_b8_neon(simsimd_b8_t const* a, simsimd_b8_t #endif // SIMSIMD_TARGET_NEON #if SIMSIMD_TARGET_SVE +#pragma GCC push_options #pragma GCC target("+sve") #pragma clang attribute push(__attribute__((target("+sve"))), apply_to = function) From ebd4a01a57b4724eac9b59c12ab596adb1bbf782 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 29 Jun 2024 22:21:46 +0000 Subject: [PATCH 31/39] Add: First `i8mm` attempts on Arm --- include/simsimd/spatial.h | 114 +++++++++++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 25 deletions(-) diff --git a/include/simsimd/spatial.h b/include/simsimd/spatial.h index 8ab01db..6f828d1 100644 --- a/include/simsimd/spatial.h +++ b/include/simsimd/spatial.h @@ -394,8 +394,8 @@ SIMSIMD_PUBLIC void simsimd_l2sq_bf16_neon(simsimd_bf16_t const* a, simsimd_bf16 #pragma GCC pop_options #pragma GCC 
push_options -#pragma GCC target("arch=armv8.2-a+dotprod") -#pragma clang attribute push(__attribute__((target("arch=armv8.2-a+dotprod"))), apply_to = function) +#pragma GCC target("arch=armv8.2-a+dotprod+i8mm") +#pragma clang attribute push(__attribute__((target("arch=armv8.2-a+dotprod+i8mm"))), apply_to = function) SIMSIMD_PUBLIC void simsimd_l2sq_i8_neon(simsimd_i8_t const* a, simsimd_i8_t const* b, simsimd_size_t n, simsimd_distance_t* result) { @@ -422,37 +422,101 @@ SIMSIMD_PUBLIC void simsimd_l2sq_i8_neon(simsimd_i8_t const* a, simsimd_i8_t con SIMSIMD_PUBLIC void simsimd_cos_i8_neon(simsimd_i8_t const* a, simsimd_i8_t const* b, simsimd_size_t n, simsimd_distance_t* result) { - int32x4_t ab_vec = vdupq_n_s32(0); - int32x4_t a2_vec = vdupq_n_s32(0); - int32x4_t b2_vec = vdupq_n_s32(0); simsimd_size_t i = 0; + // Variant 1. // If the 128-bit `vdot_s32` intrinsic is unavailable, we can use the 64-bit `vdot_s32`. - // for (simsimd_size_t i = 0; i != n; i += 8) { - // int16x8_t a_vec = vmovl_s8(vld1_s8(a + i)); - // int16x8_t b_vec = vmovl_s8(vld1_s8(b + i)); - // int16x8_t ab_part_vec = vmulq_s16(a_vec, b_vec); - // int16x8_t a2_part_vec = vmulq_s16(a_vec, a_vec); - // int16x8_t b2_part_vec = vmulq_s16(b_vec, b_vec); - // ab_vec = vaddq_s32(ab_vec, vaddq_s32(vmovl_s16(vget_high_s16(ab_part_vec)), // - // vmovl_s16(vget_low_s16(ab_part_vec)))); - // a2_vec = vaddq_s32(a2_vec, vaddq_s32(vmovl_s16(vget_high_s16(a2_part_vec)), // - // vmovl_s16(vget_low_s16(a2_part_vec)))); - // b2_vec = vaddq_s32(b2_vec, vaddq_s32(vmovl_s16(vget_high_s16(b2_part_vec)), // - // vmovl_s16(vget_low_s16(b2_part_vec)))); - // } - // TODO: Redo with MMLA: vmmlaq_s32 + // + // int32x4_t ab_vec = vdupq_n_s32(0); + // int32x4_t a2_vec = vdupq_n_s32(0); + // int32x4_t b2_vec = vdupq_n_s32(0); + // for (simsimd_size_t i = 0; i != n; i += 8) { + // int16x8_t a_vec = vmovl_s8(vld1_s8(a + i)); + // int16x8_t b_vec = vmovl_s8(vld1_s8(b + i)); + // int16x8_t ab_part_vec = vmulq_s16(a_vec, b_vec); 
+ // int16x8_t a2_part_vec = vmulq_s16(a_vec, a_vec); + // int16x8_t b2_part_vec = vmulq_s16(b_vec, b_vec); + // ab_vec = vaddq_s32(ab_vec, vaddq_s32(vmovl_s16(vget_high_s16(ab_part_vec)), // + // vmovl_s16(vget_low_s16(ab_part_vec)))); + // a2_vec = vaddq_s32(a2_vec, vaddq_s32(vmovl_s16(vget_high_s16(a2_part_vec)), // + // vmovl_s16(vget_low_s16(a2_part_vec)))); + // b2_vec = vaddq_s32(b2_vec, vaddq_s32(vmovl_s16(vget_high_s16(b2_part_vec)), // + // vmovl_s16(vget_low_s16(b2_part_vec)))); + // } + // + // Variant 2. + // With the 128-bit `vdotq_s32` intrinsic, we can use the following code: + // + // for (; i + 16 <= n; i += 16) { + // int8x16_t a_vec = vld1q_s8(a + i); + // int8x16_t b_vec = vld1q_s8(b + i); + // ab_vec = vdotq_s32(ab_vec, a_vec, b_vec); + // a2_vec = vdotq_s32(a2_vec, a_vec, a_vec); + // b2_vec = vdotq_s32(b2_vec, b_vec, b_vec); + // } + // + // Variant 3. + // To use MMLA instructions, we need to reorganize the contents of the vectors. + // On input we have `a_vec` and `b_vec`: + // + // a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15] + // b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15] + // + // After transpose we can have `a_low_b_low_vec` and `a_high_b_high_vec`: + // + // a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7] + // a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15], b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15] + // + // This is however not entirely true, the first register must be 2x8 instead of 8x2. 
+ // + // X = + // a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], + // b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7] + // Y = + // a[0], b[0], + // a[1], b[1], + // a[2], b[2], + // a[3], b[3], + // a[4], b[4], + // a[5], b[5], + // a[6], b[6], + // a[7], b[7] + // + // V = + // a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15], + // b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15] + // W = + // a[8], b[8], + // a[9], b[9], + // a[10], b[10], + // a[11], b[11], + // a[12], b[12], + // a[13], b[13], + // a[14], b[14], + // a[15], b[15] + // + // Performing matrix multiplications we can aggregate into a matrix `products_low_vec` and `products_high_vec`: + // + // X * X, X * Y V * W, V * V + // Y * X, Y * Y W * W, W * V + // + // Of those values we need only 3/4, as the (X * Y) and (Y * X) are the same. + int32x4_t products_low_vec = vdupq_n_s32(0), products_high_vec = vdupq_n_s32(0); + int8x16_t a_low_b_low_vec, a_high_b_high_vec; for (; i + 16 <= n; i += 16) { int8x16_t a_vec = vld1q_s8(a + i); int8x16_t b_vec = vld1q_s8(b + i); - ab_vec = vdotq_s32(ab_vec, a_vec, b_vec); - a2_vec = vdotq_s32(a2_vec, a_vec, a_vec); - b2_vec = vdotq_s32(b2_vec, b_vec, b_vec); + int8x16x2_t y_w_vecs = vzipq_s8(a_vec, b_vec); + int8x16_t x_vec = vcombine_s8(vget_low_s8(a_vec), vget_low_s8(b_vec)); + int8x16_t v_vec = vcombine_s8(vget_high_s8(a_vec), vget_high_s8(b_vec)); + products_low_vec = vmmlaq_s32(products_low_vec, x_vec, y_w_vecs.val[0]); + products_high_vec = vmmlaq_s32(products_high_vec, v_vec, y_w_vecs.val[1]); } - int32_t ab = vaddvq_s32(ab_vec); - int32_t a2 = vaddvq_s32(a2_vec); - int32_t b2 = vaddvq_s32(b2_vec); + int32x4_t products_vec = vaddq_s32(products_high_vec, products_low_vec); + int32_t a2 = products_vec[0]; + int32_t ab = products_vec[1]; + int32_t b2 = products_vec[3]; // Take care of the tail: for (; i < n; ++i) { From 6dfda2b881aab445fec6cb2aa52147d00aa47c42 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> 
Date: Sat, 29 Jun 2024 22:26:15 +0000 Subject: [PATCH 32/39] Improve: Use `vextq` over `vcombine` --- include/simsimd/spatial.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/simsimd/spatial.h b/include/simsimd/spatial.h index 6f828d1..41d44e6 100644 --- a/include/simsimd/spatial.h +++ b/include/simsimd/spatial.h @@ -507,8 +507,8 @@ SIMSIMD_PUBLIC void simsimd_cos_i8_neon(simsimd_i8_t const* a, simsimd_i8_t cons int8x16_t a_vec = vld1q_s8(a + i); int8x16_t b_vec = vld1q_s8(b + i); int8x16x2_t y_w_vecs = vzipq_s8(a_vec, b_vec); - int8x16_t x_vec = vcombine_s8(vget_low_s8(a_vec), vget_low_s8(b_vec)); - int8x16_t v_vec = vcombine_s8(vget_high_s8(a_vec), vget_high_s8(b_vec)); + int8x16_t x_vec = vextq_s8(a_vec, b_vec, 0); + int8x16_t v_vec = vextq_s8(a_vec, b_vec, 1); products_low_vec = vmmlaq_s32(products_low_vec, x_vec, y_w_vecs.val[0]); products_high_vec = vmmlaq_s32(products_high_vec, v_vec, y_w_vecs.val[1]); } From c55f2c121aa7ab82fc19ca2a7cfb82f7f458cf24 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 29 Jun 2024 23:14:28 +0000 Subject: [PATCH 33/39] Improve: Correct combination Co-authored-by: Everett Knag --- include/simsimd/spatial.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/include/simsimd/spatial.h b/include/simsimd/spatial.h index 41d44e6..cf0b2f6 100644 --- a/include/simsimd/spatial.h +++ b/include/simsimd/spatial.h @@ -462,12 +462,7 @@ SIMSIMD_PUBLIC void simsimd_cos_i8_neon(simsimd_i8_t const* a, simsimd_i8_t cons // a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15] // b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15] // - // After transpose we can have `a_low_b_low_vec` and `a_high_b_high_vec`: - // - // a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7] - // a[8], a[9], a[10], a[11], 
a[12], a[13], a[14], a[15], b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15] - // - // This is however not entirely true, the first register must be 2x8 instead of 8x2. + // We will be multiplying matrices of size 2x8 and 8x2. So we need to perform a few shuffles: // // X = // a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], @@ -507,8 +502,8 @@ SIMSIMD_PUBLIC void simsimd_cos_i8_neon(simsimd_i8_t const* a, simsimd_i8_t cons int8x16_t a_vec = vld1q_s8(a + i); int8x16_t b_vec = vld1q_s8(b + i); int8x16x2_t y_w_vecs = vzipq_s8(a_vec, b_vec); - int8x16_t x_vec = vextq_s8(a_vec, b_vec, 0); - int8x16_t v_vec = vextq_s8(a_vec, b_vec, 1); + int8x16_t x_vec = vcombine_s8(vget_low_s8(a_vec), vget_low_s8(b_vec)); + int8x16_t v_vec = vcombine_s8(vget_high_s8(a_vec), vget_high_s8(b_vec)); products_low_vec = vmmlaq_s32(products_low_vec, x_vec, y_w_vecs.val[0]); products_high_vec = vmmlaq_s32(products_high_vec, v_vec, y_w_vecs.val[1]); } From f53d3c99ac8122ccedc549653cfe92accdf10177 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 29 Jun 2024 23:32:35 +0000 Subject: [PATCH 34/39] Improve: Prefer `vdotq_s32` for now --- include/simsimd/spatial.h | 40 +++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/include/simsimd/spatial.h b/include/simsimd/spatial.h index cf0b2f6..8abda7a 100644 --- a/include/simsimd/spatial.h +++ b/include/simsimd/spatial.h @@ -496,22 +496,38 @@ SIMSIMD_PUBLIC void simsimd_cos_i8_neon(simsimd_i8_t const* a, simsimd_i8_t cons // Y * X, Y * Y W * W, W * V // // Of those values we need only 3/4, as the (X * Y) and (Y * X) are the same. 
- int32x4_t products_low_vec = vdupq_n_s32(0), products_high_vec = vdupq_n_s32(0); - int8x16_t a_low_b_low_vec, a_high_b_high_vec; + // + // int32x4_t products_low_vec = vdupq_n_s32(0), products_high_vec = vdupq_n_s32(0); + // int8x16_t a_low_b_low_vec, a_high_b_high_vec; + // for (; i + 16 <= n; i += 16) { + // int8x16_t a_vec = vld1q_s8(a + i); + // int8x16_t b_vec = vld1q_s8(b + i); + // int8x16x2_t y_w_vecs = vzipq_s8(a_vec, b_vec); + // int8x16_t x_vec = vcombine_s8(vget_low_s8(a_vec), vget_low_s8(b_vec)); + // int8x16_t v_vec = vcombine_s8(vget_high_s8(a_vec), vget_high_s8(b_vec)); + // products_low_vec = vmmlaq_s32(products_low_vec, x_vec, y_w_vecs.val[0]); + // products_high_vec = vmmlaq_s32(products_high_vec, v_vec, y_w_vecs.val[1]); + // } + // int32x4_t products_vec = vaddq_s32(products_high_vec, products_low_vec); + // int32_t a2 = products_vec[0]; + // int32_t ab = products_vec[1]; + // int32_t b2 = products_vec[3]; + // + // That solution is elegant, but it requires the additional `+i8mm` extension and is currently slower, + // at least on AWS Graviton 3. 
+ int32x4_t ab_vec = vdupq_n_s32(0); + int32x4_t a2_vec = vdupq_n_s32(0); + int32x4_t b2_vec = vdupq_n_s32(0); for (; i + 16 <= n; i += 16) { int8x16_t a_vec = vld1q_s8(a + i); int8x16_t b_vec = vld1q_s8(b + i); - int8x16x2_t y_w_vecs = vzipq_s8(a_vec, b_vec); - int8x16_t x_vec = vcombine_s8(vget_low_s8(a_vec), vget_low_s8(b_vec)); - int8x16_t v_vec = vcombine_s8(vget_high_s8(a_vec), vget_high_s8(b_vec)); - products_low_vec = vmmlaq_s32(products_low_vec, x_vec, y_w_vecs.val[0]); - products_high_vec = vmmlaq_s32(products_high_vec, v_vec, y_w_vecs.val[1]); + ab_vec = vdotq_s32(ab_vec, a_vec, b_vec); + a2_vec = vdotq_s32(a2_vec, a_vec, a_vec); + b2_vec = vdotq_s32(b2_vec, b_vec, b_vec); } - - int32x4_t products_vec = vaddq_s32(products_high_vec, products_low_vec); - int32_t a2 = products_vec[0]; - int32_t ab = products_vec[1]; - int32_t b2 = products_vec[3]; + int32_t ab = vaddvq_s32(ab_vec); + int32_t a2 = vaddvq_s32(a2_vec); + int32_t b2 = vaddvq_s32(b2_vec); // Take care of the tail: for (; i < n; ++i) { From 37feb6512a9832844bf363bdd189dcce5d034440 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 29 Jun 2024 23:35:49 +0000 Subject: [PATCH 35/39] Fix: Drop `SIMSIMD_TARGET_NEON_BF16_IMPLEMENTED` --- cpp/bench.cxx | 2 -- include/simsimd/dot.h | 2 -- include/simsimd/spatial.h | 2 -- 3 files changed, 6 deletions(-) diff --git a/cpp/bench.cxx b/cpp/bench.cxx index 92d2284..07b8e52 100644 --- a/cpp/bench.cxx +++ b/cpp/bench.cxx @@ -282,11 +282,9 @@ int main(int argc, char** argv) { register_("kl_f16_neon", simsimd_kl_f16_neon, simsimd_kl_f16_accurate); register_("js_f16_neon", simsimd_js_f16_neon, simsimd_js_f16_accurate); -#if SIMSIMD_TARGET_NEON_BF16_IMPLEMENTED register_("dot_bf16_neon", simsimd_dot_bf16_neon, simsimd_dot_bf16_accurate); register_("cos_bf16_neon", simsimd_cos_bf16_neon, simsimd_cos_bf16_accurate); register_("l2sq_bf16_neon", simsimd_l2sq_bf16_neon, simsimd_l2sq_bf16_accurate); -#endif 
register_("dot_f32_neon", simsimd_dot_f32_neon, simsimd_dot_f32_accurate); register_("cos_f32_neon", simsimd_cos_f32_neon, simsimd_cos_f32_accurate); diff --git a/include/simsimd/dot.h b/include/simsimd/dot.h index 6d33d16..74015a1 100644 --- a/include/simsimd/dot.h +++ b/include/simsimd/dot.h @@ -467,7 +467,6 @@ SIMSIMD_PUBLIC void simsimd_vdot_f16c_neon(simsimd_f16_t const* a, simsimd_f16_t #pragma clang attribute pop #pragma GCC pop_options -#if SIMSIMD_TARGET_NEON_BF16_IMPLEMENTED #pragma GCC push_options #pragma GCC target("+simd+bf16") #pragma clang attribute push(__attribute__((target("+simd+bf16"))), apply_to = function) @@ -586,7 +585,6 @@ SIMSIMD_PUBLIC void simsimd_vdot_bf16c_neon(simsimd_bf16_t const* a, simsimd_bf1 #pragma clang attribute pop #pragma GCC pop_options -#endif #endif // SIMSIMD_TARGET_NEON #if SIMSIMD_TARGET_SVE diff --git a/include/simsimd/spatial.h b/include/simsimd/spatial.h index 7911c4d..bdcff32 100644 --- a/include/simsimd/spatial.h +++ b/include/simsimd/spatial.h @@ -298,7 +298,6 @@ SIMSIMD_PUBLIC void simsimd_cos_f16_neon(simsimd_f16_t const* a, simsimd_f16_t c *result = ab != 0 ? 
1 - ab * a2_b2_arr[0] * a2_b2_arr[1] : 1; } -#if SIMSIMD_TARGET_NEON_BF16_IMPLEMENTED SIMSIMD_PUBLIC void simsimd_cos_bf16_neon(simsimd_bf16_t const* a, simsimd_bf16_t const* b, simsimd_size_t n, simsimd_distance_t* result) { // TODO: Redo with BFMMLA - vbfmmlaq_f32 @@ -390,7 +389,6 @@ SIMSIMD_PUBLIC void simsimd_l2sq_bf16_neon(simsimd_bf16_t const* a, simsimd_bf16 simsimd_f32_t sum = vaddvq_f32(vaddq_f32(sum_high_vec, sum_low_vec)); *result = sum; } -#endif #pragma clang attribute pop #pragma GCC pop_options From 05d9d7540c619620979746a29fe92248932c3559 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 30 Jun 2024 00:20:08 +0000 Subject: [PATCH 36/39] Improve: Add compiler flags Python & Rust builds fail due to compiler option mismatch: https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html --- include/simsimd/dot.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/simsimd/dot.h b/include/simsimd/dot.h index 74015a1..67f5879 100644 --- a/include/simsimd/dot.h +++ b/include/simsimd/dot.h @@ -468,8 +468,8 @@ SIMSIMD_PUBLIC void simsimd_vdot_f16c_neon(simsimd_f16_t const* a, simsimd_f16_t #pragma GCC pop_options #pragma GCC push_options -#pragma GCC target("+simd+bf16") -#pragma clang attribute push(__attribute__((target("+simd+bf16"))), apply_to = function) +#pragma GCC target("+simd+bf16+fp16fml+fp+flagm") +#pragma clang attribute push(__attribute__((target("+simd+bf16+fp16fml+fp+flagm"))), apply_to = function) SIMSIMD_PUBLIC void simsimd_dot_bf16_neon(simsimd_bf16_t const* a, simsimd_bf16_t const* b, simsimd_size_t n, simsimd_distance_t* result) { From 236597844c7e74d8580e94ef84bba19f05b4560a Mon Sep 17 00:00:00 2001 From: Everett Knag <61752493+eknag@users.noreply.github.com> Date: Sat, 29 Jun 2024 17:34:51 -0700 Subject: [PATCH 37/39] Add: `bf16` cosine with BFMMLA (#146) Co-authored-by: Everett Knag Co-authored-by: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> --- 
include/simsimd/spatial.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/include/simsimd/spatial.h b/include/simsimd/spatial.h index bdcff32..893ae2d 100644 --- a/include/simsimd/spatial.h +++ b/include/simsimd/spatial.h @@ -300,7 +300,35 @@ SIMSIMD_PUBLIC void simsimd_cos_f16_neon(simsimd_f16_t const* a, simsimd_f16_t c SIMSIMD_PUBLIC void simsimd_cos_bf16_neon(simsimd_bf16_t const* a, simsimd_bf16_t const* b, simsimd_size_t n, simsimd_distance_t* result) { - // TODO: Redo with BFMMLA - vbfmmlaq_f32 + + // Similar to `simsimd_cos_i8_neon`, we can use the `BFMMLA` instruction through + // the `vbfmmlaq_f32` intrinsic to compute matrix products and later drop 1/4 of values. + // The only difference is that `zip` isn't provided for `bf16` and we need to reinterpret back + // and forth before zipping. Same as with integers, on modern Arm CPUs, this "smart" + // approach is actually slower by around 25%. + // + // float32x4_t products_low_vec = vdupq_n_f32(0.0f); + // float32x4_t products_high_vec = vdupq_n_f32(0.0f); + // for (; i + 8 <= n; i += 8) { + // bfloat16x8_t a_vec = vld1q_bf16((simsimd_bf16_for_arm_simd_t const*)a + i); + // bfloat16x8_t b_vec = vld1q_bf16((simsimd_bf16_for_arm_simd_t const*)b + i); + // int16x8_t a_vec_s16 = vreinterpretq_s16_bf16(a_vec); + // int16x8_t b_vec_s16 = vreinterpretq_s16_bf16(b_vec); + // int16x8x2_t y_w_vecs_s16 = vzipq_s16(a_vec_s16, b_vec_s16); + // bfloat16x8_t y_vec = vreinterpretq_bf16_s16(y_w_vecs_s16.val[0]); + // bfloat16x8_t w_vec = vreinterpretq_bf16_s16(y_w_vecs_s16.val[1]); + // bfloat16x4_t a_low = vget_low_bf16(a_vec); + // bfloat16x4_t b_low = vget_low_bf16(b_vec); + // bfloat16x4_t a_high = vget_high_bf16(a_vec); + // bfloat16x4_t b_high = vget_high_bf16(b_vec); + // bfloat16x8_t x_vec = vcombine_bf16(a_low, b_low); + // bfloat16x8_t v_vec = vcombine_bf16(a_high, b_high); + // products_low_vec = vbfmmlaq_f32(products_low_vec, x_vec, y_vec); + // products_high_vec 
= vbfmmlaq_f32(products_high_vec, v_vec, w_vec); + // } + // float32x4_t products_vec = vaddq_f32(products_high_vec, products_low_vec); + // simsimd_f32_t a2 = products_vec[0], ab = products_vec[1], b2 = products_vec[3]; + float32x4_t ab_high_vec = vdupq_n_f32(0), ab_low_vec = vdupq_n_f32(0); float32x4_t a2_high_vec = vdupq_n_f32(0), a2_low_vec = vdupq_n_f32(0); float32x4_t b2_high_vec = vdupq_n_f32(0), b2_low_vec = vdupq_n_f32(0); From 9e242358f4ba82c9acd284132d171e9535213027 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 30 Jun 2024 00:37:18 +0000 Subject: [PATCH 38/39] Fix: Missing `ADD_CAP(sve2)` --- python/lib.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/lib.c b/python/lib.c index cdee764..12de731 100644 --- a/python/lib.c +++ b/python/lib.c @@ -265,12 +265,17 @@ static PyObject* api_get_capabilities(PyObject* self) { if (!cap_dict) return NULL; -#define ADD_CAP(name) PyDict_SetItemString(cap_dict, #name, PyBool_FromLong((caps) & simsimd_cap_##name##_k)) +#define ADD_CAP(name) PyDict_SetItemString(cap_dict, #name, PyBool_FromLong((caps)&simsimd_cap_##name##_k)) ADD_CAP(serial); ADD_CAP(neon); ADD_CAP(sve); - ADD_CAP(sve2); + ADD_CAP(neon_f16); + ADD_CAP(sve_f16); + ADD_CAP(neon_bf16); + ADD_CAP(sve_bf16); + ADD_CAP(neon_i8); + ADD_CAP(sve_i8); ADD_CAP(haswell); ADD_CAP(skylake); ADD_CAP(ice); From 3e54b410ee3e6900ebffbf43b964ed775c70632d Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 30 Jun 2024 00:38:18 +0000 Subject: [PATCH 39/39] Make: Bump to 5.0.0 --- CMakeLists.txt | 2 +- Cargo.lock | 2 +- Cargo.toml | 2 +- VERSION | 2 +- include/simsimd/simsimd.h | 6 +++--- package.json | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c6a755d..8fbe89a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) 
project( simsimd - VERSION 4.4.0 + VERSION 5.0.0 LANGUAGES C CXX DESCRIPTION "Fastest SIMD-Accelerated Vector Similarity Functions for x86 and Arm" HOMEPAGE_URL "https://github.com/ashvardanian/simsimd" diff --git a/Cargo.lock b/Cargo.lock index 082d10d..2e2ed16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -496,7 +496,7 @@ dependencies = [ [[package]] name = "simsimd" -version = "4.4.0" +version = "5.0.0" dependencies = [ "cc", "criterion", diff --git a/Cargo.toml b/Cargo.toml index 9f99d00..eb68f4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "simsimd" description = "Fastest SIMD-Accelerated Vector Similarity Functions for x86 and Arm" -version = "4.4.0" +version = "5.0.0" edition = "2021" license = "Apache-2.0" authors = ["Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>"] diff --git a/VERSION b/VERSION index fdc6698..28cbf7c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.4.0 +5.0.0 \ No newline at end of file diff --git a/include/simsimd/simsimd.h b/include/simsimd/simsimd.h index dad9329..ee8fdee 100644 --- a/include/simsimd/simsimd.h +++ b/include/simsimd/simsimd.h @@ -86,9 +86,9 @@ #ifndef SIMSIMD_H #define SIMSIMD_H -#define SIMSIMD_VERSION_MAJOR 4 -#define SIMSIMD_VERSION_MINOR 3 -#define SIMSIMD_VERSION_PATCH 1 +#define SIMSIMD_VERSION_MAJOR 5 +#define SIMSIMD_VERSION_MINOR 0 +#define SIMSIMD_VERSION_PATCH 0 /** * @brief Removes compile-time dispatching, and replaces it with runtime dispatching. diff --git a/package.json b/package.json index c5749a3..d9a6672 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "simsimd", - "version": "4.4.0", + "version": "5.0.0", "description": "Fastest SIMD-Accelerated Vector Similarity Functions for x86 and Arm", "homepage": "https://github.com/ashvardanian/simsimd", "author": "Ash Vardanian",