SimHash implementation for detecting near-duplicate text using the SipHash-2-4 function
npm i @counterrealist/simhash-wasm
const { SimHash } = require("@counterrealist/simhash-wasm");
const simhash = new SimHash(3);
// Text to compare
const text1 = "khan academy";
const text2 = "khan academia";
// Compute BigInteger hashes
const bigIntHash1: BigInteger = simhash.compute(text1); // 182883240033146189889226648883436234289n
const bigIntHash2: BigInteger = simhash.compute(text2); // 188200070891594117632711953576407656125n
// Calculate similarity between BigInteger hashes
const bigIntSimilarity: number = simhash.similarity(bigIntHash1, bigIntHash2); // 0.8203125
// Compute hexadecimal hashes
const hexHash1: string = simhash.compute_hex(text1); // "899607e4844c4236a88584c4ca58a631"
const hexHash2: string = simhash.compute_hex(text2); // "8d9603e494480e34e8e7a5edfa58e6bd"
// Calculate similarity between hexadecimal hashes
const hexSimilarity: number = simhash.similarity_from_hex(hexHash1, hexHash2); // 0.8203125
// Free WebAssembly memory when done
simhash.free();
Note: The expected similarity for this example in another SimHash implementation is 0.890625.
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
Note: For platform-specific instructions or troubleshooting, refer to the Official Rust Installation Guide.
cargo install wasm-pack
Before building, ensure all tests pass for your target environment:
wasm-pack test --chrome
# or
wasm-pack test --safari
# or
wasm-pack test --firefox
# or
wasm-pack test --node
Then build the package:
wasm-pack build --target YOUR_TARGET -- --features wee_alloc
Replace YOUR_TARGET with one of the following:
- web
- bundler
- deno
- nodejs
Finally, you can create a .tgz archive suitable for local use:
wasm-pack pack
Learn more about wasm-pack in the official wasm-pack documentation.