Skip to content

Commit

Permalink
feat: non-ASCII support and optimization in murmurhash3 (#931)
Browse files Browse the repository at this point in the history
* feat: non-ASCII support
* feat: `ROTL` updates the number instead of creating a new pair
* feat: `x64LeftShift` updates values instead of returning a new pair
* feat: `x64Multiply` updates values instead of creating a new pair
* feat: The rest of the functions also update values instead of creating new ones
* feat: Export and additional test-cases for `getUTF8Bytes`
  • Loading branch information
dj-stormtrooper committed Aug 10, 2023
1 parent a44cbf3 commit e34c8e3
Show file tree
Hide file tree
Showing 6 changed files with 292 additions and 179 deletions.
3 changes: 0 additions & 3 deletions resources/license_banner.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
FingerprintJS v<%= pkg.version %> - Copyright (c) FingerprintJS, Inc, <%= new Date().getFullYear() %> (https://fingerprint.com)

Licensed under <%= data.license %>

This software contains code from open-source projects:
MurmurHash3 by Karan Lyons (https://github.com/karanlyons/murmurHash3.js)
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ export {
UnknownSources,
} from './utils/entropy_source'
export { withIframe } from './utils/dom'
export { getUTF8Bytes } from './utils/data'
9 changes: 8 additions & 1 deletion src/utils/data.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { areSetsEqual, maxInIterator, parseSimpleCssSelector, round, toFloat, toInt } from './data'
import { areSetsEqual, getUTF8Bytes, maxInIterator, parseSimpleCssSelector, round, toFloat, toInt } from './data'

describe('Data utilities', () => {
it('converts to integer', () => {
Expand Down Expand Up @@ -100,4 +100,11 @@ describe('Data utilities', () => {
expect(maxInIterator(generator(), (item) => (item.val % 2 === 0 ? item.val : item.val * 2))).toEqual({ val: 7 })
expect(maxInIterator(emptyGenerator(), () => Math.random())).toBeUndefined()
})

// Covers both an ASCII-only string and a string mixing ASCII, Cyrillic and an emoji
it('converts string to UTF8 bytes', () => {
  const asciiBytes = [72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33]
  expect(getUTF8Bytes('Hello, world!')).toEqual(new Uint8Array(asciiBytes))

  const mixedBytes = [102, 209, 145, 37, 63, 61, 240, 159, 164, 148]
  expect(getUTF8Bytes('fё%?=🤔')).toEqual(new Uint8Array(mixedBytes))
})
})
22 changes: 22 additions & 0 deletions src/utils/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,25 @@ export function maxInIterator<T>(iterator: Iterator<T>, getItemScore: (item: T)

return maxItem
}

/**
 * Encodes the given string as UTF-8 and returns the bytes.
 *
 * A string made only of ASCII characters is converted code-by-code;
 * as soon as a non-ASCII code unit is met, the whole string is encoded with `TextEncoder` instead.
 *
 * Warning for package users:
 * This function is out of Semantic Versioning, i.e. can change unexpectedly. Usage is at your own risk.
 */
export function getUTF8Bytes(input: string): Uint8Array {
  // If you want to just count bytes, see solutions at https://jsbench.me/ehklab415e/1
  const length = input.length
  const bytes = new Uint8Array(length)
  let position = 0

  while (position < length) {
    // Reading the char code directly is cheaper than running an encoder, so it is tried first
    const code = input.charCodeAt(position)

    // Outside the ASCII range one code unit no longer maps to one byte: delegate the entire input to a real encoder
    if (code > 127 || code < 0) {
      return new TextEncoder().encode(input)
    }

    bytes[position] = code
    position += 1
  }

  return bytes
}
22 changes: 21 additions & 1 deletion src/utils/hashing.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,27 @@ const longText =

// Compares `x64hash128` output with fixed expected hex digests for inputs of different lengths and character ranges.
describe('Murmur3', () => {
it('makes x64 128 bit hash', () => {
expect(x64hash128('Hello, world')).toBe('ebd28b45027ab97477416103e3fff7b8')
// `input` is 16 characters long; the three variants below are 15, 17 and 34 characters long.
// NOTE(review): "chunk" in the names presumably refers to a 16-byte block processed by the hash — confirm against the implementation.
const input = 'Hello, world, hi'
const inputLessThanChunk = 'Hello, world, h'
const inputGreaterThanChunk = 'Hello, world, hi!'
const inputGreaterThan2Chunks = 'Hello, world, hi, Hello, world, hi'

// Value: 'ňťŬŬůĬĠŷůŲŬŤĬĠŨũ'
// Each char code of `input` is shifted up by 256, so every resulting character is outside the ASCII range.
const nonAsciiInput = input
.split('')
.map((char) => String.fromCharCode(char.charCodeAt(0) + 256))
.join('')

const shortInput = 'hello'

// The shifted string must not hash to the same value as its ASCII source
expect(x64hash128(input)).not.toBe(x64hash128(nonAsciiInput))

// Each input is pinned to a fixed expected digest
expect(x64hash128(input)).toBe('9a66b4567d520770dc8eaf9a508ecf1b')
expect(x64hash128(nonAsciiInput)).toBe('460892d2cab76edff07f62a97e106f6b')
expect(x64hash128(shortInput)).toBe('cbd8a7b341bd9b025b1e906a48ae1d19')
expect(x64hash128(longText)).toBe('211a6f425b82e115fb52ccdc51edb290')
expect(x64hash128(inputLessThanChunk)).toBe('4552c6409e0a7bd3b0f9eb318bb35f05')
expect(x64hash128(inputGreaterThanChunk)).toBe('dce3e02d43da4d2374e84e484c566492')
expect(x64hash128(inputGreaterThan2Chunks)).toBe('d49c261c833b671870b471c42df4dbf0')
})
})
Loading

0 comments on commit e34c8e3

Please sign in to comment.