From faafb1a69fa2045cf1ce7e74fc43dd3393c1a8d2 Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sat, 23 Jul 2022 09:33:25 +1200 Subject: [PATCH 01/19] initial implementation of 'hash array mapped trie' structure for persistent maps --- src/gleam/map.gleam | 6 + src/gleam_stdlib.mjs | 91 ++----- src/persistent-hash-map.mjs | 527 ++++++++++++++++++++++++++++++++++++ 3 files changed, 549 insertions(+), 75 deletions(-) create mode 100644 src/persistent-hash-map.mjs diff --git a/src/gleam/map.gleam b/src/gleam/map.gleam index 6c25a7c9..d3495c26 100644 --- a/src/gleam/map.gleam +++ b/src/gleam/map.gleam @@ -5,6 +5,12 @@ if javascript { import gleam/pair } +if javascript { + // hack to include another js file.. + pub external fn include_persistent_hash_map() -> Nil = + "../persistent-hash-map.mjs" "__include_me" +} + /// A dictionary of keys and values. /// /// Any type can be used for the keys and values of a map, but all the keys diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index a6123676..ce6e7b8a 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -15,10 +15,10 @@ import { } from "./gleam/regex.mjs"; import { DecodeError } from "./gleam/dynamic.mjs"; import { Some, None } from "./gleam/option.mjs"; - -const HASHCODE_CACHE = new WeakMap(); +import * as pmap from "./persistent-hash-map.mjs" const Nil = undefined; +const NOT_FOUND = {} export function identity(x) { return x; @@ -306,71 +306,8 @@ export function regex_scan(regex, string) { return List.fromArray(matches); } -class Map { - static #hashcode_cache = new WeakMap(); - - static hash(value) { - let existing = this.#hashcode_cache.get(value); - if (existing) { - return existing; - } else if (value instanceof Object) { - let hashcode = inspect(value); - HASHCODE_CACHE.set(value, hashcode); - return hashcode; - } else { - return value.toString(); - } - } - - constructor() { - this.entries = new globalThis.Map(); - } - - get size() { - return this.entries.size; - } - - inspect() { - let entries = [...this.entries.values()] - .map((pair) => inspect(pair)) - .join(", "); - return `map.from_list([${entries}])`; - } - - copy() { - let map = new Map(); - map.entries = new globalThis.Map(this.entries); - return map; - } - - toList() { - return List.fromArray([...this.entries.values()]); - } - - insert(k, v) { - let map = this.copy(); - map.entries.set(Map.hash(k), [k, v]); - return map; - } - - delete(k) { - let map = this.copy(); - map.entries.delete(Map.hash(k)); - return map; - } - - get(key) { - let code = Map.hash(key); - if (this.entries.has(code)) { - return new Ok(this.entries.get(code)[1]); - } else { - return new Error(Nil); - } - } -} - export function new_map() { - return new Map(); + return pmap.create(); } export function map_size(map) { @@ -378,19 +315,23 @@ export function map_size(map) { } export function map_to_list(map) { - return map.toList(); + return List.fromArray(pmap.entries(map)); } -export function map_remove(k, map) { - return map.delete(k); +export function map_remove(key, map) { + return pmap.remove(map, key); } export function map_get(map, key) { - return map.get(key); + const value = pmap.getWithDefault(map, key, NOT_FOUND); + if(value === NOT_FOUND) { + return new Error(Nil); + } + return new Ok(value) } export function map_insert(key, value, map) { - return map.insert(key, value); + return pmap.set(map, key, value); } function unsafe_percent_decode(string) { @@ -521,7 +462,7 @@ export function classify_dynamic(data) { return `Tuple of ${data.length} elements`; } else if (BitString.isBitString(data)) { return "BitString"; - } else if (data instanceof Map) { + } else if (data instanceof pmap.Map) { return "Map"; } else if (typeof data === "number") { return "Float"; @@ -587,7 +528,7 @@ export function decode_result(data) { } export function decode_map(data) { - return data instanceof Map ? new Ok(data) : decoder_error("Map", data); + return data instanceof pmap.Map ? new Ok(data) : decoder_error("Map", data); } export function decode_option(data, decoder) { @@ -604,8 +545,8 @@ export function decode_option(data, decoder) { export function decode_field(value, name) { let error = () => decoder_error_no_classify("field", "nothing"); - if (value instanceof Map) { - let entry = value.get(name); + if (value instanceof pmap.Map) { + let entry = map_get(value, name); return entry.isOk() ? entry : error(); } try { diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs new file mode 100644 index 00000000..b65621d5 --- /dev/null +++ b/src/persistent-hash-map.mjs @@ -0,0 +1,527 @@ +import { + inspect, + isEqual +} from "./gleam.mjs"; +function getHash(s) { + if (typeof s === "number") return s; + if (typeof s !== "string") s = inspect(s); + let hash = 0; + const len = s.length; + for (let i = 0; i < len; i++) { + hash = (Math.imul(31, hash) + s.charCodeAt(i)) | 0; + } + return hash; +} +const SHIFT = 5; // number of bits you need to shift by to get the next bucket +const BUCKET_SIZE = Math.pow(2, SHIFT); +const MASK = BUCKET_SIZE - 1; // used to zero out all bits not in the bucket +const MAX_INDEX_NODE = BUCKET_SIZE / 2; // when does index node grow into array node +const MIN_ARRAY_NODE = BUCKET_SIZE / 4; // when does array node shrink to index node +const ENTRY = 0; +const ARRAY_NODE = 1; +const INDEX_NODE = 2; +const COLLISION_NODE = 3; +const EMPTY = { + type: INDEX_NODE, + bitmap: 0, + array: [], +}; +/** Mask the hash to get only the bucket corresponding to shift */ +function mask(hash, shift) { + return (hash >>> shift) & MASK; +} +/** Set only the Nth bit where N is the masked hash */ +function bitpos(hash, shift) { + return 1 << mask(hash, shift); +} +/** Count the number of 1 bits in a number */ +function bitcount(x) { + x -= (x >> 1) & 0x55555555; + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0f0f0f0f; + x += x >> 8; + x += x >> 16; + return x & 0x7f; +} +/** Calculate the array index of an item in a bitmap index node */ +function index(bitmap, bit) { + return bitcount(bitmap & (bit - 1)); +} +/** Efficiently copy an array and set one value at an index */ +function cloneAndSet(arr, at, val) { + const len = arr.length; + const out = new Array(len); + for (let i = 0; i < len; ++i) { + out[i] = arr[i]; + } + out[at] = val; + return out; +} +/** Efficiently copy an array and insert one value at an index */ +function spliceIn(arr, at, val) { + const len = arr.length; + const out = new Array(len + 1); + let i = 0; + let g = 0; + while (i < at) { + out[g++] = arr[i++]; + } + out[g++] = val; + while (i < len) { + out[g++] = arr[i++]; + } + return out; +} +/** Efficiently copy an array and remove one value at an index */ +function spliceOut(arr, at) { + const len = arr.length; + const out = new Array(len - 1); + let i = 0; + let g = 0; + while (i < at) { + out[g++] = arr[i++]; + } + ++i; + while (i < len) { + out[g++] = arr[i++]; + } + return out; +} +/** Create a new node containing two entries */ +function createNode(shift, key1, val1, key2hash, key2, val2) { + const key1hash = getHash(key1); + if (key1hash === key2hash) { + return { + type: COLLISION_NODE, + hash: key1hash, + array: [ + { type: ENTRY, k: key1, v: val1 }, + { type: ENTRY, k: key2, v: val2 }, + ], + }; + } + const addedLeaf = { val: false }; + return assoc(assocIndex(EMPTY, shift, key1hash, key1, val1, addedLeaf), shift, key2hash, key2, val2, addedLeaf); +} +/** Associate a node with a new entry, creating a new node. */ +function assoc(root, shift, hash, key, val, addedLeaf) { + switch (root.type) { + case ARRAY_NODE: + return assocArray(root, shift, hash, key, val, addedLeaf); + case INDEX_NODE: + return assocIndex(root, shift, hash, key, val, addedLeaf); + case COLLISION_NODE: + return assocCollision(root, shift, hash, key, val, addedLeaf); + } +} +function assocArray(root, shift, hash, key, val, addedLeaf) { + const idx = mask(hash, shift); + const node = root.array[idx]; + // if the corresponding index is empty set the index to a newly created node + if (node === undefined) { + return { + type: ARRAY_NODE, + size: root.size + 1, + array: cloneAndSet(root.array, idx, assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf)), + }; + } + // otherwise call assoc on the child node + const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); + // if the child node hasn't changed just return the old root + if (n === node) { + return root; + } + // otherwise set the index to the new node + return { + type: ARRAY_NODE, + size: root.size, + array: cloneAndSet(root.array, idx, n), + }; +} +function assocIndex(root, shift, hash, key, val, addedLeaf) { + const bit = bitpos(hash, shift); + const idx = index(root.bitmap, bit); + // if there is already a item at this hash index.. + if ((root.bitmap & bit) !== 0) { + // if there is a node at the index (not an entry), call assoc on the child node + const node = root.array[idx]; + if (node.type !== ENTRY) { + const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); + if (n === node) { + return root; + } + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, n), + }; + } + // otherwise there is an entry at the index + // if the keys are equal replace the entry with the updated value + const keyOrNull = node.k; + if (isEqual(key, keyOrNull)) { + if (val === node.v) { + return root; + } + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, { type: ENTRY, k: keyOrNull, v: val }), + }; + } + // if the keys are not equal, replace the entry with a new child node + addedLeaf.val = true; + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, createNode(shift + SHIFT, keyOrNull, node.v, hash, key, val)), + }; + } + else { + // else there is currently no item at the hash index + const n = root.array.length; + // if the number of nodes is at the maximum, expand this node into an array node + if (n >= MAX_INDEX_NODE) { + // create a 32 length array for the new array node (one for each bit in the hash) + const nodes = new Array(32); + // create and insert a node for the new entry + const jdx = mask(hash, shift); + nodes[jdx] = assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf); + let j = 0; + let bitmap = root.bitmap; + // place each item in the index node into the correct spot in the array node + // loop through all 32 bits / array positions + for (let i = 0; i < 32; i++) { + if ((bitmap & 1) !== 0) { + const node = root.array[j++]; + // turn any entries into index nodes + // since array nodes should only contain other nodes, not entries + if (node.type !== ENTRY) { + nodes[i] = node; + } + else { + nodes[i] = assocIndex(EMPTY, shift + SHIFT, getHash(node.k), node.k, node.v, addedLeaf); + } + } + // shift the bitmap to process the next bit + bitmap >>>= 1; + } + return { + type: ARRAY_NODE, + size: n + 1, + array: nodes, + }; + } + else { + // else there is still space in this index node + // simply insert a new entry at the hash index + const newArray = spliceIn(root.array, idx, { type: ENTRY, k: key, v: val }); + addedLeaf.val = true; + return { + type: INDEX_NODE, + bitmap: root.bitmap | bit, + array: newArray, + }; + } + } +} +function assocCollision(root, shift, hash, key, val, addedLeaf) { + // if there is a hash collision + if (hash === root.hash) { + const idx = collisionIndexOf(root, key); + // if this key already exists replace the entry with the new value + if (idx !== -1) { + const entry = root.array[idx]; + if (entry.v === val) { + return root; + } + return { + type: COLLISION_NODE, + hash: hash, + array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }), + }; + } + // otherwise insert the entry at the end of the array + const size = root.array.length; + addedLeaf.val = true; + return { + type: COLLISION_NODE, + hash: hash, + array: cloneAndSet(root.array, size, { type: ENTRY, k: key, v: val }), + }; + } + // if there is no hash collision, upgrade to an index node + return assoc({ + type: INDEX_NODE, + bitmap: bitpos(root.hash, shift), + array: [root], + }, shift, hash, key, val, addedLeaf); +} +/** Find the index of a key in the collision node's array */ +function collisionIndexOf(root, key) { + const size = root.array.length; + for (let i = 0; i < size; i++) { + if (isEqual(key, root.array[i].k)) { + return i; + } + } + return -1; +} +/** Return the found entry or undefined if not present in the root */ +function find(root, shift, hash, key) { + switch (root.type) { + case ARRAY_NODE: + return findArray(root, shift, hash, key); + case INDEX_NODE: + return findIndex(root, shift, hash, key); + case COLLISION_NODE: + return findCollision(root, shift, hash, key); + } +} +function findArray(root, shift, hash, key) { + const idx = mask(hash, shift); + const node = root.array[idx]; + if (node === undefined) { + return undefined; + } + return find(node, shift + SHIFT, hash, key); +} +function findIndex(root, shift, hash, key) { + const bit = bitpos(hash, shift); + if ((root.bitmap & bit) === 0) { + return undefined; + } + const idx = index(root.bitmap, bit); + const node = root.array[idx]; + if (node.type !== ENTRY) { + return find(node, shift + SHIFT, hash, key); + } + if (isEqual(key, node.k)) { + return node; + } + return undefined; +} +function findCollision(root, _shift, _hash, key) { + const idx = collisionIndexOf(root, key); + if (idx < 0) { + return undefined; + } + return root.array[idx]; +} +/** +* Remove an entry from the root, returning the updated root. +* Returns undefined if the node should be removed from the parent. +* */ +function without(root, shift, hash, key) { + switch (root.type) { + case ARRAY_NODE: + return withoutArray(root, shift, hash, key); + case INDEX_NODE: + return withoutIndex(root, shift, hash, key); + case COLLISION_NODE: + return withoutCollision(root, shift, hash, key); + } +} +function withoutArray(root, shift, hash, key) { + const idx = mask(hash, shift); + const node = root.array[idx]; + if (node === undefined) { + return root; // already empty + } + const n = without(node, shift + SHIFT, hash, key); + if (n === node) { + return root; // no changes + } + // if the recursive call returned undefined the node should be removed + if (n === undefined) { + // if the number of child nodes is at the minimum, pack into an index node + if (root.size <= MIN_ARRAY_NODE) { + const arr = root.array; + const out = new Array(root.size - 1); + let i = 0; + let j = 0; + let bitmap = 0; + while (i < idx) { + const nv = arr[i]; + if (nv !== undefined) { + out[j] = nv; + bitmap |= 1 << i; + ++j; + } + ++i; + } + ++i; // skip copying the removed node + while (i < arr.length) { + const nv = arr[i]; + if (nv !== undefined) { + out[j] = nv; + bitmap |= 1 << i; + ++j; + } + ++i; + } + return { + type: INDEX_NODE, + bitmap: bitmap, + array: out, + }; + } + return { + type: ARRAY_NODE, + size: root.size - 1, + array: cloneAndSet(root.array, idx, n), + }; + } + return { + type: ARRAY_NODE, + size: root.size, + array: cloneAndSet(root.array, idx, n), + }; +} +function withoutIndex(root, shift, hash, key) { + const bit = bitpos(hash, shift); + if ((root.bitmap & bit) === 0) { + return root; // already empty + } + const idx = index(root.bitmap, bit); + const node = root.array[idx]; + // if the item is not an entry + if (node.type !== ENTRY) { + const n = without(node, shift + SHIFT, hash, key); + if (n === node) { + return root; // no changes + } + // if not undefined, the child node still has items, so update it + if (n !== undefined) { + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, n), + }; + } + // otherwise the child node should be removed + // if it was the only child node, remove this node from the parent + if (root.bitmap === bit) { + return undefined; + } + // otherwise just remove the child node + return { + type: INDEX_NODE, + bitmap: root.bitmap ^ bit, + array: spliceOut(root.array, idx), + }; + } + // otherwise the item is an entry, remove it if the key matches + if (isEqual(key, node.k)) { + if (root.bitmap === bit) { + return undefined; + } + return { + type: INDEX_NODE, + bitmap: root.bitmap ^ bit, + array: spliceOut(root.array, idx), + }; + } + return root; +} +function withoutCollision(root, _shift, _hash, key) { + const idx = collisionIndexOf(root, key); + // if the key not found, no changes + if (idx === -1) { + return root; + } + // otherwise the entry was found, remove it + // if it was the only entry in this node, remove the whole node + if (root.array.length === 1) { + return undefined; + } + // otherwise just remove the entry + return { + type: COLLISION_NODE, + hash: root.hash, + array: spliceOut(root.array, idx), + }; +} +function toArray(root, result) { + if (root === undefined) { + return; + } + const items = root.array; + const size = items.length; + for (let i = 0; i < size; i++) { + const item = items[i]; + if (item === undefined) { + continue; + } + if (item.type === ENTRY) { + result.push([item.k, item.v]); + continue; + } + toArray(item, result); + } +} +export class Map { + constructor(root, size) { + this.root = root; + this.size = size; + } +} +/** Extra wrapper to keep track of map size */ +export function create() { + return new Map(undefined, 0); +} +export function get(map, key) { + if (map.root === undefined) { + return undefined; + } + return find(map.root, 0, getHash(key), key)?.v; +} +export function getWithDefault(map, key, notFound) { + if (map.root === undefined) { + return notFound; + } + const found = find(map.root, 0, getHash(key), key); + if (found === undefined) { + return notFound; + } + return found.v; +} +export function set(map, key, val) { + const addedLeaf = { val: false }; + const root = map.root === undefined ? EMPTY : map.root; + const newRoot = assoc(root, 0, getHash(key), key, val, addedLeaf); + if (newRoot === map.root) { + return map; + } + return new Map(newRoot, addedLeaf.val ? map.size + 1 : map.size); +} +export function remove(map, key) { + if (map.root === undefined) { + return map; + } + const newRoot = without(map.root, 0, getHash(key), key); + if (newRoot === map.root) { + return map; + } + if (newRoot === undefined) { + return create(); + } + return new Map(newRoot, map.size - 1); +} +export function has(map, key) { + if (map.root === undefined) { + return false; + } + return find(map.root, 0, getHash(key), key) !== undefined; +} +export function entries(map) { + if (map.root === undefined) { + return []; + } + const result = []; + toArray(map.root, result); + return result; +} +export function __include_me() { + // blank +} From 9d9e73f1284b3932b229c178ce329bc11d0283e8 Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Thu, 11 Aug 2022 20:39:35 +1200 Subject: [PATCH 02/19] format --- src/persistent-hash-map.mjs | 521 +++++++++++++++++++----------------- 1 file changed, 276 insertions(+), 245 deletions(-) diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index b65621d5..cc17c16c 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -1,7 +1,4 @@ -import { - inspect, - isEqual -} from "./gleam.mjs"; +import { inspect, isEqual } from "./gleam.mjs"; function getHash(s) { if (typeof s === "number") return s; if (typeof s !== "string") s = inspect(s); @@ -52,7 +49,7 @@ function cloneAndSet(arr, at, val) { const len = arr.length; const out = new Array(len); for (let i = 0; i < len; ++i) { - out[i] = arr[i]; + out[i] = arr[i]; } out[at] = val; return out; @@ -64,11 +61,11 @@ function spliceIn(arr, at, val) { let i = 0; let g = 0; while (i < at) { - out[g++] = arr[i++]; + out[g++] = arr[i++]; } out[g++] = val; while (i < len) { - out[g++] = arr[i++]; + out[g++] = arr[i++]; } return out; } @@ -79,11 +76,11 @@ function spliceOut(arr, at) { let i = 0; let g = 0; while (i < at) { - out[g++] = arr[i++]; + out[g++] = arr[i++]; } ++i; while (i < len) { - out[g++] = arr[i++]; + out[g++] = arr[i++]; } return out; } @@ -91,27 +88,34 @@ function spliceOut(arr, at) { function createNode(shift, key1, val1, key2hash, key2, val2) { const key1hash = getHash(key1); if (key1hash === key2hash) { - return { - type: COLLISION_NODE, - hash: key1hash, - array: [ - { type: ENTRY, k: key1, v: val1 }, - { type: ENTRY, k: key2, v: val2 }, - ], - }; + return { + type: COLLISION_NODE, + hash: key1hash, + array: [ + { type: ENTRY, k: key1, v: val1 }, + { type: ENTRY, k: key2, v: val2 }, + ], + }; } const addedLeaf = { val: false }; - return assoc(assocIndex(EMPTY, shift, key1hash, key1, val1, addedLeaf), shift, key2hash, key2, val2, addedLeaf); + return assoc( + assocIndex(EMPTY, shift, key1hash, key1, val1, addedLeaf), + shift, + key2hash, + key2, + val2, + addedLeaf + ); } /** Associate a node with a new entry, creating a new node. */ function assoc(root, shift, hash, key, val, addedLeaf) { switch (root.type) { - case ARRAY_NODE: - return assocArray(root, shift, hash, key, val, addedLeaf); - case INDEX_NODE: - return assocIndex(root, shift, hash, key, val, addedLeaf); - case COLLISION_NODE: - return assocCollision(root, shift, hash, key, val, addedLeaf); + case ARRAY_NODE: + return assocArray(root, shift, hash, key, val, addedLeaf); + case INDEX_NODE: + return assocIndex(root, shift, hash, key, val, addedLeaf); + case COLLISION_NODE: + return assocCollision(root, shift, hash, key, val, addedLeaf); } } function assocArray(root, shift, hash, key, val, addedLeaf) { @@ -119,23 +123,27 @@ function assocArray(root, shift, hash, key, val, addedLeaf) { const node = root.array[idx]; // if the corresponding index is empty set the index to a newly created node if (node === undefined) { - return { - type: ARRAY_NODE, - size: root.size + 1, - array: cloneAndSet(root.array, idx, assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf)), - }; + return { + type: ARRAY_NODE, + size: root.size + 1, + array: cloneAndSet( + root.array, + idx, + assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf) + ), + }; } // otherwise call assoc on the child node const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); // if the child node hasn't changed just return the old root if (n === node) { - return root; + return root; } // otherwise set the index to the new node return { - type: ARRAY_NODE, - size: root.size, - array: cloneAndSet(root.array, idx, n), + type: ARRAY_NODE, + size: root.size, + array: cloneAndSet(root.array, idx, n), }; } function assocIndex(root, shift, hash, key, val, addedLeaf) { @@ -143,284 +151,307 @@ function assocIndex(root, shift, hash, key, val, addedLeaf) { const idx = index(root.bitmap, bit); // if there is already a item at this hash index.. if ((root.bitmap & bit) !== 0) { - // if there is a node at the index (not an entry), call assoc on the child node - const node = root.array[idx]; - if (node.type !== ENTRY) { - const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); - if (n === node) { - return root; - } - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, n), - }; + // if there is a node at the index (not an entry), call assoc on the child node + const node = root.array[idx]; + if (node.type !== ENTRY) { + const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); + if (n === node) { + return root; } - // otherwise there is an entry at the index - // if the keys are equal replace the entry with the updated value - const keyOrNull = node.k; - if (isEqual(key, keyOrNull)) { - if (val === node.v) { - return root; - } - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, { type: ENTRY, k: keyOrNull, v: val }), - }; + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, n), + }; + } + // otherwise there is an entry at the index + // if the keys are equal replace the entry with the updated value + const keyOrNull = node.k; + if (isEqual(key, keyOrNull)) { + if (val === node.v) { + return root; } - // if the keys are not equal, replace the entry with a new child node - addedLeaf.val = true; return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, createNode(shift + SHIFT, keyOrNull, node.v, hash, key, val)), + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, { + type: ENTRY, + k: keyOrNull, + v: val, + }), }; - } - else { - // else there is currently no item at the hash index - const n = root.array.length; - // if the number of nodes is at the maximum, expand this node into an array node - if (n >= MAX_INDEX_NODE) { - // create a 32 length array for the new array node (one for each bit in the hash) - const nodes = new Array(32); - // create and insert a node for the new entry - const jdx = mask(hash, shift); - nodes[jdx] = assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf); - let j = 0; - let bitmap = root.bitmap; - // place each item in the index node into the correct spot in the array node - // loop through all 32 bits / array positions - for (let i = 0; i < 32; i++) { - if ((bitmap & 1) !== 0) { - const node = root.array[j++]; - // turn any entries into index nodes - // since array nodes should only contain other nodes, not entries - if (node.type !== ENTRY) { - nodes[i] = node; - } - else { - nodes[i] = assocIndex(EMPTY, shift + SHIFT, getHash(node.k), node.k, node.v, addedLeaf); - } - } - // shift the bitmap to process the next bit - bitmap >>>= 1; + } + // if the keys are not equal, replace the entry with a new child node + addedLeaf.val = true; + return { + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet( + root.array, + idx, + createNode(shift + SHIFT, keyOrNull, node.v, hash, key, val) + ), + }; + } else { + // else there is currently no item at the hash index + const n = root.array.length; + // if the number of nodes is at the maximum, expand this node into an array node + if (n >= MAX_INDEX_NODE) { + // create a 32 length array for the new array node (one for each bit in the hash) + const nodes = new Array(32); + // create and insert a node for the new entry + const jdx = mask(hash, shift); + nodes[jdx] = assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf); + let j = 0; + let bitmap = root.bitmap; + // place each item in the index node into the correct spot in the array node + // loop through all 32 bits / array positions + for (let i = 0; i < 32; i++) { + if ((bitmap & 1) !== 0) { + const node = root.array[j++]; + // turn any entries into index nodes + // since array nodes should only contain other nodes, not entries + if (node.type !== ENTRY) { + nodes[i] = node; + } else { + nodes[i] = assocIndex( + EMPTY, + shift + SHIFT, + getHash(node.k), + node.k, + node.v, + addedLeaf + ); } - return { - type: ARRAY_NODE, - size: n + 1, - array: nodes, - }; - } - else { - // else there is still space in this index node - // simply insert a new entry at the hash index - const newArray = spliceIn(root.array, idx, { type: ENTRY, k: key, v: val }); - addedLeaf.val = true; - return { - type: INDEX_NODE, - bitmap: root.bitmap | bit, - array: newArray, - }; + } + // shift the bitmap to process the next bit + bitmap >>>= 1; } + return { + type: ARRAY_NODE, + size: n + 1, + array: nodes, + }; + } else { + // else there is still space in this index node + // simply insert a new entry at the hash index + const newArray = spliceIn(root.array, idx, { + type: ENTRY, + k: key, + v: val, + }); + addedLeaf.val = true; + return { + type: INDEX_NODE, + bitmap: root.bitmap | bit, + array: newArray, + }; + } } } function assocCollision(root, shift, hash, key, val, addedLeaf) { // if there is a hash collision if (hash === root.hash) { - const idx = collisionIndexOf(root, key); - // if this key already exists replace the entry with the new value - if (idx !== -1) { - const entry = root.array[idx]; - if (entry.v === val) { - return root; - } - return { - type: COLLISION_NODE, - hash: hash, - array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }), - }; + const idx = collisionIndexOf(root, key); + // if this key already exists replace the entry with the new value + if (idx !== -1) { + const entry = root.array[idx]; + if (entry.v === val) { + return root; } - // otherwise insert the entry at the end of the array - const size = root.array.length; - addedLeaf.val = true; return { - type: COLLISION_NODE, - hash: hash, - array: cloneAndSet(root.array, size, { type: ENTRY, k: key, v: val }), + type: COLLISION_NODE, + hash: hash, + array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }), }; + } + // otherwise insert the entry at the end of the array + const size = root.array.length; + addedLeaf.val = true; + return { + type: COLLISION_NODE, + hash: hash, + array: cloneAndSet(root.array, size, { type: ENTRY, k: key, v: val }), + }; } // if there is no hash collision, upgrade to an index node - return assoc({ + return assoc( + { type: INDEX_NODE, bitmap: bitpos(root.hash, shift), array: [root], - }, shift, hash, key, val, addedLeaf); + }, + shift, + hash, + key, + val, + addedLeaf + ); } /** Find the index of a key in the collision node's array */ function collisionIndexOf(root, key) { const size = root.array.length; for (let i = 0; i < size; i++) { - if (isEqual(key, root.array[i].k)) { - return i; - } + if (isEqual(key, root.array[i].k)) { + return i; + } } return -1; } /** Return the found entry or undefined if not present in the root */ function find(root, shift, hash, key) { switch (root.type) { - case ARRAY_NODE: - return findArray(root, shift, hash, key); - case INDEX_NODE: - return findIndex(root, shift, hash, key); - case COLLISION_NODE: - return findCollision(root, shift, hash, key); + case ARRAY_NODE: + return findArray(root, shift, hash, key); + case INDEX_NODE: + return findIndex(root, shift, hash, key); + case COLLISION_NODE: + return findCollision(root, shift, hash, key); } } function findArray(root, shift, hash, key) { const idx = mask(hash, shift); const node = root.array[idx]; if (node === undefined) { - return undefined; + return undefined; } return find(node, shift + SHIFT, hash, key); } function findIndex(root, shift, hash, key) { const bit = bitpos(hash, shift); if ((root.bitmap & bit) === 0) { - return undefined; + return undefined; } const idx = index(root.bitmap, bit); const node = root.array[idx]; if (node.type !== ENTRY) { - return find(node, shift + SHIFT, hash, key); + return find(node, shift + SHIFT, hash, key); } if (isEqual(key, node.k)) { - return node; + return node; } return undefined; } function findCollision(root, _shift, _hash, key) { const idx = collisionIndexOf(root, key); if (idx < 0) { - return undefined; + return undefined; } return root.array[idx]; } /** -* Remove an entry from the root, returning the updated root. -* Returns undefined if the node should be removed from the parent. -* */ + * Remove an entry from the root, returning the updated root. + * Returns undefined if the node should be removed from the parent. + * */ function without(root, shift, hash, key) { switch (root.type) { - case ARRAY_NODE: - return withoutArray(root, shift, hash, key); - case INDEX_NODE: - return withoutIndex(root, shift, hash, key); - case COLLISION_NODE: - return withoutCollision(root, shift, hash, key); + case ARRAY_NODE: + return withoutArray(root, shift, hash, key); + case INDEX_NODE: + return withoutIndex(root, shift, hash, key); + case COLLISION_NODE: + return withoutCollision(root, shift, hash, key); } } function withoutArray(root, shift, hash, key) { const idx = mask(hash, shift); const node = root.array[idx]; if (node === undefined) { - return root; // already empty + return root; // already empty } const n = without(node, shift + SHIFT, hash, key); if (n === node) { - return root; // no changes + return root; // no changes } // if the recursive call returned undefined the node should be removed if (n === undefined) { - // if the number of child nodes is at the minimum, pack into an index node - if (root.size <= MIN_ARRAY_NODE) { - const arr = root.array; - const out = new Array(root.size - 1); - let i = 0; - let j = 0; - let bitmap = 0; - while (i < idx) { - const nv = arr[i]; - if (nv !== undefined) { - out[j] = nv; - bitmap |= 1 << i; - ++j; - } - ++i; - } - ++i; // skip copying the removed node - while (i < arr.length) { - const nv = arr[i]; - if (nv !== undefined) { - out[j] = nv; - bitmap |= 1 << i; - ++j; - } - ++i; - } - return { - type: INDEX_NODE, - bitmap: bitmap, - array: out, - }; + // if the number of child nodes is at the minimum, pack into an index node + if (root.size <= MIN_ARRAY_NODE) { + const arr = root.array; + const out = new Array(root.size - 1); + let i = 0; + let j = 0; + let bitmap = 0; + while (i < idx) { + const nv = arr[i]; + if (nv !== undefined) { + out[j] = nv; + bitmap |= 1 << i; + ++j; + } + ++i; + } + ++i; // skip copying the removed node + while (i < arr.length) { + const nv = arr[i]; + if (nv !== undefined) { + out[j] = nv; + bitmap |= 1 << i; + ++j; + } + ++i; } return { - type: ARRAY_NODE, - size: root.size - 1, - array: cloneAndSet(root.array, idx, n), + type: INDEX_NODE, + bitmap: bitmap, + array: out, }; - } - return { + } + return { type: ARRAY_NODE, - size: root.size, + size: root.size - 1, array: cloneAndSet(root.array, idx, n), + }; + } + return { + type: ARRAY_NODE, + size: root.size, + array: cloneAndSet(root.array, idx, n), }; } function withoutIndex(root, shift, hash, key) { const bit = bitpos(hash, shift); if ((root.bitmap & bit) === 0) { - return root; // already empty + return root; // already empty } const idx = index(root.bitmap, bit); const node = root.array[idx]; // if the item is not an entry if (node.type !== ENTRY) { - const n = without(node, shift + SHIFT, hash, key); - if (n === node) { - return root; // no changes - } - // if not undefined, the child node still has items, so update it - if (n !== undefined) { - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, n), - }; - } - // otherwise the child node should be removed - // if it was the only child node, remove this node from the parent - if (root.bitmap === bit) { - return undefined; - } - // otherwise just remove the child node + const n = without(node, shift + SHIFT, hash, key); + if (n === node) { + return root; // no changes + } + // if not undefined, the child node still has items, so update it + if (n !== undefined) { return { - type: INDEX_NODE, - bitmap: root.bitmap ^ bit, - array: spliceOut(root.array, idx), + type: INDEX_NODE, + bitmap: root.bitmap, + array: cloneAndSet(root.array, idx, n), }; + } + // otherwise the child node should be removed + // if it was the only child node, remove this node from the parent + if (root.bitmap === bit) { + return undefined; + } + // otherwise just remove the child node + return { + type: INDEX_NODE, + bitmap: root.bitmap ^ bit, + array: spliceOut(root.array, idx), + }; } // otherwise the item is an entry, remove it if the key matches if (isEqual(key, node.k)) { - if (root.bitmap === bit) { - return undefined; - } - return { - type: INDEX_NODE, - bitmap: root.bitmap ^ bit, - array: spliceOut(root.array, idx), - }; + if (root.bitmap === bit) { + return undefined; + } + return { + type: INDEX_NODE, + bitmap: root.bitmap ^ bit, + array: spliceOut(root.array, idx), + }; } return root; } @@ -428,42 +459,42 @@ function withoutCollision(root, _shift, _hash, key) { const idx = collisionIndexOf(root, key); // if the key not found, no changes if (idx === -1) { - return root; + return root; } // otherwise the entry was found, remove it // if it was the only entry in this node, remove the whole node if (root.array.length === 1) { - return undefined; + return undefined; } // otherwise just remove the entry return { - type: COLLISION_NODE, - hash: root.hash, - array: spliceOut(root.array, idx), + type: COLLISION_NODE, + hash: root.hash, + array: spliceOut(root.array, idx), }; } function toArray(root, result) { if (root === undefined) { - return; + return; } const items = root.array; const size = items.length; for (let i = 0; i < size; i++) { - const item = items[i]; - if (item === undefined) { - continue; - } - if (item.type === ENTRY) { - result.push([item.k, item.v]); - continue; - } - toArray(item, result); + const item = items[i]; + if (item === undefined) { + continue; + } + if (item.type === ENTRY) { + result.push([item.k, item.v]); + continue; + } + toArray(item, result); } } export class Map { constructor(root, size) { - this.root = root; - this.size = size; + this.root = root; + this.size = size; } } /** Extra wrapper to keep track of map size */ @@ -472,17 +503,17 @@ export function create() { } export function get(map, key) { if (map.root === undefined) { - return undefined; + return undefined; } return find(map.root, 0, getHash(key), key)?.v; } export function getWithDefault(map, key, notFound) { if (map.root === undefined) { - return notFound; + return notFound; } const found = find(map.root, 0, getHash(key), key); if (found === undefined) { - return notFound; + return notFound; } return found.v; } @@ -491,32 +522,32 @@ export function set(map, key, val) { const root = map.root === undefined ? EMPTY : map.root; const newRoot = assoc(root, 0, getHash(key), key, val, addedLeaf); if (newRoot === map.root) { - return map; + return map; } return new Map(newRoot, addedLeaf.val ? map.size + 1 : map.size); } export function remove(map, key) { if (map.root === undefined) { - return map; + return map; } const newRoot = without(map.root, 0, getHash(key), key); if (newRoot === map.root) { - return map; + return map; } if (newRoot === undefined) { - return create(); + return create(); } return new Map(newRoot, map.size - 1); } export function has(map, key) { if (map.root === undefined) { - return false; + return false; } return find(map.root, 0, getHash(key), key) !== undefined; } export function entries(map) { if (map.root === undefined) { - return []; + return []; } const result = []; toArray(map.root, result); From 67ec443105b9e7834f86fa29c4e005fe3d89f1e2 Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Thu, 11 Aug 2022 23:35:07 +1200 Subject: [PATCH 03/19] hash function that can hash most js objects --- src/gleam_stdlib.mjs | 6 +- src/persistent-hash-map.mjs | 115 ++++++++++++++++++++++++++++++++---- 2 files changed, 108 insertions(+), 13 deletions(-) diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index ce6e7b8a..c1f90eb1 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -462,7 +462,7 @@ export function classify_dynamic(data) { return `Tuple of ${data.length} elements`; } else if (BitString.isBitString(data)) { return "BitString"; - } else if (data instanceof pmap.Map) { + } else if (data instanceof pmap.PMap) { return "Map"; } else if (typeof data === "number") { return "Float"; @@ -528,7 +528,7 @@ export function decode_result(data) { } export function decode_map(data) { - return data instanceof pmap.Map ? new Ok(data) : decoder_error("Map", data); + return data instanceof pmap.PMap ? new Ok(data) : decoder_error("Map", data); } export function decode_option(data, decoder) { @@ -545,7 +545,7 @@ export function decode_option(data, decoder) { export function decode_field(value, name) { let error = () => decoder_error_no_classify("field", "nothing"); - if (value instanceof pmap.Map) { + if (value instanceof pmap.PMap) { let entry = map_get(value, name); return entry.isOk() ? entry : error(); } diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index cc17c16c..8cdc0231 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -1,13 +1,108 @@ -import { inspect, isEqual } from "./gleam.mjs"; -function getHash(s) { - if (typeof s === "number") return s; - if (typeof s !== "string") s = inspect(s); +import { isEqual } from "./gleam.mjs"; + +const referenceMap = new WeakMap(); +let referenceUID = 1; + +/** hash the object by reference using a weak map and incrementing uid */ +function hashByReference(o) { + const known = referenceMap.get(o); + if (known !== undefined) { + return known; + } + const hash = hashInteger(referenceUID++); + if (referenceUID === 0x7fffffff) { + referenceUID = 1; + } + referenceMap.set(o, hash); + return hash; +} +/** taken from immutable.js */ +function hashMerge(a, b) { + return (a ^ (b + 0x9e3779b9 + (a << 6) + (a >> 2))) | 0; +} +/** scrambles an integer to make it more randomly distributed */ +function hashInteger(i) { + i = Math.imul(0x45d9f3b, (i >>> 16) ^ i); + i = Math.imul(0x45d9f3b, (i >>> 16) ^ i); + i = (i >>> 16) ^ i; + return i; +} +/** standard string hash popularised by java + hashInteger at the end */ +function hashString(s) { let hash = 0; const len = s.length; for (let i = 0; i < len; i++) { hash = (Math.imul(31, hash) + s.charCodeAt(i)) | 0; } - return hash; + return hashInteger(hash); +} +/** convert number to string and hash, seems to be better and faster than anything else */ +function hashNumber(n) { + return hashString(n.toString()); +} +/** hash any js object */ +function hashObject(o) { + const proto = Object.getPrototypeOf(o); + if (proto !== null && typeof proto.hashCode === "function") { + try { + return o.hashCode(o); + } catch {} + } + if (o instanceof Promise || o instanceof WeakSet || o instanceof WeakMap) { + return hashByReference(o); + } + if (o instanceof Date) { + return hashNumber(o.getTime()); + } + let h = 0; + if (Array.isArray(o)) { + for (let i = 0; i < o.length; i++) { + h = (Math.imul(31, h) + getHash(o[i])) | 0; + } + } else if (o instanceof Set) { + o.forEach((v) => { + h = (h + getHash(v)) | 0; + }); + } else if (o instanceof Map) { + o.forEach((v, k) => { + h = (h + hashMerge(getHash(v), getHash(k))) | 0; + }); + } else if (o instanceof ArrayBuffer) { + const view = new Uint8Array(o); + for (let i = 0; i < view.length; i++) { + h = (Math.imul(31, h) + getHash(view[i])) | 0; + } + } else { + const keys = Object.keys(o); + for (let i = 0; i < keys.length; i++) { + const k = keys[i]; + const v = o[k]; + h = (h + hashMerge(getHash(v), hashString(k))) | 0; + } + } + return h; +} +/** hash any js value */ +export function getHash(u) { + if (u == null) { + return u === null ? 0x42108422 : 0x42108423; + } + switch (typeof u) { + case "number": + return hashNumber(u); + case "string": + return hashString(u); + case "boolean": + return u ? 0x42108421 : 0x42108420; + case "bigint": + return hashNumber(u); + case "object": + return hashObject(u); + case "symbol": + return hashByReference(u); + case "function": + return hashByReference(u); + } } const SHIFT = 5; // number of bits you need to shift by to get the next bucket const BUCKET_SIZE = Math.pow(2, SHIFT); @@ -491,15 +586,15 @@ function toArray(root, result) { toArray(item, result); } } -export class Map { +/** Extra wrapper to keep track of map size */ +export class PMap { constructor(root, size) { this.root = root; this.size = size; } } -/** Extra wrapper to keep track of map size */ export function create() { - return new Map(undefined, 0); + return new PMap(undefined, 0); } export function get(map, key) { if (map.root === undefined) { @@ -524,7 +619,7 @@ export function set(map, key, val) { if (newRoot === map.root) { return map; } - return new Map(newRoot, addedLeaf.val ? map.size + 1 : map.size); + return new PMap(newRoot, addedLeaf.val ? map.size + 1 : map.size); } export function remove(map, key) { if (map.root === undefined) { @@ -537,7 +632,7 @@ export function remove(map, key) { if (newRoot === undefined) { return create(); } - return new Map(newRoot, map.size - 1); + return new PMap(newRoot, map.size - 1); } export function has(map, key) { if (map.root === undefined) { From 19d228b87b9e398e916657a237976d9c6e04a8dd Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Thu, 11 Aug 2022 23:57:14 +1200 Subject: [PATCH 04/19] implement hashCode for maps in javascript --- src/persistent-hash-map.mjs | 15 +++++++++++---- test/gleam/map_test.gleam | 11 ++++++++--- test/gleam/set_test.gleam | 1 + 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index 8cdc0231..c2a792cd 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -568,7 +568,7 @@ function withoutCollision(root, _shift, _hash, key) { array: spliceOut(root.array, idx), }; } -function toArray(root, result) { +function forEach(root, fn) { if (root === undefined) { return; } @@ -580,10 +580,10 @@ function toArray(root, result) { continue; } if (item.type === ENTRY) { - result.push([item.k, item.v]); + fn(item.v, item.k); continue; } - toArray(item, result); + forEach(item, fn); } } /** Extra wrapper to keep track of map size */ @@ -592,6 +592,13 @@ export class PMap { this.root = root; this.size = size; } + hashCode() { + let h = 0; + forEach(this, (v, k) => { + h = (h + hashMerge(getHash(v), getHash(k))) | 0; + }); + return h; + } } export function create() { return new PMap(undefined, 0); @@ -645,7 +652,7 @@ export function entries(map) { return []; } const result = []; - toArray(map.root, result); + forEach(map.root, (v, k) => result.push([k, v])); return result; } export function __include_me() { diff --git a/test/gleam/map_test.gleam b/test/gleam/map_test.gleam index 2666579f..34f3ee66 100644 --- a/test/gleam/map_test.gleam +++ b/test/gleam/map_test.gleam @@ -2,6 +2,8 @@ import gleam/map import gleam/option.{None, Some} import gleam/should import gleam/string +import gleam/list +import gleam/int pub fn from_list_test() { [#(4, 0), #(1, 0)] @@ -103,6 +105,7 @@ pub fn keys_test() { [#("a", 0), #("b", 1), #("c", 2)] |> map.from_list |> map.keys + |> list.sort(string.compare) |> should.equal(["a", "b", "c"]) } @@ -110,6 +113,7 @@ pub fn values_test() { [#("a", 0), #("b", 1), #("c", 2)] |> map.from_list |> map.values + |> list.sort(int.compare) |> should.equal([0, 1, 2]) } @@ -189,11 +193,12 @@ pub fn fold_test() { |> map.fold(0, add) |> should.equal(6) - let concat = fn(acc, k, _) { string.append(acc, k) } + let prepend = fn(acc, k, _) { list.prepend(acc, k) } dict - |> map.fold("", concat) - |> should.equal("abcd") + |> map.fold([], prepend) + |> list.sort(string.compare) + |> should.equal(["a", "b", "c", "d"]) map.from_list([]) |> map.fold(0, add) diff --git a/test/gleam/set_test.gleam b/test/gleam/set_test.gleam index c771951d..8a1335d1 100644 --- a/test/gleam/set_test.gleam +++ b/test/gleam/set_test.gleam @@ -70,6 +70,7 @@ pub fn filter_test() { |> set.from_list() |> set.filter(for: int.is_even) |> set.to_list + |> list.sort(int.compare) |> should.equal([4, 6, 44]) } From 5463777364edd9647205a6c9cfd9780dfedffcff Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Fri, 12 Aug 2022 00:04:53 +1200 Subject: [PATCH 05/19] implement equals for maps in javascript --- src/persistent-hash-map.mjs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index c2a792cd..18512581 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -588,6 +588,7 @@ function forEach(root, fn) { } /** Extra wrapper to keep track of map size */ export class PMap { + static NOT_FOUND = Symbol(); constructor(root, size) { this.root = root; this.size = size; @@ -599,6 +600,13 @@ export class PMap { }); return h; } + equals(o) { + let equal = true; + forEach(this, (v, k) => { + equal = equal && isEqual(v, getWithDefault(o, k, PMap.NOT_FOUND)); + }); + return equal; + } } export function create() { return new PMap(undefined, 0); From 1edd644fe1efe787014a8a07fcf31aeff2d816dc Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Fri, 12 Aug 2022 20:37:57 +1200 Subject: [PATCH 06/19] javascript hashCode: handle uint8array --- src/persistent-hash-map.mjs | 37 ++++++++++++++------------------ test/gleam/bit_string_test.gleam | 5 +++++ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index 18512581..f2b7e817 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -9,7 +9,7 @@ function hashByReference(o) { if (known !== undefined) { return known; } - const hash = hashInteger(referenceUID++); + const hash = referenceUID++; if (referenceUID === 0x7fffffff) { referenceUID = 1; } @@ -27,14 +27,14 @@ function hashInteger(i) { i = (i >>> 16) ^ i; return i; } -/** standard string hash popularised by java + hashInteger at the end */ +/** standard string hash popularised by java */ function hashString(s) { let hash = 0; const len = s.length; for (let i = 0; i < len; i++) { hash = (Math.imul(31, hash) + s.charCodeAt(i)) | 0; } - return hashInteger(hash); + return hash; } /** convert number to string and hash, seems to be better and faster than anything else */ function hashNumber(n) { @@ -55,7 +55,8 @@ function hashObject(o) { return hashNumber(o.getTime()); } let h = 0; - if (Array.isArray(o)) { + if (o instanceof ArrayBuffer) o = new Uint8Array(o); + if (Array.isArray(o) || o instanceof Uint8Array) { for (let i = 0; i < o.length; i++) { h = (Math.imul(31, h) + getHash(o[i])) | 0; } @@ -67,41 +68,35 @@ function hashObject(o) { o.forEach((v, k) => { h = (h + hashMerge(getHash(v), getHash(k))) | 0; }); - } else if (o instanceof ArrayBuffer) { - const view = new Uint8Array(o); - for (let i = 0; i < view.length; i++) { - h = (Math.imul(31, h) + getHash(view[i])) | 0; - } } else { const keys = Object.keys(o); for (let i = 0; i < keys.length; i++) { const k = keys[i]; const v = o[k]; - h = (h + hashMerge(getHash(v), hashString(k))) | 0; + h = (h + hashMerge(getHash(v), hashInteger(hashString(k)))) | 0; } } return h; } /** hash any js value */ export function getHash(u) { - if (u == null) { - return u === null ? 0x42108422 : 0x42108423; - } + if (u === null) return 0x42108422; + if (u === undefined) return 0x42108423; + if (u === true) return 0x42108421; + if (u === false) return 0x42108420; switch (typeof u) { case "number": - return hashNumber(u); + return hashInteger(hashNumber(u)); case "string": - return hashString(u); - case "boolean": - return u ? 0x42108421 : 0x42108420; + return hashInteger(hashString(u)); case "bigint": - return hashNumber(u); + return hashInteger(hashNumber(u)); case "object": - return hashObject(u); + return hashInteger(hashObject(u)); case "symbol": - return hashByReference(u); + return hashInteger(hashByReference(u)); case "function": - return hashByReference(u); + return hashInteger(hashByReference(u)); } } const SHIFT = 5; // number of bits you need to shift by to get the next bucket diff --git a/test/gleam/bit_string_test.gleam b/test/gleam/bit_string_test.gleam index 48f66d64..734792c3 100644 --- a/test/gleam/bit_string_test.gleam +++ b/test/gleam/bit_string_test.gleam @@ -9,6 +9,11 @@ pub fn byte_size_test() { |> should.equal(0) } +pub fn not_equal_test() { + bit_string.from_string("test") + |> should.not_equal(bit_string.from_string("asdf")) +} + pub fn append_test() { bit_string.from_string("Test") |> bit_string.append(bit_string.from_string(" Me")) From f66429825d883cd527fed6344a4e7cca009e5095 Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sat, 13 Aug 2022 23:09:32 +1200 Subject: [PATCH 07/19] js maps: add more tests and fix stuff --- src/gleam_stdlib.mjs | 2 +- src/persistent-hash-map.mjs | 58 ++++++++++----------------- test/gleam/map_test.gleam | 80 +++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 37 deletions(-) diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index c1f90eb1..d70cf235 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -192,7 +192,7 @@ export function trim_right(string) { } export function bit_string_from_string(string) { - return new toBitString([stringBits(string)]); + return toBitString([stringBits(string)]); } export function bit_string_concat(bit_strings) { diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index f2b7e817..dc3db31d 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -1,7 +1,7 @@ import { isEqual } from "./gleam.mjs"; const referenceMap = new WeakMap(); -let referenceUID = 1; +let referenceUID = 0; /** hash the object by reference using a weak map and incrementing uid */ function hashByReference(o) { @@ -11,22 +11,15 @@ function hashByReference(o) { } const hash = referenceUID++; if (referenceUID === 0x7fffffff) { - referenceUID = 1; + referenceUID = 0; } referenceMap.set(o, hash); return hash; } -/** taken from immutable.js */ +/** merge two hashes in an order sensitive way */ function hashMerge(a, b) { return (a ^ (b + 0x9e3779b9 + (a << 6) + (a >> 2))) | 0; } -/** scrambles an integer to make it more randomly distributed */ -function hashInteger(i) { - i = Math.imul(0x45d9f3b, (i >>> 16) ^ i); - i = Math.imul(0x45d9f3b, (i >>> 16) ^ i); - i = (i >>> 16) ^ i; - return i; -} /** standard string hash popularised by java */ function hashString(s) { let hash = 0; @@ -55,7 +48,9 @@ function hashObject(o) { return hashNumber(o.getTime()); } let h = 0; - if (o instanceof ArrayBuffer) o = new Uint8Array(o); + if (o instanceof ArrayBuffer) { + o = new Uint8Array(o); + } if (Array.isArray(o) || o instanceof Uint8Array) { for (let i = 0; i < o.length; i++) { h = (Math.imul(31, h) + getHash(o[i])) | 0; @@ -73,7 +68,7 @@ function hashObject(o) { for (let i = 0; i < keys.length; i++) { const k = keys[i]; const v = o[k]; - h = (h + hashMerge(getHash(v), hashInteger(hashString(k)))) | 0; + h = (h + hashMerge(getHash(v), hashString(k))) | 0; } } return h; @@ -86,17 +81,17 @@ export function getHash(u) { if (u === false) return 0x42108420; switch (typeof u) { case "number": - return hashInteger(hashNumber(u)); + return hashNumber(u); case "string": - return hashInteger(hashString(u)); + return hashString(u); case "bigint": - return hashInteger(hashNumber(u)); + return hashNumber(u); case "object": - return hashInteger(hashObject(u)); + return hashObject(u); case "symbol": - return hashInteger(hashByReference(u)); + return hashByReference(u); case "function": - return hashInteger(hashByReference(u)); + return hashByReference(u); } } const SHIFT = 5; // number of bits you need to shift by to get the next bucket @@ -396,7 +391,7 @@ function find(root, shift, hash, key) { case INDEX_NODE: return findIndex(root, shift, hash, key); case COLLISION_NODE: - return findCollision(root, shift, hash, key); + return findCollision(root, key); } } function findArray(root, shift, hash, key) { @@ -422,7 +417,7 @@ function findIndex(root, shift, hash, key) { } return undefined; } -function findCollision(root, _shift, _hash, key) { +function findCollision(root, key) { const idx = collisionIndexOf(root, key); if (idx < 0) { return undefined; @@ -440,7 +435,7 @@ function without(root, shift, hash, key) { case INDEX_NODE: return withoutIndex(root, shift, hash, key); case COLLISION_NODE: - return withoutCollision(root, shift, hash, key); + return withoutCollision(root, key); } } function withoutArray(root, shift, hash, key) { @@ -545,10 +540,10 @@ function withoutIndex(root, shift, hash, key) { } return root; } -function withoutCollision(root, _shift, _hash, key) { +function withoutCollision(root, key) { const idx = collisionIndexOf(root, key); // if the key not found, no changes - if (idx === -1) { + if (idx < 0) { return root; } // otherwise the entry was found, remove it @@ -583,22 +578,21 @@ function forEach(root, fn) { } /** Extra wrapper to keep track of map size */ export class PMap { - static NOT_FOUND = Symbol(); constructor(root, size) { this.root = root; this.size = size; } hashCode() { let h = 0; - forEach(this, (v, k) => { + forEach(this.root, (v, k) => { h = (h + hashMerge(getHash(v), getHash(k))) | 0; }); return h; } equals(o) { let equal = true; - forEach(this, (v, k) => { - equal = equal && isEqual(v, getWithDefault(o, k, PMap.NOT_FOUND)); + forEach(this.root, (v, k) => { + equal = equal && isEqual(getWithDefault(o, k, !v), v); }); return equal; } @@ -606,12 +600,6 @@ export class PMap { export function create() { return new PMap(undefined, 0); } -export function get(map, key) { - if (map.root === undefined) { - return undefined; - } - return find(map.root, 0, getHash(key), key)?.v; -} export function getWithDefault(map, key, notFound) { if (map.root === undefined) { return notFound; @@ -658,6 +646,4 @@ export function entries(map) { forEach(map.root, (v, k) => result.push([k, v])); return result; } -export function __include_me() { - // blank -} +export function __include_me() {} diff --git a/test/gleam/map_test.gleam b/test/gleam/map_test.gleam index 34f3ee66..cd669f9d 100644 --- a/test/gleam/map_test.gleam +++ b/test/gleam/map_test.gleam @@ -204,3 +204,83 @@ pub fn fold_test() { |> map.fold(0, add) |> should.equal(0) } + +fn range(start, end, a) { + case end - start { + n if n < 1 -> a + _ -> range(start, end - 1, [end - 1, ..a]) + } +} + +fn list_to_map(list) { + list + |> list.map(fn(n) { #(n, n) }) + |> map.from_list +} + +fn grow_and_shrink_map(initial_size, final_size) { + range(0, initial_size, []) + |> list_to_map + |> list.fold( + range(final_size, initial_size, []), + _, + fn(map, item) { map.delete(map, item) }, + ) +} + +// maps should be equal even if the insert/removal order was different +pub fn insert_order_equality_test() { + grow_and_shrink_map(8, 2) + |> should.equal(grow_and_shrink_map(4, 2)) + grow_and_shrink_map(17, 10) + |> should.equal(grow_and_shrink_map(12, 10)) + grow_and_shrink_map(2000, 1000) + |> should.equal(grow_and_shrink_map(1000, 1000)) +} + +// ensure operations on a map don't mutate it +pub fn persistence_test() { + let a = list_to_map([0]) + map.insert(a, 0, 5) + map.insert(a, 1, 6) + map.delete(a, 0) + map.get(a, 0) + |> should.equal(Ok(0)) +} + +// using maps as keys should work (tests hash function) +pub fn map_as_key_test() { + let l = range(0, 1000, []) + let a = list_to_map(l) + let a2 = list_to_map(list.reverse(l)) + let a3 = grow_and_shrink_map(2000, 1000) + let b = grow_and_shrink_map(60, 50) + let c = grow_and_shrink_map(50, 20) + let d = grow_and_shrink_map(2, 2) + + let map1 = + map.new() + |> map.insert(a, "a") + |> map.insert(b, "b") + |> map.insert(c, "c") + |> map.insert(d, "d") + + map.get(map1, a) + |> should.equal(Ok("a")) + map.get(map1, a2) + |> should.equal(Ok("a")) + map.get(map1, a3) + |> should.equal(Ok("a")) + map.get(map1, b) + |> should.equal(Ok("b")) + map.get(map1, c) + |> should.equal(Ok("c")) + map.get(map1, d) + |> should.equal(Ok("d")) + map.insert(map1, a2, "a2") + |> map.get(a) + |> should.equal(Ok("a2")) + map.insert(map1, a3, "a3") + |> map.get(a) + |> should.equal(Ok("a3")) +} From 5ee6fc7b2ab3398d7122788aea0dfa16a3a8beea Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sat, 13 Aug 2022 23:46:10 +1200 Subject: [PATCH 08/19] js map: refactor into class --- src/gleam_stdlib.mjs | 18 ++--- src/persistent-hash-map.mjs | 129 +++++++++++++++++++++--------------- 2 files changed, 85 insertions(+), 62 deletions(-) diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index d70cf235..c881ad2a 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -15,7 +15,7 @@ import { } from "./gleam/regex.mjs"; import { DecodeError } from "./gleam/dynamic.mjs"; import { Some, None } from "./gleam/option.mjs"; -import * as pmap from "./persistent-hash-map.mjs" +import PMap from "./persistent-hash-map.mjs" const Nil = undefined; const NOT_FOUND = {} @@ -307,7 +307,7 @@ export function regex_scan(regex, string) { } export function new_map() { - return pmap.create(); + return PMap.new(); } export function map_size(map) { @@ -315,15 +315,15 @@ export function map_size(map) { } export function map_to_list(map) { - return List.fromArray(pmap.entries(map)); + return List.fromArray(map.entries()); } export function map_remove(key, map) { - return pmap.remove(map, key); + return map.delete(key); } export function map_get(map, key) { - const value = pmap.getWithDefault(map, key, NOT_FOUND); + const value = map.get(key, NOT_FOUND); if(value === NOT_FOUND) { return new Error(Nil); } @@ -331,7 +331,7 @@ export function map_get(map, key) { } export function map_insert(key, value, map) { - return pmap.set(map, key, value); + return map.set(key, value); } function unsafe_percent_decode(string) { @@ -462,7 +462,7 @@ export function classify_dynamic(data) { return `Tuple of ${data.length} elements`; } else if (BitString.isBitString(data)) { return "BitString"; - } else if (data instanceof pmap.PMap) { + } else if (data instanceof PMap) { return "Map"; } else if (typeof data === "number") { return "Float"; @@ -528,7 +528,7 @@ export function decode_result(data) { } export function decode_map(data) { - return data instanceof pmap.PMap ? new Ok(data) : decoder_error("Map", data); + return data instanceof PMap ? new Ok(data) : decoder_error("Map", data); } export function decode_option(data, decoder) { @@ -545,7 +545,7 @@ export function decode_option(data, decoder) { export function decode_field(value, name) { let error = () => decoder_error_no_classify("field", "nothing"); - if (value instanceof pmap.PMap) { + if (value instanceof PMap) { let entry = map_get(value, name); return entry.isOk() ? entry : error(); } diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index dc3db31d..c3b2d17e 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -577,73 +577,96 @@ function forEach(root, fn) { } } /** Extra wrapper to keep track of map size */ -export class PMap { +export default class PMap { + static fromObject(o) { + const keys = Object.keys(o); + let m = PMap.new() + for (let i = 0; i < keys.length; i++) { + const k = keys[i]; + m = m.set(k, o[k]) + } + return m + } + static fromMap(o) { + let m = PMap.new() + o.forEach((v, k) => { + m = m.set(k, v) + }); + return m + } + static new() { + return new PMap(undefined, 0); + } constructor(root, size) { this.root = root; this.size = size; } + get(key, notFound) { + if (this.root === undefined) { + return notFound; + } + const found = find(this.root, 0, getHash(key), key); + if (found === undefined) { + return notFound; + } + return found.v; + } + set(key, val) { + const addedLeaf = { val: false }; + const root = this.root === undefined ? EMPTY : this.root; + const newRoot = assoc(root, 0, getHash(key), key, val, addedLeaf); + if (newRoot === this.root) { + return this; + } + return new PMap(newRoot, addedLeaf.val ? this.size + 1 : this.size); + } + delete(key) { + if (this.root === undefined) { + return this; + } + const newRoot = without(this.root, 0, getHash(key), key); + if (newRoot === this.root) { + return this; + } + if (newRoot === undefined) { + return PMap.new(); + } + return new PMap(newRoot, this.size - 1); + } + has(key) { + if (this.root === undefined) { + return false; + } + return find(this.root, 0, getHash(key), key) !== undefined; + } + entries() { + if (this.root === undefined) { + return []; + } + const result = []; + this.forEach((v, k) => result.push([k, v])); + return result; + } + forEach(fn) { + forEach(this.root, fn) + } hashCode() { let h = 0; - forEach(this.root, (v, k) => { + this.forEach((v, k) => { h = (h + hashMerge(getHash(v), getHash(k))) | 0; }); return h; } equals(o) { + if(!(o instanceof PMap)) { + return + } let equal = true; - forEach(this.root, (v, k) => { - equal = equal && isEqual(getWithDefault(o, k, !v), v); + this.forEach((v, k) => { + equal = equal && isEqual(o.get(k, !v), v); }); return equal; } } -export function create() { - return new PMap(undefined, 0); -} -export function getWithDefault(map, key, notFound) { - if (map.root === undefined) { - return notFound; - } - const found = find(map.root, 0, getHash(key), key); - if (found === undefined) { - return notFound; - } - return found.v; -} -export function set(map, key, val) { - const addedLeaf = { val: false }; - const root = map.root === undefined ? EMPTY : map.root; - const newRoot = assoc(root, 0, getHash(key), key, val, addedLeaf); - if (newRoot === map.root) { - return map; - } - return new PMap(newRoot, addedLeaf.val ? map.size + 1 : map.size); -} -export function remove(map, key) { - if (map.root === undefined) { - return map; - } - const newRoot = without(map.root, 0, getHash(key), key); - if (newRoot === map.root) { - return map; - } - if (newRoot === undefined) { - return create(); - } - return new PMap(newRoot, map.size - 1); -} -export function has(map, key) { - if (map.root === undefined) { - return false; - } - return find(map.root, 0, getHash(key), key) !== undefined; -} -export function entries(map) { - if (map.root === undefined) { - return []; - } - const result = []; - forEach(map.root, (v, k) => result.push([k, v])); - return result; -} + export function __include_me() {} From 9be511c305c9f6543dea38160ed3f29415f8d9d1 Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sat, 13 Aug 2022 23:48:46 +1200 Subject: [PATCH 09/19] format --- src/persistent-hash-map.mjs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index c3b2d17e..9236ec01 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -576,23 +576,23 @@ function forEach(root, fn) { forEach(item, fn); } } -/** Extra wrapper to keep track of map size */ +/** Extra wrapper to keep track of map size and clean up the API */ export default class PMap { static fromObject(o) { const keys = Object.keys(o); - let m = PMap.new() + let m = PMap.new(); for (let i = 0; i < keys.length; i++) { const k = keys[i]; - m = m.set(k, o[k]) + m = m.set(k, o[k]); } - return m + return m; } static fromMap(o) { - let m = PMap.new() + let m = PMap.new(); o.forEach((v, k) => { - m = m.set(k, v) + m = m.set(k, v); }); - return m + return m; } static new() { return new PMap(undefined, 0); @@ -648,7 +648,7 @@ export default class PMap { return result; } forEach(fn) { - forEach(this.root, fn) + forEach(this.root, fn); } hashCode() { let h = 0; @@ -658,8 +658,8 @@ export default class PMap { return h; } equals(o) { - if(!(o instanceof PMap)) { - return + if (!(o instanceof PMap)) { + return false; } let equal = true; this.forEach((v, k) => { From 5c9fe15119d52dea45dcfd195c6160293a844748 Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sun, 14 Aug 2022 00:01:10 +1200 Subject: [PATCH 10/19] js map: decode from Map or Object --- src/gleam_stdlib.mjs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index c881ad2a..0bbc9104 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -324,10 +324,10 @@ export function map_remove(key, map) { export function map_get(map, key) { const value = map.get(key, NOT_FOUND); - if(value === NOT_FOUND) { + if (value === NOT_FOUND) { return new Error(Nil); } - return new Ok(value) + return new Ok(value); } export function map_insert(key, value, map) { @@ -528,6 +528,13 @@ export function decode_result(data) { } export function decode_map(data) { + if (data instanceof Map) { + return new Ok(PMap.fromMap(data)); + } + const proto = Object.getPrototypeOf(data); + if (proto === Object.prototype || proto === null) { + return new Ok(PMap.fromObject(data)); + } return data instanceof PMap ? new Ok(data) : decoder_error("Map", data); } From b631ed2ac9ef21848a3aa74e14054a0a69fcf96d Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sun, 14 Aug 2022 00:26:29 +1200 Subject: [PATCH 11/19] map: add bitstring test --- test/gleam/map_test.gleam | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/gleam/map_test.gleam b/test/gleam/map_test.gleam index cd669f9d..9b3fcfe1 100644 --- a/test/gleam/map_test.gleam +++ b/test/gleam/map_test.gleam @@ -84,6 +84,21 @@ pub fn get_test() { m |> map.get(C) |> should.equal(Error(Nil)) + + let proplist = [#(<<1, 2, 3>>, 0), #(<<3, 2, 1>>, 1)] + let m = map.from_list(proplist) + + m + |> map.get(<<1, 2, 3>>) + |> should.equal(Ok(0)) + + m + |> map.get(<<3, 2, 1>>) + |> should.equal(Ok(1)) + + m + |> map.get(<<1, 3, 2>>) + |> should.equal(Error(Nil)) } pub fn insert_test() { From 5440a672be783c4ac7b1b0941a4f075e044b94b4 Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sun, 14 Aug 2022 14:38:38 +1200 Subject: [PATCH 12/19] bump ci gleam version --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d727a847..ae306417 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: - uses: erlef/setup-beam@v1.9.0 with: otp-version: "23.2" - gleam-version: "0.20.0-rc1" + gleam-version: "0.22.1" - uses: actions/setup-node@v2 with: node-version: "16.0.0" From 7f311c3400e2fcff856cb960a07ea72077642619 Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sun, 14 Aug 2022 14:40:12 +1200 Subject: [PATCH 13/19] bump ci gleam version 2 electric boogaloo --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ae306417..72186a67 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: - uses: erlef/setup-beam@v1.9.0 with: otp-version: "23.2" - gleam-version: "0.22.1" + gleam-version: "nightly" - uses: actions/setup-node@v2 with: node-version: "16.0.0" From 1056fdbb145b9f241305eaad9f914dd38893380c Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sun, 14 Aug 2022 22:46:39 +1200 Subject: [PATCH 14/19] better number hash --- src/persistent-hash-map.mjs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index 9236ec01..a1e1e3f5 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -1,6 +1,7 @@ import { isEqual } from "./gleam.mjs"; const referenceMap = new WeakMap(); +const tempDataView = new DataView(new ArrayBuffer(8)); let referenceUID = 0; /** hash the object by reference using a weak map and incrementing uid */ @@ -29,9 +30,12 @@ function hashString(s) { } return hash; } -/** convert number to string and hash, seems to be better and faster than anything else */ +/** hash a number by converting to two integers and do some jumbling */ function hashNumber(n) { - return hashString(n.toString()); + tempDataView.setFloat64(0, n); + const i = tempDataView.getInt32(0); + const j = tempDataView.getInt32(4); + return Math.imul(0x45d9f3b, (i >> 16) ^ i) ^ j; } /** hash any js object */ function hashObject(o) { From 5192e00c063e6e442869d7a5c5435af43d0775bd Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sat, 20 Aug 2022 13:20:27 +1200 Subject: [PATCH 15/19] js map: allow entries in array nodes to reduce the number of nodes created --- src/persistent-hash-map.mjs | 70 +++++++++++++++++++++++++------------ test/gleam/map_test.gleam | 37 ++++++++++++++++++++ 2 files changed, 84 insertions(+), 23 deletions(-) diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index a1e1e3f5..e7c76fe5 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -212,13 +212,38 @@ function assocArray(root, shift, hash, key, val, addedLeaf) { const node = root.array[idx]; // if the corresponding index is empty set the index to a newly created node if (node === undefined) { + addedLeaf.val = true; return { type: ARRAY_NODE, size: root.size + 1, + array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }), + }; + } + if (node.type === ENTRY) { + // if keys are equal replace the entry + if (isEqual(key, node.k)) { + if (val === node.v) { + return root; + } + return { + type: ARRAY_NODE, + size: root.size, + array: cloneAndSet(root.array, idx, { + type: ENTRY, + k: key, + v: val, + }), + }; + } + // otherwise upgrade the entry to a node and insert + addedLeaf.val = true; + return { + type: ARRAY_NODE, + size: root.size, array: cloneAndSet( root.array, idx, - assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf) + createNode(shift + SHIFT, node.k, node.v, hash, key, val) ), }; } @@ -255,8 +280,8 @@ function assocIndex(root, shift, hash, key, val, addedLeaf) { } // otherwise there is an entry at the index // if the keys are equal replace the entry with the updated value - const keyOrNull = node.k; - if (isEqual(key, keyOrNull)) { + const nodeKey = node.k; + if (isEqual(key, nodeKey)) { if (val === node.v) { return root; } @@ -265,7 +290,7 @@ function assocIndex(root, shift, hash, key, val, addedLeaf) { bitmap: root.bitmap, array: cloneAndSet(root.array, idx, { type: ENTRY, - k: keyOrNull, + k: key, v: val, }), }; @@ -278,7 +303,7 @@ function assocIndex(root, shift, hash, key, val, addedLeaf) { array: cloneAndSet( root.array, idx, - createNode(shift + SHIFT, keyOrNull, node.v, hash, key, val) + createNode(shift + SHIFT, nodeKey, node.v, hash, key, val) ), }; } else { @@ -298,23 +323,10 @@ function assocIndex(root, shift, hash, key, val, addedLeaf) { for (let i = 0; i < 32; i++) { if ((bitmap & 1) !== 0) { const node = root.array[j++]; - // turn any entries into index nodes - // since array nodes should only contain other nodes, not entries - if (node.type !== ENTRY) { - nodes[i] = node; - } else { - nodes[i] = assocIndex( - EMPTY, - shift + SHIFT, - getHash(node.k), - node.k, - node.v, - addedLeaf - ); - } + nodes[i] = node; } // shift the bitmap to process the next bit - bitmap >>>= 1; + bitmap = bitmap >>> 1; } return { type: ARRAY_NODE, @@ -404,6 +416,9 @@ function findArray(root, shift, hash, key) { if (node === undefined) { return undefined; } + if (node.type === ENTRY) { + return node; + } return find(node, shift + SHIFT, hash, key); } function findIndex(root, shift, hash, key) { @@ -448,9 +463,18 @@ function withoutArray(root, shift, hash, key) { if (node === undefined) { return root; // already empty } - const n = without(node, shift + SHIFT, hash, key); - if (n === node) { - return root; // no changes + let n = undefined; + // if node is an entry and the keys are not equal there is nothing to remove + // if node is not an entry do a recursive call + if (node.type === ENTRY) { + if (!isEqual(node.k, key)) { + return root; // no changes + } + } else { + n = without(node, shift + SHIFT, hash, key); + if (n === node) { + return root; // no changes + } } // if the recursive call returned undefined the node should be removed if (n === undefined) { diff --git a/test/gleam/map_test.gleam b/test/gleam/map_test.gleam index 9b3fcfe1..1c9f3401 100644 --- a/test/gleam/map_test.gleam +++ b/test/gleam/map_test.gleam @@ -299,3 +299,40 @@ pub fn map_as_key_test() { |> map.get(a) |> should.equal(Ok("a3")) } + +pub fn large_n_test() { + let n = 10000 + let l = range(0, n, []) + + let m = list_to_map(l) + list.map(l, fn(i) { should.equal(map.get(m, i), Ok(i)) }) + + let m = grow_and_shrink_map(n, 0) + list.map(l, fn(i) { should.equal(map.get(m, i), Error(Nil)) }) +} + +pub fn size_test() { + let n = 1000 + let m = list_to_map(range(0, n, [])) + map.size(m) + |> should.equal(n) + + let m = grow_and_shrink_map(n, n / 2) + map.size(m) + |> should.equal(n / 2) + + let m = + grow_and_shrink_map(n, 0) + |> map.delete(0) + map.size(m) + |> should.equal(0) + + let m = list_to_map(range(0, 18, [])) + + map.insert(m, 1, 99) + |> map.size() + |> should.equal(18) + map.insert(m, 2, 99) + |> map.size() + |> should.equal(18) +} From eb536be9573563e44e6210ed671de23516da2aba Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Sat, 20 Aug 2022 19:35:34 +1200 Subject: [PATCH 16/19] js map: add jsdoc for typechecking --- src/persistent-hash-map.mjs | 288 +++++++++++++++++++++++++++++++++--- 1 file changed, 268 insertions(+), 20 deletions(-) diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index e7c76fe5..bf5ef53a 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -3,8 +3,11 @@ import { isEqual } from "./gleam.mjs"; const referenceMap = new WeakMap(); const tempDataView = new DataView(new ArrayBuffer(8)); let referenceUID = 0; - -/** hash the object by reference using a weak map and incrementing uid */ +/** + * hash the object by reference using a weak map and incrementing uid + * @param {any} o + * @returns {number} + */ function hashByReference(o) { const known = referenceMap.get(o); if (known !== undefined) { @@ -17,11 +20,20 @@ function hashByReference(o) { referenceMap.set(o, hash); return hash; } -/** merge two hashes in an order sensitive way */ +/** + * merge two hashes in an order sensitive way + * @param {number} a + * @param {number} b + * @returns {number} + */ function hashMerge(a, b) { return (a ^ (b + 0x9e3779b9 + (a << 6) + (a >> 2))) | 0; } -/** standard string hash popularised by java */ +/** + * standard string hash popularised by java + * @param {string} s + * @returns {number} + */ function hashString(s) { let hash = 0; const len = s.length; @@ -30,14 +42,30 @@ function hashString(s) { } return hash; } -/** hash a number by converting to two integers and do some jumbling */ +/** + * hash a number by converting to two integers and do some jumbling + * @param {number} n + * @returns {number} + */ function hashNumber(n) { tempDataView.setFloat64(0, n); const i = tempDataView.getInt32(0); const j = tempDataView.getInt32(4); return Math.imul(0x45d9f3b, (i >> 16) ^ i) ^ j; } -/** hash any js object */ +/** + * hash a BigInt by converting it to a string and hashing that + * @param {BigInt} n + * @returns {number} + */ +function hashBigInt(n) { + return hashString(n.toString()); +} +/** + * hash any js object + * @param {any} o + * @returns {number} + */ function hashObject(o) { const proto = Object.getPrototypeOf(o); if (proto !== null && typeof proto.hashCode === "function") { @@ -77,7 +105,11 @@ function hashObject(o) { } return h; } -/** hash any js value */ +/** + * hash any js value + * @param {any} u + * @returns {number} + */ export function getHash(u) { if (u === null) return 0x42108422; if (u === undefined) return 0x42108423; @@ -89,15 +121,40 @@ export function getHash(u) { case "string": return hashString(u); case "bigint": - return hashNumber(u); + return hashBigInt(u); case "object": return hashObject(u); case "symbol": return hashByReference(u); case "function": return hashByReference(u); + default: + return 0; // should be unreachable } } +/** + * @template K,V + * @typedef {ArrayNode | IndexNode | CollisionNode} Node + */ +/** + * @template K,V + * @typedef {{ type: typeof ENTRY, k: K, v: V }} Entry + */ +/** + * @template K,V + * @typedef {{ type: typeof ARRAY_NODE, size: number, array: (undefined | Entry | Node)[] }} ArrayNode + */ +/** + * @template K,V + * @typedef {{ type: typeof INDEX_NODE, bitmap: number, array: (Entry | Node)[] }} IndexNode + */ +/** + * @template K,V + * @typedef {{ type: typeof COLLISION_NODE, hash: number, array: Entry[] }} CollisionNode + */ +/** + * @typedef {{ val: boolean }} Flag + */ const SHIFT = 5; // number of bits you need to shift by to get the next bucket const BUCKET_SIZE = Math.pow(2, SHIFT); const MASK = BUCKET_SIZE - 1; // used to zero out all bits not in the bucket @@ -107,20 +164,35 @@ const ENTRY = 0; const ARRAY_NODE = 1; const INDEX_NODE = 2; const COLLISION_NODE = 3; +/** @type {IndexNode} */ const EMPTY = { type: INDEX_NODE, bitmap: 0, array: [], }; -/** Mask the hash to get only the bucket corresponding to shift */ +/** + * Mask the hash to get only the bucket corresponding to shift + * @param {number} hash + * @param {number} shift + * @returns {number} + */ function mask(hash, shift) { return (hash >>> shift) & MASK; } -/** Set only the Nth bit where N is the masked hash */ +/** + * Set only the Nth bit where N is the masked hash + * @param {number} hash + * @param {number} shift + * @returns {number} + */ function bitpos(hash, shift) { return 1 << mask(hash, shift); } -/** Count the number of 1 bits in a number */ +/** + * Count the number of 1 bits in a number + * @param {number} x + * @returns {number} + */ function bitcount(x) { x -= (x >> 1) & 0x55555555; x = (x & 0x33333333) + ((x >> 2) & 0x33333333); @@ -129,11 +201,23 @@ function bitcount(x) { x += x >> 16; return x & 0x7f; } -/** Calculate the array index of an item in a bitmap index node */ +/** + * Calculate the array index of an item in a bitmap index node + * @param {number} bitmap + * @param {number} bit + * @returns {number} + */ function index(bitmap, bit) { return bitcount(bitmap & (bit - 1)); } -/** Efficiently copy an array and set one value at an index */ +/** + * Efficiently copy an array and set one value at an index + * @template T + * @param {T[]} arr + * @param {number} at + * @param {T} val + * @returns {T[]} + */ function cloneAndSet(arr, at, val) { const len = arr.length; const out = new Array(len); @@ -143,7 +227,14 @@ function cloneAndSet(arr, at, val) { out[at] = val; return out; } -/** Efficiently copy an array and insert one value at an index */ +/** + * Efficiently copy an array and insert one value at an index + * @template T + * @param {T[]} arr + * @param {number} at + * @param {T} val + * @returns {T[]} + */ function spliceIn(arr, at, val) { const len = arr.length; const out = new Array(len + 1); @@ -158,7 +249,13 @@ function spliceIn(arr, at, val) { } return out; } -/** Efficiently copy an array and remove one value at an index */ +/** + * Efficiently copy an array and remove one value at an index + * @template T + * @param {T[]} arr + * @param {number} at + * @returns {T[]} + */ function spliceOut(arr, at) { const len = arr.length; const out = new Array(len - 1); @@ -173,7 +270,17 @@ function spliceOut(arr, at) { } return out; } -/** Create a new node containing two entries */ +/** + * Create a new node containing two entries + * @template K,V + * @param {number} shift + * @param {K} key1 + * @param {V} val1 + * @param {number} key2hash + * @param {K} key2 + * @param {V} val2 + * @returns {Node} + */ function createNode(shift, key1, val1, key2hash, key2, val2) { const key1hash = getHash(key1); if (key1hash === key2hash) { @@ -196,7 +303,22 @@ function createNode(shift, key1, val1, key2hash, key2, val2) { addedLeaf ); } -/** Associate a node with a new entry, creating a new node. */ +/** + * @template T,K,V + * @callback AssocFunction + * @param {T} root + * @param {number} shift + * @param {number} hash + * @param {K} key + * @param {V} val + * @param {Flag} addedLeaf + * @returns {Node} + */ +/** + * Associate a node with a new entry, creating a new node + * @template T,K,V + * @type {AssocFunction,K,V>} + */ function assoc(root, shift, hash, key, val, addedLeaf) { switch (root.type) { case ARRAY_NODE: @@ -207,6 +329,10 @@ function assoc(root, shift, hash, key, val, addedLeaf) { return assocCollision(root, shift, hash, key, val, addedLeaf); } } +/** + * @template T,K,V + * @type {AssocFunction,K,V>} + */ function assocArray(root, shift, hash, key, val, addedLeaf) { const idx = mask(hash, shift); const node = root.array[idx]; @@ -260,6 +386,10 @@ function assocArray(root, shift, hash, key, val, addedLeaf) { array: cloneAndSet(root.array, idx, n), }; } +/** + * @template T,K,V + * @type {AssocFunction,K,V>} + */ function assocIndex(root, shift, hash, key, val, addedLeaf) { const bit = bitpos(hash, shift); const idx = index(root.bitmap, bit); @@ -350,6 +480,10 @@ function assocIndex(root, shift, hash, key, val, addedLeaf) { } } } +/** + * @template T,K,V + * @type {AssocFunction,K,V>} + */ function assocCollision(root, shift, hash, key, val, addedLeaf) { // if there is a hash collision if (hash === root.hash) { @@ -389,7 +523,13 @@ function assocCollision(root, shift, hash, key, val, addedLeaf) { addedLeaf ); } -/** Find the index of a key in the collision node's array */ +/** + * Find the index of a key in the collision node's array + * @template K,V + * @param {CollisionNode} root + * @param {K} key + * @returns {number} + */ function collisionIndexOf(root, key) { const size = root.array.length; for (let i = 0; i < size; i++) { @@ -399,7 +539,20 @@ function collisionIndexOf(root, key) { } return -1; } -/** Return the found entry or undefined if not present in the root */ +/** + * @template T,K,V + * @callback FindFunction + * @param {T} root + * @param {number} shift + * @param {number} hash + * @param {K} key + * @returns {undefined | Entry} + */ +/** + * Return the found entry or undefined if not present in the root + * @template K,V + * @type {FindFunction,K,V>} + */ function find(root, shift, hash, key) { switch (root.type) { case ARRAY_NODE: @@ -410,6 +563,10 @@ function find(root, shift, hash, key) { return findCollision(root, key); } } +/** + * @template K,V + * @type {FindFunction,K,V>} + */ function findArray(root, shift, hash, key) { const idx = mask(hash, shift); const node = root.array[idx]; @@ -421,6 +578,10 @@ function findArray(root, shift, hash, key) { } return find(node, shift + SHIFT, hash, key); } +/** + * @template K,V + * @type {FindFunction,K,V>} + */ function findIndex(root, shift, hash, key) { const bit = bitpos(hash, shift); if ((root.bitmap & bit) === 0) { @@ -436,6 +597,12 @@ function findIndex(root, shift, hash, key) { } return undefined; } +/** + * @template K,V + * @param {CollisionNode} root + * @param {K} key + * @returns {undefined | Entry} + */ function findCollision(root, key) { const idx = collisionIndexOf(root, key); if (idx < 0) { @@ -443,9 +610,20 @@ function findCollision(root, key) { } return root.array[idx]; } +/** + * @template T,K,V + * @callback WithoutFunction + * @param {T} root + * @param {number} shift + * @param {number} hash + * @param {K} key + * @returns {undefined | Node} + */ /** * Remove an entry from the root, returning the updated root. * Returns undefined if the node should be removed from the parent. + * @template K,V + * @type {WithoutFunction,K,V>} * */ function without(root, shift, hash, key) { switch (root.type) { @@ -457,6 +635,10 @@ function without(root, shift, hash, key) { return withoutCollision(root, key); } } +/** + * @template K,V + * @type {WithoutFunction,K,V>} + */ function withoutArray(root, shift, hash, key) { const idx = mask(hash, shift); const node = root.array[idx]; @@ -522,6 +704,10 @@ function withoutArray(root, shift, hash, key) { array: cloneAndSet(root.array, idx, n), }; } +/** + * @template K,V + * @type {WithoutFunction,K,V>} + */ function withoutIndex(root, shift, hash, key) { const bit = bitpos(hash, shift); if ((root.bitmap & bit) === 0) { @@ -568,6 +754,12 @@ function withoutIndex(root, shift, hash, key) { } return root; } +/** + * @template K,V + * @param {CollisionNode} root + * @param {K} key + * @returns {undefined | Node} + */ function withoutCollision(root, key) { const idx = collisionIndexOf(root, key); // if the key not found, no changes @@ -586,6 +778,12 @@ function withoutCollision(root, key) { array: spliceOut(root.array, idx), }; } +/** + * @template K,V + * @param {undefined | Node} root + * @param {(value:V,key:K)=>void} fn + * @returns {void} + */ function forEach(root, fn) { if (root === undefined) { return; @@ -604,10 +802,19 @@ function forEach(root, fn) { forEach(item, fn); } } -/** Extra wrapper to keep track of map size and clean up the API */ +/** + * Extra wrapper to keep track of map size and clean up the API + * @template K,V + */ export default class PMap { + /** + * @template V + * @param {Record} o + * @returns {PMap} + */ static fromObject(o) { const keys = Object.keys(o); + /** @type PMap */ let m = PMap.new(); for (let i = 0; i < keys.length; i++) { const k = keys[i]; @@ -615,7 +822,13 @@ export default class PMap { } return m; } + /** + * @template K,V + * @param {Map} o + * @returns {PMap} + */ static fromMap(o) { + /** @type PMap */ let m = PMap.new(); o.forEach((v, k) => { m = m.set(k, v); @@ -625,10 +838,20 @@ export default class PMap { static new() { return new PMap(undefined, 0); } + /** + * @param {undefined | Node} root + * @param {number} size + */ constructor(root, size) { this.root = root; this.size = size; } + /** + * @template NotFound + * @param {K} key + * @param {NotFound} notFound + * @returns {NotFound | V} + */ get(key, notFound) { if (this.root === undefined) { return notFound; @@ -639,6 +862,11 @@ export default class PMap { } return found.v; } + /** + * @param {K} key + * @param {V} val + * @returns {PMap} + */ set(key, val) { const addedLeaf = { val: false }; const root = this.root === undefined ? EMPTY : this.root; @@ -648,6 +876,10 @@ export default class PMap { } return new PMap(newRoot, addedLeaf.val ? this.size + 1 : this.size); } + /** + * @param {K} key + * @returns {PMap} + */ delete(key) { if (this.root === undefined) { return this; @@ -661,20 +893,32 @@ export default class PMap { } return new PMap(newRoot, this.size - 1); } + /** + * @param {K} key + * @returns {boolean} + */ has(key) { if (this.root === undefined) { return false; } return find(this.root, 0, getHash(key), key) !== undefined; } + /** + * @returns {[K,V][]} + */ entries() { if (this.root === undefined) { return []; } + /** @type [K,V][] */ const result = []; this.forEach((v, k) => result.push([k, v])); return result; } + /** + * + * @param {(val:V,key:K)=>void} fn + */ forEach(fn) { forEach(this.root, fn); } @@ -685,6 +929,10 @@ export default class PMap { }); return h; } + /** + * @param {unknown} o + * @returns {boolean} + */ equals(o) { if (!(o instanceof PMap)) { return false; From b46c8ceb04d8ba6769d5b042f8ddbb869a86ce7c Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Mon, 13 Mar 2023 10:30:03 +1300 Subject: [PATCH 17/19] persistent-hash-map.mjs import no longer needed (?) --- src/gleam/map.gleam | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/gleam/map.gleam b/src/gleam/map.gleam index c6251b08..fd9f961f 100644 --- a/src/gleam/map.gleam +++ b/src/gleam/map.gleam @@ -1,11 +1,5 @@ import gleam/option.{Option} -if javascript { - // hack to include another js file.. - pub external fn include_persistent_hash_map() -> Nil = - "../persistent-hash-map.mjs" "__include_me" -} - /// A dictionary of keys and values. /// /// Any type can be used for the keys and values of a map, but all the keys From e59993b3091faad83610b16cdd19bdc141161424 Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Mon, 13 Mar 2023 10:30:28 +1300 Subject: [PATCH 18/19] check that custom hashcode is a number --- src/persistent-hash-map.mjs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/persistent-hash-map.mjs b/src/persistent-hash-map.mjs index bf5ef53a..f454c7db 100644 --- a/src/persistent-hash-map.mjs +++ b/src/persistent-hash-map.mjs @@ -1,3 +1,8 @@ +/** + * This file uses jsdoc to annotate types. + * These types can be checked using the typescript compiler with "checkjs" option. + */ + import { isEqual } from "./gleam.mjs"; const referenceMap = new WeakMap(); @@ -70,7 +75,10 @@ function hashObject(o) { const proto = Object.getPrototypeOf(o); if (proto !== null && typeof proto.hashCode === "function") { try { - return o.hashCode(o); + const code = o.hashCode(o); + if (typeof code === "number") { + return code + } } catch {} } if (o instanceof Promise || o instanceof WeakSet || o instanceof WeakMap) { @@ -945,4 +953,3 @@ export default class PMap { } } -export function __include_me() {} From 74c76ddbb1f83f06629071c603bef68c57637f4f Mon Sep 17 00:00:00 2001 From: Julian Schurhammer Date: Mon, 13 Mar 2023 10:55:47 +1300 Subject: [PATCH 19/19] format --- test/gleam/map_test.gleam | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gleam/map_test.gleam b/test/gleam/map_test.gleam index 1c9f3401..43e17a01 100644 --- a/test/gleam/map_test.gleam +++ b/test/gleam/map_test.gleam @@ -301,7 +301,7 @@ pub fn map_as_key_test() { } pub fn large_n_test() { - let n = 10000 + let n = 10_000 let l = range(0, n, []) let m = list_to_map(l)