From f274104865794f7f24db4244d591c39ad16f6688 Mon Sep 17 00:00:00 2001 From: Richard Moore Date: Wed, 17 Aug 2022 23:03:27 -0400 Subject: [PATCH] Applied latest changes from ens-normalize (#42, #2376, #2754). --- packages/hash/src.ts/ens-normalize/decoder.ts | 28 ++-- packages/hash/src.ts/ens-normalize/include.ts | 2 +- packages/hash/src.ts/ens-normalize/lib.ts | 137 +++++++++++------- packages/hash/src.ts/namehash.ts | 22 --- 4 files changed, 100 insertions(+), 89 deletions(-) diff --git a/packages/hash/src.ts/ens-normalize/decoder.ts b/packages/hash/src.ts/ens-normalize/decoder.ts index 825e2b61dc..bc6d8c3ad2 100644 --- a/packages/hash/src.ts/ens-normalize/decoder.ts +++ b/packages/hash/src.ts/ens-normalize/decoder.ts @@ -216,10 +216,10 @@ export function read_zero_terminated_array(next: NextFunc): Array { return v; } -function read_transposed(n: number, w: number, next: NextFunc, lookup?: NextFunc): Array> { +function read_transposed(n: number, w: number, next: NextFunc): Array> { let m = Array(n).fill(undefined).map(() => []); for (let i = 0; i < w; i++) { - read_deltas(n, next).forEach((x, j) => m[j].push(lookup ? 
lookup(x) : x)); + read_deltas(n, next).forEach((x, j) => m[j].push(x)); } return m; } @@ -254,7 +254,7 @@ export type Branch = { export type Node = { branches: Array; - valid: boolean; + valid: number; fe0f: boolean; save: boolean; check: boolean; @@ -266,18 +266,18 @@ export function read_emoji_trie(next: NextFunc): Node { function read(): Node { let branches = []; while (true) { - let keys = read_member_array(next); + let keys = read_member_array(next, sorted); if (keys.length == 0) break; - branches.push({set: new Set(keys.map(i => sorted[i])), node: read()}); + branches.push({set: new Set(keys), node: read()}); } - branches.sort((a, b) => b.set.size - a.set.size); - let flag = next(); - return { - branches, - valid: (flag & 1) != 0, - fe0f: (flag & 2) != 0, - save: (flag & 4) != 0, - check: (flag & 8) != 0, - }; + branches.sort((a, b) => b.set.size - a.set.size); // sort by likelihood + let temp = next(); + let valid = temp % 3; + temp = (temp / 3)|0; + let fe0f = !!(temp & 1); + temp >>= 1; + let save = temp == 1; + let check = temp == 2; + return {branches, valid, fe0f, save, check}; } } diff --git a/packages/hash/src.ts/ens-normalize/include.ts b/packages/hash/src.ts/ens-normalize/include.ts index 38a6565e12..5eb56ca6b3 100644 --- a/packages/hash/src.ts/ens-normalize/include.ts +++ b/packages/hash/src.ts/ens-normalize/include.ts @@ -32,6 +32,6 @@ import { decode } from "@ethersproject/base64"; import {read_compressed_payload} from './decoder.js'; export function getData(): () => number { - return read_compressed_payload(decode('')); + return read_compressed_payload(decode('')); } diff --git a/packages/hash/src.ts/ens-normalize/lib.ts b/packages/hash/src.ts/ens-normalize/lib.ts index 20fd7ad087..a2007bce60 100644 --- a/packages/hash/src.ts/ens-normalize/lib.ts +++ b/packages/hash/src.ts/ens-normalize/lib.ts @@ -34,8 +34,6 @@ const r = getData(); import {read_member_array, read_mapped_map, read_emoji_trie} from './decoder.js'; -import type { Node } from 
"./decoder.js"; - // @TODO: This should be lazily loaded const VALID = new Set(read_member_array(r)); @@ -44,64 +42,99 @@ const MAPPED = read_mapped_map(r); const EMOJI_ROOT = read_emoji_trie(r); //const NFC_CHECK = new Set(read_member_array(r, Array.from(VALID.values()).sort((a, b) => a - b))); -function nfc(s: string): string { - return s.normalize('NFC'); +//const STOP = 0x2E; +const HYPHEN = 0x2D; +const UNDERSCORE = 0x5F; + +function explode_cp(name: string): Array { + return toUtf8CodePoints(name); } function filter_fe0f(cps: Array): Array { return cps.filter(cp => cp != 0xFE0F); } -export function ens_normalize(name: string, beautify = false): string { - const input = toUtf8CodePoints(name).reverse(); // flip for pop - const output = []; - while (input.length) { - const emoji = consume_emoji_reversed(input, EMOJI_ROOT); - if (emoji) { - output.push(...(beautify ? emoji : filter_fe0f(emoji))); - continue; - } - const cp = input.pop(); - if (VALID.has(cp)) { - output.push(cp); - continue; - } - if (IGNORED.has(cp)) { - continue; - } - let cps = MAPPED[cp]; - if (cps) { - output.push(...cps); - continue; - } - throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`); - } - return nfc(String.fromCodePoint(...output)); +export function ens_normalize_post_check(name: string): string { + for (let label of name.split('.')) { + let cps = explode_cp(label); + try { + for (let i = cps.lastIndexOf(UNDERSCORE) - 1; i >= 0; i--) { + if (cps[i] !== UNDERSCORE) { + throw new Error(`underscore only allowed at start`); + } + } + if (cps.length >= 4 && cps.every(cp => cp < 0x80) && cps[2] === HYPHEN && cps[3] === HYPHEN) { + throw new Error(`invalid label extension`); + } + } catch (err) { + throw new Error(`Invalid label "${label}": ${err.message}`); + } + } + return name; +} + +export function ens_normalize(name: string): string { + return ens_normalize_post_check(normalize(name, filter_fe0f)); } +function normalize(name: string, emoji_filter: (a: Array) 
=> Array): string { + let input = explode_cp(name).reverse(); // flip for pop + let output = []; + while (input.length) { + let emoji = consume_emoji_reversed(input); + if (emoji) { + output.push(...emoji_filter(emoji)); + continue; + } + let cp = input.pop(); + if (VALID.has(cp)) { + output.push(cp); + continue; + } + if (IGNORED.has(cp)) { + continue; + } + let cps = MAPPED[cp]; + if (cps) { + output.push(...cps); + continue; + } + throw new Error(`Disallowed codepoint: 0x${cp.toString(16).toUpperCase()}`); + } + return ens_normalize_post_check(nfc(String.fromCodePoint(...output))); +} -function consume_emoji_reversed(cps: Array, node: Node, eaten?: Array) { - let emoji; - const stack = []; - let pos = cps.length; - if (eaten) { eaten.length = 0; } // clear input buffer (if needed) - while (pos) { - const cp = cps[--pos]; - const branch = node.branches.find(x => x.set.has(cp)); - if (branch == null) { break; } - node = branch.node; - if (!node) { break; } - stack.push(cp); - if (node.fe0f) { - stack.push(0xFE0F); - if (pos > 0 && cps[pos - 1] == 0xFE0F) { pos--; } - } - if (node.valid) { // this is a valid emoji (so far) - emoji = stack.slice(); // copy stack - if (eaten) { eaten.push(...cps.slice(pos).reverse()); } // copy input (if needed) - cps.length = pos; // truncate - } - } - return emoji; +function nfc(s: string): string { + return s.normalize('NFC'); } +function consume_emoji_reversed(cps: Array, eaten?: Array) { + let node = EMOJI_ROOT; + let emoji; + let saved; + let stack = []; + let pos = cps.length; + if (eaten) eaten.length = 0; // clear input buffer (if needed) + while (pos) { + let cp = cps[--pos]; + node = node.branches.find(x => x.set.has(cp))?.node; + if (!node) break; + if (node.save) { // remember + saved = cp; + } else if (node.check) { // check exclusion + if (cp === saved) break; + } + stack.push(cp); + if (node.fe0f) { + stack.push(0xFE0F); + if (pos > 0 && cps[pos - 1] == 0xFE0F) pos--; // consume optional FE0F + } + if (node.valid) { 
// this is a valid emoji (so far) + emoji = stack.slice(); // copy stack + if (node.valid == 2) emoji.splice(1, 1); // delete FE0F at position 1 (RGI ZWJ don't follow spec!) + if (eaten) eaten.push(...cps.slice(pos).reverse()); // copy input (if needed) + cps.length = pos; // truncate + } + } + return emoji; +} diff --git a/packages/hash/src.ts/namehash.ts b/packages/hash/src.ts/namehash.ts index 953ff99041..5e38be59ab 100644 --- a/packages/hash/src.ts/namehash.ts +++ b/packages/hash/src.ts/namehash.ts @@ -13,28 +13,6 @@ Zeros.fill(0); function checkComponent(comp: Uint8Array): Uint8Array { if (comp.length === 0) { throw new Error("invalid ENS name; empty component"); } - let nonUnder = false; - let allAscii = true; - for (let i = 0; i < comp.length; i++) { - const c = comp[i]; - - // An underscore (i.e. "_"); only allows at the beginning - if (c === 0x5f) { - if (nonUnder) { throw new Error("invalid ENS name; non-prefix underscore"); } - } else { - // Non-ASCII byte - if (c & 0x80) { allAscii = false; } - - // Non-underscore found - nonUnder = true; - } - } - - // Prevent punycode-looking components - if (allAscii && comp[2] === 0x2d && comp[3] === 0x2d) { - throw new Error("invalid ENS name; punycode conflict"); - } - return comp; }