From 27e542570cda2ffd7ba488839c364f3828d0c590 Mon Sep 17 00:00:00 2001 From: Eran Hammer Date: Wed, 30 Oct 2019 16:34:39 -0700 Subject: [PATCH] Add uri decode. Closes #16 --- lib/decode.js | 120 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/index.d.ts | 9 ++++ lib/index.js | 2 + test/decode.js | 102 +++++++++++++++++++++++++++++++++++++++++ test/domain.js | 2 +- test/email.js | 2 +- test/index.ts | 10 +++++ test/uri.js | 2 +- 8 files changed, 246 insertions(+), 3 deletions(-) create mode 100755 lib/decode.js create mode 100755 test/decode.js diff --git a/lib/decode.js b/lib/decode.js new file mode 100755 index 0000000..06a1236 --- /dev/null +++ b/lib/decode.js @@ -0,0 +1,120 @@ +'use strict'; + +// Adapted from: +// Copyright (c) 2017-2019 Justin Ridgewell, MIT Licensed, https://github.com/jridgewell/safe-decode-string-component +// Copyright (c) 2008-2009 Bjoern Hoehrmann , MIT Licensed, http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + + +const internals = {}; + + +exports.decode = function (string) { + + let percentPos = string.indexOf('%'); + if (percentPos === -1) { + return string; + } + + let decoded = ''; + let last = 0; + let codepoint = 0; + let startOfOctets = percentPos; + let state = internals.utf8.accept; + + while (percentPos > -1 && + percentPos < string.length) { + + const high = internals.resolveHex(string[percentPos + 1], 4); + const low = internals.resolveHex(string[percentPos + 2], 0); + const byte = high | low; + const type = internals.utf8.data[byte]; + state = internals.utf8.data[256 + state + type]; + codepoint = (codepoint << 6) | (byte & internals.utf8.data[364 + type]); + + if (state === internals.utf8.accept) { + decoded += string.slice(last, startOfOctets); + decoded += codepoint <= 0xFFFF + ? String.fromCharCode(codepoint) + : String.fromCharCode(0xD7C0 + (codepoint >> 10), 0xDC00 + (codepoint & 0x3FF)); + + codepoint = 0; + last = percentPos + 3; + percentPos = string.indexOf('%', last); + startOfOctets = percentPos; + continue; + } + + if (state === internals.utf8.reject) { + return null; + } + + percentPos += 3; + + if (percentPos >= string.length || + string[percentPos] !== '%') { + + return null; + } + } + + return decoded + string.slice(last); +}; + + +internals.resolveHex = function (char, shift) { + + const i = internals.hex[char]; + return i === undefined ? 255 : i << shift; +}; + + +internals.hex = { + '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, + '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, + 'a': 10, 'A': 10, 'b': 11, 'B': 11, 'c': 12, + 'C': 12, 'd': 13, 'D': 13, 'e': 14, 'E': 14, + 'f': 15, 'F': 15 +}; + + +internals.utf8 = { + accept: 12, + reject: 0, + data: [ + + // Maps bytes to character to a transition + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7, + 10, 9, 9, 9, 11, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + + // Maps a state to a new state when adding a transition + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 12, 0, 0, 0, 0, 24, 36, 48, 60, 72, 84, 96, + 0, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 24, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 48, 48, 48, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 48, 48, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + // Maps the current transition to a mask that needs to apply to the byte + + 0x7F, 0x3F, 0x3F, 0x3F, 0x00, 0x1F, 0x0F, 0x0F, 0x0F, 0x07, 0x07, 0x07 + ] +}; diff --git a/lib/index.d.ts b/lib/index.d.ts index 32b5006..a533d73 100755 --- a/lib/index.d.ts +++ b/lib/index.d.ts @@ -173,6 +173,15 @@ export namespace ip { export namespace uri { + /** + * Faster version of decodeURIComponent() that does not throw. + * + * @param string - the URL string to decode. + * + * @returns the decoded string or null if invalid. + */ + function decode(string: string): string | null; + /** * Generates a regular expression used to validate URI addresses. * diff --git a/lib/index.js b/lib/index.js index 7e8064d..b93a9c5 100755 --- a/lib/index.js +++ b/lib/index.js @@ -1,5 +1,6 @@ 'use strict'; +const Decode = require('./decode'); const Domain = require('./domain'); const Email = require('./email'); const Errors = require('./errors'); @@ -46,6 +47,7 @@ module.exports = { regex: Ip.regex }, uri: { + decode: Decode.decode, regex: Uri.regex } }; diff --git a/test/decode.js b/test/decode.js new file mode 100755 index 0000000..53357c8 --- /dev/null +++ b/test/decode.js @@ -0,0 +1,102 @@ +'use strict'; + +const Address = require('..'); +const Code = require('@hapi/code'); +const Lab = require('@hapi/lab'); + + +const internals = {}; + + +const { describe, it } = exports.lab = Lab.script(); +const expect = Code.expect; + + +describe('uri.decode()', () => { + + it('decodes URI strings', () => { + + const strings = [ + '', + 'abcd', + '1+2+3+4', + 'a b c d', + '=x', + '%25', + 'p%C3%A5ss', + '%61+%4d%4D', + '\uFEFFtest', + '\uFEFF', + '%EF%BB%BFtest', + '%EF%BB%BF', + '%C2%B5', + '†', + '/a/b%2Fc', + '¢™💩', + encodeURI('¢™💩') + ]; + + for (const string of strings) { + expect(Address.uri.decode(string)).to.equal(decodeURIComponent(string)); + } + }); + + it('handles invalid strings', () => { + + const strings = [ + '%', + '%2', + '%%25%%', + '%ab', + '%ab%ac%ad', + 'f%C3%A5il%', + 'f%C3%A5%il', + '%f%C3%A5il', + 'f%%C3%%A5il', + '%C2%B5%', + '%%C2%B5%', + '%E0%A4%A', + '/a/b%"Fc', + '64I%C8yY3wM9tB89x2S~3Hs4AXz3TKPS', + 'l3k%Dbbbxn.L5P2ilI-tLxUgndaWnr81', + 'fum3GJU-DLBgO%dehn%MGDsM-jn-p-_Q', + 'AWgvg5oEgIJoS%eD28Co4koKtu346v3j', + 'k3%c4NVrqbGf~8IeQyDueGVwV1a8_vb4', + 'QlW8P%e9ARoU4chM4ckznRJWP-6RmIL5', + 'h7w6%dfcx4k.EYkPlGey._b%wfOb-Y1q', + 'zFtcAt%ca9ITgiTldiF_nfNlf7a0a578', + '.vQD.nCmjJNEpid%e5KglS35Sv-97GMk', + '8qYKc_4Zx%eA.1C6K99CtyuN4_Xl8edp', + '.Y4~dvjs%D7Qqhy8wQz3O~mLuFXGNG2T', + 'kou6MHS%f3AJTpe8.%eOhfZptvsGmCAC', + '-yUdrHiMrRp1%DfvjZ.vkn_dO9p~q07A', + 'e6BF%demc0%52iqSGOPL3kvYePf-7LIH', + 'Aeo_4FxaGyC.w~F1TAAK9uYf-y._m%ca', + 'z0krVTLPXhcqW~1PxkEmke0CmNcIT%EE', + '3KqqzjaF.6QH6M5gm5PnV5iR3X99n%Cb', + 'Nl_0qJEX6ZBVK2E3qvFNL0sMJzpxK%DF', + 'WKj35GkCYJ~ZF_mkKZnPBQzo2CJBj%D6', + 'ym8WNqRjaxrK9CEf.Y.Twn0he8.6b%ca', + 'S4q0CjXZW5aWtnGiJl.svb7ow8HG6%c9', + '0iL5JYG96IjiQ1PHfxTobQOjaqv7.%d3', + '3OzV6xpZ2xmPxSBoMTTC_LcFpnE0M%Ea', + 'dvQN9Ra2UoWefWY.MEZXaD69bUHNc%Cd' + ]; + + for (const string of strings) { + expect(() => decodeURIComponent(string)).to.throw(); + expect(Address.uri.decode(string)).to.be.null(); + } + }); + + it('decodes every character', () => { + + const chars = []; + for (let i = 0; i < 256; ++i) { + chars.push(encodeURI(String.fromCharCode(i))); + } + + const string = chars.join('a1$#'); + expect(Address.uri.decode(string)).to.equal(decodeURIComponent(string)); + }); +}); diff --git a/test/domain.js b/test/domain.js index a4320b0..35fc9c7 100755 --- a/test/domain.js +++ b/test/domain.js @@ -1,4 +1,4 @@ -'use strict'; +'use strict'; const Punycode = require('punycode'); diff --git a/test/email.js b/test/email.js index b6d160b..5be362f 100755 --- a/test/email.js +++ b/test/email.js @@ -1,4 +1,4 @@ -'use strict'; +'use strict'; const Punycode = require('punycode'); diff --git a/test/index.ts b/test/index.ts index 757a5f6..7263805 100755 --- a/test/index.ts +++ b/test/index.ts @@ -137,6 +137,16 @@ expect.error(Address.ip.regex({ version: 'x' })); expect.error(Address.ip.regex({ version: ['x'] })); +// uri.decode() + +Address.uri.decode('%25a%25'); + +expect.type(Address.uri.decode('')); + +expect.error(Address.uri.decode()); +expect.error(Address.uri.decode(123)); + + // uri.regex() Address.uri.regex(); diff --git a/test/uri.js b/test/uri.js index 0212639..bfb40ee 100755 --- a/test/uri.js +++ b/test/uri.js @@ -1,4 +1,4 @@ -'use strict'; +'use strict'; const Address = require('..'); const Code = require('@hapi/code');