Skip to content

Commit

Permalink
Add uri decode. Closes #16
Browse files Browse the repository at this point in the history
  • Loading branch information
hueniverse committed Oct 30, 2019
1 parent afa1432 commit 27e5425
Show file tree
Hide file tree
Showing 8 changed files with 246 additions and 3 deletions.
120 changes: 120 additions & 0 deletions lib/decode.js
@@ -0,0 +1,120 @@
'use strict';

// Adapted from:
// Copyright (c) 2017-2019 Justin Ridgewell, MIT Licensed, https://github.com/jridgewell/safe-decode-string-component
// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>, MIT Licensed, http://bjoern.hoehrmann.de/utf-8/decoder/dfa/


const internals = {};


exports.decode = function (string) {

let percentPos = string.indexOf('%');
if (percentPos === -1) {
return string;
}

let decoded = '';
let last = 0;
let codepoint = 0;
let startOfOctets = percentPos;
let state = internals.utf8.accept;

while (percentPos > -1 &&
percentPos < string.length) {

const high = internals.resolveHex(string[percentPos + 1], 4);
const low = internals.resolveHex(string[percentPos + 2], 0);
const byte = high | low;
const type = internals.utf8.data[byte];
state = internals.utf8.data[256 + state + type];
codepoint = (codepoint << 6) | (byte & internals.utf8.data[364 + type]);

if (state === internals.utf8.accept) {
decoded += string.slice(last, startOfOctets);
decoded += codepoint <= 0xFFFF
? String.fromCharCode(codepoint)
: String.fromCharCode(0xD7C0 + (codepoint >> 10), 0xDC00 + (codepoint & 0x3FF));

codepoint = 0;
last = percentPos + 3;
percentPos = string.indexOf('%', last);
startOfOctets = percentPos;
continue;
}

if (state === internals.utf8.reject) {
return null;
}

percentPos += 3;

if (percentPos >= string.length ||
string[percentPos] !== '%') {

return null;
}
}

return decoded + string.slice(last);
};


/*
    Converts one hexadecimal digit character into its numeric value shifted left by `shift` bits.

    char - the character to convert (may be undefined when read past the end of a string).
    shift - number of bits to shift the digit value by (4 for the high nibble, 0 for the low nibble).

    Returns the shifted value, or 255 when char is not a hexadecimal digit.
*/
internals.resolveHex = function (char, shift) {

    const digit = internals.hex[char];
    if (digit === undefined) {
        return 255;
    }

    return digit << shift;
};


// Value of each hexadecimal digit character, accepting both lower and upper case letters

internals.hex = {
    0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9,
    a: 10, A: 10,
    b: 11, B: 11,
    c: 12, C: 12,
    d: 13, D: 13,
    e: 14, E: 14,
    f: 15, F: 15
};


// Tables driving the UTF-8 validation state machine used by exports.decode().
// `data` is a single flat array made of three consecutive sections:
//   [0, 256)    byte value -> character class (0-11)
//   [256, 364)  current state + character class -> next state (9 rows of 12 entries; a state is the offset of its row)
//   [364, 376)  character class -> bit mask selecting the payload bits of the byte

internals.utf8 = {
accept: 12, // State in which a complete code point has been read (also the starting state of decode())
reject: 0, // State reached on an invalid byte sequence
data: [

// Maps each byte value (16 values per row, starting at 0x00) to its character class

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7,
10, 9, 9, 9, 11, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

// Maps (state, character class) to the next state; read as data[256 + state + class].
// Each row below is one state (offsets 0, 12, 24, ... 96) and each column one character class.

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12, 0, 0, 0, 0, 24, 36, 48, 60, 72, 84, 96,
0, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0,
0, 24, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0,
0, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 48, 48, 48, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 48, 48, 0, 0, 0, 0, 0, 0, 0, 0,
0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

// Maps a character class to the mask applied to the byte before it is merged into the code point; read as data[364 + class]

0x7F, 0x3F, 0x3F, 0x3F, 0x00, 0x1F, 0x0F, 0x0F, 0x0F, 0x07, 0x07, 0x07
]
};
9 changes: 9 additions & 0 deletions lib/index.d.ts
Expand Up @@ -173,6 +173,15 @@ export namespace ip {

export namespace uri {

/**
* Faster version of decodeURIComponent() that does not throw.
*
* @param string - the percent-encoded URI string to decode.
*
* @returns the decoded string, or null if the input contains a malformed percent-escape or escaped bytes that are not valid UTF-8.
*/
function decode(string: string): string | null;

/**
* Generates a regular expression used to validate URI addresses.
*
Expand Down
2 changes: 2 additions & 0 deletions lib/index.js
@@ -1,5 +1,6 @@
'use strict';

const Decode = require('./decode');
const Domain = require('./domain');
const Email = require('./email');
const Errors = require('./errors');
Expand Down Expand Up @@ -46,6 +47,7 @@ module.exports = {
regex: Ip.regex
},
uri: {
decode: Decode.decode,
regex: Uri.regex
}
};
Expand Down
102 changes: 102 additions & 0 deletions test/decode.js
@@ -0,0 +1,102 @@
'use strict';

const Address = require('..');
const Code = require('@hapi/code');
const Lab = require('@hapi/lab');


const internals = {};


const { describe, it } = exports.lab = Lab.script();
const expect = Code.expect;


// Checks uri.decode() against the built-in decodeURIComponent(): same output for valid input,
// and null wherever the built-in throws.

describe('uri.decode()', () => {

    it('decodes URI strings', () => {

        const valid = [
            '',
            'abcd',
            '1+2+3+4',
            'a b c d',
            '=x',
            '%25',
            'p%C3%A5ss',
            '%61+%4d%4D',
            '\uFEFFtest',
            '\uFEFF',
            '%EF%BB%BFtest',
            '%EF%BB%BF',
            '%C2%B5',
            '†',
            '/a/b%2Fc',
            '¢™💩',
            encodeURI('¢™💩')
        ];

        valid.forEach((input) => {

            const expected = decodeURIComponent(input);
            expect(Address.uri.decode(input)).to.equal(expected);
        });
    });

    it('handles invalid strings', () => {

        // Every entry must make decodeURIComponent() throw and uri.decode() return null

        const invalid = [
            '%',
            '%2',
            '%%25%%',
            '%ab',
            '%ab%ac%ad',
            'f%C3%A5il%',
            'f%C3%A5%il',
            '%f%C3%A5il',
            'f%%C3%%A5il',
            '%C2%B5%',
            '%%C2%B5%',
            '%E0%A4%A',
            '/a/b%"Fc',
            '64I%C8yY3wM9tB89x2S~3Hs4AXz3TKPS',
            'l3k%Dbbbxn.L5P2ilI-tLxUgndaWnr81',
            'fum3GJU-DLBgO%dehn%MGDsM-jn-p-_Q',
            'AWgvg5oEgIJoS%eD28Co4koKtu346v3j',
            'k3%c4NVrqbGf~8IeQyDueGVwV1a8_vb4',
            'QlW8P%e9ARoU4chM4ckznRJWP-6RmIL5',
            'h7w6%dfcx4k.EYkPlGey._b%wfOb-Y1q',
            'zFtcAt%ca9ITgiTldiF_nfNlf7a0a578',
            '.vQD.nCmjJNEpid%e5KglS35Sv-97GMk',
            '8qYKc_4Zx%eA.1C6K99CtyuN4_Xl8edp',
            '.Y4~dvjs%D7Qqhy8wQz3O~mLuFXGNG2T',
            'kou6MHS%f3AJTpe8.%eOhfZptvsGmCAC',
            '-yUdrHiMrRp1%DfvjZ.vkn_dO9p~q07A',
            'e6BF%demc0%52iqSGOPL3kvYePf-7LIH',
            'Aeo_4FxaGyC.w~F1TAAK9uYf-y._m%ca',
            'z0krVTLPXhcqW~1PxkEmke0CmNcIT%EE',
            '3KqqzjaF.6QH6M5gm5PnV5iR3X99n%Cb',
            'Nl_0qJEX6ZBVK2E3qvFNL0sMJzpxK%DF',
            'WKj35GkCYJ~ZF_mkKZnPBQzo2CJBj%D6',
            'ym8WNqRjaxrK9CEf.Y.Twn0he8.6b%ca',
            'S4q0CjXZW5aWtnGiJl.svb7ow8HG6%c9',
            '0iL5JYG96IjiQ1PHfxTobQOjaqv7.%d3',
            '3OzV6xpZ2xmPxSBoMTTC_LcFpnE0M%Ea',
            'dvQN9Ra2UoWefWY.MEZXaD69bUHNc%Cd'
        ];

        invalid.forEach((input) => {

            expect(() => decodeURIComponent(input)).to.throw();
            expect(Address.uri.decode(input)).to.be.null();
        });
    });

    it('decodes every character', () => {

        // URI-encode each of the first 256 char codes and join them with literal filler text

        const encoded = Array.from({ length: 256 }, (ignore, code) => encodeURI(String.fromCharCode(code)));
        const input = encoded.join('a1$#');

        expect(Address.uri.decode(input)).to.equal(decodeURIComponent(input));
    });
});
2 changes: 1 addition & 1 deletion test/domain.js
@@ -1,4 +1,4 @@
'use strict';
'use strict';

const Punycode = require('punycode');

Expand Down
2 changes: 1 addition & 1 deletion test/email.js
@@ -1,4 +1,4 @@
'use strict';
'use strict';

const Punycode = require('punycode');

Expand Down
10 changes: 10 additions & 0 deletions test/index.ts
Expand Up @@ -137,6 +137,16 @@ expect.error(Address.ip.regex({ version: 'x' }));
expect.error(Address.ip.regex({ version: ['x'] }));


// uri.decode()

// A call with a single string argument is accepted
Address.uri.decode('%25a%25');

// The declared return type is string | null
expect.type<string | null>(Address.uri.decode(''));

// NOTE(review): expect.error() presumably asserts that the enclosed call fails type-checking
// (here: a missing argument and a non-string argument) - confirm against the typings test helper
expect.error(Address.uri.decode());
expect.error(Address.uri.decode(123));


// uri.regex()

Address.uri.regex();
Expand Down
2 changes: 1 addition & 1 deletion test/uri.js
@@ -1,4 +1,4 @@
'use strict';
'use strict';

const Address = require('..');
const Code = require('@hapi/code');
Expand Down

0 comments on commit 27e5425

Please sign in to comment.