-
Notifications
You must be signed in to change notification settings - Fork 138
Improve hostname and domain retrieval #547
Changes from 9 commits
e26ad72
765b1f4
96f925a
afbf31a
ecb4ac1
a9bc646
cf302d7
0eef358
b777c1d
21d6739
016e978
1e94197
492030d
6b527b7
4240eae
fde063f
3b15f45
b73fbf7
39e36a1
aadc4f1
15cfc1d
d687402
89792dd
7afdd1f
b074fd4
1f3f7e0
bfca537
2141195
b654bf3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,7 @@ | |
"papaparse": "^5.3.0", | ||
"rxjs": "6.6.7", | ||
"tldjs": "^2.3.1", | ||
"tldts": "5.7.52", | ||
"zxcvbn": "^4.4.2" | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
export function parse(url: string, options?: object): null { | ||
return null; | ||
} | ||
|
||
export function getHostName(host: string, options?: object): null { | ||
return null; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import * as tldjs from 'tldjs'; | ||
import * as tldts from 'tldts'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to import it this way when it's a native Typescript project? We should be able to use the usual import format. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not necessarily, but I did do it this way, as the |
||
|
||
import { I18nService } from '../abstractions/i18n.service'; | ||
|
||
|
@@ -15,7 +16,8 @@ export class Utils { | |
static global: any = null; | ||
static tldEndingRegex = /.*\.(com|net|org|edu|uk|gov|ca|de|jp|fr|au|ru|ch|io|es|us|co|xyz|info|ly|mil)$/; | ||
// Transpiled version of /\p{Emoji_Presentation}/gu using https://mothereff.in/regexpu. Used for compatability in older browsers. | ||
static regexpEmojiPresentation = /(?:[\u231A\u231B\u23E9-\u23EC\u23F0\u23F3\u25FD\u25FE\u2614\u2615\u2648-\u2653\u267F\u2693\u26A1\u26AA\u26AB\u26BD\u26BE\u26C4\u26C5\u26CE\u26D4\u26EA\u26F2\u26F3\u26F5\u26FA\u26FD\u2705\u270A\u270B\u2728\u274C\u274E\u2753-\u2755\u2757\u2795-\u2797\u27B0\u27BF\u2B1B\u2B1C\u2B50\u2B55]|\uD83C[\uDC04\uDCCF\uDD8E\uDD91-\uDD9A\uDDE6-\uDDFF\uDE01\uDE1A\uDE2F\uDE32-\uDE36\uDE38-\uDE3A\uDE50\uDE51\uDF00-\uDF20\uDF2D-\uDF35\uDF37-\uDF7C\uDF7E-\uDF93\uDFA0-\uDFCA\uDFCF-\uDFD3\uDFE0-\uDFF0\uDFF4\uDFF8-\uDFFF]|\uD83D[\uDC00-\uDC3E\uDC40\uDC42-\uDCFC\uDCFF-\uDD3D\uDD4B-\uDD4E\uDD50-\uDD67\uDD7A\uDD95\uDD96\uDDA4\uDDFB-\uDE4F\uDE80-\uDEC5\uDECC\uDED0-\uDED2\uDED5-\uDED7\uDEEB\uDEEC\uDEF4-\uDEFC\uDFE0-\uDFEB]|\uD83E[\uDD0C-\uDD3A\uDD3C-\uDD45\uDD47-\uDD78\uDD7A-\uDDCB\uDDCD-\uDDFF\uDE70-\uDE74\uDE78-\uDE7A\uDE80-\uDE86\uDE90-\uDEA8\uDEB0-\uDEB6\uDEC0-\uDEC2\uDED0-\uDED6])/g; | ||
static regexpEmojiPresentation = /(?:[\u231A\u231B\u23E9-\u23EC\u23F0\u23F3\u25FD\u25FE\u2614\u2615\u2648-\u2653\u267F\u2693\u26A1\u26AA\u26AB\u26BD\u26BE\u26C4\u26C5\u26CE\u26D4\u26EA\u26F2\u26F3\u26F5\u26FA\u26FD\u2705\u270A\u270B\u2728\u274C\u274E\u2753-\u2755\u2757\u2795-\u2797\u27B0\u27BF\u2B1B\u2B1C\u2B50\u2B55]|\uD83C[\uDC04\uDCCF\uDD8E\uDD91-\uDD9A\uDDE6-\uDDFF\uDE01\uDE1A\uDE2F\uDE32-\uDE36\uDE38-\uDE3A\uDE50\uDE51\uDF00-\uDF20\uDF2D-\uDF35\uDF37-\uDF7C\uDF7E-\uDF93\uDFA0-\uDFCA\uDFCF-\uDFD3\uDFE0-\uDFF0\uDFF4\uDFF8-\uDFFF]|\uD83D[\uDC00-\uDC3E\uDC40\uDC42-\uDCFC\uDCFF-\uDD3D\uDD4B-\uDD4E\uDD50-\uDD67\uDD7A\uDD95\uDD96\uDDA4\uDDFB-\uDE4F\uDE80-\uDEC5\uDECC\uDED0-\uDED2\uDED5-\uDED7\uDEEB\uDEEC\uDEF4-\uDEFC\uDFE0-\uDFEB]|\uD83E[\uDD0C-\uDD3A\uDD3C-\uDD45\uDD47-\uDD78\uDD7A-\uDDCB\uDDCD-\uDDFF\uDE70-\uDE74\uDE78-\uDE7A\uDE80-\uDE86\uDE90-\uDEA8\uDEB0-\uDEB6\uDEC0-\uDEC2\uDED0-\uDED6])/g; | ||
eliykat marked this conversation as resolved.
Show resolved
Hide resolved
|
||
static readonly validHosts: string[] = ['localhost']; | ||
|
||
static init() { | ||
if (Utils.inited) { | ||
|
@@ -187,6 +189,28 @@ export class Utils { | |
} | ||
|
||
static getHostname(uriString: string): string { | ||
if (uriString == null) { | ||
return null; | ||
} | ||
|
||
uriString = uriString.trim(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd include this in the previous There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Calling |
||
if (uriString === '') { | ||
return null; | ||
} | ||
|
||
if (tldts.getHostname != null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume this is a holdover pattern from tldjs. I don't know if/why we need this, I suspect it's a holdover from using a js library and/or the noop implementation. If you move this into its own class then I think you can remove this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that is just a holdover in case we'd use the tldts.noop. Has been removed in a recent push. |
||
// Does uriString contain invalid characters | ||
// TODO Needs to possibly be extended, although '!' is a reserved character | ||
if (uriString.indexOf('!') > 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can tldts do this kind of validation for us? Crafting regexes here seems like a losing battle. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only reason I included is, a test would be failing as it's assumed to be an invalid url. The issue is the exclamation mark is actually a reserved character, it's just not commonly used. So I'd prefer to adjust the test and get rid of this check. |
||
return null; | ||
} | ||
|
||
const hostname = tldts.getHostname(uriString, { validHosts: this.validHosts }); | ||
if (hostname != null) { | ||
return hostname; | ||
} | ||
} | ||
|
||
const url = Utils.getUrl(uriString); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably don't need this if we're removing the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also a fallback and I've removed it. |
||
try { | ||
return url != null && url.hostname !== '' ? url.hostname : null; | ||
|
@@ -225,6 +249,18 @@ export class Utils { | |
httpUrl = true; | ||
} | ||
|
||
const parseResult = tldts.parse != null ? tldts.parse(uriString, { validHosts: this.validHosts }) : null; | ||
if (parseResult != null && parseResult.hostname != null) { | ||
if (parseResult.hostname === 'localhost' || parseResult.isIp) { | ||
return parseResult.hostname; | ||
} | ||
|
||
if (parseResult.domain != null) { | ||
return parseResult.domain; | ||
} | ||
return null; | ||
} | ||
|
||
if (httpUrl) { | ||
try { | ||
const url = Utils.getUrlObject(uriString); | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,20 +20,84 @@ describe('Utils Service', () => { | |
}); | ||
|
||
it('should handle valid urls', () => { | ||
expect(Utils.getDomain('https://bitwarden')).toBe('bitwarden'); | ||
expect(Utils.getDomain('https://bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('http://bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('https://bitwarden.com')).toBe('bitwarden.com'); | ||
|
||
expect(Utils.getDomain('www.bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('http://www.bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('https://www.bitwarden.com')).toBe('bitwarden.com'); | ||
|
||
expect(Utils.getDomain('vault.bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('http://vault.bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('https://vault.bitwarden.com')).toBe('bitwarden.com'); | ||
|
||
expect(Utils.getDomain('www.vault.bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('http://www.vault.bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('https://www.vault.bitwarden.com')).toBe('bitwarden.com'); | ||
|
||
expect(Utils.getDomain('user:password@bitwarden.com:8080/password/sites?and&query#hash')) | ||
eliykat marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.toBe('bitwarden.com'); | ||
expect(Utils.getDomain('http://user:password@bitwarden.com:8080/password/sites?and&query#hash')) | ||
.toBe('bitwarden.com'); | ||
expect(Utils.getDomain('https://user:password@bitwarden.com:8080/password/sites?and&query#hash')) | ||
.toBe('bitwarden.com'); | ||
|
||
expect(Utils.getDomain('bitwarden.unknown')).toBe('bitwarden.unknown'); | ||
expect(Utils.getDomain('http://bitwarden.unknown')).toBe('bitwarden.unknown'); | ||
expect(Utils.getDomain('https://bitwarden.unknown')).toBe('bitwarden.unknown'); | ||
}); | ||
|
||
it('should support localhost and IP', () => { | ||
it('should handle valid urls with an underscore in subdomain', () => { | ||
expect(Utils.getDomain('my_vault.bitwarden.com/')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('http://my_vault.bitwarden.com/')).toBe('bitwarden.com'); | ||
expect(Utils.getDomain('https://my_vault.bitwarden.com/')).toBe('bitwarden.com'); | ||
}); | ||
|
||
it('should support urls containing umlauts', () => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think these are valid test cases because domain names do not actually contain unicode characters. If we want the domain name from a string containing unicode, I think it should do the punycode translation in order to get the "real" domain name. e.g.
However, this does depend on what components/services are using this method. There may be some situations where we want to display the unicode in the GUI. But for behind the scenes domain matching logic, we always need the punycode. I suggest doing some investigation here. |
||
expect(Utils.getDomain('bütwarden.com')).toBe('bütwarden.com'); | ||
expect(Utils.getDomain('http://bütwarden.com')).toBe('bütwarden.com'); | ||
expect(Utils.getDomain('https://bütwarden.com')).toBe('bütwarden.com'); | ||
|
||
expect(Utils.getDomain('subdomain.bütwarden.com')).toBe('bütwarden.com'); | ||
expect(Utils.getDomain('http://subdomain.bütwarden.com')).toBe('bütwarden.com'); | ||
expect(Utils.getDomain('https://subdomain.bütwarden.com')).toBe('bütwarden.com'); | ||
}); | ||
|
||
it('should support punycode urls', () => { | ||
expect(Utils.getDomain('xn--btwarden-65a.com')).toBe('xn--btwarden-65a.com'); | ||
expect(Utils.getDomain('xn--btwarden-65a.com')).toBe('xn--btwarden-65a.com'); | ||
expect(Utils.getDomain('xn--btwarden-65a.com')).toBe('xn--btwarden-65a.com'); | ||
|
||
expect(Utils.getDomain('subdomain.xn--btwarden-65a.com')).toBe('xn--btwarden-65a.com'); | ||
expect(Utils.getDomain('http://subdomain.xn--btwarden-65a.com')).toBe('xn--btwarden-65a.com'); | ||
expect(Utils.getDomain('https://subdomain.xn--btwarden-65a.com')).toBe('xn--btwarden-65a.com'); | ||
}); | ||
|
||
it('should support localhost', () => { | ||
expect(Utils.getDomain('localhost')).toBe('localhost'); | ||
expect(Utils.getDomain('http://localhost')).toBe('localhost'); | ||
expect(Utils.getDomain('https://localhost')).toBe('localhost'); | ||
}); | ||
|
||
it('should support localhost with subdomain', () => { | ||
expect(Utils.getDomain('subdomain.localhost')).toBe('localhost'); | ||
expect(Utils.getDomain('http://subdomain.localhost')).toBe('localhost'); | ||
expect(Utils.getDomain('https://subdomain.localhost')).toBe('localhost'); | ||
}); | ||
|
||
it('should support IPv4', () => { | ||
expect(Utils.getDomain('192.168.1.1')).toBe('192.168.1.1'); | ||
expect(Utils.getDomain('http://192.168.1.1')).toBe('192.168.1.1'); | ||
expect(Utils.getDomain('https://192.168.1.1')).toBe('192.168.1.1'); | ||
}); | ||
|
||
it('should support IPv6', () => { | ||
expect(Utils.getDomain('[2620:fe::fe]')).toBe('2620:fe::fe'); | ||
expect(Utils.getDomain('http://[2620:fe::fe]')).toBe('2620:fe::fe'); | ||
expect(Utils.getDomain('https://[2620:fe::fe]')).toBe('2620:fe::fe'); | ||
}); | ||
|
||
it('should reject invalid hostnames', () => { | ||
expect(Utils.getDomain('https://mywebsite.com$.mywebsite.com')).toBeNull(); | ||
expect(Utils.getDomain('https://mywebsite.com!.mywebsite.com')).toBeNull(); | ||
|
@@ -46,20 +110,85 @@ describe('Utils Service', () => { | |
expect(Utils.getHostname(undefined)).toBeNull(); | ||
expect(Utils.getHostname(' ')).toBeNull(); | ||
expect(Utils.getHostname('https://bit!:"_&ward.com')).toBeNull(); | ||
expect(Utils.getHostname('bitwarden')).toBeNull(); | ||
}); | ||
|
||
it('should handle valid urls', () => { | ||
expect(Utils.getHostname('bitwarden')).toBe('bitwarden'); | ||
expect(Utils.getHostname('http://bitwarden')).toBe('bitwarden'); | ||
expect(Utils.getHostname('https://bitwarden')).toBe('bitwarden'); | ||
|
||
expect(Utils.getHostname('bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getHostname('https://bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getHostname('http://bitwarden.com')).toBe('bitwarden.com'); | ||
expect(Utils.getHostname('https://bitwarden.com')).toBe('bitwarden.com'); | ||
|
||
expect(Utils.getHostname('www.bitwarden.com')).toBe('www.bitwarden.com'); | ||
expect(Utils.getHostname('http://www.bitwarden.com')).toBe('www.bitwarden.com'); | ||
expect(Utils.getHostname('https://www.bitwarden.com')).toBe('www.bitwarden.com'); | ||
|
||
expect(Utils.getHostname('vault.bitwarden.com')).toBe('vault.bitwarden.com'); | ||
expect(Utils.getHostname('http://vault.bitwarden.com')).toBe('vault.bitwarden.com'); | ||
expect(Utils.getHostname('https://vault.bitwarden.com')).toBe('vault.bitwarden.com'); | ||
|
||
expect(Utils.getHostname('www.vault.bitwarden.com')).toBe('www.vault.bitwarden.com'); | ||
expect(Utils.getHostname('http://www.vault.bitwarden.com')).toBe('www.vault.bitwarden.com'); | ||
expect(Utils.getHostname('https://www.vault.bitwarden.com')).toBe('www.vault.bitwarden.com'); | ||
|
||
expect(Utils.getHostname('user:password@bitwarden.com:8080/password/sites?and&query#hash')) | ||
.toBe('bitwarden.com'); | ||
expect(Utils.getHostname('https://user:password@bitwarden.com:8080/password/sites?and&query#hash')) | ||
.toBe('bitwarden.com'); | ||
expect(Utils.getHostname('https://bitwarden.unknown')).toBe('bitwarden.unknown'); | ||
}); | ||
|
||
it('should support localhost and IP', () => { | ||
it('should handle valid urls with an underscore in subdomain', () => { | ||
expect(Utils.getHostname('my_vault.bitwarden.com/')).toBe('my_vault.bitwarden.com'); | ||
expect(Utils.getHostname('http://my_vault.bitwarden.com/')).toBe('my_vault.bitwarden.com'); | ||
expect(Utils.getHostname('https://my_vault.bitwarden.com/')).toBe('my_vault.bitwarden.com'); | ||
}); | ||
|
||
it('should support urls containing umlauts', () => { | ||
expect(Utils.getHostname('bütwarden.com')).toBe('bütwarden.com'); | ||
expect(Utils.getHostname('http://bütwarden.com')).toBe('bütwarden.com'); | ||
expect(Utils.getHostname('https://bütwarden.com')).toBe('bütwarden.com'); | ||
|
||
expect(Utils.getHostname('subdomain.bütwarden.com')).toBe('subdomain.bütwarden.com'); | ||
expect(Utils.getHostname('http://subdomain.bütwarden.com')).toBe('subdomain.bütwarden.com'); | ||
expect(Utils.getHostname('https://subdomain.bütwarden.com')).toBe('subdomain.bütwarden.com'); | ||
}); | ||
|
||
it('should support punycode urls', () => { | ||
expect(Utils.getHostname('xn--btwarden-65a.com')).toBe('xn--btwarden-65a.com'); | ||
expect(Utils.getHostname('xn--btwarden-65a.com')).toBe('xn--btwarden-65a.com'); | ||
expect(Utils.getHostname('xn--btwarden-65a.com')).toBe('xn--btwarden-65a.com'); | ||
|
||
expect(Utils.getHostname('subdomain.xn--btwarden-65a.com')).toBe('subdomain.xn--btwarden-65a.com'); | ||
expect(Utils.getHostname('http://subdomain.xn--btwarden-65a.com')).toBe('subdomain.xn--btwarden-65a.com'); | ||
expect(Utils.getHostname('https://subdomain.xn--btwarden-65a.com')).toBe('subdomain.xn--btwarden-65a.com'); | ||
}); | ||
|
||
it('should support localhost', () => { | ||
expect(Utils.getHostname('localhost')).toBe('localhost'); | ||
expect(Utils.getHostname('http://localhost')).toBe('localhost'); | ||
expect(Utils.getHostname('https://localhost')).toBe('localhost'); | ||
}); | ||
|
||
it('should support localhost with subdomain', () => { | ||
expect(Utils.getHostname('subdomain.localhost')).toBe('subdomain.localhost'); | ||
expect(Utils.getHostname('http://subdomain.localhost')).toBe('subdomain.localhost'); | ||
expect(Utils.getHostname('https://subdomain.localhost')).toBe('subdomain.localhost'); | ||
}); | ||
|
||
it('should support IPv4', () => { | ||
expect(Utils.getHostname('192.168.1.1')).toBe('192.168.1.1'); | ||
expect(Utils.getHostname('http://192.168.1.1')).toBe('192.168.1.1'); | ||
expect(Utils.getHostname('https://192.168.1.1')).toBe('192.168.1.1'); | ||
}); | ||
|
||
it('should support IPv6', () => { | ||
expect(Utils.getHostname('[2620:fe::fe]')).toBe('2620:fe::fe'); | ||
expect(Utils.getHostname('http://[2620:fe::fe]')).toBe('2620:fe::fe'); | ||
expect(Utils.getHostname('https://[2620:fe::fe]')).toBe('2620:fe::fe'); | ||
}); | ||
}); | ||
|
||
describe('newGuid', () => { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume we have a noop version so that other clients can use the Utils class without needing this dependency. But this seems like a bad pattern. Can we extract this out to a new class/service instead, so that it's only imported by the clients that need it, it doesn't interfere with Utils, and we don't need the noop?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See comment here: #547 (comment)