Skip to content

Commit

Permalink
feat(URL): Support drive letters for file URLs on Windows (#5074)
Browse files Browse the repository at this point in the history
refactor: Parse URLs more sequentially. This makes it easier to change matching behaviour depending on the protocol.
fix: Fail when a host isn't given for certain protocols.
fix: Convert back-slashes into forward-slashes.
  • Loading branch information
nayeemrmn committed May 4, 2020
1 parent 6c02b06 commit 8c509bd
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 74 deletions.
56 changes: 56 additions & 0 deletions cli/js/tests/url_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,43 @@ unitTest(function urlSearchParamsReuse(): void {
assert(sp === url.searchParams, "Search params should be reused.");
});

// Back-slashes in the input must come out as forward-slashes in `href`.
unitTest(function urlBackSlashes(): void {
  const input = "https:\\\\foo:bar@baz.qat:8000\\qux\\quux?foo=bar&baz=12#qat";
  const expected = "https://foo:bar@baz.qat:8000/qux/quux?foo=bar&baz=12#qat";
  const { href } = new URL(input);
  assertEquals(href, expected);
});

// A "file:" URL may omit its host; the other special schemes must reject a
// URL that has none.
unitTest(function urlRequireHost(): void {
  assertEquals(new URL("file:///").href, "file:///");
  const hostlessUrls = ["ftp:///", "http:///", "https:///", "ws:///", "wss:///"];
  for (const hostlessUrl of hostlessUrls) {
    assertThrows(() => {
      new URL(hostlessUrl);
    });
  }
});

// A bare drive letter gets a trailing slash only for file URLs on Windows.
unitTest(function urlDriveLetter() {
  const fileHref = new URL("file:///C:").href;
  const expectedFileHref =
    Deno.build.os == "windows" ? "file:///C:/" : "file:///C:";
  assertEquals(fileHref, expectedFileHref);

  const httpHref = new URL("http://example.com/C:").href;
  assertEquals(httpHref, "http://example.com/C:");
});

unitTest(function urlBaseURL(): void {
const base = new URL(
"https://foo:bar@baz.qat:8000/qux/quux?foo=bar&baz=12#qat"
Expand All @@ -158,6 +195,25 @@ unitTest(function urlRelativeWithBase(): void {
assertEquals(new URL("../b", "file:///a/a/a").href, "file:///a/b");
});

// Resolving against a file base that carries a drive letter: the expected
// result differs between Windows and every other OS.
unitTest(function urlDriveLetterBase() {
  const base = "file:///C:/a/b";
  // [input, expected href on Windows, expected href elsewhere]
  const cases: Array<[string, string, string]> = [
    ["/b", "file:///C:/b", "file:///b"],
    ["D:", "file:///D:/", "file:///C:/a/D:"],
    ["/D:", "file:///D:/", "file:///D:"],
    ["D:/b", "file:///D:/b", "file:///C:/a/D:/b"],
  ];
  for (const [input, windowsHref, otherHref] of cases) {
    assertEquals(
      new URL(input, base).href,
      Deno.build.os == "windows" ? windowsHref : otherHref
    );
  }
});

// An empty input resolved against a base without a path yields the root path.
unitTest(function emptyBasePath(): void {
  const { href } = new URL("", "http://example.com");
  assertEquals(href, "http://example.com/");
});
Expand Down
192 changes: 120 additions & 72 deletions cli/js/web/url.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
import { build } from "../build.ts";
import { getRandomValues } from "../ops/get_random_values.ts";
import { customInspect } from "./console.ts";
import { urls } from "./url_search_params.ts";
import { getRandomValues } from "../ops/get_random_values.ts";

interface URLParts {
protocol: string;
Expand All @@ -14,32 +15,14 @@ interface URLParts {
hash: string;
}

// Regular expression source fragments, each with its own capture group(s).
// The first five split a whole URL into top-level components; the last three
// split the captured authority into credentials, host and port.
const patterns = {
protocol: "(?:([a-z]+):)",
authority: "(?://([^/?#]*))",
path: "([^?#]*)",
query: "(\\?[^#]*)",
hash: "(#.*)",

authentication: "(?:([^:]*)(?::([^@]*))?@)",
hostname: "([^:]+)",
port: "(?::(\\d+))",
};

// Whole-URL matcher. Capture groups, in order: 1 protocol, 2 authority,
// 3 path, 4 query (including the leading "?"), 5 hash (including the leading
// "#"). Every component except the path is optional.
const urlRegExp = new RegExp(
`^${patterns.protocol}?${patterns.authority}?${patterns.path}${patterns.query}?${patterns.hash}?`
);

// Authority matcher. Capture groups, in order: 1 username, 2 password,
// 3 hostname, 4 port. Only the hostname is mandatory.
const authorityRegExp = new RegExp(
`^${patterns.authentication}?${patterns.hostname}${patterns.port}?$`
);

// Names of `URLSearchParams` methods.
// NOTE(review): the code that consumes this list is not visible here;
// presumably these are the mutating methods that get intercepted so the
// owning URL stays in sync — confirm against the class body.
const searchParamsMethods: Array<keyof URLSearchParams> = [
"append",
"delete",
"set",
];

// Protocols that get special treatment in `parse()`: "file" has its own
// branch, and for the others an authority (credentials, hostname and port) is
// read after the protocol.
const specialSchemes = ["ftp", "file", "http", "https", "ws", "wss"];

// https://url.spec.whatwg.org/#special-scheme
const schemePorts: { [key: string]: string } = {
ftp: "21",
Expand All @@ -51,27 +34,69 @@ const schemePorts: { [key: string]: string } = {
};
// Largest acceptable port number (65535); used as the upper bound when a port
// string is validated.
const MAX_PORT = 2 ** 16 - 1;

function parse(url: string): URLParts | undefined {
const urlMatch = urlRegExp.exec(url);
if (urlMatch) {
const [, , authority] = urlMatch;
const authorityMatch = authority
? authorityRegExp.exec(authority)
: [null, null, null, null, null];
if (authorityMatch) {
return {
protocol: urlMatch[1] || "",
username: authorityMatch[1] || "",
password: authorityMatch[2] || "",
hostname: authorityMatch[3] || "",
port: authorityMatch[4] || "",
path: urlMatch[3] || "",
query: urlMatch[4] || "",
hash: urlMatch[5] || "",
};
}
// Remove the part of the string that matches the pattern and return the
// remainder (RHS) as well as the first captured group of the matched substring
// (LHS). The capture is "" when the pattern does not match, when it has no
// capture group, or when its first group did not take part in the match. e.g.
//   takePattern("https://deno.land:80", /^([a-z]+):[/]{2}/)
//     = ["https", "deno.land:80"]
//   takePattern("deno.land:80", /^([^:]+):/)
//     = ["deno.land", "80"]
function takePattern(string: string, pattern: RegExp): [string, string] {
  let capture = "";
  const rest = string.replace(pattern, (_match: string, group?: unknown) => {
    // The second replacer argument is the first group only if the pattern has
    // one; otherwise it is the numeric match offset, and a group that did not
    // participate arrives as `undefined`. Keep strings only so the declared
    // `[string, string]` result always holds.
    capture = typeof group === "string" ? group : "";
    return "";
  });
  return [capture, rest];
}

// Splits `url` into its components by consuming it from left to right:
// protocol, then (depending on the protocol) the authority, then path, query
// and hash.
//
// When `isBase` is true the URL has to stand on its own: a protocol is
// required, and the special schemes other than "file" also require a host.
// When it is false those parts may be missing, so a relative reference can be
// parsed.
//
// Returns `undefined` if the URL is rejected.
function parse(url: string, isBase = true): URLParts | undefined {
  const parts: Partial<URLParts> = {};
  let restUrl;
  [parts.protocol, restUrl] = takePattern(url, /^([a-z]+):/);
  if (isBase && parts.protocol == "") {
    return undefined;
  }
  if (parts.protocol == "file") {
    // File URLs carry no credentials or port. The host is optional, but it
    // must not contain a ":".
    parts.username = "";
    parts.password = "";
    [parts.hostname, restUrl] = takePattern(restUrl, /^[/\\]{2}([^/\\?#]*)/);
    if (parts.hostname.includes(":")) {
      return undefined;
    }
    parts.port = "";
  } else if (specialSchemes.includes(parts.protocol)) {
    // Two or more slashes (or back-slashes) followed by a non-empty authority.
    let restAuthority;
    [restAuthority, restUrl] = takePattern(
      restUrl,
      /^[/\\]{2}[/\\]*([^/\\?#]+)/
    );
    if (isBase && restAuthority == "") {
      return undefined;
    }
    // Authority layout: [username[:password]@]hostname[:port]
    let restAuthentication;
    [restAuthentication, restAuthority] = takePattern(restAuthority, /^(.*)@/);
    [parts.username, restAuthentication] = takePattern(
      restAuthentication,
      /^([^:]*)/
    );
    [parts.password] = takePattern(restAuthentication, /^:(.*)/);
    [parts.hostname, restAuthority] = takePattern(restAuthority, /^([^:]+)/);
    [parts.port] = takePattern(restAuthority, /^:(.*)/);
    if (!isValidPort(parts.port)) {
      return undefined;
    }
  } else {
    // Any other protocol (or none at all): no authority is parsed.
    parts.username = "";
    parts.password = "";
    parts.hostname = "";
    parts.port = "";
  }
  [parts.path, restUrl] = takePattern(restUrl, /^([^?#]*)/);
  // Back-slashes in the path are treated as forward-slashes.
  parts.path = parts.path.replace(/\\/g, "/");
  [parts.query, restUrl] = takePattern(restUrl, /^(\?[^#]*)/);
  [parts.hash] = takePattern(restUrl, /^(#.*)/);
  return parts as URLParts;
}

// Based on https://github.com/kelektiv/node-uuid
Expand All @@ -92,7 +117,12 @@ function isAbsolutePath(path: string): boolean {

// Resolves `.`s and `..`s where possible.
// Preserves repeating and trailing `/`s by design.
function normalizePath(path: string): string {
// On Windows, drive letter paths will be given a leading slash, and also a
// trailing slash if there are no other components e.g. "C:" -> "/C:/".
function normalizePath(path: string, isFilePath = false): string {
if (build.os == "windows" && isFilePath) {
path = path.replace(/^\/*([A-Za-z]:)(\/|$)/, "/$1/");
}
const isAbsolute = isAbsolutePath(path);
path = path.replace(/^\//, "");
const pathSegments = path.split("/");
Expand Down Expand Up @@ -123,27 +153,54 @@ function normalizePath(path: string): string {
}

// Standard URL basing logic, applied to paths.
// Resolves `path` against `basePath` and returns the normalized result.
// Throws a `TypeError` if `path` is relative and `basePath` is not absolute.
// With `isFilePath` set on Windows, a leading drive letter is split off both
// paths before resolving and put back in front of the result; the drive
// letter of `path` wins over the one of `basePath`.
function resolvePathFromBase(
  path: string,
  basePath: string,
  isFilePath = false
): string {
  let normalizedPath = normalizePath(path, isFilePath);
  let normalizedBasePath = normalizePath(basePath, isFilePath);

  let driveLetterPrefix = "";
  if (build.os == "windows" && isFilePath) {
    let driveLetter = "";
    let baseDriveLetter = "";
    // The lookahead keeps the `/` after the drive letter in the remainder, so
    // what is left is still recognisable as an absolute path.
    [driveLetter, normalizedPath] = takePattern(
      normalizedPath,
      /^(\/[A-Za-z]:)(?=\/)/
    );
    [baseDriveLetter, normalizedBasePath] = takePattern(
      normalizedBasePath,
      /^(\/[A-Za-z]:)(?=\/)/
    );
    driveLetterPrefix = driveLetter || baseDriveLetter;
  }

  if (isAbsolutePath(normalizedPath)) {
    return `${driveLetterPrefix}${normalizedPath}`;
  }
  if (!isAbsolutePath(normalizedBasePath)) {
    throw new TypeError("Base path must be absolute.");
  }

  // Special case.
  if (path == "") {
    return `${driveLetterPrefix}${normalizedBasePath}`;
  }

  // Remove everything after the last `/` in `normalizedBasePath`.
  const prefix = normalizedBasePath.replace(/[^\/]*$/, "");
  // If `normalizedPath` ends with `.` or `..`, add a trailing slash.
  const suffix = normalizedPath.replace(/(?<=(^|\/)(\.|\.\.))$/, "/");

  return `${driveLetterPrefix}${normalizePath(prefix + suffix)}`;
}

// Reports whether `value` is acceptable as a URL port: either the empty string
// (no port) or something that converts to an integer between 0 and `MAX_PORT`
// inclusive.
function isValidPort(value: string): boolean {
  // https://url.spec.whatwg.org/#port-state
  // An empty port is valid; say so explicitly instead of relying on
  // `Number("")` happening to be 0.
  if (value === "") return true;
  const port = Number(value);
  return Number.isInteger(port) && port >= 0 && port <= MAX_PORT;
}

/** @internal */
Expand Down Expand Up @@ -189,18 +246,6 @@ export class URLImpl implements URL {
urls.set(searchParams, this);
};

// Checks a port string and returns its canonical form: "" stays "", an
// integer between 0 and MAX_PORT is returned as its decimal string, and
// anything else yields `undefined`.
#validatePort = (value: string): string | undefined => {
  // https://url.spec.whatwg.org/#port-state
  if (value === "") {
    return value;
  }

  const numeric = Number(value);
  const inRange =
    Number.isInteger(numeric) && numeric >= 0 && numeric <= MAX_PORT;
  return inRange ? numeric.toString() : undefined;
};

get hash(): string {
return parts.get(this)!.hash;
}
Expand Down Expand Up @@ -300,8 +345,10 @@ export class URLImpl implements URL {
}

// Sets the port. Invalid values are ignored and leave the current port
// untouched. Valid values are stored in canonical decimal form (e.g. "0080"
// becomes "80"); the empty string clears the port.
set port(value: string) {
  if (!isValidPort(value)) {
    return;
  }
  parts.get(this)!.port = value === "" ? "" : Number(value).toString();
}

get protocol(): string {
Expand Down Expand Up @@ -360,22 +407,19 @@ export class URLImpl implements URL {
let baseParts: URLParts | undefined;
if (base) {
baseParts = typeof base === "string" ? parse(base) : parts.get(base);
if (!baseParts || baseParts.protocol == "") {
if (baseParts == undefined) {
throw new TypeError("Invalid base URL.");
}
}

const urlParts = typeof url === "string" ? parse(url) : parts.get(url);
if (!urlParts) {
throw new TypeError("Invalid URL.");
}

const { port } = !urlParts.protocol && baseParts ? baseParts : urlParts;
if (this.#validatePort(port) === undefined) {
const urlParts =
typeof url === "string" ? parse(url, !baseParts) : parts.get(url);
if (urlParts == undefined) {
throw new TypeError("Invalid URL.");
}

if (urlParts.protocol) {
urlParts.path = normalizePath(urlParts.path, urlParts.protocol == "file");
parts.set(this, urlParts);
} else if (baseParts) {
parts.set(this, {
Expand All @@ -384,7 +428,11 @@ export class URLImpl implements URL {
password: baseParts.password,
hostname: baseParts.hostname,
port: baseParts.port,
path: resolvePathFromBase(urlParts.path, baseParts.path || "/"),
path: resolvePathFromBase(
urlParts.path,
baseParts.path || "/",
baseParts.protocol == "file"
),
query: urlParts.query,
hash: urlParts.hash,
});
Expand Down
2 changes: 1 addition & 1 deletion std/path/from_file_url_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Deno.test("[path] fromFileUrl (win32)", function () {
// assertEquals(path.win32.fromFileUrl("file:////server"), "\\");
// assertEquals(path.win32.fromFileUrl("file:////server/file"), "\\file");
assertEquals(path.win32.fromFileUrl("file:///c"), "\\c");
assertEquals(path.win32.fromFileUrl("file:///c:"), "\\c:");
assertEquals(path.win32.fromFileUrl("file:///c:"), "c:\\");
assertEquals(path.win32.fromFileUrl("file:///c:/"), "c:\\");
assertEquals(path.win32.fromFileUrl("file:///C:/"), "C:\\");
assertEquals(path.win32.fromFileUrl("file:///C:/Users/"), "C:\\Users\\");
Expand Down
2 changes: 1 addition & 1 deletion std/path/win32.ts
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,6 @@ export function parse(path: string): ParsedPath {
*/
export function fromFileUrl(url: string | URL): string {
  return (
    new URL(url).pathname
      // Drop the slashes in front of a leading drive letter and make sure the
      // drive letter is followed by one: "/C:/Users" -> "C:/Users",
      // "/C:" -> "C:/". Paths without a drive letter are left alone.
      .replace(/^\/*([A-Za-z]:)(\/|$)/, "$1/")
      // Switch to Windows separators.
      .replace(/\//g, "\\")
  );
}

0 comments on commit 8c509bd

Please sign in to comment.