Skip to content

Commit

Permalink
Merge 8d9902e into d6ec96f
Browse files Browse the repository at this point in the history
  • Loading branch information
phluid61 committed Nov 24, 2017
2 parents d6ec96f + 8d9902e commit 66f1d41
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 44 deletions.
121 changes: 78 additions & 43 deletions lib/url.c
Expand Up @@ -2039,6 +2039,14 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data,
('A' <= str[0] && str[0] <= 'Z')) && \
(str[1] == ':'))

/* URL-form MSDOS/Windows drive prefix test.
 *
 * Evaluates to non-zero when 'str' begins with:
 *   - a single ASCII letter (either case),
 *   - then ':' or its URL alternative '|',
 *   - then either a '/' or the terminating NUL.
 *
 * Each index is only examined once the previous one has matched, so the
 * macro never looks past the terminating NUL. 'str' is expanded several
 * times; pass an expression without side effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
((((str)[0] >= 'a' && (str)[0] <= 'z') || \
((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
(':' == (str)[1] || '|' == (str)[1]) && \
('/' == (str)[2] || '\0' == (str)[2]))

/* Don't mistake a drive letter for a scheme if the default protocol is file.
curld --proto-default file c:/foo/bar.txt */
if(STARTS_WITH_DRIVE_PREFIX(data->change.url) &&
Expand Down Expand Up @@ -2071,63 +2079,90 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data,
return CURLE_URL_MALFORMAT;
}

if(url_has_scheme && path[0] == '/' && path[1] == '/') {
/* Allow omitted hostname (e.g. file:/<path>). This is not strictly
* speaking a valid file: URL by RFC 1738, but treating file:/<path> as
* file://localhost/<path> is similar to how other schemes treat missing
* hostnames. See RFC 1808. */

/* This cannot be done with strcpy() in a portable manner, since the
memory areas overlap! */
memmove(path, path + 2, strlen(path + 2) + 1);
if(url_has_scheme && path[0] == '/' && path[1] == '/' &&
path[2] == '/' && path[3] == '/') {
/* This appears to be a UNC string (usually indicating a SMB share).
* We don't do SMB in file: URLs. (TODO?)
*/
failf(data, "SMB shares are not supported in file: URLs.");
return CURLE_URL_MALFORMAT;
}

/*
* we deal with file://<host>/<path> differently since it supports no
* hostname other than "localhost" and "127.0.0.1", which is unique among
* the URL protocols specified in RFC 1738
/* Extra handling for URLs with an authority component (i.e. that start with
* "file://")
*
* We allow omitted hostname (e.g. file:/<path>) -- valid according to
* RFC 8089, but not the (current) WHAT-WG URL spec.
*/
if(path[0] != '/' && !STARTS_WITH_DRIVE_PREFIX(path)) {
/* the URL includes a host name, it must match "localhost" or
"127.0.0.1" to be valid */
char *ptr;
if(!checkprefix("localhost/", path) &&
!checkprefix("127.0.0.1/", path)) {
failf(data, "Invalid file://hostname/, "
"expected localhost or 127.0.0.1 or none");
return CURLE_URL_MALFORMAT;
}
ptr = &path[9]; /* now points to the slash after the host */

/* there was a host name and slash present
RFC1738 (section 3.1, page 5) says:
The rest of the locator consists of data specific to the scheme,
and is known as the "url-path". It supplies the details of how the
specified resource can be accessed. Note that the "/" between the
host (or port) and the url-path is NOT part of the url-path.
if(url_has_scheme && path[0] == '/' && path[1] == '/') {
/* swallow the two slashes */
char *ptr = &path[2];

As most agents use file://localhost/foo to get '/foo' although the
slash preceding foo is a separator and not a slash for the path,
a URL as file://localhost//foo must be valid as well, to refer to
the same file with an absolute path.
*/
/*
* According to RFC 8089, a file: URL can be reliably dereferenced if:
*
* o it has no/blank hostname, or
*
* o the hostname matches "localhost" (case-insensitively), or
*
* o the hostname is a FQDN that resolves to this machine.
*
* For brevity, we only consider URLs with empty, "localhost", or
* "127.0.0.1" hostnames as local.
*
* Additionally, there is an exception for URLs with a Windows drive
* letter in the authority (which was accidentally omitted from RFC 8089
* Appendix E, but believe me, it was meant to be there. --MK)
*/
if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
/* the URL includes a host name, it must match "localhost" or
"127.0.0.1" to be valid */
if(!checkprefix("localhost/", ptr) &&
!checkprefix("127.0.0.1/", ptr)) {
failf(data, "Invalid file://hostname/, "
"expected localhost or 127.0.0.1 or none");
return CURLE_URL_MALFORMAT;
}
ptr += 9; /* now points to the slash after the host */
}

if('/' == ptr[1])
/* if there was two slashes, we skip the first one as that is then
used truly as a separator */
/*
* RFC 8089, Appendix D, Section D.1, says:
*
* > In a POSIX file system, the root of the file system is represented
* > as a directory with a zero-length name, usually written as "/"; the
* > presence of this root in a file URI can be taken as given by the
* > initial slash in the "path-absolute" rule.
*
* i.e. the first slash is part of the path.
*
* However in RFC 1738 the "/" between the host (or port) and the
* URL-path was NOT part of the URL-path. Any agent that followed the
* older spec strictly, and wanted to refer to a file with an absolute
* path, would have included a second slash. So if there are two
* slashes, swallow one.
*/
if('/' == ptr[1]) /* note: the only way ptr[0]!='/' is if ptr[1] is ':' or '|' */
ptr++;

/* This cannot be made with strcpy, as the memory chunks overlap! */
/* This cannot be done with strcpy, as the memory chunks overlap! */
memmove(path, ptr, strlen(ptr) + 1);
}

#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
if(STARTS_WITH_DRIVE_PREFIX(path)) {
/* Don't allow Windows drive letters when not in Windows.
* This catches both "file:/c:" and "file:c:" */
if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
STARTS_WITH_URL_DRIVE_PREFIX(path)) {
failf(data, "File drive letters are only accepted in MSDOS/Windows.");
return CURLE_URL_MALFORMAT;
}
#else
/* If the path starts with a slash and a drive letter, ditch the slash */
if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
/* This cannot be done with strcpy, as the memory chunks overlap! */
memmove(path, &path[1], strlen(&path[1]) + 1);
}
#endif

protop = "file"; /* protocol string */
Expand Down
4 changes: 3 additions & 1 deletion tests/data/Makefile.inc
Expand Up @@ -187,4 +187,6 @@ test2032 test2033 test2034 test2035 test2036 test2037 test2038 test2039 \
test2040 test2041 test2042 test2043 test2044 test2045 test2046 test2047 \
test2048 test2049 test2050 test2051 test2052 test2053 test2054 test2055 \
test2056 test2057 test2058 test2059 test2060 test2061 test2062 test2063 \
test2064 test2065 test2066 test2067 test2068 test2069
test2064 test2065 test2066 test2067 test2068 test2069 \
\
test2070 test2071 test2072
41 changes: 41 additions & 0 deletions tests/data/test2070
@@ -0,0 +1,41 @@
<testcase>
<info>
<keywords>
FILE
</keywords>
</info>

<reply>
<data>
foo
bar
bar
foo
moo
</data>
</reply>

# Client-side
<client>
<server>
file
</server>
<name>
basic file:// file with no authority
</name>
<command>
file:%PWD/log/test2070.txt
</command>
<file name="log/test2070.txt">
foo
bar
bar
foo
moo
</file>
</client>

# Verify data after the test has been "shot"
<verify>
</verify>
</testcase>
41 changes: 41 additions & 0 deletions tests/data/test2071
@@ -0,0 +1,41 @@
<testcase>
<info>
<keywords>
FILE
</keywords>
</info>

<reply>
<data>
foo
bar
bar
foo
moo
</data>
</reply>

# Client-side
<client>
<server>
file
</server>
<name>
basic file:// file with "127.0.0.1" hostname
</name>
<command>
file://127.0.0.1/%PWD/log/test2070.txt
</command>
<file name="log/test2070.txt">
foo
bar
bar
foo
moo
</file>
</client>

# Verify data after the test has been "shot"
<verify>
</verify>
</testcase>
38 changes: 38 additions & 0 deletions tests/data/test2072
@@ -0,0 +1,38 @@
<testcase>
<info>
<keywords>
FILE
</keywords>
</info>

<reply>
</reply>

# Client-side
<client>
<server>
file
</server>
<name>
file:// with SMB path
</name>
<command>
file:////bad-host%PWD/log/test1145.txt
</command>
<file name="log/test1145.txt">
foo
bar
bar
foo
moo
</file>
</client>

# Verify data after the test has been "shot"
<verify>
# CURLE_URL_MALFORMAT is error code 3
<errorcode>
3
</errorcode>
</verify>
</testcase>

0 comments on commit 66f1d41

Please sign in to comment.