Skip to content

Commit

Permalink
urlapi: add CURLU_GET_EMPTY for empty queries and fragments
Browse files Browse the repository at this point in the history
By default the API does not return empty queries and fragments when
extracting them, unless this new flag is set.

This also makes the behavior more consistent: without it set, zero
length queries and fragments are considered not present in the URL. With
the flag set, they are returned as zero length strings if they were in
fact present in the URL.

This applies when extracting the individual query and fragment
components and for the full URL.

Closes #13396
  • Loading branch information
bagder committed Apr 18, 2024
1 parent 5379dbc commit 3eac21d
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 10 deletions.
12 changes: 12 additions & 0 deletions docs/libcurl/curl_url_get.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,18 @@ punycode.
(Added in curl 8.3.0)
## CURLU_GET_EMPTY
When this flag is used in curl_url_get(), it makes the function return empty
query and fragment parts, both when they are extracted as individual
components and when they are included in the full URL. By default, libcurl
otherwise considers empty parts non-existing.
An empty query part is one where there is nothing following the question mark
(before the possible fragment). An empty fragment part is one where there is
nothing following the hash sign.
(Added in curl 8.8.0)
# PARTS
## CURLUPART_URL
Expand Down
1 change: 1 addition & 0 deletions docs/libcurl/symbols-in-versions
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,7 @@ CURLU_APPENDQUERY 7.62.0
CURLU_DEFAULT_PORT 7.62.0
CURLU_DEFAULT_SCHEME 7.62.0
CURLU_DISALLOW_USER 7.62.0
CURLU_GET_EMPTY 8.8.0
CURLU_GUESS_SCHEME 7.62.0
CURLU_NO_AUTHORITY 7.67.0
CURLU_NO_DEFAULT_PORT 7.62.0
Expand Down
3 changes: 3 additions & 0 deletions include/curl/urlapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ typedef enum {
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */
#define CURLU_GET_EMPTY (1<<14) /* allow empty queries and fragments
when extracting the URL or the
components */

typedef struct Curl_URL CURLU;

Expand Down
37 changes: 29 additions & 8 deletions lib/urlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ struct Curl_URL {
char *path;
char *query;
char *fragment;
long portnum; /* the numerical version */
unsigned short portnum; /* the numerical version */
BIT(query_present); /* to support blank */
BIT(fragment_present); /* to support blank */
};

#define DEFAULT_SCHEME "https"
Expand Down Expand Up @@ -561,7 +563,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
if(rest[0])
return CURLUE_BAD_PORT_NUMBER;

u->portnum = port;
u->portnum = (unsigned short) port;
/* generate a new port number string to get rid of leading zeroes etc */
free(u->port);
u->port = aprintf("%ld", port);
Expand Down Expand Up @@ -1245,6 +1247,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
fragment = strchr(path, '#');
if(fragment) {
fraglen = pathlen - (fragment - path);
u->fragment_present = TRUE;
if(fraglen > 1) {
/* skip the leading '#' in the copy but include the terminating null */
if(flags & CURLU_URLENCODE) {
Expand Down Expand Up @@ -1272,6 +1275,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
size_t qlen = fragment ? (size_t)(fragment - query) :
pathlen - (query - path);
pathlen -= qlen;
u->query_present = TRUE;
if(qlen > 1) {
if(flags & CURLU_URLENCODE) {
struct dynbuf enc;
Expand Down Expand Up @@ -1407,6 +1411,8 @@ CURLU *curl_url_dup(const CURLU *in)
DUP(u, in, fragment);
DUP(u, in, zoneid);
u->portnum = in->portnum;
u->fragment_present = in->fragment_present;
u->query_present = in->query_present;
}
return u;
fail:
Expand Down Expand Up @@ -1491,24 +1497,35 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
ptr = u->query;
ifmissing = CURLUE_NO_QUERY;
plusdecode = urldecode;
if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
/* there was a blank query and the user does not ask for it */
ptr = NULL;
break;
case CURLUPART_FRAGMENT:
ptr = u->fragment;
ifmissing = CURLUE_NO_FRAGMENT;
if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
/* there was a blank fragment and the user asks for it */
ptr = "";
break;
case CURLUPART_URL: {
char *url;
char *scheme;
char *options = u->options;
char *port = u->port;
char *allochost = NULL;
bool show_fragment =
u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
bool show_query =
(u->query && u->query[0]) ||
(u->query_present && flags & CURLU_GET_EMPTY);
punycode = (flags & CURLU_PUNYCODE)?1:0;
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s",
u->path,
u->fragment? "#": "",
u->fragment? u->fragment : "");
show_fragment ? "#": "",
u->fragment ? u->fragment : "");
}
else if(!u->host)
return CURLUE_NO_HOST;
Expand Down Expand Up @@ -1596,9 +1613,9 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
port ? ":": "",
port ? port : "",
u->path ? u->path : "/",
(u->query && u->query[0]) ? "?": "",
(u->query && u->query[0]) ? u->query : "",
u->fragment? "#": "",
show_query ? "?": "",
u->query ? u->query : "",
show_fragment ? "#": "",
u->fragment? u->fragment : "");
free(allochost);
}
Expand Down Expand Up @@ -1733,9 +1750,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
break;
case CURLUPART_QUERY:
storep = &u->query;
u->query_present = FALSE;
break;
case CURLUPART_FRAGMENT:
storep = &u->fragment;
u->fragment_present = FALSE;
break;
default:
return CURLUE_UNKNOWN_PART;
Expand Down Expand Up @@ -1819,9 +1838,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
appendquery = (flags & CURLU_APPENDQUERY)?1:0;
equalsencode = appendquery;
storep = &u->query;
u->query_present = TRUE;
break;
case CURLUPART_FRAGMENT:
storep = &u->fragment;
u->fragment_present = TRUE;
break;
case CURLUPART_URL: {
/*
Expand Down Expand Up @@ -1972,6 +1993,6 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
/* set after the string, to make it not assigned if the allocation above
fails */
if(port)
u->portnum = port;
u->portnum = (unsigned short)port;
return CURLUE_OK;
}
4 changes: 2 additions & 2 deletions tests/data/test1201
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ gopher
Gopher selector
</name>
<command>
gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER?
gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER
</command>
</client>

#
# Verify data after the test has been "shot"
<verify>
<protocol>
/selector/SELECTOR/%TESTNUMBER?
/selector/SELECTOR/%TESTNUMBER
</protocol>
</verify>
</testcase>
30 changes: 30 additions & 0 deletions tests/libtest/lib1560.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,21 @@ struct clearurlcase {
};

static const struct testcase get_parts_list[] ={
{"https://curl.se/#",
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | ",
0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://curl.se/?#",
"https | [11] | [12] | [13] | curl.se | [15] | / | | ",
0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://curl.se/?",
"https | [11] | [12] | [13] | curl.se | [15] | / | | [17]",
0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://curl.se/?",
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
0, 0, CURLUE_OK},
{"https://curl.se/?#",
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
0, 0, CURLUE_OK},
{"https://curl.se/# ",
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | %20%20",
CURLU_URLENCODE|CURLU_ALLOW_SPACE, 0, CURLUE_OK},
Expand Down Expand Up @@ -508,6 +523,9 @@ static const struct testcase get_parts_list[] ={
};

static const struct urltestcase get_url_list[] = {
{"http://user@example.com?#",
"http://user@example.com/?#",
0, CURLU_GET_EMPTY, CURLUE_OK},
/* WHATWG disagrees, it wants "https:/0.0.0.0/" */
{"https://0x.0x.0", "https://0x.0x.0/", 0, 0, CURLUE_OK},

Expand Down Expand Up @@ -781,6 +799,18 @@ static int checkurl(const char *org, const char *url, const char *out)
3. Extract all components (not URL)
*/
static const struct setgetcase setget_parts_list[] = {
{"https://example.com/",
"query=\"\",",
"https | [11] | [12] | [13] | example.com | [15] | / | | [17]",
0, 0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://example.com/",
"fragment=\"\",",
"https | [11] | [12] | [13] | example.com | [15] | / | [16] | ",
0, 0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://example.com/",
"query=\"\",",
"https | [11] | [12] | [13] | example.com | [15] | / | [16] | [17]",
0, 0, 0, CURLUE_OK},
{"https://example.com",
"path=get,",
"https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",
Expand Down

0 comments on commit 3eac21d

Please sign in to comment.