Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

urlapi: add CURLU_GET_EMPTY for empty queries and fragments #13396

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/libcurl/curl_url_get.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,18 @@ punycode.

(Added in curl 8.3.0)

## CURLU_GET_EMPTY

When this flag is used in curl_url_get(), it makes the function return empty
query and fragment parts, both when extracting the individual parts and when
extracting the full URL. By default, libcurl otherwise considers empty parts
non-existing.

An empty query part is one where there is nothing following the question mark
(before the possible fragment). An empty fragment part is one where there is
nothing following the hash sign.

(Added in curl 8.8.0)

# PARTS

## CURLUPART_URL
Expand Down
1 change: 1 addition & 0 deletions docs/libcurl/symbols-in-versions
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,7 @@ CURLU_APPENDQUERY 7.62.0
CURLU_DEFAULT_PORT 7.62.0
CURLU_DEFAULT_SCHEME 7.62.0
CURLU_DISALLOW_USER 7.62.0
CURLU_GET_EMPTY 8.8.0
CURLU_GUESS_SCHEME 7.62.0
CURLU_NO_AUTHORITY 7.67.0
CURLU_NO_DEFAULT_PORT 7.62.0
Expand Down
3 changes: 3 additions & 0 deletions include/curl/urlapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ typedef enum {
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */
#define CURLU_GET_EMPTY (1<<14) /* allow empty queries and fragments
when extracting the URL or the
components */

typedef struct Curl_URL CURLU;

Expand Down
37 changes: 29 additions & 8 deletions lib/urlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ struct Curl_URL {
char *path;
char *query;
char *fragment;
long portnum; /* the numerical version */
unsigned short portnum; /* the numerical version */
BIT(query_present); /* to support blank */
BIT(fragment_present); /* to support blank */
};

#define DEFAULT_SCHEME "https"
Expand Down Expand Up @@ -561,7 +563,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
if(rest[0])
return CURLUE_BAD_PORT_NUMBER;

u->portnum = port;
u->portnum = (unsigned short) port;
/* generate a new port number string to get rid of leading zeroes etc */
free(u->port);
u->port = aprintf("%ld", port);
Expand Down Expand Up @@ -1245,6 +1247,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
fragment = strchr(path, '#');
if(fragment) {
fraglen = pathlen - (fragment - path);
u->fragment_present = TRUE;
if(fraglen > 1) {
/* skip the leading '#' in the copy but include the terminating null */
if(flags & CURLU_URLENCODE) {
Expand Down Expand Up @@ -1272,6 +1275,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
size_t qlen = fragment ? (size_t)(fragment - query) :
pathlen - (query - path);
pathlen -= qlen;
u->query_present = TRUE;
if(qlen > 1) {
if(flags & CURLU_URLENCODE) {
struct dynbuf enc;
Expand Down Expand Up @@ -1407,6 +1411,8 @@ CURLU *curl_url_dup(const CURLU *in)
DUP(u, in, fragment);
DUP(u, in, zoneid);
u->portnum = in->portnum;
u->fragment_present = in->fragment_present;
u->query_present = in->query_present;
}
return u;
fail:
Expand Down Expand Up @@ -1491,24 +1497,35 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
ptr = u->query;
ifmissing = CURLUE_NO_QUERY;
plusdecode = urldecode;
if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
/* there was a blank query and the user does not ask for it */
ptr = NULL;
break;
case CURLUPART_FRAGMENT:
ptr = u->fragment;
ifmissing = CURLUE_NO_FRAGMENT;
if(!ptr && u->fragment_present && flags & CURLU_GET_EMPTY)
/* there was a blank fragment and the user asks for it */
ptr = "";
break;
case CURLUPART_URL: {
char *url;
char *scheme;
char *options = u->options;
char *port = u->port;
char *allochost = NULL;
bool show_fragment =
u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
bool show_query =
(u->query && u->query[0]) ||
(u->query_present && flags & CURLU_GET_EMPTY);
punycode = (flags & CURLU_PUNYCODE)?1:0;
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s",
u->path,
u->fragment? "#": "",
u->fragment? u->fragment : "");
show_fragment ? "#": "",
u->fragment ? u->fragment : "");
}
else if(!u->host)
return CURLUE_NO_HOST;
Expand Down Expand Up @@ -1596,9 +1613,9 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
port ? ":": "",
port ? port : "",
u->path ? u->path : "/",
(u->query && u->query[0]) ? "?": "",
(u->query && u->query[0]) ? u->query : "",
u->fragment? "#": "",
show_query ? "?": "",
u->query ? u->query : "",
show_fragment ? "#": "",
u->fragment? u->fragment : "");
free(allochost);
}
Expand Down Expand Up @@ -1733,9 +1750,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
break;
case CURLUPART_QUERY:
storep = &u->query;
u->query_present = FALSE;
break;
case CURLUPART_FRAGMENT:
storep = &u->fragment;
u->fragment_present = FALSE;
break;
default:
return CURLUE_UNKNOWN_PART;
Expand Down Expand Up @@ -1819,9 +1838,11 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
appendquery = (flags & CURLU_APPENDQUERY)?1:0;
equalsencode = appendquery;
storep = &u->query;
u->query_present = TRUE;
break;
case CURLUPART_FRAGMENT:
storep = &u->fragment;
u->fragment_present = TRUE;
break;
case CURLUPART_URL: {
/*
Expand Down Expand Up @@ -1972,6 +1993,6 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
/* set after the string, to make it not assigned if the allocation above
fails */
if(port)
u->portnum = port;
u->portnum = (unsigned short)port;
return CURLUE_OK;
}
4 changes: 2 additions & 2 deletions tests/data/test1201
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ gopher
Gopher selector
</name>
<command>
gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER?
gopher://%HOSTIP:%GOPHERPORT/1/selector/SELECTOR/%TESTNUMBER
</command>
</client>

#
# Verify data after the test has been "shot"
<verify>
<protocol>
/selector/SELECTOR/%TESTNUMBER?
/selector/SELECTOR/%TESTNUMBER
</protocol>
</verify>
</testcase>
30 changes: 30 additions & 0 deletions tests/libtest/lib1560.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,21 @@ struct clearurlcase {
};

static const struct testcase get_parts_list[] ={
{"https://curl.se/#",
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | ",
0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://curl.se/?#",
"https | [11] | [12] | [13] | curl.se | [15] | / | | ",
0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://curl.se/?",
"https | [11] | [12] | [13] | curl.se | [15] | / | | [17]",
0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://curl.se/?",
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
0, 0, CURLUE_OK},
{"https://curl.se/?#",
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
0, 0, CURLUE_OK},
{"https://curl.se/# ",
"https | [11] | [12] | [13] | curl.se | [15] | / | [16] | %20%20",
CURLU_URLENCODE|CURLU_ALLOW_SPACE, 0, CURLUE_OK},
Expand Down Expand Up @@ -508,6 +523,9 @@ static const struct testcase get_parts_list[] ={
};

static const struct urltestcase get_url_list[] = {
{"http://user@example.com?#",
"http://user@example.com/?#",
0, CURLU_GET_EMPTY, CURLUE_OK},
/* WHATWG disagrees, it wants "https:/0.0.0.0/" */
{"https://0x.0x.0", "https://0x.0x.0/", 0, 0, CURLUE_OK},

Expand Down Expand Up @@ -781,6 +799,18 @@ static int checkurl(const char *org, const char *url, const char *out)
3. Extract all components (not URL)
*/
static const struct setgetcase setget_parts_list[] = {
{"https://example.com/",
"query=\"\",",
"https | [11] | [12] | [13] | example.com | [15] | / | | [17]",
0, 0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://example.com/",
"fragment=\"\",",
"https | [11] | [12] | [13] | example.com | [15] | / | [16] | ",
0, 0, CURLU_GET_EMPTY, CURLUE_OK},
{"https://example.com/",
"query=\"\",",
"https | [11] | [12] | [13] | example.com | [15] | / | [16] | [17]",
0, 0, 0, CURLUE_OK},
{"https://example.com",
"path=get,",
"https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",
Expand Down