Skip to content

Commit

Permalink
urlapi: remove two fields from the URL handle struct
Browse files (browse the repository at this point in the history)
and reduce copies and allocs
  • Loading branch information
bagder committed Sep 1, 2022
1 parent 7be5377 commit c03305e
Showing 1 changed file with 104 additions and 94 deletions.
198 changes: 104 additions & 94 deletions lib/urlapi.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "curl_ctype.h"
#include "inet_pton.h"
#include "inet_ntop.h"
#include "strdup.h"

/* The last 3 #include files should be in this order */
#include "curl_printf.h"
Expand Down Expand Up @@ -68,9 +69,6 @@ struct Curl_URL {
char *path;
char *query;
char *fragment;

char *scratch; /* temporary scratch area */
char *temppath; /* temporary path pointer */
long portnum; /* the numerical version */
};

Expand All @@ -88,8 +86,6 @@ static void free_urlhandle(struct Curl_URL *u)
free(u->path);
free(u->query);
free(u->fragment);
free(u->scratch);
free(u->temppath);
}

/*
Expand Down Expand Up @@ -458,7 +454,6 @@ static bool junkscan(const char *part, unsigned int flags)
*
*/
static CURLUcode parse_hostname_login(struct Curl_URL *u,
char **hostname,
unsigned int flags)
{
CURLUcode result = CURLUE_OK;
Expand All @@ -475,16 +470,16 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
* We need somewhere to put the embedded details, so do that first.
*/

char *ptr = strchr(*hostname, '@');
char *login = *hostname;
char *ptr = strchr(u->host, '@');
char *login = u->host;

if(!ptr)
goto out;

/* We will now try to extract the
* possible login information in a string like:
* ftp://user:password@ftp.my.site:8021/README */
*hostname = ++ptr;
ptr++;

/* if this is a known scheme, get some details */
if(u->scheme)
Expand Down Expand Up @@ -530,6 +525,9 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
u->options = optionsp;
}

/* move the name to the start of the host buffer */
memmove(u->host, ptr, strlen(ptr) + 1);

return CURLUE_OK;
out:

Expand Down Expand Up @@ -813,17 +811,18 @@ static CURLUcode decode_host(char *hostname, char **outp)
static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
{
char *path;
size_t pathlen;
bool path_alloced = FALSE;
bool uncpath = FALSE;
char *hostname;
char *query = NULL;
char *fragment = NULL;
CURLUcode result;
bool url_has_scheme = FALSE;
char schemebuf[MAX_SCHEME_LEN + 1];
const char *schemep = NULL;
size_t schemelen = 0;
size_t urllen;
CURLUcode result = CURLUE_OK;
size_t fraglen = 0;

DEBUGASSERT(url);

Expand All @@ -836,13 +835,6 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
/* excessive input length */
return CURLUE_MALFORMED_INPUT;

path = u->scratch = malloc(urllen * 2 + 2);
if(!path)
return CURLUE_OUT_OF_MEMORY;

hostname = &path[urllen + 1];
hostname[0] = 0;

if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
url_has_scheme = TRUE;
schemelen = strlen(schemebuf);
Expand All @@ -855,7 +847,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
return CURLUE_BAD_FILE_URL;

/* path has been allocated large enough to hold this */
strcpy(path, &url[5]);
path = (char *)&url[5];

u->scheme = strdup("file");
if(!u->scheme)
Expand Down Expand Up @@ -910,8 +902,10 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)

len = path - ptr;
if(len) {
memcpy(hostname, ptr, len);
hostname[len] = 0;
u->host = Curl_memdup(ptr, len + 1);
if(!u->host)
return CURLUE_OUT_OF_MEMORY;
u->host[len] = 0;
uncpath = TRUE;
}

Expand All @@ -928,21 +922,21 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
}

if(!uncpath)
hostname = NULL; /* no host for file: URLs by default */
/* no host for file: URLs by default */
Curl_safefree(u->host);

#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
/* Don't allow Windows drive letters when not in Windows.
* This catches both "file:/c:" and "file:c:" */
if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
STARTS_WITH_URL_DRIVE_PREFIX(path)) {
STARTS_WITH_URL_DRIVE_PREFIX(path))
/* File drive letters are only accepted in MSDOS/Windows */
return CURLUE_BAD_FILE_URL;
}
#else
/* If the path starts with a slash and a drive letter, ditch the slash */
if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
/* This cannot be done with strcpy, as the memory chunks overlap! */
memmove(path, &path[1], strlen(&path[1]) + 1);
path++;
}
#endif

Expand All @@ -952,7 +946,6 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
const char *p;
const char *hostp;
size_t len;
path[0] = 0;

if(url_has_scheme) {
int i = 0;
Expand Down Expand Up @@ -994,15 +987,17 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)

len = p - hostp;
if(len) {
memcpy(hostname, hostp, len);
hostname[len] = 0;
u->host = Curl_memdup(hostp, len + 1);
if(!u->host)
return CURLUE_OUT_OF_MEMORY;
u->host[len] = 0;
}
else {
if(!(flags & CURLU_NO_AUTHORITY))
return CURLUE_NO_HOST;
}

strcpy(path, p);
path = (char *)p;

if(schemep) {
u->scheme = strdup(schemep);
Expand All @@ -1011,117 +1006,129 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
}
}

if((flags & CURLU_URLENCODE) && path[0]) {
/* worst case output length is 3x the original! */
char *newp = malloc(strlen(path) * 3);
if(!newp)
return CURLUE_OUT_OF_MEMORY;
path_alloced = TRUE;
strcpy_url(newp, path, TRUE); /* consider it relative */
u->temppath = path = newp;
}

fragment = strchr(path, '#');
if(fragment) {
*fragment++ = 0;
if(junkscan(fragment, flags))
return CURLUE_BAD_FRAGMENT;
if(fragment[0]) {
u->fragment = strdup(fragment);
fraglen = strlen(fragment);
if(fraglen > 1) {
/* skip the leading '#' in the copy but include the terminating null */
u->fragment = Curl_memdup(fragment + 1, fraglen);
if(!u->fragment)
return CURLUE_OUT_OF_MEMORY;

if(junkscan(u->fragment, flags))
return CURLUE_BAD_FRAGMENT;
}
}

query = strchr(path, '?');
if(query) {
*query++ = 0;
if(junkscan(query, flags))
return CURLUE_BAD_QUERY;
/* done even if the query part is a blank string */
u->query = strdup(query);
if(!u->query)
return CURLUE_OUT_OF_MEMORY;
}
if(query && (!fragment || (query < fragment))) {
size_t qlen = strlen(query) - fraglen; /* includes '?' */
pathlen = strlen(path) - qlen - fraglen;
if(qlen > 1) {
u->query = Curl_memdup(query + 1, qlen);
if(!u->query)
return CURLUE_OUT_OF_MEMORY;
u->query[qlen - 1] = 0;

if(junkscan(path, flags))
return CURLUE_BAD_PATH;
if(junkscan(u->query, flags))
return CURLUE_BAD_QUERY;
}
}
else
pathlen = strlen(path) - fraglen;

if(!path[0])
/* if there's no path left set, unset */
if(!pathlen) {
/* there is no path left, unset */
path = NULL;
}
else {
if(!path_alloced) {
u->path = Curl_memdup(path, pathlen + 1);
if(!u->path)
return CURLUE_OUT_OF_MEMORY;
path_alloced = TRUE;
u->path[pathlen] = 0;
path = u->path;
}

if(flags & CURLU_URLENCODE) {
/* worst case output length is 3x the original! */
char *newp = Curl_memdup(path, pathlen * 3 + 1);
if(!newp)
return CURLUE_OUT_OF_MEMORY;
strcpy_url(newp, path, TRUE); /* consider it relative */
free(u->path); /* free the old one */
path = u->path = newp;
}

if(junkscan(u->path, flags))
return CURLUE_BAD_PATH;

if(!(flags & CURLU_PATH_AS_IS)) {
/* remove ../ and ./ sequences according to RFC3986 */
char *newp = Curl_dedotdotify(path);
if(!newp)
return CURLUE_OUT_OF_MEMORY;

if(strcmp(newp, path)) {
/* if we got a new version */
if(path_alloced)
Curl_safefree(u->temppath);
u->temppath = path = newp;
path_alloced = TRUE;
/* we got a new version */
free(u->path);
u->path = newp;
}
else
free(newp);
}

u->path = path_alloced?path:strdup(path);
if(!u->path)
return CURLUE_OUT_OF_MEMORY;
u->temppath = NULL; /* used now */
}

if(hostname) {
if(u->host) {
char normalized_ipv4[sizeof("255.255.255.255") + 1];

/*
* Parse the login details and strip them out of the host name.
*/
result = parse_hostname_login(u, &hostname, flags);
result = parse_hostname_login(u, flags);
if(!result)
result = Curl_parse_port(u, u->host, url_has_scheme);
if(result)
return result;

result = Curl_parse_port(u, hostname, url_has_scheme);
if(result)
return result;

if(junkscan(hostname, flags))
if(junkscan(u->host, flags))
return CURLUE_BAD_HOSTNAME;

if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
/* Skip hostname check, it's allowed to be empty. */
u->host = strdup("");
if(ipv4_normalize(u->host, normalized_ipv4, sizeof(normalized_ipv4))) {
free(u->host);
u->host = strdup(normalized_ipv4);
if(!u->host)
return CURLUE_OUT_OF_MEMORY;
}
else {
if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
u->host = strdup(normalized_ipv4);
else {
result = decode_host(hostname, &u->host);
if(result)
return result;
char *decod;
result = decode_host(u->host, &decod);
if(!result) {
free(u->host);
u->host = decod;
result = hostname_check(u, u->host);
if(result)
return result;
}
if(result)
return result;
}
if(!u->host)
return CURLUE_OUT_OF_MEMORY;

if((flags & CURLU_GUESS_SCHEME) && !schemep) {
/* legacy curl-style guess based on host name */
if(checkprefix("ftp.", hostname))
if(checkprefix("ftp.", u->host))
schemep = "ftp";
else if(checkprefix("dict.", hostname))
else if(checkprefix("dict.", u->host))
schemep = "dict";
else if(checkprefix("ldap.", hostname))
else if(checkprefix("ldap.", u->host))
schemep = "ldap";
else if(checkprefix("imap.", hostname))
else if(checkprefix("imap.", u->host))
schemep = "imap";
else if(checkprefix("smtp.", hostname))
else if(checkprefix("smtp.", u->host))
schemep = "smtp";
else if(checkprefix("pop3.", hostname))
else if(checkprefix("pop3.", u->host))
schemep = "pop3";
else
schemep = "http";
Expand All @@ -1131,11 +1138,14 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
return CURLUE_OUT_OF_MEMORY;
}
}
else if(flags & CURLU_NO_AUTHORITY) {
/* allowed to be empty. */
u->host = strdup("");
if(!u->host)
return CURLUE_OUT_OF_MEMORY;
}

Curl_safefree(u->scratch);
Curl_safefree(u->temppath);

return CURLUE_OK;
return result;
}

/*
Expand Down

0 comments on commit c03305e

Please sign in to comment.