From 46e164069d1a5230e4e64cbd2ff46c46cce056bb Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Fri, 14 Sep 2018 23:33:28 +0200 Subject: [PATCH] url: use the URL API internally as well ... to make it a truly unified URL parser. Closes #3017 --- lib/curl_path.c | 4 +- lib/dict.c | 2 +- lib/easy.c | 4 - lib/file.c | 4 +- lib/ftp.c | 35 +- lib/ftp.h | 2 + lib/gopher.c | 2 +- lib/http.c | 125 +++--- lib/imap.c | 20 +- lib/ldap.c | 10 +- lib/multi.c | 9 +- lib/pop3.c | 5 +- lib/smb.c | 2 +- lib/smtp.c | 5 +- lib/tftp.c | 4 +- lib/transfer.c | 45 +-- lib/url.c | 987 ++++++++++----------------------------------- lib/url.h | 2 + lib/urldata.h | 17 +- tests/data/test325 | 4 +- tests/data/test523 | 4 +- tests/data/test563 | 2 +- 22 files changed, 376 insertions(+), 918 deletions(-) diff --git a/lib/curl_path.c b/lib/curl_path.c index e843deac7cfe06..68f3e44ba8f8e7 100644 --- a/lib/curl_path.c +++ b/lib/curl_path.c @@ -5,7 +5,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2017, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -39,7 +39,7 @@ CURLcode Curl_getworkingpath(struct connectdata *conn, char *working_path; size_t working_path_len; CURLcode result = - Curl_urldecode(data, data->state.path, 0, &working_path, + Curl_urldecode(data, data->state.up.path, 0, &working_path, &working_path_len, FALSE); if(result) return result; diff --git a/lib/dict.c b/lib/dict.c index 408d57b92f8701..78ef046d4434ff 100644 --- a/lib/dict.c +++ b/lib/dict.c @@ -136,7 +136,7 @@ static CURLcode dict_do(struct connectdata *conn, bool *done) struct Curl_easy *data = conn->data; curl_socket_t sockfd = conn->sock[FIRSTSOCKET]; - char *path = data->state.path; + char *path = data->state.up.path; curl_off_t *bytecount = &data->req.bytecount; *done = TRUE; /* unconditionally */ diff --git a/lib/easy.c b/lib/easy.c index 88fc4f4601a365..fb9105a1cebd49 100644 --- a/lib/easy.c +++ b/lib/easy.c @@ -1002,10 +1002,6 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data) */ void curl_easy_reset(struct Curl_easy *data) { - Curl_safefree(data->state.pathbuffer); - - data->state.path = NULL; - Curl_free_request_state(data); /* zero out UserDefined data: */ diff --git a/lib/file.c b/lib/file.c index 542f34a4523230..3cfa0e703862b4 100644 --- a/lib/file.c +++ b/lib/file.c @@ -143,7 +143,7 @@ static CURLcode file_connect(struct connectdata *conn, bool *done) #endif size_t real_path_len; - CURLcode result = Curl_urldecode(data, data->state.path, 0, &real_path, + CURLcode result = Curl_urldecode(data, data->state.up.path, 0, &real_path, &real_path_len, FALSE); if(result) return result; @@ -197,7 +197,7 @@ static CURLcode file_connect(struct connectdata *conn, bool *done) file->fd = fd; if(!data->set.upload && (fd == -1)) { - failf(data, "Couldn't open file %s", data->state.path); + failf(data, "Couldn't open file %s", data->state.up.path); file_done(conn, CURLE_FILE_COULDNT_READ_FILE, FALSE); return CURLE_FILE_COULDNT_READ_FILE; } diff --git a/lib/ftp.c b/lib/ftp.c index 429708fc560866..270b1e5f0db845 100644 --- a/lib/ftp.c +++ b/lib/ftp.c @@ -1444,6 +1444,7 @@ static CURLcode ftp_state_list(struct connectdata *conn) { CURLcode result = CURLE_OK; struct Curl_easy *data = conn->data; + struct FTP *ftp = data->req.protop; /* If this output is to be machine-parsed, the NLST command might be better to use, since the LIST command output is not specified or standard in any @@ -1460,7 +1461,7 @@ static CURLcode ftp_state_list(struct connectdata *conn) then just do LIST (in that case: nothing to do here) */ char *cmd, *lstArg, *slashPos; - const char *inpath = data->state.path; + const char *inpath = ftp->path; lstArg = NULL; if((data->set.ftp_filemethod == FTPFILE_NOCWD) && @@ -3141,7 +3142,7 @@ static CURLcode ftp_done(struct connectdata *conn, CURLcode status, int ftpcode; CURLcode result = CURLE_OK; char *path = NULL; - const char *path_to_use = data->state.path; + const char *path_to_use = ftp->path; if(!ftp) return CURLE_OK; @@ -3346,7 +3347,7 @@ static CURLcode ftp_done(struct connectdata *conn, CURLcode status, /* Send any post-transfer QUOTE strings? */ if(!status && !result && !premature && data->set.postquote) result = ftp_sendquote(conn, data->set.postquote); - + Curl_safefree(ftp->pathalloc); return result; } @@ -3695,12 +3696,13 @@ static void wc_data_dtor(void *ptr) static CURLcode init_wc_data(struct connectdata *conn) { char *last_slash; - char *path = conn->data->state.path; + struct FTP *ftp = conn->data->req.protop; + char *path = ftp->path; struct WildcardData *wildcard = &(conn->data->wildcard); CURLcode result = CURLE_OK; struct ftp_wc *ftpwc = NULL; - last_slash = strrchr(conn->data->state.path, '/'); + last_slash = strrchr(ftp->path, '/'); if(last_slash) { last_slash++; if(last_slash[0] == '\0') { @@ -3757,7 +3759,7 @@ static CURLcode init_wc_data(struct connectdata *conn) goto fail; } - wildcard->path = strdup(conn->data->state.path); + wildcard->path = strdup(ftp->path); if(!wildcard->path) { result = CURLE_OUT_OF_MEMORY; goto fail; @@ -3828,16 +3830,15 @@ static CURLcode wc_statemach(struct connectdata *conn) /* filelist has at least one file, lets get first one */ struct ftp_conn *ftpc = &conn->proto.ftpc; struct curl_fileinfo *finfo = wildcard->filelist.head->ptr; + struct FTP *ftp = conn->data->req.protop; char *tmp_path = aprintf("%s%s", wildcard->path, finfo->filename); if(!tmp_path) return CURLE_OUT_OF_MEMORY; - /* switch default "state.pathbuffer" and tmp_path, good to see - ftp_parse_url_path function to understand this trick */ - Curl_safefree(conn->data->state.pathbuffer); - conn->data->state.pathbuffer = tmp_path; - conn->data->state.path = tmp_path; + /* switch default ftp->path and tmp_path */ + free(ftp->pathalloc); + ftp->pathalloc = ftp->path = tmp_path; infof(conn->data, "Wildcard - START of \"%s\"\n", finfo->filename); if(conn->data->set.chunk_bgn) { @@ -4105,7 +4106,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn) struct FTP *ftp = data->req.protop; struct ftp_conn *ftpc = &conn->proto.ftpc; const char *slash_pos; /* position of the first '/' char in curpos */ - const char *path_to_use = data->state.path; + const char *path_to_use = ftp->path; const char *cur_pos; const char *filename = NULL; @@ -4191,7 +4192,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn) /* parse the URL path into separate path components */ while((slash_pos = strchr(cur_pos, '/')) != NULL) { /* 1 or 0 pointer offset to indicate absolute directory */ - ssize_t absolute_dir = ((cur_pos - data->state.path > 0) && + ssize_t absolute_dir = ((cur_pos - ftp->path > 0) && (ftpc->dirdepth == 0))?1:0; /* seek out the next path component */ @@ -4268,7 +4269,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn) size_t dlen; char *path; CURLcode result = - Curl_urldecode(conn->data, data->state.path, 0, &path, &dlen, TRUE); + Curl_urldecode(conn->data, ftp->path, 0, &path, &dlen, TRUE); if(result) { freedirs(ftpc); return result; @@ -4388,16 +4389,16 @@ static CURLcode ftp_setup_connection(struct connectdata *conn) char *type; struct FTP *ftp; - conn->data->req.protop = ftp = malloc(sizeof(struct FTP)); + conn->data->req.protop = ftp = calloc(sizeof(struct FTP), 1); if(NULL == ftp) return CURLE_OUT_OF_MEMORY; - data->state.path++; /* don't include the initial slash */ + ftp->path = &data->state.up.path[1]; /* don't include the initial slash */ data->state.slash_removed = TRUE; /* we've skipped the slash */ /* FTP URLs support an extension like ";type=" that * we'll try to get now! */ - type = strstr(data->state.path, ";type="); + type = strstr(ftp->path, ";type="); if(!type) type = strstr(conn->host.rawalloc, ";type="); diff --git a/lib/ftp.h b/lib/ftp.h index 7ec339118ebff1..38d03223ca8912 100644 --- a/lib/ftp.h +++ b/lib/ftp.h @@ -105,6 +105,8 @@ struct FTP { curl_off_t *bytecountp; char *user; /* user name string */ char *passwd; /* password string */ + char *path; /* points to the urlpieces struct field */ + char *pathalloc; /* if non-NULL a pointer to an allocated path */ /* transfer a file/body or not, done as a typedefed enum just to make debuggers display the full symbol and not just the numerical value */ diff --git a/lib/gopher.c b/lib/gopher.c index 3ecee9bdc31655..b441a641d9dea3 100644 --- a/lib/gopher.c +++ b/lib/gopher.c @@ -78,7 +78,7 @@ static CURLcode gopher_do(struct connectdata *conn, bool *done) curl_socket_t sockfd = conn->sock[FIRSTSOCKET]; curl_off_t *bytecount = &data->req.bytecount; - char *path = data->state.path; + char *path = data->state.up.path; char *sel = NULL; char *sel_org = NULL; ssize_t amount, k; diff --git a/lib/http.c b/lib/http.c index c1d0d68cd30e42..f6b24f0d560e4e 100644 --- a/lib/http.c +++ b/lib/http.c @@ -1877,7 +1877,8 @@ CURLcode Curl_http(struct connectdata *conn, bool *done) struct Curl_easy *data = conn->data; CURLcode result = CURLE_OK; struct HTTP *http; - const char *ppath = data->state.path; + const char *path = data->state.up.path; + const char *query = data->state.up.query; bool paste_ftp_userpwd = FALSE; char ftp_typecode[sizeof("/;type=?")] = ""; const char *host = conn->host.name; @@ -1995,7 +1996,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done) } /* setup the authentication headers */ - result = Curl_http_output_auth(conn, request, ppath, FALSE); + result = Curl_http_output_auth(conn, request, path, FALSE); if(result) return result; @@ -2223,47 +2224,59 @@ CURLcode Curl_http(struct connectdata *conn, bool *done) /* The path sent to the proxy is in fact the entire URL. But if the remote host is a IDN-name, we must make sure that the request we produce only uses the encoded host name! */ + + /* and no fragment part */ + CURLUcode uc; + char *url; + CURLU *h = curl_url_dup(data->state.uh); + if(!h) + return CURLE_OUT_OF_MEMORY; + if(conn->host.dispname != conn->host.name) { - char *url = data->change.url; - ptr = strstr(url, conn->host.dispname); - if(ptr) { - /* This is where the display name starts in the URL, now replace this - part with the encoded name. TODO: This method of replacing the host - name is rather crude as I believe there's a slight risk that the - user has entered a user name or password that contain the host name - string. */ - size_t currlen = strlen(conn->host.dispname); - size_t newlen = strlen(conn->host.name); - size_t urllen = strlen(url); - - char *newurl; - - newurl = malloc(urllen + newlen - currlen + 1); - if(newurl) { - /* copy the part before the host name */ - memcpy(newurl, url, ptr - url); - /* append the new host name instead of the old */ - memcpy(newurl + (ptr - url), conn->host.name, newlen); - /* append the piece after the host name */ - memcpy(newurl + newlen + (ptr - url), - ptr + currlen, /* copy the trailing zero byte too */ - urllen - (ptr-url) - currlen + 1); - if(data->change.url_alloc) { - Curl_safefree(data->change.url); - data->change.url_alloc = FALSE; - } - data->change.url = newurl; - data->change.url_alloc = TRUE; - } - else - return CURLE_OUT_OF_MEMORY; + uc = curl_url_set(h, CURLUPART_HOST, conn->host.name, 0); + if(uc) { + curl_url_cleanup(h); + return CURLE_OUT_OF_MEMORY; } } - ppath = data->change.url; - if(checkprefix("ftp://", ppath)) { + uc = curl_url_set(h, CURLUPART_FRAGMENT, NULL, 0); + if(uc) { + curl_url_cleanup(h); + return CURLE_OUT_OF_MEMORY; + } + + if(strcasecompare("http", data->state.up.scheme)) { + /* when getting HTTP, we don't want the userinfo the URL */ + uc = curl_url_set(h, CURLUPART_USER, NULL, 0); + if(uc) { + curl_url_cleanup(h); + return CURLE_OUT_OF_MEMORY; + } + uc = curl_url_set(h, CURLUPART_PASSWORD, NULL, 0); + if(uc) { + curl_url_cleanup(h); + return CURLE_OUT_OF_MEMORY; + } + } + /* now extract the new version of the URL */ + uc = curl_url_get(h, CURLUPART_URL, &url, 0); + if(uc) { + curl_url_cleanup(h); + return CURLE_OUT_OF_MEMORY; + } + + if(data->change.url_alloc) + free(data->change.url); + + data->change.url = url; + data->change.url_alloc = TRUE; + + curl_url_cleanup(h); + + if(strcasecompare("ftp", data->state.up.scheme)) { if(data->set.proxy_transfer_mode) { /* when doing ftp, append ;type= if not present */ - char *type = strstr(ppath, ";type="); + char *type = strstr(path, ";type="); if(type && type[6] && type[7] == 0) { switch(Curl_raw_toupper(type[6])) { case 'A': @@ -2278,7 +2291,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done) char *p = ftp_typecode; /* avoid sending invalid URLs like ftp://example.com;type=i if the * user specified ftp://example.com without the slash */ - if(!*data->state.path && ppath[strlen(ppath) - 1] != '/') { + if(!*data->state.up.path && path[strlen(path) - 1] != '/') { *p++ = '/'; } snprintf(p, sizeof(ftp_typecode) - 1, ";type=%c", @@ -2431,18 +2444,32 @@ CURLcode Curl_http(struct connectdata *conn, bool *done) if(result) return result; - if(data->set.str[STRING_TARGET]) - ppath = data->set.str[STRING_TARGET]; + if(data->set.str[STRING_TARGET]) { + path = data->set.str[STRING_TARGET]; + query = NULL; + } /* url */ - if(paste_ftp_userpwd) + if(conn->bits.httpproxy && !conn->bits.tunnel_proxy) { + char *url = data->change.url; + result = Curl_add_buffer(&req_buffer, url, strlen(url)); + if(result) + return result; + } + else if(paste_ftp_userpwd) result = Curl_add_bufferf(&req_buffer, "ftp://%s:%s@%s", conn->user, conn->passwd, - ppath + sizeof("ftp://") - 1); - else - result = Curl_add_buffer(&req_buffer, ppath, strlen(ppath)); - if(result) - return result; + path + sizeof("ftp://") - 1); + else { + result = Curl_add_buffer(&req_buffer, path, strlen(path)); + if(result) + return result; + if(query) { + result = Curl_add_bufferf(&req_buffer, "?%s", query); + if(result) + return result; + } + } result = Curl_add_bufferf(&req_buffer, @@ -2515,7 +2542,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done) co = Curl_cookie_getlist(data->cookies, conn->allocptr.cookiehost? conn->allocptr.cookiehost:host, - data->state.path, + data->state.up.path, (conn->handler->protocol&CURLPROTO_HTTPS)? TRUE:FALSE); Curl_share_unlock(data, CURL_LOCK_DATA_COOKIE); @@ -3836,7 +3863,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, here, or else use real peer host name. */ conn->allocptr.cookiehost? conn->allocptr.cookiehost:conn->host.name, - data->state.path); + data->state.up.path); Curl_share_unlock(data, CURL_LOCK_DATA_COOKIE); } #endif diff --git a/lib/imap.c b/lib/imap.c index 63fcb4d41f4f16..3ef89097f9a32b 100644 --- a/lib/imap.c +++ b/lib/imap.c @@ -1717,8 +1717,6 @@ static CURLcode imap_regular_transfer(struct connectdata *conn, static CURLcode imap_setup_connection(struct connectdata *conn) { - struct Curl_easy *data = conn->data; - /* Initialise the IMAP layer */ CURLcode result = imap_init(conn); if(result) @@ -1726,7 +1724,6 @@ static CURLcode imap_setup_connection(struct connectdata *conn) /* Clear the TLS upgraded flag */ conn->tls_upgraded = FALSE; - data->state.path++; /* don't include the initial slash */ return CURLE_OK; } @@ -1959,7 +1956,7 @@ static CURLcode imap_parse_url_path(struct connectdata *conn) CURLcode result = CURLE_OK; struct Curl_easy *data = conn->data; struct IMAP *imap = data->req.protop; - const char *begin = data->state.path; + const char *begin = &data->state.up.path[1]; /* skip leading slash */ const char *ptr = begin; /* See how much of the URL is a valid path and decode it */ @@ -2065,17 +2062,10 @@ static CURLcode imap_parse_url_path(struct connectdata *conn) /* Does the URL contain a query parameter? Only valid when we have a mailbox and no UID as per RFC-5092 */ - if(imap->mailbox && !imap->uid && !imap->mindex && *ptr == '?') { - /* Find the length of the query parameter */ - begin = ++ptr; - while(imap_is_bchar(*ptr)) - ptr++; - - /* Decode the query parameter */ - result = Curl_urldecode(data, begin, ptr - begin, &imap->query, NULL, - TRUE); - if(result) - return result; + if(imap->mailbox && !imap->uid && !imap->mindex) { + /* Get the query parameter, URL decoded */ + (void)curl_url_get(data->state.uh, CURLUPART_QUERY, &imap->query, + CURLU_URLDECODE); } /* Any extra stuff at the end of the URL is an error */ diff --git a/lib/ldap.c b/lib/ldap.c index 4d8f4fa2883a94..e42d1fbbceb041 100644 --- a/lib/ldap.c +++ b/lib/ldap.c @@ -5,7 +5,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2017, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -838,9 +838,9 @@ static int _ldap_url_parse2(const struct connectdata *conn, LDAPURLDesc *ludp) size_t i; if(!conn->data || - !conn->data->state.path || - conn->data->state.path[0] != '/' || - !checkprefix("LDAP", conn->data->change.url)) + !conn->data->state.up.path || + conn->data->state.up.path[0] != '/' || + !strcasecompare("LDAP", conn->data->state.up.scheme)) return LDAP_INVALID_SYNTAX; ludp->lud_scope = LDAP_SCOPE_BASE; @@ -848,7 +848,7 @@ static int _ldap_url_parse2(const struct connectdata *conn, LDAPURLDesc *ludp) ludp->lud_host = conn->host.name; /* Duplicate the path */ - p = path = strdup(conn->data->state.path + 1); + p = path = strdup(conn->data->state.up.path + 1); if(!path) return LDAP_NO_MEMORY; diff --git a/lib/multi.c b/lib/multi.c index 2faeaa74f9dee3..d5e09aab48e2bb 100644 --- a/lib/multi.c +++ b/lib/multi.c @@ -542,10 +542,8 @@ static CURLcode multi_done(struct connectdata **connp, Curl_getoff_all_pipelines(data, conn); /* Cleanup possible redirect junk */ - free(data->req.newurl); - data->req.newurl = NULL; - free(data->req.location); - data->req.location = NULL; + Curl_safefree(data->req.newurl); + Curl_safefree(data->req.location); switch(status) { case CURLE_ABORTED_BY_CALLBACK: @@ -657,7 +655,6 @@ static CURLcode multi_done(struct connectdata **connp, cache here, and therefore cannot be used from this point on */ Curl_free_request_state(data); - return result; } @@ -2015,8 +2012,6 @@ static CURLMcode multi_runsingle(struct Curl_multi *multi, } else if(comeback) rc = CURLM_CALL_MULTI_PERFORM; - - free(newurl); break; } diff --git a/lib/pop3.c b/lib/pop3.c index cd994f63d1c8c7..5e0fd2299beb86 100644 --- a/lib/pop3.c +++ b/lib/pop3.c @@ -1303,8 +1303,6 @@ static CURLcode pop3_regular_transfer(struct connectdata *conn, static CURLcode pop3_setup_connection(struct connectdata *conn) { - struct Curl_easy *data = conn->data; - /* Initialise the POP3 layer */ CURLcode result = pop3_init(conn); if(result) @@ -1312,7 +1310,6 @@ static CURLcode pop3_setup_connection(struct connectdata *conn) /* Clear the TLS upgraded flag */ conn->tls_upgraded = FALSE; - data->state.path++; /* don't include the initial slash */ return CURLE_OK; } @@ -1387,7 +1384,7 @@ static CURLcode pop3_parse_url_path(struct connectdata *conn) /* The POP3 struct is already initialised in pop3_connect() */ struct Curl_easy *data = conn->data; struct POP3 *pop3 = data->req.protop; - const char *path = data->state.path; + const char *path = &data->state.up.path[1]; /* skip leading path */ /* URL decode the path for the message ID */ return Curl_urldecode(data, path, 0, &pop3->id, NULL, TRUE); diff --git a/lib/smb.c b/lib/smb.c index e1209e0995e161..e4f266e1922565 100644 --- a/lib/smb.c +++ b/lib/smb.c @@ -969,7 +969,7 @@ static CURLcode smb_parse_url_path(struct connectdata *conn) char *slash; /* URL decode the path */ - result = Curl_urldecode(data, data->state.path, 0, &path, NULL, TRUE); + result = Curl_urldecode(data, data->state.up.path, 0, &path, NULL, TRUE); if(result) return result; diff --git a/lib/smtp.c b/lib/smtp.c index 50c0b347771a5c..587562306ab707 100644 --- a/lib/smtp.c +++ b/lib/smtp.c @@ -1441,7 +1441,6 @@ static CURLcode smtp_regular_transfer(struct connectdata *conn, static CURLcode smtp_setup_connection(struct connectdata *conn) { - struct Curl_easy *data = conn->data; CURLcode result; /* Clear the TLS upgraded flag */ @@ -1452,8 +1451,6 @@ static CURLcode smtp_setup_connection(struct connectdata *conn) if(result) return result; - data->state.path++; /* don't include the initial slash */ - return CURLE_OK; } @@ -1507,7 +1504,7 @@ static CURLcode smtp_parse_url_path(struct connectdata *conn) /* The SMTP struct is already initialised in smtp_connect() */ struct Curl_easy *data = conn->data; struct smtp_conn *smtpc = &conn->proto.smtpc; - const char *path = data->state.path; + const char *path = &data->state.up.path[1]; /* skip leading path */ char localhost[HOSTNAME_MAX + 1]; /* Calculate the path if necessary */ diff --git a/lib/tftp.c b/lib/tftp.c index e5bc80b02b5fed..5b74e8e08cd433 100644 --- a/lib/tftp.c +++ b/lib/tftp.c @@ -485,7 +485,7 @@ static CURLcode tftp_send_first(tftp_state_data_t *state, tftp_event_t event) /* As RFC3617 describes the separator slash is not actually part of the file name so we skip the always-present first letter of the path string. */ - result = Curl_urldecode(data, &state->conn->data->state.path[1], 0, + result = Curl_urldecode(data, &state->conn->data->state.up.path[1], 0, &filename, NULL, FALSE); if(result) return result; @@ -1374,7 +1374,7 @@ static CURLcode tftp_setup_connection(struct connectdata * conn) /* TFTP URLs support an extension like ";mode=" that * we'll try to get now! */ - type = strstr(data->state.path, ";mode="); + type = strstr(data->state.up.path, ";mode="); if(!type) type = strstr(conn->host.rawalloc, ";mode="); diff --git a/lib/transfer.c b/lib/transfer.c index 3d8089ee6daa9d..6a80bf31a311fb 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -567,7 +567,7 @@ static CURLcode readwrite_data(struct Curl_easy *data, infof(data, "Rewinding stream by : %zd" " bytes on url %s (zero-length body)\n", - nread, data->state.path); + nread, data->state.up.path); read_rewind(conn, (size_t)nread); } else { @@ -575,7 +575,7 @@ static CURLcode readwrite_data(struct Curl_easy *data, "Excess found in a non pipelined read:" " excess = %zd" " url = %s (zero-length body)\n", - nread, data->state.path); + nread, data->state.up.path); } } @@ -744,7 +744,7 @@ static CURLcode readwrite_data(struct Curl_easy *data, " bytes on url %s (size = %" CURL_FORMAT_CURL_OFF_T ", maxdownload = %" CURL_FORMAT_CURL_OFF_T ", bytecount = %" CURL_FORMAT_CURL_OFF_T ", nread = %zd)\n", - excess, data->state.path, + excess, data->state.up.path, k->size, k->maxdownload, k->bytecount, nread); read_rewind(conn, excess); } @@ -1474,6 +1474,7 @@ CURLcode Curl_follow(struct Curl_easy *data, /* Location: redirect */ bool disallowport = FALSE; bool reachedmax = FALSE; + CURLUcode uc; if(type == FOLLOW_REDIR) { if((data->set.maxredirs != -1) && @@ -1506,33 +1507,21 @@ CURLcode Curl_follow(struct Curl_easy *data, } } - if(!Curl_is_absolute_url(newurl, NULL, 8)) { - /*** - *DANG* this is an RFC 2068 violation. The URL is supposed - to be absolute and this doesn't seem to be that! - */ - char *absolute = Curl_concat_url(data->change.url, newurl); - if(!absolute) - return CURLE_OUT_OF_MEMORY; - newurl = absolute; - } - else { - /* The new URL MAY contain space or high byte values, that means a mighty - stupid redirect URL but we still make an effort to do "right". */ - char *newest; - size_t newlen = Curl_strlen_url(newurl, FALSE); - + if(Curl_is_absolute_url(newurl, NULL, MAX_SCHEME_LEN)) /* This is an absolute URL, don't allow the custom port number */ disallowport = TRUE; - newest = malloc(newlen + 1); /* get memory for this */ - if(!newest) - return CURLE_OUT_OF_MEMORY; - - Curl_strcpy_url(newest, newurl, FALSE); /* create a space-free URL */ - newurl = newest; /* use this instead now */ + DEBUGASSERT(data->state.uh); + uc = curl_url_set(data->state.uh, CURLUPART_URL, newurl, 0); + free(newurl); + if(uc) + /* TODO: consider an error code remap here */ + return CURLE_URL_MALFORMAT; - } + uc = curl_url_get(data->state.uh, CURLUPART_URL, &newurl, 0); + if(uc) + /* TODO: consider an error code remap here */ + return CURLE_OUT_OF_MEMORY; if(type == FOLLOW_FAKE) { /* we're only figuring out the new url if we would've followed locations @@ -1549,10 +1538,8 @@ CURLcode Curl_follow(struct Curl_easy *data, if(disallowport) data->state.allow_port = FALSE; - if(data->change.url_alloc) { + if(data->change.url_alloc) Curl_safefree(data->change.url); - data->change.url_alloc = FALSE; - } data->change.url = newurl; data->change.url_alloc = TRUE; diff --git a/lib/url.c b/lib/url.c index 249d1237d80e5d..761d7cc76dbe19 100644 --- a/lib/url.c +++ b/lib/url.c @@ -92,6 +92,7 @@ bool curl_win32_idn_to_ascii(const char *in, char **out); #include "non-ascii.h" #include "inet_pton.h" #include "getinfo.h" +#include "urlapi-int.h" /* And now for the protocols */ #include "ftp.h" @@ -127,10 +128,6 @@ bool curl_win32_idn_to_ascii(const char *in, char **out); static void conn_free(struct connectdata *conn); static void free_fixed_hostname(struct hostname *host); -static CURLcode parse_url_login(struct Curl_easy *data, - struct connectdata *conn, - char **userptr, char **passwdptr, - char **optionsptr); static unsigned int get_protocol_family(unsigned int protocol); /* Some parts of the code (e.g. chunked encoding) assume this buffer has at @@ -294,6 +291,22 @@ void Curl_freeset(struct Curl_easy *data) Curl_mime_cleanpart(&data->set.mimepost); } +/* free the URL pieces */ +void Curl_up_free(struct Curl_easy *data) +{ + struct urlpieces *up = &data->state.up; + Curl_safefree(up->scheme); + Curl_safefree(up->hostname); + Curl_safefree(up->port); + Curl_safefree(up->user); + Curl_safefree(up->password); + Curl_safefree(up->options); + Curl_safefree(up->path); + Curl_safefree(up->query); + curl_url_cleanup(data->state.uh); + data->state.uh = NULL; +} + /* * This is the internal function curl_easy_cleanup() calls. This should * cleanup and free all resources associated with this sessionhandle. @@ -313,7 +326,6 @@ CURLcode Curl_close(struct Curl_easy *data) Curl_expire_clear(data); /* shut off timers */ m = data->multi; - if(m) /* This handle is still part of a multi handle, take care of this first and detach this handle from there. */ @@ -336,10 +348,6 @@ CURLcode Curl_close(struct Curl_easy *data) if(data->state.rangestringalloc) free(data->state.range); - /* Free the pathbuffer */ - Curl_safefree(data->state.pathbuffer); - data->state.path = NULL; - /* freed here just in case DONE wasn't called */ Curl_free_request_state(data); @@ -359,12 +367,7 @@ CURLcode Curl_close(struct Curl_easy *data) } data->change.referer = NULL; - if(data->change.url_alloc) { - Curl_safefree(data->change.url); - data->change.url_alloc = FALSE; - } - data->change.url = NULL; - + Curl_up_free(data); Curl_safefree(data->state.buffer); Curl_safefree(data->state.headerbuff); Curl_safefree(data->state.ulbuf); @@ -1992,379 +1995,134 @@ static CURLcode findprotocol(struct Curl_easy *data, return CURLE_UNSUPPORTED_PROTOCOL; } + +static CURLcode uc_to_curlcode(CURLUcode uc) +{ + switch(uc) { + default: + return CURLE_URL_MALFORMAT; + case CURLUE_UNSUPPORTED_SCHEME: + return CURLE_UNSUPPORTED_PROTOCOL; + case CURLUE_OUT_OF_MEMORY: + return CURLE_OUT_OF_MEMORY; + case CURLUE_USER_NOT_ALLOWED: + return CURLE_LOGIN_DENIED; + } +} + /* * Parse URL and fill in the relevant members of the connection struct. */ static CURLcode parseurlandfillconn(struct Curl_easy *data, - struct connectdata *conn, - bool *prot_missing, - char **userp, char **passwdp, - char **optionsp) + struct connectdata *conn) { - char *at; - char *fragment; - char *path = data->state.path; - char *query; - int rc; - const char *protop = ""; CURLcode result; - bool rebuild_url = FALSE; - bool url_has_scheme = FALSE; - char protobuf[16]; + CURLU *uh; + CURLUcode uc; + char *hostname; - *prot_missing = FALSE; + Curl_up_free(data); /* cleanup previous leftovers first */ - /* We might pass the entire URL into the request so we need to make sure - * there are no bad characters in there.*/ - if(strpbrk(data->change.url, "\r\n")) { - failf(data, "Illegal characters found in URL"); - return CURLE_URL_MALFORMAT; - } - - /************************************************************* - * Parse the URL. - * - * We need to parse the url even when using the proxy, because we will need - * the hostname and port in case we are trying to SSL connect through the - * proxy -- and we don't know if we will need to use SSL until we parse the - * url ... - ************************************************************/ - if(data->change.url[0] == ':') { - failf(data, "Bad URL, colon is first character"); - return CURLE_URL_MALFORMAT; - } + /* parse the URL */ + uh = data->state.uh = curl_url(); + if(!uh) + return CURLE_OUT_OF_MEMORY; - /* MSDOS/Windows style drive prefix, eg c: in c:foo */ -#define STARTS_WITH_DRIVE_PREFIX(str) \ - ((('a' <= str[0] && str[0] <= 'z') || \ - ('A' <= str[0] && str[0] <= 'Z')) && \ - (str[1] == ':')) - - /* MSDOS/Windows style drive prefix, optionally with - * a '|' instead of ':', followed by a slash or NUL */ -#define STARTS_WITH_URL_DRIVE_PREFIX(str) \ - ((('a' <= (str)[0] && (str)[0] <= 'z') || \ - ('A' <= (str)[0] && (str)[0] <= 'Z')) && \ - ((str)[1] == ':' || (str)[1] == '|') && \ - ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0)) - - /* Don't mistake a drive letter for a scheme if the default protocol is file. - curld --proto-default file c:/foo/bar.txt */ - if(STARTS_WITH_DRIVE_PREFIX(data->change.url) && - data->set.str[STRING_DEFAULT_PROTOCOL] && - strcasecompare(data->set.str[STRING_DEFAULT_PROTOCOL], "file")) { - ; /* do nothing */ - } - else { /* check for a scheme */ - int i; - for(i = 0; i < 16 && data->change.url[i]; ++i) { - if(data->change.url[i] == '/') - break; - if(data->change.url[i] == ':') { - url_has_scheme = TRUE; - break; - } - } + if(data->set.str[STRING_DEFAULT_PROTOCOL] && + !Curl_is_absolute_url(data->change.url, NULL, MAX_SCHEME_LEN)) { + char *url; + if(data->change.url_alloc) + free(data->change.url); + url = aprintf("%s://%s", data->set.str[STRING_DEFAULT_PROTOCOL], + data->change.url); + if(!url) + return CURLE_OUT_OF_MEMORY; + data->change.url = url; + data->change.url_alloc = TRUE; } - /* handle the file: scheme */ - if((url_has_scheme && strncasecompare(data->change.url, "file:", 5)) || - (!url_has_scheme && data->set.str[STRING_DEFAULT_PROTOCOL] && - strcasecompare(data->set.str[STRING_DEFAULT_PROTOCOL], "file"))) { - if(url_has_scheme) - rc = sscanf(data->change.url, "%*15[^\n/:]:%[^\n]", path); - else - rc = sscanf(data->change.url, "%[^\n]", path); + uc = curl_url_set(uh, CURLUPART_URL, data->change.url, + CURLU_GUESS_SCHEME | + CURLU_NON_SUPPORT_SCHEME | + (data->set.disallow_username_in_url ? + CURLU_DISALLOW_USER : 0) | + (data->set.path_as_is ? CURLU_PATH_AS_IS : 0)); + if(uc) + return uc_to_curlcode(uc); - if(rc != 1) { - failf(data, "Bad URL"); - return CURLE_URL_MALFORMAT; - } + uc = curl_url_get(uh, CURLUPART_SCHEME, &data->state.up.scheme, 0); + if(uc) + return uc_to_curlcode(uc); - /* Extra handling URLs with an authority component (i.e. that start with - * "file://") - * - * We allow omitted hostname (e.g. file:/) -- valid according to - * RFC 8089, but not the (current) WHAT-WG URL spec. - */ - if(url_has_scheme && path[0] == '/' && path[1] == '/') { - /* swallow the two slashes */ - char *ptr = &path[2]; - - /* - * According to RFC 8089, a file: URL can be reliably dereferenced if: - * - * o it has no/blank hostname, or - * - * o the hostname matches "localhost" (case-insensitively), or - * - * o the hostname is a FQDN that resolves to this machine. - * - * For brevity, we only consider URLs with empty, "localhost", or - * "127.0.0.1" hostnames as local. - * - * Additionally, there is an exception for URLs with a Windows drive - * letter in the authority (which was accidentally omitted from RFC 8089 - * Appendix E, but believe me, it was meant to be there. --MK) - */ - if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) { - /* the URL includes a host name, it must match "localhost" or - "127.0.0.1" to be valid */ - if(!checkprefix("localhost/", ptr) && - !checkprefix("127.0.0.1/", ptr)) { - failf(data, "Invalid file://hostname/, " - "expected localhost or 127.0.0.1 or none"); - return CURLE_URL_MALFORMAT; - } - ptr += 9; /* now points to the slash after the host */ - } - - /* This cannot be done with strcpy, as the memory chunks overlap! */ - memmove(path, ptr, strlen(ptr) + 1); - } - -#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__) - /* Don't allow Windows drive letters when not in Windows. - * This catches both "file:/c:" and "file:c:" */ - if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) || - STARTS_WITH_URL_DRIVE_PREFIX(path)) { - failf(data, "File drive letters are only accepted in MSDOS/Windows."); - return CURLE_URL_MALFORMAT; - } -#else - /* If the path starts with a slash and a drive letter, ditch the slash */ - if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) { - /* This cannot be done with strcpy, as the memory chunks overlap! */ - memmove(path, &path[1], strlen(&path[1]) + 1); - } -#endif + result = findprotocol(data, conn, data->state.up.scheme); + if(result) + return result; - protop = "file"; /* protocol string */ - *prot_missing = !url_has_scheme; + uc = curl_url_get(uh, CURLUPART_USER, &data->state.up.user, + CURLU_URLDECODE); + if(!uc) { + conn->user = strdup(data->state.up.user); + if(!conn->user) + return CURLE_OUT_OF_MEMORY; + conn->bits.user_passwd = TRUE; } - else { - /* clear path */ - char slashbuf[4]; - path[0] = 0; - - rc = sscanf(data->change.url, - "%15[^\n/:]:%3[/]%[^\n/?#]%[^\n]", - protobuf, slashbuf, conn->host.name, path); - if(2 == rc) { - failf(data, "Bad URL"); - return CURLE_URL_MALFORMAT; - } - if(3 > rc) { - - /* - * The URL was badly formatted, let's try the browser-style _without_ - * protocol specified like 'http://'. - */ - rc = sscanf(data->change.url, "%[^\n/?#]%[^\n]", conn->host.name, path); - if(1 > rc) { - /* - * We couldn't even get this format. - * djgpp 2.04 has a sscanf() bug where 'conn->host.name' is - * assigned, but the return value is EOF! - */ -#if defined(__DJGPP__) && (DJGPP_MINOR == 4) - if(!(rc == -1 && *conn->host.name)) -#endif - { - failf(data, " malformed"); - return CURLE_URL_MALFORMAT; - } - } - - /* - * Since there was no protocol part specified in the URL use the - * user-specified default protocol. If we weren't given a default make a - * guess by matching some protocols against the host's outermost - * sub-domain name. Finally if there was no match use HTTP. - */ - - protop = data->set.str[STRING_DEFAULT_PROTOCOL]; - if(!protop) { - /* Note: if you add a new protocol, please update the list in - * lib/version.c too! */ - if(checkprefix("FTP.", conn->host.name)) - protop = "ftp"; - else if(checkprefix("DICT.", conn->host.name)) - protop = "DICT"; - else if(checkprefix("LDAP.", conn->host.name)) - protop = "LDAP"; - else if(checkprefix("IMAP.", conn->host.name)) - protop = "IMAP"; - else if(checkprefix("SMTP.", conn->host.name)) - protop = "smtp"; - else if(checkprefix("POP3.", conn->host.name)) - protop = "pop3"; - else - protop = "http"; - } + else if(uc != CURLUE_NO_USER) + return uc_to_curlcode(uc); - *prot_missing = TRUE; /* not given in URL */ - } - else { - size_t s = strlen(slashbuf); - protop = protobuf; - if(s != 2) { - infof(data, "Unwillingly accepted illegal URL using %zu slash%s!\n", - s, s>1?"es":""); - - if(data->change.url_alloc) - free(data->change.url); - /* repair the URL to use two slashes */ - data->change.url = aprintf("%s://%s%s", - protobuf, conn->host.name, path); - if(!data->change.url) - return CURLE_OUT_OF_MEMORY; - data->change.url_alloc = TRUE; - } - } + uc = curl_url_get(uh, CURLUPART_PASSWORD, &data->state.up.password, + CURLU_URLDECODE); + if(!uc) { + conn->passwd = strdup(data->state.up.password); + if(!conn->passwd) + return CURLE_OUT_OF_MEMORY; + conn->bits.user_passwd = TRUE; } + else if(uc != CURLUE_NO_PASSWORD) + return uc_to_curlcode(uc); - /* We search for '?' in the host name (but only on the right side of a - * @-letter to allow ?-letters in username and password) to handle things - * like http://example.com?param= (notice the missing '/'). - */ - at = strchr(conn->host.name, '@'); - if(at) - query = strchr(at + 1, '?'); - else - query = strchr(conn->host.name, '?'); - - if(query) { - /* We must insert a slash before the '?'-letter in the URL. If the URL had - a slash after the '?', that is where the path currently begins and the - '?string' is still part of the host name. - - We must move the trailing part from the host name and put it first in - the path. And have it all prefixed with a slash. - */ - - size_t hostlen = strlen(query); - size_t pathlen = strlen(path); - - /* move the existing path plus the zero byte forward, to make room for - the host-name part */ - memmove(path + hostlen + 1, path, pathlen + 1); - - /* now copy the trailing host part in front of the existing path */ - memcpy(path + 1, query, hostlen); - - path[0]='/'; /* prepend the missing slash */ - rebuild_url = TRUE; - - *query = 0; /* now cut off the hostname at the ? */ - } - else if(!path[0]) { - /* if there's no path set, use a single slash */ - strcpy(path, "/"); - rebuild_url = TRUE; + uc = curl_url_get(uh, CURLUPART_OPTIONS, &data->state.up.options, + CURLU_URLDECODE); + if(!uc) { + conn->options = strdup(data->state.up.options); + if(!conn->options) + return CURLE_OUT_OF_MEMORY; } + else if(uc != CURLUE_NO_OPTIONS) + return uc_to_curlcode(uc); - /* If the URL is malformatted (missing a '/' after hostname before path) we - * insert a slash here. The only letters except '/' that can start a path is - * '?' and '#' - as controlled by the two sscanf() patterns above. - */ - if(path[0] != '/') { - /* We need this function to deal with overlapping memory areas. We know - that the memory area 'path' points to is 'urllen' bytes big and that - is bigger than the path. Use +1 to move the zero byte too. */ - memmove(&path[1], path, strlen(path) + 1); - path[0] = '/'; - rebuild_url = TRUE; - } - else if(!data->set.path_as_is) { - /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */ - char *newp = Curl_dedotdotify(path); - if(!newp) + uc = curl_url_get(uh, CURLUPART_HOST, &data->state.up.hostname, 0); + if(uc) { + if(!strcasecompare("file", data->state.up.scheme)) return CURLE_OUT_OF_MEMORY; - - if(strcmp(newp, path)) { - rebuild_url = TRUE; - free(data->state.pathbuffer); - data->state.pathbuffer = newp; - data->state.path = newp; - path = newp; - } - else - free(newp); } - /* - * "rebuild_url" means that one or more URL components have been modified so - * we need to generate an updated full version. We need the corrected URL - * when communicating over HTTP proxy and we don't know at this point if - * we're using a proxy or not. - */ - if(rebuild_url) { - char *reurl; - - size_t plen = strlen(path); /* new path, should be 1 byte longer than - the original */ - size_t prefixlen = strlen(conn->host.name); - - if(!*prot_missing) { - size_t protolen = strlen(protop); - - if(curl_strnequal(protop, data->change.url, protolen)) - prefixlen += protolen; - else { - failf(data, " malformed"); - return CURLE_URL_MALFORMAT; - } + uc = curl_url_get(uh, CURLUPART_PATH, &data->state.up.path, 0); + if(uc) + return uc_to_curlcode(uc); - if(curl_strnequal("://", &data->change.url[protolen], 3)) - prefixlen += 3; - /* only file: is allowed to omit one or both slashes */ - else if(curl_strnequal("file:", data->change.url, 5)) - prefixlen += 1 + (data->change.url[5] == '/'); - else { - failf(data, " malformed"); - return CURLE_URL_MALFORMAT; - } - } - - reurl = malloc(prefixlen + plen + 1); - if(!reurl) + uc = curl_url_get(uh, CURLUPART_PORT, &data->state.up.port, + CURLU_DEFAULT_PORT); + if(uc) { + if(!strcasecompare("file", data->state.up.scheme)) return CURLE_OUT_OF_MEMORY; - - /* copy the prefix */ - memcpy(reurl, data->change.url, prefixlen); - - /* append the trailing piece + zerobyte */ - memcpy(&reurl[prefixlen], path, plen + 1); - - /* possible free the old one */ - if(data->change.url_alloc) { - Curl_safefree(data->change.url); - data->change.url_alloc = FALSE; - } - - infof(data, "Rebuilt URL to: %s\n", reurl); - - data->change.url = reurl; - data->change.url_alloc = TRUE; /* free this later */ + } + else { + unsigned long port = strtoul(data->state.up.port, NULL, 10); + conn->remote_port = curlx_ultous(port); } - result = findprotocol(data, conn, protop); - if(result) - return result; + (void)curl_url_get(uh, CURLUPART_QUERY, &data->state.up.query, 0); - /* - * Parse the login details from the URL and strip them out of - * the host name - */ - result = parse_url_login(data, conn, userp, passwdp, optionsp); - if(result) - return result; + hostname = data->state.up.hostname; + if(!hostname) + /* this is for file:// transfers, get a dummy made */ + hostname = (char *)""; - if(conn->host.name[0] == '[') { + if(hostname[0] == '[') { /* This looks like an IPv6 address literal. See if there is an address - scope if there is no location header */ - char *percent = strchr(conn->host.name, '%'); + scope. */ + char *percent = strchr(++hostname, '%'); + conn->bits.ipv6_ip = TRUE; if(percent) { unsigned int identifier_offset = 3; char *endp; @@ -2412,33 +2170,22 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data, infof(data, "Invalid IPv6 address format\n"); } } + percent = strchr(hostname, ']'); + if(percent) + /* terminate IPv6 numerical at end bracket */ + *percent = 0; } + /* make sure the connect struct gets its own copy of the host name */ + conn->host.rawalloc = strdup(hostname); + if(!conn->host.rawalloc) + return CURLE_OUT_OF_MEMORY; + conn->host.name = conn->host.rawalloc; + if(data->set.scope_id) /* Override any scope that was set above. */ conn->scope_id = data->set.scope_id; - /* Remove the fragment part of the path. Per RFC 2396, this is always the - last part of the URI. We are looking for the first '#' so that we deal - gracefully with non conformant URI such as http://example.com#foo#bar. */ - fragment = strchr(path, '#'); - if(fragment) { - *fragment = 0; - - /* we know the path part ended with a fragment, so we know the full URL - string does too and we need to cut it off from there so it isn't used - over proxy */ - fragment = strchr(data->change.url, '#'); - if(fragment) - *fragment = 0; - } - - /* - * So if the URL was A://B/C#D, - * protop is A - * conn->host.name is B - * data->state.path is /C - */ return CURLE_OK; } @@ -2553,11 +2300,8 @@ static bool check_noproxy(const char *name, const char *no_proxy) if(!endptr) return FALSE; name++; - } - else - endptr = strchr(name, ':'); - if(endptr) namelen = endptr - name; + } else namelen = strlen(name); @@ -3089,131 +2833,6 @@ static CURLcode create_conn_helper_init_proxy(struct connectdata *conn) } #endif /* CURL_DISABLE_PROXY */ -/* - * parse_url_login() - * - * Parse the login details (user name, password and options) from the URL and - * strip them out of the host name - * - * Inputs: data->set.use_netrc (CURLOPT_NETRC) - * conn->host.name - * - * Outputs: (almost :- all currently undefined) - * conn->bits.user_passwd - non-zero if non-default passwords exist - * user - non-zero length if defined - * passwd - non-zero length if defined - * options - non-zero length if defined - * conn->host.name - remove user name and password - */ -static CURLcode parse_url_login(struct Curl_easy *data, - struct connectdata *conn, - char **user, char **passwd, char **options) -{ - CURLcode result = CURLE_OK; - char *userp = NULL; - char *passwdp = NULL; - char *optionsp = NULL; - - /* At this point, we're hoping all the other special cases have - * been taken care of, so conn->host.name is at most - * [user[:password][;options]]@]hostname - * - * We need somewhere to put the embedded details, so do that first. - */ - - char *ptr = strchr(conn->host.name, '@'); - char *login = conn->host.name; - - DEBUGASSERT(!**user); - DEBUGASSERT(!**passwd); - DEBUGASSERT(!**options); - DEBUGASSERT(conn->handler); - - if(!ptr) - goto out; - - /* We will now try to extract the - * possible login information in a string like: - * ftp://user:password@ftp.my.site:8021/README */ - conn->host.name = ++ptr; - - /* So the hostname is sane. Only bother interpreting the - * results if we could care. It could still be wasted - * work because it might be overtaken by the programmatically - * set user/passwd, but doing that first adds more cases here :-( - */ - - if(data->set.use_netrc == CURL_NETRC_REQUIRED) - goto out; - - /* We could use the login information in the URL so extract it. Only parse - options if the handler says we should. */ - result = - Curl_parse_login_details(login, ptr - login - 1, - &userp, &passwdp, - (conn->handler->flags & PROTOPT_URLOPTIONS)? - &optionsp:NULL); - if(result) - goto out; - - if(userp) { - char *newname; - - if(data->set.disallow_username_in_url) { - failf(data, "Option DISALLOW_USERNAME_IN_URL is set " - "and url contains username."); - result = CURLE_LOGIN_DENIED; - goto out; - } - - /* We have a user in the URL */ - conn->bits.userpwd_in_url = TRUE; - conn->bits.user_passwd = TRUE; /* enable user+password */ - - /* Decode the user */ - result = Curl_urldecode(data, userp, 0, &newname, NULL, FALSE); - if(result) { - goto out; - } - - free(*user); - *user = newname; - } - - if(passwdp) { - /* We have a password in the URL so decode it */ - char *newpasswd; - result = Curl_urldecode(data, passwdp, 0, &newpasswd, NULL, FALSE); - if(result) { - goto out; - } - - free(*passwd); - *passwd = newpasswd; - } - - if(optionsp) { - /* We have an options list in the URL so decode it */ - char *newoptions; - result = Curl_urldecode(data, optionsp, 0, &newoptions, NULL, FALSE); - if(result) { - goto out; - } - - free(*options); - *options = newoptions; - } - - - out: - - free(userp); - free(passwdp); - free(optionsp); - - return result; -} - /* * Curl_parse_login_details() * @@ -3347,131 +2966,23 @@ CURLcode Curl_parse_login_details(const char *login, const size_t len, * No matter if we use a proxy or not, we have to figure out the remote * port number of various reasons. * - * To be able to detect port number flawlessly, we must not confuse them - * IPv6-specified addresses in the [0::1] style. (RFC2732) - * - * The conn->host.name is currently [user:passwd@]host[:port] where host - * could be a hostname, IPv4 address or IPv6 address. - * * The port number embedded in the URL is replaced, if necessary. *************************************************************/ static CURLcode parse_remote_port(struct Curl_easy *data, struct connectdata *conn) { - char *portptr; - char endbracket; - - /* Note that at this point, the IPv6 address cannot contain any scope - suffix as that has already been removed in the parseurlandfillconn() - function */ - if((1 == sscanf(conn->host.name, "[%*45[0123456789abcdefABCDEF:.]%c", - &endbracket)) && - (']' == endbracket)) { - /* this is a RFC2732-style specified IP-address */ - conn->bits.ipv6_ip = TRUE; - - conn->host.name++; /* skip over the starting bracket */ - portptr = strchr(conn->host.name, ']'); - if(portptr) { - *portptr++ = '\0'; /* zero terminate, killing the bracket */ - if(*portptr) { - if (*portptr != ':') { - failf(data, "IPv6 closing bracket followed by '%c'", *portptr); - return CURLE_URL_MALFORMAT; - } - } - else - portptr = NULL; /* no port number available */ - } - } - else { -#ifdef ENABLE_IPV6 - struct in6_addr in6; - if(Curl_inet_pton(AF_INET6, conn->host.name, &in6) > 0) { - /* This is a numerical IPv6 address, meaning this is a wrongly formatted - URL */ - failf(data, "IPv6 numerical address used in URL without brackets"); - return CURLE_URL_MALFORMAT; - } -#endif - - portptr = strchr(conn->host.name, ':'); - } if(data->set.use_port && data->state.allow_port) { - /* if set, we use this and ignore the port possibly given in the URL */ + /* if set, we use this instead of the port possibly given in the URL */ + char portbuf[16]; + CURLUcode uc; conn->remote_port = (unsigned short)data->set.use_port; - if(portptr) - *portptr = '\0'; /* cut off the name there anyway - if there was a port - number - since the port number is to be ignored! */ - if(conn->bits.httpproxy) { - /* we need to create new URL with the new port number */ - char *url; - char type[12]=""; - - if(conn->bits.type_set) - snprintf(type, sizeof(type), ";type=%c", - data->set.prefer_ascii?'A': - (data->set.ftp_list_only?'D':'I')); - - /* - * This synthesized URL isn't always right--suffixes like ;type=A are - * stripped off. It would be better to work directly from the original - * URL and simply replace the port part of it. - */ - url = aprintf("%s://%s%s%s:%d%s%s%s", conn->given->scheme, - conn->bits.ipv6_ip?"[":"", conn->host.name, - conn->bits.ipv6_ip?"]":"", conn->remote_port, - data->state.slash_removed?"/":"", data->state.path, - type); - if(!url) - return CURLE_OUT_OF_MEMORY; - - if(data->change.url_alloc) { - Curl_safefree(data->change.url); - data->change.url_alloc = FALSE; - } - - data->change.url = url; - data->change.url_alloc = TRUE; - } - } - else if(portptr) { - /* no CURLOPT_PORT given, extract the one from the URL */ - - char *rest; - long port; - - port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */ - - if((port < 0) || (port > 0xffff)) { - /* Single unix standard says port numbers are 16 bits long */ - failf(data, "Port number out of range"); - return CURLE_URL_MALFORMAT; - } - - if(rest[0]) { - failf(data, "Port number ended with '%c'", rest[0]); - return CURLE_URL_MALFORMAT; - } - - if(rest != &portptr[1]) { - *portptr = '\0'; /* cut off the name there */ - conn->remote_port = curlx_ultous(port); - } - else { - /* Browser behavior adaptation. If there's a colon with no digits after, - just cut off the name there which makes us ignore the colon and just - use the default port. Firefox and Chrome both do that. */ - *portptr = '\0'; - } + snprintf(portbuf, sizeof(portbuf), "%u", conn->remote_port); + uc = curl_url_set(data->state.uh, CURLUPART_PORT, portbuf, 0); + if(uc) + return CURLE_OUT_OF_MEMORY; } - /* only if remote_port was not already parsed off the URL we use the - default port number */ - if(conn->remote_port < 0) - conn->remote_port = (unsigned short)conn->given->defport; - return CURLE_OK; } @@ -3483,11 +2994,16 @@ static CURLcode override_login(struct Curl_easy *data, struct connectdata *conn, char **userp, char **passwdp, char **optionsp) { + bool user_changed = FALSE; + bool passwd_changed = FALSE; + CURLUcode uc; if(data->set.str[STRING_USERNAME]) { free(*userp); *userp = strdup(data->set.str[STRING_USERNAME]); if(!*userp) return CURLE_OUT_OF_MEMORY; + conn->bits.user_passwd = TRUE; /* enable user+password */ + user_changed = TRUE; } if(data->set.str[STRING_PASSWORD]) { @@ -3495,6 +3011,8 @@ static CURLcode override_login(struct Curl_easy *data, *passwdp = strdup(data->set.str[STRING_PASSWORD]); if(!*passwdp) return CURLE_OUT_OF_MEMORY; + conn->bits.user_passwd = TRUE; /* enable user+password */ + passwd_changed = TRUE; } if(data->set.str[STRING_OPTIONS]) { @@ -3506,9 +3024,16 @@ static CURLcode override_login(struct Curl_easy *data, conn->bits.netrc = FALSE; if(data->set.use_netrc != CURL_NETRC_IGNORED) { - int ret = Curl_parsenetrc(conn->host.name, - userp, passwdp, - data->set.str[STRING_NETRC_FILE]); + char *nuser = NULL; + char *npasswd = NULL; + int ret; + + if(data->set.use_netrc == CURL_NETRC_OPTIONAL) + nuser = *userp; /* to separate otherwise indentical machines */ + + ret = Curl_parsenetrc(conn->host.name, + &nuser, &npasswd, + data->set.str[STRING_NETRC_FILE]); if(ret > 0) { infof(data, "Couldn't find host %s in the " DOT_CHAR "netrc file; using defaults\n", @@ -3522,55 +3047,85 @@ static CURLcode override_login(struct Curl_easy *data, file, so that it is safe to use even if we followed a Location: to a different host or similar. */ conn->bits.netrc = TRUE; - conn->bits.user_passwd = TRUE; /* enable user+password */ + + if(data->set.use_netrc == CURL_NETRC_OPTIONAL) { + /* prefer credentials outside netrc */ + if(nuser && !*userp) { + free(*userp); + *userp = nuser; + user_changed = TRUE; + } + if(npasswd && !*passwdp) { + free(*passwdp); + *passwdp = npasswd; + passwd_changed = TRUE; + } + } + else { + /* prefer netrc credentials */ + if(nuser) { + free(*userp); + *userp = nuser; + user_changed = TRUE; + } + if(npasswd) { + free(*passwdp); + *passwdp = npasswd; + passwd_changed = TRUE; + } + } } } + /* for updated strings, we update them in the URL */ + if(user_changed) { + uc = curl_url_set(data->state.uh, CURLUPART_USER, *userp, 0); + if(uc) + return uc_to_curlcode(uc); + } + if(passwd_changed) { + uc = curl_url_set(data->state.uh, CURLUPART_PASSWORD, *passwdp, 0); + if(uc) + return uc_to_curlcode(uc); + } return CURLE_OK; } /* * Set the login details so they're available in the connection */ -static CURLcode set_login(struct connectdata *conn, - const char *user, const char *passwd, - const char *options) +static CURLcode set_login(struct connectdata *conn) { CURLcode result = CURLE_OK; + const char *setuser = CURL_DEFAULT_USER; + const char *setpasswd = CURL_DEFAULT_PASSWORD; /* If our protocol needs a password and we have none, use the defaults */ - if((conn->handler->flags & PROTOPT_NEEDSPWD) && !conn->bits.user_passwd) { - /* Store the default user */ - conn->user = strdup(CURL_DEFAULT_USER); - - /* Store the default password */ - if(conn->user) - conn->passwd = strdup(CURL_DEFAULT_PASSWORD); - else - conn->passwd = NULL; - - /* This is the default password, so DON'T set conn->bits.user_passwd */ - } + if((conn->handler->flags & PROTOPT_NEEDSPWD) && !conn->bits.user_passwd) + ; else { - /* Store the user, zero-length if not set */ - conn->user = strdup(user); - - /* Store the password (only if user is present), zero-length if not set */ - if(conn->user) - conn->passwd = strdup(passwd); - else - conn->passwd = NULL; + setuser = ""; + setpasswd = ""; + } + /* Store the default user */ + if(!conn->user) { + conn->user = strdup(setuser); + if(!conn->user) + return CURLE_OUT_OF_MEMORY; } - if(!conn->user || !conn->passwd) - result = CURLE_OUT_OF_MEMORY; - - /* Store the options, null if not set */ - if(!result && options[0]) { - conn->options = strdup(options); + /* Store the default password */ + if(!conn->passwd) { + conn->passwd = strdup(setpasswd); + if(!conn->passwd) + result = CURLE_OUT_OF_MEMORY; + } - if(!conn->options) + /* if there's a user without password, consider password blank */ + if(conn->user && !conn->passwd) { + conn->passwd = strdup(""); + if(!conn->passwd) result = CURLE_OUT_OF_MEMORY; } @@ -4022,12 +3577,7 @@ static CURLcode create_conn(struct Curl_easy *data, CURLcode result = CURLE_OK; struct connectdata *conn; struct connectdata *conn_temp = NULL; - size_t urllen; - char *user = NULL; - char *passwd = NULL; - char *options = NULL; bool reuse; - bool prot_missing = FALSE; bool connections_available = TRUE; bool force_reuse = FALSE; bool waitpipe = FALSE; @@ -4039,7 +3589,6 @@ static CURLcode create_conn(struct Curl_easy *data, /************************************************************* * Check input data *************************************************************/ - if(!data->change.url) { result = CURLE_URL_MALFORMAT; goto out; @@ -4061,107 +3610,10 @@ static CURLcode create_conn(struct Curl_easy *data, any failure */ *in_connect = conn; - /* This initing continues below, see the comment "Continue connectdata - * initialization here" */ - - /*********************************************************** - * We need to allocate memory to store the path in. We get the size of the - * full URL to be sure, and we need to make it at least 256 bytes since - * other parts of the code will rely on this fact - ***********************************************************/ -#define LEAST_PATH_ALLOC 256 - urllen = strlen(data->change.url); - if(urllen < LEAST_PATH_ALLOC) - urllen = LEAST_PATH_ALLOC; - - /* - * We malloc() the buffers below urllen+2 to make room for 2 possibilities: - * 1 - an extra terminating zero - * 2 - an extra slash (in case a syntax like "www.host.com?moo" is used) - */ - - Curl_safefree(data->state.pathbuffer); - data->state.path = NULL; - - data->state.pathbuffer = malloc(urllen + 2); - if(NULL == data->state.pathbuffer) { - result = CURLE_OUT_OF_MEMORY; /* really bad error */ - goto out; - } - data->state.path = data->state.pathbuffer; - - conn->host.rawalloc = malloc(urllen + 2); - if(NULL == conn->host.rawalloc) { - Curl_safefree(data->state.pathbuffer); - data->state.path = NULL; - result = CURLE_OUT_OF_MEMORY; - goto out; - } - - conn->host.name = conn->host.rawalloc; - conn->host.name[0] = 0; - - user = strdup(""); - passwd = strdup(""); - options = strdup(""); - if(!user || !passwd || !options) { - result = CURLE_OUT_OF_MEMORY; - goto out; - } - - result = parseurlandfillconn(data, conn, &prot_missing, &user, &passwd, - &options); + result = parseurlandfillconn(data, conn); if(result) goto out; - /************************************************************* - * No protocol part in URL was used, add it! - *************************************************************/ - if(prot_missing) { - /* We're guessing prefixes here and if we're told to use a proxy or if - we're going to follow a Location: later or... then we need the protocol - part added so that we have a valid URL. */ - char *reurl; - char *ch_lower; - - reurl = aprintf("%s://%s", conn->handler->scheme, data->change.url); - - if(!reurl) { - result = CURLE_OUT_OF_MEMORY; - goto out; - } - - /* Change protocol prefix to lower-case */ - for(ch_lower = reurl; *ch_lower != ':'; ch_lower++) - *ch_lower = (char)TOLOWER(*ch_lower); - - if(data->change.url_alloc) { - Curl_safefree(data->change.url); - data->change.url_alloc = FALSE; - } - - data->change.url = reurl; - data->change.url_alloc = TRUE; /* free this later */ - } - - /************************************************************* - * If the protocol can't handle url query strings, then cut - * off the unhandable part - *************************************************************/ - if((conn->given->flags&PROTOPT_NOURLQUERY)) { - char *path_q_sep = strchr(conn->data->state.path, '?'); - if(path_q_sep) { - /* according to rfc3986, allow the query (?foo=bar) - also on protocols that can't handle it. - - cut the string-part after '?' - */ - - /* terminate the string */ - path_q_sep[0] = 0; - } - } - if(data->set.str[STRING_BEARER]) { conn->oauth_bearer = strdup(data->set.str[STRING_BEARER]); if(!conn->oauth_bearer) { @@ -4205,10 +3657,12 @@ static CURLcode create_conn(struct Curl_easy *data, /* Check for overridden login details and set them accordingly so they they are known when protocol->setup_connection is called! */ - result = override_login(data, conn, &user, &passwd, &options); + result = override_login(data, conn, &conn->user, &conn->passwd, + &conn->options); if(result) goto out; - result = set_login(conn, user, passwd, options); + + result = set_login(conn); /* default credentials */ if(result) goto out; @@ -4394,6 +3848,9 @@ static CURLcode create_conn(struct Curl_easy *data, * new one. *************************************************************/ + DEBUGASSERT(conn->user); + DEBUGASSERT(conn->passwd); + /* reuse_fresh is TRUE if we are told to use a new connection by force, but we only acknowledge this option if this is not a re-used connection already (which happens due to follow-location or during a HTTP @@ -4569,10 +4026,6 @@ static CURLcode create_conn(struct Curl_easy *data, result = resolve_server(data, conn, async); out: - - free(options); - free(passwd); - free(user); return result; } diff --git a/lib/url.h b/lib/url.h index 7b9aff5c42f2a7..1c18f713783b8c 100644 --- a/lib/url.h +++ b/lib/url.h @@ -48,6 +48,8 @@ CURLcode Curl_open(struct Curl_easy **curl); CURLcode Curl_init_userdefined(struct Curl_easy *data); void Curl_freeset(struct Curl_easy * data); +/* free the URL pieces */ +void Curl_up_free(struct Curl_easy *data); CURLcode Curl_close(struct Curl_easy *data); /* opposite of curl_open() */ CURLcode Curl_connect(struct Curl_easy *, struct connectdata **, bool *async, bool *protocol_connect); diff --git a/lib/urldata.h b/lib/urldata.h index 85712ba20575f9..1fede5090d2f47 100644 --- a/lib/urldata.h +++ b/lib/urldata.h @@ -1224,6 +1224,18 @@ struct time_node { expire_id eid; }; +/* individual pieces of the URL */ +struct urlpieces { + char *scheme; + char *hostname; + char *port; + char *user; + char *password; + char *options; + char *path; + char *query; +}; + struct UrlState { /* Points to the connection cache */ @@ -1314,9 +1326,6 @@ struct UrlState { /* for FTP downloads: how many CRLFs did we converted to LFs? */ curl_off_t crlf_conversions; #endif - char *pathbuffer;/* allocated buffer to store the URL's path part in */ - char *path; /* path to use, points to somewhere within the pathbuffer - area */ bool slash_removed; /* set TRUE if the 'path' points to a path where the initial URL slash separator has been taken off */ bool use_range; @@ -1350,6 +1359,8 @@ struct UrlState { #ifdef CURLDEBUG bool conncache_lock; #endif + CURLU *uh; /* URL handle for the current parsed URL */ + struct urlpieces up; }; diff --git a/tests/data/test325 b/tests/data/test325 index 6d5898d454017e..922f37ba2b6093 100644 --- a/tests/data/test325 +++ b/tests/data/test325 @@ -15,7 +15,7 @@ HTTP/1.1 301 OK Date: Thu, 09 Nov 2010 14:49:00 GMT Server: test-server/fake Content-Length: 7 -Location: http://%HOSTIP:%HTTPPORT/325 +Location: http://somewhere/325 MooMoo @@ -24,7 +24,7 @@ HTTP/1.1 301 OK Date: Thu, 09 Nov 2010 14:49:00 GMT Server: test-server/fake Content-Length: 7 -Location: http://%HOSTIP:%HTTPPORT/325 +Location: http://somewhere/325 diff --git a/tests/data/test523 b/tests/data/test523 index 9abe0ed2230c83..665211d48e0d3c 100644 --- a/tests/data/test523 +++ b/tests/data/test523 @@ -39,7 +39,7 @@ HTTP GET with proxy and CURLOPT_PORT # first URL then proxy -http://www.example.com:999/523 http://%HOSTIP:%HTTPPORT +http://www.example.com:999/523 http://%HOSTIP:%HTTPPORT @@ -50,7 +50,7 @@ http://www.example.com:999/523 http://%HOSTIP:%HTTPPORT ^User-Agent:.* -GET HTTP://www.example.com:19999/523 HTTP/1.1 +GET http://www.example.com:19999/523 HTTP/1.1 Host: www.example.com:19999 Authorization: Basic eHh4Onl5eQ== Accept: */* diff --git a/tests/data/test563 b/tests/data/test563 index cecbedc21443bf..c9df79219725d1 100644 --- a/tests/data/test563 +++ b/tests/data/test563 @@ -47,7 +47,7 @@ ftp_proxy=http://%HOSTIP:%HTTPPORT/ # Verify data after the test has been "shot" -GET FTP://%HOSTIP:%FTPPORT/563;type=A HTTP/1.1 +GET ftp://%HOSTIP:%FTPPORT/563;type=A HTTP/1.1 Host: %HOSTIP:%FTPPORT Accept: */* Proxy-Connection: Keep-Alive