From 46e164069d1a5230e4e64cbd2ff46c46cce056bb Mon Sep 17 00:00:00 2001
From: Daniel Stenberg <daniel@haxx.se>
Date: Fri, 14 Sep 2018 23:33:28 +0200
Subject: [PATCH] url: use the URL API internally as well

... to make it a truly unified URL parser.

Closes #3017
---
 lib/curl_path.c    |   4 +-
 lib/dict.c         |   2 +-
 lib/easy.c         |   4 -
 lib/file.c         |   4 +-
 lib/ftp.c          |  35 +-
 lib/ftp.h          |   2 +
 lib/gopher.c       |   2 +-
 lib/http.c         | 125 +++---
 lib/imap.c         |  20 +-
 lib/ldap.c         |  10 +-
 lib/multi.c        |   9 +-
 lib/pop3.c         |   5 +-
 lib/smb.c          |   2 +-
 lib/smtp.c         |   5 +-
 lib/tftp.c         |   4 +-
 lib/transfer.c     |  45 +--
 lib/url.c          | 987 ++++++++++-----------------------------------
 lib/url.h          |   2 +
 lib/urldata.h      |  17 +-
 tests/data/test325 |   4 +-
 tests/data/test523 |   4 +-
 tests/data/test563 |   2 +-
 22 files changed, 376 insertions(+), 918 deletions(-)

diff --git a/lib/curl_path.c b/lib/curl_path.c
index e843deac7cfe06..68f3e44ba8f8e7 100644
--- a/lib/curl_path.c
+++ b/lib/curl_path.c
@@ -5,7 +5,7 @@
  *                            | (__| |_| |  _ <| |___
  *                             \___|\___/|_| \_\_____|
  *
- * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
  *
  * This software is licensed as described in the file COPYING, which
  * you should have received as part of this distribution. The terms
@@ -39,7 +39,7 @@ CURLcode Curl_getworkingpath(struct connectdata *conn,
   char *working_path;
   size_t working_path_len;
   CURLcode result =
-    Curl_urldecode(data, data->state.path, 0, &working_path,
+    Curl_urldecode(data, data->state.up.path, 0, &working_path,
                    &working_path_len, FALSE);
   if(result)
     return result;
diff --git a/lib/dict.c b/lib/dict.c
index 408d57b92f8701..78ef046d4434ff 100644
--- a/lib/dict.c
+++ b/lib/dict.c
@@ -136,7 +136,7 @@ static CURLcode dict_do(struct connectdata *conn, bool *done)
   struct Curl_easy *data = conn->data;
   curl_socket_t sockfd = conn->sock[FIRSTSOCKET];
 
-  char *path = data->state.path;
+  char *path = data->state.up.path;
   curl_off_t *bytecount = &data->req.bytecount;
 
   *done = TRUE; /* unconditionally */
diff --git a/lib/easy.c b/lib/easy.c
index 88fc4f4601a365..fb9105a1cebd49 100644
--- a/lib/easy.c
+++ b/lib/easy.c
@@ -1002,10 +1002,6 @@ struct Curl_easy *curl_easy_duphandle(struct Curl_easy *data)
  */
 void curl_easy_reset(struct Curl_easy *data)
 {
-  Curl_safefree(data->state.pathbuffer);
-
-  data->state.path = NULL;
-
   Curl_free_request_state(data);
 
   /* zero out UserDefined data: */
diff --git a/lib/file.c b/lib/file.c
index 542f34a4523230..3cfa0e703862b4 100644
--- a/lib/file.c
+++ b/lib/file.c
@@ -143,7 +143,7 @@ static CURLcode file_connect(struct connectdata *conn, bool *done)
 #endif
   size_t real_path_len;
 
-  CURLcode result = Curl_urldecode(data, data->state.path, 0, &real_path,
+  CURLcode result = Curl_urldecode(data, data->state.up.path, 0, &real_path,
                                    &real_path_len, FALSE);
   if(result)
     return result;
@@ -197,7 +197,7 @@ static CURLcode file_connect(struct connectdata *conn, bool *done)
 
   file->fd = fd;
   if(!data->set.upload && (fd == -1)) {
-    failf(data, "Couldn't open file %s", data->state.path);
+    failf(data, "Couldn't open file %s", data->state.up.path);
     file_done(conn, CURLE_FILE_COULDNT_READ_FILE, FALSE);
     return CURLE_FILE_COULDNT_READ_FILE;
   }
diff --git a/lib/ftp.c b/lib/ftp.c
index 429708fc560866..270b1e5f0db845 100644
--- a/lib/ftp.c
+++ b/lib/ftp.c
@@ -1444,6 +1444,7 @@ static CURLcode ftp_state_list(struct connectdata *conn)
 {
   CURLcode result = CURLE_OK;
   struct Curl_easy *data = conn->data;
+  struct FTP *ftp = data->req.protop;
 
   /* If this output is to be machine-parsed, the NLST command might be better
      to use, since the LIST command output is not specified or standard in any
@@ -1460,7 +1461,7 @@ static CURLcode ftp_state_list(struct connectdata *conn)
      then just do LIST (in that case: nothing to do here)
   */
   char *cmd, *lstArg, *slashPos;
-  const char *inpath = data->state.path;
+  const char *inpath = ftp->path;
 
   lstArg = NULL;
   if((data->set.ftp_filemethod == FTPFILE_NOCWD) &&
@@ -3141,7 +3142,7 @@ static CURLcode ftp_done(struct connectdata *conn, CURLcode status,
   int ftpcode;
   CURLcode result = CURLE_OK;
   char *path = NULL;
-  const char *path_to_use = data->state.path;
+  const char *path_to_use = ftp ? ftp->path : NULL; /* ftp may be NULL */
 
   if(!ftp)
     return CURLE_OK;
@@ -3346,7 +3347,7 @@ static CURLcode ftp_done(struct connectdata *conn, CURLcode status,
   /* Send any post-transfer QUOTE strings? */
   if(!status && !result && !premature && data->set.postquote)
     result = ftp_sendquote(conn, data->set.postquote);
-
+  Curl_safefree(ftp->pathalloc);
   return result;
 }
 
@@ -3695,12 +3696,13 @@ static void wc_data_dtor(void *ptr)
 static CURLcode init_wc_data(struct connectdata *conn)
 {
   char *last_slash;
-  char *path = conn->data->state.path;
+  struct FTP *ftp = conn->data->req.protop;
+  char *path = ftp->path;
   struct WildcardData *wildcard = &(conn->data->wildcard);
   CURLcode result = CURLE_OK;
   struct ftp_wc *ftpwc = NULL;
 
-  last_slash = strrchr(conn->data->state.path, '/');
+  last_slash = strrchr(ftp->path, '/');
   if(last_slash) {
     last_slash++;
     if(last_slash[0] == '\0') {
@@ -3757,7 +3759,7 @@ static CURLcode init_wc_data(struct connectdata *conn)
     goto fail;
   }
 
-  wildcard->path = strdup(conn->data->state.path);
+  wildcard->path = strdup(ftp->path);
   if(!wildcard->path) {
     result = CURLE_OUT_OF_MEMORY;
     goto fail;
@@ -3828,16 +3830,15 @@ static CURLcode wc_statemach(struct connectdata *conn)
     /* filelist has at least one file, lets get first one */
     struct ftp_conn *ftpc = &conn->proto.ftpc;
     struct curl_fileinfo *finfo = wildcard->filelist.head->ptr;
+    struct FTP *ftp = conn->data->req.protop;
 
     char *tmp_path = aprintf("%s%s", wildcard->path, finfo->filename);
     if(!tmp_path)
       return CURLE_OUT_OF_MEMORY;
 
-    /* switch default "state.pathbuffer" and tmp_path, good to see
-       ftp_parse_url_path function to understand this trick */
-    Curl_safefree(conn->data->state.pathbuffer);
-    conn->data->state.pathbuffer = tmp_path;
-    conn->data->state.path = tmp_path;
+    /* switch default ftp->path and tmp_path */
+    free(ftp->pathalloc);
+    ftp->pathalloc = ftp->path = tmp_path;
 
     infof(conn->data, "Wildcard - START of \"%s\"\n", finfo->filename);
     if(conn->data->set.chunk_bgn) {
@@ -4105,7 +4106,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn)
   struct FTP *ftp = data->req.protop;
   struct ftp_conn *ftpc = &conn->proto.ftpc;
   const char *slash_pos;  /* position of the first '/' char in curpos */
-  const char *path_to_use = data->state.path;
+  const char *path_to_use = ftp->path;
   const char *cur_pos;
   const char *filename = NULL;
 
@@ -4191,7 +4192,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn)
       /* parse the URL path into separate path components */
       while((slash_pos = strchr(cur_pos, '/')) != NULL) {
         /* 1 or 0 pointer offset to indicate absolute directory */
-        ssize_t absolute_dir = ((cur_pos - data->state.path > 0) &&
+        ssize_t absolute_dir = ((cur_pos - ftp->path > 0) &&
                                 (ftpc->dirdepth == 0))?1:0;
 
         /* seek out the next path component */
@@ -4268,7 +4269,7 @@ CURLcode ftp_parse_url_path(struct connectdata *conn)
     size_t dlen;
     char *path;
     CURLcode result =
-      Curl_urldecode(conn->data, data->state.path, 0, &path, &dlen, TRUE);
+      Curl_urldecode(conn->data, ftp->path, 0, &path, &dlen, TRUE);
     if(result) {
       freedirs(ftpc);
       return result;
@@ -4388,16 +4389,16 @@ static CURLcode ftp_setup_connection(struct connectdata *conn)
   char *type;
   struct FTP *ftp;
 
-  conn->data->req.protop = ftp = malloc(sizeof(struct FTP));
+  conn->data->req.protop = ftp = calloc(sizeof(struct FTP), 1);
   if(NULL == ftp)
     return CURLE_OUT_OF_MEMORY;
 
-  data->state.path++;   /* don't include the initial slash */
+  ftp->path = &data->state.up.path[1]; /* don't include the initial slash */
   data->state.slash_removed = TRUE; /* we've skipped the slash */
 
   /* FTP URLs support an extension like ";type=<typecode>" that
    * we'll try to get now! */
-  type = strstr(data->state.path, ";type=");
+  type = strstr(ftp->path, ";type=");
 
   if(!type)
     type = strstr(conn->host.rawalloc, ";type=");
diff --git a/lib/ftp.h b/lib/ftp.h
index 7ec339118ebff1..38d03223ca8912 100644
--- a/lib/ftp.h
+++ b/lib/ftp.h
@@ -105,6 +105,8 @@ struct FTP {
   curl_off_t *bytecountp;
   char *user;    /* user name string */
   char *passwd;  /* password string */
+  char *path;    /* points to the urlpieces struct field */
+  char *pathalloc; /* if non-NULL a pointer to an allocated path */
 
   /* transfer a file/body or not, done as a typedefed enum just to make
      debuggers display the full symbol and not just the numerical value */
diff --git a/lib/gopher.c b/lib/gopher.c
index 3ecee9bdc31655..b441a641d9dea3 100644
--- a/lib/gopher.c
+++ b/lib/gopher.c
@@ -78,7 +78,7 @@ static CURLcode gopher_do(struct connectdata *conn, bool *done)
   curl_socket_t sockfd = conn->sock[FIRSTSOCKET];
 
   curl_off_t *bytecount = &data->req.bytecount;
-  char *path = data->state.path;
+  char *path = data->state.up.path;
   char *sel = NULL;
   char *sel_org = NULL;
   ssize_t amount, k;
diff --git a/lib/http.c b/lib/http.c
index c1d0d68cd30e42..f6b24f0d560e4e 100644
--- a/lib/http.c
+++ b/lib/http.c
@@ -1877,7 +1877,8 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
   struct Curl_easy *data = conn->data;
   CURLcode result = CURLE_OK;
   struct HTTP *http;
-  const char *ppath = data->state.path;
+  const char *path = data->state.up.path;
+  const char *query = data->state.up.query;
   bool paste_ftp_userpwd = FALSE;
   char ftp_typecode[sizeof("/;type=?")] = "";
   const char *host = conn->host.name;
@@ -1995,7 +1996,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
   }
 
   /* setup the authentication headers */
-  result = Curl_http_output_auth(conn, request, ppath, FALSE);
+  result = Curl_http_output_auth(conn, request, path, FALSE);
   if(result)
     return result;
 
@@ -2223,47 +2224,59 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
     /* The path sent to the proxy is in fact the entire URL. But if the remote
        host is a IDN-name, we must make sure that the request we produce only
        uses the encoded host name! */
+
+    /* and no fragment part */
+    CURLUcode uc;
+    char *url;
+    CURLU *h = curl_url_dup(data->state.uh);
+    if(!h)
+      return CURLE_OUT_OF_MEMORY;
+
     if(conn->host.dispname != conn->host.name) {
-      char *url = data->change.url;
-      ptr = strstr(url, conn->host.dispname);
-      if(ptr) {
-        /* This is where the display name starts in the URL, now replace this
-           part with the encoded name. TODO: This method of replacing the host
-           name is rather crude as I believe there's a slight risk that the
-           user has entered a user name or password that contain the host name
-           string. */
-        size_t currlen = strlen(conn->host.dispname);
-        size_t newlen = strlen(conn->host.name);
-        size_t urllen = strlen(url);
-
-        char *newurl;
-
-        newurl = malloc(urllen + newlen - currlen + 1);
-        if(newurl) {
-          /* copy the part before the host name */
-          memcpy(newurl, url, ptr - url);
-          /* append the new host name instead of the old */
-          memcpy(newurl + (ptr - url), conn->host.name, newlen);
-          /* append the piece after the host name */
-          memcpy(newurl + newlen + (ptr - url),
-                 ptr + currlen, /* copy the trailing zero byte too */
-                 urllen - (ptr-url) - currlen + 1);
-          if(data->change.url_alloc) {
-            Curl_safefree(data->change.url);
-            data->change.url_alloc = FALSE;
-          }
-          data->change.url = newurl;
-          data->change.url_alloc = TRUE;
-        }
-        else
-          return CURLE_OUT_OF_MEMORY;
+      uc = curl_url_set(h, CURLUPART_HOST, conn->host.name, 0);
+      if(uc) {
+        curl_url_cleanup(h);
+        return CURLE_OUT_OF_MEMORY;
       }
     }
-    ppath = data->change.url;
-    if(checkprefix("ftp://", ppath)) {
+    uc = curl_url_set(h, CURLUPART_FRAGMENT, NULL, 0);
+    if(uc) {
+      curl_url_cleanup(h);
+      return CURLE_OUT_OF_MEMORY;
+    }
+
+    if(strcasecompare("http", data->state.up.scheme)) {
+      /* when getting HTTP, we don't want the userinfo in the URL */
+      uc = curl_url_set(h, CURLUPART_USER, NULL, 0);
+      if(uc) {
+        curl_url_cleanup(h);
+        return CURLE_OUT_OF_MEMORY;
+      }
+      uc = curl_url_set(h, CURLUPART_PASSWORD, NULL, 0);
+      if(uc) {
+        curl_url_cleanup(h);
+        return CURLE_OUT_OF_MEMORY;
+      }
+    }
+    /* now extract the new version of the URL */
+    uc = curl_url_get(h, CURLUPART_URL, &url, 0);
+    if(uc) {
+      curl_url_cleanup(h);
+      return CURLE_OUT_OF_MEMORY;
+    }
+
+    if(data->change.url_alloc)
+      free(data->change.url);
+
+    data->change.url = url;
+    data->change.url_alloc = TRUE;
+
+    curl_url_cleanup(h);
+
+    if(strcasecompare("ftp", data->state.up.scheme)) {
       if(data->set.proxy_transfer_mode) {
         /* when doing ftp, append ;type=<a|i> if not present */
-        char *type = strstr(ppath, ";type=");
+        char *type = strstr(path, ";type=");
         if(type && type[6] && type[7] == 0) {
           switch(Curl_raw_toupper(type[6])) {
           case 'A':
@@ -2278,7 +2291,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
           char *p = ftp_typecode;
           /* avoid sending invalid URLs like ftp://example.com;type=i if the
            * user specified ftp://example.com without the slash */
-          if(!*data->state.path && ppath[strlen(ppath) - 1] != '/') {
+          if(!*data->state.up.path && path[strlen(path) - 1] != '/') {
             *p++ = '/';
           }
           snprintf(p, sizeof(ftp_typecode) - 1, ";type=%c",
@@ -2431,18 +2444,32 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
   if(result)
     return result;
 
-  if(data->set.str[STRING_TARGET])
-    ppath = data->set.str[STRING_TARGET];
+  if(data->set.str[STRING_TARGET]) {
+    path = data->set.str[STRING_TARGET];
+    query = NULL;
+  }
 
   /* url */
-  if(paste_ftp_userpwd)
+  if(conn->bits.httpproxy && !conn->bits.tunnel_proxy) {
+    char *url = data->change.url;
+    result = Curl_add_buffer(&req_buffer, url, strlen(url));
+    if(result)
+      return result;
+  }
+  else if(paste_ftp_userpwd)
     result = Curl_add_bufferf(&req_buffer, "ftp://%s:%s@%s",
                               conn->user, conn->passwd,
-                              ppath + sizeof("ftp://") - 1);
-  else
-    result = Curl_add_buffer(&req_buffer, ppath, strlen(ppath));
-  if(result)
-    return result;
+                              path + sizeof("ftp://") - 1);
+  else {
+    result = Curl_add_buffer(&req_buffer, path, strlen(path));
+    if(result)
+      return result;
+    if(query) {
+      result = Curl_add_bufferf(&req_buffer, "?%s", query);
+      if(result)
+        return result;
+    }
+  }
 
   result =
     Curl_add_bufferf(&req_buffer,
@@ -2515,7 +2542,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done)
       co = Curl_cookie_getlist(data->cookies,
                                conn->allocptr.cookiehost?
                                conn->allocptr.cookiehost:host,
-                               data->state.path,
+                               data->state.up.path,
                                (conn->handler->protocol&CURLPROTO_HTTPS)?
                                TRUE:FALSE);
       Curl_share_unlock(data, CURL_LOCK_DATA_COOKIE);
@@ -3836,7 +3863,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data,
                          here, or else use real peer host name. */
                       conn->allocptr.cookiehost?
                       conn->allocptr.cookiehost:conn->host.name,
-                      data->state.path);
+                      data->state.up.path);
       Curl_share_unlock(data, CURL_LOCK_DATA_COOKIE);
     }
 #endif
diff --git a/lib/imap.c b/lib/imap.c
index 63fcb4d41f4f16..3ef89097f9a32b 100644
--- a/lib/imap.c
+++ b/lib/imap.c
@@ -1717,8 +1717,6 @@ static CURLcode imap_regular_transfer(struct connectdata *conn,
 
 static CURLcode imap_setup_connection(struct connectdata *conn)
 {
-  struct Curl_easy *data = conn->data;
-
   /* Initialise the IMAP layer */
   CURLcode result = imap_init(conn);
   if(result)
@@ -1726,7 +1724,6 @@ static CURLcode imap_setup_connection(struct connectdata *conn)
 
   /* Clear the TLS upgraded flag */
   conn->tls_upgraded = FALSE;
-  data->state.path++;   /* don't include the initial slash */
 
   return CURLE_OK;
 }
@@ -1959,7 +1956,7 @@ static CURLcode imap_parse_url_path(struct connectdata *conn)
   CURLcode result = CURLE_OK;
   struct Curl_easy *data = conn->data;
   struct IMAP *imap = data->req.protop;
-  const char *begin = data->state.path;
+  const char *begin = &data->state.up.path[1]; /* skip leading slash */
   const char *ptr = begin;
 
   /* See how much of the URL is a valid path and decode it */
@@ -2065,17 +2062,10 @@ static CURLcode imap_parse_url_path(struct connectdata *conn)
 
   /* Does the URL contain a query parameter? Only valid when we have a mailbox
      and no UID as per RFC-5092 */
-  if(imap->mailbox && !imap->uid && !imap->mindex && *ptr == '?') {
-    /* Find the length of the query parameter */
-    begin = ++ptr;
-    while(imap_is_bchar(*ptr))
-      ptr++;
-
-    /* Decode the query parameter */
-    result = Curl_urldecode(data, begin, ptr - begin, &imap->query, NULL,
-                            TRUE);
-    if(result)
-      return result;
+  if(imap->mailbox && !imap->uid && !imap->mindex) {
+    /* Get the query parameter, URL decoded */
+    (void)curl_url_get(data->state.uh, CURLUPART_QUERY, &imap->query,
+                       CURLU_URLDECODE);
   }
 
   /* Any extra stuff at the end of the URL is an error */
diff --git a/lib/ldap.c b/lib/ldap.c
index 4d8f4fa2883a94..e42d1fbbceb041 100644
--- a/lib/ldap.c
+++ b/lib/ldap.c
@@ -5,7 +5,7 @@
  *                | (__| |_| |  _ <| |___
  *                 \___|\___/|_| \_\_____|
  *
- * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
  *
  * This software is licensed as described in the file COPYING, which
  * you should have received as part of this distribution. The terms
@@ -838,9 +838,9 @@ static int _ldap_url_parse2(const struct connectdata *conn, LDAPURLDesc *ludp)
   size_t i;
 
   if(!conn->data ||
-     !conn->data->state.path ||
-     conn->data->state.path[0] != '/' ||
-     !checkprefix("LDAP", conn->data->change.url))
+     !conn->data->state.up.path ||
+     conn->data->state.up.path[0] != '/' ||
+     !strcasecompare("LDAP", conn->data->state.up.scheme))
     return LDAP_INVALID_SYNTAX;
 
   ludp->lud_scope = LDAP_SCOPE_BASE;
@@ -848,7 +848,7 @@ static int _ldap_url_parse2(const struct connectdata *conn, LDAPURLDesc *ludp)
   ludp->lud_host  = conn->host.name;
 
   /* Duplicate the path */
-  p = path = strdup(conn->data->state.path + 1);
+  p = path = strdup(conn->data->state.up.path + 1);
   if(!path)
     return LDAP_NO_MEMORY;
 
diff --git a/lib/multi.c b/lib/multi.c
index 2faeaa74f9dee3..d5e09aab48e2bb 100644
--- a/lib/multi.c
+++ b/lib/multi.c
@@ -542,10 +542,8 @@ static CURLcode multi_done(struct connectdata **connp,
   Curl_getoff_all_pipelines(data, conn);
 
   /* Cleanup possible redirect junk */
-  free(data->req.newurl);
-  data->req.newurl = NULL;
-  free(data->req.location);
-  data->req.location = NULL;
+  Curl_safefree(data->req.newurl);
+  Curl_safefree(data->req.location);
 
   switch(status) {
   case CURLE_ABORTED_BY_CALLBACK:
@@ -657,7 +655,6 @@ static CURLcode multi_done(struct connectdata **connp,
                     cache here, and therefore cannot be used from this point on
                  */
   Curl_free_request_state(data);
-
   return result;
 }
 
@@ -2015,8 +2012,6 @@ static CURLMcode multi_runsingle(struct Curl_multi *multi,
       }
       else if(comeback)
         rc = CURLM_CALL_MULTI_PERFORM;
-
-      free(newurl);
       break;
     }
 
diff --git a/lib/pop3.c b/lib/pop3.c
index cd994f63d1c8c7..5e0fd2299beb86 100644
--- a/lib/pop3.c
+++ b/lib/pop3.c
@@ -1303,8 +1303,6 @@ static CURLcode pop3_regular_transfer(struct connectdata *conn,
 
 static CURLcode pop3_setup_connection(struct connectdata *conn)
 {
-  struct Curl_easy *data = conn->data;
-
   /* Initialise the POP3 layer */
   CURLcode result = pop3_init(conn);
   if(result)
@@ -1312,7 +1310,6 @@ static CURLcode pop3_setup_connection(struct connectdata *conn)
 
   /* Clear the TLS upgraded flag */
   conn->tls_upgraded = FALSE;
-  data->state.path++;   /* don't include the initial slash */
 
   return CURLE_OK;
 }
@@ -1387,7 +1384,7 @@ static CURLcode pop3_parse_url_path(struct connectdata *conn)
   /* The POP3 struct is already initialised in pop3_connect() */
   struct Curl_easy *data = conn->data;
   struct POP3 *pop3 = data->req.protop;
-  const char *path = data->state.path;
+  const char *path = &data->state.up.path[1]; /* skip leading slash */
 
   /* URL decode the path for the message ID */
   return Curl_urldecode(data, path, 0, &pop3->id, NULL, TRUE);
diff --git a/lib/smb.c b/lib/smb.c
index e1209e0995e161..e4f266e1922565 100644
--- a/lib/smb.c
+++ b/lib/smb.c
@@ -969,7 +969,7 @@ static CURLcode smb_parse_url_path(struct connectdata *conn)
   char *slash;
 
   /* URL decode the path */
-  result = Curl_urldecode(data, data->state.path, 0, &path, NULL, TRUE);
+  result = Curl_urldecode(data, data->state.up.path, 0, &path, NULL, TRUE);
   if(result)
     return result;
 
diff --git a/lib/smtp.c b/lib/smtp.c
index 50c0b347771a5c..587562306ab707 100644
--- a/lib/smtp.c
+++ b/lib/smtp.c
@@ -1441,7 +1441,6 @@ static CURLcode smtp_regular_transfer(struct connectdata *conn,
 
 static CURLcode smtp_setup_connection(struct connectdata *conn)
 {
-  struct Curl_easy *data = conn->data;
   CURLcode result;
 
   /* Clear the TLS upgraded flag */
@@ -1452,8 +1451,6 @@ static CURLcode smtp_setup_connection(struct connectdata *conn)
   if(result)
     return result;
 
-  data->state.path++;   /* don't include the initial slash */
-
   return CURLE_OK;
 }
 
@@ -1507,7 +1504,7 @@ static CURLcode smtp_parse_url_path(struct connectdata *conn)
   /* The SMTP struct is already initialised in smtp_connect() */
   struct Curl_easy *data = conn->data;
   struct smtp_conn *smtpc = &conn->proto.smtpc;
-  const char *path = data->state.path;
+  const char *path = &data->state.up.path[1]; /* skip leading slash */
   char localhost[HOSTNAME_MAX + 1];
 
   /* Calculate the path if necessary */
diff --git a/lib/tftp.c b/lib/tftp.c
index e5bc80b02b5fed..5b74e8e08cd433 100644
--- a/lib/tftp.c
+++ b/lib/tftp.c
@@ -485,7 +485,7 @@ static CURLcode tftp_send_first(tftp_state_data_t *state, tftp_event_t event)
     /* As RFC3617 describes the separator slash is not actually part of the
        file name so we skip the always-present first letter of the path
        string. */
-    result = Curl_urldecode(data, &state->conn->data->state.path[1], 0,
+    result = Curl_urldecode(data, &state->conn->data->state.up.path[1], 0,
                             &filename, NULL, FALSE);
     if(result)
       return result;
@@ -1374,7 +1374,7 @@ static CURLcode tftp_setup_connection(struct connectdata * conn)
 
   /* TFTP URLs support an extension like ";mode=<typecode>" that
    * we'll try to get now! */
-  type = strstr(data->state.path, ";mode=");
+  type = strstr(data->state.up.path, ";mode=");
 
   if(!type)
     type = strstr(conn->host.rawalloc, ";mode=");
diff --git a/lib/transfer.c b/lib/transfer.c
index 3d8089ee6daa9d..6a80bf31a311fb 100644
--- a/lib/transfer.c
+++ b/lib/transfer.c
@@ -567,7 +567,7 @@ static CURLcode readwrite_data(struct Curl_easy *data,
             infof(data,
                   "Rewinding stream by : %zd"
                   " bytes on url %s (zero-length body)\n",
-                  nread, data->state.path);
+                  nread, data->state.up.path);
             read_rewind(conn, (size_t)nread);
           }
           else {
@@ -575,7 +575,7 @@ static CURLcode readwrite_data(struct Curl_easy *data,
                   "Excess found in a non pipelined read:"
                   " excess = %zd"
                   " url = %s (zero-length body)\n",
-                  nread, data->state.path);
+                  nread, data->state.up.path);
           }
         }
 
@@ -744,7 +744,7 @@ static CURLcode readwrite_data(struct Curl_easy *data,
                   " bytes on url %s (size = %" CURL_FORMAT_CURL_OFF_T
                   ", maxdownload = %" CURL_FORMAT_CURL_OFF_T
                   ", bytecount = %" CURL_FORMAT_CURL_OFF_T ", nread = %zd)\n",
-                  excess, data->state.path,
+                  excess, data->state.up.path,
                   k->size, k->maxdownload, k->bytecount, nread);
             read_rewind(conn, excess);
           }
@@ -1474,6 +1474,7 @@ CURLcode Curl_follow(struct Curl_easy *data,
   /* Location: redirect */
   bool disallowport = FALSE;
   bool reachedmax = FALSE;
+  CURLUcode uc;
 
   if(type == FOLLOW_REDIR) {
     if((data->set.maxredirs != -1) &&
@@ -1506,33 +1507,21 @@ CURLcode Curl_follow(struct Curl_easy *data,
     }
   }
 
-  if(!Curl_is_absolute_url(newurl, NULL, 8)) {
-    /***
-     *DANG* this is an RFC 2068 violation. The URL is supposed
-     to be absolute and this doesn't seem to be that!
-     */
-    char *absolute = Curl_concat_url(data->change.url, newurl);
-    if(!absolute)
-      return CURLE_OUT_OF_MEMORY;
-    newurl = absolute;
-  }
-  else {
-    /* The new URL MAY contain space or high byte values, that means a mighty
-       stupid redirect URL but we still make an effort to do "right". */
-    char *newest;
-    size_t newlen = Curl_strlen_url(newurl, FALSE);
-
+  if(Curl_is_absolute_url(newurl, NULL, MAX_SCHEME_LEN))
     /* This is an absolute URL, don't allow the custom port number */
     disallowport = TRUE;
 
-    newest = malloc(newlen + 1); /* get memory for this */
-    if(!newest)
-      return CURLE_OUT_OF_MEMORY;
-
-    Curl_strcpy_url(newest, newurl, FALSE); /* create a space-free URL */
-    newurl = newest; /* use this instead now */
+  DEBUGASSERT(data->state.uh);
+  uc = curl_url_set(data->state.uh, CURLUPART_URL, newurl, 0);
+  free(newurl);
+  if(uc)
+    /* TODO: consider an error code remap here */
+    return CURLE_URL_MALFORMAT;
 
-  }
+  uc = curl_url_get(data->state.uh, CURLUPART_URL, &newurl, 0);
+  if(uc)
+    /* TODO: consider an error code remap here */
+    return CURLE_OUT_OF_MEMORY;
 
   if(type == FOLLOW_FAKE) {
     /* we're only figuring out the new url if we would've followed locations
@@ -1549,10 +1538,8 @@ CURLcode Curl_follow(struct Curl_easy *data,
   if(disallowport)
     data->state.allow_port = FALSE;
 
-  if(data->change.url_alloc) {
+  if(data->change.url_alloc)
     Curl_safefree(data->change.url);
-    data->change.url_alloc = FALSE;
-  }
 
   data->change.url = newurl;
   data->change.url_alloc = TRUE;
diff --git a/lib/url.c b/lib/url.c
index 249d1237d80e5d..761d7cc76dbe19 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -92,6 +92,7 @@ bool curl_win32_idn_to_ascii(const char *in, char **out);
 #include "non-ascii.h"
 #include "inet_pton.h"
 #include "getinfo.h"
+#include "urlapi-int.h"
 
 /* And now for the protocols */
 #include "ftp.h"
@@ -127,10 +128,6 @@ bool curl_win32_idn_to_ascii(const char *in, char **out);
 
 static void conn_free(struct connectdata *conn);
 static void free_fixed_hostname(struct hostname *host);
-static CURLcode parse_url_login(struct Curl_easy *data,
-                                struct connectdata *conn,
-                                char **userptr, char **passwdptr,
-                                char **optionsptr);
 static unsigned int get_protocol_family(unsigned int protocol);
 
 /* Some parts of the code (e.g. chunked encoding) assume this buffer has at
@@ -294,6 +291,22 @@ void Curl_freeset(struct Curl_easy *data)
   Curl_mime_cleanpart(&data->set.mimepost);
 }
 
+/* free all URL pieces in state.up and clean up the state.uh URL handle */
+void Curl_up_free(struct Curl_easy *data)
+{
+  struct urlpieces *up = &data->state.up;
+  Curl_safefree(up->scheme);
+  Curl_safefree(up->hostname);
+  Curl_safefree(up->port);
+  Curl_safefree(up->user);
+  Curl_safefree(up->password);
+  Curl_safefree(up->options);
+  Curl_safefree(up->path);
+  Curl_safefree(up->query);
+  curl_url_cleanup(data->state.uh);
+  data->state.uh = NULL; /* handle was cleaned up, don't keep the pointer */
+}
+
 /*
  * This is the internal function curl_easy_cleanup() calls. This should
  * cleanup and free all resources associated with this sessionhandle.
@@ -313,7 +326,6 @@ CURLcode Curl_close(struct Curl_easy *data)
   Curl_expire_clear(data); /* shut off timers */
 
   m = data->multi;
-
   if(m)
     /* This handle is still part of a multi handle, take care of this first
        and detach this handle from there. */
@@ -336,10 +348,6 @@ CURLcode Curl_close(struct Curl_easy *data)
   if(data->state.rangestringalloc)
     free(data->state.range);
 
-  /* Free the pathbuffer */
-  Curl_safefree(data->state.pathbuffer);
-  data->state.path = NULL;
-
   /* freed here just in case DONE wasn't called */
   Curl_free_request_state(data);
 
@@ -359,12 +367,7 @@ CURLcode Curl_close(struct Curl_easy *data)
   }
   data->change.referer = NULL;
 
-  if(data->change.url_alloc) {
-    Curl_safefree(data->change.url);
-    data->change.url_alloc = FALSE;
-  }
-  data->change.url = NULL;
-
+  Curl_up_free(data);
   Curl_safefree(data->state.buffer);
   Curl_safefree(data->state.headerbuff);
   Curl_safefree(data->state.ulbuf);
@@ -1992,379 +1995,134 @@ static CURLcode findprotocol(struct Curl_easy *data,
   return CURLE_UNSUPPORTED_PROTOCOL;
 }
 
+
+static CURLcode uc_to_curlcode(CURLUcode uc)
+{
+  switch(uc) {
+  case CURLUE_OUT_OF_MEMORY:
+    return CURLE_OUT_OF_MEMORY;
+  case CURLUE_USER_NOT_ALLOWED:
+    return CURLE_LOGIN_DENIED;
+  case CURLUE_UNSUPPORTED_SCHEME:
+    return CURLE_UNSUPPORTED_PROTOCOL;
+  default: /* every other URL API error is reported as a malformed URL */
+    return CURLE_URL_MALFORMAT;
+  }
+}
+
 /*
  * Parse URL and fill in the relevant members of the connection struct.
  */
 static CURLcode parseurlandfillconn(struct Curl_easy *data,
-                                    struct connectdata *conn,
-                                    bool *prot_missing,
-                                    char **userp, char **passwdp,
-                                    char **optionsp)
+                                    struct connectdata *conn)
 {
-  char *at;
-  char *fragment;
-  char *path = data->state.path;
-  char *query;
-  int rc;
-  const char *protop = "";
   CURLcode result;
-  bool rebuild_url = FALSE;
-  bool url_has_scheme = FALSE;
-  char protobuf[16];
+  CURLU *uh;
+  CURLUcode uc;
+  char *hostname;
 
-  *prot_missing = FALSE;
+  Curl_up_free(data); /* cleanup previous leftovers first */
 
-  /* We might pass the entire URL into the request so we need to make sure
-   * there are no bad characters in there.*/
-  if(strpbrk(data->change.url, "\r\n")) {
-    failf(data, "Illegal characters found in URL");
-    return CURLE_URL_MALFORMAT;
-  }
-
-  /*************************************************************
-   * Parse the URL.
-   *
-   * We need to parse the url even when using the proxy, because we will need
-   * the hostname and port in case we are trying to SSL connect through the
-   * proxy -- and we don't know if we will need to use SSL until we parse the
-   * url ...
-   ************************************************************/
-  if(data->change.url[0] == ':') {
-    failf(data, "Bad URL, colon is first character");
-    return CURLE_URL_MALFORMAT;
-  }
+  /* parse the URL */
+  uh = data->state.uh = curl_url();
+  if(!uh)
+    return CURLE_OUT_OF_MEMORY;
 
-  /* MSDOS/Windows style drive prefix, eg c: in c:foo */
-#define STARTS_WITH_DRIVE_PREFIX(str) \
-  ((('a' <= str[0] && str[0] <= 'z') || \
-    ('A' <= str[0] && str[0] <= 'Z')) && \
-   (str[1] == ':'))
-
-  /* MSDOS/Windows style drive prefix, optionally with
-   * a '|' instead of ':', followed by a slash or NUL */
-#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
-  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
-    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
-   ((str)[1] == ':' || (str)[1] == '|') && \
-   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
-
-  /* Don't mistake a drive letter for a scheme if the default protocol is file.
-     curld --proto-default file c:/foo/bar.txt */
-  if(STARTS_WITH_DRIVE_PREFIX(data->change.url) &&
-     data->set.str[STRING_DEFAULT_PROTOCOL] &&
-     strcasecompare(data->set.str[STRING_DEFAULT_PROTOCOL], "file")) {
-    ; /* do nothing */
-  }
-  else { /* check for a scheme */
-    int i;
-    for(i = 0; i < 16 && data->change.url[i]; ++i) {
-      if(data->change.url[i] == '/')
-        break;
-      if(data->change.url[i] == ':') {
-        url_has_scheme = TRUE;
-        break;
-      }
-    }
+  if(data->set.str[STRING_DEFAULT_PROTOCOL] &&
+     !Curl_is_absolute_url(data->change.url, NULL, MAX_SCHEME_LEN)) {
+    /* build the new URL before releasing the old one: aprintf() reads it */
+    char *url = aprintf("%s://%s", data->set.str[STRING_DEFAULT_PROTOCOL],
+                        data->change.url);
+    if(!url)
+      return CURLE_OUT_OF_MEMORY;
+    if(data->change.url_alloc)
+      free(data->change.url);
+    data->change.url = url;
+    data->change.url_alloc = TRUE;
   }
 
-  /* handle the file: scheme */
-  if((url_has_scheme && strncasecompare(data->change.url, "file:", 5)) ||
-     (!url_has_scheme && data->set.str[STRING_DEFAULT_PROTOCOL] &&
-      strcasecompare(data->set.str[STRING_DEFAULT_PROTOCOL], "file"))) {
-    if(url_has_scheme)
-      rc = sscanf(data->change.url, "%*15[^\n/:]:%[^\n]", path);
-    else
-      rc = sscanf(data->change.url, "%[^\n]", path);
+  uc = curl_url_set(uh, CURLUPART_URL, data->change.url,
+                    CURLU_GUESS_SCHEME |
+                    CURLU_NON_SUPPORT_SCHEME |
+                    (data->set.disallow_username_in_url ?
+                     CURLU_DISALLOW_USER : 0) |
+                    (data->set.path_as_is ? CURLU_PATH_AS_IS : 0));
+  if(uc)
+    return uc_to_curlcode(uc);
 
-    if(rc != 1) {
-      failf(data, "Bad URL");
-      return CURLE_URL_MALFORMAT;
-    }
+  uc = curl_url_get(uh, CURLUPART_SCHEME, &data->state.up.scheme, 0);
+  if(uc)
+    return uc_to_curlcode(uc);
 
-    /* Extra handling URLs with an authority component (i.e. that start with
-     * "file://")
-     *
-     * We allow omitted hostname (e.g. file:/<path>) -- valid according to
-     * RFC 8089, but not the (current) WHAT-WG URL spec.
-     */
-    if(url_has_scheme && path[0] == '/' && path[1] == '/') {
-      /* swallow the two slashes */
-      char *ptr = &path[2];
-
-      /*
-       * According to RFC 8089, a file: URL can be reliably dereferenced if:
-       *
-       *  o it has no/blank hostname, or
-       *
-       *  o the hostname matches "localhost" (case-insensitively), or
-       *
-       *  o the hostname is a FQDN that resolves to this machine.
-       *
-       * For brevity, we only consider URLs with empty, "localhost", or
-       * "127.0.0.1" hostnames as local.
-       *
-       * Additionally, there is an exception for URLs with a Windows drive
-       * letter in the authority (which was accidentally omitted from RFC 8089
-       * Appendix E, but believe me, it was meant to be there. --MK)
-       */
-      if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
-        /* the URL includes a host name, it must match "localhost" or
-           "127.0.0.1" to be valid */
-        if(!checkprefix("localhost/", ptr) &&
-           !checkprefix("127.0.0.1/", ptr)) {
-          failf(data, "Invalid file://hostname/, "
-                      "expected localhost or 127.0.0.1 or none");
-          return CURLE_URL_MALFORMAT;
-        }
-        ptr += 9; /* now points to the slash after the host */
-      }
-
-      /* This cannot be done with strcpy, as the memory chunks overlap! */
-      memmove(path, ptr, strlen(ptr) + 1);
-    }
-
-#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
-    /* Don't allow Windows drive letters when not in Windows.
-     * This catches both "file:/c:" and "file:c:" */
-    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
-       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
-      failf(data, "File drive letters are only accepted in MSDOS/Windows.");
-      return CURLE_URL_MALFORMAT;
-    }
-#else
-    /* If the path starts with a slash and a drive letter, ditch the slash */
-    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
-      /* This cannot be done with strcpy, as the memory chunks overlap! */
-      memmove(path, &path[1], strlen(&path[1]) + 1);
-    }
-#endif
+  result = findprotocol(data, conn, data->state.up.scheme);
+  if(result)
+    return result;
 
-    protop = "file"; /* protocol string */
-    *prot_missing = !url_has_scheme;
+  uc = curl_url_get(uh, CURLUPART_USER, &data->state.up.user,
+                    CURLU_URLDECODE);
+  if(!uc) {
+    conn->user = strdup(data->state.up.user);
+    if(!conn->user)
+      return CURLE_OUT_OF_MEMORY;
+    conn->bits.user_passwd = TRUE;
   }
-  else {
-    /* clear path */
-    char slashbuf[4];
-    path[0] = 0;
-
-    rc = sscanf(data->change.url,
-                "%15[^\n/:]:%3[/]%[^\n/?#]%[^\n]",
-                protobuf, slashbuf, conn->host.name, path);
-    if(2 == rc) {
-      failf(data, "Bad URL");
-      return CURLE_URL_MALFORMAT;
-    }
-    if(3 > rc) {
-
-      /*
-       * The URL was badly formatted, let's try the browser-style _without_
-       * protocol specified like 'http://'.
-       */
-      rc = sscanf(data->change.url, "%[^\n/?#]%[^\n]", conn->host.name, path);
-      if(1 > rc) {
-        /*
-         * We couldn't even get this format.
-         * djgpp 2.04 has a sscanf() bug where 'conn->host.name' is
-         * assigned, but the return value is EOF!
-         */
-#if defined(__DJGPP__) && (DJGPP_MINOR == 4)
-        if(!(rc == -1 && *conn->host.name))
-#endif
-        {
-          failf(data, "<url> malformed");
-          return CURLE_URL_MALFORMAT;
-        }
-      }
-
-      /*
-       * Since there was no protocol part specified in the URL use the
-       * user-specified default protocol. If we weren't given a default make a
-       * guess by matching some protocols against the host's outermost
-       * sub-domain name. Finally if there was no match use HTTP.
-       */
-
-      protop = data->set.str[STRING_DEFAULT_PROTOCOL];
-      if(!protop) {
-        /* Note: if you add a new protocol, please update the list in
-         * lib/version.c too! */
-        if(checkprefix("FTP.", conn->host.name))
-          protop = "ftp";
-        else if(checkprefix("DICT.", conn->host.name))
-          protop = "DICT";
-        else if(checkprefix("LDAP.", conn->host.name))
-          protop = "LDAP";
-        else if(checkprefix("IMAP.", conn->host.name))
-          protop = "IMAP";
-        else if(checkprefix("SMTP.", conn->host.name))
-          protop = "smtp";
-        else if(checkprefix("POP3.", conn->host.name))
-          protop = "pop3";
-        else
-          protop = "http";
-      }
+  else if(uc != CURLUE_NO_USER)
+    return uc_to_curlcode(uc);
 
-      *prot_missing = TRUE; /* not given in URL */
-    }
-    else {
-      size_t s = strlen(slashbuf);
-      protop = protobuf;
-      if(s != 2) {
-        infof(data, "Unwillingly accepted illegal URL using %zu slash%s!\n",
-              s, s>1?"es":"");
-
-        if(data->change.url_alloc)
-          free(data->change.url);
-        /* repair the URL to use two slashes */
-        data->change.url = aprintf("%s://%s%s",
-                                   protobuf, conn->host.name, path);
-        if(!data->change.url)
-          return CURLE_OUT_OF_MEMORY;
-        data->change.url_alloc = TRUE;
-      }
-    }
+  uc = curl_url_get(uh, CURLUPART_PASSWORD, &data->state.up.password,
+                    CURLU_URLDECODE);
+  if(!uc) {
+    conn->passwd = strdup(data->state.up.password);
+    if(!conn->passwd)
+      return CURLE_OUT_OF_MEMORY;
+    conn->bits.user_passwd = TRUE;
   }
+  else if(uc != CURLUE_NO_PASSWORD)
+    return uc_to_curlcode(uc);
 
-  /* We search for '?' in the host name (but only on the right side of a
-   * @-letter to allow ?-letters in username and password) to handle things
-   * like http://example.com?param= (notice the missing '/').
-   */
-  at = strchr(conn->host.name, '@');
-  if(at)
-    query = strchr(at + 1, '?');
-  else
-    query = strchr(conn->host.name, '?');
-
-  if(query) {
-    /* We must insert a slash before the '?'-letter in the URL. If the URL had
-       a slash after the '?', that is where the path currently begins and the
-       '?string' is still part of the host name.
-
-       We must move the trailing part from the host name and put it first in
-       the path. And have it all prefixed with a slash.
-    */
-
-    size_t hostlen = strlen(query);
-    size_t pathlen = strlen(path);
-
-    /* move the existing path plus the zero byte forward, to make room for
-       the host-name part */
-    memmove(path + hostlen + 1, path, pathlen + 1);
-
-    /* now copy the trailing host part in front of the existing path */
-    memcpy(path + 1, query, hostlen);
-
-    path[0]='/'; /* prepend the missing slash */
-    rebuild_url = TRUE;
-
-    *query = 0; /* now cut off the hostname at the ? */
-  }
-  else if(!path[0]) {
-    /* if there's no path set, use a single slash */
-    strcpy(path, "/");
-    rebuild_url = TRUE;
+  uc = curl_url_get(uh, CURLUPART_OPTIONS, &data->state.up.options,
+                    CURLU_URLDECODE);
+  if(!uc) {
+    conn->options = strdup(data->state.up.options);
+    if(!conn->options)
+      return CURLE_OUT_OF_MEMORY;
   }
+  else if(uc != CURLUE_NO_OPTIONS)
+    return uc_to_curlcode(uc);
 
-  /* If the URL is malformatted (missing a '/' after hostname before path) we
-   * insert a slash here. The only letters except '/' that can start a path is
-   * '?' and '#' - as controlled by the two sscanf() patterns above.
-   */
-  if(path[0] != '/') {
-    /* We need this function to deal with overlapping memory areas. We know
-       that the memory area 'path' points to is 'urllen' bytes big and that
-       is bigger than the path. Use +1 to move the zero byte too. */
-    memmove(&path[1], path, strlen(path) + 1);
-    path[0] = '/';
-    rebuild_url = TRUE;
-  }
-  else if(!data->set.path_as_is) {
-    /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
-    char *newp = Curl_dedotdotify(path);
-    if(!newp)
+  uc = curl_url_get(uh, CURLUPART_HOST, &data->state.up.hostname, 0);
+  if(uc) {
+    if(!strcasecompare("file", data->state.up.scheme))
       return CURLE_OUT_OF_MEMORY;
-
-    if(strcmp(newp, path)) {
-      rebuild_url = TRUE;
-      free(data->state.pathbuffer);
-      data->state.pathbuffer = newp;
-      data->state.path = newp;
-      path = newp;
-    }
-    else
-      free(newp);
   }
 
-  /*
-   * "rebuild_url" means that one or more URL components have been modified so
-   * we need to generate an updated full version.  We need the corrected URL
-   * when communicating over HTTP proxy and we don't know at this point if
-   * we're using a proxy or not.
-   */
-  if(rebuild_url) {
-    char *reurl;
-
-    size_t plen = strlen(path); /* new path, should be 1 byte longer than
-                                   the original */
-    size_t prefixlen = strlen(conn->host.name);
-
-    if(!*prot_missing) {
-      size_t protolen = strlen(protop);
-
-      if(curl_strnequal(protop, data->change.url, protolen))
-        prefixlen += protolen;
-      else {
-        failf(data, "<url> malformed");
-        return CURLE_URL_MALFORMAT;
-      }
+  uc = curl_url_get(uh, CURLUPART_PATH, &data->state.up.path, 0);
+  if(uc)
+    return uc_to_curlcode(uc);
 
-      if(curl_strnequal("://", &data->change.url[protolen], 3))
-        prefixlen += 3;
-      /* only file: is allowed to omit one or both slashes */
-      else if(curl_strnequal("file:", data->change.url, 5))
-        prefixlen += 1 + (data->change.url[5] == '/');
-      else {
-        failf(data, "<url> malformed");
-        return CURLE_URL_MALFORMAT;
-      }
-    }
-
-    reurl = malloc(prefixlen + plen + 1);
-    if(!reurl)
+  uc = curl_url_get(uh, CURLUPART_PORT, &data->state.up.port,
+                    CURLU_DEFAULT_PORT);
+  if(uc) {
+    if(!strcasecompare("file", data->state.up.scheme))
       return CURLE_OUT_OF_MEMORY;
-
-    /* copy the prefix */
-    memcpy(reurl, data->change.url, prefixlen);
-
-    /* append the trailing piece + zerobyte */
-    memcpy(&reurl[prefixlen], path, plen + 1);
-
-    /* possible free the old one */
-    if(data->change.url_alloc) {
-      Curl_safefree(data->change.url);
-      data->change.url_alloc = FALSE;
-    }
-
-    infof(data, "Rebuilt URL to: %s\n", reurl);
-
-    data->change.url = reurl;
-    data->change.url_alloc = TRUE; /* free this later */
+  }
+  else {
+    unsigned long port = strtoul(data->state.up.port, NULL, 10);
+    conn->remote_port = curlx_ultous(port);
   }
 
-  result = findprotocol(data, conn, protop);
-  if(result)
-    return result;
+  (void)curl_url_get(uh, CURLUPART_QUERY, &data->state.up.query, 0);
 
-  /*
-   * Parse the login details from the URL and strip them out of
-   * the host name
-   */
-  result = parse_url_login(data, conn, userp, passwdp, optionsp);
-  if(result)
-    return result;
+  hostname = data->state.up.hostname;
+  if(!hostname)
+    /* this is for file:// transfers, get a dummy made */
+    hostname = (char *)"";
 
-  if(conn->host.name[0] == '[') {
+  if(hostname[0] == '[') {
     /* This looks like an IPv6 address literal.  See if there is an address
-       scope if there is no location header */
-    char *percent = strchr(conn->host.name, '%');
+       scope. */
+    char *percent = strchr(++hostname, '%');
+    conn->bits.ipv6_ip = TRUE;
     if(percent) {
       unsigned int identifier_offset = 3;
       char *endp;
@@ -2412,33 +2170,22 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data,
           infof(data, "Invalid IPv6 address format\n");
       }
     }
+    percent = strchr(hostname, ']');
+    if(percent)
+      /* terminate IPv6 numerical at end bracket */
+      *percent = 0;
   }
 
+  /* make sure the connect struct gets its own copy of the host name */
+  conn->host.rawalloc = strdup(hostname);
+  if(!conn->host.rawalloc)
+    return CURLE_OUT_OF_MEMORY;
+  conn->host.name = conn->host.rawalloc;
+
   if(data->set.scope_id)
     /* Override any scope that was set above.  */
     conn->scope_id = data->set.scope_id;
 
-  /* Remove the fragment part of the path. Per RFC 2396, this is always the
-     last part of the URI. We are looking for the first '#' so that we deal
-     gracefully with non conformant URI such as http://example.com#foo#bar. */
-  fragment = strchr(path, '#');
-  if(fragment) {
-    *fragment = 0;
-
-    /* we know the path part ended with a fragment, so we know the full URL
-       string does too and we need to cut it off from there so it isn't used
-       over proxy */
-    fragment = strchr(data->change.url, '#');
-    if(fragment)
-      *fragment = 0;
-  }
-
-  /*
-   * So if the URL was A://B/C#D,
-   *   protop is A
-   *   conn->host.name is B
-   *   data->state.path is /C
-   */
   return CURLE_OK;
 }
 
@@ -2553,11 +2300,8 @@ static bool check_noproxy(const char *name, const char *no_proxy)
       if(!endptr)
         return FALSE;
       name++;
-    }
-    else
-      endptr = strchr(name, ':');
-    if(endptr)
       namelen = endptr - name;
+    }
     else
       namelen = strlen(name);
 
@@ -3089,131 +2833,6 @@ static CURLcode create_conn_helper_init_proxy(struct connectdata *conn)
 }
 #endif /* CURL_DISABLE_PROXY */
 
-/*
- * parse_url_login()
- *
- * Parse the login details (user name, password and options) from the URL and
- * strip them out of the host name
- *
- * Inputs: data->set.use_netrc (CURLOPT_NETRC)
- *         conn->host.name
- *
- * Outputs: (almost :- all currently undefined)
- *          conn->bits.user_passwd  - non-zero if non-default passwords exist
- *          user                    - non-zero length if defined
- *          passwd                  - non-zero length if defined
- *          options                 - non-zero length if defined
- *          conn->host.name         - remove user name and password
- */
-static CURLcode parse_url_login(struct Curl_easy *data,
-                                struct connectdata *conn,
-                                char **user, char **passwd, char **options)
-{
-  CURLcode result = CURLE_OK;
-  char *userp = NULL;
-  char *passwdp = NULL;
-  char *optionsp = NULL;
-
-  /* At this point, we're hoping all the other special cases have
-   * been taken care of, so conn->host.name is at most
-   *    [user[:password][;options]]@]hostname
-   *
-   * We need somewhere to put the embedded details, so do that first.
-   */
-
-  char *ptr = strchr(conn->host.name, '@');
-  char *login = conn->host.name;
-
-  DEBUGASSERT(!**user);
-  DEBUGASSERT(!**passwd);
-  DEBUGASSERT(!**options);
-  DEBUGASSERT(conn->handler);
-
-  if(!ptr)
-    goto out;
-
-  /* We will now try to extract the
-   * possible login information in a string like:
-   * ftp://user:password@ftp.my.site:8021/README */
-  conn->host.name = ++ptr;
-
-  /* So the hostname is sane.  Only bother interpreting the
-   * results if we could care.  It could still be wasted
-   * work because it might be overtaken by the programmatically
-   * set user/passwd, but doing that first adds more cases here :-(
-   */
-
-  if(data->set.use_netrc == CURL_NETRC_REQUIRED)
-    goto out;
-
-  /* We could use the login information in the URL so extract it. Only parse
-     options if the handler says we should. */
-  result =
-    Curl_parse_login_details(login, ptr - login - 1,
-                             &userp, &passwdp,
-                             (conn->handler->flags & PROTOPT_URLOPTIONS)?
-                             &optionsp:NULL);
-  if(result)
-    goto out;
-
-  if(userp) {
-    char *newname;
-
-    if(data->set.disallow_username_in_url) {
-      failf(data, "Option DISALLOW_USERNAME_IN_URL is set "
-                  "and url contains username.");
-      result = CURLE_LOGIN_DENIED;
-      goto out;
-    }
-
-    /* We have a user in the URL */
-    conn->bits.userpwd_in_url = TRUE;
-    conn->bits.user_passwd = TRUE; /* enable user+password */
-
-    /* Decode the user */
-    result = Curl_urldecode(data, userp, 0, &newname, NULL, FALSE);
-    if(result) {
-      goto out;
-    }
-
-    free(*user);
-    *user = newname;
-  }
-
-  if(passwdp) {
-    /* We have a password in the URL so decode it */
-    char *newpasswd;
-    result = Curl_urldecode(data, passwdp, 0, &newpasswd, NULL, FALSE);
-    if(result) {
-      goto out;
-    }
-
-    free(*passwd);
-    *passwd = newpasswd;
-  }
-
-  if(optionsp) {
-    /* We have an options list in the URL so decode it */
-    char *newoptions;
-    result = Curl_urldecode(data, optionsp, 0, &newoptions, NULL, FALSE);
-    if(result) {
-      goto out;
-    }
-
-    free(*options);
-    *options = newoptions;
-  }
-
-
-  out:
-
-  free(userp);
-  free(passwdp);
-  free(optionsp);
-
-  return result;
-}
-
 /*
  * Curl_parse_login_details()
  *
@@ -3347,131 +2966,23 @@ CURLcode Curl_parse_login_details(const char *login, const size_t len,
  * No matter if we use a proxy or not, we have to figure out the remote
  * port number of various reasons.
  *
- * To be able to detect port number flawlessly, we must not confuse them
- * IPv6-specified addresses in the [0::1] style. (RFC2732)
- *
- * The conn->host.name is currently [user:passwd@]host[:port] where host
- * could be a hostname, IPv4 address or IPv6 address.
- *
  * The port number embedded in the URL is replaced, if necessary.
  *************************************************************/
 static CURLcode parse_remote_port(struct Curl_easy *data,
                                   struct connectdata *conn)
 {
-  char *portptr;
-  char endbracket;
-
-  /* Note that at this point, the IPv6 address cannot contain any scope
-     suffix as that has already been removed in the parseurlandfillconn()
-     function */
-  if((1 == sscanf(conn->host.name, "[%*45[0123456789abcdefABCDEF:.]%c",
-                  &endbracket)) &&
-     (']' == endbracket)) {
-    /* this is a RFC2732-style specified IP-address */
-    conn->bits.ipv6_ip = TRUE;
-
-    conn->host.name++; /* skip over the starting bracket */
-    portptr = strchr(conn->host.name, ']');
-    if(portptr) {
-      *portptr++ = '\0'; /* zero terminate, killing the bracket */
-      if(*portptr) {
-        if (*portptr != ':') {
-          failf(data, "IPv6 closing bracket followed by '%c'", *portptr);
-          return CURLE_URL_MALFORMAT;
-        }
-      }
-      else
-        portptr = NULL; /* no port number available */
-    }
-  }
-  else {
-#ifdef ENABLE_IPV6
-    struct in6_addr in6;
-    if(Curl_inet_pton(AF_INET6, conn->host.name, &in6) > 0) {
-      /* This is a numerical IPv6 address, meaning this is a wrongly formatted
-         URL */
-      failf(data, "IPv6 numerical address used in URL without brackets");
-      return CURLE_URL_MALFORMAT;
-    }
-#endif
-
-    portptr = strchr(conn->host.name, ':');
-  }
 
   if(data->set.use_port && data->state.allow_port) {
-    /* if set, we use this and ignore the port possibly given in the URL */
+    /* if set, we use this instead of the port possibly given in the URL */
+    char portbuf[16];
+    CURLUcode uc;
     conn->remote_port = (unsigned short)data->set.use_port;
-    if(portptr)
-      *portptr = '\0'; /* cut off the name there anyway - if there was a port
-                      number - since the port number is to be ignored! */
-    if(conn->bits.httpproxy) {
-      /* we need to create new URL with the new port number */
-      char *url;
-      char type[12]="";
-
-      if(conn->bits.type_set)
-        snprintf(type, sizeof(type), ";type=%c",
-                 data->set.prefer_ascii?'A':
-                 (data->set.ftp_list_only?'D':'I'));
-
-      /*
-       * This synthesized URL isn't always right--suffixes like ;type=A are
-       * stripped off. It would be better to work directly from the original
-       * URL and simply replace the port part of it.
-       */
-      url = aprintf("%s://%s%s%s:%d%s%s%s", conn->given->scheme,
-                    conn->bits.ipv6_ip?"[":"", conn->host.name,
-                    conn->bits.ipv6_ip?"]":"", conn->remote_port,
-                    data->state.slash_removed?"/":"", data->state.path,
-                    type);
-      if(!url)
-        return CURLE_OUT_OF_MEMORY;
-
-      if(data->change.url_alloc) {
-        Curl_safefree(data->change.url);
-        data->change.url_alloc = FALSE;
-      }
-
-      data->change.url = url;
-      data->change.url_alloc = TRUE;
-    }
-  }
-  else if(portptr) {
-    /* no CURLOPT_PORT given, extract the one from the URL */
-
-    char *rest;
-    long port;
-
-    port = strtol(portptr + 1, &rest, 10);  /* Port number must be decimal */
-
-    if((port < 0) || (port > 0xffff)) {
-      /* Single unix standard says port numbers are 16 bits long */
-      failf(data, "Port number out of range");
-      return CURLE_URL_MALFORMAT;
-    }
-
-    if(rest[0]) {
-      failf(data, "Port number ended with '%c'", rest[0]);
-      return CURLE_URL_MALFORMAT;
-    }
-
-    if(rest != &portptr[1]) {
-      *portptr = '\0'; /* cut off the name there */
-      conn->remote_port = curlx_ultous(port);
-    }
-    else {
-      /* Browser behavior adaptation. If there's a colon with no digits after,
-         just cut off the name there which makes us ignore the colon and just
-         use the default port. Firefox and Chrome both do that. */
-      *portptr = '\0';
-    }
+    snprintf(portbuf, sizeof(portbuf), "%d", conn->remote_port);
+    uc = curl_url_set(data->state.uh, CURLUPART_PORT, portbuf, 0);
+    if(uc) /* the URL handle could not store the overriding port */
+      return CURLE_OUT_OF_MEMORY;
   }
 
-  /* only if remote_port was not already parsed off the URL we use the
-     default port number */
-  if(conn->remote_port < 0)
-    conn->remote_port = (unsigned short)conn->given->defport;
-
   return CURLE_OK;
 }
 
@@ -3483,11 +2994,16 @@ static CURLcode override_login(struct Curl_easy *data,
                                struct connectdata *conn,
                                char **userp, char **passwdp, char **optionsp)
 {
+  bool user_changed = FALSE;
+  bool passwd_changed = FALSE;
+  CURLUcode uc;
   if(data->set.str[STRING_USERNAME]) {
     free(*userp);
     *userp = strdup(data->set.str[STRING_USERNAME]);
     if(!*userp)
       return CURLE_OUT_OF_MEMORY;
+    conn->bits.user_passwd = TRUE; /* enable user+password */
+    user_changed = TRUE;
   }
 
   if(data->set.str[STRING_PASSWORD]) {
@@ -3495,6 +3011,8 @@ static CURLcode override_login(struct Curl_easy *data,
     *passwdp = strdup(data->set.str[STRING_PASSWORD]);
     if(!*passwdp)
       return CURLE_OUT_OF_MEMORY;
+    conn->bits.user_passwd = TRUE; /* enable user+password */
+    passwd_changed = TRUE;
   }
 
   if(data->set.str[STRING_OPTIONS]) {
@@ -3506,9 +3024,16 @@ static CURLcode override_login(struct Curl_easy *data,
 
   conn->bits.netrc = FALSE;
   if(data->set.use_netrc != CURL_NETRC_IGNORED) {
-    int ret = Curl_parsenetrc(conn->host.name,
-                              userp, passwdp,
-                              data->set.str[STRING_NETRC_FILE]);
+    char *nuser = NULL;
+    char *npasswd = NULL;
+    int ret;
+
+    if(data->set.use_netrc == CURL_NETRC_OPTIONAL)
+      nuser = *userp; /* to separate otherwise identical machines */
+
+    ret = Curl_parsenetrc(conn->host.name,
+                          &nuser, &npasswd,
+                          data->set.str[STRING_NETRC_FILE]);
     if(ret > 0) {
       infof(data, "Couldn't find host %s in the "
             DOT_CHAR "netrc file; using defaults\n",
@@ -3522,55 +3047,85 @@ static CURLcode override_login(struct Curl_easy *data,
          file, so that it is safe to use even if we followed a Location: to a
          different host or similar. */
       conn->bits.netrc = TRUE;
-
       conn->bits.user_passwd = TRUE; /* enable user+password */
+
+      if(data->set.use_netrc == CURL_NETRC_OPTIONAL) {
+        /* prefer credentials outside netrc */
+        if(nuser && !*userp) {
+          free(*userp);
+          *userp = nuser;
+          user_changed = TRUE;
+        }
+        if(npasswd && !*passwdp) {
+          free(*passwdp);
+          *passwdp = npasswd;
+          passwd_changed = TRUE;
+        }
+      }
+      else {
+        /* prefer netrc credentials */
+        if(nuser) {
+          free(*userp);
+          *userp = nuser;
+          user_changed = TRUE;
+        }
+        if(npasswd) {
+          free(*passwdp);
+          *passwdp = npasswd;
+          passwd_changed = TRUE;
+        }
+      }
     }
   }
 
+  /* for updated strings, we update them in the URL */
+  if(user_changed) {
+    uc = curl_url_set(data->state.uh, CURLUPART_USER, *userp, 0);
+    if(uc)
+      return uc_to_curlcode(uc);
+  }
+  if(passwd_changed) {
+    uc = curl_url_set(data->state.uh, CURLUPART_PASSWORD, *passwdp, 0);
+    if(uc)
+      return uc_to_curlcode(uc);
+  }
   return CURLE_OK;
 }
 
 /*
  * Set the login details so they're available in the connection
  */
-static CURLcode set_login(struct connectdata *conn,
-                          const char *user, const char *passwd,
-                          const char *options)
+static CURLcode set_login(struct connectdata *conn)
 {
   CURLcode result = CURLE_OK;
+  const char *setuser = CURL_DEFAULT_USER;
+  const char *setpasswd = CURL_DEFAULT_PASSWORD;
 
   /* If our protocol needs a password and we have none, use the defaults */
-  if((conn->handler->flags & PROTOPT_NEEDSPWD) && !conn->bits.user_passwd) {
-    /* Store the default user */
-    conn->user = strdup(CURL_DEFAULT_USER);
-
-    /* Store the default password */
-    if(conn->user)
-      conn->passwd = strdup(CURL_DEFAULT_PASSWORD);
-    else
-      conn->passwd = NULL;
-
-    /* This is the default password, so DON'T set conn->bits.user_passwd */
-  }
+  if((conn->handler->flags & PROTOPT_NEEDSPWD) && !conn->bits.user_passwd)
+    ;
   else {
-    /* Store the user, zero-length if not set */
-    conn->user = strdup(user);
-
-    /* Store the password (only if user is present), zero-length if not set */
-    if(conn->user)
-      conn->passwd = strdup(passwd);
-    else
-      conn->passwd = NULL;
+    setuser = "";
+    setpasswd = "";
+  }
+  /* Store the default user */
+  if(!conn->user) {
+    conn->user = strdup(setuser);
+    if(!conn->user)
+      return CURLE_OUT_OF_MEMORY;
   }
 
-  if(!conn->user || !conn->passwd)
-    result = CURLE_OUT_OF_MEMORY;
-
-  /* Store the options, null if not set */
-  if(!result && options[0]) {
-    conn->options = strdup(options);
+  /* Store the default password */
+  if(!conn->passwd) {
+    conn->passwd = strdup(setpasswd);
+    if(!conn->passwd)
+      result = CURLE_OUT_OF_MEMORY;
+  }
 
-    if(!conn->options)
+  /* if there's a user without password, consider password blank */
+  if(conn->user && !conn->passwd) {
+    conn->passwd = strdup("");
+    if(!conn->passwd)
       result = CURLE_OUT_OF_MEMORY;
   }
 
@@ -4022,12 +3577,7 @@ static CURLcode create_conn(struct Curl_easy *data,
   CURLcode result = CURLE_OK;
   struct connectdata *conn;
   struct connectdata *conn_temp = NULL;
-  size_t urllen;
-  char *user = NULL;
-  char *passwd = NULL;
-  char *options = NULL;
   bool reuse;
-  bool prot_missing = FALSE;
   bool connections_available = TRUE;
   bool force_reuse = FALSE;
   bool waitpipe = FALSE;
@@ -4039,7 +3589,6 @@ static CURLcode create_conn(struct Curl_easy *data,
   /*************************************************************
    * Check input data
    *************************************************************/
-
   if(!data->change.url) {
     result = CURLE_URL_MALFORMAT;
     goto out;
@@ -4061,107 +3610,10 @@ static CURLcode create_conn(struct Curl_easy *data,
      any failure */
   *in_connect = conn;
 
-  /* This initing continues below, see the comment "Continue connectdata
-   * initialization here" */
-
-  /***********************************************************
-   * We need to allocate memory to store the path in. We get the size of the
-   * full URL to be sure, and we need to make it at least 256 bytes since
-   * other parts of the code will rely on this fact
-   ***********************************************************/
-#define LEAST_PATH_ALLOC 256
-  urllen = strlen(data->change.url);
-  if(urllen < LEAST_PATH_ALLOC)
-    urllen = LEAST_PATH_ALLOC;
-
-  /*
-   * We malloc() the buffers below urllen+2 to make room for 2 possibilities:
-   * 1 - an extra terminating zero
-   * 2 - an extra slash (in case a syntax like "www.host.com?moo" is used)
-   */
-
-  Curl_safefree(data->state.pathbuffer);
-  data->state.path = NULL;
-
-  data->state.pathbuffer = malloc(urllen + 2);
-  if(NULL == data->state.pathbuffer) {
-    result = CURLE_OUT_OF_MEMORY; /* really bad error */
-    goto out;
-  }
-  data->state.path = data->state.pathbuffer;
-
-  conn->host.rawalloc = malloc(urllen + 2);
-  if(NULL == conn->host.rawalloc) {
-    Curl_safefree(data->state.pathbuffer);
-    data->state.path = NULL;
-    result = CURLE_OUT_OF_MEMORY;
-    goto out;
-  }
-
-  conn->host.name = conn->host.rawalloc;
-  conn->host.name[0] = 0;
-
-  user = strdup("");
-  passwd = strdup("");
-  options = strdup("");
-  if(!user || !passwd || !options) {
-    result = CURLE_OUT_OF_MEMORY;
-    goto out;
-  }
-
-  result = parseurlandfillconn(data, conn, &prot_missing, &user, &passwd,
-                               &options);
+  result = parseurlandfillconn(data, conn);
   if(result)
     goto out;
 
-  /*************************************************************
-   * No protocol part in URL was used, add it!
-   *************************************************************/
-  if(prot_missing) {
-    /* We're guessing prefixes here and if we're told to use a proxy or if
-       we're going to follow a Location: later or... then we need the protocol
-       part added so that we have a valid URL. */
-    char *reurl;
-    char *ch_lower;
-
-    reurl = aprintf("%s://%s", conn->handler->scheme, data->change.url);
-
-    if(!reurl) {
-      result = CURLE_OUT_OF_MEMORY;
-      goto out;
-    }
-
-    /* Change protocol prefix to lower-case */
-    for(ch_lower = reurl; *ch_lower != ':'; ch_lower++)
-      *ch_lower = (char)TOLOWER(*ch_lower);
-
-    if(data->change.url_alloc) {
-      Curl_safefree(data->change.url);
-      data->change.url_alloc = FALSE;
-    }
-
-    data->change.url = reurl;
-    data->change.url_alloc = TRUE; /* free this later */
-  }
-
-  /*************************************************************
-   * If the protocol can't handle url query strings, then cut
-   * off the unhandable part
-   *************************************************************/
-  if((conn->given->flags&PROTOPT_NOURLQUERY)) {
-    char *path_q_sep = strchr(conn->data->state.path, '?');
-    if(path_q_sep) {
-      /* according to rfc3986, allow the query (?foo=bar)
-         also on protocols that can't handle it.
-
-         cut the string-part after '?'
-      */
-
-      /* terminate the string */
-      path_q_sep[0] = 0;
-    }
-  }
-
   if(data->set.str[STRING_BEARER]) {
     conn->oauth_bearer = strdup(data->set.str[STRING_BEARER]);
     if(!conn->oauth_bearer) {
@@ -4205,10 +3657,12 @@ static CURLcode create_conn(struct Curl_easy *data,
 
   /* Check for overridden login details and set them accordingly so they
      they are known when protocol->setup_connection is called! */
-  result = override_login(data, conn, &user, &passwd, &options);
+  result = override_login(data, conn, &conn->user, &conn->passwd,
+                          &conn->options);
   if(result)
     goto out;
-  result = set_login(conn, user, passwd, options);
+
+  result = set_login(conn); /* default credentials */
   if(result)
     goto out;
 
@@ -4394,6 +3848,9 @@ static CURLcode create_conn(struct Curl_easy *data,
    * new one.
    *************************************************************/
 
+  DEBUGASSERT(conn->user);
+  DEBUGASSERT(conn->passwd);
+
   /* reuse_fresh is TRUE if we are told to use a new connection by force, but
      we only acknowledge this option if this is not a re-used connection
      already (which happens due to follow-location or during a HTTP
@@ -4569,10 +4026,6 @@ static CURLcode create_conn(struct Curl_easy *data,
   result = resolve_server(data, conn, async);
 
 out:
-
-  free(options);
-  free(passwd);
-  free(user);
   return result;
 }
 
diff --git a/lib/url.h b/lib/url.h
index 7b9aff5c42f2a7..1c18f713783b8c 100644
--- a/lib/url.h
+++ b/lib/url.h
@@ -48,6 +48,8 @@ CURLcode Curl_open(struct Curl_easy **curl);
 CURLcode Curl_init_userdefined(struct Curl_easy *data);
 
 void Curl_freeset(struct Curl_easy * data);
+/* free the URL pieces */
+void Curl_up_free(struct Curl_easy *data);
 CURLcode Curl_close(struct Curl_easy *data); /* opposite of curl_open() */
 CURLcode Curl_connect(struct Curl_easy *, struct connectdata **,
                       bool *async, bool *protocol_connect);
diff --git a/lib/urldata.h b/lib/urldata.h
index 85712ba20575f9..1fede5090d2f47 100644
--- a/lib/urldata.h
+++ b/lib/urldata.h
@@ -1224,6 +1224,18 @@ struct time_node {
   expire_id eid;
 };
 
+/* individual pieces of the URL */
+struct urlpieces {
+  char *scheme;
+  char *hostname;
+  char *port;
+  char *user;
+  char *password;
+  char *options;
+  char *path;
+  char *query;
+};
+
 struct UrlState {
 
   /* Points to the connection cache */
@@ -1314,9 +1326,6 @@ struct UrlState {
   /* for FTP downloads: how many CRLFs did we converted to LFs? */
   curl_off_t crlf_conversions;
 #endif
-  char *pathbuffer;/* allocated buffer to store the URL's path part in */
-  char *path;      /* path to use, points to somewhere within the pathbuffer
-                      area */
   bool slash_removed; /* set TRUE if the 'path' points to a path where the
                          initial URL slash separator has been taken off */
   bool use_range;
@@ -1350,6 +1359,8 @@ struct UrlState {
 #ifdef CURLDEBUG
   bool conncache_lock;
 #endif
+  CURLU *uh; /* URL handle for the current parsed URL */
+  struct urlpieces up;
 };
 
 
diff --git a/tests/data/test325 b/tests/data/test325
index 6d5898d454017e..922f37ba2b6093 100644
--- a/tests/data/test325
+++ b/tests/data/test325
@@ -15,7 +15,7 @@ HTTP/1.1 301 OK
 Date: Thu, 09 Nov 2010 14:49:00 GMT
 Server: test-server/fake
 Content-Length: 7
-Location: http://%HOSTIP:%HTTPPORT/325
+Location: http://somewhere/325
 
 MooMoo
 </data>
@@ -24,7 +24,7 @@ HTTP/1.1 301 OK
 Date: Thu, 09 Nov 2010 14:49:00 GMT
 Server: test-server/fake
 Content-Length: 7
-Location: http://%HOSTIP:%HTTPPORT/325
+Location: http://somewhere/325
 
 </datacheck>
 </reply>
diff --git a/tests/data/test523 b/tests/data/test523
index 9abe0ed2230c83..665211d48e0d3c 100644
--- a/tests/data/test523
+++ b/tests/data/test523
@@ -39,7 +39,7 @@ HTTP GET with proxy and CURLOPT_PORT
  </name>
 # first URL then proxy
  <command>
-http://www.example.com:999/523 http://%HOSTIP:%HTTPPORT 
+http://www.example.com:999/523 http://%HOSTIP:%HTTPPORT
 </command>
 </client>
 
@@ -50,7 +50,7 @@ http://www.example.com:999/523 http://%HOSTIP:%HTTPPORT
 ^User-Agent:.*
 </strip>
 <protocol>
-GET HTTP://www.example.com:19999/523 HTTP/1.1
+GET http://www.example.com:19999/523 HTTP/1.1
 Host: www.example.com:19999
 Authorization: Basic eHh4Onl5eQ==
 Accept: */*
diff --git a/tests/data/test563 b/tests/data/test563
index cecbedc21443bf..c9df79219725d1 100644
--- a/tests/data/test563
+++ b/tests/data/test563
@@ -47,7 +47,7 @@ ftp_proxy=http://%HOSTIP:%HTTPPORT/
 # Verify data after the test has been "shot"
 <verify>
 <protocol>
-GET FTP://%HOSTIP:%FTPPORT/563;type=A HTTP/1.1
+GET ftp://%HOSTIP:%FTPPORT/563;type=A HTTP/1.1
 Host: %HOSTIP:%FTPPORT
 Accept: */*
 Proxy-Connection: Keep-Alive