Permalink
Browse files

URL-encode characters in the path

Change-Id: I37d810e6b833b3e8e32590e3646476afde10e037
Reviewed-on: http://review.couchbase.org/25115
Reviewed-by: Sergey Avseyev <sergey.avseyev@gmail.com>
Tested-by: Sergey Avseyev <sergey.avseyev@gmail.com>
  • Loading branch information...
1 parent 429e5e5 commit f74d651052b4586393180c3318d9a6854652d401 @trondn trondn committed with avsej Mar 12, 2013
Showing with 364 additions and 6 deletions.
  1. +4 −0 Makefile.am
  2. +1 −0 NMakefile
  3. +6 −2 include/libcouchbase/error.h
  4. +5 −4 src/http.c
  5. +1 −0 src/internal.h
  6. +2 −0 src/strerror.c
  7. +154 −0 src/url_encoding.c
  8. +33 −0 src/url_encoding.h
  9. +158 −0 tests/url_encoding-tests.cc
View
@@ -153,6 +153,8 @@ libcouchbase_la_SOURCES = \
src/timings.c \
src/touch.c \
src/trace.h \
+ src/url_encoding.c \
+ src/url_encoding.h \
src/utilities.c \
src/verbosity.c \
src/wait.c
@@ -260,6 +262,7 @@ tests_unit_tests_SOURCES = \
src/base64.c \
src/hashset.c \
src/ringbuffer.c \
+ src/url_encoding.c \
tests/base64-unit-test.cc \
tests/behavior-unit-tests.cc \
tests/cluster-unit-tests.cc \
@@ -269,6 +272,7 @@ tests_unit_tests_SOURCES = \
tests/strerror-unit-test.cc \
tests/timeout.c \
tests/unit_tests.cc \
+ tests/url_encoding-tests.cc \
tests/hostname-unit-test.cc \
tests/ccbc_103.cc
View
@@ -37,6 +37,7 @@ libcouchbase_SOURCES = src\arithmetic.c src\base64.c src\behavior.c \
src\wait.c src\gethrtime.c plugins\io\win32\plugin-win32.c src\isasl.c \
src\compat.c contrib\http_parser\http_parser.c src\http.c \
src\observe.c src\timer.c src\verbosity.c src\sanitycheck.c \
+ src\url_encoding.c \
contrib\libvbucket\crc32.c contrib\libvbucket\cJSON.c \
contrib\libvbucket\vbucket.c contrib\libvbucket\ketama.c \
contrib\libvbucket\rfc1321\md5c.c
@@ -180,13 +180,17 @@ extern "C" {
/**
* The bootstrap hosts list use an invalid/unsupported format
*/
- LCB_INVALID_HOST_FORMAT = 0x1f
+ LCB_INVALID_HOST_FORMAT = 0x1f,
+ /**
+ * Invalid character used in the path component of a URL
+ */
+ LCB_INVALID_CHAR = 0x20
#ifdef LIBCOUCHBASE_INTERNAL
/**
* This is a private value used by the tests in libcouchbase
*/
- , LCB_MAX_ERROR_VAL = 0x20
+ , LCB_MAX_ERROR_VAL = 0x21
#endif
} lcb_error_t;
View
@@ -555,6 +555,7 @@ lcb_error_t lcb_make_http_request(lcb_t instance,
lcb_size_t nn, nbase, nbody, npath;
lcb_http_method_t method;
int chunked;
+ lcb_error_t error;
switch (cmd->version) {
case 0:
@@ -674,12 +675,12 @@ lcb_error_t lcb_make_http_request(lcb_t instance,
req->command_cookie = command_cookie;
req->chunked = chunked;
req->method = method;
- req->npath = npath;
- if ((req->path = malloc(req->npath)) == NULL) {
+ error = lcb_urlencode_path(path, npath, &req->path, &req->npath);
+
+ if (error != LCB_SUCCESS) {
http_request_destroy(req);
- return lcb_synchandler_return(instance, LCB_CLIENT_ENOMEM);
+ return lcb_synchandler_return(instance, error);
}
- memcpy(req->path, path, req->npath);
#define BUFF_APPEND(dst, src, len) \
if (len != ringbuffer_write(dst, src, len)) { \
View
@@ -45,6 +45,7 @@
#include "http_parser/http_parser.h"
#include "ringbuffer.h"
+#include "url_encoding.h"
#include "hashset.h"
#include "debug.h"
#include "handler.h"
View
@@ -98,6 +98,8 @@ const char *lcb_strerror(lcb_t instance, lcb_error_t error)
case LCB_INVALID_HOST_FORMAT:
return "One of the hostnames specified use invalid characters"
" or an unsupported format";
+ case LCB_INVALID_CHAR:
+ return "An invalid character is used";
default:
return "Unknown error.. are you sure libcouchbase gave you that?";
}
View
@@ -0,0 +1,154 @@
+/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/*
+ * Copyright 2012-2013 Couchbase, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal.h"
+
/**
 * Check whether the two bytes at offsets c and c + 1 of p are both
 * hexadecimal digits, i.e. whether a '%' located just before offset c
 * starts an already percent-encoded triplet ("%XX").
 *
 * @param p buffer holding the path (does not need to be NUL-terminated)
 * @param c offset of the first byte to inspect (the byte after the '%')
 * @param l total number of bytes available in p
 * @return 1 if both bytes exist and are hex digits, 0 otherwise
 */
static int maybe_skip_encoding(const char *p, int c, int l)
{
    int ii;

    /* Both digits must lie inside the buffer, otherwise it is no triplet */
    if (c < 0 || l - c < 2) {
        return 0;
    }

    for (ii = 0; ii < 2; ++ii) {
        /*
         * The <ctype.h> classifiers require a value representable as
         * unsigned char; passing a negative plain char is undefined.
         */
        if (!isxdigit((unsigned char)p[c + ii])) {
            return 0;
        }
    }

    return 1;
}
+
/**
 * Tell whether a byte may be copied into the encoded path as-is.
 *
 * ASCII letters and digits are accepted, together with the punctuation
 * characters listed in the switch below. Everything else (including '%',
 * space, control characters and every byte >= 0x80) is reported as not
 * legal.
 *
 * @param c the byte to classify
 * @return 1 if the byte may be emitted unescaped, 0 otherwise
 */
static int is_legal_uri_character(char c)
{
    /*
     * <ctype.h> functions need an unsigned char value; a negative plain
     * char is undefined behaviour. Restricting to ASCII also keeps a
     * non-"C" locale from classifying high bytes as letters.
     */
    unsigned char uc = (unsigned char)c;

    if (uc < 0x80 && isalnum(uc)) {
        return 1;
    }

    switch (c) {
    case '-':
    case '_':
    case '.':
    case '~':
    case '!':
    case '*':
    case '\'':
    case '(':
    case ')':
    case ';':
    case ':':
    case '@':
    case '&':
    case '=':
    case '+':
    case '$':
    case ',':
    case '/':
    case '?':
    case '#':
    case '[':
    case ']':
        return 1;
    default:
        return 0;
    }
}
+
+lcb_error_t lcb_urlencode_path(const char *path,
+ size_t npath,
+ char **out,
+ size_t *nout)
+{
+ size_t ii;
+ size_t n = 0;
+ int skip_encoding = 0;
+ char *op;
+
+ /* Allocate for a worst case scenario (it will probably not be
+ * that bad anyway
+ */
+ if ((op = malloc(npath * 3)) == NULL) {
+ return LCB_CLIENT_ENOMEM;
+ }
+
+ *out = op;
+
+ for (ii = 0; ii < npath; ++ii) {
+ if (skip_encoding == 0) {
+ switch (path[ii]) {
+ case '%':
+ skip_encoding = maybe_skip_encoding(path, ii + 1, npath);
+ break;
+ case '+':
+ skip_encoding = 1;
+ default:
+ ;
+ }
+ }
+
+ if (skip_encoding || is_legal_uri_character(path[ii])) {
+ if (skip_encoding && path[ii] != '%' && !is_legal_uri_character(path[ii])) {
+ free(op);
+ return LCB_INVALID_CHAR;
+ }
+
+ op[n++] = path[ii];
+ } else {
+ unsigned int c = (unsigned char)path[ii];
+ int numbytes;
+ int xx;
+
+ if ((c & 0x80) == 0) { // Ascii character
+ numbytes = 1;
+ } else if ((c & 0xE0) == 0xC0) { // 110x xxxx
+ numbytes = 2;
+ } else if ((c & 0xF0 ) == 0xE0) { // 1110 xxxx
+ numbytes = 3;
+ } else if ((c & 0xF8) == 0xF0) { // 1111 0xxx
+ numbytes = 4;
+ } else {
+ free(op);
+ return LCB_INVALID_CHAR;
+ }
+
+ for (xx = 0; xx < numbytes; ++xx, ++ii) {
+ c = (unsigned char)path[ii];
+ sprintf(op + n, "%%%02X", c);
+ n += 3;
+ }
+
+ /* we updated ii too much */
+ --ii;
+ }
+ }
+
+ *nout = n;
+
+ return LCB_SUCCESS;
+}
View
@@ -0,0 +1,33 @@
+/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/*
+ * Copyright 2013 Couchbase, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef URL_ENCODING_H
+#define URL_ENCODING_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/**
 * URL-encode the characters in the path component of a request.
 *
 * @param path  the bytes to encode
 * @param npath the number of bytes in path
 * @param out   on LCB_SUCCESS, receives a buffer allocated with malloc()
 *              holding the encoded path; the caller is responsible for
 *              releasing it with free()
 * @param nout  on LCB_SUCCESS, receives the number of bytes stored in
 *              *out; use this length rather than relying on a
 *              terminating NUL
 * @return LCB_SUCCESS on success, LCB_CLIENT_ENOMEM if the output buffer
 *         could not be allocated, or LCB_INVALID_CHAR if the path
 *         contains a byte that cannot be encoded
 */
+ lcb_error_t lcb_urlencode_path(const char *path,
+ size_t npath,
+ char **out,
+ size_t *nout);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
Oops, something went wrong.

0 comments on commit f74d651

Please sign in to comment.