awslabs · graebm · Aug 6, 2020 · Jun 23, 2020 · Jun 23, 2020 · Jun 23, 2020
diff --git a/include/aws/common/byte_buf.h b/include/aws/common/byte_buf.h
@@ -213,11 +213,12 @@ bool aws_byte_buf_eq_c_str_ignore_case(const struct aws_byte_buf *const buf, con
  * No copies, no buffer allocations. Iterates over input_str, and returns the next substring between split_on instances.
  *
  * Edge case rules are as follows:
+ * If the input is an empty string, an empty cursor will be the one entry returned.
  * If the input begins with split_on, an empty cursor will be the first entry returned.
  * If the input has two adjacent split_on tokens, an empty cursor will be returned.
  * If the input ends with split_on, an empty cursor will be returned last.
  *
- * It is the user's responsibility to properly zero-initialize substr.
+ * It is the user's responsibility to zero-initialize substr before the first call.
  *
  * It is the user's responsibility to make sure the input buffer stays in memory
  * long enough to use the results.
@@ -505,6 +506,21 @@ uint64_t aws_hash_byte_cursor_ptr_ignore_case(const void *item);
 AWS_COMMON_API
 const uint8_t *aws_lookup_table_to_lower_get(void);
 
+/**
+ * Returns a 256-entry lookup table mapping an ASCII/UTF-8 hex character to its value (0-15).
+ * The table is indexed by byte value; every non-hex byte maps to 255.
+ * Valid examples:
+ * '0' -> 0
+ * 'F' -> 15
+ * 'f' -> 15
+ * Invalid examples:
+ * ' ' -> 255
+ * 'Z' -> 255
+ * '\0' -> 255
+ */
+AWS_COMMON_API
+const uint8_t *aws_lookup_table_hex_to_num_get(void);
+
 /**
  * Lexical (byte value) comparison of two byte cursors
  */
@@ -624,24 +640,24 @@ AWS_COMMON_API bool aws_byte_cursor_read_be24(struct aws_byte_cursor *cur, uint3
 AWS_COMMON_API bool aws_byte_cursor_read_be32(struct aws_byte_cursor *cur, uint32_t *var);
 
 /**
- * Reads a 32-bit value in network byte order from cur, and places it in host
+ * Reads a 64-bit value in network byte order from cur, and places it in host
  * byte order into var.
  *
  * On success, returns true and updates the cursor pointer/length accordingly.
  * If there is insufficient space in the cursor, returns false, leaving the
  * cursor unchanged.
  */
-AWS_COMMON_API bool aws_byte_cursor_read_float_be32(struct aws_byte_cursor *cur, float *var);
+AWS_COMMON_API bool aws_byte_cursor_read_be64(struct aws_byte_cursor *cur, uint64_t *var);
 
 /**
- * Reads a 64-bit value in network byte order from cur, and places it in host
+ * Reads a 32-bit value in network byte order from cur, and places it in host
  * byte order into var.
  *
  * On success, returns true and updates the cursor pointer/length accordingly.
  * If there is insufficient space in the cursor, returns false, leaving the
  * cursor unchanged.
  */
-AWS_COMMON_API bool aws_byte_cursor_read_float_be64(struct aws_byte_cursor *cur, double *var);
+AWS_COMMON_API bool aws_byte_cursor_read_float_be32(struct aws_byte_cursor *cur, float *var);
 
 /**
  * Reads a 64-bit value in network byte order from cur, and places it in host
@@ -651,7 +667,18 @@ AWS_COMMON_API bool aws_byte_cursor_read_float_be64(struct aws_byte_cursor *cur,
  * If there is insufficient space in the cursor, returns false, leaving the
  * cursor unchanged.
  */
-AWS_COMMON_API bool aws_byte_cursor_read_be64(struct aws_byte_cursor *cur, uint64_t *var);
+AWS_COMMON_API bool aws_byte_cursor_read_float_be64(struct aws_byte_cursor *cur, double *var);
+
+/**
+ * Reads 2 hex characters from ASCII/UTF-8 text to produce an 8-bit number.
+ * Accepts both lowercase 'a'-'f' and uppercase 'A'-'F'.
+ * For example: "0F" produces 15.
+ *
+ * On success, returns true, stores the number in *var and advances the cursor by 2.
+ * If there is insufficient space in the cursor or an invalid character
+ * is encountered, returns false, leaving the cursor and *var unchanged.
+ */
+AWS_COMMON_API bool aws_byte_cursor_read_hex_u8(struct aws_byte_cursor *cur, uint8_t *var);
 
 /**
  * Appends a sub-buffer to the specified buffer.

diff --git a/include/aws/common/string.h b/include/aws/common/string.h
@@ -100,15 +100,31 @@ bool aws_string_eq_c_str_ignore_case(const struct aws_string *str, const char *c
  */
 AWS_COMMON_API
 struct aws_string *aws_string_new_from_c_str(struct aws_allocator *allocator, const char *c_str);
+
+/**
+ * Allocate a new string with the same contents as the given byte array.
+ */
 AWS_COMMON_API
 struct aws_string *aws_string_new_from_array(struct aws_allocator *allocator, const uint8_t *bytes, size_t len);
 
 /**
- * Allocate a new string with the same contents as the old.
+ * Allocate a new string with the same contents as another string.
  */
 AWS_COMMON_API
 struct aws_string *aws_string_new_from_string(struct aws_allocator *allocator, const struct aws_string *str);
 
+/**
+ * Allocate a new string with the same contents as the cursor->len bytes at cursor->ptr.
+ */
+AWS_COMMON_API
+struct aws_string *aws_string_new_from_cursor(struct aws_allocator *allocator, const struct aws_byte_cursor *cursor);
+
+/**
+ * Allocate a new string with the same contents as the buf->len bytes at buf->buffer.
+ */
+AWS_COMMON_API
+struct aws_string *aws_string_new_from_buf(struct aws_allocator *allocator, const struct aws_byte_buf *buf);
+
 /**
  * Deallocate string.
  */

diff --git a/source/byte_buf.c b/source/byte_buf.c
@@ -20,6 +20,7 @@ int aws_byte_buf_init(struct aws_byte_buf *buf, struct aws_allocator *allocator,
 
     buf->buffer = (capacity == 0) ? NULL : aws_mem_acquire(allocator, capacity);
     if (capacity != 0 && buf->buffer == NULL) {
+        AWS_ZERO_STRUCT(*buf);
         return AWS_OP_ERR;
     }
 
@@ -162,48 +163,58 @@ bool aws_byte_cursor_next_split(
     char split_on,
     struct aws_byte_cursor *AWS_RESTRICT substr) {
 
-    bool first_run = false;
-    if (!substr->ptr) {
-        first_run = true;
-        substr->ptr = input_str->ptr;
-        substr->len = 0;
-    }
+    AWS_PRECONDITION(aws_byte_cursor_is_valid(input_str));
 
-    if (substr->ptr > input_str->ptr + input_str->len) {
-        /* This will hit if the last substring returned was an empty string after terminating split_on. */
-        AWS_ZERO_STRUCT(*substr);
-        return false;
-    }
+    /* If substr is zeroed-out, then this is the first run. */
+    const bool first_run = substr->ptr == NULL;
 
-    /* Calculate first byte to search. */
-    substr->ptr += substr->len;
-    /* Remaining bytes is the number we started with minus the number of bytes already read. */
-    substr->len = input_str->len - (substr->ptr - input_str->ptr);
+    /* It's legal for input_str to be zeroed out: {.ptr=NULL, .len=0}
+     * Deal with this case separately */
+    if (AWS_UNLIKELY(input_str->ptr == NULL)) {
+        if (first_run) {
+            /* Set substr->ptr to something non-NULL so that next split() call doesn't look like the first run */
+            substr->ptr = (void *)"";
+            substr->len = 0;
+            return true;
+        }
 
-    if (!first_run && substr->len == 0) {
-        /* This will hit if the string doesn't end with split_on but we're done. */
+        /* done */
         AWS_ZERO_STRUCT(*substr);
         return false;
     }
 
-    if (!first_run && *substr->ptr == split_on) {
-        /* If not first rodeo and the character after substr is split_on, skip. */
-        ++substr->ptr;
-        --substr->len;
+    /* Rest of function deals with non-NULL input_str->ptr */
 
-        if (substr->len == 0) {
-            /* If split character was last in the string, return empty substr. */
-            return true;
+    if (first_run) {
+        *substr = *input_str;
+    } else {
+        /* This is not the first run.
+         * Advance substr past the previous split. */
+        const uint8_t *input_end = input_str->ptr + input_str->len;
+        substr->ptr += substr->len + 1;
+
+        /* Note that it's ok if substr->ptr == input_end, this happens in the
+         * final valid split of an input_str that ends with the split_on character:
+         * Ex: "AB&" split on '&' produces "AB" and "" */
+        if (substr->ptr > input_end || substr->ptr < input_str->ptr) { /* 2nd check is overflow check */
+            /* done */
+            AWS_ZERO_STRUCT(*substr);
+            return false;
         }
+
+        /* update len to be remainder of the string */
+        substr->len = input_str->len - (substr->ptr - input_str->ptr);
     }
 
+    /* substr is now remainder of string, search for next split */
     uint8_t *new_location = memchr(substr->ptr, split_on, substr->len);
     if (new_location) {
 
         /* Character found, update string length. */
         substr->len = new_location - substr->ptr;
     }
 
+    AWS_POSTCONDITION(aws_byte_cursor_is_valid(substr));
     return true;
 }
 
@@ -212,7 +223,7 @@ int aws_byte_cursor_split_on_char_n(
     char split_on,
     size_t n,
     struct aws_array_list *AWS_RESTRICT output) {
-    AWS_ASSERT(input_str && input_str->ptr);
+    AWS_ASSERT(aws_byte_cursor_is_valid(input_str));
     AWS_ASSERT(output);
     AWS_ASSERT(output->item_size >= sizeof(struct aws_byte_cursor));
 
@@ -326,7 +337,7 @@ bool aws_byte_cursor_eq_ignore_case(const struct aws_byte_cursor *a, const struc
 }
 
 /* Every possible uint8_t value, lowercased */
-static const uint8_t s_tolower_table[256] = {
+static const uint8_t s_tolower_table[] = {
     0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,
     22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,
     44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  'a',
@@ -339,6 +350,7 @@ static const uint8_t s_tolower_table[256] = {
     198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
     220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
     242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255};
+AWS_STATIC_ASSERT(AWS_ARRAY_SIZE(s_tolower_table) == 256);
 
 const uint8_t *aws_lookup_table_to_lower_get(void) {
     return s_tolower_table;
@@ -1250,6 +1262,59 @@ bool aws_byte_cursor_read_be64(struct aws_byte_cursor *cur, uint64_t *var) {
     return rv;
 }
 
+/* Lookup table indexed by byte value: '0' -> 0, 'f' -> 0xf, 'F' -> 0xF, etc.
+ * Every byte that is not a hex digit maps to 255. */
+/* clang-format off */
+static const uint8_t s_hex_to_num_table[] = {
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255,
+    /* '0' - '9' (indices 48 - 57) */
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+    255, 255, 255, 255, 255, 255, 255,
+    /* 'A' - 'F' (indices 65 - 70) */
+    0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255,
+    /* 'a' - 'f' (indices 97 - 102) */
+    0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+};
+/* Guards against a miscounted row: the table must cover every possible uint8_t index. */
+AWS_STATIC_ASSERT(AWS_ARRAY_SIZE(s_hex_to_num_table) == 256);
+/* clang-format on */
+
+/* Returns a pointer to the first entry of s_hex_to_num_table. */
+const uint8_t *aws_lookup_table_hex_to_num_get(void) {
+    return s_hex_to_num_table;
+}
+
+/**
+ * Parses the first two bytes of cur as hex digits (upper- or lowercase) into one byte.
+ *
+ * On success, stores the value in *var, advances cur by 2 and returns true.
+ * If cur holds fewer than 2 bytes, or either byte is not a hex digit,
+ * returns false and leaves both cur and *var untouched.
+ */
+bool aws_byte_cursor_read_hex_u8(struct aws_byte_cursor *cur, uint8_t *var) {
+    AWS_PRECONDITION(aws_byte_cursor_is_valid(cur));
+    AWS_PRECONDITION(AWS_OBJECT_PTR_IS_WRITABLE(var));
+
+    bool success = false;
+    if (AWS_LIKELY(cur->len >= 2)) {
+        const uint8_t hi = s_hex_to_num_table[cur->ptr[0]];
+        const uint8_t lo = s_hex_to_num_table[cur->ptr[1]];
+
+        /* table maps invalid characters to 255 */
+        if (AWS_LIKELY(hi != 255 && lo != 255)) {
+            /* Both nibbles are <= 0xF here, so the combined value fits in 8 bits.
+             * The operands are promoted to int, so narrow back explicitly
+             * rather than relying on an implicit int -> uint8_t conversion. */
+            *var = (uint8_t)((hi << 4) | lo);
+            cur->ptr += 2;
+            cur->len -= 2;
+            success = true;
+        }
+    }
+
+    AWS_POSTCONDITION(aws_byte_cursor_is_valid(cur));
+    return success;
+}
+
 /**
  * Appends a sub-buffer to the specified buffer.
  *

diff --git a/source/string.c b/source/string.c
@@ -34,6 +34,16 @@ struct aws_string *aws_string_new_from_string(struct aws_allocator *allocator, c
     return aws_string_new_from_array(allocator, str->bytes, str->len);
 }
 
+/**
+ * Creates a new aws_string from the cursor->len bytes at cursor->ptr
+ * by delegating to aws_string_new_from_array().
+ */
+struct aws_string *aws_string_new_from_cursor(struct aws_allocator *allocator, const struct aws_byte_cursor *cursor) {
+    AWS_PRECONDITION(allocator && aws_byte_cursor_is_valid(cursor));
+
+    const uint8_t *src = cursor->ptr;
+    const size_t src_len = cursor->len;
+    return aws_string_new_from_array(allocator, src, src_len);
+}
+
+/**
+ * Creates a new aws_string from the buf->len bytes at buf->buffer
+ * by delegating to aws_string_new_from_array().
+ */
+struct aws_string *aws_string_new_from_buf(struct aws_allocator *allocator, const struct aws_byte_buf *buf) {
+    AWS_PRECONDITION(allocator && aws_byte_buf_is_valid(buf));
+
+    const uint8_t *src = buf->buffer;
+    const size_t src_len = buf->len;
+    return aws_string_new_from_array(allocator, src, src_len);
+}
+
 void aws_string_destroy(struct aws_string *str) {
     AWS_PRECONDITION(!str || aws_string_is_valid(str));
     if (str && str->allocator) {

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -211,6 +211,7 @@ add_test_case(test_byte_cursor_advance_nospec)
 add_test_case(byte_cursor_write_tests)
 add_test_case(byte_cursor_read_tests)
 add_test_case(byte_cursor_limit_tests)
+add_test_case(test_byte_cursor_read_hex_u8)
 add_test_case(test_byte_cursor_right_trim_empty)
 add_test_case(test_byte_cursor_right_trim_all_whitespace)
 add_test_case(test_byte_cursor_right_trim_basic)
@@ -229,6 +230,7 @@ add_test_case(test_char_split_happy_path)
 add_test_case(test_char_split_ends_with_token)
 add_test_case(test_char_split_token_not_present)
 add_test_case(test_char_split_empty)
+add_test_case(test_char_split_zeroed)
 add_test_case(test_char_split_adj_tokens)
 add_test_case(test_char_split_begins_with_token)
 add_test_case(test_char_split_with_max_splits)

diff --git a/tests/cursor_test.c b/tests/cursor_test.c
@@ -259,6 +259,56 @@ static int s_byte_cursor_limit_tests_fn(struct aws_allocator *allocator, void *c
     return 0;
 }
 
+/* Exercises aws_byte_cursor_read_hex_u8() with valid input, too-short cursors and non-hex bytes. */
+AWS_TEST_CASE(test_byte_cursor_read_hex_u8, s_test_byte_cursor_read_hex_u8)
+static int s_test_byte_cursor_read_hex_u8(struct aws_allocator *allocator, void *ctx) {
+    (void)allocator;
+    (void)ctx;
+
+    struct aws_byte_cursor cur;
+    uint8_t val = 0;
+
+    /* exactly two digits: the whole cursor is consumed */
+    cur = aws_byte_cursor_from_c_str("90");
+    ASSERT_TRUE(aws_byte_cursor_read_hex_u8(&cur, &val));
+    ASSERT_UINT_EQUALS(0x90, val);
+    ASSERT_UINT_EQUALS(0, cur.len);
+
+    /* three characters: only the first two are consumed, the third stays in the cursor */
+    cur = aws_byte_cursor_from_c_str("001");
+    ASSERT_TRUE(aws_byte_cursor_read_hex_u8(&cur, &val));
+    ASSERT_UINT_EQUALS(0x00, val);
+    ASSERT_UINT_EQUALS(1, cur.len);
+    ASSERT_UINT_EQUALS('1', cur.ptr[0]);
+
+    /* uppercase and lowercase digits may be mixed */
+    cur = aws_byte_cursor_from_c_str("Fa");
+    ASSERT_TRUE(aws_byte_cursor_read_hex_u8(&cur, &val));
+    ASSERT_UINT_EQUALS(0xFA, val);
+    ASSERT_UINT_EQUALS(0, cur.len);
+
+    /* bad short buffer: one character is not enough, and the cursor length must not change */
+    cur = aws_byte_cursor_from_c_str("0");
+    ASSERT_FALSE(aws_byte_cursor_read_hex_u8(&cur, &val));
+    ASSERT_UINT_EQUALS(1, cur.len);
+
+    /* same cursor shrunk to zero length */
+    cur.len = 0;
+    ASSERT_FALSE(aws_byte_cursor_read_hex_u8(&cur, &val));
+    ASSERT_UINT_EQUALS(0, cur.len);
+
+    /* bad characters: each 2-byte pair contains one non-hex byte */
+    uint8_t bad_chars[][2] = {
+        {'0', 0},   /* valid digit followed by a NUL byte */
+        {'-', '0'},
+        {'/', '0'}, /* '/' is the ASCII character immediately before '0' */
+        {'g', '0'}, /* 'g' is the ASCII character immediately after 'f' */
+        {'x', '0'},
+    };
+    for (size_t i = 0; i < AWS_ARRAY_SIZE(bad_chars); ++i) {
+        cur = aws_byte_cursor_from_array(bad_chars[i], 2);
+        ASSERT_FALSE(aws_byte_cursor_read_hex_u8(&cur, &val));
+        /* a failed read must leave the cursor length unchanged */
+        ASSERT_UINT_EQUALS(2, cur.len);
+    }
+
+    return 0;
+}
+
 #define TEST_STRING "hello"
 
 static const char *s_empty = "";