Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions include/aws/common/byte_buf.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,11 +213,12 @@ bool aws_byte_buf_eq_c_str_ignore_case(const struct aws_byte_buf *const buf, con
* No copies, no buffer allocations. Iterates over input_str, and returns the next substring between split_on instances.
*
* Edge case rules are as follows:
* If the input is an empty string, an empty cursor will be the one entry returned.
* If the input begins with split_on, an empty cursor will be the first entry returned.
* If the input has two adjacent split_on tokens, an empty cursor will be returned.
* If the input ends with split_on, an empty cursor will be returned last.
*
* It is the user's responsibility to properly zero-initialize substr.
* It is the user's responsibility to zero-initialize substr before the first call.
*
* It is the user's responsibility to make sure the input buffer stays in memory
* long enough to use the results.
Expand Down Expand Up @@ -505,6 +506,21 @@ uint64_t aws_hash_byte_cursor_ptr_ignore_case(const void *item);
AWS_COMMON_API
const uint8_t *aws_lookup_table_to_lower_get(void);

/**
* Returns lookup table to go from ASCII/UTF-8 hex character to a number (0-15).
* Non-hex characters map to 255.
* Valid examples:
* '0' -> 0
* 'F' -> 15
* 'f' -> 15
* Invalid examples:
* ' ' -> 255
* 'Z' -> 255
* '\0' -> 255
*/
AWS_COMMON_API
const uint8_t *aws_lookup_table_hex_to_num_get(void);

/**
* Lexical (byte value) comparison of two byte cursors
*/
Expand Down Expand Up @@ -624,24 +640,24 @@ AWS_COMMON_API bool aws_byte_cursor_read_be24(struct aws_byte_cursor *cur, uint3
AWS_COMMON_API bool aws_byte_cursor_read_be32(struct aws_byte_cursor *cur, uint32_t *var);

/**
* Reads a 32-bit value in network byte order from cur, and places it in host
* Reads a 64-bit value in network byte order from cur, and places it in host
* byte order into var.
*
* On success, returns true and updates the cursor pointer/length accordingly.
* If there is insufficient space in the cursor, returns false, leaving the
* cursor unchanged.
*/
AWS_COMMON_API bool aws_byte_cursor_read_float_be32(struct aws_byte_cursor *cur, float *var);
AWS_COMMON_API bool aws_byte_cursor_read_be64(struct aws_byte_cursor *cur, uint64_t *var);

/**
* Reads a 64-bit value in network byte order from cur, and places it in host
* Reads a 32-bit value in network byte order from cur, and places it in host
* byte order into var.
*
* On success, returns true and updates the cursor pointer/length accordingly.
* If there is insufficient space in the cursor, returns false, leaving the
* cursor unchanged.
*/
AWS_COMMON_API bool aws_byte_cursor_read_float_be64(struct aws_byte_cursor *cur, double *var);
AWS_COMMON_API bool aws_byte_cursor_read_float_be32(struct aws_byte_cursor *cur, float *var);
Comment on lines +650 to +660
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These didn't really change names. I just moved them so all the whole-number read functions were together and all the read_float functions were together


/**
* Reads a 64-bit value in network byte order from cur, and places it in host
Expand All @@ -651,7 +667,18 @@ AWS_COMMON_API bool aws_byte_cursor_read_float_be64(struct aws_byte_cursor *cur,
* If there is insufficient space in the cursor, returns false, leaving the
* cursor unchanged.
*/
AWS_COMMON_API bool aws_byte_cursor_read_be64(struct aws_byte_cursor *cur, uint64_t *var);
AWS_COMMON_API bool aws_byte_cursor_read_float_be64(struct aws_byte_cursor *cur, double *var);

/**
* Reads 2 hex characters from ASCII/UTF-8 text to produce an 8-bit number.
* Accepts both lowercase 'a'-'f' and uppercase 'A'-'F'.
* For example: "0F" produces 15.
*
* On success, returns true and advances the cursor by 2.
* If there is insufficient space in the cursor or an invalid character
* is encountered, returns false, leaving the cursor unchanged.
*/
AWS_COMMON_API bool aws_byte_cursor_read_hex_u8(struct aws_byte_cursor *cur, uint8_t *var);

/**
* Appends a sub-buffer to the specified buffer.
Expand Down
18 changes: 17 additions & 1 deletion include/aws/common/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,31 @@ bool aws_string_eq_c_str_ignore_case(const struct aws_string *str, const char *c
*/
AWS_COMMON_API
struct aws_string *aws_string_new_from_c_str(struct aws_allocator *allocator, const char *c_str);

/**
* Allocate a new string with the same contents as array.
*/
AWS_COMMON_API
struct aws_string *aws_string_new_from_array(struct aws_allocator *allocator, const uint8_t *bytes, size_t len);

/**
* Allocate a new string with the same contents as the old.
* Allocate a new string with the same contents as another string.
*/
AWS_COMMON_API
struct aws_string *aws_string_new_from_string(struct aws_allocator *allocator, const struct aws_string *str);

/**
* Allocate a new string with the same contents as cursor.
*/
AWS_COMMON_API
struct aws_string *aws_string_new_from_cursor(struct aws_allocator *allocator, const struct aws_byte_cursor *cursor);

/**
* Allocate a new string with the same contents as buf.
*/
AWS_COMMON_API
struct aws_string *aws_string_new_from_buf(struct aws_allocator *allocator, const struct aws_byte_buf *buf);

/**
* Deallocate string.
*/
Expand Down
117 changes: 91 additions & 26 deletions source/byte_buf.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ int aws_byte_buf_init(struct aws_byte_buf *buf, struct aws_allocator *allocator,

buf->buffer = (capacity == 0) ? NULL : aws_mem_acquire(allocator, capacity);
if (capacity != 0 && buf->buffer == NULL) {
AWS_ZERO_STRUCT(*buf);
return AWS_OP_ERR;
}

Expand Down Expand Up @@ -162,48 +163,58 @@ bool aws_byte_cursor_next_split(
char split_on,
struct aws_byte_cursor *AWS_RESTRICT substr) {

bool first_run = false;
if (!substr->ptr) {
first_run = true;
substr->ptr = input_str->ptr;
substr->len = 0;
}
AWS_PRECONDITION(aws_byte_cursor_is_valid(input_str));

if (substr->ptr > input_str->ptr + input_str->len) {
/* This will hit if the last substring returned was an empty string after terminating split_on. */
AWS_ZERO_STRUCT(*substr);
return false;
}
/* If substr is zeroed-out, then this is the first run. */
const bool first_run = substr->ptr == NULL;

/* Calculate first byte to search. */
substr->ptr += substr->len;
/* Remaining bytes is the number we started with minus the number of bytes already read. */
substr->len = input_str->len - (substr->ptr - input_str->ptr);
/* It's legal for input_str to be zeroed out: {.ptr=NULL, .len=0}
* Deal with this case separately */
if (AWS_UNLIKELY(input_str->ptr == NULL)) {
if (first_run) {
/* Set substr->ptr to something non-NULL so that next split() call doesn't look like the first run */
substr->ptr = (void *)"";
substr->len = 0;
return true;
}

if (!first_run && substr->len == 0) {
/* This will hit if the string doesn't end with split_on but we're done. */
/* done */
AWS_ZERO_STRUCT(*substr);
return false;
}

if (!first_run && *substr->ptr == split_on) {
/* If not first rodeo and the character after substr is split_on, skip. */
++substr->ptr;
--substr->len;
/* Rest of function deals with non-NULL input_str->ptr */

if (substr->len == 0) {
/* If split character was last in the string, return empty substr. */
return true;
if (first_run) {
*substr = *input_str;
} else {
/* This is not the first run.
* Advance substr past the previous split. */
const uint8_t *input_end = input_str->ptr + input_str->len;
substr->ptr += substr->len + 1;

/* Note that it's ok if substr->ptr == input_end, this happens in the
* final valid split of an input_str that ends with the split_on character:
* Ex: "AB&" split on '&' produces "AB" and "" */
if (substr->ptr > input_end || substr->ptr < input_str->ptr) { /* 2nd check is overflow check */
/* done */
AWS_ZERO_STRUCT(*substr);
return false;
}

/* update len to be remainder of the string */
substr->len = input_str->len - (substr->ptr - input_str->ptr);
}

/* substr is now remainder of string, search for next split */
uint8_t *new_location = memchr(substr->ptr, split_on, substr->len);
if (new_location) {

/* Character found, update string length. */
substr->len = new_location - substr->ptr;
}

AWS_POSTCONDITION(aws_byte_cursor_is_valid(substr));
return true;
}

Expand All @@ -212,7 +223,7 @@ int aws_byte_cursor_split_on_char_n(
char split_on,
size_t n,
struct aws_array_list *AWS_RESTRICT output) {
AWS_ASSERT(input_str && input_str->ptr);
AWS_ASSERT(aws_byte_cursor_is_valid(input_str));
AWS_ASSERT(output);
AWS_ASSERT(output->item_size >= sizeof(struct aws_byte_cursor));

Expand Down Expand Up @@ -326,7 +337,7 @@ bool aws_byte_cursor_eq_ignore_case(const struct aws_byte_cursor *a, const struc
}

/* Every possible uint8_t value, lowercased */
static const uint8_t s_tolower_table[256] = {
static const uint8_t s_tolower_table[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 'a',
Expand All @@ -339,6 +350,7 @@ static const uint8_t s_tolower_table[256] = {
198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255};
AWS_STATIC_ASSERT(AWS_ARRAY_SIZE(s_tolower_table) == 256);

const uint8_t *aws_lookup_table_to_lower_get(void) {
return s_tolower_table;
Expand Down Expand Up @@ -1250,6 +1262,59 @@ bool aws_byte_cursor_read_be64(struct aws_byte_cursor *cur, uint64_t *var) {
return rv;
}

/* Lookup table indexed by byte value: maps an ASCII hex digit to the number it
 * represents ('0' -> 0, 'f' -> 0xf, 'F' -> 0xF, etc).
 * Every byte that is not a hex digit maps to 255, which is safe to use as the
 * "invalid" marker because real results only span 0-15. */
/* clang-format off */
static const uint8_t s_hex_to_num_table[] = {
/* 0x00 - 0x2F: everything before '0' is invalid */
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255,
/* 0 - 9 */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
/* ':' through '@' (the 7 bytes between '9' and 'A') are invalid */
255, 255, 255, 255, 255, 255, 255,
/* A - F */
0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
/* 'G' through '`' (the 26 bytes between 'F' and 'a') are invalid */
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255,
/* a - f */
0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
/* 'g' through 0xFF (the remaining 153 bytes) are invalid */
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
};
/* The array size is left implicit above so that this assert catches a miscounted row:
 * the table must have exactly one entry per possible uint8_t value. */
AWS_STATIC_ASSERT(AWS_ARRAY_SIZE(s_hex_to_num_table) == 256);
/* clang-format on */

/* Accessor for the hex-digit lookup table: returns a pointer to the first of its
 * 256 entries. Index it with a byte value; non-hex bytes yield 255. */
const uint8_t *aws_lookup_table_hex_to_num_get(void) {
    return &s_hex_to_num_table[0];
}

/**
 * Parses the two leading bytes of cur as ASCII hex digits (upper or lower case)
 * and stores the resulting 8-bit value in *var.
 *
 * Returns true and advances cur by 2 on success.
 * Returns false, leaving both cur and *var untouched, if fewer than 2 bytes
 * remain or if either byte is not a hex digit.
 */
bool aws_byte_cursor_read_hex_u8(struct aws_byte_cursor *cur, uint8_t *var) {
    AWS_PRECONDITION(aws_byte_cursor_is_valid(cur));
    AWS_PRECONDITION(AWS_OBJECT_PTR_IS_WRITABLE(var));

    bool parsed = false;

    if (AWS_LIKELY(cur->len >= 2)) {
        const uint8_t high_nibble = s_hex_to_num_table[cur->ptr[0]];
        const uint8_t low_nibble = s_hex_to_num_table[cur->ptr[1]];

        /* 255 is the table's marker for "not a hex digit" */
        const bool both_valid = (high_nibble != 255) && (low_nibble != 255);
        if (AWS_LIKELY(both_valid)) {
            /* operands are promoted to int for the shift; the result always fits in 8 bits */
            *var = (uint8_t)((high_nibble << 4) | low_nibble);
            cur->ptr += 2;
            cur->len -= 2;
            parsed = true;
        }
    }

    AWS_POSTCONDITION(aws_byte_cursor_is_valid(cur));
    return parsed;
}

/**
* Appends a sub-buffer to the specified buffer.
*
Expand Down
10 changes: 10 additions & 0 deletions source/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ struct aws_string *aws_string_new_from_string(struct aws_allocator *allocator, c
return aws_string_new_from_array(allocator, str->bytes, str->len);
}

/**
 * Allocates a new aws_string with the same contents as the bytes referenced by cursor.
 * Delegates to aws_string_new_from_array() and returns its result unchanged.
 */
struct aws_string *aws_string_new_from_cursor(struct aws_allocator *allocator, const struct aws_byte_cursor *cursor) {
    AWS_PRECONDITION(allocator && aws_byte_cursor_is_valid(cursor));

    const uint8_t *src_bytes = cursor->ptr;
    const size_t src_len = cursor->len;
    return aws_string_new_from_array(allocator, src_bytes, src_len);
}

/**
 * Allocates a new aws_string with the same contents as the first buf->len bytes of buf.
 * Delegates to aws_string_new_from_array() and returns its result unchanged.
 */
struct aws_string *aws_string_new_from_buf(struct aws_allocator *allocator, const struct aws_byte_buf *buf) {
    AWS_PRECONDITION(allocator && aws_byte_buf_is_valid(buf));

    const uint8_t *src_bytes = buf->buffer;
    const size_t src_len = buf->len;
    return aws_string_new_from_array(allocator, src_bytes, src_len);
}

void aws_string_destroy(struct aws_string *str) {
AWS_PRECONDITION(!str || aws_string_is_valid(str));
if (str && str->allocator) {
Expand Down
2 changes: 2 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ add_test_case(test_byte_cursor_advance_nospec)
add_test_case(byte_cursor_write_tests)
add_test_case(byte_cursor_read_tests)
add_test_case(byte_cursor_limit_tests)
add_test_case(test_byte_cursor_read_hex_u8)
add_test_case(test_byte_cursor_right_trim_empty)
add_test_case(test_byte_cursor_right_trim_all_whitespace)
add_test_case(test_byte_cursor_right_trim_basic)
Expand All @@ -229,6 +230,7 @@ add_test_case(test_char_split_happy_path)
add_test_case(test_char_split_ends_with_token)
add_test_case(test_char_split_token_not_present)
add_test_case(test_char_split_empty)
add_test_case(test_char_split_zeroed)
add_test_case(test_char_split_adj_tokens)
add_test_case(test_char_split_begins_with_token)
add_test_case(test_char_split_with_max_splits)
Expand Down
50 changes: 50 additions & 0 deletions tests/cursor_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,56 @@ static int s_byte_cursor_limit_tests_fn(struct aws_allocator *allocator, void *c
return 0;
}

/* Checks aws_byte_cursor_read_hex_u8() against valid digit pairs, cursors that are
 * too short, and pairs containing a non-hex byte. On every failure the cursor
 * length must be left as it was. */
AWS_TEST_CASE(test_byte_cursor_read_hex_u8, s_test_byte_cursor_read_hex_u8)
static int s_test_byte_cursor_read_hex_u8(struct aws_allocator *allocator, void *ctx) {
    (void)allocator;
    (void)ctx;

    uint8_t parsed = 0;
    struct aws_byte_cursor input;

    /* exactly two digits: the whole cursor is consumed */
    input = aws_byte_cursor_from_c_str("90");
    ASSERT_TRUE(aws_byte_cursor_read_hex_u8(&input, &parsed));
    ASSERT_UINT_EQUALS(0x90, parsed);
    ASSERT_UINT_EQUALS(0, input.len);

    /* three characters: only the first two are consumed, the third remains */
    input = aws_byte_cursor_from_c_str("001");
    ASSERT_TRUE(aws_byte_cursor_read_hex_u8(&input, &parsed));
    ASSERT_UINT_EQUALS(0x00, parsed);
    ASSERT_UINT_EQUALS(1, input.len);
    ASSERT_UINT_EQUALS('1', input.ptr[0]);

    /* upper and lower case digits may be mixed */
    input = aws_byte_cursor_from_c_str("Fa");
    ASSERT_TRUE(aws_byte_cursor_read_hex_u8(&input, &parsed));
    ASSERT_UINT_EQUALS(0xFA, parsed);
    ASSERT_UINT_EQUALS(0, input.len);

    /* too short: a single byte */
    input = aws_byte_cursor_from_c_str("0");
    ASSERT_FALSE(aws_byte_cursor_read_hex_u8(&input, &parsed));
    ASSERT_UINT_EQUALS(1, input.len);

    /* too short: the same cursor shrunk to zero length */
    input.len = 0;
    ASSERT_FALSE(aws_byte_cursor_read_hex_u8(&input, &parsed));
    ASSERT_UINT_EQUALS(0, input.len);

    /* two-byte inputs in which one byte is not a hex digit */
    uint8_t invalid_pairs[][2] = {
        {'0', 0},   /* NUL byte in the second position */
        {'-', '0'},
        {'/', '0'}, /* '/' is the byte just before '0' */
        {'g', '0'}, /* 'g' is the byte just after 'f' */
        {'x', '0'},
    };
    const size_t num_invalid_pairs = AWS_ARRAY_SIZE(invalid_pairs);
    for (size_t pair_idx = 0; pair_idx < num_invalid_pairs; ++pair_idx) {
        input = aws_byte_cursor_from_array(invalid_pairs[pair_idx], 2);
        ASSERT_FALSE(aws_byte_cursor_read_hex_u8(&input, &parsed));
        ASSERT_UINT_EQUALS(2, input.len);
    }

    return 0;
}

#define TEST_STRING "hello"

static const char *s_empty = "";
Expand Down
Loading