Skip to content

Commit

Permalink
ares_strsplit() rewrite as wrapper around ares__buf_split()
Browse files Browse the repository at this point in the history
We want to limit hand-written parsers as much as possible.
ares__buf_split() uses the new memory-safe parsing routines.  This
adds a couple of additional flags to remove duplicates, which the
existing split code already did.

Fix By: Brad House (@bradh352)
  • Loading branch information
bradh352 committed Dec 19, 2023
1 parent c6708cf commit 88c444d
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 92 deletions.
62 changes: 47 additions & 15 deletions src/lib/ares__buf.c
Expand Up @@ -793,6 +793,34 @@ static void ares__buf_destroy_cb(void *arg)
ares__buf_destroy(arg);
}

/*! Check whether a candidate value already exists in a list of split buffers.
 *
 *  Each node of the list is read as an ares__buf_t and its peeked contents
 *  are compared against the candidate.
 *
 *  \param[in] list   List whose node values are ares__buf_t pointers.
 *  \param[in] val    Candidate bytes to look for.
 *  \param[in] len    Number of bytes in val.
 *  \param[in] flags  Split flags; if ARES_BUF_SPLIT_CASE_INSENSITIVE is set
 *                    the comparison uses ares__memeq_ci(), otherwise memcmp().
 *  \return ARES_TRUE if an entry of identical length and matching content is
 *          found, ARES_FALSE otherwise.
 */
static ares_bool_t ares__buf_split_isduplicate(ares__llist_t       *list,
                                               const unsigned char *val,
                                               size_t               len,
                                               ares__buf_split_t    flags)
{
  ares__llist_node_t *cur = ares__llist_node_first(list);

  while (cur != NULL) {
    ares__buf_t         *entry     = ares__llist_node_val(cur);
    size_t               entry_len = 0;
    const unsigned char *entry_ptr = ares__buf_peek(entry, &entry_len);

    /* Only entries of the same length are candidates for a match */
    if (entry_len == len) {
      if (flags & ARES_BUF_SPLIT_CASE_INSENSITIVE) {
        if (ares__memeq_ci(entry_ptr, val, len)) {
          return ARES_TRUE;
        }
      } else if (memcmp(entry_ptr, val, len) == 0) {
        return ARES_TRUE;
      }
    }

    cur = ares__llist_node_next(cur);
  }

  /* Walked the whole list without finding a match */
  return ARES_FALSE;
}

ares_status_t ares__buf_split(ares__buf_t *buf, const unsigned char *delims,
size_t delims_len, ares__buf_split_t flags,
ares__llist_t **list)
Expand Down Expand Up @@ -826,23 +854,27 @@ ares_status_t ares__buf_split(ares__buf_t *buf, const unsigned char *delims,
const unsigned char *ptr = ares__buf_tag_fetch(buf, &len);
ares__buf_t *data;

/* Since we don't allow const buffers of 0 length, and user wants 0-length
* buffers, swap what we do here */
if (len) {
data = ares__buf_create_const(ptr, len);
} else {
data = ares__buf_create();
}
if (!(flags & ARES_BUF_SPLIT_NO_DUPLICATES) ||
!ares__buf_split_isduplicate(*list, ptr, len, flags)) {

if (data == NULL) {
status = ARES_ENOMEM;
goto done;
}
/* Since we don't allow const buffers of 0 length, and user wants
* 0-length buffers, swap what we do here */
if (len) {
data = ares__buf_create_const(ptr, len);
} else {
data = ares__buf_create();
}

if (ares__llist_insert_last(*list, data) == NULL) {
ares__buf_destroy(data);
status = ARES_ENOMEM;
goto done;
if (data == NULL) {
status = ARES_ENOMEM;
goto done;
}

if (ares__llist_insert_last(*list, data) == NULL) {
ares__buf_destroy(data);
status = ARES_ENOMEM;
goto done;
}
}
}

Expand Down
6 changes: 5 additions & 1 deletion src/lib/ares__buf.h
Expand Up @@ -420,7 +420,11 @@ typedef enum {
/*! Allow blank sections, by default blank sections are not emitted. If using
* ARES_BUF_SPLIT_DONT_CONSUME_DELIMS, the delimiter is not counted as part
* of the section */
ARES_BUF_SPLIT_ALLOW_BLANK = 1 << 1
ARES_BUF_SPLIT_ALLOW_BLANK = 1 << 1,
/*! Remove duplicate entries */
ARES_BUF_SPLIT_NO_DUPLICATES = 1 << 2,
/*! Perform case-insensitive matching when comparing values */
ARES_BUF_SPLIT_CASE_INSENSITIVE = 1 << 3
} ares__buf_split_t;

/*! Split the provided buffer into multiple sub-buffers stored in the variable
Expand Down
26 changes: 1 addition & 25 deletions src/lib/ares__htable.c
Expand Up @@ -399,30 +399,6 @@ unsigned int ares__htable_hash_FNV1a(const unsigned char *key, size_t key_len,
return hv;
}

/* Byte-indexed lowercase conversion table (256 entries).
 *
 * The C library tolower() is locale-specific, so a fixed table is used to get
 * fast, ASCII-only conversion: entries 0x41-0x5A ('A'-'Z') hold 0x61-0x7A
 * ('a'-'z'), and every other entry holds its own index (identity mapping). */
static const unsigned char ares__tolower_lookup[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C,
0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26,
0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33,
0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40,
0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A,
0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81,
0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E,
0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B,
0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8,
0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5,
0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2,
0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC,
0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9,
0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6,
0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/* Case insensitive version, meant for ASCII strings */
unsigned int ares__htable_hash_FNV1a_casecmp(const unsigned char *key,
Expand All @@ -433,7 +409,7 @@ unsigned int ares__htable_hash_FNV1a_casecmp(const unsigned char *key,
size_t i;

for (i = 0; i < key_len; i++) {
hv ^= (unsigned int)ares__tolower_lookup[key[i]];
hv ^= (unsigned int)ares__tolower(key[i]);
/* hv *= 0x01000193 */
hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
}
Expand Down
42 changes: 42 additions & 0 deletions src/lib/ares_str.c
Expand Up @@ -109,3 +109,45 @@ ares_bool_t ares_str_isnum(const char *str)
}
return ARES_TRUE;
}

/* ASCII-only lowercase mapping table, indexed by byte value (256 entries).
 *
 * tolower() from the C library is locale-specific, so this fixed lookup table
 * is used instead for a fast conversion that only affects ASCII letters:
 *   - indices 0x41-0x5A ('A'-'Z') map to 0x61-0x7A ('a'-'z')
 *   - every other index maps to itself */
static const unsigned char ares__tolower_lookup[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C,
0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26,
0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33,
0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40,
0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A,
0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81,
0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E,
0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B,
0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8,
0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5,
0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2,
0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC,
0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9,
0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6,
0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};

/* Locale-independent lowercase conversion of a single byte.
 *
 * Returns the ares__tolower_lookup entry for c: ASCII 'A'-'Z' become 'a'-'z',
 * any other byte value is returned unchanged. */
unsigned char ares__tolower(unsigned char c)
{
  const unsigned char lowered = ares__tolower_lookup[c];

  return lowered;
}

/* Compare two byte ranges for equality, ignoring ASCII case.
 *
 * Both ranges are read for exactly len bytes; each byte is folded through
 * ares__tolower_lookup before comparison.
 *
 * Returns ARES_TRUE when every folded byte pair matches (including when len
 * is 0), ARES_FALSE at the first mismatch. */
ares_bool_t ares__memeq_ci(const unsigned char *ptr, const unsigned char *val,
                           size_t len)
{
  const unsigned char *lhs = ptr;
  const unsigned char *rhs = val;
  size_t               remaining;

  for (remaining = len; remaining > 0; remaining--, lhs++, rhs++) {
    if (ares__tolower_lookup[*lhs] != ares__tolower_lookup[*rhs]) {
      return ARES_FALSE;
    }
  }

  return ARES_TRUE;
}
4 changes: 4 additions & 0 deletions src/lib/ares_str.h
Expand Up @@ -48,4 +48,8 @@ size_t ares_strcpy(char *dest, const char *src, size_t dest_size);

ares_bool_t ares_str_isnum(const char *str);

/*! Convert a single byte to lowercase using an ASCII-only lookup table rather
 *  than the locale-specific tolower().
 *
 *  \param[in] c  Byte to convert.
 *  \return lowercase form for 'A'-'Z', otherwise c unchanged.
 */
unsigned char ares__tolower(unsigned char c);

/*! Case-insensitive (ASCII) equality comparison of two memory regions.
 *
 *  \param[in] ptr  First region, read for len bytes.
 *  \param[in] val  Second region, read for len bytes.
 *  \param[in] len  Number of bytes to compare.
 *  \return ARES_TRUE if all len bytes match ignoring ASCII case, ARES_FALSE
 *          otherwise.
 */
ares_bool_t ares__memeq_ci(const unsigned char *ptr, const unsigned char *val,
size_t len);

#endif /* HEADER_CARES_STRDUP_H */
101 changes: 51 additions & 50 deletions src/lib/ares_strsplit.c
Expand Up @@ -71,72 +71,73 @@ char **ares__strsplit_duplicate(char **elms, size_t num_elm)
return out;
}


char **ares__strsplit(const char *in, const char *delms, size_t *num_elm)
{
const char *p;
char **table;
void *tmp;
size_t i;
size_t j;
size_t k;
size_t count;
ares_status_t status;
ares__buf_t *buf = NULL;
ares__llist_t *llist = NULL;
ares__llist_node_t *node;
char **out = NULL;
size_t cnt = 0;
size_t idx = 0;

if (in == NULL || delms == NULL || num_elm == NULL) {
return NULL;
}

*num_elm = 0;

/* count non-empty delimited substrings */
count = 0;
p = in;
do {
i = strcspn(p, delms);
if (i != 0) {
/* string is non-empty */
count++;
p += i;
}
} while (*p++ != 0);

if (count == 0) {
buf = ares__buf_create_const((const unsigned char *)in, ares_strlen(in));
if (buf == NULL) {
return NULL;
}
table = ares_malloc(count * sizeof(*table));
if (table == NULL) {
return NULL;

status = ares__buf_split(buf, (const unsigned char *)delms,
ares_strlen(delms),
ARES_BUF_SPLIT_NO_DUPLICATES|
ARES_BUF_SPLIT_CASE_INSENSITIVE,
&llist);
if (status != ARES_SUCCESS) {
goto done;
}

cnt = ares__llist_len(llist);
if (cnt == 0) {
status = ARES_EFORMERR;
goto done;
}

j = 0; /* current table entry */
/* re-calculate indices and allocate new strings for table */
for (p = in; j < count; p += i + 1) {
i = strcspn(p, delms);
if (i != 0) {
for (k = 0; k < j; k++) {
if (strncasecmp(table[k], p, i) == 0 && table[k][i] == 0) {
break;
}
}
if (k == j) {
/* copy unique strings only */
table[j] = ares_malloc(i + 1);
if (table[j] == NULL) {
ares__strsplit_free(table, j);
return NULL;
}
ares_strcpy(table[j], p, i + 1);
j++;
} else {
count--;
}

out = ares_malloc_zero(cnt * sizeof(*out));
if (out == NULL) {
status = ARES_ENOMEM;
goto done;
}

for (node = ares__llist_node_first(llist); node != NULL;
node = ares__llist_node_next(node)) {
ares__buf_t *val = ares__llist_node_val(node);
char *temp = NULL;

status = ares__buf_fetch_str_dup(val, ares__buf_len(val), &temp);
if (status != ARES_SUCCESS) {
goto done;
}

out[idx++] = temp;
}

tmp = ares_realloc(table, count * sizeof(*table));
if (tmp != NULL) {
table = tmp;
*num_elm = cnt;
status = ARES_SUCCESS;

done:
ares__llist_destroy(llist);
ares__buf_destroy(buf);
if (status != ARES_SUCCESS) {
ares__strsplit_free(out, cnt);
out = NULL;
}

*num_elm = count;
return table;
return out;
}
2 changes: 1 addition & 1 deletion src/lib/ares_strsplit.h
Expand Up @@ -35,7 +35,7 @@
* Each character in the string is a delimiter so
* there can be multiple delimiters to split on.
* E.g. ", " will split on all comma's and spaces.
* Duplicate entries are removed.
* Duplicate (case-insensitive) entries are removed.
* param num_elm Return parameter of the number of elements
* in the result array.
*
Expand Down

0 comments on commit 88c444d

Please sign in to comment.