Permalink
Browse files

implemented iteration

  • Loading branch information...
1 parent f4808ca commit 9b7b45cd46c2b73e95bc3b06474e5973ce7ff308 @dcjones committed Aug 14, 2011
Showing with 765 additions and 68 deletions.
  1. +3 −3 README.md
  2. +0 −1 TODO
  3. +14 −1 configure.ac
  4. +1 −0 src/Makefile.am
  5. +6 −1 src/ahtable.c
  6. +1 −1 src/ahtable.h
  7. +224 −0 src/hat-trie.c
  8. +12 −1 src/hat-trie.h
  9. +11 −5 src/misc.c
  10. +1 −1 src/misc.h
  11. +29 −5 src/superfasthash.c
  12. +29 −13 src/superfasthash.h
  13. +6 −4 test/Makefile.am
  14. +79 −16 test/check_ahtable.c
  15. +79 −16 test/check_hattrie.c
  16. +216 −0 test/str_map.c
  17. +54 −0 test/str_map.h
View
6 README.md
@@ -5,9 +5,9 @@ Hat-Trie
This is an ANSI C99 implementation of the HAT-trie data structure of Askitis and
Sinha, an extremely efficient (space and time) modern variant of tries.
-The version implemented here maps null-terminated strings to words (i.e.,
-unsigned longs), which can be used to store counts, pointers, etc, or not used
-at all if you simply want to maintain a set of unique strings.
+The version implemented here maps arrays of bytes to words (i.e., unsigned
+longs), which can be used to store counts, pointers, etc, or not used at all if
+you simply want to maintain a set of unique strings.
For details see,
View
1 TODO
@@ -2,6 +2,5 @@
todo:
* Deletion in ahtable.
* Deletion in hattrie.
- * Iteration in hattrie.
View
15 configure.ac
@@ -2,8 +2,21 @@
AC_INIT([hat-trie], [0.1.0], [dcjones@cs.washington.edu])
AM_INIT_AUTOMAKE([foreign])
AC_CONFIG_HEADERS([config.h])
+m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
+
+base_CFLAGS="-std=c99 -Wall -Wextra -pedantic"
+opt_CFLAGS="${base_CFLAGS} -O3"
+dbg_CFLAGS="${base_CFLAGS} -g -O0"
+
+AC_ARG_ENABLE([debugging],
+ [AS_HELP_STRING([--enable-debugging],
+ [enable debugging info (default is no)])],
+ [], [enable_debugging=no])
+
+AS_IF([test "x$enable_debugging" = xyes],
+ [CFLAGS="$dbg_CFLAGS"],
+ [CFLAGS="$opt_CFLAGS"])
-CFLAGS="-Wall -Wextra -std=c99 -pedantic -g -O0"
AC_PROG_CC
AC_PROG_CPP
View
1 src/Makefile.am
@@ -8,3 +8,4 @@ libhat_trie_la_SOURCES = common.h \
superfasthash.h superfasthash.c
pkginclude_HEADERS = hat-trie.h ahtable.h common.h
+
View
7 src/ahtable.c
@@ -1,4 +1,9 @@
-
+/*
+ * This file is part of hat-trie.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ */
#include "ahtable.h"
#include "misc.h"
View
2 src/ahtable.h
@@ -1,5 +1,5 @@
/*
- * This file is part of libhattrie
+ * This file is part of hat-trie.
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
View
224 src/hat-trie.c
@@ -1,3 +1,9 @@
+/*
+ * This file is part of hat-trie.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ */
#include "hat-trie.h"
#include "ahtable.h"
@@ -336,3 +342,221 @@ value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
}
+/* plan for iteration:
+ * This is tricky, as we have no parent pointers currently, and I would like to
+ * avoid adding them. That means maintaining a stack
+ *
+ */
+
+typedef struct hattrie_node_stack_t_ /* one entry of the iterator's linked-list stack of nodes still to be visited */
+{
+ char c; /* index (as a byte) of `node` in its parent's child table; '\0' for the root */
+ size_t level; /* depth recorded at push time: parent's level + 1, or 0 for the root */
+
+ node_ptr node; /* the pending trie node or bucket */
+ struct hattrie_node_stack_t_* next; /* entry beneath this one; NULL at the bottom of the stack */
+
+} hattrie_node_stack_t;
+
+
+struct hattrie_iter_t_ /* state of one traversal over a hattrie_t */
+{
+ char* key; /* buffer in which hattrie_iter_key assembles the current key */
+ size_t keysize; // number of bytes allocated for `key`
+ size_t level; /* length of the prefix in `key` contributed by the trie path */
+
+ /* A value stored directly in a trie node has no bucket entry; while the
+ * iterator is positioned on such a value it is described by these two. */
+ bool has_nil_key; /* true while positioned on a trie node's own value */
+ value_t nil_val; /* copy of that node's value */
+
+ const hattrie_t* T; /* trie being iterated */
+ ahtable_iter_t* i; /* iterator over the current bucket, or NULL */
+ hattrie_node_stack_t* stack; /* nodes still to be visited; NULL when none remain */
+};
+
+
+/* Record byte `c` as the key-prefix character for depth `level` and make
+ * `level` the iterator's current prefix length.
+ *
+ * The byte is stored at key[level - 1]; level 0 (the root) carries no
+ * character, so only the prefix length is updated in that case. The key
+ * buffer is grown first so the store is always in bounds.
+ */
+static void hattrie_iter_pushchar(hattrie_iter_t* i, size_t level, char c)
+{
+    if (i->keysize < level) {
+        /* Keep doubling until `level` bytes fit, exactly as hattrie_iter_key
+         * does; a single doubling is only enough while callers raise the
+         * level one step at a time. */
+        while (i->keysize < level) i->keysize *= 2;
+        i->key = realloc_or_die(i->key, i->keysize * sizeof(char));
+    }
+
+    if (level > 0) {
+        i->key[level - 1] = c;
+    }
+
+    i->level = level;
+}
+
+
+/* Pop the next pending node off the iterator's stack and position the
+ * iterator on it. Does nothing when the stack is empty.
+ *
+ * Trie node: its byte is recorded in the key prefix, its own value (if it has
+ * one) becomes the current "nil key" entry, and each distinct child is pushed.
+ *
+ * Bucket: a bucket iterator is opened on it. For a pure bucket the byte that
+ * led to it is part of the prefix. Any other bucket is assumed to store keys
+ * that still include that byte (which is why the byte is not pushed), so the
+ * prefix is just the parent's path.
+ */
+static void hattrie_iter_nextnode(hattrie_iter_t* i)
+{
+    if (i->stack == NULL) return;
+
+    /* pop the stack */
+    hattrie_node_stack_t* top = i->stack;
+    node_ptr node  = top->node;
+    char     c     = top->c;
+    size_t   level = top->level;
+
+    i->stack = top->next;
+    free(top);
+
+    if (*node.flag & NODE_TYPE_TRIE) {
+        hattrie_iter_pushchar(i, level, c);
+
+        if (node.t->has_val) {
+            i->has_nil_key = true;
+            i->nil_val     = node.t->val;
+        }
+
+        /* Push children from right to left so that the child for the
+         * smallest byte ends up on top and is visited first. */
+        int j;
+        for (j = 255; j >= 0; --j) {
+            /* Neighbouring slots may point at the same child; push such a
+             * child only once (for the highest slot of the run). */
+            if (j < 255 && node.t->xs[j].t == node.t->xs[j + 1].t) continue;
+
+            hattrie_node_stack_t* entry = malloc_or_die(sizeof(hattrie_node_stack_t));
+            entry->node  = node.t->xs[j];
+            entry->next  = i->stack;
+            entry->level = level + 1;
+            entry->c     = (char) j;
+            i->stack     = entry;
+        }
+    }
+    else {
+        if (*node.flag & NODE_TYPE_PURE_BUCKET) {
+            hattrie_iter_pushchar(i, level, c);
+        }
+        else {
+            /* The prefix for this bucket is its parent's path. Without this
+             * reset, i->level would keep whatever (possibly deeper) value the
+             * previously visited subtree left behind and hattrie_iter_key
+             * would splice bucket keys in at the wrong offset. */
+            i->level = level > 0 ? level - 1 : 0;
+        }
+
+        i->i = ahtable_iter_begin(node.b);
+    }
+}
+
+
+/* Create an iterator over T and advance it to the first entry.
+ *
+ * If T yields no entry at all, the returned iterator already satisfies
+ * hattrie_iter_finished. The caller releases the iterator with
+ * hattrie_iter_free.
+ */
+hattrie_iter_t* hattrie_iter_begin(const hattrie_t* T)
+{
+    hattrie_iter_t* i = malloc_or_die(sizeof(hattrie_iter_t));
+    i->T = T;
+    i->i = NULL;
+    i->keysize = 16;
+    i->key = malloc_or_die(i->keysize * sizeof(char));
+    i->level = 0;
+    i->has_nil_key = false;
+    i->nil_val = 0;
+
+    /* the traversal starts with only the root pending */
+    i->stack = malloc_or_die(sizeof(hattrie_node_stack_t));
+    i->stack->next = NULL;
+    i->stack->node = T->root;
+    i->stack->c = '\0';
+    i->stack->level = 0;
+
+    /* Keep taking nodes off the stack until we stand on something that can
+     * be reported: a bucket with entries left, or a trie node's own value. */
+    while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
+           i->stack != NULL ) {
+
+        ahtable_iter_free(i->i);
+        i->i = NULL;
+        hattrie_iter_nextnode(i);
+    }
+
+    /* The loop can stop with an exhausted bucket iterator still attached
+     * (the last bucket reached had no entries). Drop it, as hattrie_iter_next
+     * does, so that hattrie_iter_finished reports true instead of letting
+     * the caller read from a finished bucket iterator. */
+    if (i->i != NULL && ahtable_iter_finished(i->i)) {
+        ahtable_iter_free(i->i);
+        i->i = NULL;
+    }
+
+    return i;
+}
+
+
+
+/* Advance the iterator to the next entry; a no-op once it is finished.
+ *
+ * The current position is either an entry of the current bucket or a trie
+ * node's own value ("nil key"). After stepping past it, nodes are taken off
+ * the stack until another reportable position is found or nothing is left.
+ */
+void hattrie_iter_next(hattrie_iter_t* i)
+{
+    if (hattrie_iter_finished(i)) return;
+
+    if (i->i != NULL && !ahtable_iter_finished(i->i)) {
+        ahtable_iter_next(i->i);
+    }
+    else if (i->has_nil_key) {
+        i->has_nil_key = false;
+        i->nil_val = 0;
+        hattrie_iter_nextnode(i);
+    }
+
+    while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) &&
+           i->stack != NULL ) {
+
+        ahtable_iter_free(i->i);
+        i->i = NULL;
+        hattrie_iter_nextnode(i);
+    }
+
+    /* Drop an exhausted bucket iterator so hattrie_iter_finished can report
+     * the end. i->i is NULL here whenever the loop stopped on a nil key or
+     * ran out of nodes, so it must be checked before it is queried. */
+    if (i->i != NULL && ahtable_iter_finished(i->i)) {
+        ahtable_iter_free(i->i);
+        i->i = NULL;
+    }
+}
+
+
+
+/* True once nothing remains to report: no pending nodes, no open bucket
+ * iterator and no trie-node value waiting to be returned. */
+bool hattrie_iter_finished(hattrie_iter_t* i)
+{
+    if (i->stack != NULL) return false;
+    if (i->i != NULL) return false;
+
+    return !i->has_nil_key;
+}
+
+
+/* Release an iterator together with its bucket iterator, any nodes still
+ * pending on its stack and its key buffer. Passing NULL is allowed. */
+void hattrie_iter_free(hattrie_iter_t* i)
+{
+    if (i == NULL) return;
+
+    if (i->i != NULL) {
+        ahtable_iter_free(i->i);
+    }
+
+    /* unlink and free the pending entries one at a time */
+    while (i->stack != NULL) {
+        hattrie_node_stack_t* top = i->stack;
+        i->stack = top->next;
+        free(top);
+    }
+
+    free(i->key);
+    free(i);
+}
+
+
+/* Return the key of the current entry and store its length in *len.
+ *
+ * The key is assembled in the iterator's own buffer: the first i->level
+ * bytes are the prefix spelled out by the trie path, followed by the key
+ * reported by the current bucket (nothing for a trie node's own value). A
+ * terminating '\0' is appended but not counted in *len. The pointer is owned
+ * by the iterator and is overwritten by later calls.
+ *
+ * Returns NULL, leaving *len untouched, when the iterator is finished.
+ */
+const char* hattrie_iter_key(hattrie_iter_t* i, size_t* len)
+{
+    if (hattrie_iter_finished(i)) return NULL;
+
+    size_t sublen;
+    const char* subkey;
+
+    if (i->has_nil_key) {
+        subkey = NULL;
+        sublen = 0;
+    }
+    else subkey = ahtable_iter_key(i->i, &sublen);
+
+    /* make room for prefix + bucket key + terminator */
+    if (i->keysize < i->level + sublen + 1) {
+        while (i->keysize < i->level + sublen + 1) i->keysize *= 2;
+        i->key = realloc_or_die(i->key, i->keysize * sizeof(char));
+    }
+
+    /* memcpy must not be handed a null pointer even for a zero length, and
+     * subkey is NULL for a nil key. */
+    if (sublen > 0) {
+        memcpy(i->key + i->level, subkey, sublen);
+    }
+    i->key[i->level + sublen] = '\0';
+
+    *len = i->level + sublen;
+    return i->key;
+}
+
+
+/* Return a pointer to the value of the current entry, or NULL when the
+ * iterator is finished. For a trie node's own value the pointer refers to
+ * the copy held inside the iterator. */
+value_t* hattrie_iter_val(hattrie_iter_t* i)
+{
+    if (!i->has_nil_key) {
+        return hattrie_iter_finished(i) ? NULL : ahtable_iter_val(i->i);
+    }
+
+    return &i->nil_val;
+}
+
+
+
View
13 src/hat-trie.h
@@ -1,5 +1,5 @@
/*
- * This file is part of libhattrie
+ * This file is part of hat-trie
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
@@ -20,6 +20,7 @@
#include "common.h"
#include <stdlib.h>
+#include <stdbool.h>
typedef struct hattrie_t_ hattrie_t;
@@ -44,6 +45,16 @@ value_t* hattrie_get (hattrie_t*, const char* key, size_t len);
value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len);
+typedef struct hattrie_iter_t_ hattrie_iter_t;
+
+hattrie_iter_t* hattrie_iter_begin (const hattrie_t*);
+void hattrie_iter_next (hattrie_iter_t*);
+bool hattrie_iter_finished (hattrie_iter_t*);
+void hattrie_iter_free (hattrie_iter_t*);
+const char* hattrie_iter_key (hattrie_iter_t*, size_t* len);
+value_t* hattrie_iter_val (hattrie_iter_t*);
+
+
#endif
View
16 src/misc.c
@@ -1,3 +1,9 @@
+/*
+ * This file is part of hat-trie.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ */
#include "misc.h"
#include <stdlib.h>
@@ -6,8 +12,8 @@
void* malloc_or_die(size_t n)
{
void* p = malloc(n);
- if (p == NULL) {
- fprintf(stderr, "Can not allocate %zu bytes.\n", n);
+ if (p == NULL && n != 0) {
+ fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
exit(1);
}
return p;
@@ -17,8 +23,8 @@ void* malloc_or_die(size_t n)
void* realloc_or_die(void* ptr, size_t n)
{
void* p = realloc(ptr, n);
- if (p == NULL) {
- fprintf(stderr, "Can not allocate %zu bytes.\n", n);
+ if (p == NULL && n != 0) {
+ fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
exit(1);
}
return p;
@@ -29,7 +35,7 @@ FILE* fopen_or_die(const char* path, const char* mode)
{
FILE* f = fopen(path, mode);
if (f == NULL) {
- fprintf(stderr, "Can not open file %s with mode %s.\n", path, mode);
+ fprintf(stderr, "Cannot open file %s with mode %s.\n", path, mode);
exit(1);
}
return f;
View
2 src/misc.h
@@ -1,5 +1,5 @@
/*
- * This file is part of lineset.
+ * This file is part of hat-trie.
*
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
*
View
34 src/superfasthash.c
@@ -1,10 +1,34 @@
-
-/*
- * Paul Hsieh's SuperFastHash
- * http://www.azillionmonkeys.com/qed/hash.html
+/*
+ * Copyright (c) 2010, Paul Hsieh
+ *
+ * All rights reserved. Redistribution and use in source and binary forms,
+ * with or without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither my name, Paul Hsieh, nor the names of any other contributors to
+ * the code use may not be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
*/
-
#include "superfasthash.h"
#include <stdlib.h>
#include <stdint.h>
View
42 src/superfasthash.h
@@ -1,25 +1,41 @@
-
-/*
- * Paul Hsieh's SuperFastHash
- * http://www.azillionmonkeys.com/qed/hash.html
+/*
+ * Copyright (c) 2010, Paul Hsieh
+ *
+ * All rights reserved. Redistribution and use in source and binary forms,
+ * with or without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither my name, Paul Hsieh, nor the names of any other contributors to
+ * the code use may not be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
*/
#ifndef SUPERFASTHASH_H
#define SUPERFASTHASH_H
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
#include <stdint.h>
uint32_t hash(const char * data, int len);
uint32_t hash_inc(const char * data, int len, uint32_t hash);
-
-#if defined(__cplusplus)
-}
-#endif
-
#endif
View
10 test/Makefile.am
@@ -2,8 +2,10 @@
TESTS = check_ahtable
check_PROGRAMS = check_ahtable check_hattrie
-check_ahtable_SOURCES = check_ahtable.c
-check_ahtable_LDADD = $(top_builddir)/src/libhat-trie.la
+check_ahtable_SOURCES = check_ahtable.c str_map.c
+check_ahtable_LDADD = $(top_builddir)/src/libhat-trie.la
+check_ahtable_CPPFLAGS = -I$(top_builddir)/src
-check_hattrie_SOURCES = check_hattrie.c
-check_hattrie_LDADD = $(top_builddir)/src/libhat-trie.la
+check_hattrie_SOURCES = check_hattrie.c str_map.c
+check_hattrie_LDADD = $(top_builddir)/src/libhat-trie.la
+check_hattrie_CPPFLAGS = -I$(top_builddir)/src
View
95 test/check_ahtable.c
@@ -2,6 +2,8 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+
+#include "str_map.h"
#include "../src/ahtable.h"
/* Simple random string generation. */
@@ -14,32 +16,36 @@ void randstr(char* x, size_t len)
}
-const size_t n = 1000000; // how many uniques strings
+const size_t n = 2000000; // how many uniques strings
const size_t m = 50; // length of each string
+const size_t k = 2000000; // number of insertions
char** xs;
-int* cs;
+
ahtable_t* T;
+str_map* M;
+
void setup()
{
+ fprintf(stderr, "generating %zu keys ... ", n);
xs = malloc(n * sizeof(char*));
size_t i;
for (i = 0; i < n; ++i) {
xs[i] = malloc(m + 1);
randstr(xs[i], m);
}
- cs = malloc(n * sizeof(int));
- memset(cs, 0, n * sizeof(int));
-
T = ahtable_create();
+ M = str_map_create();
+ fprintf(stderr, "done.\n");
}
+
void teardown()
{
ahtable_free(T);
+ str_map_destroy(M);
- free(cs);
size_t i;
for (i = 0; i < n; ++i) {
free(xs[i]);
@@ -50,33 +56,90 @@ void teardown()
void test_ahtable_insert()
{
- size_t k = 1000000; // number of insertions
fprintf(stderr, "inserting %zu keys ... \n", k);
- size_t i;
- value_t* val;
+ size_t i, j;
+ value_t* u;
+ value_t v;
- while (k--) {
+ for (j = 0; j < k; ++j) {
i = rand() % n;
- cs[i] += 1;
- val = ahtable_get(T, xs[i], strlen(xs[i]));
- *val += 1;
- if ((size_t) cs[i] != *val) {
- fprintf(stderr, "[error] tally mismatch (reported: %zu, correct: %d)\n",
- *val, cs[i]);
+
+ v = 1 + str_map_get(M, xs[i], strlen(xs[i]));
+ str_map_set(M, xs[i], strlen(xs[i]), v);
+
+
+ u = ahtable_get(T, xs[i], strlen(xs[i]));
+ *u += 1;
+
+
+ if (*u != v) {
+ fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n",
+ *u, v);
+ }
+ }
+
+ fprintf(stderr, "done.\n");
+}
+
+
+
+void test_ahtable_iteration() /* walk every entry of T and check it against the reference map M */
+{
+ fprintf(stderr, "iterating through %zu keys ... \n", k); /* note: k is the insertion count, not the number of distinct keys */
+
+ ahtable_iter_t* i = ahtable_iter_begin(T);
+
+ size_t count = 0; /* entries visited so far */
+ value_t* u; /* value reported by the table iterator */
+ value_t v; /* value recorded for the same key in M */
+
+ size_t len;
+ const char* key;
+
+ while (!ahtable_iter_finished(i)) {
+ ++count;
+
+ key = ahtable_iter_key(i, &len);
+ u = ahtable_iter_val(i);
+
+ v = str_map_get(M, key, len);
+
+ if (*u != v) {
+ if (v == 0) { /* M has no live tally for this key: unknown key, or one already visited */
+ fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
+ }
+ else { /* key is known but the tallies disagree */
+ fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
+ }
}
+
+ // Zero the reference tally so that visiting the same key a second time
+ // shows up as a mismatch above.
+ str_map_set(M, key, len, 0);
+
+ ahtable_iter_next(i);
}
+ if (count != M->m) { /* M->m is the number of distinct keys held by M */
+ fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
+ count, M->m);
+ }
+
+ ahtable_iter_free(i);
+
fprintf(stderr, "done.\n");
}
+
int main()
{
setup();
test_ahtable_insert();
+ test_ahtable_iteration();
teardown();
return 0;
View
95 test/check_hattrie.c
@@ -2,6 +2,8 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+
+#include "str_map.h"
#include "../src/hat-trie.h"
/* Simple random string generation. */
@@ -14,32 +16,36 @@ void randstr(char* x, size_t len)
}
-const size_t n = 2000000; // how many uniques strings
+const size_t n = 1000000; // how many uniques strings
const size_t m = 50; // length of each string
+const size_t k = 2000000; // number of insertions
char** xs;
-int* cs;
+
hattrie_t* T;
+str_map* M;
+
void setup()
{
+ fprintf(stderr, "generating %zu keys ... ", n);
xs = malloc(n * sizeof(char*));
size_t i;
for (i = 0; i < n; ++i) {
xs[i] = malloc(m + 1);
randstr(xs[i], m);
}
- cs = malloc(n * sizeof(int));
- memset(cs, 0, n * sizeof(int));
-
T = hattrie_create();
+ M = str_map_create();
+ fprintf(stderr, "done.\n");
}
+
void teardown()
{
hattrie_free(T);
+ str_map_destroy(M);
- free(cs);
size_t i;
for (i = 0; i < n; ++i) {
free(xs[i]);
@@ -50,33 +56,90 @@ void teardown()
void test_hattrie_insert()
{
- size_t k = 2000000; // number of insertions
fprintf(stderr, "inserting %zu keys ... \n", k);
- size_t i;
- value_t* val;
+ size_t i, j;
+ value_t* u;
+ value_t v;
- while (k--) {
+ for (j = 0; j < k; ++j) {
i = rand() % n;
- cs[i] += 1;
- val = hattrie_get(T, xs[i], strlen(xs[i]));
- *val += 1;
- if ((size_t) cs[i] != *val) {
- fprintf(stderr, "[error] tally mismatch (reported: %zu, correct: %d)\n",
- *val, cs[i]);
+
+ v = 1 + str_map_get(M, xs[i], strlen(xs[i]));
+ str_map_set(M, xs[i], strlen(xs[i]), v);
+
+
+ u = hattrie_get(T, xs[i], strlen(xs[i]));
+ *u += 1;
+
+
+ if (*u != v) {
+ fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n",
+ *u, v);
+ }
+ }
+
+ fprintf(stderr, "done.\n");
+}
+
+
+
+/* Walk every entry of the trie T and check it against the reference map M:
+ * each key must carry the tally recorded in M, no key may be visited twice,
+ * and the number of entries visited must equal the number of distinct keys
+ * held by M. Problems are reported on stderr. */
+void test_hattrie_iteration()
+{
+ fprintf(stderr, "iterating through %zu keys ... \n", k);
+
+ hattrie_iter_t* i = hattrie_iter_begin(T);
+
+ size_t count = 0;
+ value_t* u;
+ value_t v;
+
+ size_t len;
+ const char* key;
+
+ while (!hattrie_iter_finished(i)) {
+ ++count;
+
+ key = hattrie_iter_key(i, &len);
+ u = hattrie_iter_val(i);
+
+ v = str_map_get(M, key, len);
+
+ if (*u != v) {
+ if (v == 0) {
+ fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
+ }
+ else {
+ fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
+ }
}
+
+ // Zero the reference tally so that visiting the same key a second time
+ // shows up as a mismatch above.
+ str_map_set(M, key, len, 0);
+
+ hattrie_iter_next(i);
}
+ if (count != M->m) {
+ /* report the value actually compared against (distinct keys in M),
+ * not k, which is the number of insertions */
+ fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
+ count, M->m);
+ }
+
+ hattrie_iter_free(i);
+
fprintf(stderr, "done.\n");
}
+
int main()
{
setup();
test_hattrie_insert();
+ test_hattrie_iteration();
teardown();
return 0;
View
216 test/str_map.c
@@ -0,0 +1,216 @@
+
+/*
+ * This file is part of fastq-tools.
+ *
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ */
+
+
+#include "str_map.h"
+#include "misc.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+
+static const size_t INITIAL_TABLE_SIZE = 16;
+static const double MAX_LOAD = 0.77;
+
+
+/*
+ * Paul Hsieh's SuperFastHash
+ * http://www.azillionmonkeys.com/qed/hash.html
+ */
+
+
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+ || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+ +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+
+static uint32_t hash(const char * data, size_t len) { /* 32-bit hash of the len bytes at data; returns 0 for empty or NULL input */
+ uint32_t hash = len, tmp; /* the state is seeded with the length */
+ int rem;
+
+ if (len <= 0 || data == NULL) return 0; /* len is unsigned, so this is effectively len == 0 */
+
+ rem = len & 3; /* 0-3 bytes left over after the 4-byte groups */
+ len >>= 2; /* from here on: number of whole 4-byte groups */
+
+ /* Main loop: mix the input in, four bytes (two 16-bit reads) per round */
+ for (;len > 0; len--) {
+ hash += get16bits (data);
+ tmp = (get16bits (data+2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2*sizeof (uint16_t);
+ hash += hash >> 11;
+ }
+
+ /* Handle end cases: mix in the 1-3 trailing bytes (nothing to do for 0) */
+ switch (rem) {
+ case 3: hash += get16bits (data);
+ hash ^= hash << 16;
+ hash ^= data[sizeof (uint16_t)] << 18; /* NOTE(review): plain char is promoted here; result for bytes >= 0x80 depends on char signedness */
+ hash += hash >> 11;
+ break;
+ case 2: hash += get16bits (data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1: hash += *data; /* NOTE(review): same char-signedness caveat as in case 3 */
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
+
+
+
+static void rehash(str_map* T, size_t new_n);
+static void clear(str_map*);
+
+
+
+/* Allocate an empty map with INITIAL_TABLE_SIZE zeroed slots and a rehash
+ * threshold derived from MAX_LOAD. Release it with str_map_destroy. */
+str_map* str_map_create()
+{
+    const size_t table_bytes = INITIAL_TABLE_SIZE * sizeof(str_map_pair*);
+
+    str_map* map = malloc_or_die(sizeof(str_map));
+
+    map->A = malloc_or_die(table_bytes);
+    memset(map->A, 0, table_bytes);
+
+    map->n     = INITIAL_TABLE_SIZE;
+    map->m     = 0;
+    map->max_m = map->n * MAX_LOAD;
+
+    return map;
+}
+
+
+/* Free a map: all stored pairs, the slot table and the map itself.
+ * Passing NULL is allowed and does nothing. */
+void str_map_destroy(str_map* T)
+{
+    if (T == NULL) return;
+
+    clear(T);
+    free(T->A);
+    free(T);
+}
+
+
+
+/* Remove every pair from the map, freeing each pair and its key copy.
+ * The slot table itself is kept, with all slots left empty. */
+void clear(str_map* T)
+{
+    size_t slot;
+    for (slot = 0; slot < T->n; slot++) {
+        str_map_pair* cur = T->A[slot];
+
+        while (cur != NULL) {
+            str_map_pair* following = cur->next;
+            free(cur->key);
+            free(cur);
+            cur = following;
+        }
+
+        T->A[slot] = NULL;
+    }
+
+    T->m = 0;
+}
+
+
+/* Link an already allocated pair V at the head of the chain for its hash
+ * slot in T and count it. V and its key are taken as they are; nothing is
+ * copied and no check for an existing equal key is made. */
+static void insert_without_copy(str_map* T, str_map_pair* V)
+{
+    str_map_pair** head = &T->A[hash(V->key, V->keylen) % T->n];
+
+    V->next = *head;
+    *head   = V;
+
+    T->m++;
+}
+
+
+
+/* Rebuild T's slot table with new_n slots. Every existing pair is relinked
+ * into the new table (no pair or key is reallocated), the old table is
+ * freed, and T takes over the new table, size and rehash threshold. */
+static void rehash(str_map* T, size_t new_n)
+{
+    /* scratch map that only exists to receive the relinked pairs */
+    str_map grown;
+    grown.n     = new_n;
+    grown.m     = 0;
+    grown.max_m = grown.n * MAX_LOAD;
+    grown.A     = malloc_or_die(grown.n * sizeof(str_map_pair*));
+    memset(grown.A, 0, grown.n * sizeof(str_map_pair*));
+
+    size_t slot;
+    for (slot = 0; slot < T->n; slot++) {
+        str_map_pair* pair = T->A[slot];
+
+        while (pair != NULL) {
+            /* remember the successor: relinking overwrites pair->next */
+            str_map_pair* following = pair->next;
+            insert_without_copy(&grown, pair);
+            pair = following;
+        }
+
+        T->A[slot] = NULL;
+    }
+
+    free(T->A);
+
+    T->A     = grown.A;
+    T->n     = grown.n;
+    T->max_m = grown.max_m;
+}
+
+
+/* Store `value` under the keylen-byte key. An existing pair with the same
+ * key has its value overwritten; otherwise a new pair holding a private
+ * copy of the key bytes is added at the head of its slot's chain. The table
+ * is doubled first whenever the item count has reached the threshold. */
+void str_map_set(str_map* T, const char* key, size_t keylen, value_t value)
+{
+    if (T->m >= T->max_m) {
+        rehash(T, T->n * 2);
+    }
+
+    uint32_t slot = hash(key, keylen) % T->n;
+    str_map_pair* pair;
+
+    /* look for the key in its chain */
+    for (pair = T->A[slot]; pair != NULL; pair = pair->next) {
+        if (pair->keylen != keylen) continue;
+        if (memcmp(pair->key, key, keylen) != 0) continue;
+
+        pair->value = value;
+        return;
+    }
+
+    /* not present: create a new pair */
+    pair = malloc_or_die(sizeof(str_map_pair));
+    pair->key = malloc_or_die(keylen);
+    memcpy(pair->key, key, keylen);
+    pair->keylen = keylen;
+    pair->value  = value;
+
+    pair->next = T->A[slot];
+    T->A[slot] = pair;
+
+    T->m++;
+}
+
+
+/* Look up the keylen-byte key and return its value, or 0 when the key is
+ * not in the map (so a stored 0 and a missing key look the same). */
+value_t str_map_get(const str_map* T, const char* key, size_t keylen)
+{
+    uint32_t slot = hash(key, keylen) % T->n;
+    str_map_pair* pair;
+
+    for (pair = T->A[slot]; pair != NULL; pair = pair->next) {
+        if (pair->keylen != keylen) continue;
+        if (memcmp(pair->key, key, keylen) != 0) continue;
+
+        return pair->value;
+    }
+
+    return 0;
+}
+
+
View
54 test/str_map.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
+ *
+ * hash :
+ * A quick and simple hash table mapping strings to things.
+ *
+ */
+
+
+#ifndef ISOLATOR_STR_MAP_H
+#define ISOLATOR_STR_MAP_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "common.h"
+
+
+typedef struct str_map_pair_ /* one key/value entry; entries sharing a slot form a singly linked chain */
+{
+ char* key; /* key bytes */
+ size_t keylen; /* number of bytes in key */
+ value_t value; /* value stored under the key */
+
+ struct str_map_pair_* next; /* next entry in the same chain */
+} str_map_pair;
+
+
+typedef struct /* chained hash table from byte-string keys to value_t */
+{
+ str_map_pair** A; /* table proper: array of n chain heads */
+ size_t n; /* table size (number of slots in A) */
+ size_t m; /* hashed items (number of stored pairs) */
+ size_t max_m; /* max hashed items before rehash */
+} str_map;
+
+
+
+str_map* str_map_create(void);
+void str_map_destroy(str_map*);
+void str_map_set(str_map*, const char* key, size_t keylen, value_t value);
+value_t str_map_get(const str_map*, const char* key, size_t keylen);
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
+

0 comments on commit 9b7b45c

Please sign in to comment.