Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 655 lines (592 sloc) 19.271 kb
89dbed7 Salvatore Sanfilippo Visitors moved to Git
authored
1 /* An implementation of in-memory hash tables:
2 * Copyright (c) 2000-2004 Salvatore Sanfilippo <antirez@invece.org>
3 *
4 * -- VERSION 2004.05.22 --
5 *
6 * COPYRIGHT AND PERMISSION NOTICE
7 * -------------------------------
8 *
9 * Copyright (c) 2000 Salvatore Sanfilippo <antirez@invece.org>
10 * Copyright (c) 2001 Salvatore Sanfilippo <antirez@invece.org>
11 * Copyright (c) 2002 Salvatore Sanfilippo <antirez@invece.org>
12 * Copyright (c) 2003 Salvatore Sanfilippo <antirez@invece.org>
13 * Copyright (c) 2004 Salvatore Sanfilippo <antirez@invece.org>
14 *
15 * All rights reserved.
16 *
17 * Permission is hereby granted, free of charge, to any person obtaining a
18 * copy of this software and associated documentation files (the
19 * "Software"), to deal in the Software without restriction, including
20 * without limitation the rights to use, copy, modify, merge, publish,
21 * distribute, and/or sell copies of the Software, and to permit persons
22 * to whom the Software is furnished to do so, provided that the above
23 * copyright notice(s) and this permission notice appear in all copies of
24 * the Software and that both the above copyright notice(s) and this
25 * permission notice appear in supporting documentation.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
28 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
30 * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
31 * HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
32 * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
33 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
34 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
35 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
36 *
37 * Except as contained in this notice, the name of a copyright holder
38 * shall not be used in advertising or otherwise to promote the sale, use
39 * or other dealings in this Software without prior written authorization
40 * of the copyright holder.
41 *
42 * CHANGELOG
43 * ---------
44 *
45 * 22May2004 - Fixed a but in ht_destroy(). Now after this call the
46 * hashtable is really ready to be reused. Fixed also a memory leak
47 * in the same function. Luckly this function is only called at exit
48 * in many programs.
49 *
50 * OVERVIEW
51 * --------
52 *
53 * AHT is an implementation of a dictionary with support for
54 * INSERT, DELETE and SEARCH operations. It uses the hash table
55 * as base data structure to provide almost constant times for
56 * the three operations. AHT also automatically care about the
57 * size of the current key-values set increasing the hash table
58 * as needed.
59 *
60 * DESIGN PRINCIPLE
61 * ----------------
62 *
63 * - AHT try to resist to attacker-induced worst-case behaviour
64 * trought the randomization of the hash-function. This is
65 * optional.
66 *
67 * - AHT take care of the hash table expansion when needed.
68 * The hash table load ranges from 0 to 0.5, the hash table
69 * size is a power of two.
70 *
71 * - A simple implementation. The collisions resolution used
72 * is a simple linear probing, that takes advantage of
73 * the modern CPU caches, the low hash table max load and
74 * the use of a strong hash function provided with this library
75 * (ht_strong_hash), should mitigate the primary clustering
76 * enough. Experimental results shown that double hashing
77 * was a performance lost with common key types in modern
78 * CPUs.
79 *
80 * - Moderatly method oriented, it is possible to define the hash
81 * function, key/value destructors, key compare function, for a
82 * given hash table, but not with a per-element base.
83 *
84 * === WARNING ===
85 * = Before to use this library, think about the -fact- that the
86 * = worst case is O(N). Like for the quick sort algorithm, it may
87 * = be a bad idea to use this library in medical software, or other
88 * = software for wich the worst case should be taken in account
89 * = even if not likely to happen.
90 * = Good alternatives are red-black trees, and other trees with
91 * = a good worst-case behavior.
92 * ===============
93 *
94 * TODO
95 * ----
96 *
97 * - Write the documentation
98 * - ht_copy() to copy an element between hash tables
99 * - ht_dup() to duplicate an entire hash table
100 * - ht_merge() to add the content of one hash table to another
101 * - disk operations, the ability to save an hashtable from the
102 * memory to the disk and the reverse operation.
103 *
104 * Most of this features needs additional methods, like one
105 * to copy an object, and should return an error if such methods
106 * are not defined.
107 *
108 */
109
110 #include <sys/types.h>
111 #include <stdio.h>
112 #include <stdlib.h>
113 #include <string.h>
114 #include <assert.h>
115 #include "aht.h"
116
117 /* -------------------------- private prototypes ---------------------------- */
118 static int ht_expand_if_needed(struct hashtable *t);
119 static unsigned int next_power(unsigned int size);
120 static int ht_insert(struct hashtable *t, void *key, unsigned int *avail_index);
121
122 /* The special ht_free_element pointer is used to mark
123 * a freed element in the hash table (note that the elements
124 * neven used are just NULL pointers) */
125 static struct ht_ele *ht_free_element = (void*) -1;
126
127 /* -------------------------- hash functions -------------------------------- */
128 /* The djb hash function, that's under public domain */
129 u_int32_t djb_hash(unsigned char *buf, size_t len)
130 {
131 u_int32_t h = 5381;
132 while(len--)
133 h = (h + (h << 5)) ^ *buf++;
134 return h;
135 }
136
137 u_int32_t djb_hashR(unsigned char *buf, size_t len)
138 {
139 u_int32_t h = 5381;
140 buf += len-1;
141 while(len--)
142 h = (h + (h << 5)) ^ *buf--;
143 return h;
144 }
145
146 /* Another trivial hash function */
147 #define ROT32R(x,n) (((x)>>n)|(x<<(32-n)))
148 u_int32_t trivial_hash(unsigned char *buf, size_t len)
149 {
150 u_int32_t h = 0;
151 while(len--) {
152 h = h + *buf++;
153 h = ROT32R(h, 3);
154 }
155 return h;
156 }
157
158 u_int32_t trivial_hashR(unsigned char *buf, size_t len)
159 {
160 u_int32_t h = 0;
161 buf += len-1;
162 while(len--) {
163 h = h + *buf--;
164 h = ROT32R(h, 3);
165 }
166 return h;
167 }
168
169 /* A strong hash function that should be the default with this
170 * hashtable implementation. Our hash tables does not support
171 * double hashing for design: the idea is to avoid double
172 * hashing and use a bit slower but very strong hash function like
173 * this. This should provide quite good performances with
174 * all the kinds of keys if you take the default max load of 50%.
175 *
176 * For more information see: http://burtleburtle.net/bob/hash/evahash.html */
177
178 /* The mixing step */
179 #define mix(a,b,c) \
180 { \
181 a=a-b; a=a-c; a=a^(c>>13); \
182 b=b-c; b=b-a; b=b^(a<<8); \
183 c=c-a; c=c-b; c=c^(b>>13); \
184 a=a-b; a=a-c; a=a^(c>>12); \
185 b=b-c; b=b-a; b=b^(a<<16); \
186 c=c-a; c=c-b; c=c^(b>>5); \
187 a=a-b; a=a-c; a=a^(c>>3); \
188 b=b-c; b=b-a; b=b^(a<<10); \
189 c=c-a; c=c-b; c=c^(b>>15); \
190 }
191
192 /* The whole new hash function */
193 u_int32_t __ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval)
194 {
195 u_int32_t a,b,c; /* the internal state */
196 u_int32_t len; /* how many key bytes still need mixing */
197
198 /* Set up the internal state */
199 len = length;
200 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
201 c = initval; /* variable initialization of internal state */
202
203 /*---------------------------------------- handle most of the key */
204 while (len >= 12)
205 {
206 a=a+(k[0]+((u_int32_t)k[1]<<8)+((u_int32_t)k[2]<<16)+
207 ((u_int32_t)k[3]<<24));
208 b=b+(k[4]+((u_int32_t)k[5]<<8)+((u_int32_t)k[6]<<16)+
209 ((u_int32_t)k[7]<<24));
210 c=c+(k[8]+((u_int32_t)k[9]<<8)+((u_int32_t)k[10]<<16)+
211 ((u_int32_t)k[11]<<24));
212 mix(a,b,c);
213 k = k+12; len = len-12;
214 }
215
216 /*------------------------------------- handle the last 11 bytes */
217 c = c+length;
218 switch(len) /* all the case statements fall through */
219 {
220 case 11: c=c+((u_int32_t)k[10]<<24);
221 case 10: c=c+((u_int32_t)k[9]<<16);
222 case 9 : c=c+((u_int32_t)k[8]<<8);
223 /* the first byte of c is reserved for the length */
224 case 8 : b=b+((u_int32_t)k[7]<<24);
225 case 7 : b=b+((u_int32_t)k[6]<<16);
226 case 6 : b=b+((u_int32_t)k[5]<<8);
227 case 5 : b=b+k[4];
228 case 4 : a=a+((u_int32_t)k[3]<<24);
229 case 3 : a=a+((u_int32_t)k[2]<<16);
230 case 2 : a=a+((u_int32_t)k[1]<<8);
231 case 1 : a=a+k[0];
232 /* case 0: nothing left to add */
233 }
234 mix(a,b,c);
235 /*-------------------------------------------- report the result */
236 return c;
237 }
238
239 /* ----------------------------- API implementation ------------------------- */
240 /* reset an hashtable already initialized with ht_init().
241 * NOTE: This function should only called by ht_destroy(). */
242 static void ht_reset(struct hashtable *t)
243 {
244 t->table = NULL;
245 t->size = 0;
246 t->sizemask = 0;
247 t->used = 0;
248 t->collisions = 0;
249 }
250
251 /* Initialize the hash table */
252 int ht_init(struct hashtable *t)
253 {
254 ht_reset(t);
255 t->hashf = ht_hash_pointer;
256 t->key_destructor = ht_no_destructor;
257 t->val_destructor = ht_no_destructor;
258 t->key_compare = ht_compare_ptr;
259 return HT_OK;
260 }
261
262 /* Resize the table to the minimal size that contains all the elements */
263 int ht_resize(struct hashtable *t)
264 {
265 int minimal = (t->used * 2)+1;
266
267 if (minimal < HT_INITIAL_SIZE)
268 minimal = HT_INITIAL_SIZE;
269 return ht_expand(t, minimal);
270 }
271
272 /* Move an element accross hash tables */
273 int ht_move(struct hashtable *orig, struct hashtable *dest, unsigned int index)
274 {
275 int ret;
276 unsigned int new_index;
277
278 /* If the element isn't in the table ht_search will store
279 * the index of the free ht_ele in the integer pointer by *index */
280 ret = ht_insert(dest, orig->table[index]->key, &new_index);
281 if (ret != HT_OK)
282 return ret;
283
284 /* Move the element */
285 dest->table[new_index] = orig->table[index];
286 orig->table[index] = ht_free_element;
287 orig->used--;
288 dest->used++;
289 return HT_OK;
290 }
291
292 /* Expand or create the hashtable */
293 int ht_expand(struct hashtable *t, size_t size)
294 {
295 struct hashtable n; /* the new hashtable */
296 unsigned int realsize = next_power(size), i;
297
298 /* the size is invalid if it is smaller than the number of
299 * elements already inside the hashtable */
300 if (t->used >= size)
301 return HT_INVALID;
302
303 ht_init(&n);
304 n.size = realsize;
305 n.sizemask = realsize-1;
306 n.table = malloc(realsize*sizeof(struct ht_ele*));
307 if (n.table == NULL)
308 return HT_NOMEM;
309 /* Copy methods */
310 n.hashf = t->hashf;
311 n.key_destructor = t->key_destructor;
312 n.val_destructor = t->val_destructor;
313 n.key_compare= t->key_compare;
314
315 /* Initialize all the pointers to NULL */
316 memset(n.table, 0, realsize*sizeof(struct ht_ele*));
317
318 /* Copy all the elements from the old to the new table:
319 * note that if the old hash table is empty t->size is zero,
320 * so ht_expand() acts like an ht_create() */
321 n.used = t->used;
322 for (i = 0; i < t->size && t->used > 0; i++) {
323 if (t->table[i] != NULL && t->table[i] != ht_free_element) {
324 u_int32_t h;
325
326 /* Get the new element index: note that we
327 * know that there aren't freed elements in 'n' */
328 h = n.hashf(t->table[i]->key) & n.sizemask;
329 if (n.table[h]) {
330 n.collisions++;
331 while(1) {
332 h = (h+1) & n.sizemask;
333 if (!n.table[h])
334 break;
335 n.collisions++;
336 }
337 }
338 /* Move the element */
339 n.table[h] = t->table[i];
340 t->used--;
341 }
342 }
343 assert(t->used == 0);
344 free(t->table);
345
346 /* Remap the new hashtable in the old */
347 *t = n;
348 return HT_OK;
349 }
350
351 /* Add an element, discarding the old if the key already exists */
352 int ht_replace(struct hashtable *t, void *key, void *data)
353 {
354 int ret;
355 unsigned int index;
356
357 /* Try to add the element */
358 ret = ht_add(t, key, data);
359 if (ret == HT_OK || ret != HT_BUSY)
360 return ret;
361 /* It already exists, get the index */
362 ret = ht_search(t, key, &index);
363 assert(ret == HT_FOUND);
364 /* Remove the old */
365 ret = ht_free(t, index);
366 assert(ret == HT_OK);
367 /* And add the new */
368 return ht_add(t, key, data);
369 }
370
371 /* Add an element to the target hash table */
372 int ht_add(struct hashtable *t, void *key, void *data)
373 {
374 int ret;
375 unsigned int index;
376
377 /* If the element isn't in the table ht_insert() will store
378 * the index of the free ht_ele in the integer pointer by *index */
379 ret = ht_insert(t, key, &index);
380 if (ret != HT_OK)
381 return ret;
382
383 /* Allocates the memory and stores key */
384 if ((t->table[index] = malloc(sizeof(struct ht_ele))) == NULL)
385 return HT_NOMEM;
386 /* Store the pointers */
387 t->table[index]->key = key;
388 t->table[index]->data = data;
389 t->used++;
390 return HT_OK;
391 }
392
393 /* search and remove an element */
394 int ht_rm(struct hashtable *t, void *key)
395 {
396 int ret;
397 unsigned int index;
398
399 if ((ret = ht_search(t, key, &index)) != HT_FOUND)
400 return ret;
401 return ht_free(t, index);
402 }
403
404 /* Destroy an entire hash table */
405 int ht_destroy(struct hashtable *t)
406 {
407 unsigned int i;
408
409 /* Free all the elements */
410 for (i = 0; i < t->size && t->used > 0; i++) {
411 if (t->table[i] != NULL && t->table[i] != ht_free_element) {
412 if (t->key_destructor)
413 t->key_destructor(t->table[i]->key);
414 if (t->val_destructor)
415 t->val_destructor(t->table[i]->data);
416 free(t->table[i]);
417 t->used--;
418 }
419 }
420 /* Free the table and the allocated cache structure */
421 free(t->table);
422 /* Re-initialize the table */
423 ht_reset(t);
424 return HT_OK; /* Actually ht_destroy never fails */
425 }
426
427 /* Free an element in the hash table */
428 int ht_free(struct hashtable *t, unsigned int index)
429 {
430 if (index >= t->size)
431 return HT_IOVERFLOW; /* Index overflow */
432 /* ht_free() calls against non-existent elements are ignored */
433 if (t->table[index] != NULL && t->table[index] != ht_free_element) {
434 /* release the key */
435 if (t->key_destructor)
436 t->key_destructor(t->table[index]->key);
437 /* release the value */
438 if (t->val_destructor)
439 t->val_destructor(t->table[index]->data);
440 /* free the element structure */
441 free(t->table[index]);
442 /* mark the element as freed */
443 t->table[index] = ht_free_element;
444 t->used--;
445 }
446 return HT_OK;
447 }
448
449 /* Search the element with the given key */
450 int ht_search(struct hashtable *t, void *key, unsigned int *found_index)
451 {
452 int ret;
453 u_int32_t h;
454
455 /* Expand the hashtable if needed */
456 if (t->size == 0) {
457 if ((ret = ht_expand_if_needed(t)) != HT_OK)
458 return ret;
459 }
460
461 /* Try using the first hash functions */
462 h = t->hashf(key) & t->sizemask;
463 /* this handles the removed elements */
464 if (!t->table[h])
465 return HT_NOTFOUND;
466 if (t->table[h] != ht_free_element &&
467 t->key_compare(key, t->table[h]->key))
468 {
469 *found_index = h;
470 return HT_FOUND;
471 }
472
473 while(1) {
474 h = (h+1) & t->sizemask;
475 /* this handles the removed elements */
476 if (t->table[h] == ht_free_element)
477 continue;
478 if (!t->table[h])
479 return HT_NOTFOUND;
480 if (t->key_compare(key, t->table[h]->key)) {
481 *found_index = h;
482 return HT_FOUND;
483 }
484 }
485 }
486
487 /* This function is used to run the entire hash table,
488 * it returns:
489 * 1 if the element with the given index is valid
490 * 0 if the element with the given index is empty or marked free
491 * -1 if the element if out of the range */
492 int ht_get_byindex(struct hashtable *t, unsigned int index)
493 {
494 if (index >= t->size)
495 return -1;
496 if (t->table[index] == NULL || t->table[index] == ht_free_element)
497 return 0;
498 return 1;
499 }
500
501 /* Returns the hash table as an array of paris of key/value void* pointers.
502 * The array is allocated with malloc() and should be freed when no
503 * longer useful. The key and value pointers should not be freed or
504 * altered in any way, they will be handled by the hash table structure.
505 *
506 * This function is mainly useful to sort the hashtable's content
507 * without to alter the hashtable itself.
508 *
509 * Returns NULL on out of memory. */
510 void **ht_get_array(struct hashtable *t)
511 {
512 int used = ht_used(t);
513 void **table, **tptr;
514 long idx;
515
516 if ((table = (void**) malloc(sizeof(void*)*(used*2))) == NULL)
517 return NULL;
518 tptr = table;
519 for (idx = 0; ;idx++) {
520 int type = ht_get_byindex(t, idx);
521 if (type == -1) break;
522 if (type == 0) continue;
523 *tptr++ = ht_key(t, idx);
524 *tptr++ = ht_value(t, idx);
525 }
526 return table;
527 }
528 /* ------------------------- private functions ------------------------------ */
529
530 /* Expand the hash table if needed */
531 static int ht_expand_if_needed(struct hashtable *t)
532 {
533 /* If the hash table is empty expand it to the intial size,
534 * if the table is half-full redobule its size. */
535 if (t->size == 0)
536 return ht_expand(t, HT_INITIAL_SIZE);
537 if (t->size <= t->used*2)
538 return ht_expand(t, t->size * 2);
539 return HT_OK;
540 }
541
542 /* Our hash table capability is a power of two */
543 static unsigned int next_power(unsigned int size)
544 {
545 unsigned int i = 256;
546
547 if (size >= 2147483648U)
548 return 2147483648U;
549 while(1) {
550 if (i >= size)
551 return i;
552 i *= 2;
553 }
554 }
555
556 /* the insert function to add elements out of ht expansion */
557 static int ht_insert(struct hashtable *t, void *key, unsigned int *avail_index)
558 {
559 int ret;
560 u_int32_t h;
561
562 /* Expand the hashtable if needed */
563 if ((ret = ht_expand_if_needed(t)) != HT_OK)
564 return ret;
565
566 /* Try using the first hash functions */
567 h = t->hashf(key) & t->sizemask;
568 /* this handles the removed elements */
569 if (!t->table[h] || t->table[h] == ht_free_element) {
570 *avail_index = h;
571 return HT_OK;
572 }
573 t->collisions++;
574 if (t->key_compare(key, t->table[h]->key))
575 return HT_BUSY;
576
577 while(1) {
578 h = (h+1) & t->sizemask;
579 /* this handles the removed elements */
580 if (!t->table[h] || t->table[h] == ht_free_element) {
581 *avail_index = h;
582 return HT_OK;
583 }
584 t->collisions++;
585 if (t->key_compare(key, t->table[h]->key))
586 return HT_BUSY;
587 }
588 }
589
590 /* ------------------------- provided destructors --------------------------- */
591
592 /* destructor for heap allocated keys/values */
593 void ht_destructor_free(void *obj)
594 {
595 free(obj);
596 }
597
598 /* ------------------------- provided comparators --------------------------- */
599
600 /* default key_compare method */
601 int ht_compare_ptr(void *key1, void *key2)
602 {
603 return (key1 == key2);
604 }
605
606 /* key compare for nul-terminated strings */
607 int ht_compare_string(void *key1, void *key2)
608 {
609 return (strcmp(key1, key2) == 0) ? 1 : 0;
610 }
611
612 /* -------------------- hash functions for common data types --------------- */
613
614 /* We make this global to allow hash function randomization,
615 * as security measure against attacker-induced worst case behaviuor.
616 *
617 * Note that being H_i the strong hash function with init value of i
618 * and H_i' the same hash function with init value of i' than:
619 *
620 * if H_i(StringOne) is equal to H_i(CollidingStringTwo)
621 *
622 * it is NOT true that
623 *
624 * H_i'(StringOne) is equal to H_i''(CollidingStringTwo)
625 */
626 static u_int32_t strong_hash_init_val = 0xF937A21;
627
628 /* Set the secret initialization value. It should be set from
629 * a secure PRNG like /dev/urandom at program initialization time */
630 void ht_set_strong_hash_init_val(u_int32_t secret)
631 {
632 strong_hash_init_val = secret;
633 }
634
635 /* __ht_strong_hash wrapper that mix a user-provided initval
636 * with the global strong_hash_init_val. __ht_strong_hash is
637 * even exported directly. */
638 u_int32_t ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval)
639 {
640 return __ht_strong_hash(k, length, initval^strong_hash_init_val);
641 }
642
643 /* Hash function suitable for C strings and other data types using
644 * a 0-byte as terminator */
645 u_int32_t ht_hash_string(void *key)
646 {
647 return __ht_strong_hash(key, strlen(key), strong_hash_init_val);
648 }
649
650 /* This one is to hash the value of the pointer itself. */
651 u_int32_t ht_hash_pointer(void *key)
652 {
653 return __ht_strong_hash((void*)&key, sizeof(void*), strong_hash_init_val);
654 }
Something went wrong with that request. Please try again.