Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

[6model/c] add hashtable and a few tests (works only on Linux)

commit 74a549e76d53fb66b2adde5dd2c76167c9cb61e9 1 parent 12a4dbf
@mberends mberends authored
View
5 c/Configure.bat
@@ -1,6 +1,7 @@
:: Configure.bat
-:: This script is written for Microsoft Visual C++ Express 2010 or GCC.
+:: This script should work on a Microsoft Windows operating system with
+:: either Microsoft Visual C++ or GNU Compiler Collection gcc.
:: TODO
:: - Add a --help option and provide for more options in general
@@ -65,4 +66,4 @@ tools\build\Configure.exe tools\build\Makefile.in Makefile
:: friendliness grounds, .bat reassures the reader that the script will do
:: only "simple" things.
:: You could argue it the other way - we use cmd.exe, not command.com, so we
-:: should use the extension that command.com cannot handle. Dunno...
+:: should use the extension that command.com cannot handle. Dunno...
View
6 c/Configure.sh
@@ -1,5 +1,7 @@
#!/bin/sh
# TODO
# - Add a --help option and provide for more options in general
-cc -Wall -o tools/build/Configure tools/build/Configure.c
-tools/build/Configure tools/build/Makefile.in Makefile
+export COMPILER=GCC
+set opts_gcc=-Wall -DCC=$COMPILER
+cc $opts_gcc -o tools/build/Configure tools/build/Configure.c
+tools/build/Configure tools/build/Makefile.in Makefile
View
511 c/src/hashtable.c
@@ -0,0 +1,511 @@
+/* hashtable.c */
+
+/* This file defines a general purpose set of hash table routines. */
+/* Hash tables are a form of associative array used by 6model/c both */
+/* at compile time in the parser, and often at run time by compiled */
+/* Perl 6 programs. */
+
+/* The hash table contains a series of key and value pairs. A key */
+/* must be unique and may not be null. The value may be duplicated */
+/* or null. To be general purpose, each key and value is handled as */
+/* an opaque variable size array of bytes. The hash table contains */
+/* only pointers to the keys and values, not their contents, so if */
+/* those are moved in memory their entries in the hash table must be */
+/* updated. The struct hashtable_entry defines each key and value */
+/* pair, and struct hashtable has an entrylist that points to the */
+/* variable size array of all of them. */
+
+/* Hashing is the algorithm used to quickly find an entry given its */
+/* key. A hashing function reads the key bytes and computes a hash */
+/* code, which selects a hash bucket. The struct hashtable points to */
+/* hash bucket lists stored separately from the entrylist, so that */
+/* they can be resized and/or rebuilt by separate threads. Hash */
+/* bucket lists are variable size arrays of pointers to, and sizes */
+/* of, hash buckets. Each hash bucket is a variable size list of */
+/* indices of members of the entrylist whose keys hash to the same */
+/* bucket number. Ideally each bucket would contain only one */
+/* pointer, and the number of buckets would equal the number of keys. */
+/* In practice some buckets are empty and others contain several */
+/* pointers to hash table entries. */
+
+/* hash --> struct hash +--------------->+--------------->key0----->
+ | | | value0--->
+ entrylist ---+ +--->entry0---|---+ +-------->key1----->
+ seed | entry1---|---|--|--+ value1--->
+ bucketlist---+ | entry2---+ | | | +-->key2----->
+ +----------------+ | | | | | value2--->
+ | | +---|--|--|--|-->key3----->
+ +--->bucket0--------+ | | | | | value3--->
+ bucket1-size-list-->entry0---|---|--+ +--|-->key4----->
+ bucket2---x | | | value4--->
+ bucket3-size-list-->entry0---+ +--------|-->key5----->
+ etc entry1----------------+ value5--->
+ etc */
+
+/* The hashing algorithm is like the PERL_HASH() macro in Perl 5, see */
+/* 'perldoc perlguts'. The number of hash buckets is 2 raised to an */
+/* integral power so that the modulo arithmetic that converts a hash */
+/* code to a bucket number simplifies to a bit mask (bitwise and) */
+/* operation instead of division. The hashing algorithm has one */
+/* change from Perl 5, an initial hashcode salt instead of a 0 to stave */
+/* off potential denial of service attacks. The salt is a random */
+/* integer that is replaced when a re-hash adds new buckets (see the */
+/* next point). */
+
+/* The ratio of keys to buckets is called the load factor. During a */
+/* store operation, if the load factor rises above a hard coded limit */
+/* (eg 0.75 or 1.25) the software builds a new hash index with twice */
+/* the number of buckets, and then re-hashes every key to put it into */
+/* the right bucket. After the new index is complete, it replaces */
+/* the old one which then gets freed. An advisory lock prevents other */
+/* threads changing keys during the re-hash process. Changing values */
+/* of existing keys is fine during re-hashing. */
+
+/* The re-hashing is amortized over subsequent store operations to */
+/* avoid a stop-the-world type reorganization. Instead, there are */
+/* two bucket lists, the old and the new. */
+/* If a new list is already being created, it must be finished before */
+/* another new one may be started. */
+
+/* During a delete operation, if the load factor reduces below some */
+/* other hard coded limit (eg 0.3 or 0.5) the software will halve the */
+/* number of buckets. There is no need to re-hash the keys, each */
+/* pair of successive buckets can simply be merged, again under the */
+/* protection of an advisory lock and possibly in a separate thread. */
+
+/* To avoid another stop-the-world type operation, the bucket merging */
+/* is also done incrementally, with an old and a new bucket list. */
+/* If a new list is already being created, it must be finished before */
+/* another one may be started. */
+
+/* The payload of a hash table, the values, can be whatever you want. */
+/* Remember that a key and a value are each expressed as an address */
+/* and a "length". Only the application accesses the value. It is */
+/* therefore also fine if the "length" is put to some other use of an */
+/* int, for example an enum identifying a type. In such cases, the */
+/* application should manage the size of the data at the address, for */
+/* example with structs. */
+
+/* When a key-value pair is deleted from the entrylist, it becomes a */
+/* hole. To save time, remaining entries are not moved. All the */
+/* holes are connected in a double linked list ordered from lowest to */
+/* highest address. Thus subsequent new insertions re-use the lowest */
+/* available address. In a hole, the "key" address and size fields */
+/* are null. The "value" address field is the forward link and the */
+/* "value" integer field is the backward link. */
+
+#include <assert.h> /* assert */
+#include <stdlib.h> /* NULL rand srand */
+#include <stdio.h> /* printf */
+#include <string.h> /* memmove */
+#ifdef _MSC_VER
+ #include <time.h> /* gettimeofday */
+ #include <winsock2.h> /* struct timeval */
+#else
+ #include <sys/time.h> /* gettimeofday */
+#endif
+#include "hashtable.h" /* hashtable */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* hashtable_new */
+/* Allocate and initialize an empty hash table and return a pointer to */
+/* it.  The table starts with one bucket list holding a single empty   */
+/* bucket and an entry list holding a single hole.  Release it with    */
+/* hashtable_free().  Allocation failures are caught by assert().      */
+struct hashtable *
+hashtable_new()
+{
+    struct hashtable * hash;
+    struct timeval tv; /* microsecond clock to seed random number */
+    /* fprintf(stderr,"enter hashtable_new\n"); */
+    /* Seed the random number generator with values from the system   */
+    /* time.  The clock is read outside assert() so that the call is  */
+    /* not compiled away when NDEBUG is defined.                      */
+    #ifndef _MSC_VER
+    {
+        int clockstatus = gettimeofday(&tv, NULL); /* read clock */
+        assert( clockstatus == 0 );
+        if (clockstatus == 0)
+            srand((unsigned int) (tv.tv_sec ^ tv.tv_usec));
+    }
+    #endif
+    /* Initialize the hash */
+    hash = (struct hashtable *) malloc(sizeof(struct hashtable));
+    assert( hash != NULL );
+    hash->loadfactorlow = 0.3;
+    hash->loadfactorhigh = 1.25;
+    hash->salt = rand();
+    hash->entrycount = 0;
+    /* Initialize the hashtable entry list with one deleted entry (a  */
+    /* hole) to simplify the code required in hashtable_store(),      */
+    /* which is called much more frequently.                          */
+    hash->entrylistsize = 1;
+    hash->entrylist = (struct hashtable_entry *) calloc(hash->entrylistsize, sizeof(struct hashtable_entry));
+    assert( hash->entrylist != NULL );
+    hash->entrylist->keyint = -1;
+    /* Terminate both links of the hole list with -1.  The zeroes     */
+    /* that calloc() put in there are correct for most fields, but a  */
+    /* zero link would be a valid subscript for another hole and      */
+    /* could cause infinite loops.                                    */
+    hash->entrylist->valueint = -1;
+    hash->entrylist->valuepointer = (void *) -1;
+    /* The single hole at subscript 0 is both head and tail */
+    hash->deletedentryhead = 0;
+    hash->deletedentrytail = 0;
+    /* Initialize the first bucket list with a single empty bucket */
+    hash->bucketmask1 = 0; /* (number of buckets) - 1 */
+    hash->bucketlist1 = (struct hashtable_bucket *) malloc(sizeof(struct hashtable_bucket));
+    assert( hash->bucketlist1 != NULL );
+    hash->bucketlist1->size = 0;
+    hash->bucketlist1->list = NULL;
+    /* Initially there is no second bucket list */
+    hash->bucketlist2 = NULL;
+    hash->bucketmask2 = 0;
+    return hash;
+}
+
+
+/* hashtable_store */
+/* Insert a new key with its value or, when the key is already        */
+/* present, keep the key and overwrite its value.  Only the pointers  */
+/* are stored; the key and value bytes are not copied.  Returns the   */
+/* hash code computed for the key.  Allocation failures are caught by */
+/* assert().                                                          */
+int
+hashtable_store(struct hashtable * hash, void * keypointer,
+    int keylength, void * valuepointer, int valueint)
+{
+    struct hashtable_bucket * bucket;
+    struct hashtable_entry * entry = NULL, * entrylist = hash->entrylist;
+    struct hashtable_entry * newentrylist;
+    char * hashingpointer;
+    int hashinglength, hashcode, i, found = 0;
+    int * bucketentry, * newbucketlist;
+    /* fprintf(stderr,"store '%*s' => '%*s'\n", -keylength, (char *)keypointer, -valueint, (char*)valuepointer); */
+    /* Search for the entry by hashing the key and scanning a bucket. */
+    /* The arithmetic must stay identical to hashtable_fetch() and    */
+    /* hashtable_delete(), otherwise they would look in other buckets.*/
+    hashcode = hash->salt;
+    hashinglength = keylength;
+    hashingpointer = (char *) keypointer;
+    while (hashinglength--)
+        hashcode = (hashcode * 33) + * hashingpointer ++;
+    hashcode += hashcode >> 5;
+    bucket = hash->bucketlist1 + (hashcode & hash->bucketmask1);
+    for (bucketentry = bucket->list, i = bucket->size; !found && --i >= 0; ) {
+        entry = entrylist + * bucketentry++;
+        if (entry->keyint==keylength && (memcmp(
+            entry->keypointer,keypointer,keylength)==0)) {
+            found = 1;
+        }
+    }
+    /* If the key is not found via the first bucket list, it may be   */
+    /* because it should be found via the second bucket list that is  */
+    /* being constructed.                                             */
+    if (!found && hash->bucketlist2) {
+        bucket = hash->bucketlist2 + (hashcode & hash->bucketmask2);
+        for (bucketentry = bucket->list, i = bucket->size; !found && --i >= 0; ) {
+            /* each candidate must be loaded before it is compared */
+            entry = entrylist + * bucketentry++;
+            if (entry->keyint==keylength && (memcmp(
+                entry->keypointer,keypointer,keylength)==0)) {
+                found = 1;
+            }
+        }
+    }
+    if (!found) {
+        /* Add a new entry with key and value. */
+        ++ (hash->entrycount);
+        if (hash->deletedentryhead >= 0) {
+            /* There is a linked list of deleted entries, recycle the */
+            /* first one.                                             */
+            entry = hash->entrylist + hash->deletedentryhead;
+            assert( entry->valueint == -1 );
+            /* detach the first empty entry from the empties list */
+            hash->deletedentryhead = (int) (long) entry->valuepointer;
+            if (hash->deletedentryhead >= 0) {
+                /* there are more empties, the next one becomes head */
+                hash->entrylist[hash->deletedentryhead].valueint = -1;
+            }
+            else {
+                /* entry was the only one in the list */
+                hash->deletedentryhead = -1;
+                hash->deletedentrytail = -1;
+            }
+        }
+        else {
+            /* There is no empty entry, extend the entry list by one. */
+            /* realloc() goes through a temporary so that the old     */
+            /* block is not lost if the call fails.                   */
+            newentrylist = (struct hashtable_entry *)
+                realloc(hash->entrylist, (hash->entrylistsize + 1) *
+                sizeof(struct hashtable_entry));
+            assert( newentrylist != NULL );
+            hash->entrylist = newentrylist;
+            hash->entrylistsize += 1;
+            /* point to the new entry to be used */
+            entry = hash->entrylist + hash->entrylistsize - 1;
+        }
+        /* TODO: consider doubling the number of buckets */
+        /* put the key into the entry */
+        entry->keypointer = keypointer;
+        entry->keyint = keylength;
+        /* Insert the index of this entry at the front of the bucket. */
+        /* The index is the position of the entry in the entry list,  */
+        /* not the number of bucket members that were scanned.        */
+        newbucketlist = (int *) realloc( bucket->list,
+            (bucket->size + 1) * sizeof(int) );
+        assert( newbucketlist != NULL );
+        bucket->list = newbucketlist;
+        memmove(bucket->list+1, bucket->list, bucket->size * sizeof(int));
+        * bucket->list = (int) (entry - hash->entrylist);
+        ++ (bucket->size);
+    }
+    /* New or existing, the entry now receives the value */
+    entry->valuepointer = valuepointer;
+    entry->valueint = valueint;
+    return hashcode;
+}
+
+
+/* hashtable_fetch */
+/* Look up a key.  When it is found, copy the stored value pointer    */
+/* and value integer to * valuepointerpointer and * valueintpointer   */
+/* and return 1.  Otherwise store NULL and 0 there and return 0.      */
+int
+hashtable_fetch(struct hashtable * hash, void * keypointer,
+    int keylength, void ** valuepointerpointer, int * valueintpointer)
+{
+    struct hashtable_bucket * bucket;
+    struct hashtable_entry * entry = NULL, * entrylist = hash->entrylist;
+    char * hashingpointer;
+    int hashinglength, hashcode, i, found = 0, * bucketentry;
+    /* fprintf(stderr,"fetch '%*s'\n", -keylength, (char *) keypointer); */
+    /* Search for the entry by hashing the key and scanning a bucket. */
+    /* The arithmetic must stay identical to hashtable_store().       */
+    hashcode = hash->salt;
+    hashinglength = keylength;
+    hashingpointer = (char *) keypointer;
+    while (hashinglength--)
+        hashcode = (hashcode * 33) + * hashingpointer ++;
+    hashcode += hashcode >> 5;
+    bucket = hash->bucketlist1 + (hashcode & hash->bucketmask1);
+    for (bucketentry = bucket->list, i = bucket->size; !found && --i >= 0; ) {
+        entry = entrylist + * bucketentry++;
+        if (entry->keyint==keylength && (memcmp(
+            entry->keypointer,keypointer,keylength)==0)) {
+            found = 1;
+        }
+    }
+    /* If the key is not found via the first bucket list, it may be   */
+    /* because it should be found via the second bucket list that is  */
+    /* being constructed.                                             */
+    if (!found && hash->bucketlist2) {
+        bucket = hash->bucketlist2 + (hashcode & hash->bucketmask2);
+        for (bucketentry = bucket->list, i = bucket->size; !found && --i >= 0; ) {
+            /* each candidate must be loaded before it is compared */
+            entry = entrylist + * bucketentry++;
+            if (entry->keyint==keylength && (memcmp(
+                entry->keypointer,keypointer,keylength)==0)) {
+                found = 1;
+            }
+        }
+    }
+    if (found) {
+        * valuepointerpointer = entry->valuepointer;
+        * valueintpointer = entry->valueint;
+    }
+    else {
+        * valuepointerpointer = NULL;
+        * valueintpointer = 0;
+    }
+    return found;
+}
+
+
+/* hashtable_delete */
+/* Intended behaviour: empty the hashtable entry of the given key,    */
+/* link it into the empty entry list, and delete the index of the     */
+/* entry from its bucket.                                             */
+/* NOTE: the removal is not implemented yet.  This routine currently  */
+/* only searches for the key and leaves the table unchanged.  It      */
+/* returns the hash code computed for the key.                        */
+int
+hashtable_delete(struct hashtable * hash, void * keypointer,
+    int keylength)
+{
+    struct hashtable_bucket * bucket;
+    struct hashtable_entry * entry = NULL, * entrylist = hash->entrylist;
+    char * hashingpointer;
+    int hashinglength, hashcode, i, found = 0, * bucketentry;
+    /* Search for the entry by hashing the key and scanning a bucket. */
+    /* The arithmetic must stay identical to hashtable_store().       */
+    hashcode = hash->salt;
+    hashinglength = keylength;
+    hashingpointer = (char *) keypointer;
+    while (hashinglength--)
+        hashcode = (hashcode * 33) + * hashingpointer ++;
+    hashcode += hashcode >> 5;
+    bucket = hash->bucketlist1 + (hashcode & hash->bucketmask1);
+    for (bucketentry = bucket->list, i = bucket->size; !found && --i >= 0; ) {
+        entry = entrylist + * bucketentry++;
+        if (entry->keyint==keylength && (memcmp(
+            entry->keypointer,keypointer,keylength)==0)) {
+            found = 1;
+        }
+    }
+    /* If the key is not found via the first bucket list, it may be   */
+    /* because it should be found via the second bucket list that is  */
+    /* being constructed.                                             */
+    if (!found && hash->bucketlist2) {
+        bucket = hash->bucketlist2 + (hashcode & hash->bucketmask2);
+        for (bucketentry = bucket->list, i = bucket->size; !found && --i >= 0; ) {
+            /* each candidate must be loaded before it is compared */
+            entry = entrylist + * bucketentry++;
+            if (entry->keyint==keylength && (memcmp(
+                entry->keypointer,keypointer,keylength)==0)) {
+                found = 1;
+            }
+        }
+    }
+    if (found) {
+        /* TODO: turn the entry into a hole, link it into the list of */
+        /* deleted entries and remove its index from the bucket.      */
+        /* TODO: consider halving the number of buckets */
+    }
+    return hashcode;
+}
+
+
+/* hashtable_free */
+/* Release the memory owned by the hash table itself: the index list  */
+/* of every bucket, the bucket lists, the entry list and the table    */
+/* structure.  A NULL argument is ignored.                            */
+void
+hashtable_free(struct hashtable * hash)
+{
+    /* What this routine cannot do is free the memory used by the     */
+    /* keys and values in the hashtable.  The calling code that knows */
+    /* more about their structure should do that beforehand, or       */
+    /* (worse) leave the litter scattered around the heap for some    */
+    /* unfortunate garbage collector to pick up (the usual managed    */
+    /* memory cop-out).                                               */
+    int i;
+    if (hash == NULL)
+        return;
+    /* Every bucket has its own index list, so step through them all  */
+    for (i=0; i<=hash->bucketmask1; ++i)
+        free(hash->bucketlist1[i].list);
+    free(hash->bucketlist1);
+    /* NOTE(review): assumes a second bucket list, when present, owns */
+    /* its index lists separately from the first - confirm once       */
+    /* re-hashing is implemented.                                     */
+    if (hash->bucketlist2 != NULL) {
+        for (i=0; i<=hash->bucketmask2; ++i)
+            free(hash->bucketlist2[i].list);
+        free(hash->bucketlist2);
+    }
+    free(hash->entrylist);
+    free(hash);
+}
+
+
+/* hashtable_iterator_next */
+/* Copy the next occupied entry of the table into * entry and return  */
+/* 1.  When no occupied entry remains, clear * entry, mark the        */
+/* iterator as finished and return 0.  A finished iterator keeps      */
+/* returning 0.                                                       */
+int
+hashtable_iterator_next(struct hashtable_iterator *
+    iter, struct hashtable_entry * entry)
+{
+    int cursor = iter->nextentryindex;
+    if (cursor >= 0) {
+        /* the iterator is still active */
+        struct hashtable_entry * slots = iter->hashtable->entrylist;
+        int last = iter->hashtable->entrylistsize - 1;
+        /* step over holes, but never beyond the last slot */
+        for ( ; cursor < last && slots[cursor].keypointer == NULL; ++cursor)
+            ;
+        if (cursor <= last && slots[cursor].keypointer != NULL) {
+            entry->keypointer   = slots[cursor].keypointer;
+            entry->keyint       = slots[cursor].keyint;
+            entry->valuepointer = slots[cursor].valuepointer;
+            entry->valueint     = slots[cursor].valueint;
+            iter->nextentryindex = cursor + 1;
+            return 1;
+        }
+        /* ran off the end: deactivate the iterator */
+        iter->nextentryindex = -1;
+    }
+    /* nothing (more) to report */
+    entry->keypointer = NULL;
+    entry->valuepointer = NULL;
+    entry->keyint = 0;
+    entry->valueint = 0;
+    return 0;
+}
+
+
+/* hashtable_iterator_init */
+/* Attach the iterator to a hash table and position it at the first   */
+/* slot of the entry list.                                            */
+void
+hashtable_iterator_init(struct hashtable * hash,
+    struct hashtable_iterator * iter)
+{
+    iter->nextentryindex = 0; /* >= 0 means the iterator is active */
+    iter->hashtable = hash;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/* TODO:
+LHF: Track the total number of keys, total number of bytes in all the
+keys, total number of bytes in all the values. The value total might be
+meaningless because the int value might be being used as a flag instead
+of a size. These totals should have negligible overhead. Prove it by
+making a #define that optionally includes or excludes almost all the
+code. It should also be possible to verify the totals in an assert() as
+part of the re-hash that occurs when the number of buckets doubles.
+
+MHF: If it can be done in O(1) time, track the highest number of entries
+in any bucket (the problem is what to do after a delete without
+resorting to an O(bucketmask) scan). A (single/double) linked list
+ordering the buckets by size may work. The information is not
+necessary, only interesting for the curious, so the CPU overhead must
+be negligible. Prove it by making the code #define optional.
+
+MHF: make variable the threshold load factors that trigger doubling and
+halving of the number of buckets.
+
+HHF: Try converting hashtable->entrylist into a list of lists, in order to
+reduce the size of the memory blocks that need to be copied during the
+realloc() that sometimes occurs when adding a new key. Again,
+negligible added CPU time please, although there is a bit more tolerance
+in this change because it should speed up some inserts a little when
+hashes contain many (perhaps over 1000) entries. Prove the difference
+if possible with a #define.
+
+MHF: Currently the entries in a bucket are ordered from most recently
+used (MRU) to least recently used (LRU). Every access can update the
+order, and searching is sequential, favouring short lists. As an
+alternative try keeping the entries in key order and search with
+bsearch() instead. That would update the order only when keys are added
+or deleted, and would favour longer lists. Would this perform better or
+worse with typical Perl 6 scripts? Use #define again to make this
+change optional to find out. With larger bucket sizes this would behave
+like a hash of sorted arrays instead of a simple hash table.
+*/
+
+/* See also:
+Hash table overview http://en.wikipedia.org/wiki/Hash_table
+Perl 5 hash source http://cpansearch.perl.org/src/RJBS/perl-5.12.3/hv.c
+*/
+
+/* end of hashtable.c */
View
74 c/src/hashtable.h
@@ -0,0 +1,74 @@
+/* hashtable.h */
+
+/* Structures */
+
+/* hashtable_entry */
+/* One slot of the entry list: either a key-value pair or a hole. */
+struct hashtable_entry {
+ /* A hash entry is a key-value pair. The key and the value can */
+ /* each be an arbitrary array of bytes. */
+ /* This is a frequently accessed data structure. For address */
+ /* alignment and efficient memory access all the pointers are */
+ /* defined at the beginning and the other members ordered by */
+ /* decreasing size and frequency of use after that. Some C */
+ /* compilers may do that anyway, but maintaining the order in the */
+ /* source code is only a small effort. */
+ void * keypointer; /* address of the key bytes, NULL in a hole */
+ void * valuepointer; /* address of the value, or forward link in a hole */
+ int keyint; /* length of the key in bytes */
+ int valueint; /* length of the value, or backward link in a hole */
+ /* A hash entry can also be empty, for example after a delete. */
+ /* The empty entries are arranged in a linked list ordered by */
+ /* position in the hash entry table, so that subsequent re-use */
+ /* occurs in the entry nearest the start of the table. The empty */
+ /* entry list is doubly linked so that the average addition needs */
+ /* to walk only a quarter of the list (single linked would have */
+ /* needed to walk half the list on average). */
+ /* In an empty entry, keypointer == NULL, keyint == 0, */
+ /* (int)valuepointer == forward link subscript (-1 ends), */
+ /* valueint == backward link subscript (-1 ends). */
+};
+
+/* hashtable_bucket */
+/* The entries whose keys hash to the same bucket number. */
+struct hashtable_bucket {
+ int * list; /* list of indices into hash entry list */
+ int size; /* number of indices in list, 0 when the bucket is empty */
+};
+
+/* hashtable */
+/* The hash table itself: an entry list plus up to two bucket lists. */
+struct hashtable {
+ /* This is a frequently accessed data structure. For best memory */
+ /* alignment, all the pointer fields are at the beginning and the */
+ /* other members ordered by decreasing size and popularity after */
+ /* that. Some compilers may do that anyway, but manual ordering */
+ /* is only a slight inconvenience. */
+ struct hashtable_entry * entrylist; /* array of entrylistsize entries */
+ struct hashtable_bucket * bucketlist1; /* array of bucketmask1 + 1 buckets */
+ struct hashtable_bucket * bucketlist2; /* second bucket list, NULL when there is none */
+ float loadfactorlow; /* threshold to halve the number of buckets */
+ float loadfactorhigh; /* threshold to double number of buckets */
+ int salt; /* random seed for hashing function */
+ int entrylistsize; /* total including deleted entries */
+ int entrycount; /* number of actual (not deleted) entries */
+ int deletedentryhead; /* head of linked list of deleted entries */
+ int deletedentrytail; /* tail of linked list of deleted entries */
+ int bucketmask1; /* eg 0x3f when there are 64 buckets */
+ int bucketmask2; /* eg 0x3f when there are 64 buckets */
+ int emptybuckets; /* to decide when to shorten bucket list */
+};
+
+/* hashtable_iterator */
+/* Position of a walk through the entry list of one hash table. */
+struct hashtable_iterator {
+ struct hashtable * hashtable; /* the table being iterated */
+ int nextentryindex; /* >=0 when iterating, -1 when done */
+};
+
+/* Function declarations */
+struct hashtable * hashtable_new();
+int hashtable_store(struct hashtable * hash, void * keypointer, int keylength, void * valuepointer, int valueint);
+int hashtable_fetch(struct hashtable * hash, void * keypointer, int keylength, void ** valuepointerpointer, int * valueintpointer);
+int hashtable_delete(struct hashtable * hash, void * keypointer, int keylength);
+void hashtable_free(struct hashtable * hash);
+void hashtable_iterator_init(struct hashtable * hash, struct hashtable_iterator * iter);
+int hashtable_iterator_next(struct hashtable_iterator * iter, struct hashtable_entry * entry);
+
+/* end of hashtable.h */
View
55 c/t/01-toolchain/01a-cc.c
@@ -137,13 +137,13 @@ remove_exe()
perror("01a-cc error 6:");
exit(6);
}
- printf("ok 6 - unlink testexe.c\n");
+ printf("ok 6 - remove testexe.c\n");
status = unlink("testexe" EXT_EXE);
if (status) {
perror("01a-cc error 7:");
exit(5);
}
- printf("ok 7 - unlink testexe" EXT_EXE "\n");
+ printf("ok 7 - remove testexe" EXT_EXE "\n");
}
@@ -173,7 +173,7 @@ create_lib()
);
fclose(testlib_sourcefile);
#ifdef _WIN32
- #ifdef MSVC
+ #ifdef _MSC_VER
status = system("cl -LD -WX -nologo testlib.c >nul"); /* Visual C++ */
#else
status = system("gcc -mdll -o testlib.dll testlib.c"); /* MinGW */
@@ -183,6 +183,9 @@ create_lib()
if (status==0) {
status = system("cc -shared -s -o testlib.so testlib.o");
}
+ if (status==0) {
+ status = system("rm testlib.o");
+ }
#endif
if (status) {
perror("01a-cc error 8:");
@@ -215,7 +218,7 @@ load_lib()
exit(9);
}
#endif
- printf("ok 9 - loaded testlib" EXT_DYNLIB "\n");
+ printf("ok 9 - load testlib" EXT_DYNLIB "\n");
#ifdef _WIN32
pfunction = GetProcAddress(testlib, "testfunction");
@@ -223,30 +226,30 @@ load_lib()
dlerror(); /* clear any possible error */
pfunction = dlsym(testlib, "testfunction");
if( (error = dlerror()) != NULL ) {
- fprintf(stderr, "01a-cc error 11: %s\n", error);
+ fprintf(stderr, "01a-cc error 10: %s\n", error);
exit(10);
}
#endif
if (pfunction == NULL) {
- fprintf(stderr, "01a-cc error 11a: GetProcAddress returned NULL\n");
- exit(11);
+ fprintf(stderr, "01a-cc error 10a: GetProcAddress returned NULL\n");
+ exit(10);
}
- printf("ok 11 - dlsym testfunction\n");
- result = (* pfunction)(12, "call testfunction"); /* prints "ok 12" */
- if (result == 42+12)
- printf("ok 13 - testfunction result\n");
+ printf("ok 10 - dlsym testfunction\n");
+ result = (* pfunction)(11, "call testfunction"); /* prints "ok 12" */
+ if (result == 42+11)
+ printf("ok 12 - testfunction result\n");
else
- printf("not ok 13 - testfunction result\n");
+ printf("not ok 12 - testfunction result\n");
#ifdef _WIN32
result = ! FreeLibrary(testlib); /* returns 0 for failure! */
#else
result = dlclose(testlib);
#endif
if (result) {
- fprintf(stderr, "01a-cc error 14: %s\n", error);
+ fprintf(stderr, "01a-cc error 13: %s\n", error);
exit(EXIT_FAILURE);
}
- printf("ok 14 - dlclose\n");
+ printf("ok 13 - unload library\n");
}
@@ -257,26 +260,26 @@ remove_lib()
int status;
status = unlink("testlib.c");
if (status) {
- perror("01a-cc error 15:");
- exit(15);
+ perror("01a-cc error 14:");
+ exit(14);
}
- printf("ok 15 - unlink testlib.c\n");
- #ifdef MSVC
+ printf("ok 14 - remove testlib.c\n");
+ #ifdef _MSC_VER
status = unlink("testlib" EXT_OBJ);
if (status) {
- perror("01a-cc error 16:");
- exit(16);
+ perror("01a-cc error 15:");
+ exit(15);
}
- printf("ok 16 - unlink testlib" EXT_OBJ "\n");
+ printf("ok 15 - remove testlib" EXT_OBJ "\n");
#else
- printf("ok 16 - unlink testlib # SKIPPED\n");
+ printf("ok 15 - remove testlib # SKIPPED\n"); /* MinGW */
#endif
status = unlink("testlib" EXT_DYNLIB);
if (status) {
- perror("01a-cc error 17:");
- exit(17);
+ perror("01a-cc error 16:");
+ exit(16);
}
- printf("ok 17 - unlink testlib" EXT_DYNLIB "\n");
+ printf("ok 16 - remove testlib" EXT_DYNLIB "\n");
}
@@ -284,7 +287,7 @@ remove_lib()
int
main(int argc, char * argv[])
{
- printf("1..17\n"); /* tests */
+ printf("1..16\n"); /* tests */
create_exe(); /* 1-2 make testexe.c and testexe.exe */
run_exe(); /* 2-5 run testexe.exe */
remove_exe(); /* 6-7 remove testexe.c and testexe.exe */
View
169 c/t/02-components/02a-hashtable.c
@@ -0,0 +1,169 @@
+/* 02a-hashtable.c */
+/* Create several hashes and store, fetch and delete a large number */
+/* of random data values. Count the number of such random operations */
+/* (not exactly repeatable) that can be performed in a 10 second */
+/* interval. Free everything so that Valgrind can verify that there */
+/* are no memory leaks. */
+
+#include <assert.h> /* assert */
+#include <stdio.h> /* printf */
+#include <stdlib.h> /* malloc */
+#include <string.h> /* memmove strlen */
+#include <sys/time.h> /* gettimeofday */
+#include "../../src/hashtable.h"
+#include "../Test.h" /* is plan */
+
+/* The number of allocations is O((STRINGCOUNT ** 2) * MAXKEYLENGTH), */
+/* so be careful when increasing it. 5000 strings use 250MiB. */
+#define STRINGCOUNT 5000 /* number of strings to store into hash table */
+#define MAXKEYLENGTH 40 /* maximum number of characters per key */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* hashtable_dump */
+/* Debugging aid: write the entry list and the first bucket list of a */
+/* hash table to stderr.  Keys and values are printed as text, with   */
+/* their stored integer used as the maximum number of bytes to show.  */
+void
+hashtable_dump(struct hashtable * hashtable)
+{
+    int i, j;
+    struct hashtable_entry * entry;
+    struct hashtable_bucket * bucket;
+    fprintf(stderr,"{hashtable at %lx size %d at %lx %d keys\n",
+        (long) hashtable, hashtable->entrylistsize,
+        (long) hashtable->entrylist, hashtable->entrycount
+    );
+    for (i=0; i<hashtable->entrylistsize; ++i) {
+        entry = hashtable->entrylist + i;
+        /* A hole is recognised by its NULL key pointer; its keyint   */
+        /* may be 0 or -1.                                            */
+        if (entry->keypointer != NULL) {
+            /* %.*s bounds the read, keys need no '\0' terminator */
+            fprintf(stderr," entry %d at %lx '%.*s' => '%.*s'\n",
+                i, (long) entry,
+                entry->keyint, (char *) entry->keypointer,
+                entry->valueint,
+                entry->valuepointer ? (char *) entry->valuepointer : ""
+            );
+        }
+        else {
+            fprintf(stderr," entry %d at %lx empty forward %lx back %d\n",
+                i, (long) entry,
+                (long)(entry->valuepointer), entry->valueint
+            );
+        }
+    }
+    fprintf(stderr," %d bucket%s at %lx\n", hashtable->bucketmask1 + 1,
+        (hashtable->bucketmask1 == 0) ? "" : "s", (long) hashtable->bucketlist1
+    );
+    for (i=0; i <= hashtable->bucketmask1; ++i ) {
+        /* step through the buckets instead of repeating bucket 0 */
+        bucket = hashtable->bucketlist1 + i;
+        if (bucket->size) {
+            fprintf(stderr," bucket %d %d at %lx", i,
+                bucket->size, (long) bucket->list
+            );
+            for (j=0; j < bucket->size; ++j) {
+                entry = hashtable->entrylist + bucket->list[j];
+                fprintf(stderr," %.*s", entry->keyint, (char *) entry->keypointer);
+            }
+            fprintf(stderr,"\n");
+        }
+        else {
+            fprintf(stderr," bucket %d empty\n", i );
+            assert( bucket->list == NULL );
+        }
+    }
+    fprintf(stderr,"}\n");
+}
+
+/* random_string */
+/* Return a newly allocated, '\0' terminated string of 1 to maxlength */
+/* random lower case letters.  The caller must free() it.  maxlength  */
+/* must be at least 1.                                                */
+char *
+random_string(int maxlength)
+{
+    int i, length;
+    char * s;
+    assert( maxlength > 0 ); /* rand() % 0 would be undefined */
+    length = (rand() % maxlength) + 1;
+    s = (char *) malloc(length+1);
+    assert( s != NULL );
+    for (i=0; i<length; ++i) {
+        s[i] = (rand() % 26) + 'a';
+    }
+    s[length] = '\0';
+    return s;
+}
+
+
+/* main */
+/* Fill a hash table with STRINGCOUNT distinct random keys, then walk */
+/* it with an iterator, freeing every key and value, and report four  */
+/* test results.                                                      */
+int main(int argc, char *argv[])
+{
+    struct timeval time_now;
+    struct hashtable * hashtable;
+    struct hashtable_iterator iter;
+    struct hashtable_entry entry;
+    void * valuepointer;
+    int valueint, seed, stringcount = 0, keylength, i, key_bytes = 0,
+        value_bytes = 0, collisions = 0, delete_count;
+    char * key, * value;
+
+    plan(4);
+    gettimeofday(&time_now, NULL);
+    seed = time_now.tv_sec ^ time_now.tv_usec;
+    hashtable = hashtable_new();
+    srand(seed);
+
+    /* Test 1 - store STRINGCOUNT distinct keys */
+    while (stringcount<STRINGCOUNT) { /* nondeterministic because of collisions */
+        key = random_string(MAXKEYLENGTH);
+        assert( key != NULL );
+        keylength = strlen(key);
+        /* create a value consisting of the key reversed followed by */
+        /* the original key, for example 'abc' -> 'cbaabc' */
+        value = (char *) malloc(2 * keylength + 1);
+        assert( value != NULL );
+        for (i=0; i<keylength; ++i) {
+            value[i] = key[keylength - 1 - i];
+        }
+        value[keylength] = '\0';
+        strcat( value, key );
+        /* test whether the key is already in the hashtable */
+        if ( hashtable_fetch(hashtable, key, keylength, & valuepointer, & valueint) ) {
+            /* it is already in the hash table, free these values */
+            free(key);
+            free(value);
+            ++ collisions;
+        }
+        else {
+            /* it is not already in the hash table, add it */
+            hashtable_store(hashtable, key, keylength, value, 2 * keylength);
+            key_bytes += keylength;
+            value_bytes += 2 * keylength;
+            ++ stringcount;
+        }
+    }
+    is_ii( stringcount, STRINGCOUNT, "created a hash with 5000 entries");
+
+    /* Test 2 - iterate the entries and free their keys and values.   */
+    /* The entries themselves stay in the table until                 */
+    /* hashtable_free() releases the entry list.                      */
+    hashtable_iterator_init(hashtable, & iter);
+    delete_count = 0;
+    while (hashtable_iterator_next(& iter, & entry)) {
+        key_bytes -= strlen((char *) entry.keypointer);
+        value_bytes -= strlen((char *) entry.valuepointer);
+        /* fprintf(stderr,"iter A '%s' => '%s'\n", (char *) entry.keypointer, (char *) entry.valuepointer); */
+        free(entry.keypointer);
+        free(entry.valuepointer);
+        ++delete_count;
+    }
+    is_ii(delete_count, stringcount, "iterate 5000 entries and delete");
+
+    /* Test 3 - verify total number of bytes in keys */
+    is_ii(key_bytes, 0, "all bytes in keys reclaimed");
+
+    /* Test 4 - verify total number of bytes in values */
+    is_ii(value_bytes, 0, "all bytes in values reclaimed");
+
+    /* Cannot test this internally, but Valgrind should show that no */
+    /* bytes remain allocated on the heap. */
+    hashtable_free(hashtable);
+    return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* end of 02a-hashtable.c */
View
15 c/tools/build/Configure.c
@@ -49,7 +49,7 @@
/* Subscript names for configuration strings. Almost like a hash ;) */
enum { CC, EXE, LDL, MAKE_COMMAND, OS_TYPE, OUT, RM_RF,
CONFIG_END /* this one must always be last */ };
-char * config[CONFIG_END];
+char * config[CONFIG_END] = {"", "", "", "", "", "", ""};
/* forward references to internal functions */
void detect(void);
char * slurp(char * filename);
@@ -63,8 +63,9 @@ void trans(char ** text, char * search, char * replace);
void
config_set(void)
{
+ char * s;
/* Operating system */
- if (strcmp(getenv("OS"),"Windows_NT")==0) { /* any Windows system */
+ if ((s=getenv("OS")) && strcmp(s,"Windows_NT")==0) { /* any Windows system */
config[OS_TYPE] = "Windows";
config[EXE] = ".exe";
}
@@ -73,13 +74,13 @@ config_set(void)
config[EXE] = "";
}
/* C compiler */
- if (strcmp(getenv("COMPILER"),"MSVC")==0) {
+ if ((s=getenv("COMPILER")) && strcmp(s,"MSVC")==0) {
config[CC] = "cl -DMSVC ";
config[OUT] = "-Fe";
config[RM_RF] = "del /F /Q /S";
}
- if (strcmp(getenv("COMPILER"),"GCC")==0) {
- if (strcmp(getenv("OS"),"Windows_NT")==0)
+ if ((s=getenv("COMPILER")) && strcmp(s,"GCC")==0) {
+ if ((s=getenv("OS")) && strcmp(s,"Windows_NT")==0)
config[CC] = "cc -DGCC ";
else
config[CC] = "cc -DGCC -ldl ";
@@ -87,7 +88,7 @@ config_set(void)
config[RM_RF] = "rm -rf";
}
/* Make utility */
- if (strcmp(getenv("COMPILER"),"GCC")==0) {
+ if ((s=getenv("COMPILER")) && strcmp(s,"GCC")==0) {
config[MAKE_COMMAND] = "make";
} else {
config[MAKE_COMMAND] = "nmake";
@@ -228,4 +229,4 @@ main(int argc, char * argv[])
return 0;
}
-/* end of Configure.c */
+/* end of Configure.c */
View
44 c/tools/build/Makefile.in
@@ -6,36 +6,50 @@
# Targets that do not produce files (tells make not to waste time
# checking that such files exist).
-
.PHONY: all clean test test01 test02
+# The compiled Configure.c will replace specific words between @ signs.
CC = @cc@
EXE = @exe@
O = @out@
RM_RF = @rm_rf@
+# The first target is default, will be used by a plain 'make' command.
all: test
-t/01-toolchain/01a-cc.exe: t/01-toolchain/01a-cc.c
+# Recipes to build executables
+t/01-toolchain/01a-cc.exe: t/01-toolchain/01a-cc.c t/Test.h
$(CC) $(O)t/01-toolchain/01a-cc.exe t/01-toolchain/01a-cc.c
- -$(RM_RF) 01a-cc.obj
+ -$(RM_RF) 01a-cc.obj
+
+t/01-toolchain/01b-threads.exe: t/01-toolchain/01b-threads.c t/Test.h
+ $(CC) $(O)t/01-toolchain/01b-threads.exe t/01-toolchain/01b-threads.c
+ -$(RM_RF) 01b-threads.obj
+
+t/02-components/02a-hashtable.exe: t/02-components/02a-hashtable.c \
+ src/hashtable.h src/hashtable.c t/Test.h
+ $(CC) $(O)t/02-components/02a-hashtable.exe src/hashtable.c t/02-components/02a-hashtable.c
+ -$(RM_RF) hashtable.obj
-t/02-components/02a-hashtable.exe: t/02-components/02a-hashtable.c
- $(CC) $(O)t/02-components/02a-hashtable.exe t/02-components/02a-hashtable.c
+# Build the heap manager test from its own source plus src/heapmanager.c.
+# The leading '-' on the cleanup command tells make to ignore its failure.
+t/02-components/02b-heapmanager.exe: t/02-components/02b-heapmanager.c \
+ src/heapmanager.h src/heapmanager.c t/Test.h
+ $(CC) $(O)t/02-components/02b-heapmanager.exe src/heapmanager.c t/02-components/02b-heapmanager.c
+ -$(RM_RF) heapmanager.obj
+
+tools/build/prove$(EXE): tools/build/prove.c
+ $(CC) $(O)tools/build/prove$(EXE) tools/build/prove.c
+ -$(RM_RF) prove.obj
# Test executables are named *.exe even on Unix so that prove can find them
test01: t/01-toolchain/01a-cc.exe tools/build/prove$(EXE)
tools/build/prove -e "" --ext ".exe" t/01-toolchain
-test02: t/02-components/02a-hashtable.exe
- prove -e "" --ext ".exe" t/02-components
+test02: t/02-components/02a-hashtable.exe tools/build/prove$(EXE)
+ tools/build/prove -e "" --ext ".exe" t/02-components
test: test01 test02
-tools/build/prove$(EXE): tools/build/prove.c
- $(CC) $(O)tools/build/prove$(EXE) tools/build/prove.c
- -$(RM_RF) prove.obj
-
+# Miscellaneous targets
clean:
$(RM_RF) *.exe *.obj *.dll *.lib *.exp
@@ -44,5 +58,9 @@ realclean: clean
help:
@echo In this 6model/c directory you can make the following targets:
- @echo test01 - tests the toolchain (C compiler, ICU etc)
- @echo help - you already found this
+ @echo "test - general test as far as 6model has been developed"
+ @echo "test01 - test the toolchain, eg C compiler, threads, ICU etc"
+ @echo "test02 - test 6model components, eg hashtable, heapmanager etc"
+ @echo "clean - remove all generated files except this Makefile"
+ @echo "realclean - remove all generated files including this Makefile"
+ @echo "help - you already found this"
View
80 c/tools/build/prove.c
@@ -1,18 +1,22 @@
/* prove.c */
-/* Lightweigt Test Anything Protocol (TAP) harness */
+/* Lightweight TAP (Test Anything Protocol) harness */
-#include <stdio.h> /* fclose fgets FILE fopen fprintf printf stderr */
-#include <stdlib.h> /* exit free getenv malloc realloc */
-#include <string.h> /* memmove memcpy strcpy strlen strstr */
+/* TODO: parse the test script output looking for 'ok', 'not ok' etc */
+/* and output a summary instead of every test result. */
+
+#include <glob.h> /* glob globfree */
+#include <stdio.h> /* FILE fprintf printf stderr */
+#include <stdlib.h> /* exit free malloc realloc */
+#include <string.h> /* strcat strcpy strlen */
#ifdef _WIN32
-#define pclose _pclose
-#define popen _popen
+ #define pclose _pclose
+ #define popen _popen
#endif
#define LINEBUFFERSIZE 128
-char * executable_program;
-char * filename_extension;
+char * executable_program = NULL;
+char * filename_extension = NULL;
/* options */
@@ -42,6 +46,7 @@ options(int argc, char * argv[])
return argindex;
}
+
/* qx */
/* Imitate the Perl qx operator, returning the results of running the */
/* command passed as a parameter */
@@ -67,15 +72,64 @@ qx(char * command)
}
+/* run_tests_in_dir */
+/* Scan the directory 'dir' for files whose names end with the global */
+/* filename_extension, run each one through qx() and print whatever   */
+/* qx() returns on stdout. Prints a message on stderr and exits with  */
+/* status 1 if memory cannot be allocated or if glob() reports an     */
+/* error (including finding no matching files).                       */
+void
+run_tests_in_dir(char * dir)
+{
+    size_t patternlength, pathindex;
+    int status;
+    char * glob_pattern, * tap_output;
+    const char * extension, * errormessage;
+    glob_t globbuf;
+
+    /* filename_extension is initialized to NULL; never hand NULL to */
+    /* strlen, treat a missing extension as an empty suffix instead. */
+    extension = filename_extension ? filename_extension : "";
+
+    /* Build the pattern dir + slash + star + extension. The extra 3 */
+    /* bytes hold the slash, the star and the terminating NUL.       */
+    patternlength = strlen(dir) + strlen(extension) + 3;
+    glob_pattern = (char *) malloc(patternlength);
+    if (glob_pattern == NULL) {
+        fprintf(stderr,
+            "scanning directory '%s' ended unexpectedly with %s\n",
+            dir, "out of memory");
+        exit(1);
+    }
+    snprintf(glob_pattern, patternlength, "%s/*%s", dir, extension);
+
+    /* Scan the specified directory for test files */
+    status = glob(glob_pattern, 0, NULL, &globbuf);
+    free(glob_pattern);
+    if (status) {
+        switch (status) {
+            case GLOB_NOSPACE:
+                errormessage = "out of memory";
+                break;
+            case GLOB_ABORTED:
+                errormessage = "read error";
+                break;
+            case GLOB_NOMATCH:
+                errormessage = "no files found";
+                break;
+            default:
+                /* keeps errormessage defined for any other status */
+                errormessage = "unknown error";
+                break;
+        }
+        fprintf(stderr,
+            "scanning directory '%s' ended unexpectedly with %s\n",
+            dir, errormessage);
+        exit(1);
+    }
+
+    /* Run each test file found in the directory and print its output */
+    for (pathindex = 0; pathindex < globbuf.gl_pathc; pathindex++) {
+        tap_output = qx(globbuf.gl_pathv[pathindex]);
+        /* NOTE(review): qx() is defined elsewhere; the NULL guard   */
+        /* assumes it may fail to return a buffer - confirm.         */
+        if (tap_output != NULL) {
+            printf("%s\n", tap_output);
+            free(tap_output);
+        }
+    }
+
+    /* Release the path list that glob() allocated */
+    globfree(&globbuf);
+}
+
+
/* main */
int
main(int argc, char * argv[])
{
- char * tap_output;
- int argi;
+ int i, argi;
+
+ /* Get command line options and process them */
argi = options(argc, argv);
- printf("exe=%s ext=%s arg=%s\n", executable_program, filename_extension, argv[argi]);
- tap_output = qx("dir");
- printf("tap_output=%s\n", tap_output);
+
+ /* The remaining arguments are all expected to be directories */
+ for (i=argi; i<argc; ++i)
+ run_tests_in_dir(argv[i]);
return 0;
}
Please sign in to comment.
Something went wrong with that request. Please try again.