Skip to content

Commit

Permalink
Implement a hash table data structure for rate stats
Browse files Browse the repository at this point in the history
This new implementation removes the previous hard limit on
the number of hosts that could be tracked at one time. Now
that number can scale to however many hosts are seen on the
wire.
  • Loading branch information
jbittel committed Nov 29, 2011
1 parent ba7b3a1 commit ae91189
Show file tree
Hide file tree
Showing 7 changed files with 268 additions and 164 deletions.
1 change: 1 addition & 0 deletions doc/ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ version 0.1.7
* added an option (-P) to specify the PID filename
* fixed compiling under OSX
* changed IPv6 parsing to follow extension headers if present
* changed rate statistics code to use a hash table data structure

version 0.1.6
* added IPv6 support
Expand Down
53 changes: 19 additions & 34 deletions format.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,23 @@
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include "error.h"
#include "format.h"
#include "utility.h"

#define HASHSIZE 30
#define HASHSIZE 64

/* One output field taken from the format string. Nodes are stored in a
   hash table keyed by field name. */
typedef struct format_node {
    char *name;                /* field name; compared against lookup keys */
    char *value;               /* current value for this field */
    struct format_node *next;  /* next node chained in the same hash bucket */
    struct format_node *list;  /* NOTE(review): presumably links fields in
                                  insertion order; confirm against the code
                                  that walks the list */
} FORMAT_NODE;

FORMAT_NODE *insert_node(char *str);
FORMAT_NODE *hash_lookup(char *str);
unsigned hash_string(char *str);
FORMAT_NODE *insert_field(char *str);
FORMAT_NODE *get_field(char *str);

static FORMAT_NODE *output_fields[HASHSIZE];
static FORMAT_NODE *fields[HASHSIZE];
static FORMAT_NODE *head = NULL;

/* Parse and insert output fields from format string */
Expand All @@ -68,7 +68,7 @@ void parse_format_string(char *str) {
name = str_tolower(name);

if (strlen(name) == 0) continue;
if (insert_node(name)) num_nodes++;
if (insert_field(name)) num_nodes++;
}

free(tmp);
Expand All @@ -81,10 +81,10 @@ void parse_format_string(char *str) {
FORMAT_NODE *node;

for (j = 0; j < HASHSIZE; j++) {
if (output_fields[j]) num_buckets++;
if (fields[j]) num_buckets++;

num_chain = 0;
for (node = output_fields[j]; node != NULL; node = node->next) num_chain++;
for (node = fields[j]; node != NULL; node = node->next) num_chain++;
if (num_chain > max_chain) max_chain = num_chain;
}

Expand All @@ -101,28 +101,28 @@ void parse_format_string(char *str) {
}

/* Insert a new node into the hash table */
FORMAT_NODE *insert_node(char *name) {
FORMAT_NODE *insert_field(char *name) {
FORMAT_NODE *node;
static FORMAT_NODE *prev = NULL;
unsigned hashval;
unsigned int hashval;

#ifdef DEBUG
ASSERT(name);
ASSERT(strlen(name) > 0);
#endif

if ((node = hash_lookup(name)) == NULL) {
if ((node = get_field(name)) == NULL) {
if ((node = (FORMAT_NODE *) malloc(sizeof(FORMAT_NODE))) == NULL)
LOG_DIE("Cannot allocate memory for new node");

hashval = hash_string(name);
hashval = hash_str(name, HASHSIZE);

#ifdef DEBUG
ASSERT((hashval >= 0) && (hashval < HASHSIZE));
#endif

node->next = output_fields[hashval];
output_fields[hashval] = node;
node->next = fields[hashval];
fields[hashval] = node;
} else {
WARN("Format name '%s' already provided", name);
return NULL;
Expand Down Expand Up @@ -155,7 +155,7 @@ void insert_value(char *name, char *value) {
if ((strlen(name) == 0) || (strlen(value) == 0))
return;

if ((node = hash_lookup(name)))
if ((node = get_field(name)))
node->value = value;

return;
Expand All @@ -172,7 +172,7 @@ char *get_value(char *name) {
if (strlen(name) == 0)
return EMPTY_FIELD;

if ((node = hash_lookup(name))) {
if ((node = get_field(name))) {
return node->value;
} else {
return EMPTY_FIELD;
Expand Down Expand Up @@ -262,33 +262,18 @@ void free_format() {

/* Lookup a particular node in hash; return pointer to node
if found, NULL otherwise */
FORMAT_NODE *hash_lookup(char *str) {
FORMAT_NODE *get_field(char *str) {
FORMAT_NODE *node;

#ifdef DEBUG
ASSERT(str);
ASSERT(strlen(str) > 0);
ASSERT((hash_string(str) >= 0) && (hash_string(str) < HASHSIZE));
ASSERT((hash_str(str, HASHSIZE) >= 0) && (hash_str(str, HASHSIZE) < HASHSIZE));
#endif

for (node = output_fields[hash_string(str)]; node != NULL; node = node->next)
for (node = fields[hash_str(str, HASHSIZE)]; node != NULL; node = node->next)
if (str_compare(str, node->name) == 0)
return node;

return NULL;
}

/* Hash a string into a bucket index in the range [0, HASHSIZE).

   This is the XOR variant of Bernstein's djb2 hash (hash * 33 ^ c),
   applied to each byte after tolower(), so strings that differ only
   in letter case (as defined by tolower) produce the same index.

   str must be a non-NULL, NUL-terminated string; DEBUG builds also
   assert that it is non-empty. */
unsigned hash_string(char *str) {
    unsigned hashval;

#ifdef DEBUG
    ASSERT(str);
    ASSERT(strlen(str) > 0);
#endif

    /* tolower() is only defined for EOF and values representable as
       unsigned char. Where plain char is signed, a byte >= 0x80 would
       be passed as a negative int, which is undefined behaviour, so
       convert through unsigned char first. */
    for (hashval = 5381; *str != '\0'; str++)
        hashval = (hashval * 33) ^ (unsigned) tolower((unsigned char) *str);

    return hashval % HASHSIZE;
}
6 changes: 3 additions & 3 deletions httpry.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ void display_banner();
void display_usage();

/* Program flags/options, set by arguments or config file */
static u_int parse_count = 0;
static unsigned int parse_count = 0;
static int daemon_mode = 0;
static char *use_infile = NULL;
static char *interface = NULL;
Expand All @@ -67,7 +67,7 @@ int use_syslog = 0; /* Defined as extern in error.h */

static pcap_t *pcap_hnd = NULL; /* Opened pcap device handle */
static char *buf = NULL;
static u_int num_parsed = 0; /* Count of fully parsed HTTP packets */
static unsigned int num_parsed = 0; /* Count of fully parsed HTTP packets */
static time_t start_time = 0; /* Start tick for statistics calculations */
static int header_offset = 0;
static pcap_dumper_t *dumpfile = NULL;
Expand Down Expand Up @@ -379,7 +379,7 @@ void parse_http_packet(u_char *args, const struct pcap_pkthdr *header, const u_c
insert_value("timestamp", ts);

if (rate_stats) {
add_to_bucket(get_value("host"), header->ts.tv_sec);
update_host_stats(get_value("host"), header->ts.tv_sec);
clear_values();
} else {
print_format_values();
Expand Down
Loading

0 comments on commit ae91189

Please sign in to comment.