Skip to content

Commit

Permalink
Merge pull request #2594 from Sea-n/multithread-part1
Browse files Browse the repository at this point in the history
Add Multi-thread Support
  • Loading branch information
allinurl committed Dec 20, 2023
2 parents b1332f5 + f4ca02d commit 1e116d0
Show file tree
Hide file tree
Showing 13 changed files with 377 additions and 288 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ config.status
config.guess
config.sub
config.rpath
configure~
**/.deps/
/INSTALL
/Makefile
Expand Down Expand Up @@ -103,6 +104,6 @@ Makefile.in
/resources/css/fa.min.css.tmp
/resources/js/app.js.tmp
/resources/js/charts.js.tmp
/resources/js/d3.v3.min.js.tmp
/resources/js/d3.v?.min.js.tmp
/resources/js/hogan.min.js.tmp
/resources/tpls.html.tmp
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,18 @@ like to process all log files `access.log*`, we can do:

_Note_: On Mac OS X, use `gunzip -c` instead of `zcat`.

### Multi-thread Support ###

Use `--jobs=<count>` (or `-j`) to enable multi-thread parsing. For example:

# goaccess access.log -o report.html -j 4


And use `--chunk-size=<256-32768>` to adjust chunk size, the default chunk size is 1024. For example:

# goaccess access.log -o report.html -j 4 --chunk-size=8192


### Real-time HTML outputs ###

GoAccess has the ability the output real-time data in the HTML report. You can
Expand Down
42 changes: 17 additions & 25 deletions src/gkhash.c
Original file line number Diff line number Diff line change
Expand Up @@ -996,31 +996,28 @@ ins_u648 (khash_t (u648) *hash, uint64_t key, uint8_t value) {
}

/* Increase an uint32_t value given an uint32_t key.
* Note: If the key exists, its value is increased by the given inc.
*
* On error, 0 is returned.
* On success the inserted value is returned */
* On success the increased value is returned */
uint32_t
inc_ii32 (khash_t (ii32) *hash, uint32_t key, uint32_t inc) {
khint_t k;
int ret;
uint32_t value = inc;

if (!hash)
return 0;

k = kh_get (ii32, hash, key);
/* key found, increment current value by the given `inc` */
if (k != kh_end (hash))
value = kh_val (hash, k) + inc;

k = kh_put (ii32, hash, key, &ret);
if (ret == -1)
return 0;

kh_val (hash, k) = value;
/* key not found, put a new hash with val=0 */
if (k == kh_end (hash)) {
k = kh_put (ii32, hash, key, &ret);
/* operation failed */
if (ret == -1)
return 0;
kh_val (hash, k) = 0;
}

return value;
return __sync_add_and_fetch (&kh_val (hash, k), inc);
}

/* Increase a uint64_t value given a string key.
Expand Down Expand Up @@ -1082,33 +1079,29 @@ inc_iu64 (khash_t (iu64) *hash, uint32_t key, uint64_t inc) {
return 0;
}

/* Increase a uint32_t value given a string key.
/* Increase an uint32_t value given a string key.
*
* On error, 0 is returned.
* On success the increased value is returned */
static uint32_t
inc_si32 (khash_t (si32) *hash, const char *key, uint32_t inc) {
khint_t k;
int ret;
uint32_t value = inc;

if (!hash)
return 0;

k = kh_get (si32, hash, key);
/* key not found, set new value to the given `inc` */
/* key not found, put a new hash with val=0 */
if (k == kh_end (hash)) {
k = kh_put (si32, hash, key, &ret);
/* operation failed */
if (ret == -1)
return 0;
} else {
value = kh_val (hash, k) + inc;
kh_val (hash, k) = 0;
}

kh_val (hash, k) = value;

return value;
return __sync_add_and_fetch (&kh_val (hash, k), inc);
}

/* Insert a string key and auto increment int value.
Expand Down Expand Up @@ -1200,7 +1193,7 @@ get_si32 (khash_t (si32) *hash, const char *key) {
k = kh_get (si32, hash, key);
/* key found, return current value */
if (k != kh_end (hash))
return kh_val (hash, k);
return __sync_add_and_fetch (&kh_val (hash, k), 0);

return 0;
}
Expand Down Expand Up @@ -1291,15 +1284,14 @@ get_ss32 (khash_t (ss32) *hash, const char *key) {
uint32_t
get_ii32 (khash_t (ii32) *hash, uint32_t key) {
khint_t k;
uint32_t value = 0;

if (!hash)
return 0;

k = kh_get (ii32, hash, key);
/* key found, return current value */
if (k != kh_end (hash) && (value = kh_val (hash, k)))
return value;
if (k != kh_end (hash))
return __sync_add_and_fetch (&kh_val (hash, k), 0);

return 0;
}
Expand Down
11 changes: 10 additions & 1 deletion src/goaccess.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@
GConf conf = {
.append_method = 1,
.append_protocol = 1,
.chunk_size = 1024,
.hl_header = 1,
.jobs = 1,
.num_tests = 10,
};

Expand Down Expand Up @@ -767,6 +769,7 @@ read_client (void *ptr_data) {
/* Parse tailed lines */
static void
parse_tail_follow (GLog *glog, FILE *fp) {
GLogItem *logitem;
#ifdef WITH_GETLINE
char *buf = NULL;
#else
Expand All @@ -780,7 +783,13 @@ parse_tail_follow (GLog *glog, FILE *fp) {
while (fgets (buf, LINE_BUFFER, fp) != NULL) {
#endif
pthread_mutex_lock (&gdns_thread.mutex);
pre_process_log (glog, buf, 0);
logitem = parse_line (glog, buf, 0);
if (logitem != NULL) {
if (logitem->errstr == NULL)
process_log (logitem);
count_process (glog);
free_glog (logitem);
}
pthread_mutex_unlock (&gdns_thread.mutex);
glog->bytes += strlen (buf);
#ifdef WITH_GETLINE
Expand Down
16 changes: 8 additions & 8 deletions src/gstorage.c
Original file line number Diff line number Diff line change
Expand Up @@ -558,16 +558,16 @@ count_bw (int numdate, uint64_t resp_size) {

/* Keep track of all invalid log strings. */
static void
count_invalid (GLog *glog, const char *line) {
count_invalid (GLog *glog, GLogItem *logitem, const char *line) {
glog->invalid++;
ht_inc_cnt_overall ("failed_requests", 1);

if (conf.invalid_requests_log) {
LOG_INVALID (("%s", line));
}

if (glog->items->errstr && glog->log_erridx < MAX_LOG_ERRORS) {
glog->errors[glog->log_erridx++] = xstrdup (glog->items->errstr);
if (logitem->errstr && glog->log_erridx < MAX_LOG_ERRORS) {
glog->errors[glog->log_erridx++] = xstrdup (logitem->errstr);
}
}

Expand Down Expand Up @@ -610,16 +610,16 @@ count_valid (int numdate) {
/* Keep track of all valid and processed log strings. */
void
count_process (GLog *glog) {
__sync_add_and_fetch(&glog->processed, 1);
lock_spinner ();
glog->processed++;
ht_inc_cnt_overall ("total_requests", 1);
unlock_spinner ();
}

void
count_process_and_invalid (GLog *glog, const char *line) {
count_process_and_invalid (GLog *glog, GLogItem *logitem, const char *line) {
count_process (glog);
count_invalid (glog, line);
count_invalid (glog, logitem, line);
}

/* Keep track of all excluded log strings (IPs).
Expand Down Expand Up @@ -1324,8 +1324,8 @@ gen_status_code_key (GKeyData *kdata, GLogItem *logitem) {
if (!logitem->status)
return 1;

type = verify_status_code_type (logitem->status);
status = verify_status_code (logitem->status);
type = verify_status_code_type (logitem->status);

get_kdata (kdata, status, status);
get_kroot (kdata, type, type);
Expand Down Expand Up @@ -1390,7 +1390,7 @@ static int
include_uniq (GLogItem *logitem) {
int u = conf.client_err_to_unique_count;

if (!logitem->status || logitem->status[0] != '4' || (u && logitem->status[0] == '4'))
if (!logitem->status || (logitem->status / 100) != 4 || (u && (logitem->status / 100) == '4'))
return 1;
return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion src/gstorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ char *get_mtr_str (GSMetric metric);
int excluded_ip (GLogItem * logitem);
uint32_t *i322ptr (uint32_t val);
uint64_t *uint642ptr (uint64_t val);
void count_process_and_invalid (GLog * glog, const char *line);
void count_process_and_invalid (GLog * glog, GLogItem * logitem, const char *line);
void count_process (GLog * glog);
void free_gmetrics (GMetrics * metric);
void insert_methods_protocols (void);
Expand Down
28 changes: 25 additions & 3 deletions src/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@

#include "xmalloc.h"

static char short_options[] = "f:e:p:o:l:H:M:S:b:"
static char short_options[] = "b:e:f:j:l:o:p:H:M:S:"
#ifdef HAVE_LIBGEOIP
"g"
#endif
"acirmMhHqdsV";
"acdhimqrsV";

/* *INDENT-OFF* */
struct option long_opts[] = {
Expand All @@ -71,6 +71,7 @@ struct option long_opts[] = {
{"hl-header" , no_argument , 0 , 'i' } ,
{"http-method" , required_argument , 0 , 'M' } ,
{"http-protocol" , required_argument , 0 , 'H' } ,
{"jobs" , required_argument , 0 , 'j' } ,
{"log-file" , required_argument , 0 , 'f' } ,
{"log-size" , required_argument , 0 , 'S' } ,
{"no-query-string" , no_argument , 0 , 'q' } ,
Expand All @@ -90,6 +91,7 @@ struct option long_opts[] = {
{"color" , required_argument , 0 , 0 } ,
{"color-scheme" , required_argument , 0 , 0 } ,
{"crawlers-only" , no_argument , 0 , 0 } ,
{"chunk-size" , required_argument , 0 , 0 } ,
{"daemonize" , no_argument , 0 , 0 } ,
{"datetime-format" , required_argument , 0 , 0 } ,
{"date-format" , required_argument , 0 , 0 } ,
Expand Down Expand Up @@ -261,6 +263,8 @@ cmd_help (void)
" -d --with-output-resolver - Enable IP resolver on HTML|JSON output.\n"
" -e --exclude-ip=<IP> - Exclude one or multiple IPv4/6. Allows IP\n"
" ranges. e.g., 192.168.0.1-192.168.0.10\n"
" -j --jobs=<1-6> - Threads count for parsing log. Default\n"
" is 1 thread. Use 4 threads is recommended.\n"
" -H --http-protocol=<yes|no> - Set/unset HTTP request protocol if found.\n"
" -M --http-method=<yes|no> - Set/unset HTTP request method if found.\n"
" -o --output=<format|filename> - Output to stdout or the specified file.\n"
Expand All @@ -276,6 +280,7 @@ cmd_help (void)
" report.\n"
" --anonymize-level=<1|2|3> - Anonymization levels: 1 => default, 2 =>\n"
" strong, 3 => pedantic.\n"
" --chunk-size=<256-32768> - Chunk size for every threads. Default is 1024.\n"
" --crawlers-only - Parse and display only crawlers.\n"
" --date-spec=<date|hr|min> - Date specificity. Possible values: `date`\n"
" (default), `hr` or `min`.\n"
Expand Down Expand Up @@ -578,6 +583,16 @@ parse_long_opt (const char *name, const char *oarg) {
if (!strcmp ("all-static-files", name))
conf.all_static_files = 1;

/* chunk size */
if (!strcmp ("chunk-size", name)) {
/* Recommended chunk size is 256 - 32768, hard limit is 32 - 1048576. */
conf.chunk_size = atoi (oarg);
if (conf.chunk_size < 32)
FATAL ("The hard lower limit of --chunk-size is 32.");
if (conf.chunk_size > 1048576)
FATAL ("The hard limit of --chunk-size is 1048576.");
}

/* crawlers only */
if (!strcmp ("crawlers-only", name))
conf.crawlers_only = 1;
Expand Down Expand Up @@ -627,7 +642,8 @@ parse_long_opt (const char *name, const char *oarg) {

/* ignore status code */
if (!strcmp ("ignore-status", name))
set_array_opt (oarg, conf.ignore_status, &conf.ignore_status_idx, MAX_IGNORE_STATUS);
if (conf.ignore_status_idx < MAX_IGNORE_STATUS)
conf.ignore_status[conf.ignore_status_idx++] = atoi (oarg);

/* ignore static requests */
if (!strcmp ("ignore-statics", name)) {
Expand Down Expand Up @@ -804,6 +820,12 @@ read_option_args (int argc, char **argv) {
case 'i':
conf.hl_header = 1;
break;
case 'j':
/* Recommended 4 threads, soft limit is 6, hard limit is 12. */
conf.jobs = atoi (optarg);
if (conf.jobs > 12)
FATAL ("The hard limit of --jobs is 12.");
break;
case 'q':
conf.ignore_qstr = 1;
break;
Expand Down
Loading

0 comments on commit 1e116d0

Please sign in to comment.