Added scan log analyzer for should_grep logs
andresriancho committed May 24, 2019
1 parent 663ab9d commit 2fa5a2f
Showing 2 changed files with 99 additions and 0 deletions.
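
Judging by the new module and its plot labels, the analyzer parses the periodic "Grep consumer should_grep() stats" lines from a w3af scan log and charts what percentage of HTTP request and response grep tasks were accepted versus rejected over time.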
graphs/should_grep_stats.py
@@ -0,0 +1,96 @@
import re
import json
import plotille

from utils.graph import num_formatter
from utils.utils import (get_first_timestamp,
                         get_last_timestamp,
                         get_line_epoch)


# Capture the stats dict that w3af's grep consumer writes to the scan log
SHOULD_GREP_STATS = re.compile(r'Grep consumer should_grep\(\) stats: (.*)$')


def to_dict(match_data):
    # The captured stats are a Python dict repr, which uses single quotes;
    # swap them for double quotes so the string parses as JSON
    match_data = match_data.replace("'", '"')
    return json.loads(match_data)


def get_should_grep_data(scan_log_filename, scan):
    scan.seek(0)

    should_grep_data = []
    should_grep_timestamps = []

    for line in scan:
        match = SHOULD_GREP_STATS.search(line)
        if not match:
            continue

        try:
            stats_dict = to_dict(match.group(1))
        except ValueError:
            print('Warning: %s is not valid JSON' % match.group(1))
            continue
        else:
            should_grep_data.append(stats_dict)
            should_grep_timestamps.append(get_line_epoch(line))

    return should_grep_data, should_grep_timestamps


def draw_should_grep(scan_log_filename, scan):
    should_grep_data, should_grep_timestamps = get_should_grep_data(scan_log_filename, scan)

    if not should_grep_data:
        print('No should_grep data found')
        return

    # Get the first and last timestamps to use as x-axis limits in the graph
    first_timestamp = get_first_timestamp(scan)
    last_timestamp = get_last_timestamp(scan)
    spent_time_epoch = last_timestamp - first_timestamp
    should_grep_timestamps = [ts - first_timestamp for ts in should_grep_timestamps]

    last_data = should_grep_data[-1]

    print('should_grep() stats')
    print('    Latest should_grep() count: %r' % last_data)

    # Express the latest counts as percentages of the total
    total = float(sum(v for k, v in last_data.iteritems()))
    data_percent = dict((k, (v / total) * 100) for k, v in last_data.iteritems())
    print('    Latest should_grep() percentages: %r' % data_percent)
    print('')

    fig = plotille.Figure()
    fig.width = 90
    fig.height = 20
    fig.register_label_formatter(float, num_formatter)
    fig.register_label_formatter(int, num_formatter)
    fig.y_label = 'Percentage of rejected and accepted HTTP request and response grep tasks'
    fig.x_label = 'Time'
    fig.color_mode = 'byte'
    fig.set_x_limits(min_=0, max_=spent_time_epoch)
    fig.set_y_limits(min_=0, max_=None)

    # Plot one line per stats key, converting each data point to a percentage
    for key in should_grep_data[-1].keys():
        key_slice = []

        for data_point in should_grep_data:
            total = float(sum(v for k, v in data_point.iteritems()))
            if total == 0:
                key_slice.append(0)
                continue

            data_percent = dict((k, (v / total) * 100) for k, v in data_point.iteritems())
            key_slice.append(data_percent[key])

        fig.plot(should_grep_timestamps,
                 key_slice,
                 label=key)

    print(fig.show(legend=True))
    print('')
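
For reference, a scan log line that this analyzer would pick up looks like the following (the dict keys are illustrative, not taken from the commit); to_dict() rewrites the single-quoted repr into double quotes so json.loads() can parse it, and get_line_epoch() supplies the matching timestamp:

    Grep consumer should_grep() stats: {'accepted': 1500, 'rejected': 500}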
@@ -29,6 +29,7 @@
from graphs.timeout import draw_timeout, get_timeout_data
from graphs.rtt_histogram import draw_rtt_histogram, get_rtt_histogram_data
from graphs.rtt import draw_rtt, get_rtt_data
from graphs.should_grep_stats import draw_should_grep, get_should_grep_data
from graphs.not_found_requests import get_not_found_requests_over_time_data, draw_not_found_requests_over_time
from graphs.not_found_cache_rate import get_not_found_cache_rate_data, draw_not_found_cache_rate_over_time
from graphs.progress_delta import show_progress_delta
@@ -64,6 +65,7 @@ def get_console_functions():
draw_http_requests_over_time,
get_crawling_stats,
#generate_crawl_graph, ######
draw_should_grep,
draw_queue_size_grep,
draw_queue_size_audit,
draw_queue_size_crawl,
@@ -108,6 +110,7 @@ def get_json_functions():
get_http_requests_over_time_data,
get_crawling_stats,
#generate_crawl_graph, ######
get_should_grep_data,
get_queue_size_grep_data,
get_queue_size_audit_data,
get_queue_size_crawl_data,
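A minimal usage sketch (the scan log path and the direct-call style are assumptions, not part of the commit): both functions take the scan log filename plus an open file handle, mirroring the other graphs.* analyzers registered above:

    from graphs.should_grep_stats import draw_should_grep, get_should_grep_data

    scan_log_filename = 'scan.log'       # assumed path to a w3af scan log
    scan = open(scan_log_filename)

    # JSON output mode: the raw stats dicts and their epoch timestamps
    data, timestamps = get_should_grep_data(scan_log_filename, scan)

    # Console output mode: latest counts, percentages and a plotille chart
    draw_should_grep(scan_log_filename, scan)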
