Skip to content

Commit

Permalink
Merge pull request #268 from flask-dashboard/reporting
Browse files Browse the repository at this point in the history
Reporting
  • Loading branch information
mircealungu committed Nov 7, 2019
2 parents a454ce3 + bc40bfd commit dedfac4
Show file tree
Hide file tree
Showing 22 changed files with 737 additions and 95 deletions.
1 change: 1 addition & 0 deletions flask_monitoringdashboard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def bind(app, schedule=True):
profiler,
version,
auth,
reporting,
)
import flask_monitoringdashboard.views

Expand Down
56 changes: 25 additions & 31 deletions flask_monitoringdashboard/controllers/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def get_all_request_status_code_counts(db_session, endpoint_id):
"""
return (
db_session.query(Request.status_code, func.count(Request.status_code))
.filter(and_(Request.endpoint_id == endpoint_id, Request.status_code.isnot(None)))
.group_by(Request.status_code)
.all()
.filter(Request.endpoint_id == endpoint_id, Request.status_code.isnot(None))
.group_by(Request.status_code)
.all()
)


Expand All @@ -67,24 +67,21 @@ def get_status_code_distribution(db_session, endpoint_id):
return {status_code: frequency / total_count for (status_code, frequency) in status_code_counts}


def get_status_code_frequencies(db_session, endpoint_id, *criterion):
    """
    Gets the frequencies of each status code.
    :param db_session: session for the database
    :param endpoint_id: id for the endpoint
    :param criterion: Optional criteria used to filter the requests.
    :return: A dict where the key is the status code and the value is the number of requests that
        returned that status code. Example: a return value of `{ 200: 105, 404: 3 }` means that
        status code 200 was returned 105 times and 404 was returned 3 times.
    """
    status_code_counts = (
        db_session.query(Request.status_code, func.count(Request.status_code))
        # Requests without a recorded status code are left out of the counts.
        .filter(Request.endpoint_id == endpoint_id, Request.status_code.isnot(None), *criterion)
        .group_by(Request.status_code)
        .all()
    )

    # Each row is a (status_code, count) pair, so it maps directly onto a dict.
    return dict(status_code_counts)

def get_error_requests(db_session, endpoint_id, *criterion):
    """
    Gets all requests that returned an error status code (400 up to and including 599).
    :param db_session: session for the database
    :param endpoint_id: ID of the endpoint to be queried
    :param criterion: Optional criteria used to filter the requests.
    :return: list of the matching Request objects
    """
    criteria = [
        Request.endpoint_id == endpoint_id,
        Request.status_code.isnot(None),
        Request.status_code >= 400,
        Request.status_code <= 599,
    ]
    # Merge the caller-supplied criteria into the same list so that every
    # condition reaches `filter` as its own positional argument; passing the
    # list itself is not a valid filter criterion.
    criteria.extend(criterion)

    return db_session.query(Request).filter(*criteria).all()


def get_status_code_frequencies_in_interval(db_session, endpoint_id, start_date, end_date):
    """
    Gets the frequencies of each status code for requests selected by the given dates.
    :param db_session: session for the database
    :param endpoint_id: id for the endpoint
    :param start_date: start of the interval, passed to create_time_based_sample_criterion
    :param end_date: end of the interval, passed to create_time_based_sample_criterion
    :return: the result of get_status_code_frequencies restricted by the time-based criteria
    """
    criterion = create_time_based_sample_criterion(start_date, end_date)

    # The criteria come back as a collection and must be unpacked, since
    # get_status_code_frequencies takes them as separate positional arguments.
    return get_status_code_frequencies(db_session, endpoint_id, *criterion)


def get_hourly_load(db_session, endpoint_id, start_date, end_date):
Expand Down
16 changes: 16 additions & 0 deletions flask_monitoringdashboard/core/date_interval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
class DateInterval(object):
    """
    Pair of a start date and an end date, where the start may not come after the end.
    """

    def __init__(self, start_date, end_date):
        """
        :param start_date: first bound of the interval
        :param end_date: second bound of the interval
        :raises ValueError: if start_date is greater than end_date
        """
        if start_date > end_date:
            raise ValueError('start_date must be before or equals to end_date')

        self._start_date, self._end_date = start_date, end_date

    def start_date(self):
        """
        :return: the start date given at construction
        """
        return self._start_date

    def end_date(self):
        """
        :return: the end date given at construction
        """
        return self._end_date

    def __repr__(self):
        # Same text as the string form of the (start, end) tuple.
        return '({!r}, {!r})'.format(self._start_date, self._end_date)
Empty file.
33 changes: 33 additions & 0 deletions flask_monitoringdashboard/core/reporting/mean_permutation_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from __future__ import division
import numpy as np


def mean_diff(x, y):
    """
    :param x: a sequence of numbers
    :param y: a sequence of numbers
    :return: the absolute difference between the mean of `x` and the mean of `y`
    """
    return np.abs(np.mean(x) - np.mean(y))


def mean_permutation_test(x, y, num_rounds=1000):
    """
    Performs a non-parametric test to check whether `x` and `y` come from the same distribution.
    The test statistic is the absolute difference of the sample means.
    :param x: a sample from some distribution
    :param y: a sample to compare x to
    :param num_rounds: number of different permutations to test. Increase this number to increase
        the accuracy
    :return: The p-value: the fraction of random permutations whose statistic is at least as
        extreme as the one observed for `x` and `y`
    :raises ValueError: if `num_rounds` is smaller than 1
    """
    if num_rounds < 1:
        raise ValueError('num_rounds must be at least 1')

    rng = np.random.RandomState()

    m = len(x)
    # hstack builds a new array, so the in-place shuffles below never touch x or y.
    combined = np.hstack((x, y))

    reference_stat = mean_diff(x, y)
    at_least_as_extreme = 0

    for _ in range(num_rounds):
        rng.shuffle(combined)

        # Ties count as "as extreme": with a strict comparison two identical
        # samples would get a p-value of 0 and look maximally significant.
        if mean_diff(combined[:m], combined[m:]) >= reference_stat:
            at_least_as_extreme += 1

    return at_least_as_extreme / num_rounds
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from flask_monitoringdashboard.core.reporting.mean_permutation_test import mean_permutation_test
import numpy as np

from flask_monitoringdashboard.core.reporting.questions.report_question import Answer, ReportQuestion
from flask_monitoringdashboard.database import session_scope

from flask_monitoringdashboard.database.request import get_latencies_sample


class AverageLatencyAnswer(Answer):
    """
    Answer of type 'AVERAGE_LATENCY': carries the significance flag together with the latency
    samples, their averages and the percentual difference between the averages.
    """

    def __init__(self, is_significant, comparison_interval_latencies_sample=None,
                 compared_to_interval_latencies_sample=None, percentual_diff=None, comparison_interval_avg=None,
                 compared_to_interval_avg=None):
        """
        :param is_significant: whether the difference is considered significant
        :param comparison_interval_latencies_sample: latencies sampled in the comparison interval
        :param compared_to_interval_latencies_sample: latencies sampled in the compared-to interval
        :param percentual_diff: difference between the two averages, in percent
        :param comparison_interval_avg: average latency in the comparison interval
        :param compared_to_interval_avg: average latency in the compared-to interval
        """
        super().__init__('AVERAGE_LATENCY')

        self._is_significant = is_significant
        self._percentual_diff = percentual_diff

        # Raw samples
        self._comparison_interval_latencies_sample = comparison_interval_latencies_sample
        self._compared_to_interval_latencies_sample = compared_to_interval_latencies_sample

        # Averages
        self._comparison_interval_avg = comparison_interval_avg
        self._compared_to_interval_avg = compared_to_interval_avg

    def meta(self):
        """
        :return: dict with the samples, the averages and the percentual difference
        """
        samples = {
            'comparison_interval': self._comparison_interval_latencies_sample,
            'compared_to_interval': self._compared_to_interval_latencies_sample,
        }

        return {
            'latencies_sample': samples,
            'comparison_average': self._comparison_interval_avg,
            'compared_to_average': self._compared_to_interval_avg,
            'percentual_diff': self._percentual_diff,
        }

    def is_significant(self):
        """
        :return: the significance flag given at construction
        """
        return self._is_significant


class AverageLatency(ReportQuestion):
    """
    Report question that compares the average latency of an endpoint between two intervals.
    """

    def get_answer(self, endpoint, comparison_interval, compared_to_interval):
        """
        :param endpoint: endpoint under inspection; only its `id` is used
        :param comparison_interval: interval whose latencies are being judged
        :param compared_to_interval: interval that serves as the baseline
        :return: an AverageLatencyAnswer; not significant when either interval has no latencies
        """
        with session_scope() as db_session:
            comparison_sample = get_latencies_sample(db_session, endpoint.id, comparison_interval)
            baseline_sample = get_latencies_sample(db_session, endpoint.id, compared_to_interval)

            # Without data on both sides there is nothing to compare.
            if min(len(comparison_sample), len(baseline_sample)) == 0:
                return AverageLatencyAnswer(is_significant=False)

            comparison_avg = np.average(comparison_sample)
            baseline_avg = np.average(baseline_sample)

            # Relative change of the average with respect to the baseline, in percent.
            # NOTE(review): a baseline average of 0 is not guarded against here - confirm
            # whether that can occur.
            percentual_diff = (comparison_avg - baseline_avg) / baseline_avg * 100

            p_value = mean_permutation_test(comparison_sample, baseline_sample, num_rounds=1000)

            # Significant only when the change is both large (more than 30 percent)
            # and has a p-value below 0.05.
            large_change = abs(float(percentual_diff)) > 30
            is_significant = large_change and p_value < 0.05

            return AverageLatencyAnswer(
                is_significant=is_significant,
                percentual_diff=percentual_diff,
                comparison_interval_latencies_sample=comparison_sample,
                compared_to_interval_latencies_sample=baseline_sample,
                comparison_interval_avg=comparison_avg,
                compared_to_interval_avg=baseline_avg,
            )
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

from abc import ABCMeta, abstractmethod


# Building the base through an explicit ABCMeta call makes the metaclass
# effective on both Python 2 and Python 3. A `__metaclass__` class attribute is
# ignored by Python 3, which left the abstract methods unenforced.
class Answer(ABCMeta('_AbstractAnswerBase', (object,), {})):
    """
    Abstract base for the answer to a report question.

    Subclasses must implement `is_significant` and `meta`; `serialize` combines both into a dict.
    """

    def __init__(self, type):
        """
        :param type: identifier of the kind of answer; emitted under the `type` key by `serialize`
        """
        self.type = type

    @abstractmethod
    def is_significant(self):
        """
        :return: whether this answer is significant
        """
        pass

    @abstractmethod
    def meta(self):
        """
        :return: dict with answer-specific data that is merged into the serialized form
        """
        pass

    def serialize(self):
        """
        :return: dict with the keys `is_significant` and `type`, updated with everything
            returned by `meta` (keys from `meta` win on a clash)
        """
        base = dict(
            is_significant=self.is_significant(),
            type=self.type
        )
        base.update(self.meta())

        return base


# Building the base through an explicit ABCMeta call makes the metaclass
# effective on both Python 2 and Python 3. A `__metaclass__` class attribute is
# ignored by Python 3, which left `get_answer` unenforced.
class ReportQuestion(ABCMeta('_AbstractReportQuestionBase', (object,), {})):
    """
    Abstract base for a question that a report answers by comparing two intervals.
    """

    @abstractmethod
    def get_answer(self, endpoint, comparison_interval, compared_to_interval):
        """
        :param endpoint: endpoint the question is asked about
        :param comparison_interval: interval under inspection
        :param compared_to_interval: interval used as the reference
        :return: the answer to this question
        """
        pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from collections import defaultdict

from flask_monitoringdashboard.controllers.requests import get_status_code_frequencies_in_interval
from flask_monitoringdashboard.core.reporting.questions.report_question import Answer, ReportQuestion
from flask_monitoringdashboard.database import session_scope


class StatusCodeDistributionAnswer(Answer):
    """
    Answer of type 'STATUS_CODE_DISTRIBUTION': a significance flag plus the per-status-code
    percentages it was derived from.
    """

    def __init__(self, is_significant=False, percentages=None):
        """
        :param is_significant: whether the distribution change is considered significant
        :param percentages: per-status-code percentage data, or None when there is none
        """
        super().__init__('STATUS_CODE_DISTRIBUTION')

        self._is_significant = is_significant
        self.percentages = percentages

    def meta(self):
        """
        :return: dict holding the percentages under the `percentages` key
        """
        return {'percentages': self.percentages}

    def is_significant(self):
        """
        :return: the significance flag given at construction
        """
        return self._is_significant


def frequency_to_percentage(freq, total):
    """
    Expresses `freq` as a percentage of `total`.
    :param freq: number of occurrences
    :param total: total number of observations; may not be zero
    :return: `freq` divided by `total`, times 100, as a float
    :raises ValueError: if `total` is zero
    """
    if total != 0:
        share = float(freq) / total
        return share * 100

    raise ValueError('`total` can not be zero!')


class StatusCodeDistribution(ReportQuestion):
    """
    Report question that compares how the status codes of an endpoint are distributed in two
    intervals.
    """

    def get_answer(self, endpoint, comparison_interval, compared_to_interval):
        """
        :param endpoint: endpoint under inspection; only its `id` is used
        :param comparison_interval: interval whose distribution is being judged
        :param compared_to_interval: interval that serves as the baseline
        :return: a StatusCodeDistributionAnswer. It is not significant when either interval has
            no requests; otherwise it is significant when the share of any status code changed
            by more than 3 percentage points in either direction.
        """
        with session_scope() as db_session:
            comparison_interval_frequencies = get_status_code_frequencies_in_interval(
                db_session, endpoint.id,
                comparison_interval.start_date(),
                comparison_interval.end_date())

            compared_to_interval_frequencies = get_status_code_frequencies_in_interval(
                db_session, endpoint.id,
                compared_to_interval.start_date(),
                compared_to_interval.end_date())

            total_requests_comparison_interval = sum(comparison_interval_frequencies.values())
            total_requests_compared_to_interval = sum(compared_to_interval_frequencies.values())

            # A percentage is undefined for an interval without requests.
            if total_requests_comparison_interval == 0 or total_requests_compared_to_interval == 0:
                return StatusCodeDistributionAnswer(is_significant=False)

            # Every status code seen in at least one of the two intervals.
            registered_status_codes = set(compared_to_interval_frequencies).union(
                comparison_interval_frequencies)

            percentages = []
            max_absolute_diff = 0

            for status_code in registered_status_codes:
                # A status code missing from an interval simply occurred zero times there.
                comparison_interval_percentage = frequency_to_percentage(
                    comparison_interval_frequencies.get(status_code, 0),
                    total_requests_comparison_interval)

                compared_to_interval_percentage = frequency_to_percentage(
                    compared_to_interval_frequencies.get(status_code, 0),
                    total_requests_compared_to_interval)

                percentage_diff = comparison_interval_percentage - compared_to_interval_percentage

                percentages.append(dict(
                    status_code=status_code,
                    comparison_interval=comparison_interval_percentage,
                    compared_to_interval=compared_to_interval_percentage,
                    percentage_diff=percentage_diff
                ))

                # Use the magnitude: a drop in the share of a status code is as
                # relevant as a rise, and the signed value would never count it.
                max_absolute_diff = max(max_absolute_diff, abs(percentage_diff))

            return StatusCodeDistributionAnswer(is_significant=max_absolute_diff > 3, percentages=percentages)
38 changes: 34 additions & 4 deletions flask_monitoringdashboard/database/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,42 @@
Contains all functions that access a Request object.
"""
import time
from random import sample

from sqlalchemy import and_
from sqlalchemy import and_, func

from flask_monitoringdashboard.database import Request


def get_latencies_in_timeframe(db_session, endpoint_id, start_date, end_date):
    """
    Gets the durations of the requests of an endpoint selected by the given dates.
    :param db_session: session for the database
    :param endpoint_id: id of the endpoint
    :param start_date: start of the timeframe, passed to create_time_based_sample_criterion
    :param end_date: end of the timeframe, passed to create_time_based_sample_criterion
    :return: list with the duration of every matching request
    """
    time_filters = create_time_based_sample_criterion(start_date, end_date)

    query = db_session.query(Request.duration)
    query = query.filter(Request.endpoint_id == endpoint_id, *time_filters)

    durations = []
    for row in query.all():
        durations.append(row.duration)
    return durations


def get_latencies_sample(db_session, endpoint_id, interval, sample_size=500):
    """
    Gets a random sample of the request durations of an endpoint within an interval.
    :param db_session: session for the database
    :param endpoint_id: id of the endpoint
    :param interval: object exposing `start_date()` and `end_date()`
    :param sample_size: maximum number of durations to return
    :return: list with at most `sample_size` durations
    """
    criterion = create_time_based_sample_criterion(interval.start_date(), interval.end_date())

    dialect = db_session.bind.dialect.name

    # Name of the SQL function that produces a random sort key, per dialect.
    if dialect in ('sqlite', 'postgresql'):
        order_by = func.random()
    elif dialect == 'mysql':
        order_by = func.rand()
    else:
        order_by = None

    if order_by is not None:
        # Let the database pick the sample so only `sample_size` rows are transferred.
        items = db_session.query(Request.duration) \
            .filter(Request.endpoint_id == endpoint_id, *criterion) \
            .order_by(order_by) \
            .limit(sample_size) \
            .all()

        return [item.duration for item in items]

    # Unknown dialect: fetch everything and sample in Python, so that
    # `sample_size` is honoured here as well.
    durations = get_latencies_in_timeframe(db_session, endpoint_id, interval.start_date(), interval.end_date())

    if sample_size is not None and len(durations) > sample_size:
        return sample(durations, sample_size)

    return durations


def add_request(db_session, duration, endpoint_id, ip, group_by, status_code):
""" Adds a request to the database. Returns the id.
:param status_code: status code of the request
Expand Down Expand Up @@ -53,9 +83,9 @@ def get_date_of_first_request_version(db_session, version):
"""
result = (
db_session.query(Request.time_requested)
.filter(Request.version_requested == version)
.order_by(Request.time_requested)
.first()
.filter(Request.version_requested == version)
.order_by(Request.time_requested)
.first()
)
if result:
return int(time.mktime(result[0].timetuple()))
Expand Down

0 comments on commit dedfac4

Please sign in to comment.