Merge pull request #259 from flask-dashboard/overhead

Overhead

bogdanp05 committed Oct 7, 2019
2 parents 1a9633e + 45d570e commit 78d2c04
Showing 24 changed files with 262 additions and 112 deletions.
9 changes: 9 additions & 0 deletions docs/changelog.rst
@@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
This project adheres to `Semantic Versioning <http://semver.org/>`_.
Please note that the changes before version 1.10.0 have not been documented.

v3.0.6
----------
Changed

- Removed profiler feature from monitoring level 2
- Added outlier detection feature to monitoring level 3
- Configurable profiler sampling period, with 5 ms default
- Implemented an in-memory cache for performance improvements

v3.0.0
----------
Changed
72 changes: 37 additions & 35 deletions docs/functionality.rst
@@ -112,42 +112,9 @@ that endpoint. The following data is recorded:
print(request.environ['REMOTE_ADDR'])
Monitoring Level 2 - Profiler
Monitoring Level 2 - Outliers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When the monitoring level is set to 2, the Dashboard performs a
`statistical profiling <https://docs.python.org/3/library/profile.html#what-is-deterministic-profiling>`_
of all the requests coming to that endpoint. This means that a second
thread is launched in parallel with the one processing the request; it
periodically samples the processing thread and analyzes its current stack
trace. From this information, the Dashboard infers how long every function
call inside the endpoint code takes to execute.

The profiler is one of the most powerful features of the Dashboard, pointing to
where your optimization efforts should be directed, one level of abstraction
lower than the performance monitoring of Level 1. To access this information,
you have to:

1. Go to the Overview tab in the left menu: http://localhost:5000/dashboard/overview

2. Select an endpoint for which the monitoring level is or was at some point at least 2.

3. Go to the Profiler tab: http://localhost:5000/dashboard/endpoint/:endpoint_id:/profiler

4. Go to the Grouped Profiler tab: http://localhost:5000/dashboard/endpoint/:endpoint_id:/grouped-profiler

The Profiler tab shows all individual profiled requests of an endpoint
in the form of an execution tree. Each code line is displayed along with
its execution time and its share of the total execution time of the request.

The Grouped Profiler tab shows the merged execution of up to the 100 most
recent profiled requests of an endpoint, displayed both as a table and as
a Sunburst graph. For each code line, the table shows the number of Hits
(i.e., how many times it was executed), the average execution time with
its standard deviation, and the total execution time.

Monitoring Level 3 - Outliers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When the monitoring level is set to 3, the Dashboard collects extra information
When the monitoring level is set to 2, the Dashboard collects extra information
about slow requests.

It is useful to investigate why certain requests take far longer to process than others.
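The outlier check can be sketched as follows, assuming the semantics of the ``OUTLIER_DETECTION_CONSTANT`` configuration value (2.5 by default): a request is treated as slow when its duration exceeds the constant times the endpoint's average duration. The helper name here is illustrative, not part of FMD:

```python
def is_outlier(duration, average_duration, detection_constant=2.5):
    # A request is flagged when it takes more than detection_constant
    # times the endpoint's all-time average duration.
    if average_duration <= 0:
        return False  # no baseline yet, nothing to compare against
    return duration > detection_constant * average_duration
```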
@@ -183,6 +150,41 @@ The data that is collected from outliers can be seen by the following procedure:
3. Go to the Outliers tab: http://localhost:5000/dashboard/endpoint/:endpoint_id:/outliers


Monitoring Level 3 - Profiler
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When the monitoring level is set to 3, the Dashboard performs a
`statistical profiling <https://docs.python.org/3/library/profile.html#what-is-deterministic-profiling>`_
of all the requests coming to that endpoint. This means that a second
thread is launched in parallel with the one processing the request; it
periodically samples the processing thread and analyzes its current stack
trace. From this information, the Dashboard infers how long every function
call inside the endpoint code takes to execute.
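The sampling idea described above can be sketched in a few lines. This is a minimal illustration, not the Dashboard's actual ``StacktraceProfiler``; all names are made up, and only the top stack frame is counted rather than the full trace:

```python
import sys
import threading
import time
from collections import Counter


def _sample(target_ident, period, counts, stop_event):
    # Periodically grab the monitored thread's current stack frame and
    # count the function found on top of it; the relative counts
    # approximate the share of wall time spent in each function.
    while not stop_event.is_set():
        frame = sys._current_frames().get(target_ident)
        if frame is not None:
            counts[frame.f_code.co_name] += 1
        time.sleep(period)


def profile_call(fn, period=0.005):
    # Run fn() in the current thread while a sampler thread watches it,
    # mirroring the 5 ms default sampling period mentioned in the changelog.
    counts = Counter()
    stop_event = threading.Event()
    sampler = threading.Thread(
        target=_sample,
        args=(threading.current_thread().ident, period, counts, stop_event),
    )
    sampler.start()
    try:
        result = fn()
    finally:
        stop_event.set()
        sampler.join()
    return result, counts
```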

The profiler is one of the most powerful features of the Dashboard, pointing to
where your optimization efforts should be directed, one level of abstraction
lower than the performance monitoring of Level 1. To access this information,
you have to:

1. Go to the Overview tab in the left menu: http://localhost:5000/dashboard/overview

2. Select an endpoint for which the monitoring level is or was at some point at least 3.

3. Go to the Profiler tab: http://localhost:5000/dashboard/endpoint/:endpoint_id:/profiler

4. Go to the Grouped Profiler tab: http://localhost:5000/dashboard/endpoint/:endpoint_id:/grouped-profiler

The Profiler tab shows all individual profiled requests of an endpoint
in the form of an execution tree. Each code line is displayed along with
its execution time and its share of the total execution time of the request.

The Grouped Profiler tab shows the merged execution of up to the 100 most
recent profiled requests of an endpoint, displayed both as a table and as
a Sunburst graph. For each code line, the table shows the number of Hits
(i.e., how many times it was executed), the average execution time with
its standard deviation, and the total execution time.
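The per-line statistics in that table can be sketched as follows. The function and field names are illustrative; the Dashboard computes these figures from its own stored measurements:

```python
from statistics import mean, pstdev


def group_line_timings(samples):
    # samples: (code_line, execution_time) pairs gathered from the
    # individual profiled requests being merged.
    per_line = {}
    for line, duration in samples:
        per_line.setdefault(line, []).append(duration)
    return {
        line: {
            "hits": len(durations),          # times the line was executed
            "average": mean(durations),      # average execution time
            "stdev": pstdev(durations),      # spread across requests
            "total": sum(durations),         # total execution time
        }
        for line, durations in per_line.items()
    }
```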



2. Data Visualization
----------------------

Binary file modified docs/img/monitoring_levels.png
7 changes: 7 additions & 0 deletions flask_monitoringdashboard/__init__.py
@@ -50,15 +50,22 @@ def bind(app, schedule=True):

# Add wrappers to the endpoints that have to be monitored
from flask_monitoringdashboard.core.measurement import init_measurement
from flask_monitoringdashboard.core.cache import init_cache
from flask_monitoringdashboard.core import custom_graph

blueprint.before_app_first_request(init_measurement)
blueprint.before_app_first_request(init_cache)
if schedule:
custom_graph.init(app)

# register the blueprint to the app
app.register_blueprint(blueprint, url_prefix='/' + config.link)

# flush cache to db before shutdown
import atexit
from flask_monitoringdashboard.core.cache import flush_cache
atexit.register(flush_cache)
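The ``atexit`` hook added above follows a common flush-on-shutdown pattern, sketched here with a plain dict standing in for the Dashboard's cache (all names are illustrative):

```python
import atexit

# In-memory buffer standing in for the Dashboard's cache.
pending_writes = {}


def flush_pending():
    # Persist (here: simply return) whatever is still buffered, then
    # clear it, so nothing is lost when the interpreter shuts down.
    flushed = dict(pending_writes)
    pending_writes.clear()
    return flushed


# Registered callbacks run at normal interpreter exit, in reverse
# order of registration.
atexit.register(flush_pending)
```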


def add_graph(title, func, **schedule):
"""
2 changes: 1 addition & 1 deletion flask_monitoringdashboard/constants.json
@@ -1,5 +1,5 @@
{
"version": "3.0.5",
"version": "3.0.6",
"author": "Patrick Vogel, Bogdan Petre",
"email": "flask.monitoringdashboard@gmail.com"
}
3 changes: 3 additions & 0 deletions flask_monitoringdashboard/controllers/endpoints.py
@@ -2,6 +2,7 @@

from numpy import median

import flask_monitoringdashboard.core.cache as cache
from flask_monitoringdashboard import config
from flask_monitoringdashboard.core.colors import get_color
from flask_monitoringdashboard.core.measurement import add_decorator
@@ -27,6 +28,8 @@ def get_endpoint_overview(db_session):
today_local = now_local.replace(hour=0, minute=0, second=0, microsecond=0)
today_utc = to_utc_datetime(today_local)

# First flush last requested info to db
cache.flush_cache()
error_hits_criterion = and_(Request.status_code >= 400,
Request.status_code < 600)

2 changes: 1 addition & 1 deletion flask_monitoringdashboard/core/__init__.py
@@ -15,4 +15,4 @@
application, or for a specific endpoint.
- timezone: handles utc-timezone <==> local-timezone
- utils: for other functions
"""
"""
101 changes: 101 additions & 0 deletions flask_monitoringdashboard/core/cache.py
@@ -0,0 +1,101 @@
"""
Contains the in memory cache used to increase the FMD performance.
"""
import datetime
from multiprocessing import Lock

from flask_monitoringdashboard.core.rules import get_rules
from flask_monitoringdashboard.database import session_scope
from flask_monitoringdashboard.database.endpoint import get_last_requested, get_endpoints_hits, get_endpoint_averages, \
update_last_requested

memory_cache = {}
mutex = Lock()


class EndpointInfo(object):
"""
Info about an endpoint that is stored in the memory cache.
"""
def __init__(self, last_requested=None, average_duration=None, hits=None):
# timestamp of the most recent request
self.last_requested = last_requested
# all-time average duration
self.average_duration = average_duration if average_duration else 0
# all-time number of requests
self.hits = hits if hits else 0

def set_last_requested(self, last_requested):
with mutex:
self.last_requested = last_requested

def set_duration(self, duration):
with mutex:
self.average_duration = (self.average_duration * self.hits + duration)/float(self.hits + 1)
self.hits += 1

def get_duration(self):
with mutex:
return self.average_duration


def init_cache():
"""
This should be added to the list of functions that are executed before the first request.
It initializes the in-memory cache from the db
"""
global memory_cache
with session_scope() as db_session:
last_req_dict = dict(get_last_requested(db_session))
hits_dict = dict(get_endpoints_hits(db_session))
averages_dict = dict(get_endpoint_averages(db_session))
for rule in get_rules():
memory_cache[rule.endpoint] = EndpointInfo(last_requested=last_req_dict.get(rule.endpoint),
average_duration=averages_dict.get(rule.endpoint),
hits=hits_dict.get(rule.endpoint))


def update_last_requested_cache(endpoint_name):
"""
Use this instead of updating the last requested to the database.
"""
global memory_cache
memory_cache.get(endpoint_name).set_last_requested(datetime.datetime.utcnow())


def update_duration_cache(endpoint_name, duration):
"""
Use this together with adding a request to the database.
"""
global memory_cache
memory_cache.get(endpoint_name).set_last_requested(datetime.datetime.utcnow())
memory_cache.get(endpoint_name).set_duration(duration)


def get_avg_endpoint(endpoint_name):
"""
Return the average of the request duration for an endpoint.
"""
global memory_cache
return memory_cache.get(endpoint_name).get_duration()


def get_last_requested_overview():
"""
Get the last requested values from the cache for the overview page.
"""
global memory_cache
return [(endpoint_name, endpoint_info.last_requested) for endpoint_name, endpoint_info in memory_cache.items()]


def flush_cache():
"""
Flushes cache changes to the db. To be called at shut down.
"""
global memory_cache
if not memory_cache:
return
with session_scope() as db_session:
for endpoint_name, endpoint_info in memory_cache.items():
if endpoint_info.last_requested:
update_last_requested(db_session, endpoint_name, endpoint_info.last_requested)
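The ``set_duration`` method in the listing above folds each new duration into the all-time average incrementally, so no per-request history has to be kept. The update rule is the standard running mean, shown here in isolation (illustrative helper, not part of FMD):

```python
def update_running_average(average, hits, new_duration):
    # Same arithmetic as EndpointInfo.set_duration: the old average is
    # re-weighted by the old hit count, the new duration is added, and
    # the sum is divided by the new hit count.
    new_average = (average * hits + new_duration) / float(hits + 1)
    return new_average, hits + 1
```

After any sequence of updates the result equals the batch mean over all observations seen so far.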
5 changes: 3 additions & 2 deletions flask_monitoringdashboard/core/config/__init__.py
@@ -27,7 +27,7 @@ def __init__(self):
self.link = 'dashboard'
self.monitor_level = 1
self.outlier_detection_constant = 2.5
self.sampling_period = 0
self.sampling_period = 5/1000.0
self.enable_logging = False

# database
@@ -98,6 +98,7 @@ def init_from(self, file=None, envvar=None, log_verbose=False):
:param file: a string pointing to the location of the config-file.
:param envvar: a string specifying which environment variable holds the config file location
:param log_verbose: flag to print the location of the config file.
"""

if envvar:
@@ -121,7 +122,7 @@ def init_from(self, file=None, envvar=None, log_verbose=False):
self.monitor_level = parse_literal(parser, 'dashboard', 'MONITOR_LEVEL', self.monitor_level)
self.outlier_detection_constant = parse_literal(parser, 'dashboard', 'OUTlIER_DETECTION_CONSTANT',
self.outlier_detection_constant)
self.sampling_period = parse_literal(parser, 'dashboard', 'SAMPLING_RATE', self.sampling_period) / 1000
self.sampling_period = parse_literal(parser, 'dashboard', 'SAMPLING_RATE', self.sampling_period) / 1000.0
self.enable_logging = parse_bool(parser, 'dashboard', 'ENABLE_LOGGING', self.enable_logging)
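Note the division by ``1000.0`` in the changed line above: ``SAMPLING_RATE`` is configured in milliseconds, while the profiler works with a period in seconds, and the float literal avoids integer truncation on Python 2. A standalone sketch using plain ``configparser`` as a simplified stand-in for the project's ``parse_literal`` helper:

```python
from configparser import ConfigParser


def read_sampling_period(config_text, default_ms=5):
    # SAMPLING_RATE is given in milliseconds in the config file; the
    # returned value is the sampling period in seconds. Dividing by
    # 1000.0 (not 1000) keeps the result a float on Python 2 as well.
    parser = ConfigParser()
    parser.read_string(config_text)
    ms = parser.getint("dashboard", "SAMPLING_RATE", fallback=default_ms)
    return ms / 1000.0
```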

# parse 'authentication'
8 changes: 3 additions & 5 deletions flask_monitoringdashboard/core/measurement.py
@@ -7,7 +7,7 @@

from flask_monitoringdashboard import config
from flask_monitoringdashboard.core.profiler import start_thread_last_requested, start_performance_thread, \
start_profiler_thread, start_profiler_and_outlier_thread
start_outlier_thread, start_profiler_and_outlier_thread
from flask_monitoringdashboard.core.rules import get_rules
from flask_monitoringdashboard.database import session_scope
from flask_monitoringdashboard.database.endpoint import get_endpoint_by_name
@@ -59,9 +59,7 @@ def add_wrapper1(endpoint, fun):
def wrapper(*args, **kwargs):
start_time = time.time()
result = fun(*args, **kwargs)

status_code = result[1] if isinstance(result, tuple) else 200

duration = time.time() - start_time
start_performance_thread(endpoint, duration, status_code)
return result
@@ -73,12 +71,12 @@ def wrapper(*args, **kwargs):
def add_wrapper2(endpoint, fun):
@wraps(fun)
def wrapper(*args, **kwargs):
thread = start_profiler_thread(endpoint)
outlier = start_outlier_thread(endpoint)
start_time = time.time()
result = fun(*args, **kwargs)
status_code = result[1] if isinstance(result, tuple) else 200
duration = time.time() - start_time
thread.stop(duration, status_code)
outlier.stop(duration, status_code)
return result

wrapper.original = fun
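The wrapper pattern shared by ``add_wrapper1`` and ``add_wrapper2`` can be sketched in isolation. Here the ``report`` callback stands in for starting a performance or outlier thread, and all names are illustrative:

```python
import time
from functools import wraps


def monitor(report):
    # report(endpoint_name, duration, status_code) receives the outcome
    # of each call to the wrapped view function.
    def decorator(fun):
        @wraps(fun)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            result = fun(*args, **kwargs)
            # Flask views may return a bare body or a (body, status) tuple.
            status_code = result[1] if isinstance(result, tuple) else 200
            report(fun.__name__, time.time() - start_time, status_code)
            return result
        wrapper.original = fun  # keep the undecorated view reachable
        return wrapper
    return decorator
```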
14 changes: 12 additions & 2 deletions flask_monitoringdashboard/core/profiler/__init__.py
@@ -30,7 +30,7 @@ def start_performance_thread(endpoint, duration, status_code):


def start_profiler_thread(endpoint):
""" Starts a thread that monitors the main thread. """
""" Starts a thread that profiles the main thread. """
current_thread = threading.current_thread().ident
ip = request.environ['REMOTE_ADDR']
group_by = get_group_by()
@@ -39,12 +39,22 @@ def start_profiler_thread(endpoint):
return thread


def start_outlier_thread(endpoint):
""" Starts a thread that collects outliers."""
current_thread = threading.current_thread().ident
ip = request.environ['REMOTE_ADDR']
group_by = get_group_by()
thread = OutlierProfiler(current_thread, endpoint, ip, group_by)
thread.start()
return thread


def start_profiler_and_outlier_thread(endpoint):
""" Starts two threads: PerformanceProfiler and StacktraceProfiler. """
current_thread = threading.current_thread().ident
ip = request.environ['REMOTE_ADDR']
group_by = get_group_by()
outlier = OutlierProfiler(current_thread, endpoint)
outlier = OutlierProfiler(current_thread, endpoint, ip, group_by)
thread = StacktraceProfiler(current_thread, endpoint, ip, group_by, outlier)
thread.start()
outlier.start()
6 changes: 2 additions & 4 deletions flask_monitoringdashboard/core/profiler/baseProfiler.py
@@ -1,7 +1,6 @@
import threading

from flask_monitoringdashboard.database import session_scope
from flask_monitoringdashboard.database.endpoint import update_last_accessed
from flask_monitoringdashboard.core.cache import update_last_requested_cache


class BaseProfiler(threading.Thread):
@@ -15,5 +14,4 @@ def __init__(self, endpoint):
threading.Thread.__init__(self)

def run(self):
with session_scope() as db_session:
update_last_accessed(db_session, endpoint_name=self._endpoint.name)
update_last_requested_cache(endpoint_name=self._endpoint.name)
