Skip to content

Commit

Permalink
[grafana] Implement sensible rules for dashboard refresh interval taming
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Apr 5, 2018
1 parent 5a05a38 commit 920d200
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 58 deletions.
3 changes: 2 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,14 @@ in progress
- [io] Fix MQTT error signalling
- [grafana] Use specific Grafana folder for stuffing instant dashboards into
- [grafana] Use topology information for deriving unique dashboard id from
- [grafana] Add mechanisms for taming the dashboard refresh interval
- [grafana] Add basic mechanisms for taming the dashboard refresh interval
- [core] Improve component loading robustness
- [grafana] Refactoring. Robustness. Run GrafanaManager as service. Add worker service for taming dashboard refresh intervals.
- [command] Add ``kotori-selftest`` program, still in its infancy
- [code] Namespace refactoring
- [grafana] Relocate template resources (.json files)
- [grafana] Subsystem refactoring
- [grafana] Implement sensible rules for dashboard refresh interval taming


.. _kotori-0.20.1:
Expand Down
56 changes: 5 additions & 51 deletions kotori/daq/graphing/grafana/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@


class GrafanaApi(object):
"""
A small wrapper around ``grafana_api_client``.
https://pypi.python.org/pypi/grafana_api_client
"""

def __init__(self, host='localhost', port=3000, username='admin', password='admin'):
self.host = host
Expand Down Expand Up @@ -87,7 +91,7 @@ def create_datasource(self, name=None, data=None):
try:
logger.info('deleting datasource: {}'.format(name))
response = self.grafana_client.datasources[name].delete()
logger.info(slm(response))
print response
except GrafanaClientError as ex:
if '404' in ex.message or 'Dashboard not found' in ex.message:
logger.warn(slm(ex.message))
Expand Down Expand Up @@ -186,53 +190,3 @@ def demo(self):

def get_dashboards(self):
    """
    Search Grafana for all items of type ``dash-db`` and return
    whatever the client's ``search`` call yields.
    """
    search_results = self.grafana_client.search(type='dash-db')
    return search_results

def tame_refresh_interval(self, preset='standard'):
    """
    Set the refresh interval of every dashboard to five minutes.

    :param preset: Name of the taming preset. The value is only reported
                   in the log output; no preset-specific rules are
                   evaluated by this method yet (see the TODO notes
                   inside the loop).

    Introduction
    ------------
    The default dashboard refresh interval of 5 seconds is important
    for instant-on workbench operations. However, the update interval
    is usually just about 5 minutes after the sensor node is in the field.

    Problem
    -------
    Having high refresh rates on many dashboards can increase the overall
    system usage significantly, depending on how many users are displaying
    them in their browsers and the complexity of the database queries
    issued when rendering the dashboard.

    Solution
    --------
    In order to reduce the overall load on the data acquisition system,
    the refresh rate of the dashboards is lowered.

    Rules planned for the "standard" preset, NOT implemented here yet:

    - Leave all dashboards completely untouched which have been updated during the last 14 days
    - Apply a refresh interval of 5 minutes for all dashboards having the "live" tag
    - Completely disable refreshing for all dashboards having the "historical" tag
    - Apply a refresh interval of 30 minutes for all other dashboards
    """
    # Use the same ``logger`` + ``str.format`` convention as the other
    # methods of this class.
    logger.info('Taming dashboard refresh interval with preset="{preset}"'.format(preset=preset))

    for dashboard_meta in self.get_dashboards():

        # The search result only carries metadata, so fetch the full
        # dashboard by its uid.
        dashboard = self.get_dashboard_by_uid(dashboard_meta['uid'])

        # TODO: Look at dashboard.meta.updated and apply taming only on appropriate threshold.
        #       e.g. u'2018-04-04T20:07:10+02:00'
        # TODO: Look at list of tags and apply interval=null if it contains "historical".
        # TODO: Look at list of tags and apply interval=5m if it contains "live".

        # Update refresh interval and write the dashboard back.
        dashboard['dashboard']['refresh'] = '5m'
        self.grafana_client.dashboards.db.create(**dashboard)
104 changes: 101 additions & 3 deletions kotori/daq/graphing/grafana/manager.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# -*- coding: utf-8 -*-
# (c) 2015-2018 Andreas Motl, <andreas@getkotori.org>
import arrow
from twisted.logger import Logger
from twisted.application.service import MultiService

from kotori.daq.services import MultiServiceMixin
from kotori.daq.graphing.grafana.api import GrafanaApi
from kotori.daq.graphing.grafana.dashboard import GrafanaDashboardBuilder, GrafanaDashboardModel
from kotori.daq.graphing.grafana.service import DashboardRefreshTamingService
from kotori.util.common import KeyCache
from kotori.util.common import KeyCache, SmartBunch

log = Logger()

Expand Down Expand Up @@ -137,6 +138,99 @@ def provision(self, storage_location, data, topology=None):
# Remember dashboard/panel creation for this kind of data inflow
self.keycache.set(*signature)

def tame_refresh_interval(self, preset='standard', force=False):
    """
    Tame the refresh interval of all dashboards.

    :param preset: Name of the taming preset. Only the "standard" rules
                   described below are implemented; the value itself is
                   currently just reported in the log output.
    :param force:  When true, dashboards modified during the last 14 days
                   are tamed as well instead of being skipped.

    Introduction
    ------------
    The default dashboard refresh interval of 5 seconds is important
    for instant-on workbench operations. However, the update interval
    is usually just about 5 minutes after the sensor node is in the field.

    Problem
    -------
    Having high refresh rates on many dashboards can increase the overall
    system usage significantly, depending on how many users are displaying
    them in their browsers and the complexity of the database queries
    issued when rendering the dashboard.

    Solution
    --------
    In order to reduce the overall load on the data acquisition system,
    the refresh rate of dashboards not modified for 14 days is lowered
    according to these rules:

    - Leave all dashboards completely untouched which have been updated during the last 14 days
    - Apply a refresh interval of 5 minutes for all dashboards having the "live" tag
    - Completely disable refreshing for all dashboards having the "historical" tag
    - Apply a refresh interval of 30 minutes for all other dashboards
    """

    dashboard_list = self.grafana_api.get_dashboards()

    log.info('Taming dashboard refresh interval with preset="{preset}" for {count} dashboards',
             preset=preset, count=len(dashboard_list))

    # Date of 14 days in the past
    before_14_days = arrow.utcnow().shift(days=-14)

    for dashboard_meta in dashboard_list:

        dashboard_meta = SmartBunch.bunchify(dashboard_meta)
        dashboard_uid = dashboard_meta['uid']

        # Human-readable identification used in all log messages below
        whoami = u'title="{title}", uid="{uid}"'.format(title=dashboard_meta['title'], uid=dashboard_uid)

        # Request dashboard by uid
        response = self.grafana_api.get_dashboard_by_uid(dashboard_uid)
        response = SmartBunch.bunchify(response)

        # Get effective dashboard information from response
        folder_id = response.meta.folderId
        dashboard = response.dashboard

        # Compute new dashboard refresh interval by applying taming rules
        # Units: Mwdhmsy

        # 1. Check dashboard modification time against threshold
        modification_time = arrow.get(response.meta.updated)
        if not force and modification_time > before_14_days:
            log.debug('Skip taming dashboard with {whoami}, it has recently been modified', whoami=whoami)
            continue

        # 2. Apply refresh interval by looking at the dashboard tags.
        #    A search result without a "tags" entry is treated like an
        #    untagged dashboard instead of raising.
        tags = getattr(dashboard_meta, 'tags', None) or []
        if 'live' in tags:
            refresh_interval = '5m'
        elif 'historical' in tags:
            refresh_interval = None
        else:
            refresh_interval = '30m'

        # Skip update procedure if refresh interval hasn't changed at all.
        # A dashboard without a "refresh" entry is treated as "no refresh"
        # instead of failing on the attribute lookup.
        if refresh_interval == getattr(dashboard, 'refresh', None):
            continue

        # Set new refresh interval
        dashboard.refresh = refresh_interval

        # Update dashboard, keeping it inside its current folder
        log.debug('Taming dashboard with {whoami} to refresh interval of {interval}', whoami=whoami, interval=refresh_interval)
        response = self.grafana_api.grafana_client.dashboards.db.create(dashboard=dashboard, folderId=folder_id)

        # Report about the outcome
        if response['status'] == 'success':
            log.info('Successfully tamed dashboard with {whoami}', whoami=whoami)
        else:
            log.warn('Failed taming dashboard with {whoami}', whoami=whoami)


if __name__ == '__main__':
"""
Expand All @@ -151,7 +245,11 @@ def provision(self, storage_location, data, topology=None):
twisted.python.log.startLogging(sys.stderr)

# Connect to Grafana
grafana = GrafanaApi(host='localhost', username='admin', password='admin')
manager = GrafanaManager(
settings={"grafana": dict(host='localhost', username='admin', password='admin')},
channel={}
)
grafana = manager.grafana_api

# Create Grafana Datasource object
grafana.create_datasource('hiveeyes_test', {
Expand All @@ -176,4 +274,4 @@ def provision(self, storage_location, data, topology=None):
grafana.create_dashboard(dashboard)

# Run one-shot task to tame the dashboard intervals
grafana.tame_refresh_interval()
manager.tame_refresh_interval()
4 changes: 1 addition & 3 deletions kotori/daq/graphing/grafana/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,4 @@ def tamer_start(self, now=False):
self.tamer.start(now=now)

def tamer_process(self):
    """
    Run one taming pass through the parent object.

    Only the parent's own ``tame_refresh_interval`` is invoked. Calling
    the API-level variant first would rewrite every dashboard, refresh
    its modification time and thereby make the rule-based pass skip all
    of them as "recently modified".
    """
    # force=False: dashboards modified recently are left untouched.
    self.parent.tame_refresh_interval(preset=self.preset, force=False)

0 comments on commit 920d200

Please sign in to comment.