Skip to content

Commit

Permalink
fix: change housekeeping delete threshold to seconds
Browse files Browse the repository at this point in the history
  • Loading branch information
satterly committed Apr 28, 2021
1 parent 7fee108 commit 5059351
Show file tree
Hide file tree
Showing 12 changed files with 134 additions and 52 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ repos:
args: ['--autofix']
- id: name-tests-test
args: ['--django']
exclude: ^tests/helpers/
- id: requirements-txt-fixer
- id: trailing-whitespace
- repo: https://gitlab.com/pycqa/flake8
Expand Down
12 changes: 6 additions & 6 deletions alerta/database/backends/mongodb/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1584,17 +1584,17 @@ def update_timer(self, timer):
# HOUSEKEEPING

def get_expired(self, expired_threshold, info_threshold):
# delete 'closed' or 'expired' alerts older than "expired_threshold" hours
# and 'informational' alerts older than "info_threshold" hours
# delete 'closed' or 'expired' alerts older than "expired_threshold" seconds
# and 'informational' alerts older than "info_threshold" seconds

if expired_threshold:
expired_hours_ago = datetime.utcnow() - timedelta(hours=expired_threshold)
expired_seconds_ago = datetime.utcnow() - timedelta(seconds=expired_threshold)
self.get_db().alerts.delete_many(
{'status': {'$in': ['closed', 'expired']}, 'lastReceiveTime': {'$lt': expired_hours_ago}})
{'status': {'$in': ['closed', 'expired']}, 'lastReceiveTime': {'$lt': expired_seconds_ago}})

if info_threshold:
info_hours_ago = datetime.utcnow() - timedelta(hours=info_threshold)
self.get_db().alerts.delete_many({'severity': 'informational', 'lastReceiveTime': {'$lt': info_hours_ago}})
info_seconds_ago = datetime.utcnow() - timedelta(seconds=info_threshold)
self.get_db().alerts.delete_many({'severity': 'informational', 'lastReceiveTime': {'$lt': info_seconds_ago}})

# get list of alerts to be newly expired
pipeline = [
Expand Down
8 changes: 4 additions & 4 deletions alerta/database/backends/postgres/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1500,22 +1500,22 @@ def update_timer(self, timer):
# HOUSEKEEPING

def get_expired(self, expired_threshold, info_threshold):
# delete 'closed' or 'expired' alerts older than "expired_threshold" hours
# and 'informational' alerts older than "info_threshold" hours
# delete 'closed' or 'expired' alerts older than "expired_threshold" seconds
# and 'informational' alerts older than "info_threshold" seconds

if expired_threshold:
delete = """
DELETE FROM alerts
WHERE (status IN ('closed', 'expired')
AND last_receive_time < (NOW() at time zone 'utc' - INTERVAL '%(expired_threshold)s hours'))
AND last_receive_time < (NOW() at time zone 'utc' - INTERVAL '%(expired_threshold)s seconds'))
"""
self._deleteall(delete, {'expired_threshold': expired_threshold})

if info_threshold:
delete = """
DELETE FROM alerts
WHERE (severity='informational'
AND last_receive_time < (NOW() at time zone 'utc' - INTERVAL '%(info_threshold)s hours'))
AND last_receive_time < (NOW() at time zone 'utc' - INTERVAL '%(info_threshold)s seconds'))
"""
self._deleteall(delete, {'info_threshold': info_threshold})

Expand Down
14 changes: 12 additions & 2 deletions alerta/management/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,18 @@ def health_check():
@cross_origin()
@permission(Scope.admin_management)
def housekeeping():
expired_threshold = request.args.get('expired', default=current_app.config['DEFAULT_EXPIRED_DELETE_HRS'], type=int)
info_threshold = request.args.get('info', default=current_app.config['DEFAULT_INFO_DELETE_HRS'], type=int)
# Optional query-string overrides are expressed in hours; both are None when
# the parameter is absent (or cannot be parsed as an int).
expired_threshold_hrs = request.args.get('expired', type=int)
info_threshold_hrs = request.args.get('info', type=int)

# Test against None rather than truthiness so that an explicit "?expired=0"
# is honoured as "0 = do not delete" instead of silently falling back to the
# configured default.
if expired_threshold_hrs is not None:
    expired_threshold = expired_threshold_hrs * 60 * 60  # convert hours to seconds
else:
    expired_threshold = current_app.config['DELETE_EXPIRED_AFTER']  # seconds

if info_threshold_hrs is not None:
    info_threshold = info_threshold_hrs * 60 * 60  # convert hours to seconds
else:
    info_threshold = current_app.config['DELETE_INFO_AFTER']  # seconds

has_expired, shelve_timeout, ack_timeout = Alert.housekeeping(expired_threshold, info_threshold)

Expand Down
2 changes: 1 addition & 1 deletion alerta/models/alert.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ def delete_note(self, note_id):
return Note.delete_by_id(note_id)

@staticmethod
def housekeeping(expired_threshold: int = 2, info_threshold: int = 12) -> Tuple[List['Alert'], List['Alert'], List['Alert']]:
def housekeeping(expired_threshold: int, info_threshold: int) -> Tuple[List['Alert'], List['Alert'], List['Alert']]:
return (
[Alert.from_db(alert) for alert in db.get_expired(expired_threshold, info_threshold)],
[Alert.from_db(alert) for alert in db.get_unshelve()],
Expand Down
4 changes: 2 additions & 2 deletions alerta/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@
SHELVE_TIMEOUT = 7200 # auto-unshelve alerts after x seconds (0 seconds = do not auto-unshelve)

# Housekeeping settings
DEFAULT_EXPIRED_DELETE_HRS = 2 # hours (0 hours = do not delete)
DEFAULT_INFO_DELETE_HRS = 12 # hours (0 hours = do not delete)
DELETE_EXPIRED_AFTER = 2 * 60 * 60 # seconds (0 = do not delete)
DELETE_INFO_AFTER = 12 * 60 * 60 # seconds (0 = do not delete)

# Send verification emails to new BasicAuth users
EMAIL_VERIFICATION = False
Expand Down
16 changes: 16 additions & 0 deletions alerta/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,22 @@ def get_user_config():

config['GOOGLE_TRACKING_ID'] = get_config('GOOGLE_TRACKING_ID', default=None, type=str, config=config)

# housekeeping
# The legacy *_DELETE_HRS environment variables are expressed in hours.
# os.environ values are always strings, so they must be converted to int
# before scaling to seconds ('2' * 60 * 60 would repeat the string 3600
# times rather than yield 7200). A non-numeric value raises ValueError.
delete_expired_hrs = (
    os.environ.get('DEFAULT_EXPIRED_DELETE_HRS', None)
    or os.environ.get('HK_EXPIRED_DELETE_HRS', None)
)
delete_expired = int(delete_expired_hrs) * 60 * 60 if delete_expired_hrs else None
config['DELETE_EXPIRED_AFTER'] = get_config('DELETE_EXPIRED_AFTER', default=delete_expired, type=int, config=config)

delete_info_hrs = (
    os.environ.get('DEFAULT_INFO_DELETE_HRS', None)
    or os.environ.get('HK_INFO_DELETE_HRS', None)
)
delete_info = int(delete_info_hrs) * 60 * 60 if delete_info_hrs else None
config['DELETE_INFO_AFTER'] = get_config('DELETE_INFO_AFTER', default=delete_info, type=int, config=config)

# plugins
config['PLUGINS'] = get_config('PLUGINS', default=[], type=list, config=config)

# blackout plugin
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ universal = 1

[pycodestyle]
max-line-length = 120

[tool:pytest]
norecursedirs=tests/helpers
Empty file added tests/helpers/__init__.py
Empty file.
35 changes: 35 additions & 0 deletions tests/helpers/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import contextlib
import os


@contextlib.contextmanager
def mod_env(*remove, **update):
    """
    Temporarily modify the ``os.environ`` dictionary in-place.

    See https://stackoverflow.com/questions/2059482#34333710

    The ``os.environ`` dictionary is updated in-place so that the modification
    is sure to work in all situations. On exit, variables that existed
    beforehand get their original values back and variables that were newly
    added are removed again.

    :param remove: Environment variables to remove.
    :param update: Dictionary of environment variables and values to add/update.
    """
    env = os.environ

    # Variables being updated or removed that already exist in the environment.
    stomped = (set(update) | set(remove)) & set(env)
    # Environment variables and values to restore on exit.
    update_after = {k: env[k] for k in stomped}
    # Environment variables to remove on exit (they did not exist before).
    remove_after = frozenset(k for k in update if k not in env)

    try:
        env.update(update)
        for k in remove:
            env.pop(k, None)
        yield
    finally:
        env.update(update_after)
        for k in remove_after:
            # The variable may already be gone (same name passed in both
            # ``remove`` and ``update``, or deleted inside the ``with`` body);
            # tolerate that instead of raising KeyError during cleanup.
            env.pop(k, None)
36 changes: 1 addition & 35 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,8 @@
import contextlib
import json
import os
import unittest

from alerta.app import create_app, db


@contextlib.contextmanager
def mod_env(*remove, **update):
    """
    Temporarily modify the ``os.environ`` dictionary in-place.

    See https://stackoverflow.com/questions/2059482#34333710

    The ``os.environ`` dictionary is updated in-place so that the modification
    is sure to work in all situations. On exit, variables that existed
    beforehand get their original values back and variables that were newly
    added are removed again.

    :param remove: Environment variables to remove.
    :param update: Dictionary of environment variables and values to add/update.
    """
    env = os.environ

    # Variables being updated or removed that already exist in the environment.
    stomped = (set(update) | set(remove)) & set(env)
    # Environment variables and values to restore on exit.
    update_after = {k: env[k] for k in stomped}
    # Environment variables to remove on exit (they did not exist before).
    remove_after = frozenset(k for k in update if k not in env)

    try:
        env.update(update)
        for k in remove:
            env.pop(k, None)
        yield
    finally:
        env.update(update_after)
        for k in remove_after:
            # The variable may already be gone (same name passed in both
            # ``remove`` and ``update``, or deleted inside the ``with`` body);
            # tolerate that instead of raising KeyError during cleanup.
            env.pop(k, None)
from tests.helpers.utils import mod_env


class ConfigTestCase(unittest.TestCase):
Expand Down
55 changes: 53 additions & 2 deletions tests/test_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,29 @@
from uuid import uuid4

from alerta.app import create_app, db
from tests.helpers.utils import mod_env


class ManagementTestCase(unittest.TestCase):

def setUp(self):

self.maxDiff = None

test_config = {
'DEBUG': False,
'TESTING': True,
'AUTH_REQUIRED': False,
# 'ACK_TIMEOUT': 2,
# 'SHELVE_TIMEOUT': 3
}
self.app = create_app(test_config)
self.client = self.app.test_client()

with mod_env(
DELETE_EXPIRED_AFTER='2',
DELETE_INFO_AFTER='3'
):
self.app = create_app(test_config)
self.client = self.app.test_client()

self.headers = {
'Content-type': 'application/json'
Expand Down Expand Up @@ -79,6 +87,16 @@ def random_resource():
'correlate': ['node_down', 'node_marginal', 'node_up']
}

self.info_alert = {
'event': 'node_init',
'resource': random_resource(),
'environment': 'Production',
'service': ['Network'],
'severity': 'informational',
'correlate': ['node_down', 'node_marginal', 'node_up'],
'timeout': 3
}

def tearDown(self):
db.destroy()

Expand Down Expand Up @@ -153,8 +171,14 @@ def test_housekeeping(self):
# create an alert that should be unaffected
response = self.client.post('/alert', data=json.dumps(self.ok_alert), headers=self.headers)
self.assertEqual(response.status_code, 201)

# create an info alert that should be deleted
response = self.client.post('/alert', data=json.dumps(self.info_alert), headers=self.headers)
self.assertEqual(response.status_code, 201)
data = json.loads(response.data.decode('utf-8'))

info_id = data['id']

# create an alert and ack it then shelve it
response = self.client.post('/alert', data=json.dumps(self.acked_and_shelved_alert), headers=self.headers)
self.assertEqual(response.status_code, 201)
Expand Down Expand Up @@ -184,6 +208,7 @@ def test_housekeeping(self):

time.sleep(5)

# run housekeeping (1st time)
response = self.client.get('/management/housekeeping', headers=self.headers)
self.assertEqual(response.status_code, 200)
data = json.loads(response.data.decode('utf-8'))
Expand Down Expand Up @@ -232,3 +257,29 @@ def test_housekeeping(self):
self.assertEqual(data['alert']['history'][2]['timeout'], 3)
self.assertEqual(data['alert']['history'][3]['status'], 'ack')
self.assertEqual(data['alert']['history'][3]['timeout'], 4)

response = self.client.get('/alert/' + info_id)
self.assertEqual(response.status_code, 404)

time.sleep(5)

# run housekeeping (2nd time)
response = self.client.get('/management/housekeeping', headers=self.headers)
self.assertEqual(response.status_code, 200)
data = json.loads(response.data.decode('utf-8'))
self.assertEqual(data['count'], 1)
self.assertListEqual(data['expired'], [])
self.assertListEqual(data['unshelve'], [])
self.assertListEqual(data['unack'], [acked_and_shelved_id])

response = self.client.get('/alert/' + expired_id)
self.assertEqual(response.status_code, 404)

# run housekeeping (3rd time)
response = self.client.get('/management/housekeeping', headers=self.headers)
self.assertEqual(response.status_code, 200)
data = json.loads(response.data.decode('utf-8'))
self.assertEqual(data['count'], 0)
self.assertListEqual(data['expired'], [])
self.assertListEqual(data['unshelve'], [])
self.assertListEqual(data['unack'], [])

0 comments on commit 5059351

Please sign in to comment.