Skip to content

Commit

Permalink
Merge pull request #2303 from clusterfudge/fail-hard
Browse files Browse the repository at this point in the history
Fail hard
  • Loading branch information
deniszh committed Jun 26, 2018
2 parents 8ee7d04 + b4df156 commit a3667ef
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 0 deletions.
3 changes: 3 additions & 0 deletions webapp/graphite/local_settings.py.example
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ DEFAULT_XFILES_FACTOR = 0
# Time before retrying a failed remote webapp
#REMOTE_RETRY_DELAY = 60.0

# Fail all requests if any remote webapp call fails
#STORE_FAIL_ON_ERROR = False

# Try to detect when a cluster server is localhost and don't forward queries
#REMOTE_EXCLUDE_LOCAL = False

Expand Down
1 change: 1 addition & 0 deletions webapp/graphite/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
REMOTE_FETCH_TIMEOUT = None # Replaced by FETCH_TIMEOUT
REMOTE_RETRY_DELAY = 60.0
REMOTE_EXCLUDE_LOCAL = False
STORE_FAIL_ON_ERROR = False
REMOTE_STORE_MERGE_RESULTS = True
REMOTE_STORE_FORWARD_HEADERS = []
REMOTE_STORE_USE_POST = False
Expand Down
11 changes: 11 additions & 0 deletions webapp/graphite/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,17 @@ def wait_jobs(self, jobs, timeout, context):
)
raise Exception(message)

if len(results) < len(jobs) and settings.STORE_FAIL_ON_ERROR:
message = "%s request(s) failed for %s (%d)" % (
len(jobs) - len(results), context, len(jobs)
)
for job in failed:
message += "\n\n%s: %s: %s" % (
job, job.exception,
'\n'.join(traceback.format_exception(*job.exception_info))
)
raise Exception(message)

return results

def fetch(self, patterns, startTime, endTime, now, requestContext):
Expand Down
44 changes: 44 additions & 0 deletions webapp/tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,50 @@ def test_fetch_all_failed(self):
self.assertEqual(log_info.call_count, 2)
self.assertRegexpMatches(log_info.call_args[0][0], 'Exception during fetch for \[\'a\'\] after [-.e0-9]+s: TestFinder.find_nodes')

def test_fetch_some_failed(self):
# some finders failed
store = Store(
finders=[TestFinder(), RemoteFinder()]
)

with patch('graphite.storage.log.info') as log_info:
list(store.fetch(['a'], 1, 2, 3, {}))
self.assertEqual(log_info.call_count, 1)


store = Store(
finders=[TestFinder(), TestFinder()]
)

with patch('graphite.storage.log.info') as log_info:
with self.assertRaisesRegexp(Exception, 'All requests failed for fetch for \[\'a\'\] \(2\)'):
list(store.fetch(['a'], 1, 2, 3, {}))
self.assertEqual(log_info.call_count, 2)
self.assertRegexpMatches(log_info.call_args[0][0], 'Exception during fetch for \[\'a\'\] after [-.e0-9]+s: TestFinder.find_nodes')

@override_settings(STORE_FAIL_ON_ERROR=True)
def test_fetch_some_failed_hard_fail_enabled(self):
# all finds failed
store = Store(
finders=[TestFinder(), RemoteFinder()]
)

with patch('graphite.storage.log.info') as log_info:
with self.assertRaisesRegexp(Exception, '1 request\(s\) failed for fetch for \[\'a\'\] \(2\)'):
list(store.fetch(['a'], 1, 2, 3, {}))
self.assertEqual(log_info.call_count, 1)
self.assertRegexpMatches(log_info.call_args[0][0], 'Exception during fetch for \[\'a\'\] after [-.e0-9]+s: TestFinder.find_nodes')

store = Store(
finders=[TestFinder(), TestFinder()]
)

with patch('graphite.storage.log.info') as log_info:
with self.assertRaisesRegexp(Exception, 'All requests failed for fetch for \[\'a\'\] \(2\)'):
list(store.fetch(['a'], 1, 2, 3, {}))
self.assertEqual(log_info.call_count, 2)
self.assertRegexpMatches(log_info.call_args[0][0], 'Exception during fetch for \[\'a\'\] after [-.e0-9]+s: TestFinder.find_nodes')

def test_find(self):
disabled_finder = DisabledFinder()
legacy_finder = LegacyFinder()
Expand Down

0 comments on commit a3667ef

Please sign in to comment.