From 83b3d0642be84d464eb7e0c79f4e143364e81f4d Mon Sep 17 00:00:00 2001 From: Peter Giacomo Lombardo Date: Thu, 20 Aug 2020 12:06:37 +0200 Subject: [PATCH] If the host agent goes offline, timeout and reset. --- instana/agent/host.py | 8 +++++++- instana/collector/host.py | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/instana/agent/host.py b/instana/agent/host.py index ac510b67..d666de6e 100644 --- a/instana/agent/host.py +++ b/instana/agent/host.py @@ -4,8 +4,10 @@ """ from __future__ import absolute_import -import json import os +import json +import urllib3 +import requests from datetime import datetime from ..log import logger @@ -230,6 +232,10 @@ def report_data_payload(self, payload): # The host agent returned something indicating that is has a request for us that we # need to process. self.handle_agent_tasks(json.loads(response.content)[0]) + except requests.exceptions.ConnectionError: + pass + except urllib3.exceptions.MaxRetryError: + pass except Exception as exc: logger.debug("report_data_payload: Instana host agent connection error (%s)", type(exc), exc_info=True) return response diff --git a/instana/collector/host.py b/instana/collector/host.py index 57184b13..06c225c5 100644 --- a/instana/collector/host.py +++ b/instana/collector/host.py @@ -42,6 +42,10 @@ def prepare_and_report_data(self): self.agent.machine.fsm.ready() else: return + + if self.agent.machine.fsm.current == "good2go" and self.agent.is_timed_out(): + logger.info("The Instana host agent has gone offline or is no longer reachable for > 1 min. Will retry periodically.") + self.agent.reset() except Exception: logger.debug('Harmless state machine thread disagreement. Will self-correct on next timer cycle.')