From 3b71f61facecd61efce907eb18feaa366664feb1 Mon Sep 17 00:00:00 2001 From: Peter Giacomo Lombardo Date: Mon, 11 Dec 2017 12:48:48 +0100 Subject: [PATCH] Better offline handling - Track the last time the host agent was seen - Properly timeout if more than 60 seconds - Fallback into retry announce mode --- instana/agent.py | 11 +++++++++++ instana/fsm.py | 14 ++++++-------- instana/meter.py | 5 ++++- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/instana/agent.py b/instana/agent.py index cb84c38c..c4739cbd 100644 --- a/instana/agent.py +++ b/instana/agent.py @@ -3,6 +3,7 @@ import instana.fsm as f import instana.agent_const as a import threading +from datetime import datetime try: import urllib.request as urllib2 @@ -36,6 +37,7 @@ class Agent(object): port = a.AGENT_DEFAULT_PORT fsm = None from_ = From() + last_seen = None def __init__(self, sensor): log.debug("initializing agent") @@ -50,6 +52,13 @@ def to_json(self, o): except Exception as e: log.info("to_json: ", e, o) + def is_timed_out(self): + if self.last_seen and self.can_send: + diff = datetime.now() - self.last_seen + if diff.seconds > 60: + return True + return False + def can_send(self): return self.fsm.fsm.current == "good2go" @@ -90,6 +99,7 @@ def full_request_response(self, url, method, o, body, header): if self.can_send(): self.reset() else: + self.last_seen = datetime.now() if body: b = response.read() @@ -125,6 +135,7 @@ def make_full_url(self, host, port, prefix): return s def reset(self): + self.last_seen = None self.from_ = From() self.fsm.reset() diff --git a/instana/fsm.py b/instana/fsm.py index dc2fa706..946ce15e 100644 --- a/instana/fsm.py +++ b/instana/fsm.py @@ -41,18 +41,17 @@ def __init__(self, agent): self.agent = agent self.fsm = f.Fysom({ - "initial": {'state': "lostandalone", 'event': 'init', 'defer': True}, "events": [ - ("startup", "*", "lostandalone"), - ("lookup", "lostandalone", "found"), + ("lookup", "*", "found"), ("announce", "found", "announced"), ("ready", "announced", "good2go")], "callbacks": { "onlookup": self.lookup_agent_host, "onannounce": self.announce_sensor, + "onready": self.start_metric_reporting, "onchangestate": self.printstatechange}}) - timer = t.Timer(2, self.boot) + timer = t.Timer(2, self.fsm.lookup) timer.daemon = True timer.name = "Startup" timer.start() @@ -61,13 +60,12 @@ def printstatechange(self, e): log.debug('========= (%i#%s) FSM event: %s, src: %s, dst: %s ==========' % (os.getpid(), t.current_thread().name, e.event, e.src, e.dst)) - def boot(self): - self.fsm.init() - self.fsm.lookup() - def reset(self): self.fsm.lookup() + def start_metric_reporting(self, e): + self.agent.sensor.meter.run() + def lookup_agent_host(self, e): if self.agent.sensor.options.agent_host != "": host = self.agent.sensor.options.agent_host diff --git a/instana/meter.py b/instana/meter.py index 1fd48c72..e82dbcd1 100644 --- a/instana/meter.py +++ b/instana/meter.py @@ -114,7 +114,6 @@ class Meter(object): def __init__(self, sensor): self.sensor = sensor - self.run() def run(self): self.timer = t.Thread(target=self.collect_and_report) @@ -125,6 +124,10 @@ def run(self): def collect_and_report(self): while 1: self.process() + if (self.sensor.agent.is_timed_out()): + log.warn("Host agent offline for >1 min. Going to sit in a corner...") + self.sensor.agent.reset() + break time.sleep(1) def process(self):