Skip to content

Commit

Permalink
Version 1.1.13
Browse files Browse the repository at this point in the history
  • Loading branch information
guilhemmarchand committed Mar 16, 2019
2 parents d2800cf + 410134b commit 7b7568d
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 7 deletions.
8 changes: 8 additions & 0 deletions docs/releasenotes.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Release notes
#############

Version 1.1.13
==============

- fix: A static span was defined in the Burrow detailed view charts; they now use the ``telegraf_kafka_span`` macro
- fix: Prevents removed Burrow consumers from appearing as low range when the latest available metrics are part of the selected time range
- fix: Missing group-by statement for Burrow consumers monitoring in the OOTB alert, which generated unexpected output containing OK consumers, while alerts were still correctly raised for ERR consumers

Version 1.1.12
==============

Expand Down
2 changes: 1 addition & 1 deletion telegraf-kafka/default/app.conf
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ label = Kafka Smart Monitoring
[launcher]
author = Guilhem Marchand
description = Kafka Smart Monitoring provides advanced and fast monitoring of Kafka and Confluent streaming platform
version = 1.1.12
version = 1.1.13
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
| lookup burrow_status status_code OUTPUT status, description
| rangemap field=status_code low=1-1 elevated=2-2 default=severe
| eval avg_lag=round(avg_lag, 3), delta_lastTime=now()-lastTime, lastTime=strftime(lastTime, "%H:%M:%S")
| eval status=if(delta_lastTime>120, "UNKNOWN", status), description=if(delta_lastTime>120, "The delta in seconds between the last state received and now has exceeded 120 seconds", description)
| eval status=if(delta_lastTime>120, "UNKNOWN", status), description=if(delta_lastTime>120, "The delta in seconds between the last state received and now has exceeded 120 seconds", description), range=if(delta_lastTime>120, "severe", range)
| appendcols [ | mcatalog values(topic) as topics where metric_name="burrow_partition.lag" `telegraf_kafka_index` group="$group$" by group ]
| fields group, topics, avg_lag, max_lag, current, sparkline, status, range, lastTime, description | rename description as "status description", topics as "consuming from topics"</query>
<earliest>$time.earliest$</earliest>
Expand Down Expand Up @@ -96,7 +96,7 @@
<title>Group consumer lag over time</title>
<search>
<query>| mstats max(_value) as value where `telegraf_kafka_index` metric_name="burrow_group.lag" env=$env$ label=$label$ group="$group$" span=10s
| timechart span=1m avg(value) as "average lag", max(value) as "max lag"</query>
| timechart `telegraf_kafka_span` avg(value) as "average lag", max(value) as "max lag"</query>
<earliest>$time.earliest$</earliest>
<latest>$time.latest$</latest>
<refresh>30s</refresh>
Expand Down Expand Up @@ -166,7 +166,7 @@
<title>By topic lag over time</title>
<search>
<query>| mstats max(_value) as value where `telegraf_kafka_index` metric_name="burrow_partition.lag" env=$env$ label=$label$ group="$group$" ($topic$) by topic span=10s
| timechart useother=f limit=45 span=1m $bytopic_statsmode$(value) as lag by topic</query>
| timechart useother=f limit=45 `telegraf_kafka_span` $bytopic_statsmode$(value) as lag by topic</query>
<earliest>$time.earliest$</earliest>
<latest>$time.latest$</latest>
<refresh>30s</refresh>
Expand Down Expand Up @@ -218,7 +218,7 @@
| lookup burrow_status status_code OUTPUT status, description
| rangemap field=status_code low=1-1 elevated=2-2 default=severe
| eval avg_lag=round(avg_lag, 3), delta_lastTime=now()-lastTime, lastTime=strftime(lastTime, "%H:%M:%S")
| eval status=if(delta_lastTime>120, "UNKNOWN", status), description=if(delta_lastTime>120, "The delta in seconds between the last state received and now has exceeded 120 seconds", description)
| eval status=if(delta_lastTime>120, "UNKNOWN", status), description=if(delta_lastTime>120, "The delta in seconds between the last state received and now has exceeded 120 seconds", description), range=if(delta_lastTime>120, "severe", range)
| join topic, partition [ | mstats latest(_value) prestats=t where metric_name="burrow_topic.offset" `telegraf_kafka_index` env=$env$ label=$label$ $topic$ by topic, partition
| stats latest(_value) as topic_offset by topic, partition ]
| fields topic, partition, topic_offset, current_offset, avg_lag, max_lag, current, sparkline, status, range, lastTime, description | rename description as "status description"</query>
Expand Down
2 changes: 1 addition & 1 deletion telegraf-kafka/default/data/ui/views/telegraf-burrow.xml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@
| lookup burrow_status status_code OUTPUT status, description
| rangemap field=status_code low=1-1 elevated=2-2 default=severe
| eval avg_lag=round(avg_lag, 3), delta_lastTime=now()-lastTime, lastTime=strftime(lastTime, "%H:%M:%S")
| eval status=if(delta_lastTime>120, "UNKNOWN", status), description=if(delta_lastTime>120, "The delta in seconds between the last state received and now has exceeded 120 seconds", description)
| eval status=if(delta_lastTime>120, "UNKNOWN", status), description=if(delta_lastTime>120, "The delta in seconds between the last state received and now has exceeded 120 seconds", description), range=if(delta_lastTime>120, "severe", range)
| search ($group_status$)
| fields env, label, cluster, group, avg_lag, max_lag, current, sparkline, status, range, lastTime, description | rename description as "status description"
| lookup kafka_burrow_consumers_monitoring env, label, cluster, group OUTPUT monitoring_state</query>
Expand Down
2 changes: 1 addition & 1 deletion telegraf-kafka/default/savedsearches.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1025,7 +1025,7 @@ search = | mstats latest(_value) as status_code where metric_name="burrow_group.
| stats latest(status_code) as status_code, max(_time) as last_time by env, label, cluster, group\
| lookup burrow_status status_code OUTPUT status, description as status_description\
| append [ | inputlookup kafka_burrow_consumers_monitoring ]\
| stats first(last_time) as last_time values(*) as "*" by env, label, cluster\
| stats first(last_time) as last_time values(*) as "*" by env, label, cluster, group\
| eval now=now(), delta_seconds=now-last_time, now=strftime(now, "%d/%m/%Y %H:%M:%S"), last_time=strftime(last_time, "%d/%m/%Y %H:%M:%S")\
| eval state=case(status_code!=1, "severe", status_code=1, "low"), state=if(delta_seconds>120, "severe", state), status=if(delta_seconds>120, "unknown", status), status_description=if(delta_seconds>120, "The delta in seconds between the last state received and now has exceeded 120 seconds", status_description)\
| eval last_time=if(isnull(last_time), "out of time range scope", last_time), delta_seconds=if(isnull(delta_seconds), "out of time range scope", delta_seconds)\
Expand Down
Binary file added telegraf-kafka_1113.tgz
Binary file not shown.

0 comments on commit 7b7568d

Please sign in to comment.