Skip to content
Permalink
Browse files

shipper-state-metrics: collect time spent on pending release conditions

We're trying to collect metrics on how long rollouts take, end to end.
Although we can't currently get that directly, we have a decent proxy
metric: how long a release has spent with a Strategy.Condition.Status=False.

We will start exposing that as a Prometheus summary metric in
shipper-state-metrics, with the 50th, 90th and 99th percentiles. I chose
not to go with a histogram because we currently have no idea about the
distribution of these numbers, so it's really hard to set up buckets
that make sense. Also, I don't expect many aggregations to be done on
this, as we only have one instance of shipper-state-metrics running,
collecting data for all available releases.

To get these numbers back (for instance, the 90th percentile of the time
releases have spent with ContenderAchievedInstallation=False), one could
query Prometheus with the following:

	shipper_release_durations{cond_type="ContenderAchievedInstallation", quantile="0.9"}
  • Loading branch information...
juliogreff committed May 9, 2019
1 parent 04d2fe3 commit 24787c85dea459e07ca16e6b43cccaed7c5ea1c7
Showing with 129 additions and 0 deletions.
  1. +25 −0 NOTICE
  2. +36 −0 cmd/shipper-state-metrics/collector.go
  3. +68 −0 cmd/shipper-state-metrics/math.go
25 NOTICE
@@ -3,3 +3,28 @@ Copyright 2018 Booking.com.

This software contains code derived from the sample-controller by The
Kubernetes Authors.

This software contains code derived from the stats project by Montana Flynn,
under the following license:

The MIT License (MIT)

Copyright (c) 2014-2015 Montana Flynn (https://anonfunction.com)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -2,6 +2,7 @@ package main

import (
"strings"
"time"

"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
@@ -29,6 +30,13 @@ var (
nil,
)

// relDurationDesc describes the "release_durations" summary metric. Its
// single variable label, "cond_type", carries the release strategy condition
// type that the summarized ages were collected for.
relDurationDesc = prometheus.NewDesc(
fqn("release_durations"),
"Duration of release objects",
[]string{"cond_type"},
nil,
)

itsDesc = prometheus.NewDesc(
fqn("installationtargets"),
"Number of InstallationTarget objects",
@@ -132,6 +140,9 @@ func (ssm ShipperStateMetrics) collectReleases(ch chan<- prometheus.Metric) {
key := func(ss ...string) string { return strings.Join(ss, "^") }
unkey := func(s string) []string { return strings.Split(s, "^") }

now := time.Now()
relAgesByCondition := make(map[string][]float64)

breakdown := make(map[string]float64)
for _, rel := range rels {
var appName string
@@ -156,13 +167,38 @@ func (ssm ShipperStateMetrics) collectReleases(ch chan<- prometheus.Metric) {
breakdown[key(rel.Namespace, appName, cluster, string(cond.Type), string(cond.Status), reason)]++
}
}

if rel.Status.Strategy != nil {
for _, condition := range rel.Status.Strategy.Conditions {
if condition.Status == corev1.ConditionFalse {
continue
}
age := now.Sub(condition.LastTransitionTime.Time).Seconds()
relAgesByCondition[string(condition.Type)] = append(relAgesByCondition[string(condition.Type)], age)
}
}

}

glog.V(4).Infof("releases: %v", breakdown)

for k, v := range breakdown {
ch <- prometheus.MustNewConstMetric(relsDesc, prometheus.GaugeValue, v, unkey(k)...)
}

quantiles := []float64{0.5, 0.9, 0.99}

for condition, ages := range relAgesByCondition {
count := uint64(len(ages))
sum := Sum(ages)
summary, err := MakeSummary(ages, quantiles)
if err != nil {
glog.Warningf("collect Releases: %s", err)
return
}

ch <- prometheus.MustNewConstSummary(relDurationDesc, count, sum, summary, condition)
}
}

func (ssm ShipperStateMetrics) collectInstallationTargets(ch chan<- prometheus.Metric) {
@@ -0,0 +1,68 @@
package main

import (
"errors"
"math"
"sort"
)

// Percentile returns the value found at the given percentile of input, using
// the nearest-rank method: the values are sorted in ascending order and the
// element at ordinal rank ceil(len(input) * percent / 100) is returned.
//
// percent is expressed on a 0-100 scale (50 is the median), not as a 0-1
// quantile. input is left untouched; sorting happens on a private copy.
//
// An error is returned when input is empty, or when percent is NaN or lies
// outside the [0, 100] range.
//
// Copyright (c) 2014-2015 Montana Flynn (https://anonfunction.com)
// Licensed under the MIT License (check NOTICE for full license)
// Obtained from https://github.com/montanaflynn/stats, minor modifications
// were applied
func Percentile(input []float64, percent float64) (percentile float64, err error) {
	n := len(input)

	// There is no meaningful percentile of an empty data set.
	if n == 0 {
		return 0, errors.New("empty input")
	}

	// NaN compares false against both bounds, so it must be rejected
	// explicitly; otherwise it would reach the float-to-int conversion below,
	// whose result for NaN is implementation-defined.
	if math.IsNaN(percent) || percent < 0 || percent > 100 {
		return 0, errors.New("percent out of bounds")
	}

	// Sort a copy so callers do not observe their slice being reordered.
	sorted := append([]float64(nil), input...)
	sort.Float64s(sorted)

	// The 100th percentile is always the largest value.
	if percent == 100.0 {
		return sorted[n-1], nil
	}

	// Ordinal (1-based) rank of the requested percentile.
	rank := int(math.Ceil(float64(n) * percent / 100))

	// A rank of 0 only happens for percent == 0; map it to the smallest value.
	if rank == 0 {
		return sorted[0], nil
	}
	return sorted[rank-1], nil
}

// MakeSummary computes the value of input at each of the requested quantiles
// and returns the results keyed by quantile.
//
// quantiles are expressed on a 0-1 scale (0.5 is the median, 0.99 the 99th
// percentile), and the returned map uses those same values as keys. Because
// Percentile works on a 0-100 scale, each quantile is scaled by 100 before
// being handed over.
//
// If any quantile cannot be computed (empty input, quantile out of range),
// no partial result is returned: the map is nil and the error from
// Percentile is passed back unchanged.
func MakeSummary(input []float64, quantiles []float64) (map[float64]float64, error) {
	summary := make(map[float64]float64, len(quantiles))

	for _, q := range quantiles {
		// Convert the 0-1 quantile into the 0-100 percent Percentile expects.
		value, err := Percentile(input, q*100)
		if err != nil {
			return nil, err
		}
		summary[q] = value
	}

	return summary, nil
}

// Sum adds up every value of input, front to back, and returns the total.
// An empty or nil slice yields 0.
func Sum(input []float64) float64 {
	var total float64
	for i := 0; i < len(input); i++ {
		total += input[i]
	}
	return total
}

0 comments on commit 24787c8

Please sign in to comment.
You can’t perform that action at this time.