Skip to content

Commit

Permalink
Fix node termination summary metric not firing (#335) (#342)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed May 18, 2023
1 parent fd3f7d1 commit fb59f0b
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 9 deletions.
5 changes: 0 additions & 5 deletions pkg/controllers/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,9 @@ import (
"github.com/aws/karpenter-core/pkg/controllers/termination"
"github.com/aws/karpenter-core/pkg/controllers/termination/terminator"
"github.com/aws/karpenter-core/pkg/events"
"github.com/aws/karpenter-core/pkg/metrics"
"github.com/aws/karpenter-core/pkg/operator/controller"
)

func init() {
metrics.MustRegister() // Registers cross-controller metrics
}

func NewControllers(
ctx context.Context,
clock clock.Clock,
Expand Down
7 changes: 7 additions & 0 deletions pkg/controllers/termination/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,14 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/prometheus/client_golang/prometheus"

"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/cloudprovider"
"github.com/aws/karpenter-core/pkg/controllers/termination/terminator"
terminatorevents "github.com/aws/karpenter-core/pkg/controllers/termination/terminator/events"
"github.com/aws/karpenter-core/pkg/events"
"github.com/aws/karpenter-core/pkg/metrics"
corecontroller "github.com/aws/karpenter-core/pkg/operator/controller"
machineutil "github.com/aws/karpenter-core/pkg/utils/machine"
)
Expand Down Expand Up @@ -104,6 +107,10 @@ func (c *Controller) removeFinalizer(ctx context.Context, n *v1.Node) error {
if err := c.kubeClient.Patch(ctx, n, client.MergeFrom(stored)); err != nil {
return client.IgnoreNotFound(fmt.Errorf("patching node, %w", err))
}
// We use stored.DeletionTimestamp since the api-server may give back a node after the patch without a deletionTimestamp
TerminationSummary.With(prometheus.Labels{
metrics.ProvisionerLabel: n.Labels[v1alpha5.ProvisionerNameLabelKey],
}).Observe(time.Since(stored.DeletionTimestamp.Time).Seconds())
logging.FromContext(ctx).Infof("deleted node")
}
return nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package terminator
package termination

import (
"github.com/prometheus/client_golang/prometheus"
Expand All @@ -22,17 +22,18 @@ import (
)

var (
terminationSummary = prometheus.NewSummary(
TerminationSummary = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: "karpenter",
Subsystem: "nodes",
Name: "termination_time_seconds",
Help: "The time taken between a node's deletion request and the removal of its finalizer",
Objectives: metrics.SummaryObjectives(),
},
[]string{metrics.ProvisionerLabel},
)
)

func init() {
crmetrics.Registry.MustRegister(terminationSummary)
crmetrics.Registry.MustRegister(TerminationSummary)
}
17 changes: 17 additions & 0 deletions pkg/controllers/termination/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/aws/karpenter-core/pkg/controllers/termination"
"github.com/aws/karpenter-core/pkg/controllers/termination/terminator"
"github.com/aws/karpenter-core/pkg/events"
"github.com/aws/karpenter-core/pkg/metrics"
"github.com/aws/karpenter-core/pkg/operator/controller"
"github.com/aws/karpenter-core/pkg/operator/scheme"
"github.com/aws/karpenter-core/pkg/test"
Expand Down Expand Up @@ -87,6 +88,10 @@ var _ = Describe("Termination", func() {
ExpectCleanedUp(ctx, env.Client)
fakeClock.SetTime(time.Now())
cloudProvider.Reset()

// Reset the metrics collectors
metrics.NodesTerminatedCounter.Reset()
termination.TerminationSummary.Reset()
})

Context("Reconciliation", func() {
Expand Down Expand Up @@ -416,6 +421,18 @@ var _ = Describe("Termination", func() {
ExpectNotFound(ctx, env.Client, node)
})
})
Context("Metrics", func() {
It("should fire the terminationSummary metric when deleting nodes", func() {
ExpectApplied(ctx, env.Client, node)
Expect(env.Client.Delete(ctx, node)).To(Succeed())
node = ExpectNodeExists(ctx, env.Client, node.Name)
ExpectReconcileSucceeded(ctx, terminationController, client.ObjectKeyFromObject(node))

m, ok := FindMetricWithLabelValues("karpenter_nodes_termination_time_seconds", map[string]string{"provisioner": ""})
Expect(ok).To(BeTrue())
Expect(m.GetSummary().GetSampleCount()).To(BeNumerically("==", 1))
})
})
})

func ExpectNotEnqueuedForEviction(e *terminator.EvictionQueue, pods ...*v1.Pod) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,6 @@ var (
)
)

func MustRegister() {
func init() {
crmetrics.Registry.MustRegister(NodesCreatedCounter, NodesTerminatedCounter)
}

0 comments on commit fb59f0b

Please sign in to comment.