Skip to content

Commit

Permalink
Instrument the instance and shell using Swift Metrics #56 (#70)
Browse files Browse the repository at this point in the history
* +metrics,swim implement metrics in SWIM instance and NIO Shell

* prepared testing infra for metrics and prepared most of them

* implement test infra properly for metrics; dead is not a Gauge but a Counter

* more metrics, lha value as well as shell specific values

+testkit move testing utilities to shared module, since we need to reuse them

implement more metrics, failures and timing intervals

* adjust labels to be unique per type, a prometheus requirement

comment out in example by default

cleanup

* Apply suggestions from code review

Co-authored-by: Yim Lee <yim_lee@apple.com>

Co-authored-by: Yim Lee <yim_lee@apple.com>
  • Loading branch information
ktoso and yim-lee authored Oct 2, 2020
1 parent 2bdd8df commit 343e2ad
Show file tree
Hide file tree
Showing 19 changed files with 1,151 additions and 26 deletions.
20 changes: 18 additions & 2 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,28 @@ var targets: [PackageDescription.Target] = [

.testTarget(
name: "SWIMTests",
dependencies: ["SWIM"]
dependencies: [
"SWIM",
"SWIMTestKit",
]
),

.testTarget(
name: "SWIMNIOExampleTests",
dependencies: ["SWIMNIOExample"]
dependencies: [
"SWIMNIOExample",
"SWIMTestKit",
]
),

// NOT FOR PUBLIC CONSUMPTION.
.testTarget(
name: "SWIMTestKit",
dependencies: [
.product(name: "NIO", package: "swift-nio"),
.product(name: "Logging", package: "swift-log"),
.product(name: "Metrics", package: "swift-metrics"),
]
),

// ==== ------------------------------------------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions Samples/Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ var targets: [PackageDescription.Target] = [
dependencies: [
"SWIM",
"SWIMNIOExample",
"SwiftPrometheus",
"Lifecycle",
"ArgumentParser",
],
Expand All @@ -34,6 +35,7 @@ var dependencies: [Package.Dependency] = [
// ~~~~~~~ only for samples ~~~~~~~

.package(url: "https://github.com/swift-server/swift-service-lifecycle.git", from: "1.0.0-alpha"),
.package(url: "https://github.com/MrLotU/SwiftPrometheus.git", from: "1.0.0-alpha"),
.package(url: "https://github.com/apple/swift-argument-parser", from: "0.2.0"),
]

Expand Down
19 changes: 18 additions & 1 deletion Samples/Sources/SWIMNIOSampleCluster/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

import ClusterMembership
import SWIM
import Metrics
import Prometheus
import SWIMNIOExample
import NIO
import Logging
Expand All @@ -38,8 +40,23 @@ struct SWIMNIOSampleCluster: ParsableCommand {

mutating func run() throws {
LoggingSystem.bootstrap(_SWIMPrettyMetadataLogHandler.init)

let group = MultiThreadedEventLoopGroup(numberOfThreads: System.coreCount)

// Uncomment this if you'd like to see metrics displayed in the command line periodically;
// This bootstraps and uses the Prometheus metrics backend to report metrics periodically by printing them to the stdout (console).
//
// Note though that this will be a bit noisy, since logs are also emitted to the stdout by default, however it's a nice way
// to learn and explore what the metrics are and how they behave when toying around with a local cluster.
// let prom = PrometheusClient()
// MetricsSystem.bootstrap(prom)
//
// group.next().scheduleRepeatedTask(initialDelay: .seconds(1), delay: .seconds(10)) { _ in
// prom.collect { (string: String) in
// print("")
// print("")
// print(string)
// }
// }

let lifecycle = ServiceLifecycle()
lifecycle.registerShutdown(
Expand Down
2 changes: 1 addition & 1 deletion Sources/SWIM/Events.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// This source file is part of the Swift Cluster Membership open source project
//
// Copyright (c) 2018-2019 Apple Inc. and the Swift Cluster Membership project authors
// Copyright (c) 2020 Apple Inc. and the Swift Cluster Membership project authors
// Licensed under Apache License v2.0
//
// See LICENSE.txt for license information
Expand Down
210 changes: 210 additions & 0 deletions Sources/SWIM/Metrics.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the swift-cluster-membership open source project
//
// Copyright (c) 2020 Apple Inc. and the swift-cluster-membership project authors
// Licensed under Apache License v2.0
//
// See LICENSE.txt for license information
// See CONTRIBUTORS.txt for the list of swift-cluster-membership project authors
//
// SPDX-License-Identifier: Apache-2.0
//
//===----------------------------------------------------------------------===//

import Metrics

extension SWIM {
    /// All metrics that a SWIM instance, and the shell driving it, are expected to report.
    ///
    /// Every metric label is produced by `settings.metrics.makeLabel(...)`; metrics which share a label
    /// are told apart by their dimensions.
    ///
    /// - SeeAlso: `SWIM.Metrics.ShellMetrics` for metrics that a specific shell implementation should emit
    public struct Metrics {
        // ==== --------------------------------------------------------------------------------------------------------
        // MARK: Membership

        /// Current number of members in `alive` status.
        public let membersAlive: Gauge
        /// Current number of members in `suspect` status.
        public let membersSuspect: Gauge
        /// Current number of members in `unreachable` status.
        public let membersUnreachable: Gauge
        // There is intentionally no gauge for `dead` members: "dead" is considered "removed" from the cluster.
        // -- no metric --

        /// Total number of nodes *ever* noticed as dead by this member.
        public let membersTotalDead: Counter

        /// Current number of tombstones kept for previously known (now dead and removed) members.
        public let removedDeadMemberTombstones: Gauge

        // ==== --------------------------------------------------------------------------------------------------------
        // MARK: Internal metrics

        /// Current value of the local health multiplier.
        public let localHealthMultiplier: Gauge

        // ==== --------------------------------------------------------------------------------------------------------
        // MARK: Probe metrics

        /// The incarnation number of the SWIM instance.
        ///
        /// The incarnation is bumped whenever the node has to refute some gossip about itself,
        /// which makes the *growth* of this value an interesting indicator of cluster observation churn.
        public let incarnation: Gauge

        /// Total number of successful ping probes (pings that received a successful reply).
        public let successfulPingProbes: Counter
        /// Total number of failed ping probes.
        public let failedPingProbes: Counter

        /// Total number of successful ping request probes.
        /// Either an .ack or a .nack from the intermediary node counts as a success here.
        public let successfulPingRequestProbes: Counter
        /// Total number of failed ping request probes.
        /// Only a .timeout counts as a failed ping request.
        public let failedPingRequestProbes: Counter

        // ==== --------------------------------------------------------------------------------------------------------
        // MARK: Shell / Transport Metrics

        /// Metrics to be filled in by the respective SWIM shell implementation.
        public let shell: ShellMetrics

        /// Creates all instance-level metrics, as well as the nested shell metrics, using labels derived from `settings`.
        ///
        /// - Parameter settings: settings whose `metrics.makeLabel` is used to build every metric label.
        public init(settings: SWIM.Settings) {
            let labels = settings.metrics

            // Membership; the three gauges share one label and differ only by their "status" dimension.
            self.membersAlive = Gauge(label: labels.makeLabel("members"), dimensions: [("status", "alive")])
            self.membersSuspect = Gauge(label: labels.makeLabel("members"), dimensions: [("status", "suspect")])
            self.membersUnreachable = Gauge(label: labels.makeLabel("members"), dimensions: [("status", "unreachable")])
            self.membersTotalDead = Counter(label: labels.makeLabel("members", "total"), dimensions: [("status", "dead")])
            self.removedDeadMemberTombstones = Gauge(label: labels.makeLabel("removedMemberTombstones"))

            // Internal
            self.localHealthMultiplier = Gauge(label: labels.makeLabel("lha"))

            // Probes
            self.incarnation = Gauge(label: labels.makeLabel("incarnation"))

            self.successfulPingProbes = Counter(label: labels.makeLabel("probe", "ping"), dimensions: [("type", "successful")])
            self.failedPingProbes = Counter(label: labels.makeLabel("probe", "ping"), dimensions: [("type", "failed")])

            self.successfulPingRequestProbes = Counter(label: labels.makeLabel("probe", "pingRequest"), dimensions: [("type", "successful")])
            self.failedPingRequestProbes = Counter(label: labels.makeLabel("probe", "pingRequest"), dimensions: [("type", "failed")])

            // Shell / transport
            self.shell = ShellMetrics(settings: settings)
        }

        /// Metrics which are recorded by a SWIM shell implementation rather than by the instance itself.
        public struct ShellMetrics {
            // ==== ----------------------------------------------------------------------------------------------------
            // MARK: Probe metrics

            /// Time taken by successful ping round-trips.
            public let pingResponseTime: Timer

            /// Time taken by (every) successful pingRequest round-trip.
            public let pingRequestResponseTimeAll: Timer
            /// Time taken by the (first) successful pingRequest round-trip.
            /// (A ping request hits multiple intermediary peers, the first reply is what counts.)
            public let pingRequestResponseTimeFirst: Timer

            // ==== ----------------------------------------------------------------------------------------------------
            // MARK: Message metrics

            /// Number of incoming messages received.
            public let messageInboundCount: Counter
            /// Sizes of messages received, in bytes.
            public let messageInboundBytes: Recorder

            /// Number of messages sent.
            public let messageOutboundCount: Counter
            /// Sizes of messages sent, in bytes.
            public let messageOutboundBytes: Recorder

            /// Creates all shell metrics, using labels derived from `settings`.
            ///
            /// - Parameter settings: settings whose `metrics.makeLabel` is used to build every metric label.
            public init(settings: SWIM.Settings) {
                let labels = settings.metrics

                self.pingResponseTime = Timer(label: labels.makeLabel("roundTripTime", "ping"))

                // Both pingRequest timers share a label and are distinguished by the "type" dimension.
                self.pingRequestResponseTimeAll = Timer(label: labels.makeLabel("roundTripTime", "pingRequest"), dimensions: [("type", "all")])
                self.pingRequestResponseTimeFirst = Timer(label: labels.makeLabel("roundTripTime", "pingRequest"), dimensions: [("type", "firstAck")])

                // Message metrics share labels per kind (count / bytes) and are split by "direction".
                self.messageInboundCount = Counter(label: labels.makeLabel("message", "count"), dimensions: [("direction", "in")])
                self.messageInboundBytes = Recorder(label: labels.makeLabel("message", "bytes"), dimensions: [("direction", "in")])

                self.messageOutboundCount = Counter(label: labels.makeLabel("message", "count"), dimensions: [("direction", "out")])
                self.messageOutboundBytes = Recorder(label: labels.makeLabel("message", "bytes"), dimensions: [("direction", "out")])
            }
        }
    }
}

extension SWIM.Metrics {
    /// Update the member gauges based on SWIM's membership.
    ///
    /// Counts the passed in members per status and records the totals into the
    /// `membersAlive`, `membersSuspect` and `membersUnreachable` gauges.
    ///
    /// - Parameter members: the membership whose members should be counted.
    public func updateMembership(_ members: SWIM.Membership) {
        let tally = members.reduce(into: (alive: 0, suspect: 0, unreachable: 0)) { counts, member in
            switch member.status {
            case .alive:
                counts.alive += 1
            case .suspect:
                counts.suspect += 1
            case .unreachable:
                counts.unreachable += 1
            case .dead:
                () // dead members are reported when they are removed and tombstoned, not via a gauge
            }
        }

        self.membersAlive.record(tally.alive)
        self.membersSuspect.record(tally.suspect)
        self.membersUnreachable.record(tally.unreachable)
    }
}
2 changes: 1 addition & 1 deletion Sources/SWIM/SWIM.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// This source file is part of the Swift Cluster Membership open source project
//
// Copyright (c) 2018-2019 Apple Inc. and the Swift Cluster Membership project authors
// Copyright (c) 2020 Apple Inc. and the Swift Cluster Membership project authors
// Licensed under Apache License v2.0
//
// See LICENSE.txt for license information
Expand Down
Loading

0 comments on commit 343e2ad

Please sign in to comment.