cloudbase · gabriel-samfira · Apr 13, 2026 · Apr 13, 2026
diff --git a/doc/credentials.md b/doc/credentials.md
@@ -4,15 +4,23 @@ GARM needs credentials to interact with GitHub or Gitea: creating runners, manag
 
 <!-- TOC -->
 
-- [Credential types](#credential-types)
-- [GitHub permissions](#github-permissions)
-    - [PAT (classic) scopes](#pat-classic-scopes)
-    - [Fine-grained PAT permissions](#fine-grained-pat-permissions)
-    - [GitHub App permissions](#github-app-permissions)
-- [Managing credentials](#managing-credentials)
-- [Gitea credentials](#gitea-credentials)
-- [Credential and endpoint relationship](#credential-and-endpoint-relationship)
-- [Security](#security)
+- [Credentials](#credentials)
+    - [Credential types](#credential-types)
+    - [GitHub permissions](#github-permissions)
+        - [PAT classic scopes](#pat-classic-scopes)
+        - [Fine-grained PAT permissions](#fine-grained-pat-permissions)
+        - [GitHub App permissions](#github-app-permissions)
+    - [Managing credentials](#managing-credentials)
+        - [Add a PAT](#add-a-pat)
+        - [Add a GitHub App](#add-a-github-app)
+        - [List credentials](#list-credentials)
+        - [Show credential details](#show-credential-details)
+        - [Delete a credential](#delete-a-credential)
+    - [Gitea credentials](#gitea-credentials)
+        - [Create a Gitea token](#create-a-gitea-token)
+        - [Add Gitea credentials to GARM](#add-gitea-credentials-to-garm)
+    - [Credential and endpoint relationship](#credential-and-endpoint-relationship)
+    - [Security](#security)
 
 <!-- /TOC -->
 

diff --git a/doc/monitoring.md b/doc/monitoring.md
@@ -12,12 +12,13 @@ GARM provides built-in tools for monitoring, live log streaming, event watching,
         - [Metrics reference](#metrics-reference)
             - [Health](#health)
             - [Webhooks](#webhooks)
-            - [Entities repositories organizations enterprises](#entities-repositories-organizations-enterprises)
+            - [Entities repositories, organizations, enterprises](#entities-repositories-organizations-enterprises)
             - [Providers](#providers)
             - [Pools](#pools)
+            - [Scale sets](#scale-sets)
             - [Runner instances](#runner-instances)
             - [Jobs](#jobs)
-            - [GitHubGitea API](#githubgitea-api)
+            - [GitHub/Gitea API](#githubgitea-api)
     - [Live log streaming](#live-log-streaming)
         - [Filtering logs](#filtering-logs)
     - [Database events](#database-events)
@@ -113,17 +114,29 @@ The `_info` gauges are always set to 1; the labels are what carry the informatio
 | `garm_pool_min_idle_runners` | Gauge | `id` |
 | `garm_pool_bootstrap_timeout` | Gauge | `id` |
 
-> [!NOTE]
-> Pool metrics only cover pools, not scale sets. Scale sets currently have no dedicated metrics (but jobs from scale sets are captured by `garm_job_status` via the `scaleset_job_id` label).
+#### Scale sets
+
+| Metric | Type | Labels |
+|--------|------|--------|
+| `garm_scaleset_info` | Gauge | `id`, `scaleset_id`, `name`, `image`, `flavor`, `prefix`, `os_type`, `os_arch`, `tags`, `provider`, `runner_group`, `scaleset_owner`, `scaleset_type` |
+| `garm_scaleset_status` | Gauge | `id`, `enabled`, `state` |
+| `garm_scaleset_max_runners` | Gauge | `id` |
+| `garm_scaleset_min_idle_runners` | Gauge | `id` |
+| `garm_scaleset_desired_runner_count` | Gauge | `id` |
+| `garm_scaleset_bootstrap_timeout` | Gauge | `id` |
+
+The `id` label is GARM's internal scale set ID; `scaleset_id` is the numeric ID assigned by GitHub. `garm_scaleset_desired_runner_count` reflects the runner count GitHub has requested for the scale set (unique to scale sets, since GitHub drives scheduling).
 
 #### Runner instances
 
 | Metric | Type | Labels |
 |--------|------|--------|
-| `garm_runner_status` | Gauge | `name`, `status`, `runner_status`, `pool_owner`, `pool_type`, `pool_id`, `provider` |
+| `garm_runner_status` | Gauge | `name`, `status`, `runner_status`, `pool_owner`, `pool_type`, `pool_id`, `scaleset_id`, `provider` |
 | `garm_runner_operations_total` | Counter | `operation`, `provider` |
 | `garm_runner_errors_total` | Counter | `operation`, `provider` |
 
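+`garm_runner_status` covers both pool-owned and scale-set-owned runners. For any given series, exactly one of `pool_id` / `scaleset_id` is populated; the other is an empty string. Despite their names, `pool_owner` and `pool_type` describe the owning entity (repo/org/enterprise) of whichever parent the runner has, so they are also set for scale set runners. Note that `scaleset_id` on this metric is GARM's internal scale set ID (the value of the `id` label on the `garm_scaleset_*` metrics), not the GitHub-assigned ID that `garm_scaleset_info` exposes as `scaleset_id`.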
+
 The `operation` label on `garm_runner_operations_total` / `garm_runner_errors_total` takes one of these values:
 
 | Operation | Description |

diff --git a/metrics/instance.go b/metrics/instance.go
@@ -24,7 +24,7 @@ var (
 		Subsystem: metricsRunnerSubsystem,
 		Name:      "status",
 		Help:      "Status of the instance",
-	}, []string{"name", "status", "runner_status", "pool_owner", "pool_type", "pool_id", "provider"})
+	}, []string{"name", "status", "runner_status", "pool_owner", "pool_type", "pool_id", "scaleset_id", "provider"})
 
 	InstanceOperationCount = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: metricsNamespace,

diff --git a/metrics/metrics.go b/metrics/metrics.go
@@ -22,6 +22,7 @@ const (
 	metricsNamespace             = "garm"
 	metricsRunnerSubsystem       = "runner"
 	metricsPoolSubsystem         = "pool"
+	metricsScaleSetSubsystem     = "scaleset"
 	metricsProviderSubsystem     = "provider"
 	metricsOrganizationSubsystem = "organization"
 	metricsRepositorySubsystem   = "repository"
@@ -57,6 +58,13 @@ func RegisterMetrics() error {
 		PoolMaxRunners,
 		PoolMinIdleRunners,
 		PoolBootstrapTimeout,
+		// scale set metrics
+		ScaleSetInfo,
+		ScaleSetStatus,
+		ScaleSetMaxRunners,
+		ScaleSetMinIdleRunners,
+		ScaleSetDesiredRunnerCount,
+		ScaleSetBootstrapTimeout,
 		// health metrics
 		GarmHealth,
 

diff --git a/metrics/scaleset.go b/metrics/scaleset.go
@@ -0,0 +1,63 @@
+// Copyright 2026 Cloudbase Solutions SRL
+//
+//    Licensed under the Apache License, Version 2.0 (the "License"); you may
+//    not use this file except in compliance with the License. You may obtain
+//    a copy of the License at
+//
+//         http://www.apache.org/licenses/LICENSE-2.0
+//
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+//    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+//    License for the specific language governing permissions and limitations
+//    under the License.
+
+package metrics
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+var ( // scale set gauges; listed for registration in RegisterMetrics (metrics/metrics.go)
+	ScaleSetInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: metricsNamespace,
+		Subsystem: metricsScaleSetSubsystem,
+		Name:      "info", // documented as garm_scaleset_info
+		Help:      "Info of the scale set",
+	}, []string{"id", "scaleset_id", "name", "image", "flavor", "prefix", "os_type", "os_arch", "tags", "provider", "runner_group", "scaleset_owner", "scaleset_type"}) // per doc/monitoring.md: id is GARM's internal scale set ID, scaleset_id is the numeric ID assigned by GitHub
+
+	ScaleSetStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: metricsNamespace,
+		Subsystem: metricsScaleSetSubsystem,
+		Name:      "status", // documented as garm_scaleset_status
+		Help:      "Status of the scale set",
+	}, []string{"id", "enabled", "state"})
+
+	ScaleSetMaxRunners = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: metricsNamespace,
+		Subsystem: metricsScaleSetSubsystem,
+		Name:      "max_runners", // documented as garm_scaleset_max_runners
+		Help:      "Maximum number of runners in the scale set",
+	}, []string{"id"})
+
+	ScaleSetMinIdleRunners = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: metricsNamespace,
+		Subsystem: metricsScaleSetSubsystem,
+		Name:      "min_idle_runners", // documented as garm_scaleset_min_idle_runners
+		Help:      "Minimum number of idle runners in the scale set",
+	}, []string{"id"})
+
+	ScaleSetDesiredRunnerCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: metricsNamespace,
+		Subsystem: metricsScaleSetSubsystem,
+		Name:      "desired_runner_count", // documented as garm_scaleset_desired_runner_count
+		Help:      "Desired runner count requested by GitHub for the scale set",
+	}, []string{"id"})
+
+	ScaleSetBootstrapTimeout = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: metricsNamespace,
+		Subsystem: metricsScaleSetSubsystem,
+		Name:      "bootstrap_timeout", // documented as garm_scaleset_bootstrap_timeout
+		Help:      "Runner bootstrap timeout in the scale set",
+	}, []string{"id"})
+)
diff --git a/runner/metrics/instance.go b/runner/metrics/instance.go
@@ -16,13 +16,20 @@ package metrics
 
 import (
 	"context"
+	"strconv"
 
 	"github.com/cloudbase/garm/metrics"
 	"github.com/cloudbase/garm/runner"
 )
 
+type parentInfo struct { // owner details common to pools and scale sets, used to label runner metrics
+	OwnerName    string // org, enterprise or repo name of the parent; emitted as the pool_owner label
+	Type         string // result of PoolType() / ScaleSetType(); emitted as the pool_type label
+	ProviderName string // ProviderName of the pool or scale set; emitted as the provider label
+}
+
 // CollectInstanceMetric collects the metrics for the runner instances
-// reflecting the statuses and the pool they belong to.
+// reflecting the statuses and the pool or scale set they belong to.
 func CollectInstanceMetric(ctx context.Context, r *runner.Runner) error {
 	// reset metrics
 	metrics.InstanceStatus.Reset()
@@ -37,42 +44,68 @@ func CollectInstanceMetric(ctx context.Context, r *runner.Runner) error {
 		return err
 	}
 
-	type poolInfo struct {
-		Name         string
-		Type         string
-		ProviderName string
+	scaleSets, err := r.ListAllScaleSets(ctx)
+	if err != nil {
+		return err
 	}
 
-	poolNames := make(map[string]poolInfo)
+	poolParents := make(map[string]parentInfo, len(pools))
 	for _, pool := range pools {
+		info := parentInfo{
+			Type:         string(pool.PoolType()),
+			ProviderName: pool.ProviderName,
+		}
 		switch {
 		case pool.OrgName != "":
-			poolNames[pool.ID] = poolInfo{
-				Name: pool.OrgName,
-				Type: string(pool.PoolType()),
-			}
+			info.OwnerName = pool.OrgName
 		case pool.EnterpriseName != "":
-			poolNames[pool.ID] = poolInfo{
-				Name: pool.EnterpriseName,
-				Type: string(pool.PoolType()),
-			}
+			info.OwnerName = pool.EnterpriseName
 		default:
-			poolNames[pool.ID] = poolInfo{
-				Name: pool.RepoName,
-				Type: string(pool.PoolType()),
-			}
+			info.OwnerName = pool.RepoName
 		}
+		poolParents[pool.ID] = info
+	}
+
+	scaleSetParents := make(map[uint]parentInfo, len(scaleSets))
+	for _, scaleSet := range scaleSets {
+		info := parentInfo{
+			Type:         string(scaleSet.ScaleSetType()),
+			ProviderName: scaleSet.ProviderName,
+		}
+		switch {
+		case scaleSet.OrgName != "":
+			info.OwnerName = scaleSet.OrgName
+		case scaleSet.EnterpriseName != "":
+			info.OwnerName = scaleSet.EnterpriseName
+		default:
+			info.OwnerName = scaleSet.RepoName
+		}
+		scaleSetParents[scaleSet.ID] = info
 	}
 
 	for _, instance := range instances {
+		var (
+			parent     parentInfo
+			poolID     string
+			scaleSetID string
+		)
+		if instance.ScaleSetID != 0 {
+			parent = scaleSetParents[instance.ScaleSetID]
+			scaleSetID = strconv.FormatUint(uint64(instance.ScaleSetID), 10)
+		} else {
+			parent = poolParents[instance.PoolID]
+			poolID = instance.PoolID
+		}
+
 		metrics.InstanceStatus.WithLabelValues(
-			instance.Name,                           // label: name
-			string(instance.Status),                 // label: status
-			string(instance.RunnerStatus),           // label: runner_status
-			poolNames[instance.PoolID].Name,         // label: pool_owner
-			poolNames[instance.PoolID].Type,         // label: pool_type
-			instance.PoolID,                         // label: pool_id
-			poolNames[instance.PoolID].ProviderName, // label: provider
+			instance.Name,                 // label: name
+			string(instance.Status),       // label: status
+			string(instance.RunnerStatus), // label: runner_status
+			parent.OwnerName,              // label: pool_owner
+			parent.Type,                   // label: pool_type
+			poolID,                        // label: pool_id
+			scaleSetID,                    // label: scaleset_id
+			parent.ProviderName,           // label: provider
 		).Set(1)
 	}
 	return nil

diff --git a/runner/metrics/metrics.go b/runner/metrics/metrics.go
@@ -89,6 +89,12 @@ func collectMetrics(ctx context.Context, r *runner.Runner, controllerInfo params
 		return err
 	}
 
+	slog.DebugContext(ctx, "collecting scale set metrics")
+	err = CollectScaleSetMetric(ctx, r)
+	if err != nil {
+		return err
+	}
+
 	slog.DebugContext(ctx, "collecting instance metrics")
 	err = CollectInstanceMetric(ctx, r)
 	if err != nil {