Skip to content

Commit

Permalink
feat: Add runners metrics (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikolaj-krzyzanowski-f3 committed Aug 11, 2023
1 parent 85d2f0a commit 4474ee7
Show file tree
Hide file tree
Showing 10 changed files with 527 additions and 45 deletions.
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.19
require (
github.com/go-kit/log v0.2.1
github.com/google/go-github/v47 v47.1.0
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/common v0.39.0
github.com/stretchr/testify v1.8.1
Expand All @@ -23,13 +24,12 @@ require (
github.com/google/go-querystring v1.1.0 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/patrickmn/go-cache v2.1.0+incompatible // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
golang.org/x/crypto v0.0.0-20210915214749-c084706c2272 // indirect
golang.org/x/net v0.5.0 // indirect
golang.org/x/sys v0.4.0 // indirect
golang.org/x/crypto v0.1.0 // indirect
golang.org/x/net v0.7.0 // indirect
golang.org/x/sys v0.5.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.28.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
12 changes: 6 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,17 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210915214749-c084706c2272 h1:3erb+vDS8lU1sxfDHF4/hhWyaXnhIaO+7RgL4fDZORA=
golang.org/x/crypto v0.0.0-20210915214749-c084706c2272/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU=
golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw=
golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws=
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/oauth2 v0.4.0 h1:NF0gk8LVPg1Ml7SSbGyySuoxdsXitj7TvgvuRxIMc/M=
golang.org/x/oauth2 v0.4.0/go.mod h1:RznEsdpjGAINPTOF0UH/t+xJ75L18YO3Ho6Pyn+uRec=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18=
golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
Expand Down
17 changes: 4 additions & 13 deletions internal/server/billing_metrics_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,15 @@ import (

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/google/go-github/v47/github"
"golang.org/x/oauth2"
)

type BillingMetricsExporter struct {
GHClient *github.Client
GHClient GitHubClient
Logger log.Logger
Opts Opts
}

func NewBillingMetricsExporter(logger log.Logger, opts Opts) *BillingMetricsExporter {
ctx := context.Background()
ts := oauth2.StaticTokenSource(
&oauth2.Token{AccessToken: opts.GitHubAPIToken},
)
tc := oauth2.NewClient(ctx, ts)
client := github.NewClient(tc)

func NewBillingMetricsExporter(logger log.Logger, opts Opts, client GitHubClient) *BillingMetricsExporter {
return &BillingMetricsExporter{
Logger: logger,
Opts: opts,
Expand Down Expand Up @@ -83,7 +74,7 @@ func (c *BillingMetricsExporter) StartUserBilling(ctx context.Context) error {

// collectOrgBilling collects the Actions billing for the configured GitHub organisation.
func (c *BillingMetricsExporter) collectOrgBilling(ctx context.Context) {
actionsBilling, _, err := c.GHClient.Billing.GetActionsBillingOrg(ctx, c.Opts.GitHubOrg)
actionsBilling, err := c.GHClient.GetActionsBillingOrg(ctx, c.Opts.GitHubOrg)
if err != nil {
_ = c.Logger.Log("msg", "failed to retrieve the actions billing for an org", "org", c.Opts.GitHubOrg, "err", err)
return
Expand All @@ -98,7 +89,7 @@ func (c *BillingMetricsExporter) collectOrgBilling(ctx context.Context) {
}

func (c *BillingMetricsExporter) collectUserBilling(ctx context.Context) {
actionsBilling, _, err := c.GHClient.Billing.GetActionsBillingUser(ctx, c.Opts.GitHubUser)
actionsBilling, err := c.GHClient.GetActionsBillingUser(ctx, c.Opts.GitHubUser)
if err != nil {
_ = c.Logger.Log("msg", "failed to retrieve the actions billing for an user", "user", c.Opts.GitHubUser, "err", err)
return
Expand Down
116 changes: 116 additions & 0 deletions internal/server/github_client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package server

import (
"context"
"errors"
"github.com/google/go-github/v47/github"
"strconv"
)

// pageSize is the number of items requested per page from the paginated
// GitHub list endpoints used in this file.
const pageSize = 100

// GitHubClient is the subset of GitHub API operations the exporters in this
// package depend on. Keeping it as an interface lets callers inject an
// alternative implementation in place of DefaultGitHubClient.
type GitHubClient interface {
// GetOrganisationRunnerGroups returns the runner groups of the organisation orgName.
GetOrganisationRunnerGroups(ctx context.Context, orgName string) ([]*github.RunnerGroup, error)
// GetEnterpriseRunners returns the runners registered for the enterprise enterpriseName.
GetEnterpriseRunners(ctx context.Context, enterpriseName string) ([]*github.Runner, error)
// GetGroupRunners returns the runners of the runner group groupID within the organisation orgName.
GetGroupRunners(ctx context.Context, groupID int64, orgName string) ([]*github.Runner, error)
// GetActionsBillingOrg returns the Actions billing summary for the organisation org.
GetActionsBillingOrg(ctx context.Context, org string) (*github.ActionBilling, error)
// GetActionsBillingUser returns the Actions billing summary for the user user.
GetActionsBillingUser(ctx context.Context, user string) (*github.ActionBilling, error)
}

// DefaultGitHubClient is the GitHubClient implementation backed by a
// go-github *github.Client.
type DefaultGitHubClient struct {
// Client is the go-github client every method delegates to.
Client *github.Client
// Opts holds the options passed to NewGitHubClient.
// NOTE(review): none of the methods in this file read Opts — confirm it is still needed.
Opts *Opts
}

// NewGitHubClient wraps client in a DefaultGitHubClient, storing opts alongside it.
func NewGitHubClient(opts *Opts, client *github.Client) *DefaultGitHubClient {
	wrapped := new(DefaultGitHubClient)
	wrapped.Client = client
	wrapped.Opts = opts
	return wrapped
}

// GetOrganisationRunnerGroups lists every runner group of the organisation
// orgName, requesting pageSize entries per page and following the pagination
// until the response reports no next page.
//
// The listing is aborted on the first request error or non-200 response, in
// which case a nil slice and the error are returned.
func (c *DefaultGitHubClient) GetOrganisationRunnerGroups(ctx context.Context, orgName string) ([]*github.RunnerGroup, error) {
	var groups []*github.RunnerGroup

	for page := 1; page > 0; {
		listOpts := &github.ListOrgRunnerGroupOptions{
			ListOptions: github.ListOptions{Page: page, PerPage: pageSize},
		}

		result, resp, err := c.Client.Actions.ListOrganizationRunnerGroups(ctx, orgName, listOpts)
		if err != nil {
			return nil, err
		}

		if status := resp.StatusCode; status != 200 {
			return nil, errors.New("unexpected response from GitHub API: " + strconv.Itoa(status))
		}

		groups = append(groups, result.RunnerGroups...)

		// NextPage is what ends the loop once it is no longer positive.
		page = resp.NextPage
	}

	return groups, nil
}

// GetEnterpriseRunners lists every runner registered for the enterprise
// enterpriseName, requesting pageSize entries per page and following the
// pagination until the response reports no next page.
//
// The listing is aborted on the first request error or non-200 response, in
// which case a nil slice and the error are returned.
func (c *DefaultGitHubClient) GetEnterpriseRunners(ctx context.Context, enterpriseName string) ([]*github.Runner, error) {
	var collected []*github.Runner

	for page := 1; page > 0; {
		listOpts := &github.ListOptions{Page: page, PerPage: pageSize}

		result, resp, err := c.Client.Enterprise.ListRunners(ctx, enterpriseName, listOpts)
		if err != nil {
			return nil, err
		}

		if status := resp.StatusCode; status != 200 {
			return nil, errors.New("unexpected response from GitHub API: " + strconv.Itoa(status))
		}

		collected = append(collected, result.Runners...)

		// NextPage is what ends the loop once it is no longer positive.
		page = resp.NextPage
	}

	return collected, nil
}

// GetGroupRunners lists every runner that belongs to the runner group groupID
// of the organisation orgName, requesting pageSize entries per page and
// following the pagination until the response reports no next page.
//
// The listing is aborted on the first request error or non-200 response, in
// which case a nil slice and the error are returned.
func (c *DefaultGitHubClient) GetGroupRunners(ctx context.Context, groupID int64, orgName string) ([]*github.Runner, error) {
	var collected []*github.Runner

	for page := 1; page > 0; {
		listOpts := &github.ListOptions{Page: page, PerPage: pageSize}

		result, resp, err := c.Client.Actions.ListRunnerGroupRunners(ctx, orgName, groupID, listOpts)
		if err != nil {
			return nil, err
		}

		if status := resp.StatusCode; status != 200 {
			return nil, errors.New("unexpected response from GitHub API: " + strconv.Itoa(status))
		}

		collected = append(collected, result.Runners...)

		// NextPage is what ends the loop once it is no longer positive.
		page = resp.NextPage
	}

	return collected, nil
}

// GetActionsBillingOrg fetches the Actions billing summary for the
// organisation org, discarding the raw HTTP response.
func (c *DefaultGitHubClient) GetActionsBillingOrg(ctx context.Context, org string) (*github.ActionBilling, error) {
	billingService := c.Client.Billing
	summary, _, callErr := billingService.GetActionsBillingOrg(ctx, org)
	return summary, callErr
}

// GetActionsBillingUser fetches the Actions billing summary for the user
// user, discarding the raw HTTP response.
func (c *DefaultGitHubClient) GetActionsBillingUser(ctx context.Context, user string) (*github.ActionBilling, error) {
	billingService := c.Client.Billing
	summary, _, callErr := billingService.GetActionsBillingUser(ctx, user)
	return summary, callErr
}
27 changes: 26 additions & 1 deletion internal/server/metrics.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package server

import "github.com/prometheus/client_golang/prometheus"
import (
"github.com/prometheus/client_golang/prometheus"
"strconv"
)

var (
workflowJobHistogramVec = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Expand Down Expand Up @@ -96,6 +99,13 @@ var (
},
[]string{"org", "user"},
)

// registeredRunnersTotal is a gauge of registered Actions runners, partitioned
// by the "busy", "status" and "runner_group" labels.
registeredRunnersTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "actions_registered_runners_total",
Help: "The number of registered Actions runners.",
},
[]string{"busy", "status", "runner_group"},
)
)

func init() {
Expand All @@ -112,6 +122,7 @@ func init() {
prometheus.MustRegister(totalMinutesUsedUbuntuActions)
prometheus.MustRegister(totalMinutesUsedMacOSActions)
prometheus.MustRegister(totalMinutesUsedWindowsActions)
prometheus.MustRegister(registeredRunnersTotal)
}

type WorkflowObserver interface {
Expand All @@ -123,7 +134,13 @@ type WorkflowObserver interface {
CountWorkflowRunStatus(org, repo, status, conclusion, workflow string)
}

// RunnersObserver records metrics about registered Actions runners.
type RunnersObserver interface {
// ResetRegisteredRunnersTotal clears the registered-runners metric.
ResetRegisteredRunnersTotal()
// IncreaseRegisteredRunnersTotal counts one more registered runner with the
// given busy flag, status and runner group.
IncreaseRegisteredRunnersTotal(busy bool, status string, runnerGroup string)
}

// Compile-time checks that *PrometheusObserver satisfies both observer interfaces.
var _ WorkflowObserver = (*PrometheusObserver)(nil)
var _ RunnersObserver = (*PrometheusObserver)(nil)

// PrometheusObserver is a field-less observer whose methods record onto the
// package-level Prometheus collectors.
type PrometheusObserver struct{}

Expand Down Expand Up @@ -151,3 +168,11 @@ func (o *PrometheusObserver) ObserveWorkflowRunDuration(org, repo, workflowName
// CountWorkflowRunStatus increments the workflow run status counter for the
// given org, repo, status, conclusion and workflow name label values.
func (o *PrometheusObserver) CountWorkflowRunStatus(org, repo, status, conclusion, workflowName string) {
	counter := workflowRunStatusCounter.WithLabelValues(org, repo, status, conclusion, workflowName)
	counter.Inc()
}

// ResetRegisteredRunnersTotal resets the registeredRunnersTotal gauge vector,
// dropping every label combination recorded so far.
func (o *PrometheusObserver) ResetRegisteredRunnersTotal() {
registeredRunnersTotal.Reset()
}

// IncreaseRegisteredRunnersTotal adds one to the registered-runners gauge for
// the label combination (busy, status, runnerGroup). The busy flag is rendered
// as the label value "true" or "false".
func (o *PrometheusObserver) IncreaseRegisteredRunnersTotal(busy bool, status string, runnerGroup string) {
	busyLabel := strconv.FormatBool(busy)
	gauge := registeredRunnersTotal.WithLabelValues(busyLabel, status, runnerGroup)
	gauge.Inc()
}
99 changes: 99 additions & 0 deletions internal/server/runners_metrics_exporter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package server

import (
"context"
"errors"
"time"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/google/go-github/v47/github"
)

// RunnersMetricsExporter periodically polls the GitHub API for runner
// information and reports it through a RunnersObserver.
type RunnersMetricsExporter struct {
// GHClient is used to list runner groups and runners.
GHClient GitHubClient
// Logger receives error and lifecycle messages.
Logger log.Logger
// Opts supplies the organisation, enterprise, token and poll interval settings.
Opts Opts
// Observer receives the per-runner metric updates.
Observer RunnersObserver
}

// NewRunnersMetricsExporter builds a RunnersMetricsExporter from the given
// logger, options, GitHub client and observer. Polling does not begin until
// Start is called.
func NewRunnersMetricsExporter(logger log.Logger, opts Opts, client GitHubClient, observer RunnersObserver) *RunnersMetricsExporter {
	exporter := new(RunnersMetricsExporter)
	exporter.GHClient = client
	exporter.Logger = logger
	exporter.Opts = opts
	exporter.Observer = observer
	return exporter
}

// Start validates the configuration and launches a background goroutine that
// collects runner metrics once per poll interval (Opts.RunnersAPIPollSeconds
// seconds) until ctx is cancelled.
//
// It returns an error, without starting the goroutine, when the GitHub
// organisation or API token is not configured, or when the poll interval is
// not positive.
func (c *RunnersMetricsExporter) Start(ctx context.Context) error {
	if c.Opts.GitHubOrg == "" {
		return errors.New("github org not configured")
	}
	if c.Opts.GitHubAPIToken == "" {
		return errors.New("github token not configured")
	}

	// time.NewTicker panics on a non-positive interval, so reject it up front
	// and surface a configuration error to the caller instead.
	interval := time.Duration(c.Opts.RunnersAPIPollSeconds) * time.Second
	if interval <= 0 {
		return errors.New("runners api poll interval must be positive")
	}

	ticker := time.NewTicker(interval)
	go func() {
		// Release the ticker's resources once polling ends.
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				c.collectRunnersInformation(ctx)
			case <-ctx.Done():
				_ = level.Info(c.Logger).Log("msg", "stopped polling for runner metrics")
				return
			}
		}
	}()

	return nil
}

// collectRunnersInformation rebuilds the registered-runners metric from the
// GitHub API: it lists the organisation's runner groups, the runners of each
// group and, when an enterprise is configured, the enterprise runners, then
// reports every runner to the observer keyed by its group name.
//
// Any API failure is logged and aborts the collection, leaving the metric in
// its freshly reset state.
func (c *RunnersMetricsExporter) collectRunnersInformation(ctx context.Context) {
	// Resetting, otherwise a certain label combination might retain its old value despite not being present in the pool.
	// For example, if there are no busy runners then group[true] will be empty and the old value of group[true] would
	// continue to be reported rather than set to 0 as expected. Same would be true if API calls fail, so we reset first.
	c.Observer.ResetRegisteredRunnersTotal()

	runnerGroups, err := c.GHClient.GetOrganisationRunnerGroups(ctx, c.Opts.GitHubOrg)
	if err != nil {
		_ = level.Error(c.Logger).Log("msg", "unable to retrieve runner groups", "error", err.Error())
		return
	}

	// One entry per runner group, plus one for the optional enterprise pseudo-group.
	allRunners := make(map[string][]*github.Runner, len(runnerGroups)+1)

	for _, runnerGroup := range runnerGroups {
		// Use the nil-safe accessors: ID and Name are pointer fields and a
		// direct dereference would panic if the API omitted either of them.
		groupRunners, err := c.GHClient.GetGroupRunners(ctx, runnerGroup.GetID(), c.Opts.GitHubOrg)
		if err != nil {
			_ = level.Error(c.Logger).Log("msg", "unable to retrieve organisation runners' info", "error", err.Error())
			return
		}

		allRunners[runnerGroup.GetName()] = groupRunners
	}

	// Collect information from the Enterprise runners, if an Enterprise name has been configured.
	// Requires the GitHub API Token to have manage_runners:enterprise scope.
	if c.Opts.GitHubEnterprise != "" {
		enterpriseRunners, err := c.GHClient.GetEnterpriseRunners(ctx, c.Opts.GitHubEnterprise)
		if err != nil {
			_ = level.Error(c.Logger).Log("msg", "unable to retrieve enterprise runners' info", "error", err.Error())
			return
		}

		// We are putting the enterprise runners into a fake runner group named after the enterprise.
		// This is because we already have that name in Grafana and also because there is no way in the API at the moment
		// to tie them to their real runner group.
		allRunners[c.Opts.GitHubEnterprise] = enterpriseRunners
	}

	for group, runners := range allRunners {
		for _, runner := range runners {
			c.Observer.IncreaseRegisteredRunnersTotal(runner.GetBusy(), runner.GetStatus(), group)
		}
	}
}
Loading

0 comments on commit 4474ee7

Please sign in to comment.