-
Notifications
You must be signed in to change notification settings - Fork 29
Description
Circa September 2021, GitHub modified the runner routing logic so jobs don't fail anymore when there is no runner available with the required labels.
This eliminates the need for a substantial amount of workarounds in our code, and we should probably consider removing them.
Documentation
github/docs#9307
- If GitHub finds an online and idle runner at a certain level that matches the job's `runs-on` labels, the job is then assigned and sent to the runner.
- If GitHub doesn't find an online and idle runner at any level, the job is queued to all levels and waits for a matching runner from any level to come online and pick up the job.
- If the job remains queued for more than 24 hours, the job will fail.
(From “routing precedence for self-hosted runners” on the official GitHub documentation)
actions/actions-runner-controller#909
This is a deprecated feature for GitHub Cloud as "registration-only" runners are no longer needed due to GitHub changing their runner routing logic to no longer fail a workflow run if it targets a runner label that there are no registered runners for.
(From “autoscaling to/from 0” on the actions-runner-controller documentation, linked from the official documentation)
Code
terraform-provider-iterative/iterative/resource_runner.go
Lines 224 to 262 in fa9c7f8
| var logError error | |
| var logEvents string | |
| err = resource.Retry(d.Timeout(schema.TimeoutCreate), func() *resource.RetryError { | |
| switch cloud := d.Get("cloud").(string); cloud { | |
| case "kubernetes": | |
| logEvents, logError = resourceMachineLogs(ctx, d, m) | |
| default: | |
| logEvents, logError = utils.RunCommand("journalctl --unit cml --no-pager", | |
| 2*time.Second, | |
| net.JoinHostPort(d.Get("instance_ip").(string), "22"), | |
| "ubuntu", | |
| d.Get("ssh_private").(string)) | |
| } | |
| log.Printf("[DEBUG] Collected log events: %#v", logEvents) | |
| log.Printf("[DEBUG] Connection errors: %#v", logError) | |
| if logError != nil { | |
| return resource.RetryableError(fmt.Errorf("Waiting for the machine to accept connections... %s", logError)) | |
| } else if utils.HasStatus(logEvents, "terminated") { | |
| return resource.NonRetryableError(fmt.Errorf("Failed to launch the runner!")) | |
| } else if utils.HasStatus(logEvents, "ready") { | |
| return nil | |
| } | |
| return resource.RetryableError(fmt.Errorf("Waiting for the runner to be ready...")) | |
| }) | |
| if logError != nil { | |
| logEvents += "\n" + logError.Error() | |
| } | |
| if err != nil { | |
| diags = append(diags, diag.Diagnostic{ | |
| Severity: diag.Error, | |
| Summary: fmt.Sprintf("Error checking the runner status"), | |
| Detail: logEvents, | |
| }) | |
| } |
terraform-provider-iterative/iterative/utils/runner.go
Lines 1 to 40 in fa9c7f8
| package utils | |
| import ( | |
| "bufio" | |
| "encoding/json" | |
| "regexp" | |
| "strings" | |
| ) | |
// LogEvent is one JSON record found in the runner logs. Only the keys named
// in the struct tags are decoded; any other keys in the record are ignored.
type LogEvent struct {
	Level      string `json:"level"`
	Time       string `json:"time"`
	Repository string `json:"repo"`
	Job        string `json:"job"`
	Status     string `json:"status"`
	Success    bool   `json:"success"`
}

// ParseLogEvent decodes a single JSON object into a LogEvent.
//
// The error from json.Unmarshal is returned unchanged. On error the returned
// LogEvent may be partially populated and should not be relied upon.
func ParseLogEvent(logEvent string) (LogEvent, error) {
	var result LogEvent
	err := json.Unmarshal([]byte(logEvent), &result)
	return result, err
}

// logRecordPattern matches the span from the first "{" to the last "}" on a
// log line. It is compiled once here instead of once per scanned line.
var logRecordPattern = regexp.MustCompile(`\{.+\}`)

// HasStatus reports whether the runner has reported the given status, by
// parsing the JSONL records from the logs it produces.
//
// Each line of logs is searched for a JSON object; lines without one, or whose
// object does not decode into a LogEvent, are skipped. It returns true as soon
// as a record whose "status" equals status is found, and false otherwise.
func HasStatus(logs string, status string) bool {
	scanner := bufio.NewScanner(strings.NewReader(logs))
	// The default Scanner limit is 64 KiB per line; a longer line would stop
	// the scan early and hide every later record. No line can be longer than
	// the whole input, so this limit can never be hit.
	scanner.Buffer(nil, len(logs)+1)

	for scanner.Scan() {
		// Extract the JSON between curly braces from the log line.
		record := logRecordPattern.Find(scanner.Bytes())
		if record == nil {
			continue
		}
		// Try to parse the retrieved JSON string into a LogEvent structure.
		if event, err := ParseLogEvent(string(record)); err == nil && event.Status == status {
			return true
		}
	}
	return false
}
terraform-provider-iterative/iterative/resource_machine.go
Lines 278 to 289 in fa9c7f8
| func resourceMachineLogs(ctx context.Context, d *schema.ResourceData, m interface{}) (string, error) { | |
| switch cloud := d.Get("cloud").(string); cloud { | |
| case "kubernetes": | |
| return kubernetes.ResourceMachineLogs(ctx, d, m) | |
| default: | |
| return utils.RunCommand("journalctl --no-pager", | |
| 2*time.Second, | |
| net.JoinHostPort(d.Get("instance_ip").(string), "22"), | |
| "ubuntu", | |
| d.Get("ssh_private").(string)) | |
| } | |
| } |