Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make logging more concise #410

Merged
merged 1 commit into from
Mar 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions controllers/runner_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,11 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
// Gracefully handle pod-already-exists errors due to informer cache delay.
// Without this we got a few errors like the below on new runner pod:
// 2021-03-16T00:23:10.116Z ERROR controller-runtime.controller Reconciler error {"controller": "runner-controller", "request": "default/example-runnerdeploy-b2g2g-j4mcp", "error": "pods \"example-runnerdeploy-b2g2g-j4mcp\" already exists"}
log.Info("Runner pod already exists. Probably this pod has been already created in previous reconcilation but the new pod is not yet cached.")
log.Info(
"Failed to create pod due to AlreadyExists error. Probably this pod has been already created in previous reconcilation but is still not in the informer cache. Will retry on pod created. If it doesn't repeat, there's no problem",
)

return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
return ctrl.Result{}, nil
}

log.Error(err, "Failed to create pod resource")
Expand All @@ -187,7 +189,7 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {

if deletionDidTimeout {
log.Info(
"Pod failed to delete itself in a timely manner. "+
fmt.Sprintf("Failed to delete pod within %s. ", deletionTimeout)+
"This is typically the case when a Kubernetes node became unreachable "+
"and the kube controller started evicting nodes. Forcefully deleting the pod to not get stuck.",
"podDeletionTimestamp", pod.DeletionTimestamp,
Expand Down Expand Up @@ -262,9 +264,12 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
if lastCheckTime := runner.Status.LastRegistrationCheckTime; lastCheckTime != nil {
nextCheckTime := lastCheckTime.Add(registrationCheckInterval)
now := time.Now()
if nextCheckTime.After(now) {
requeueAfter := nextCheckTime.Sub(now)

// Requeue scheduled by RequeueAfter can happen a bit earlier (like dozens of milliseconds)
// so to avoid excessive, in-effective retry, we heuristically ignore the remaining delay in case it is
// shorter than 1s
requeueAfter := nextCheckTime.Sub(now) - time.Second
if requeueAfter > 0 {
log.Info(
fmt.Sprintf("Skipped registration check because it's deferred until %s. Retrying in %s at latest", nextCheckTime, requeueAfter),
"lastRegistrationCheckTime", lastCheckTime,
Expand Down Expand Up @@ -795,8 +800,6 @@ func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error {

r.Recorder = mgr.GetEventRecorderFor(name)

r.Recorder = mgr.GetEventRecorderFor(name)

return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.Runner{}).
Owns(&corev1.Pod{}).
Expand Down
27 changes: 19 additions & 8 deletions controllers/runnerdeployment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,17 +192,28 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
if len(oldSets) > 0 {
readyReplicas := newestSet.Status.ReadyReplicas

if readyReplicas < currentDesiredReplicas {
log.WithValues("runnerreplicaset", types.NamespacedName{
oldSetsCount := len(oldSets)

logWithDebugInfo := log.WithValues(
"newest_runnerreplicaset", types.NamespacedName{
Namespace: newestSet.Namespace,
Name: newestSet.Name,
}).
Info("Waiting until the newest runner replica set to be 100% available",
"ready", readyReplicas,
"desired", currentDesiredReplicas,
)
},
"newest_runnerreplicaset_replicas_ready", readyReplicas,
"newest_runnerreplicaset_replicas_desired", currentDesiredReplicas,
"old_runnerreplicasets_count", oldSetsCount,
)

if readyReplicas < currentDesiredReplicas {
logWithDebugInfo.
Info("Waiting until the newest runnerreplicaset to be 100% available")

return ctrl.Result{}, nil
}

return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
if oldSetsCount > 0 {
logWithDebugInfo.
Info("The newest runnerreplicaset is 100% available. Deleting old runnerreplicasets")
}

for i := range oldSets {
Expand Down
27 changes: 15 additions & 12 deletions controllers/runnerreplicaset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,14 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
desired = 1
}

log.V(0).Info("debug", "desired", desired, "available", available)

if available > desired {
n := available - desired

// get runners that are currently not busy
var notBusy []v1alpha1.Runner
log.V(0).Info(fmt.Sprintf("Deleting %d runners", n), "desired", desired, "available", available, "ready", ready)

// get runners that are currently offline/not busy/timed-out to register
var deletionCandidates []v1alpha1.Runner

for _, runner := range allRunners.Items {
busy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
if err != nil {
Expand Down Expand Up @@ -168,35 +169,37 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
"configuredRegistrationTimeout", registrationTimeout,
)

notBusy = append(notBusy, runner)
deletionCandidates = append(deletionCandidates, runner)
}

// offline runners should always be a great target for scale down
if offline {
notBusy = append(notBusy, runner)
deletionCandidates = append(deletionCandidates, runner)
}
} else if !busy {
notBusy = append(notBusy, runner)
deletionCandidates = append(deletionCandidates, runner)
}
}

if len(notBusy) < n {
n = len(notBusy)
if len(deletionCandidates) < n {
n = len(deletionCandidates)
}

for i := 0; i < n; i++ {
if err := r.Client.Delete(ctx, &notBusy[i]); client.IgnoreNotFound(err) != nil {
if err := r.Client.Delete(ctx, &deletionCandidates[i]); client.IgnoreNotFound(err) != nil {
log.Error(err, "Failed to delete runner resource")

return ctrl.Result{}, err
}

r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", notBusy[i].Name))
log.Info("Deleted runner", "runnerreplicaset", rs.ObjectMeta.Name)
r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", deletionCandidates[i].Name))
log.Info("Deleted runner")
}
} else if desired > available {
n := desired - available

log.V(0).Info(fmt.Sprintf("Creating %d runner(s)", n), "desired", desired, "available", available, "ready", ready)

for i := 0; i < n; i++ {
newRunner, err := r.newRunner(rs)
if err != nil {
Expand Down
32 changes: 16 additions & 16 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ const (
)

var (
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")
scheme = runtime.NewScheme()
log = ctrl.Log.WithName("actions-runner-controller")
)

func init() {
Expand Down Expand Up @@ -109,78 +109,78 @@ func main() {
Namespace: namespace,
})
if err != nil {
setupLog.Error(err, "unable to start manager")
log.Error(err, "unable to start manager")
os.Exit(1)
}

runnerReconciler := &controllers.RunnerReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("Runner"),
Log: log.WithName("runner"),
Scheme: mgr.GetScheme(),
GitHubClient: ghClient,
RunnerImage: runnerImage,
DockerImage: dockerImage,
}

if err = runnerReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Runner")
log.Error(err, "unable to create controller", "controller", "Runner")
os.Exit(1)
}

runnerSetReconciler := &controllers.RunnerReplicaSetReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("RunnerReplicaSet"),
Log: log.WithName("runnerreplicaset"),
Scheme: mgr.GetScheme(),
GitHubClient: ghClient,
}

if err = runnerSetReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "RunnerReplicaSet")
log.Error(err, "unable to create controller", "controller", "RunnerReplicaSet")
os.Exit(1)
}

runnerDeploymentReconciler := &controllers.RunnerDeploymentReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("RunnerDeployment"),
Log: log.WithName("runnerdeployment"),
Scheme: mgr.GetScheme(),
CommonRunnerLabels: commonRunnerLabels,
}

if err = runnerDeploymentReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "RunnerDeployment")
log.Error(err, "unable to create controller", "controller", "RunnerDeployment")
os.Exit(1)
}

horizontalRunnerAutoscaler := &controllers.HorizontalRunnerAutoscalerReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("HorizontalRunnerAutoscaler"),
Log: log.WithName("horizontalrunnerautoscaler"),
Scheme: mgr.GetScheme(),
GitHubClient: ghClient,
CacheDuration: syncPeriod - 10*time.Second,
}

if err = horizontalRunnerAutoscaler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "HorizontalRunnerAutoscaler")
log.Error(err, "unable to create controller", "controller", "HorizontalRunnerAutoscaler")
os.Exit(1)
}

if err = (&actionsv1alpha1.Runner{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "Runner")
log.Error(err, "unable to create webhook", "webhook", "Runner")
os.Exit(1)
}
if err = (&actionsv1alpha1.RunnerDeployment{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "RunnerDeployment")
log.Error(err, "unable to create webhook", "webhook", "RunnerDeployment")
os.Exit(1)
}
if err = (&actionsv1alpha1.RunnerReplicaSet{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "RunnerReplicaSet")
log.Error(err, "unable to create webhook", "webhook", "RunnerReplicaSet")
os.Exit(1)
}
// +kubebuilder:scaffold:builder

setupLog.Info("starting manager")
log.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
log.Error(err, "problem running manager")
os.Exit(1)
}
}
Expand Down