-
Notifications
You must be signed in to change notification settings - Fork 126
/
step_run_timeout.go
142 lines (103 loc) · 3.53 KB
/
step_run_timeout.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
package ticker
import (
"context"
"fmt"
"time"
"github.com/hatchet-dev/hatchet/internal/datautils"
"github.com/hatchet-dev/hatchet/internal/services/shared/tasktypes"
"github.com/hatchet-dev/hatchet/internal/taskqueue"
)
// handleScheduleStepRunTimeout arms a timeout for the step run named in the
// task payload.
//
// It decodes the task payload and metadata with t.dv.DecodeAndValidate, parses
// payload.TimeoutAt as an RFC 3339 timestamp, and creates a context whose
// deadline is that instant. The context and its cancel function are recorded
// in t.stepRuns under payload.StepRunId, and a goroutine waits for the context
// to finish (deadline reached or cancel called) before invoking
// runStepRunTimeout.
//
// The deadline context is derived from context.Background(), not from ctx;
// the ctx parameter is currently unused.
//
// It returns a wrapped error when the payload or metadata cannot be decoded or
// when TimeoutAt cannot be parsed.
func (t *TickerImpl) handleScheduleStepRunTimeout(ctx context.Context, task *taskqueue.Task) error {
	t.l.Debug().Msg("ticker: scheduling step run timeout")

	payload := tasktypes.ScheduleStepRunTimeoutTaskPayload{}
	metadata := tasktypes.ScheduleStepRunTimeoutTaskMetadata{}

	if err := t.dv.DecodeAndValidate(task.Payload, &payload); err != nil {
		return fmt.Errorf("could not decode ticker task payload: %w", err)
	}

	if err := t.dv.DecodeAndValidate(task.Metadata, &metadata); err != nil {
		return fmt.Errorf("could not decode ticker task metadata: %w", err)
	}

	timeoutAt, err := time.Parse(time.RFC3339, payload.TimeoutAt)
	if err != nil {
		return fmt.Errorf("could not parse timeout at: %w", err)
	}

	// schedule the timeout
	childCtx, cancel := context.WithDeadline(context.Background(), timeoutAt)

	// Store the schedule in the step run map BEFORE starting the waiter
	// goroutine. runStepRunTimeout looks the entry up by step run ID; if
	// timeoutAt is already in the past the context is done immediately, and a
	// goroutine started first could run before the entry exists, skip the
	// timeout, and leave a stale entry behind once the Store finally happens.
	t.stepRuns.Store(payload.StepRunId, &timeoutCtx{
		ctx:    childCtx,
		cancel: cancel,
	})

	go func() {
		// Unblocks when the deadline passes or when cancel is called.
		<-childCtx.Done()
		t.runStepRunTimeout(metadata.TenantId, payload.JobRunId, payload.StepRunId)
	}()

	return nil
}
// handleCancelStepRunTimeout cancels a previously scheduled step run timeout.
//
// It decodes the task payload and metadata with t.dv.DecodeAndValidate, looks
// up the entry stored in t.stepRuns under payload.StepRunId, replaces the
// entry's context with one carrying the value "cancelled" = true, and then
// calls the entry's cancel function.
//
// It returns a wrapped error when decoding fails, and an error when no entry
// exists for the step run ID.
func (t *TickerImpl) handleCancelStepRunTimeout(ctx context.Context, task *taskqueue.Task) error {
	t.l.Debug().Msg("ticker: canceling step run timeout")

	var (
		payload  tasktypes.CancelStepRunTimeoutTaskPayload
		metadata tasktypes.CancelStepRunTimeoutTaskMetadata
	)

	if err := t.dv.DecodeAndValidate(task.Payload, &payload); err != nil {
		return fmt.Errorf("could not decode ticker task payload: %w", err)
	}

	if err := t.dv.DecodeAndValidate(task.Metadata, &metadata); err != nil {
		return fmt.Errorf("could not decode ticker task metadata: %w", err)
	}

	// Look up the pending timeout for this step run.
	entry, found := t.stepRuns.Load(payload.StepRunId)
	if !found {
		return fmt.Errorf("could not find step run %s", payload.StepRunId)
	}

	pending := entry.(*timeoutCtx)

	// Mark the entry as cancelled first, then cancel: runStepRunTimeout reads
	// this flag from the stored context to tell a cancellation apart from a
	// deadline that actually elapsed.
	pending.ctx = context.WithValue(pending.ctx, "cancelled", true)
	pending.cancel()

	return nil
}
// runStepRunTimeout handles a finished timeout context for the given step run.
//
// The entry for stepRunId is always removed from t.stepRuns when the function
// returns. If no entry exists, or the entry's context carries a true
// "cancelled" value, it logs at debug level and does nothing further.
// Otherwise it enqueues a step-run-timed-out task on
// taskqueue.JOB_PROCESSING_QUEUE; a failure to enqueue is logged, not returned.
func (t *TickerImpl) runStepRunTimeout(tenantId, jobRunId, stepRunId string) {
	defer t.stepRuns.Delete(stepRunId)

	entry, found := t.stepRuns.Load(stepRunId)
	if !found {
		t.l.Debug().Msgf("ticker: could not find step run %s", stepRunId)
		return
	}

	pending := entry.(*timeoutCtx)

	// A non-nil flag is asserted to bool, as set by the cancel handler.
	if flag := pending.ctx.Value("cancelled"); flag != nil && flag.(bool) {
		t.l.Debug().Msgf("ticker: timeout of %s was cancelled", stepRunId)
		return
	}

	t.l.Debug().Msgf("ticker: step run %s timed out", stepRunId)

	// signal the jobs controller that the step timed out
	timedOut := taskStepRunTimedOut(tenantId, jobRunId, stepRunId)
	if err := t.tq.AddTask(context.Background(), taskqueue.JOB_PROCESSING_QUEUE, timedOut); err != nil {
		t.l.Err(err).Msg("could not add step run requeue task")
	}
}
// taskStepRunTimedOut builds the task that reports a timed-out step run.
//
// The payload carries stepRunId and jobRunId, the metadata carries tenantId,
// and the task is addressed to taskqueue.JOB_PROCESSING_QUEUE with the ID
// "step-run-timed-out". Errors from datautils.ToJSONMap are discarded.
func taskStepRunTimedOut(tenantId, jobRunId, stepRunId string) *taskqueue.Task {
	payloadFields := tasktypes.StepRunTimedOutTaskPayload{
		StepRunId: stepRunId,
		JobRunId:  jobRunId,
	}
	metadataFields := tasktypes.StepRunTimedOutTaskMetadata{
		TenantId: tenantId,
	}

	payloadMap, _ := datautils.ToJSONMap(payloadFields)
	metadataMap, _ := datautils.ToJSONMap(metadataFields)

	timedOut := &taskqueue.Task{
		ID:       "step-run-timed-out",
		Queue:    taskqueue.JOB_PROCESSING_QUEUE,
		Payload:  payloadMap,
		Metadata: metadataMap,
	}

	return timedOut
}