internal/workflow: add expansions
One of the upcoming use cases for relui, tagging x/ repos, has a
dependency graph we can't know without doing significant work. This is
in conflict with one of the workflow package's design principles, that
workflows be defined ahead of time. So here's one idea for bending that
rule.

Introduce the concept of an expansion task. An expansion task runs the
same as a normal task, but in addition to its inputs it receives (a copy
of) the workflow definition and can add tasks and outputs to it as
usual. Once the task finishes running, the workflow swaps in the new
definition and continues. Other tasks can run concurrently.

One significant difference is that expansion tasks must run every time
the workflow is started or resumed, so they should be pure functions
with no network access or side effects. To encourage this, they don't
receive a Context.
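
For illustration only (not part of this CL), a workflow using an
expansion could look roughly like the sketch below. The import alias,
the newTaggingWorkflow and tagRepo names, and the exact shapes of
Const/Task1/Output are assumptions based on the rest of the package.

	package example // hypothetical

	import (
		"context"

		wf "golang.org/x/build/internal/workflow"
	)

	func newTaggingWorkflow() *wf.Definition {
		wd := wf.New()
		// Stand-in input; a real workflow would compute this with a task.
		repos := wf.Const([]string{"mod", "tools"})

		// Hypothetical task body: tags one repo and returns the tag name.
		tagRepo := func(ctx context.Context, repo string) (string, error) {
			return repo + "@v0.1.0", nil
		}

		wf.Expand1(wd, "plan tagging", func(d *wf.Definition, rs []string) error {
			// Expansions may run again on every resume, so this must be
			// deterministic: the same inputs must add the same tasks.
			for _, r := range rs {
				tagged := wf.Task1(d, "tag "+r, tagRepo, wf.Const(r))
				wf.Output(d, "tagged "+r, tagged)
			}
			return nil
		}, repos)
		return wd
	}

Because the expansion closure only reads its inputs, re-running it on
resume adds the same tasks and outputs, which is what the new Run loop
expects.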

Introducing this required some refactoring of the existing Run loop: we
need to allow these apparently-unused tasks to run, and to handle new
tasks appearing mid-run. But overall I think it's not too big a
distortion of the design.

For golang/go#48523.

Change-Id: I92c164883e06474fa951abba5fa18fd78eacb8ce
Reviewed-on: https://go-review.googlesource.com/c/build/+/423037
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Jenny Rakoczy <jenny@golang.org>
heschi committed Aug 25, 2022
1 parent 7cf3efd commit bf01fcc
Showing 2 changed files with 285 additions and 62 deletions.
240 changes: 185 additions & 55 deletions internal/workflow/workflow.go
@@ -7,8 +7,7 @@
//
// Workflows are a set of tasks and actions that produce and consume Values.
// Tasks don't run until the workflow is started, so Values represent data that
// doesn't exist yet, and can't be used directly. Each value has a dynamic type,
// which must match its uses.
// doesn't exist yet, and can't be used directly.
//
// To wrap an existing Go object in a Value, use Const. To define a
// parameter that will be set when the workflow is started, use Param.
@@ -32,6 +31,13 @@
// definitions to create an ordering dependency that doesn't correspond to a
// function argument.
//
// Expansions are a third type of function that adds to a running workflow
// definition rather than producing an output. Unlike Actions and Tasks, they
// may execute multiple times and must produce exactly the same workflow
// modifications each time. As such, they should be pure functions of their
// inputs. Producing different modifications, or running multiple expansions
// concurrently, is an error that will corrupt the workflow's state.
//
// Once a Definition is complete, call Start to set its parameters and
// instantiate it into a Workflow. Call Run to execute the workflow until
// completion.
@@ -75,6 +81,17 @@ func (d *Definition) name(name string) string {
return d.namePrefix + name
}

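// shallowClone returns a new Definition containing the same tasks and
// outputs as d, so that an expansion can add to it without mutating the
// original.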
func (d *Definition) shallowClone() *Definition {
clone := New()
for k, v := range d.tasks {
clone.tasks[k] = v
}
for k, v := range d.outputs {
clone.outputs[k] = v
}
return clone
}

type definitionState struct {
parameters []MetaParameter // Ordered according to registration, unique parameter names.
tasks map[string]*taskDefinition
@@ -324,7 +341,7 @@ func Task5[C context.Context, I1, I2, I3, I4, I5, O1 any](d *Definition, name st
return addTask[O1](d, name, f, []metaValue{i1, i2, i3, i4, i5}, opts)
}

func addTask[O1 any](d *Definition, name string, f interface{}, inputs []metaValue, opts []TaskOption) *taskResult[O1] {
func addFunc(d *Definition, name string, f interface{}, inputs []metaValue, opts []TaskOption) *taskDefinition {
name = d.name(name)
td := &taskDefinition{name: name, f: f, args: inputs}
for _, input := range inputs {
@@ -334,12 +351,22 @@ func addTask[O1 any](d *Definition, name string, f interface{}, inputs []metaVal
td.deps = append(td.deps, opt.(*after).deps...)
}
d.tasks[name] = td
return td
}

func addTask[O1 any](d *Definition, name string, f interface{}, inputs []metaValue, opts []TaskOption) *taskResult[O1] {
td := addFunc(d, name, f, inputs, opts)
return &taskResult[O1]{td}
}

func addAction(d *Definition, name string, f interface{}, inputs []metaValue, opts []TaskOption) *dependency {
tr := addTask[interface{}](d, name, f, inputs, opts)
return &dependency{tr.task}
td := addFunc(d, name, f, inputs, opts)
return &dependency{td}
}

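// addExpansion registers f as an expansion task, which will receive a
// copy of the Definition as its first argument rather than a Context.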
func addExpansion(d *Definition, name string, f interface{}, inputs []metaValue, opts []TaskOption) {
td := addFunc(d, name, f, inputs, opts)
td.isExpansion = true
}

// ActionN adds an Action to the workflow definition. Its behavior and
@@ -377,6 +404,39 @@ func (d *dependency) dependencies() []*taskDefinition {
return []*taskDefinition{d.task}
}

// ExpandN adds a workflow expansion task to the workflow definition.
// Expansion tasks run similarly to normal tasks, but instead of computing
// a result, they can add to the workflow definition.
//
// Unlike normal tasks, expansions may run multiple times and must produce
// the exact same changes to the definition each time.
//
// Running more than one expansion concurrently is an error and will corrupt
// the workflow.
func Expand0(d *Definition, name string, f func(*Definition) error, opts ...TaskOption) {
addExpansion(d, name, f, nil, opts)
}

func Expand1[I1 any](d *Definition, name string, f func(*Definition, I1) error, i1 Value[I1], opts ...TaskOption) {
addExpansion(d, name, f, []metaValue{i1}, opts)
}

func Expand2[I1, I2 any](d *Definition, name string, f func(*Definition, I1, I2) error, i1 Value[I1], i2 Value[I2], opts ...TaskOption) {
addExpansion(d, name, f, []metaValue{i1, i2}, opts)
}

func Expand3[I1, I2, I3 any](d *Definition, name string, f func(*Definition, I1, I2, I3) error, i1 Value[I1], i2 Value[I2], i3 Value[I3], opts ...TaskOption) {
addExpansion(d, name, f, []metaValue{i1, i2, i3}, opts)
}

func Expand4[I1, I2, I3, I4 any](d *Definition, name string, f func(*Definition, I1, I2, I3, I4) error, i1 Value[I1], i2 Value[I2], i3 Value[I3], i4 Value[I4], opts ...TaskOption) {
addExpansion(d, name, f, []metaValue{i1, i2, i3, i4}, opts)
}

func Expand5[I1, I2, I3, I4, I5 any](d *Definition, name string, f func(*Definition, I1, I2, I3, I4, I5) error, i1 Value[I1], i2 Value[I2], i3 Value[I3], i4 Value[I4], i5 Value[I5], opts ...TaskOption) {
addExpansion(d, name, f, []metaValue{i1, i2, i3, i4, i5}, opts)
}

// A TaskContext is a context.Context, plus workflow-related features.
type TaskContext struct {
disableRetries bool
@@ -439,10 +499,11 @@ type Logger interface {
}

type taskDefinition struct {
name string
args []metaValue
deps []*taskDefinition
f interface{}
name string
isExpansion bool
args []metaValue
deps []*taskDefinition
f interface{}
}

type taskResult[T any] struct {
@@ -477,16 +538,25 @@ type Workflow struct {

def *Definition
tasks map[*taskDefinition]*taskState
// pendingStates stores states that haven't been loaded because their
// tasks didn't exist at Resume time.
pendingStates map[string]*TaskState
}

type taskState struct {
def *taskDefinition
started bool
finished bool
def *taskDefinition
created bool
started bool
finished bool
err error

// normal tasks
result interface{}
serializedResult []byte
err error
retryCount int

// workflow expansion
expanded *Definition
}

func (t *taskState) toExported() *TaskState {
Expand Down Expand Up @@ -534,7 +604,7 @@ func (w *Workflow) validate() error {
used[output] = true
}
for _, task := range w.def.tasks {
if !used[task] {
if !used[task] && !task.isExpansion {
return fmt.Errorf("task %v is not referenced and should be deleted", task.name)
}
}
@@ -568,39 +638,55 @@ func (w *Workflow) validate() error {
func Resume(def *Definition, state *WorkflowState, taskStates map[string]*TaskState) (*Workflow, error) {
w := &Workflow{
ID: state.ID,
def: def,
params: state.Params,
tasks: map[*taskDefinition]*taskState{},
retryCommands: make(chan retryCommand, len(def.tasks)),
def: def,
tasks: map[*taskDefinition]*taskState{},
pendingStates: taskStates,
}
if err := w.validate(); err != nil {
return nil, err
}
for _, taskDef := range def.tasks {
tState, ok := taskStates[taskDef.name]
if !ok {
return nil, fmt.Errorf("task state for %q not found", taskDef.name)
var err error
w.tasks[taskDef], err = loadTaskState(w.pendingStates, taskDef, false)
if err != nil {
return nil, fmt.Errorf("loading state for %v: %v", taskDef.name, err)
}
state := &taskState{
def: taskDef,
started: tState.Finished, // Can't resume tasks, so either it's new or done.
finished: tState.Finished,
serializedResult: tState.SerializedResult,
retryCount: tState.RetryCount,
}
if state.serializedResult != nil {
result, err := unmarshalNew(reflect.ValueOf(taskDef.f).Type().Out(0), tState.SerializedResult)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal result of %v: %v", taskDef.name, err)
}
state.result = result
}
return w, nil
}

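// loadTaskState rebuilds a taskState from its serialized form, if any.
// Expansion tasks are never considered finished because they must re-run
// on every resume. If allowMissing is true, a missing entry in states is
// treated as a brand-new task rather than an error.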
func loadTaskState(states map[string]*TaskState, def *taskDefinition, allowMissing bool) (*taskState, error) {
tState, ok := states[def.name]
if !ok {
if !allowMissing {
return nil, fmt.Errorf("task state not found")
}
if tState.Error != "" {
state.err = fmt.Errorf("serialized error: %v", tState.Error) // untyped, but hopefully that doesn't matter.
tState = &TaskState{}
}
// Can't resume tasks, so either it's new or done.
// Expansions need to run every time.
finished := tState.Finished && !def.isExpansion
state := &taskState{
def: def,
created: ok,
started: finished,
finished: finished,
serializedResult: tState.SerializedResult,
retryCount: tState.RetryCount,
}
if state.serializedResult != nil {
result, err := unmarshalNew(reflect.ValueOf(def.f).Type().Out(0), tState.SerializedResult)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal result: %v", err)
}
w.tasks[taskDef] = state
state.result = result
}
return w, nil
if tState.Error != "" {
state.err = fmt.Errorf("serialized error: %v", tState.Error) // untyped, but hopefully that doesn't matter.
}
return state, nil
}

func unmarshalNew(t reflect.Type, data []byte) (interface{}, error) {
Expand All @@ -620,29 +706,25 @@ func (w *Workflow) Run(ctx context.Context, listener Listener) (map[string]inter
listener = &defaultListener{}
}

for _, task := range w.tasks {
listener.TaskStateChanged(w.ID, task.def.name, task.toExported())
}

stateChan := make(chan taskState, 2*len(w.def.tasks))
doneOnce := ctx.Done()
for {
// If we have all the outputs, the workflow is done.
outValues := map[string]interface{}{}
for outName, outDef := range w.def.outputs {
if task := w.tasks[outDef]; task.finished && task.err == nil {
outValues[outName] = task.result
}
}
if len(outValues) == len(w.def.outputs) {
return outValues, nil
}

running := 0
allDone := true
for _, task := range w.tasks {
if !task.created {
task.created = true
listener.TaskStateChanged(w.ID, task.def.name, task.toExported())
}
if task.started && !task.finished {
running++
}
if !task.finished || task.err != nil {
allDone = false
}
}
if allDone {
break
}

if ctx.Err() == nil {
@@ -658,9 +740,13 @@ func (w *Workflow) Run(ctx context.Context, listener Listener) (map[string]inter
task.started = true
running++
listener.TaskStateChanged(w.ID, task.def.name, task.toExported())
go func(task taskState) {
stateChan <- runTask(ctx, w.ID, listener, task, args)
}(*task)
taskCopy := *task
if task.def.isExpansion {
defCopy := w.def.shallowClone()
go func() { stateChan <- runExpansion(defCopy, taskCopy, args) }()
} else {
go func() { stateChan <- runTask(ctx, w.ID, listener, taskCopy, args) }()
}
}
}

@@ -675,6 +761,9 @@ func (w *Workflow) Run(ctx context.Context, listener Listener) (map[string]inter

select {
case state := <-stateChan:
if state.def.isExpansion && state.finished && state.err == nil {
state.err = w.expand(state.expanded)
}
listener.TaskStateChanged(w.ID, state.def.name, state.toExported())
w.tasks[state.def] = &state
case retry := <-w.retryCommands:
@@ -689,14 +778,20 @@ func (w *Workflow) Run(ctx context.Context, listener Listener) (map[string]inter
break
}
listener.Logger(w.ID, def.name).Printf("Manual retry requested")
stateChan <- taskState{def: def}
stateChan <- taskState{def: def, created: true}
retry.reply <- nil
// Don't get stuck when cancellation comes in after all tasks have
// finished, but also don't busy wait if something's still running.
case <-doneOnce:
doneOnce = nil
}
}

outs := map[string]interface{}{}
for name, def := range w.def.outputs {
outs[name] = w.tasks[def].result
}
return outs, nil
}

func (w *Workflow) taskArgs(def *taskDefinition) ([]reflect.Value, bool) {
@@ -753,12 +848,47 @@ func runTask(ctx context.Context, workflowID uuid.UUID, listener Listener, state
tctx.Printf("task failed, will retry (%v of %v): %v", state.retryCount+1, MaxRetries, state.err)
state = taskState{
def: state.def,
created: true,
retryCount: state.retryCount + 1,
}
}
return state
}

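// runExpansion calls an expansion task with the cloned definition d and,
// on success, records the expanded definition for the Run loop to adopt.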
func runExpansion(d *Definition, state taskState, args []reflect.Value) taskState {
in := append([]reflect.Value{reflect.ValueOf(d)}, args...)
fv := reflect.ValueOf(state.def.f)
out := fv.Call(in)
state.finished = true
if out[0].IsNil() {
state.expanded = d
} else {
state.err = out[0].Interface().(error)
}
return state
}

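// expand validates and swaps in the definition produced by an expansion
// task, then creates (or loads pending) state for any newly added tasks.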
func (w *Workflow) expand(expanded *Definition) error {
origDef := w.def
w.def = expanded
if err := w.validate(); err != nil {
w.def = origDef
return err
}
for _, def := range w.def.tasks {
if _, ok := w.tasks[def]; ok {
continue
}
// w.tasks[def] = &taskState{def: def}
var err error
w.tasks[def], err = loadTaskState(w.pendingStates, def, true)
if err != nil {
return err
}
}
return nil
}

type defaultListener struct{}

func (s *defaultListener) TaskStateChanged(_ uuid.UUID, _ string, _ *TaskState) error {