This repository has been archived by the owner on Oct 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 53
/
raw_output_path.go
97 lines (87 loc) · 3.82 KB
/
raw_output_path.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package ioutils
import (
"context"
"crypto/sha1" // #nosec
"encoding/hex"
"strconv"
core2 "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core"
"github.com/flyteorg/flytestdlib/storage"
"github.com/flyteorg/flyteplugins/go/tasks/pluginmachinery/io"
)
// precomputedRawOutputPaths is the io.RawOutputPaths value returned by the
// constructors in this file. It holds a raw output prefix that was fully
// resolved at construction time.
type precomputedRawOutputPaths struct {
	// path is the reference handed back verbatim by GetRawOutputPrefix.
	path storage.DataReference
}

// GetRawOutputPrefix returns the stored prefix unchanged.
func (p precomputedRawOutputPaths) GetRawOutputPrefix() storage.DataReference {
	return p.path
}
// NewShardedDeterministicRawOutputPath builds a raw output path that is placed
// under a shard picked by sharder and ends in a hash of outputMetadataPrefix.
//
// The bytes of outputMetadataPrefix are used twice: they are passed to
// sharder.GetShardPrefix to pick the shard, and they are hashed with SHA-1 to
// produce the last path segment. The returned prefix is the reference that
// store constructs from basePrefix, the shard prefix and the hex-encoded
// digest, in that order.
//
// A digest is computed on every call, which the original author flagged as a
// potential cost when many paths are created.
//
// Errors from sharder.GetShardPrefix or store.ConstructReference are returned
// unmodified.
func NewShardedDeterministicRawOutputPath(ctx context.Context, sharder ShardSelector, basePrefix, outputMetadataPrefix storage.DataReference, store storage.ReferenceConstructor) (io.RawOutputPaths, error) {
	key := []byte(outputMetadataPrefix)

	shard, err := sharder.GetShardPrefix(ctx, key)
	if err != nil {
		return nil, err
	}

	// SHA-1 is used for speed rather than collision resistance; shard prefix
	// plus digest is considered unique enough for a storage path.
	digest := sha1.Sum(key) // #nosec

	ref, err := store.ConstructReference(ctx, basePrefix, shard, hex.EncodeToString(digest[:]))
	if err != nil {
		return nil, err
	}

	return precomputedRawOutputPaths{path: ref}, nil
}
// NewRawOutputPaths wraps an already-known raw output prefix in an
// io.RawOutputPaths. The prefix is stored exactly as given and the context
// argument is ignored.
func NewRawOutputPaths(_ context.Context, rawOutputPrefix storage.DataReference) io.RawOutputPaths {
	return precomputedRawOutputPaths{
		path: rawOutputPrefix,
	}
}
// NewShardedRawOutputPath builds a raw output path for uniqueID under a shard
// picked by sharder.
//
// uniqueID is passed (as bytes) to sharder.GetShardPrefix to pick the shard
// and is also used, unhashed, as the last path segment. The returned prefix is
// the reference that store constructs from basePath, the shard prefix and
// uniqueID, in that order.
//
// The original author notes that this is faster than the randomized strategy,
// which is not defined in this file.
//
// Errors from sharder.GetShardPrefix or store.ConstructReference are returned
// unmodified.
func NewShardedRawOutputPath(ctx context.Context, sharder ShardSelector, basePath storage.DataReference, uniqueID string, store storage.ReferenceConstructor) (io.RawOutputPaths, error) {
	shard, err := sharder.GetShardPrefix(ctx, []byte(uniqueID))
	if err != nil {
		return nil, err
	}

	ref, err := store.ConstructReference(ctx, basePath, shard, uniqueID)
	if err != nil {
		return nil, err
	}

	return precomputedRawOutputPaths{path: ref}, nil
}
// NewDeterministicUniqueRawOutputPath builds a raw output path directly under
// rawOutputPrefix whose only additional segment is a hash of
// outputMetadataPrefix. No sharding is performed.
//
// The returned prefix is the reference that store constructs from
// rawOutputPrefix and the hex-encoded SHA-1 digest of outputMetadataPrefix.
//
// An error from store.ConstructReference is returned unmodified.
func NewDeterministicUniqueRawOutputPath(ctx context.Context, rawOutputPrefix, outputMetadataPrefix storage.DataReference, store storage.ReferenceConstructor) (io.RawOutputPaths, error) {
	// SHA-1 is used for speed rather than collision resistance; the digest
	// only needs to tell metadata prefixes apart under rawOutputPrefix.
	digest := sha1.Sum([]byte(outputMetadataPrefix)) // #nosec

	ref, err := store.ConstructReference(ctx, rawOutputPrefix, hex.EncodeToString(digest[:]))
	if err != nil {
		return nil, err
	}

	return precomputedRawOutputPaths{path: ref}, nil
}
// NewTaskIDRawOutputPath builds a raw output path that mirrors the fields of a
// task execution identifier, so it can be cross-referenced with the
// Flyte-generated TaskExecution ID.
//
// The returned prefix is the reference that store constructs from
// rawOutputPrefix followed by these segments, in order: project, domain,
// execution name, node ID, retry attempt (decimal) and task name.
//
// An error from store.ConstructReference is returned unmodified.
func NewTaskIDRawOutputPath(ctx context.Context, rawOutputPrefix storage.DataReference, taskID *core2.TaskExecutionIdentifier, store storage.ReferenceConstructor) (io.RawOutputPaths, error) {
	nodeExec := taskID.GetNodeExecutionId()
	workflowExec := nodeExec.GetExecutionId()
	attempt := strconv.Itoa(int(taskID.GetRetryAttempt()))

	ref, err := store.ConstructReference(
		ctx,
		rawOutputPrefix,
		workflowExec.GetProject(),
		workflowExec.GetDomain(),
		workflowExec.GetName(),
		nodeExec.GetNodeId(),
		attempt,
		taskID.GetTaskId().GetName(),
	)
	if err != nil {
		return nil, err
	}

	return precomputedRawOutputPaths{path: ref}, nil
}