// secret_scrub.go
//
// Streaming replacement of known secret values in an io.Reader.

package main
import (
"bytes"
"fmt"
"io"
"io/fs"
"path/filepath"
"strings"
"golang.org/x/text/transform"
"github.com/dagger/dagger/core"
)
// scrubString is the placeholder that is written to the output in place of
// every secret found in the stream.
var scrubString = []byte("***")
// NewSecretScrubReader wraps r in a reader that replaces every occurrence of a
// known secret value with scrubString.
//
// The secret values come from two sources: the entries of env whose names are
// listed in secretsToScrub.Envs, and the contents of the files listed in
// secretsToScrub.Files, which are loaded from fsys by
// loadSecretsToScrubFromFiles using currentDirPath. Empty values are skipped.
//
// An error is returned when any of the secret files cannot be read.
func NewSecretScrubReader(r io.Reader, currentDirPath string, fsys fs.FS, env []string, secretsToScrub core.SecretToScrubInfo) (io.Reader, error) {
	envSecrets := loadSecretsToScrubFromEnv(env, secretsToScrub.Envs)

	fileSecrets, err := loadSecretsToScrubFromFiles(currentDirPath, fsys, secretsToScrub.Files)
	if err != nil {
		return nil, fmt.Errorf("could not load secrets from file: %w", err)
	}

	// Register env secrets first, then file secrets, each mapped to the
	// placeholder.
	root := &Trie{}
	for _, group := range [][]string{envSecrets, fileSecrets} {
		for _, secret := range group {
			if secret == "" {
				// An empty secret has nothing to match against.
				continue
			}
			root.Insert([]byte(secret), scrubString)
		}
	}

	scrubber := &censor{
		trieRoot: root,
		trie:     root,
		// NOTE: keep these sizes the same as the default transform sizes
		srcBuf: make([]byte, 0, 4096),
		dstBuf: make([]byte, 0, 4096),
	}
	return transform.NewReader(r, scrubber), nil
}
// loadSecretsToScrubFromEnv returns the values of the "NAME=value" entries in
// env whose NAME is listed in secretsToScrub.
//
// Entries without an "=" are ignored. Values are returned in env order, and a
// value is appended once for every time its name occurs in secretsToScrub. The
// result is never nil.
func loadSecretsToScrubFromEnv(env []string, secretsToScrub []string) []string {
	values := []string{}
	for _, entry := range env {
		name, value, hasValue := strings.Cut(entry, "=")
		if !hasValue {
			// not in NAME=value form, so there is nothing to scrub
			continue
		}
		for i := range secretsToScrub {
			if secretsToScrub[i] != name {
				continue
			}
			values = append(values, value)
		}
	}
	return values
}
// loadSecretsToScrubFromFiles reads every file listed in
// secretFilePathsToScrub from fsys and returns the file contents, in the same
// order as the paths.
//
// Each path is first turned into a "/"-rooted path:
//   - a path that starts with currentDirPathAbs has that prefix removed;
//   - any other relative path is rooted at "/".
//
// The leading "/" is then dropped again, because fs.ReadFile expects unrooted
// names.
//
// NOTE(review): a path starting with currentDirPathAbs minus its first
// character also takes the prefix branch, but the trim only removes the full
// currentDirPathAbs, so such a path is rooted unchanged. The prefix test is
// also a plain string match rather than a path-component match. Confirm both
// are intended.
//
// The first file that cannot be read aborts the load and its error is
// returned, wrapped.
func loadSecretsToScrubFromFiles(currentDirPathAbs string, fsys fs.FS, secretFilePathsToScrub []string) ([]string, error) {
	contents := make([]string, 0, len(secretFilePathsToScrub))

	for _, p := range secretFilePathsToScrub {
		rooted := p
		switch {
		case strings.HasPrefix(p, currentDirPathAbs) || strings.HasPrefix(p, currentDirPathAbs[1:]):
			// the prefix rule takes precedence over the plain relative rule
			rooted = filepath.Join("/", strings.TrimPrefix(p, currentDirPathAbs))
		case !filepath.IsAbs(p):
			rooted = filepath.Join("/", p)
		}

		// we remove the first `/` from the rooted path to work with fs.ReadFile
		data, err := fs.ReadFile(fsys, rooted[1:])
		if err != nil {
			return nil, fmt.Errorf("secret value not available for: %w", err)
		}
		contents = append(contents, string(data))
	}

	return contents, nil
}
// censor is a custom Transformer for replacing all keys in a target trie with
// their values.
//
// It is stateful: a match may span several calls to Transform, so the current
// trie position and the bytes of the match in progress are kept between calls.
type censor struct {
	// trieRoot is the root of the trie
	trieRoot *Trie
	// trie is the current node we are at in the trie
	trie *Trie
	// srcBuf is the source buffer, which contains bytes read from the src that
	// are partial matches against the trie
	srcBuf []byte
	// dstBuf is the destination buffer, which contains bytes that have been
	// sanitized by the censor and are ready to be copied out
	dstBuf []byte
}
// Transform ingests src bytes, and outputs sanitized bytes to dst.
//
// Unlike some other secret scrubbing implementations, this aims to sanitize
// bytes *as soon as possible*. The moment that we know a byte is not part of a
// secret, we should output it into dst - even if this would break up a provided
// src into multiple dsts over multiple calls to Transform.
//
// Bytes that could still be the start of a key are held back in c.srcBuf: they
// are counted in nSrc, but nothing is written for them until the match either
// completes or fails. Output that does not fit into dst stays in c.dstBuf and
// is written at the start of the next call; in that case transform.ErrShortDst
// is returned.
//
// NOTE(review): when a partial match fails, all of c.srcBuf is flushed
// verbatim and only the current byte is retried from the trie root. A key that
// begins inside the flushed bytes (for example key "aab" in the input "aaab")
// therefore appears to go undetected - confirm whether this is acceptable.
func (c *censor) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for {
		// flush the destination buffer
		k := copy(dst[nDst:], c.dstBuf)
		nDst += k
		if nDst == len(dst) {
			// dst is full: keep whatever did not fit for the next call
			c.dstBuf = c.dstBuf[k:]
			return nDst, nSrc, transform.ErrShortDst
		}
		c.dstBuf = c.dstBuf[:0]
		if !atEOF && nSrc == len(src) {
			// no more source bytes, we're done!
			// (a partial match may still be held in srcBuf for the next call)
			return nDst, nSrc, nil
		}
		if atEOF && nSrc == len(src) && len(c.srcBuf) == 0 {
			// no more source bytes, or buffered source bytes, we're done!
			// (when atEOF, we won't get called again, so we need to make sure
			// to flush everything)
			return nDst, nSrc, nil
		}
		// read more source bytes, until either we've read all the source
		// bytes, or we've filled the destination buffer
		for ; nSrc < len(src) && nDst+len(c.dstBuf) < len(dst); nSrc++ {
			ch := src[nSrc]
			c.trie = c.trie.Step(ch)
			if c.trie == nil {
				// no match possible, so flush the source buffer into the
				// destination buffer, and process the current byte again.
				//
				// we do this because this *might* cause us to try to flush
				// more than len(dst) - nDst bytes into the destination buffer,
				// so we should avoid consuming the next byte in this case.
				if len(c.srcBuf) != 0 {
					c.trie = c.trieRoot
					c.dstBuf = append(c.dstBuf, c.srcBuf...)
					c.srcBuf = c.srcBuf[:0]
					// undo the loop's nSrc++ so that the same byte is read
					// again, provided the loop condition still holds
					nSrc--
					continue
				}
				// put the current byte either into the destination buffer, or
				// the source buffer, depending on whether it's a partial match
				//
				// NOTE(review): srcBuf is empty here, which presumably means
				// c.trie was already the root when Step returned nil above; if
				// so, only the first branch below can be taken - confirm.
				c.trie = c.trieRoot.Step(ch)
				if c.trie == nil {
					c.trie = c.trieRoot
					c.dstBuf = append(c.dstBuf, ch)
				} else if replace := c.trie.Value(); replace != nil {
					c.trie = c.trieRoot
					c.dstBuf = append(c.dstBuf, replace...)
				} else {
					c.srcBuf = append(c.srcBuf, ch)
				}
			} else if replace := c.trie.Value(); replace != nil {
				// aha, we made a match, so replace the source buffer with the
				// censored string, and flush into the destination buffer
				c.trie = c.trieRoot
				c.dstBuf = append(c.dstBuf, replace...)
				c.srcBuf = c.srcBuf[:0]
			} else {
				// we're in the middle of a match
				c.srcBuf = append(c.srcBuf, ch)
			}
		}
		// at this point, no more matches are possible, so flush
		// (the next loop iteration copies dstBuf out to dst)
		if atEOF {
			c.dstBuf = append(c.dstBuf, c.srcBuf...)
			c.srcBuf = c.srcBuf[:0]
		}
	}
}
// Reset returns the censor to its initial state: any held-back input and
// pending output is dropped (the buffers keep their capacity), and matching
// starts again from the root of the trie.
func (c *censor) Reset() {
	c.srcBuf, c.dstBuf = c.srcBuf[:0], c.dstBuf[:0]
	c.trie = c.trieRoot
}
// Trie is a simple implementation of a compressed trie (or radix tree). In
// essence, it's a key-value store that allows easily selecting all entries
// that have a given prefix.
//
// Why not an off-the-shelf implementation? Well, most of those don't allow
// navigating character-by-character through the tree, like we do with Step.
//
// The zero value is an empty trie that is ready to use.
type Trie struct {
	// value is the value stored in this trie node
	value []byte
	// children is a byte-indexed slice of child nodes
	children []*Trie
	// direct is an alternative shortcut for a list of children that helps save
	// us some memory (note, it currently only appears in leaf nodes, and
	// contains suffixes)
	direct []byte
}

// Insert stores value under key, replacing any value previously stored under
// the same key. Keys that are prefixes of one another can be stored side by
// side.
//
// The trie keeps references to key and value rather than copying them, so the
// caller must not modify either slice afterwards.
func (t *Trie) Insert(key []byte, value []byte) {
	node := t
	for i, ch := range key {
		if node.children == nil {
			// A leaf may absorb the rest of the key as its direct suffix only
			// if it is unused, or if it already holds exactly this suffix. A
			// leaf that stores a value for a shorter key (no suffix, but a
			// value) has to be branched instead, otherwise that shorter key
			// would be silently overwritten.
			unused := len(node.direct) == 0 && node.value == nil
			if unused || bytes.Equal(node.direct, key[i:]) {
				node.direct = key[i:]
				node.value = value
				return
			}
			node.branch()
		}
		if node.children[ch] == nil {
			node.children[ch] = &Trie{}
		}
		node = node.children[ch]
	}
	// The key ends at this node. If the node is a leaf that still carries a
	// suffix of a longer key, push that suffix down into a child first. (A
	// leaf with an empty suffix already represents exactly this key, so there
	// is no need to allocate a child table for it.)
	if len(node.direct) > 0 {
		node.branch()
	}
	node.value = value
}

// branch takes a node in the trie and converts it from a leaf node into a
// branch node. It does nothing if the node is already a branch node.
func (t *Trie) branch() {
	if t.children != nil {
		return
	}
	// why a slice instead of a map? surely it uses more space?
	// well, doing a lookup on a slice like this is *super* quick, but
	// doing so on a map is *much* slower - since this is in the
	// hotpath, it makes sense to waste the memory here (and since the
	// trie is compressed, it doesn't seem to be that much in practice)
	t.children = make([]*Trie, 256)
	// since we potentially create children here, we have to re-insert the
	// direct shortcuts to preserve internal consistency: the suffix (and the
	// value that belongs to it) moves one level down. With an empty suffix the
	// value belongs to this node itself and stays where it is.
	if len(t.direct) > 0 {
		t.children[t.direct[0]] = &Trie{
			direct: t.direct[1:],
			value:  t.value,
		}
		t.value = nil
	}
	t.direct = nil
}

// Step selects a node that was previously inserted. It returns the node that
// is reached from t by consuming ch, or nil if no key continues with ch.
func (t *Trie) Step(ch byte) *Trie {
	if t.children != nil {
		return t.children[ch]
	}
	if len(t.direct) > 0 && t.direct[0] == ch {
		// this is a "virtual node" - it doesn't actually exist in the trie,
		// but can still be used for traversal
		return &Trie{
			direct: t.direct[1:],
			value:  t.value,
		}
	}
	return nil
}

// Value gets the value previously inserted at this node. It returns nil when
// no key ends at this node, which includes leaf nodes that still have a
// suffix left to match.
func (t *Trie) Value() []byte {
	if len(t.direct) == 0 {
		return t.value
	}
	return nil
}

// String prints a debuggable representation of the trie: the node's own
// "suffix (value)" line if it stores a value, followed by one line per child
// byte with the child's representation indented below it.
func (t Trie) String() string {
	var sb strings.Builder
	if t.value != nil {
		fmt.Fprintf(&sb, "%s (%s)\n", t.direct, t.value)
	}
	for ch, child := range t.children {
		if child == nil {
			continue
		}
		fmt.Fprintf(&sb, "%c\n", ch)
		for _, line := range strings.Split(child.String(), "\n") {
			sb.WriteString(" " + line + "\n")
		}
	}
	return strings.TrimSpace(sb.String())
}