/*
* Copyright 2021 American Express
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package scan
import (
	"bufio"
	"crypto/sha1"
	"io"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"
	"unicode/utf8"

	cfgReader "github.com/americanexpress/earlybird/v4/pkg/config"
	"github.com/americanexpress/earlybird/v4/pkg/postprocess"
)
var (
	//Labels is a map of all our labels, accessible by the rule unique code.
	Labels map[int]LabelConfigs
	//CombinedRules is a global array where we load all our precompiled rules.
	CombinedRules []Rule
	//FalsePositiveRules is a map of our false positive rules sorted by the rule unique code.
	FalsePositiveRules map[int]FalsePositives
	//SolutionConfigs is a map of our solutions sorted by the rule unique code.
	SolutionConfigs map[int]Solution
	//CompressPattern is a pattern used to identify compressed zip files.
	CompressPattern = regexp.MustCompile(compressRegex)
	//ConvertPattern is a pattern used to identify files that need to be converted to plaintext to be scanned.
	ConvertPattern = regexp.MustCompile(convertRegex)
	//tempPattern extracts the original path out of a temp-directory path (see removeTempPrefix).
	tempPattern = regexp.MustCompile(tempRegex)
)
//SearchFiles will use the EarlybirdConfig, the provided file list, decompressed zip files and converted files temporary paths to send found secrets to the Hit channel.
//SearchFiles owns the hits channel: it is closed here after all workers drain
//the job queue, so callers must only ever receive from it.
func SearchFiles(cfg *cfgReader.EarlybirdConfig, files []File, compressPaths []string, convertPaths []string, hits chan<- Hit) {
	//Delete tmp file directory when we're done
	defer DeleteFiles(compressPaths)
	defer DeleteFiles(convertPaths)
	//Create our channels and mutex (jobMutex is shared with the worker pool)
	var jobMutex = &sync.Mutex{}
	jobs := make(chan WorkJob)
	wg := new(sync.WaitGroup)
	//Create our worker pool (cfg.WorkerCount goroutines reading from jobs)
	scanPool(cfg, wg, jobMutex, jobs, hits)
	//Scan the file names (synchronous; pushes filename hits directly to hits)
	nameScanner(cfg, files, hits)
	//Create work from file content for the scanPool; blocks until all jobs
	//have been handed to workers since jobs is unbuffered
	contentJobWriter(cfg, files, jobMutex, jobs)
	//Close our channels: closing jobs lets workers finish their range loops,
	//then wait for them before closing hits so no worker sends on a closed channel
	close(jobs)
	wg.Wait()
	close(hits)
}
//scanPool searches incoming jobs for secrets and writes findings to the hits channel.
//It launches cfg.WorkerCount goroutines; each exits when the jobs channel is
//closed, releasing its WaitGroup slot. jobMutex guards both the shared
//duplicate map and the shared cfg.FailScan flag.
func scanPool(cfg *cfgReader.EarlybirdConfig, wg *sync.WaitGroup, jobMutex *sync.Mutex, jobs chan WorkJob, hits chan<- Hit) {
	//Create duplicate map shared by all workers (HASH:true), guarded by jobMutex
	dupeMap := make(map[string]bool)
	for w := 1; w <= cfg.WorkerCount; w++ {
		wg.Add(1)
		go func(w int) {
			// Register Done before any work so the WaitGroup is released even
			// if a worker panics (previously deferred after the loop, so a
			// mid-loop panic would have hung wg.Wait()).
			defer wg.Done()
			for j := range jobs {
				if IsIgnoreAnnotation(cfg, j.WorkLine.LineValue) {
					j.WorkLine.LineValue = ""
				}
				// Scan the line based on common password rules
				hitFound, tmpHits := scanLine(j.WorkLine, j.FileLines, cfg)
				if cfg.Suppress {
					// Mask secret values before they are reported
					for i := range tmpHits {
						tmpHits[i].MatchValue = maskValue(tmpHits[i].MatchValue)
						tmpHits[i].LineValue = maskValue(tmpHits[i].LineValue)
					}
				}
				if !hitFound {
					continue
				}
				for _, hit := range tmpHits {
					jobMutex.Lock() // put a mutex on it to avoid collisions/misses
					unique := hitUnique(dupeMap, hit)
					jobMutex.Unlock()
					if !unique {
						continue
					}
					if hit.ConfidenceID <= cfg.ConfidenceDisplayLevel {
						hits <- hit //Push hits to channel
					}
					// cfg.FailScan is shared by all workers; take the mutex so
					// the read-modify-write is not a data race (it previously
					// was written unguarded from every worker).
					if !cfg.FailScan {
						jobMutex.Lock()
						if !cfg.FailScan {
							cfg.FailScan = determineScanFail(cfg, &hit)
						}
						jobMutex.Unlock()
					}
				}
			}
		}(w)
	}
}
// determineScanFail reports whether a single hit is severe enough and
// confident enough (per the configured fail thresholds, where lower IDs are
// more severe/confident) to mark the whole scan as failed.
func determineScanFail(cfg *cfgReader.EarlybirdConfig, hit *Hit) bool {
	severeEnough := hit.SeverityID <= cfg.SeverityFailLevel
	confidentEnough := hit.ConfidenceID <= cfg.ConfidenceFailLevel
	return severeEnough && confidentEnough
}
//contentJobWriter creates work based off file content for scanning.
//In-memory "buffer" files are pushed line-by-line as-is; on-disk files are
//opened and read line-by-line, each line being split into overlapping jobs of
//at most cfg.WorkLength characters. jobMutex is accepted for symmetry with
//scanPool but is not used here.
func contentJobWriter(cfg *cfgReader.EarlybirdConfig, files []File, jobMutex *sync.Mutex, jobs chan WorkJob) {
	var e error
	// Loop through each File
	for _, searchFile := range files {
		//FileOS refers to the file object that's open, not the file object which contains the name and path
		if searchFile.Path == "buffer" || searchFile.Name == "buffer" {
			//Buffer input: lines are already in memory, push them directly
			for _, workline := range searchFile.Lines {
				jobs <- WorkJob{
					WorkLine:  workline,
					FileLines: searchFile.Lines,
				}
			}
		} else {
			//Don't do file read/scan on files we know will trigger the filename scan -- Don't open compressed files either
			if !isExcludedFileType(cfg, searchFile.Name) && len(CompressPattern.FindStringSubmatch(searchFile.Name)) <= 0 {
				fileOS, err := os.Open(searchFile.Path) //Open file path
				if err != nil {
					fileOS, err = os.Open(searchFile.Name) //If file path open fails, try file name
					if err != nil {
						//NOTE(review): log.Fatal aborts the entire scan on a
						//single unreadable file — confirm that is intended.
						log.Fatal("Can't open file", err)
					}
				}
				var work []WorkJob
				var job WorkJob
				job.FileLines = searchFile.Lines
				//Search line by line
				reader := bufio.NewReader(fileOS)
				job.WorkLine.LineValue, e = readln(reader)
				for e == nil {
					job.WorkLine.LineNum = job.WorkLine.LineNum + 1
					job.WorkLine.FileName = jobFileName(cfg.Gitrepo, searchFile.Name)
					job.WorkLine.FilePath = searchFile.Path
					//Accumulate every line read so far; jobs carry the full
					//file context for multiline label matching
					job.FileLines = append(job.FileLines, job.WorkLine)
					//Add our split up jobs to the work array
					work = append(work, splitJob(job, cfg.WorkLength)...)
					//Search next line to break out of loop (readln returns
					//io.EOF when the file is exhausted)
					job.WorkLine.LineValue, e = readln(reader)
					if e != nil && e != io.EOF {
						log.Println("Error reading file:", e)
					}
				}
				//Push our work to the jobs channel
				for _, job := range work {
					jobs <- job
				}
				fileOS.Close()
			}
		}
	}
}
//nameScanner applies the filename rules to every file and pushes qualifying
//findings onto the hits channel, flagging scan failure when thresholds are met.
func nameScanner(cfg *cfgReader.EarlybirdConfig, files []File, hits chan<- Hit) {
	for _, candidate := range files {
		// Scan the filename based on the Filename rules
		found, hit := scanName(candidate, CombinedRules, cfg)
		if !found {
			continue
		}
		//Surface the hit only when it meets the configured display level
		if cfg.LevelMap[hit.Severity] <= cfg.SeverityDisplayLevel {
			hits <- hit //push hit to channel
		}
		//Fail the scan when both severity and confidence meet the fail thresholds
		severeEnough := cfg.LevelMap[hit.Severity] <= cfg.SeverityFailLevel
		confidentEnough := cfg.LevelMap[hit.Confidence] <= cfg.ConfidenceFailLevel
		if severeEnough && confidentEnough {
			cfg.FailScan = true
		}
	}
}
//DeleteFiles removes files and folders in target path array
func DeleteFiles(paths []string) {
for _, p := range paths {
err := os.RemoveAll(p)
if err != nil {
log.Println("Failed to delete temporary file", err)
}
}
}
// isExcludedFileType checks if the file extension is something we know will
// trigger a hit on the filename scan (e.g., .pem, .p12, etc.), so content
// scanning can be skipped. Matching is case-insensitive on the real extension
// and case-sensitive on the period-stripped suffix form.
func isExcludedFileType(cfg *cfgReader.EarlybirdConfig, filename string) (excluded bool) {
	fileExt := filepath.Ext(filename)
	for _, ext := range cfg.ExtensionsToSkipScan {
		// Guard against an empty configured extension: ext[1:] below would
		// panic, and an empty suffix would match every filename.
		if ext == "" {
			continue
		}
		if strings.EqualFold(fileExt, ext) {
			return true
		}
		//filename ends in extension stripped of period, e.g., 'foobarmin.js'
		trimmedExt := ext[1:]
		// A bare "." entry leaves an empty suffix that matches everything;
		// only apply the suffix check when something remains after the period.
		if trimmedExt != "" && strings.HasSuffix(filename, trimmedExt) {
			return true
		}
	}
	return false
}
// hitUnique records the hit in dupeMap and reports whether it was seen for
// the first time. The de-duplication key is a SHA-1 over filename, line
// number, and matched value. Callers must serialize access to dupeMap.
func hitUnique(dupeMap map[string]bool, hit Hit) bool {
	hasher := sha1.New()
	fingerprint := hit.Filename + strconv.Itoa(hit.Line) + hit.MatchValue
	if _, err := hasher.Write([]byte(fingerprint)); err != nil {
		log.Println("Failed to produce digest of hit", err)
	}
	key := string(hasher.Sum(nil))
	if dupeMap[key] {
		//Seen before: duplicate
		return false
	}
	dupeMap[key] = true
	return true
}
// scanLine takes a line and runs it through every compiled rule, looking for
// hits. A Hit is built for each pattern match that survives post-processing
// (false-positive filtering, password/SSN/mod10/entropy checks).
// fileLines carries the whole file's content for multiline label matching.
// Returns isHit=true with the surviving hits when at least one rule matched.
func scanLine(line Line, fileLines []Line, cfg *cfgReader.EarlybirdConfig) (isHit bool, hits []Hit) {
	for _, rule := range CombinedRules {
		var hit Hit
		//Skip rules that do not apply: filename-only rules, and comment-category
		//rules when comment scanning is disabled (&& binds tighter than ||)
		if rule.Searcharea == "filename" || cfg.SkipComments && rule.Category == "comment" {
			continue
		}
		patternMatch, matchValue := findHit(line.LineValue, rule.CompiledPattern)
		if !patternMatch {
			continue
		}
		//If we found a Regexp match, build a Hit
		hit.Code = rule.Code
		hit.Confidence = getLevelNameFromID(rule.Confidence, cfg.LevelMap)
		hit.ConfidenceID = rule.Confidence
		hit.Caption = rule.Caption
		hit.Category = rule.Category
		if cfg.ShowSolutions {
			hit.Solution = SolutionConfigs[rule.SolutionID].Text
		}
		hit.CWE = rule.CWE
		hit.Line = line.LineNum
		hit.LineValue = strings.TrimSpace(line.LineValue)
		hit.MatchValue = matchValue
		//Report the temp-prefix-stripped real path, unless the line came from
		//an in-memory buffer or a converted ("ebconv") temp file, in which
		//case the original file name is used instead
		if line.FilePath != "buffer" && !strings.Contains(line.FilePath, "ebconv") {
			hit.Filename = removeTempPrefix(line.FilePath)
		} else {
			hit.Filename = line.FileName
		}
		hit.Time = time.Now().UTC().Format(time.RFC3339)
		//Severity may be adjusted per user config (see determineSeverity)
		hit.determineSeverity(cfg, &rule)
		// Apply labels to the hit if appropriate
		labelHit(&hit, fileLines)
		//Check if our hit has any false positives
		isStillHit := hit.postProcess(cfg, &rule)
		if isStillHit {
			isHit = true
			hits = append(hits, hit)
		}
	}
	return isHit, hits
}
// scanName takes a filename and runs it through the filename rules, looking
// for a hit. The first matching rule ends the scan: its hit is returned with
// isHit=true, unless it matches a false-positive rule, in which case
// (false, hit) is returned immediately.
// NOTE(review): a false-positive match skips all remaining rules rather than
// continuing to the next one — confirm that early return is intended.
func scanName(file File, rules []Rule, cfg *cfgReader.EarlybirdConfig) (isHit bool, hit Hit) {
	for _, rule := range rules {
		if rule.Searcharea == "body" { //Skip rules that do not apply
			continue
		}
		//Buffer input has no real path; fall back to the name for matching
		if file.Path == "buffer" {
			file.Path = file.Name
		}
		patternMatch, _ := findHit(file.Path, rule.CompiledPattern)
		// If we found a match to the Regexp pattern, build a Hit
		if patternMatch {
			hit.Code = rule.Code
			hit.Severity = getLevelNameFromID(rule.Severity, cfg.LevelMap)
			hit.SeverityID = rule.Severity
			hit.Caption = rule.Caption
			hit.Category = rule.Category
			hit.CWE = rule.CWE
			hit.Confidence = getLevelNameFromID(rule.Confidence, cfg.LevelMap)
			hit.ConfidenceID = rule.Confidence
			if cfg.ShowSolutions {
				hit.Solution = SolutionConfigs[rule.SolutionID].Text
			}
			//Filename hits have no meaningful line number
			hit.Line = 0
			hit.Filename = file.Path
			hit.MatchValue = file.Name
			hit.LineValue = file.Name
			hit.Time = time.Now().UTC().Format(time.RFC3339)
			// Check if the severity needs to be adjusted based on filepath
			hit.determineSeverity(cfg, &rule)
			// Check if the hit has any false positives
			fpHit := findFalsePositive(hit)
			if fpHit {
				return false, hit
			}
			return true, hit
		}
	}
	return false, hit
}
// readln returns a single line (without the ending \n)
// from the input buffered reader.
// An error is returned iff there is an error with the
// buffered reader.
func readln(r *bufio.Reader) (string, error) {
var (
isPrefix bool = true
err error = nil
line, ln []byte
)
for isPrefix && err == nil {
line, isPrefix, err = r.ReadLine()
ln = append(ln, line...)
}
return string(ln), err
}
//IsIgnoreAnnotation reports whether the line contains any of the configured
//skip annotations (e.g. an inline "ignore this finding" marker).
func IsIgnoreAnnotation(cfg *cfgReader.EarlybirdConfig, line string) bool {
	for _, marker := range cfg.AnnotationsToSkipLine {
		if strings.Contains(line, marker) {
			return true
		}
	}
	//No skip annotation present
	return false
}
// maskValue suppresses a secret value from being displayed in the results by
// replacing it with maskCharacter repeated once per character. The length is
// counted in runes rather than bytes so multi-byte secrets produce a mask of
// the same visible length instead of leaking their encoded byte size.
func maskValue(input string) string {
	return strings.Repeat(maskCharacter, utf8.RuneCountInString(input))
}
// jobFileName resolves the display name for a scanned line: when scanning a
// git repository it is a repo file URL built from the base name, otherwise
// the file name is returned unchanged.
func jobFileName(gitRepo, fileName string) string {
	if gitRepo == "" {
		return fileName
	}
	return getFileURL(gitRepo, filepath.Base(fileName))
}
//splitJob splits up the job into an array of jobs if too long otherwise returns a single job.
//Segments are produced by splitSubN with overlap so matches spanning a split
//boundary are still detectable; every segment keeps the source line's metadata.
func splitJob(inJob WorkJob, worklength int) (work []WorkJob) {
	//Short lines pass through untouched
	if len(inJob.WorkLine.LineValue) <= worklength {
		return []WorkJob{inJob}
	}
	//For VERY long lines, split it up at WORK_LENGTH, creating another string that overlaps the split
	segments := splitSubN(inJob.WorkLine.LineValue, worklength)
	work = make([]WorkJob, 0, len(segments))
	for _, segment := range segments {
		work = append(work, WorkJob{
			WorkLine: Line{
				LineNum:   inJob.WorkLine.LineNum,
				FileName:  inJob.WorkLine.FileName,
				FilePath:  inJob.WorkLine.FilePath,
				LineValue: segment,
			},
			FileLines: inJob.FileLines,
		})
	}
	return work
}
//splitSubN splits s into n-rune chunks and interleaves overlap strings that
//bridge each chunk boundary, so a secret straddling a split point can still
//be matched against a rule.
func splitSubN(s string, n int) []string {
	//Split on rune boundaries so multi-byte characters are never cut in half
	runes := []rune(s)
	chunks := make([]string, 0, len(runes)/n+1)
	for start := 0; start < len(runes); start += n {
		if end := start + n; end < len(runes) {
			chunks = append(chunks, string(runes[start:end]))
		} else {
			chunks = append(chunks, string(runes[start:]))
		}
	}
	results := []string{}
	//Alternate between capturing the tail of one chunk and joining that tail
	//with the head of the next chunk to form the overlap string.
	//BUGFIX: tmpString must survive across iterations. It was previously
	//declared inside the loop, so the tail captured from one chunk was
	//discarded before the next iteration could prepend it — the "overlapped
	//data" string never actually contained the previous chunk's tail.
	var tmpString string
	toggle := true
	for _, sub := range chunks {
		if toggle { // Capture the overlap tail from the end of this chunk
			toggle = false
			results = append(results, sub) // Append split string
			if len(sub) >= overlapLength {
				tmpString = sub[len(sub)-overlapLength:]
			} else if len(sub) > 0 {
				//Short chunk: keep all but the first character as the tail
				tmpString = sub[len(sub)-(len(sub)-1):]
			} else {
				tmpString = sub
			}
			continue
		}
		// Join the captured tail with the start of this chunk
		toggle = true
		if len(sub) > overlapLength {
			//NOTE(review): sub[0:overlapLength-1] takes one fewer character
			//than overlapLength — confirm the off-by-one is intended.
			tmpString = tmpString + sub[0:overlapLength-1]
			results = append(results, tmpString) //Append overlapped data
			results = append(results, sub)       // Append split string
			tmpString = ""
		} else {
			results = append(results, sub) // Append split string
			break //stop if last element is too short
		}
	}
	return results
}
// labelHit applies labels from the configs in labels.json to a hit, keyed by
// the hit's rule code. Single-line label rules apply when any key appears in
// the hit line (or unconditionally when the rule has no keys); multiline
// rules require every key to appear somewhere in the file.
func labelHit(hit *Hit, fileLines []Line) {
	labelCfg, known := Labels[hit.Code]
	if !known {
		return
	}
	for _, labelRule := range labelCfg.Labels {
		if labelRule.Multiline {
			//Multiline: all keys must be present across the file's lines
			satisfied := 0
			for _, key := range labelRule.Keys {
				if substringExistsInLines(fileLines, key) {
					satisfied++
				}
			}
			if satisfied == len(labelRule.Keys) {
				hit.Labels = append(hit.Labels, labelRule.Label)
			}
			continue
		}
		//Single-line rule with no keys applies based on the hit itself
		if len(labelRule.Keys) == 0 {
			hit.Labels = append(hit.Labels, labelRule.Label)
			continue
		}
		//Otherwise any single key found in the hit line applies the label
		for _, key := range labelRule.Keys {
			if substringExistsInString(hit.LineValue, key) {
				hit.Labels = append(hit.Labels, labelRule.Label)
				break
			}
		}
	}
}
// determineSeverity sets hit.Severity/hit.SeverityID. If a user-configured
// adjusted-severity category matches this hit's rule category and one of its
// patterns matches the chosen part of the hit (filename, line value, or match
// value), the adjusted severity wins; otherwise the rule's own severity is used.
func (hit *Hit) determineSeverity(cfg *cfgReader.EarlybirdConfig, rule *Rule) {
	// check if for the given category we need to adjust the severity based on user config
	for _, adjusted := range cfg.AdjustedSeverityCategories {
		if adjusted.Category != rule.Category {
			continue
		}
		// Pick the hit field the adjustment patterns should run against
		// (hoisted out of the pattern loop — it is loop-invariant)
		var target string
		switch {
		case adjusted.UseFilename:
			target = hit.Filename
		case adjusted.UseLineValue:
			target = hit.LineValue
		default:
			target = hit.MatchValue
		}
		for _, pattern := range adjusted.CompiledPatterns {
			// MatchString avoids the per-check []byte conversion of Match
			if pattern.MatchString(target) {
				severityID := getIdFromLevelName(adjusted.AdjustedDisplaySeverity, cfg.LevelMap)
				hit.Severity = getLevelNameFromID(severityID, cfg.LevelMap)
				hit.SeverityID = severityID
				return
			}
		}
	}
	// No adjustment matched; fall back to the rule's configured severity
	hit.Severity = getLevelNameFromID(rule.Severity, cfg.LevelMap)
	hit.SeverityID = rule.Severity
}
// postProcess applies rule-specific validation to a freshly built hit and
// reports whether it should still be treated as a finding.
// A false-positive rule match (unless disabled via cfg.IgnoreFPRules) vetoes
// the hit outright. Otherwise validation depends on rule.Postprocess:
//   - "password": drop account tokens, known-false, same key/value, unicode,
//     and HTML-entity values; adjust confidence; label weak passwords
//   - "ssn":     keep only structurally valid SSNs
//   - "mod10":   keep only values that pass a Luhn/mod-10 check
//   - "entropy": keep only values whose Shannon entropy exceeds entropyThreshold
//   - "key":     drop identical key/value pairs, keep everything else
//   - default:   no additional validation needed; keep the hit
func (hit *Hit) postProcess(cfg *cfgReader.EarlybirdConfig, rule *Rule) (isHit bool) {
	fpHit := false
	if !cfg.IgnoreFPRules {
		fpHit = findFalsePositive(*hit)
	}
	switch {
	case fpHit:
		isHit = false
	// Check if a password is valid and weak. Exclude if invalid, label as 'weak' if weak
	case rule.Postprocess == "password":
		// Skip account_token as password so that it can be reported under credit card
		SkipAccountToken := postprocess.SkipAccountTokenPassword(hit.LineValue)
		if SkipAccountToken {
			isHit = false
			break
		}
		// If it's a false positive return no match.
		Confidence, IsFalsePositive := postprocess.PasswordFalse(hit.MatchValue)
		if IsFalsePositive {
			isHit = false
			break
		}
		// Skip password as same key/value pair
		IsPasswordSameKeyValue := postprocess.SkipSameKeyValue(hit.MatchValue, hit.LineValue)
		if IsPasswordSameKeyValue {
			isHit = false
			break
		}
		// Skip password if the value has unicode char in it
		passwordContainsUnicode := postprocess.SkipPasswordWithUnicode(hit.MatchValue)
		if passwordContainsUnicode {
			isHit = false
			break
		}
		// Skip password if the value has HTML entities in it
		passwordContainsHTMLEntities := postprocess.SkipPasswordWithHTMLEntities(hit.MatchValue)
		if passwordContainsHTMLEntities {
			isHit = false
			break
		}
		hit.Confidence = getLevelNameFromID(Confidence, cfg.LevelMap)
		hit.ConfidenceID = Confidence
		if postprocess.PasswordWeak(hit.MatchValue) {
			hit.Caption = postprocess.WeakPswdCaption
			hit.Labels = append(hit.Labels, "weak password")
		}
		isHit = true
	// If a SSN hit doesn't meet certain criteria (e.g., all zeroes, certain test patterns, etc.), skip it
	case rule.Postprocess == "ssn":
		if postprocess.ValidSSN(hit.MatchValue) {
			isHit = true
		}
	// Verify credit card hits against a mod10 check
	case rule.Postprocess == "mod10":
		// If the match passed a Luhn/mod-10 check, build a Hit
		if postprocess.IsCard(hit.MatchValue) {
			isHit = true
		}
	// Calculate the entropy of a string and make sure it passes entropyThreshold
	case rule.Postprocess == "entropy":
		e := postprocess.Shannon(hit.MatchValue)
		// If the line's string entropy is high enough, build a Hit
		if e > entropyThreshold {
			isHit = true
		}
	case rule.Postprocess == "key":
		// Skip same key/value pair
		IsSameKeyValue := postprocess.SkipSameKeyValue(hit.MatchValue, hit.LineValue)
		if IsSameKeyValue {
			isHit = false
			break
		}
		// BUGFIX: isHit was never set in this case, so every "key" hit was
		// silently dropped even when the key/value pair differed — unlike all
		// sibling cases, which set isHit = true on passing validation.
		isHit = true
	default:
		isHit = true
	}
	return isHit
}
//removeTempPrefix removes the temp path prefix if it exists, returning the
//original path captured by tempPattern; non-temp paths pass through unchanged.
func removeTempPrefix(path string) string {
	inTempDir := strings.Contains(path, "ebzip") ||
		strings.Contains(path, "ebgit") ||
		strings.Contains(path, "ebconv")
	if !inTempDir {
		return path
	}
	//tempPattern's first capture group holds the path after the temp prefix
	if captured := tempPattern.FindStringSubmatch(path); len(captured) > 1 {
		return captured[1]
	}
	return path
}