This repository has been archived by the owner on May 25, 2023. It is now read-only.
/
aws.go
89 lines (74 loc) · 2.11 KB
/
aws.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package aws
import (
"bytes"
"strings"
"sync"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3iface"
"github.com/bilalcaliskan/s3-substring-finder/internal/logging"
"github.com/bilalcaliskan/s3-substring-finder/internal/options"
"github.com/schollz/progressbar/v3"
"go.uber.org/zap"
)
// Find lists the objects in opts.BucketName, keeps those whose key ends with
// one of the comma-separated suffixes in opts.FileExtensions, downloads every
// kept object concurrently and collects the keys whose content contains
// opts.Substring.
//
// It returns the matching keys (in no guaranteed order, because the downloads
// run in parallel) together with every error met along the way. A failure to
// list the bucket aborts the search immediately; a failure on a single object
// is recorded and the remaining objects are still checked.
//
// NOTE(review): only a single ListObjects call is made, so if the service
// truncates the listing the remaining keys are never inspected — confirm
// whether paging is required for the target buckets.
func Find(svc s3iface.S3API, opts *options.S3SubstringFinderOptions) ([]string, []error) {
	var (
		errs         []error
		matchedFiles []string
		// mu guards errs and matchedFiles; both are appended to from the
		// worker goroutines started below.
		mu sync.Mutex
		wg sync.WaitGroup
	)

	// fetch the objects in the target bucket
	listResult, err := svc.ListObjects(&s3.ListObjectsInput{
		Bucket: aws.String(opts.BucketName),
	})
	if err != nil {
		errs = append(errs, err)
		return matchedFiles, errs
	}

	extensions := strings.Split(opts.FileExtensions, ",")

	// keep only the objects whose key ends with one of the requested suffixes
	var resultArr []*s3.Object
	for _, v := range listResult.Contents {
		if v == nil || v.Key == nil {
			// nothing to match against, and dereferencing would panic
			continue
		}
		for _, y := range extensions {
			if strings.HasSuffix(*v.Key, y) {
				logging.GetLogger().Info("found file", zap.String("name", *v.Key))
				resultArr = append(resultArr, v)
				// stop at the first matching suffix so a key that matches
				// several suffixes is downloaded and reported only once
				break
			}
		}
	}

	bar := progressbar.Default(int64(len(resultArr)))

	// recordErr appends to errs under the lock so concurrent workers do not race.
	recordErr := func(err error) {
		mu.Lock()
		errs = append(errs, err)
		mu.Unlock()
	}

	// check each selected object individually for the provided substring
	for _, obj := range resultArr {
		wg.Add(1)
		go func(obj *s3.Object) {
			defer wg.Done()
			// Advance the bar on every outcome so it completes even when some
			// objects fail; a rendering error must not affect the search result.
			defer func() { _ = bar.Add(1) }()

			getResult, err := svc.GetObject(&s3.GetObjectInput{
				Bucket: aws.String(opts.BucketName),
				Key:    obj.Key,
			})
			if err != nil {
				recordErr(err)
				return
			}
			// Register the close right away so the body is released on the
			// read-error path too; report a close failure instead of panicking.
			defer func() {
				if err := getResult.Body.Close(); err != nil {
					recordErr(err)
				}
			}()

			buf := new(bytes.Buffer)
			if _, err := buf.ReadFrom(getResult.Body); err != nil {
				recordErr(err)
				return
			}

			if strings.Contains(buf.String(), opts.Substring) {
				mu.Lock()
				matchedFiles = append(matchedFiles, *obj.Key)
				mu.Unlock()
			}
		}(obj)
	}

	// wait for all the goroutines to complete
	wg.Wait()

	return matchedFiles, errs
}