/
main.go
181 lines (150 loc) · 4.23 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
package main
import (
"context"
mapset "github.com/deckarep/golang-set/v2"
"golang.org/x/sync/errgroup"
"io/fs"
"os"
"runtime"
"sort"
"sync/atomic"
"time"
)
// This application demonstrates the correct way to calculate an MD5 hash
// that is compatible with Azure Blob Storage's Content-MD5.
import (
"fmt"
"github.com/abitofhelp/azmd5_hash_dir/hash/md5"
"github.com/abitofhelp/azmd5_hash_dir/hash/model"
"path/filepath"
)
// Tuning values for the hashing pipeline.
const (
	// kPathsChannelSize is the buffer capacity of the channel that carries
	// file paths from the directory walker to the hashing workers.
	kPathsChannelSize = 10

	// kLocalFilesChannelSize is the buffer capacity of the channel that
	// carries hashed files from the workers to the collecting stage.
	kLocalFilesChannelSize = 10

	// kTimeout bounds the whole run; main uses it as the context deadline.
	kTimeout = 30 * time.Second
)
// WalkDirectoryWithChannel walks the tree rooted at dir and sends the path of
// every file of interest to paths. Each path is relative to dir.
//
// A directory whose base name is in excludeDirs is skipped together with
// everything beneath it. A file whose base name is in excludeFiles is not
// sent. Directories themselves are never sent.
//
// If ctx is done while a path is waiting to be sent, the walk stops and the
// context's error is returned (wrapped). Any other error reported during the
// walk is wrapped and returned as well. This function does not close paths;
// that is left to the caller.
func WalkDirectoryWithChannel(
	ctx context.Context,
	dir string,
	excludeDirs mapset.Set[string],
	excludeFiles mapset.Set[string],
	paths chan<- string) error {
	// Number of paths sent so far; only used for the progress output.
	cnt := 0

	err := fs.WalkDir(os.DirFS(dir), ".", func(p string, d fs.DirEntry, err error) error {
		// d may be nil when err is set, so the error must be handled first.
		if err != nil {
			return fmt.Errorf("failed to walk the directory '%s': %w", filepath.Join(dir, p), err)
		}

		if d.IsDir() {
			// SkipDir is only returned for directories. Returning it for a
			// regular file makes WalkDir skip the remaining entries of the
			// containing directory, which would silently drop files.
			if excludeDirs.Contains(d.Name()) {
				return fs.SkipDir
			}
			// Descend into every directory that is not excluded.
			return nil
		}

		// Skip any files that are in excludeFiles.
		if excludeFiles.Contains(d.Name()) {
			return nil
		}

		// Block until the consumer accepts the path or the context ends.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case paths <- filepath.Join(p):
			cnt++
			fmt.Printf("prodcnt: %d\n", cnt)
		}
		return nil
	})
	if err != nil {
		return fmt.Errorf("failed to walk the directory '%s': %w", dir, err)
	}
	return nil
}
// BuildLocalFileHashes computes a base64 MD5 hash for every file of interest
// under dir and returns the results ordered by their path inside dir.
//
// The work runs as a three-stage pipeline on eg:
//   - a producer walks dir (honoring excludeDirs and excludeFiles) and feeds
//     relative file paths into a channel;
//   - a pool of workers hashes each file and emits a model.LocalFile;
//   - a reducer collects the results and sorts them.
//
// eqctx is used to abandon pending channel sends. It is presumably the
// context derived from eg (as main does), so that the first failing stage
// unblocks the others -- callers should confirm they pass that context.
//
// The function calls eg.Wait and therefore blocks until every goroutine
// started on eg has finished. On failure it returns a nil slice and the
// wrapped first error reported by the group.
func BuildLocalFileHashes(
	eqctx context.Context,
	dir string,
	excludeDirs mapset.Set[string],
	excludeFiles mapset.Set[string],
	eg *errgroup.Group) ([]*model.LocalFile, error) {
	paths := make(chan string, kPathsChannelSize)

	// Producer: Get the paths to the files of interest within the dir directory.
	eg.Go(func() error {
		// Closing paths is what lets the workers' range loops terminate.
		defer close(paths)
		if err := WalkDirectoryWithChannel(eqctx, dir, excludeDirs, excludeFiles, paths); err != nil {
			return fmt.Errorf("failed to walk through directory '%s': %w", dir, err)
		}
		return nil
	})

	// Consumer: Hash the files.
	localFiles := make(chan *model.LocalFile, kLocalFilesChannelSize)

	// Always run at least one worker: NumCPU()/2 is 0 on a single-CPU machine,
	// and with no workers localFiles would never be closed.
	workers := runtime.NumCPU() / 2
	if workers < 1 {
		workers = 1
	}

	// remaining tracks the workers that are still running. It is deliberately
	// separate from the loop bound: workers decrement it concurrently with the
	// loop below, so sharing one variable would both race and start too few
	// workers, leaving the counter unable to reach zero.
	remaining := int64(workers)

	// Number of files hashed so far; updated atomically because every worker
	// increments it. Only used for the progress output.
	var cnt int64

	for i := 0; i < workers; i++ {
		eg.Go(func() error {
			defer func() {
				// Close the channel when the last worker completes.
				if atomic.AddInt64(&remaining, -1) == 0 {
					close(localFiles)
				}
			}()

			for p := range paths {
				// Calculate the base64 MD5 hash of the file.
				fullPath := filepath.Join(dir, p)
				base64Md5, err := md5.GenMd5HashAsBase64(fullPath)
				if err != nil {
					return fmt.Errorf("failed to generate a base64 hash of file '%s': %w", fullPath, err)
				}

				// Block until the reducer accepts the result or the context ends.
				select {
				case <-eqctx.Done():
					return eqctx.Err()
				case localFiles <- model.NewLocalFile(p, base64Md5):
					fmt.Printf("hashcnt: %d\n", atomic.AddInt64(&cnt, 1))
				}
			}
			return nil
		})
	}

	// Reduce & Sort: Slice of hashes ordered by path.
	// hashes is written only by the reducer goroutine and read only after
	// eg.Wait returns, so no extra synchronization is needed.
	var hashes []*model.LocalFile
	eg.Go(func() error {
		for lf := range localFiles {
			hashes = append(hashes, lf)
		}
		sort.Slice(hashes, func(i, j int) bool {
			return hashes[i].PathInsideDirectory() < hashes[j].PathInsideDirectory()
		})
		return nil
	})

	if err := eg.Wait(); err != nil {
		return nil, fmt.Errorf("failed to generate hashes for directory '%s': %w", dir, err)
	}
	return hashes, nil
}
// //////////////////////////////////////////////////////////////////////////////
// M A I N L I N E
// //////////////////////////////////////////////////////////////////////////////
// main hashes the files under a fixed directory, skipping a fixed set of
// directory and file names, and prints one numbered line per file followed by
// the elapsed time. If hashing fails, the error is printed instead. The whole
// run is bounded by kTimeout.
func main() {
	const dir = "/Users/mike/Downloads/clients/alm"

	// Names of directories and files that are left out of the listing.
	excludeDirs := mapset.NewSet[string]("assets", "aerials", "projects")
	excludeFiles := mapset.NewSet[string](".DS_Store")

	ctx, cancel := context.WithTimeout(context.Background(), kTimeout)
	// Release the timeout's resources on every exit path.
	defer cancel()
	eg, eqctx := errgroup.WithContext(ctx)

	start := time.Now()
	hashes, err := BuildLocalFileHashes(eqctx, dir, excludeDirs, excludeFiles, eg)
	if err != nil {
		fmt.Println(err)
		fmt.Println()
		return
	}
	// Measure before printing so the report covers hashing only.
	elapsed := time.Since(start)

	fmt.Println()
	for i, h := range hashes {
		fmt.Printf("(%4d) %s => %s\n", i+1, h.PathInsideDirectory(), h.Base64Md5())
	}
	fmt.Printf("Elapsed: %d ms\n", elapsed.Milliseconds())
	fmt.Println()
}