/
indexing.go
328 lines (273 loc) · 7.36 KB
/
indexing.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
package unreal
import (
"context"
"fmt"
"io/fs"
"os"
"path/filepath"
"slices"
"sort"
"strings"
"sync"
"golang.org/x/sync/errgroup"
)
// collectModules scans the whole |Source| directory of an unreal project in a parallel fashion.
// It indexes all the files within the project so later searches can be done in memory.
//
// The work is organised as a small pipeline on top of an errgroup:
//   - produce: every build file found by collectFiles is pushed on a channel;
//   - map:     kModuleBuilderWorkerCount workers turn each build file into a *Module holding
//     the files located under the build file's directory;
//   - reduce:  a single goroutine gathers the modules in a map keyed by module name.
//
// It returns an error if collecting the files fails, if a build file is missing from the file
// list, if newUnrealModule fails, if two modules share a name, or if ctx is cancelled.
func collectModules(ctx context.Context, sourceDir string) (map[string]*Module, error) {
	// Collect all the files in the unreal project.
	result, err := collectFiles(ctx, sourceDir)
	if err != nil {
		return nil, fmt.Errorf("collecting files in %q: %w", sourceDir, err)
	}

	// Every stage runs under the same group so the first error cancels the others.
	g, ctx := errgroup.WithContext(ctx)

	// Produce: all the build files to check.
	buildFileDescriptionsCh := make(chan *buildFileDescription)
	g.Go(func() error {
		defer close(buildFileDescriptionsCh)
		for _, bfd := range result.buildFiles {
			select {
			case buildFileDescriptionsCh <- bfd:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		return nil
	})

	// Map: each module collects all the files that belong to it.
	modulesCh := make(chan *Module)
	{
		var wg sync.WaitGroup
		for i := 0; i < kModuleBuilderWorkerCount; i++ {
			wg.Add(1)
			g.Go(func() error {
				defer wg.Done()
				for bfd := range buildFileDescriptionsCh {
					moduleFiles, err := moduleFilesFor(result.allFiles, bfd.Path)
					if err != nil {
						return err
					}
					um, err := newUnrealModule(bfd.ModuleName, bfd.Path, moduleFiles)
					if err != nil {
						return fmt.Errorf("creating unreal module %q: %w", bfd.ModuleName, err)
					}
					select {
					case modulesCh <- um:
					case <-ctx.Done():
						return ctx.Err()
					}
				}
				return nil
			})
		}
		// Close the output channel once every worker is done. This goroutine is started after
		// all the wg.Add calls so that Wait cannot observe a zero counter too early.
		g.Go(func() error {
			wg.Wait()
			close(modulesCh)
			return nil
		})
	}

	// Reduce: collect all the generated modules. Only this goroutine writes to the map, and it
	// is read only after g.Wait returns.
	modules := make(map[string]*Module)
	g.Go(func() error {
		for module := range modulesCh {
			if _, ok := modules[module.Name]; ok {
				return fmt.Errorf("module %q found more than once", module.Name)
			}
			modules[module.Name] = module
		}
		return nil
	})

	if err := g.Wait(); err != nil {
		return nil, err
	}
	return modules, nil
}

// moduleFilesFor returns the entries of allFiles located under the directory that contains
// buildFile, at any depth, including buildFile itself.
//
// allFiles must be sorted in ascending byte order. Under that ordering every path sharing the
// "<dir><separator>" prefix forms one contiguous run around buildFile, so the run is found by
// binary-searching buildFile and widening in both directions. The prefix includes the trailing
// separator on purpose: a plain substring or bare-directory match would also pick up siblings
// such as "<dir>.txt" or "<dir>Extra/file".
//
// The returned slice is a copy (still sorted), so callers may keep or modify it freely.
func moduleFilesFor(allFiles []string, buildFile string) ([]string, error) {
	index, ok := slices.BinarySearch(allFiles, buildFile)
	if !ok {
		return nil, fmt.Errorf("build file %q not found in the all file list", buildFile)
	}

	prefix := filepath.Dir(buildFile)
	switch {
	case prefix == ".":
		// The build file has no directory component, so every (relative) path is under it.
		prefix = ""
	case !strings.HasSuffix(prefix, string(filepath.Separator)):
		// filepath.Dir only keeps a trailing separator for a root directory.
		prefix += string(filepath.Separator)
	}

	// Widen backwards and forwards until a path falls out of the directory range.
	lo := index
	for lo > 0 && strings.HasPrefix(allFiles[lo-1], prefix) {
		lo--
	}
	hi := index + 1
	for hi < len(allFiles) && strings.HasPrefix(allFiles[hi], prefix) {
		hi++
	}
	return slices.Clone(allFiles[lo:hi]), nil
}
// buildFileDescription pairs a build file found on disk with the module name reported for it.
type buildFileDescription struct {
// ModuleName is the module name returned by IsUnrealBuildFile for this file.
ModuleName string
// Path is the path of the build file, as built by collectFiles while listing directories.
Path string
}
// collectFilesResult is the value returned by collectFiles.
type collectFilesResult struct {
// buildFiles holds one description per file accepted by IsUnrealBuildFile, in no particular order.
buildFiles []*buildFileDescription
// allFiles holds the path of every non-directory entry found, sorted with sort.Strings.
allFiles []string
}
// collectFiles walks sourceDir and gathers, in parallel, the path of every non-directory entry
// found under it, together with the subset of those files that IsUnrealBuildFile recognises.
//
// The work is organised as a pipeline on top of an errgroup:
//   - produce: filepath.WalkDir emits every directory under sourceDir;
//   - map:     kModuleSearcherWorkerCount workers list each directory and emit its files;
//   - router:  kBuildFileSearcherWorkerCount workers forward every file to the "all files"
//     stream and, when it is a build file, also to the "build files" stream;
//   - reduce:  two goroutines accumulate both streams in slices.
//
// On success allFiles is sorted (sort.Strings) so it can be binary-searched; buildFiles is in
// arrival order. The first error from any stage, or the cancellation of ctx, aborts the
// pipeline and is returned.
func collectFiles(ctx context.Context, sourceDir string) (*collectFilesResult, error) {
	g, ctx := errgroup.WithContext(ctx)

	// Produce: generate all the directories to be checked.
	dirsToCheckCh := make(chan string)
	g.Go(func() error {
		defer close(dirsToCheckCh)
		err := filepath.WalkDir(sourceDir, func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				return fmt.Errorf("path %q: %w", path, err)
			}
			if !d.IsDir() {
				return nil
			}
			select {
			case dirsToCheckCh <- path:
				return nil
			case <-ctx.Done():
				return ctx.Err()
			}
		})
		if err != nil {
			return fmt.Errorf("walking dir %q: %w", sourceDir, err)
		}
		return nil
	})

	// Map: generate all the valid files found in the directories.
	foundFilesCh := make(chan string)
	{
		var wg sync.WaitGroup
		// Create the workers that list the directories.
		for i := 0; i < kModuleSearcherWorkerCount; i++ {
			wg.Add(1)
			g.Go(func() error {
				defer wg.Done()
				for dir := range dirsToCheckCh {
					files, err := os.ReadDir(dir)
					if err != nil {
						return fmt.Errorf("reading dir %q: %w", dir, err)
					}
					for _, file := range files {
						// If it's a directory, it will be grabbed by another worker.
						if file.IsDir() {
							continue
						}
						select {
						case foundFilesCh <- filepath.Join(dir, file.Name()):
						case <-ctx.Done():
							return ctx.Err()
						}
					}
				}
				return nil
			})
		}
		// Ensure the channel gets closed. This goroutine must be started only after every
		// wg.Add above: if Wait ran while the counter was still zero it would return at once and
		// close the channel under the workers, which would then panic on send.
		g.Go(func() error {
			wg.Wait()
			close(foundFilesCh)
			return nil
		})
	}

	// Router: detect if it's a build file (IsUnrealBuildFile is called per file, so this is
	// spread over several workers).
	buildFilesCh := make(chan *buildFileDescription)
	allFilesCh := make(chan string)
	{
		var wg sync.WaitGroup
		// Create the workers that search for build files.
		for i := 0; i < kBuildFileSearcherWorkerCount; i++ {
			wg.Add(1)
			g.Go(func() error {
				defer wg.Done()
				for file := range foundFilesCh {
					// Every file goes to the all files channel.
					select {
					case allFilesCh <- file:
					case <-ctx.Done():
						return ctx.Err()
					}
					// Now we check if it's an unreal build file and, if so, send it to the
					// specific channel as well.
					moduleName, ok, err := IsUnrealBuildFile(file)
					if err != nil {
						return fmt.Errorf("checking if %q is unreal file: %w", file, err)
					}
					if !ok {
						continue
					}
					bfd := &buildFileDescription{
						ModuleName: moduleName,
						Path:       file,
					}
					select {
					case buildFilesCh <- bfd:
					case <-ctx.Done():
						return ctx.Err()
					}
				}
				return nil
			})
		}
		// Ensure the channels get closed; started after all wg.Add calls for the same reason as
		// in the map stage.
		g.Go(func() error {
			wg.Wait()
			close(buildFilesCh)
			close(allFilesCh)
			return nil
		})
	}

	// Reduce: collect both streams. Each slice is written by exactly one goroutine and is only
	// read after g.Wait returns.
	var buildFiles []*buildFileDescription
	var allFiles []string
	g.Go(func() error {
		for bfd := range buildFilesCh {
			buildFiles = append(buildFiles, bfd)
		}
		return nil
	})
	g.Go(func() error {
		for file := range allFilesCh {
			allFiles = append(allFiles, file)
		}
		return nil
	})

	// Run the whole pipeline.
	if err := g.Wait(); err != nil {
		return nil, err
	}

	// Sort all the files so callers can do faster (binary) searches on the list.
	sort.Strings(allFiles)
	return &collectFilesResult{
		buildFiles: buildFiles,
		allFiles:   allFiles,
	}, nil
}