-
Notifications
You must be signed in to change notification settings - Fork 75
/
module_file_set_builder.go
183 lines (173 loc) · 7.41 KB
/
module_file_set_builder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bufmodulebuild
import (
"context"
"encoding/hex"
"errors"
)
import (
"go.uber.org/zap"
"golang.org/x/crypto/sha3"
)
import (
"github.com/apache/dubbo-kubernetes/pkg/bufman/bufpkg/bufmodule"
)
// moduleFileSetBuilder builds bufmodule.ModuleFileSets for a target Module,
// resolving remote dependencies through its ModuleReader and optionally
// merging in Modules from a Workspace (see Build/build below).
type moduleFileSetBuilder struct {
	// logger is used for diagnostics; it is not consulted in the build path visible here.
	logger *zap.Logger
	// moduleReader fetches dependency Modules for pins not satisfied by the workspace.
	moduleReader bufmodule.ModuleReader
}
// newModuleFileSetBuilder returns a new moduleFileSetBuilder that will use
// the given logger and ModuleReader.
func newModuleFileSetBuilder(
	logger *zap.Logger,
	moduleReader bufmodule.ModuleReader,
) *moduleFileSetBuilder {
	builder := &moduleFileSetBuilder{}
	builder.logger = logger
	builder.moduleReader = moduleReader
	return builder
}
// Build constructs a bufmodule.ModuleFileSet for the given Module, applying
// any provided BuildModuleFileSetOptions (currently: an optional Workspace
// supplying candidate dependency Modules).
func (m *moduleFileSetBuilder) Build(
	ctx context.Context,
	module bufmodule.Module,
	options ...BuildModuleFileSetOption,
) (bufmodule.ModuleFileSet, error) {
	opts := &buildModuleFileSetOptions{}
	for _, apply := range options {
		apply(opts)
	}
	return m.build(ctx, module, opts.workspace)
}
// build constructs the ModuleFileSet for module, collecting dependency
// Modules from the optional workspace and from module.DependencyModulePins()
// via the ModuleReader.
//
// Modules are deduplicated by a hash of their .proto file paths (see
// protoPathsHash) so that the input Module — or an equivalent copy of it —
// is never also added as a dependency.
func (m *moduleFileSetBuilder) build(
	ctx context.Context,
	module bufmodule.Module,
	workspace bufmodule.Workspace,
) (bufmodule.ModuleFileSet, error) {
	var dependencyModules []bufmodule.Module
	// hashes records the protoPathsHash of every Module already accounted
	// for: the input module plus each dependency added below.
	hashes := make(map[string]struct{})
	moduleHash, err := protoPathsHash(ctx, module)
	if err != nil {
		return nil, err
	}
	hashes[moduleHash] = struct{}{}
	if workspace != nil {
		// From the perspective of the ModuleFileSet, we include all of the files
		// specified in the workspace. When we build the Image from the ModuleFileSet,
		// we construct it based on the TargetFileInfos, and thus only include the files
		// in the transitive closure.
		//
		// This is defensible as we're saying that everything in the workspace is a potential
		// dependency, even if some are not actual dependencies of this specific module. In this
		// case, the extra modules are no different than unused dependencies in a buf.yaml/buf.lock.
		//
		// By including all the Modules from the workspace, we are potentially including the input
		// Module itself. This is bad, and will result in errors when using the result ModuleFileSet.
		// The ModuleFileSet expects a Module, and its dependency Modules, but it is not OK for
		// a Module to both be the input Module and a dependency Module. We have no concept
		// of Module "ID" - a Module may have a ModuleIdentity and commit associated with it,
		// but there is no guarantee of this. To get around this, we do a hash of the .proto file
		// paths within a Module, and say that Modules are equivalent if they contain the exact
		// same .proto file paths. If they have the same .proto file paths, then we do not
		// add the Module as a dependency.
		//
		// We know from bufmodule.Workspace that no two Modules in a Workspace will have overlapping
		// file paths, therefore if the Module is in the Workspace and has equivalent file paths,
		// we know that it must be the same module. If the Module is not in the workspace...why
		// did we provide a workspace?
		//
		// We could use other methods for equivalence or to say "do not add":
		//
		// - If there are any overlapping files: for example, one module has a.proto, one module
		// has b.proto, and both have c.proto. We don't use this heuristic as what we are looking
		// for here is a situation where based on our Module construction, we have two actually-equivalent
		// Modules. The existence of any overlapping files will result in an error during build, which
		// is what we want. This would also indicate this Module did not come from the Workspace, given
		// the property of file uniqueness in Workspaces.
		// - Golang object equivalence: for example, doing "module != potentialDependencyModule". This
		// happens to work since we only construct Modules once, but it's error-prone: it's totally
		// possible to create two Module objects from the same source, and if they represent the
		// same Module on disk/in the BSR, we don't want to include these as duplicates.
		// - Full module digest and/or proto file content: We could include buf.yaml, buf.lock,
		// README.md, etc, and also hash the actual content of the .proto files, but we're saying
		// that this doesn't help us any more than just comparing .proto files, and may lead to
		// false negatives. However, this is the most likely candidate as an alternative, as you
		// could argue that at the ModuleFileSetBuilder level, we should say "assume any difference
		// is a real difference".
		//
		// We could also determine which modules could be omitted here, but it would incur
		// the cost of parsing the target files and detecting exactly which imports are
		// used. We already get this for free in Image construction, so it's simplest and
		// most efficient to bundle all of the modules together like so.
		for _, potentialDependencyModule := range workspace.GetModules() {
			potentialDependencyModuleHash, err := protoPathsHash(ctx, potentialDependencyModule)
			if err != nil {
				return nil, err
			}
			if _, ok := hashes[potentialDependencyModuleHash]; !ok {
				// Not seen before: this is a genuine dependency. Record its
				// hash so that equivalent Modules encountered later (in the
				// workspace or via DependencyModulePins) are not added twice.
				// Previously the hash was only (re-)inserted in the
				// already-present case, so duplicates were never tracked.
				dependencyModules = append(dependencyModules, potentialDependencyModule)
				hashes[potentialDependencyModuleHash] = struct{}{}
			}
		}
	}
	// We know these are unique by remote, owner, repository and
	// contain all transitive dependencies.
	for _, dependencyModulePin := range module.DependencyModulePins() {
		if workspace != nil {
			if _, ok := workspace.GetModule(dependencyModulePin); ok {
				// This dependency is already provided by the workspace, so we don't
				// need to consult the ModuleReader.
				continue
			}
		}
		dependencyModule, err := m.moduleReader.GetModule(ctx, dependencyModulePin)
		if err != nil {
			return nil, err
		}
		dependencyModuleHash, err := protoPathsHash(ctx, dependencyModule)
		if err != nil {
			return nil, err
		}
		// At this point, this is really just a safety check.
		if _, ok := hashes[dependencyModuleHash]; ok {
			return nil, errors.New("module declared in DependencyModulePins but not in workspace was already added to the dependency Module set, this is a system error")
		}
		dependencyModules = append(dependencyModules, dependencyModule)
		hashes[dependencyModuleHash] = struct{}{}
	}
	return bufmodule.NewModuleFileSet(module, dependencyModules), nil
}
// protoPathsHash returns a hex-encoded SHAKE256 digest over the paths of the
// .proto source files within the Module. Two Modules with identical source
// file paths produce identical hashes; build uses this as its equivalence test.
func protoPathsHash(ctx context.Context, module bufmodule.Module) (string, error) {
	fileInfos, err := module.SourceFileInfos(ctx)
	if err != nil {
		return "", err
	}
	hasher := sha3.NewShake256()
	for _, fileInfo := range fileInfos {
		if _, err := hasher.Write([]byte(fileInfo.Path())); err != nil {
			return "", err
		}
	}
	digest := make([]byte, 64)
	if _, err := hasher.Read(digest); err != nil {
		return "", err
	}
	return hex.EncodeToString(digest), nil
}