Skip to content

Commit

Permalink
Bucketed Indexer (#1236)
Browse files Browse the repository at this point in the history
Indexer implementing bucketing to speed up queries and make query times
more consistent.

Changes the API to add 2 extra fields to the response:

- minimum file matches
- estimated different files

Also added a number of bug fixes:

- The commit hash now uses the real commit hash rather than the
reference hash of the tag. The datastore key still uses the
reference hash for cases where multiple tags are on the same commit. An
extra field `reference` is added to the datastore to save the reference hash.
- Controller now correctly avoids pushing already existing items to the
pub/sub
-
  • Loading branch information
another-rex committed Apr 21, 2023
1 parent b436816 commit d511a2d
Show file tree
Hide file tree
Showing 24 changed files with 649 additions and 347 deletions.
1 change: 1 addition & 0 deletions .ruff.toml
@@ -0,0 +1 @@
line-length = 80
68 changes: 34 additions & 34 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions docker/importer/importer.py
Expand Up @@ -196,7 +196,7 @@ def import_new_oss_fuzz_entries(self, repo, oss_fuzz_source):
logging.info('No new entries, skipping committing.')
return

logging.info('Commiting and pushing new entries')
logging.info('Committing and pushing new entries')
if osv.push_source_changes(repo, 'Import from OSS-Fuzz',
self._git_callbacks(oss_fuzz_source)):
ndb.put_multi(exported)
Expand All @@ -217,13 +217,15 @@ def schedule_regular_updates(self, repo, source_repo: osv.SourceRepository):
osv.Bug.source == source_repo.name):
self._request_analysis(bug, source_repo, repo)

# yapf: disable
# Perform a re-analysis on existing oss-fuzz bugs for a period of time,
# more vulnerable releases might be made even though fixes have
# already been merged into master/main
cutoff_time = aest_time_now - datetime.timedelta(days=_BUG_REDO_DAYS)
query = osv.Bug.query(osv.Bug.status == osv.BugStatus.PROCESSED,
osv.Bug.source == source_repo.name, osv.Bug.timestamp
>= cutoff_time)
osv.Bug.source == source_repo.name,
osv.Bug.timestamp >= cutoff_time)
# yapf: enable

for bug in query:
logging.info('Re-requesting impact for %s.', bug.key.id())
Expand Down
10 changes: 8 additions & 2 deletions docker/indexer/Dockerfile
Expand Up @@ -14,8 +14,14 @@

FROM golang:1.20 as GO_BUILD
WORKDIR /build
ADD ./ /build
RUN chmod +x build.sh

# Cache dependencies in these steps
COPY ./go.mod /build/go.mod
COPY ./go.sum /build/go.sum
RUN go mod download

# Do the build here
COPY ./ /build
RUN ./build.sh

FROM gcr.io/distroless/base
Expand Down
1 change: 0 additions & 1 deletion docker/indexer/build.sh 100644 → 100755
@@ -1,3 +1,2 @@
#!/bin/sh
go mod tidy
go build -o indexer
18 changes: 9 additions & 9 deletions docker/indexer/config/config.go
@@ -1,17 +1,17 @@
/*
Copyright 2022 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package config provides functionality to load configurations
package config
Expand Down
20 changes: 10 additions & 10 deletions docker/indexer/indexer.go
@@ -1,17 +1,17 @@
/*
Copyright 2022 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main

Expand Down Expand Up @@ -85,7 +85,7 @@ func runWorker(ctx context.Context, storer *idxStorage.Store, repoBucketHdl *sto
PubSubOutstandingMessages: outstanding,
}
// The preparation results are picked up by the processing stage
// in workder mode.
// in worker mode.
// They include checkout options which are used to load the desired
// repository state and hash the source files in that particular tree.
// Finally, the computed hashes and repo state information is stored.
Expand Down
20 changes: 10 additions & 10 deletions docker/indexer/shared/shared.go
@@ -1,17 +1,17 @@
/*
Copyright 2022 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package shared provides functionality that is used in multiple packages.
package shared
Expand All @@ -34,7 +34,7 @@ const (

// CopyFromBucket copies a directory from a bucket to a temporary location.
func CopyFromBucket(ctx context.Context, bucketHdl *storage.BucketHandle, name string) (string, error) {
tmpDir, err := os.MkdirTemp("", "")
tmpDir, err := os.MkdirTemp("", name)
if err != nil {
return "", err
}
Expand Down
44 changes: 28 additions & 16 deletions docker/indexer/stages/preparation/preparation.go
@@ -1,17 +1,17 @@
/*
Copyright 2022 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package preparation provides functionality to extract tags, branches and commits from repository configurations.
package preparation
Expand Down Expand Up @@ -53,6 +53,7 @@ type Result struct {
Version string
CheckoutOptions *git.CheckoutOptions
Commit plumbing.Hash
Reference plumbing.Hash
CommitTag string
When time.Time
Type string
Expand Down Expand Up @@ -150,6 +151,13 @@ func (s *Stage) processGit(ctx context.Context, repoCfg *config.RepoConfig) erro
commitTracker := make(map[plumbing.Hash]bool)
// repoInfo is used as the iterator function to create RepositoryInformation structs.
repoInfo := func(ref *plumbing.Reference) error {
// Resolve the real commit hash
commitHash, err := repo.ResolveRevision(plumbing.Revision(ref.Name().String()))

if err != nil {
return err
}

found, err := s.Checker.Exists(ctx, repoCfg.Address, shared.MD5, ref.Hash())
if err != nil {
return err
Expand All @@ -159,7 +167,7 @@ func (s *Stage) processGit(ctx context.Context, repoCfg *config.RepoConfig) erro
}

var when time.Time
if c, ok := allCommits[ref.Hash()]; ok {
if c, ok := allCommits[*commitHash]; ok {
when = c.Author.When
}

Expand Down Expand Up @@ -191,17 +199,20 @@ func (s *Stage) processGit(ctx context.Context, repoCfg *config.RepoConfig) erro
Branch: ref.Name(),
},
When: when,
Commit: ref.Hash(),
Commit: *commitHash,
Reference: ref.Hash(),
CommitTag: commitTag,
Type: shared.Git,
Addr: repoCfg.Address,
FileExts: repoCfg.FileExts,
}
commitTracker[ref.Hash()] = true
commitTracker[*commitHash] = true
buf, err := json.Marshal(result)
if err != nil {
return err
}

log.Infof("publishing %s at version: %s", result.Name, result.Version)
pubRes := s.Output.Publish(ctx, &pubsub.Message{Data: buf})
_, err = pubRes.Get(ctx)
return err
Expand Down Expand Up @@ -241,10 +252,11 @@ func (s *Stage) processGit(ctx context.Context, repoCfg *config.RepoConfig) erro
Hash: h,
Force: true,
},
When: c.Author.When,
Commit: h,
Type: shared.Git,
FileExts: repoCfg.FileExts,
Reference: h,
When: c.Author.When,
Commit: h,
Type: shared.Git,
FileExts: repoCfg.FileExts,
}
buf, err := json.Marshal(result)
if err != nil {
Expand Down

0 comments on commit d511a2d

Please sign in to comment.