Skip to content

Commit

Permalink
Update version to return non-parsed tag (#1286)
Browse files Browse the repository at this point in the history
Update version that's returned in determine_version API to return the
actual tag rather than the parsed version.

---------

Signed-off-by: Rex P <rexpan@google.com>
  • Loading branch information
another-rex committed May 30, 2023
1 parent 6014215 commit 9f32328
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 11 deletions.
4 changes: 2 additions & 2 deletions gcp/api/Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 14 additions & 7 deletions gcp/api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,14 @@
_MAX_MATCHES_TO_CARE = 100
# Max results to return for DetermineVersion
_MAX_DETERMINE_VER_RESULTS_TO_RETURN = 10
_DETERMINE_VER_MIN_SCORE_CUTOFF = 0.2
_DETERMINE_VER_MIN_SCORE_CUTOFF = 0.05
# Size of buckets to divide hashes into in DetermineVersion
# This should match the number in the indexer
_BUCKET_SIZE = 512

# Prefix for git tag refs; stripped from the stored tag before it is returned
_TAG_PREFIX = "refs/tags/"

_ndb_client = ndb.Client()


Expand Down Expand Up @@ -178,6 +181,10 @@ def build_determine_version_result(
for f in idx_futures:
idx: osv.RepoIndex = f.result()

if idx.empty_bucket_bitmap is None:
logging.error('No empty bucket bitmap for: %s@%s', idx.name, idx.version)
continue

# Byte order little is how the bitmap is stored in the indexer originally
bitmap = int.from_bytes(idx.empty_bucket_bitmap, byteorder='little')

Expand All @@ -190,24 +197,24 @@ def build_determine_version_result(
# this requirement.
missed_empty_buckets = (inverted_empty_bucket_bitmap & bitmap).bit_count()

estimated_num_diff = estimate_diff(
_BUCKET_SIZE -
bucket_matches_by_proj[idx.key] # Buckets that match are not changed
estimated_diff_files = estimate_diff(
_BUCKET_SIZE # Starting with the total number of buckets
- bucket_matches_by_proj[idx.key] # Buckets that match are not changed
- empty_bucket_count # Buckets that are empty are not changed
+ missed_empty_buckets # Unless they don't match the bitmap
- num_skipped_buckets, # Buckets skipped are assumed unchanged
abs(idx.file_count - max_files) # The difference in file count
)

version_match = osv_service_v1_pb2.VersionMatch(
score=(max_files - estimated_num_diff) / max_files,
score=(max_files - estimated_diff_files) / max_files,
minimum_file_matches=file_matches_by_proj[idx.key],
estimated_diff_files=estimated_num_diff,
estimated_diff_files=estimated_diff_files,
repo_info=osv_service_v1_pb2.VersionRepositoryInformation(
type=osv_service_v1_pb2.VersionRepositoryInformation.GIT,
address=idx.repo_addr,
commit=idx.commit,
version=idx.version,
tag=idx.tag.removeprefix(_TAG_PREFIX),
),
)

Expand Down
4 changes: 2 additions & 2 deletions osv/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,8 +657,6 @@ class RepoIndex(ndb.Model):
base_cpe = ndb.StringProperty()
# The repository commit
commit = ndb.BlobProperty()
# Number of hash pages
pages = ndb.IntegerProperty()
# The source address
repo_addr = ndb.StringProperty()
# The identified version
Expand All @@ -673,6 +671,8 @@ class RepoIndex(ndb.Model):
empty_bucket_bitmap = ndb.BlobProperty()
# Number of files in this repo
file_count = ndb.IntegerProperty()
# Tag name of the source
tag = ndb.StringProperty()


class FileResult(ndb.Model):
Expand Down

0 comments on commit 9f32328

Please sign in to comment.