Skip to content

Commit

Permalink
Merge pull request #8679 from dependabot/dev/rybrande/NugetJsonParse
Browse files Browse the repository at this point in the history
NuGet efficiency
  • Loading branch information
bdragon committed Jan 9, 2024
2 parents 9095f66 + d28427b commit 763f444
Show file tree
Hide file tree
Showing 13 changed files with 5,824 additions and 288 deletions.
13 changes: 12 additions & 1 deletion bin/dry-run.rb
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,17 @@
}
end

# Optionally register an Azure DevOps NuGet feed credential from the environment.
# Both variables are validated the same way (nil, empty and whitespace-only all
# count as "not supplied") so a blank feed URL can no longer slip through and
# produce a credential with an empty "url".
unless ENV["LOCAL_AZURE_ACCESS_TOKEN"].to_s.strip.empty?
  if ENV["LOCAL_AZURE_FEED_URL"].to_s.strip.empty?
    raise "LOCAL_AZURE_ACCESS_TOKEN supplied without LOCAL_AZURE_FEED_URL"
  end

  $options[:credentials] << {
    "type" => "nuget_feed",
    "host" => "pkgs.dev.azure.com",
    "url" => ENV.fetch("LOCAL_AZURE_FEED_URL"),
    # The token is sent with a leading colon.
    # NOTE(review): presumably an empty user name for basic auth - confirm.
    "token" => ":#{ENV.fetch('LOCAL_AZURE_ACCESS_TOKEN')}"
  }
end

unless ENV["LOCAL_CONFIG_VARIABLES"].to_s.strip.empty?
# For example:
# "[{\"type\":\"npm_registry\",\"registry\":\
Expand Down Expand Up @@ -391,8 +402,8 @@ def fetch_files(fetcher)
else
puts "=> cloning into #{$repo_contents_path}"
FileUtils.rm_rf($repo_contents_path)
fetcher.clone_repo_contents
end
fetcher.clone_repo_contents
if $options[:commit]
Dir.chdir($repo_contents_path) do
puts "=> checking out commit #{$options[:commit]}"
Expand Down
50 changes: 29 additions & 21 deletions nuget/lib/dependabot/nuget/file_fetcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

require "dependabot/file_fetchers"
require "dependabot/file_fetchers/base"
require "dependabot/nuget/cache_manager"
require "set"
require "sorbet-runtime"

Expand Down Expand Up @@ -283,27 +284,34 @@ def imported_property_files
end

# Recursively fetches every file imported or referenced by +file+.
#
# Results are memoised per file (keyed by directory + "/" + name) in
# @fetched_files so a file shared by several projects is only walked once.
#
# @param file the project/property file whose imports should be followed
# @param previously_fetched_files files already visited on this call chain;
#   their names are skipped to avoid walking import cycles
# @return [Array] the fetched files, including transitively imported ones
def fetch_imported_property_files(file:, previously_fetched_files:)
  file_id = file.directory + "/" + file.name
  @fetched_files ||= {}
  return @fetched_files[file_id] if @fetched_files[file_id]

  paths =
    ImportPathsFinder.new(project_file: file).import_paths +
    ImportPathsFinder.new(project_file: file).project_reference_paths +
    ImportPathsFinder.new(project_file: file).project_file_paths

  # Loop-invariant, so compute the visited names once rather than per path.
  previously_fetched_names = previously_fetched_files.map(&:name)

  fetched = paths.flat_map do |path|
    next if previously_fetched_names.include?(path)
    next if file.name == path
    next if path.include?("$(")

    fetched_file = fetch_file_from_host(path)
    grandchild_property_files = fetch_imported_property_files(
      file: fetched_file,
      previously_fetched_files: previously_fetched_files + [file]
    )
    [fetched_file, *grandchild_property_files]
  rescue Dependabot::DependencyFileNotFound
    # Don't worry about missing files too much for now (at least
    # until we start resolving properties)
    nil
  end.compact

  # Cache the complete list once every path has been processed. Writing the
  # cache inside the loop would keep only the files of the last path.
  # An empty result is not cached: it may be empty only because every import
  # was skipped via previously_fetched_files on this particular call chain.
  @fetched_files[file_id] = fetched unless fetched.empty?
  fetched
end
end
end
Expand Down
14 changes: 3 additions & 11 deletions nuget/lib/dependabot/nuget/file_parser/project_file_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
require "dependabot/nuget/file_parser"
require "dependabot/nuget/update_checker"
require "dependabot/nuget/cache_manager"
require "dependabot/nuget/nuget_client"

# For details on how dotnet handles version constraints, see:
# https://docs.microsoft.com/en-us/nuget/reference/package-versioning
module Dependabot
module Nuget
class FileParser
# rubocop:disable Metrics/ClassLength
class ProjectFileParser
require "dependabot/file_parsers/base/dependency_set"
require_relative "property_value_finder"
Expand Down Expand Up @@ -310,16 +310,9 @@ def dependency_url_has_matching_result?(dependency_name, dependency_url)
end

# Reports whether a v3 feed knows the given package.
#
# Delegates the lookup to NugetClient.get_package_versions_v3 and returns true
# for any non-nil answer, so an empty version list still counts as a match.
#
# @param dependency_name [String] package id to look for
# @param dependency_url [Hash] repository details passed through to NugetClient
# @return [Boolean]
def dependency_url_has_matching_result_v3?(dependency_name, dependency_url)
  !NugetClient.get_package_versions_v3(dependency_name, dependency_url).nil?
end

def dependency_url_has_matching_result_v2?(dependency_name, dependency_url)
Expand Down Expand Up @@ -501,7 +494,6 @@ def dotnet_tools_json
dependency_files.find { |f| f.name.casecmp(".config/dotnet-tools.json").zero? }
end
end
# rubocop:enable Metrics/ClassLength
end
end
end
99 changes: 99 additions & 0 deletions nuget/lib/dependabot/nuget/nuget_client.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# typed: true
# frozen_string_literal: true

require "dependabot/nuget/cache_manager"
require "dependabot/nuget/update_checker/repository_finder"

module Dependabot
  module Nuget
    # Stateless helpers that ask a NuGet v3 feed which versions of a package exist.
    # Every HTTP response is memoised per URL in the "dependency_url_search_cache"
    # cache obtained from CacheManager.
    class NugetClient
      # Returns the versions of a package published on a v3 feed.
      #
      # @param dependency_name [String] package id; only used by the search endpoint
      # @param repository_details [Hash] may contain :registration_url, :search_url,
      #   :versions_url, :auth_header and :repository_url
      # @return [Set<String>, Array<String>, nil] nil when no endpoint is configured,
      #   the endpoint did not answer with HTTP 200, or the search endpoint has no
      #   entry for the package
      # @raise [PrivateSourceTimedOut] when a non-default feed times out
      def self.get_package_versions_v3(dependency_name, repository_details)
        # Use the registration URL if possible because it is fast and correct
        if repository_details[:registration_url]
          get_versions_from_registration_v3(repository_details)
        # use the search API if not because it is slow but correct
        elsif repository_details[:search_url]
          get_versions_from_search_url_v3(repository_details, dependency_name)
        # Otherwise, use the versions URL (fast but wrong because it includes unlisted versions)
        elsif repository_details[:versions_url]
          get_versions_from_versions_url_v3(repository_details)
        end
      end

      # Reads the "versions" array of the flat-container style versions document.
      private_class_method def self.get_versions_from_versions_url_v3(repository_details)
        body = execute_search_for_dependency_url(repository_details[:versions_url], repository_details)
        body&.fetch("versions")
      end

      # Collects the listed versions from a registration index, following pages
      # that are not inlined in the index document.
      private_class_method def self.get_versions_from_registration_v3(repository_details)
        url = repository_details[:registration_url]
        body = execute_search_for_dependency_url(url, repository_details)
        return unless body

        versions = Set.new
        body.fetch("items").each do |page|
          # A page either inlines its entries or has to be fetched via its "@id".
          items = page["items"] || registration_page_items(page["@id"], repository_details)
          items.each do |item|
            catalog_entry = item.fetch("catalogEntry")
            versions << catalog_entry.fetch("version") if listed?(catalog_entry)
          end
        end

        versions
      end

      # Fetches the entries of a non-inlined registration page. A page that is
      # not answered with HTTP 200 contributes no entries instead of crashing.
      private_class_method def self.registration_page_items(page_url, repository_details)
        page_body = execute_search_for_dependency_url(page_url, repository_details)
        return [] unless page_body

        page_body.fetch("items")
      end

      # NuGet's registration documentation says an entry without a "listed"
      # property must be considered listed; only an explicit non-true value hides it.
      private_class_method def self.listed?(catalog_entry)
        listed = catalog_entry["listed"]
        listed.nil? || listed == true
      end

      # Looks the package up in the search results and returns its version strings.
      private_class_method def self.get_versions_from_search_url_v3(repository_details, dependency_name)
        search_url = repository_details[:search_url]
        body = execute_search_for_dependency_url(search_url, repository_details)

        # casecmp already ignores ASCII case, so no downcase is needed first.
        body&.fetch("data")
            &.find { |d| d.fetch("id").casecmp(dependency_name).zero? }
            &.fetch("versions")
            &.map { |d| d.fetch("version") }
      end

      # Performs a cached GET and returns the parsed JSON body, or nil unless the
      # response status is 200. Timeouts and socket errors are re-raised as-is for
      # the default repository and as PrivateSourceTimedOut for any other feed.
      private_class_method def self.execute_search_for_dependency_url(url, repository_details)
        cache = CacheManager.cache("dependency_url_search_cache")
        cache[url] ||= Dependabot::RegistryClient.get(
          url: url,
          headers: repository_details[:auth_header]
        )

        response = cache[url]

        return unless response.status == 200

        body = remove_wrapping_zero_width_chars(response.body)
        JSON.parse(body)
      rescue Excon::Error::Timeout, Excon::Error::Socket
        repo_url = repository_details[:repository_url]
        raise if repo_url == Dependabot::Nuget::UpdateChecker::RepositoryFinder::DEFAULT_REPOSITORY_URL

        raise PrivateSourceTimedOut, repo_url
      end

      # Strips a single zero-width character (U+200B..U+200D, U+FEFF) from each
      # end of the body so that JSON.parse does not choke on a byte order mark.
      private_class_method def self.remove_wrapping_zero_width_chars(string)
        string.force_encoding("UTF-8").encode
              .gsub(/\A[\u200B-\u200D\uFEFF]/, "")
              .gsub(/[\u200B-\u200D\uFEFF]\Z/, "")
      end
    end
  end
end
42 changes: 36 additions & 6 deletions nuget/lib/dependabot/nuget/update_checker/repository_finder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def dependency_urls
def self.get_default_repository_details(dependency_name)
{
base_url: "https://api.nuget.org/v3-flatcontainer/",
registration_url: "https://api.nuget.org/v3/registration5-gz-semver2/#{dependency_name.downcase}/index.json",
repository_url: DEFAULT_REPOSITORY_URL,
versions_url: "https://api.nuget.org/v3-flatcontainer/" \
"#{dependency_name.downcase}/index.json",
Expand Down Expand Up @@ -60,9 +61,11 @@ def build_url_for_details(repo_details)
return unless response.status == 200

body = remove_wrapping_zero_width_chars(response.body)
base_url = base_url_from_v3_metadata(JSON.parse(body))
parsed_json = JSON.parse(body)
base_url = base_url_from_v3_metadata(parsed_json)
resolved_base_url = base_url || repo_details.fetch(:url).gsub("/index.json", "-flatcontainer")
search_url = search_url_from_v3_metadata(JSON.parse(body))
search_url = search_url_from_v3_metadata(parsed_json)
registration_url = registration_url_from_v3_metadata(parsed_json)

details = {
base_url: resolved_base_url,
Expand All @@ -78,6 +81,11 @@ def build_url_for_details(repo_details)
details[:search_url] =
search_url + "?q=#{dependency.name.downcase}&prerelease=true&semVerLevel=2.0.0"
end

if registration_url
details[:registration_url] = File.join(registration_url, dependency.name.downcase, "index.json")
end

details
rescue JSON::ParserError
build_v2_url(response, repo_details)
Expand All @@ -86,10 +94,18 @@ def build_url_for_details(repo_details)
end

# Fetches the metadata document of a repository, consulting the
# "repo_finder_metadatacache" cache first so that each URL is requested at
# most once while caching is enabled.
#
# @param repo_details [Hash] must contain :url and :token
# @return the response returned by Dependabot::RegistryClient.get
def get_repo_metadata(repo_details)
  url = repo_details.fetch(:url)
  cache = CacheManager.cache("repo_finder_metadatacache")
  return cache[url] if !CacheManager.caching_disabled? && cache[url]

  # The assignment doubles as the return value; the response is stored even
  # when caching is disabled, but is only read back when it is enabled.
  cache[url] = Dependabot::RegistryClient.get(
    url: url,
    headers: auth_header_for_token(repo_details.fetch(:token))
  )
end

def base_url_from_v3_metadata(metadata)
Expand All @@ -99,6 +115,20 @@ def base_url_from_v3_metadata(metadata)
&.fetch("@id")
end

# Picks the registration base URL out of a v3 service index.
#
# The first entry of "resources" whose "@type" is one of the supported
# RegistrationsBaseUrl variants wins; its "@id" is returned. Returns nil when
# the index has no such resource.
def registration_url_from_v3_metadata(metadata)
  registration_types = %w(
    RegistrationsBaseUrl
    RegistrationsBaseUrl/3.0.0-beta
    RegistrationsBaseUrl/3.0.0-rc
    RegistrationsBaseUrl/3.4.0
    RegistrationsBaseUrl/3.6.0
  )

  resources = metadata.fetch("resources", [])
  registration_resource = resources.find do |resource|
    registration_types.include?(resource.fetch("@type"))
  end

  registration_resource&.fetch("@id")
end

def search_url_from_v3_metadata(metadata)
# allowable values from here: https://learn.microsoft.com/en-us/nuget/api/search-query-service-resource#versioning
allowed_search_types = %w(
Expand Down
42 changes: 2 additions & 40 deletions nuget/lib/dependabot/nuget/update_checker/version_finder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
require "dependabot/nuget/requirement"
require "dependabot/update_checkers/version_filters"
require "dependabot/nuget/update_checker"
require "dependabot/nuget/nuget_client"

module Dependabot
module Nuget
Expand Down Expand Up @@ -294,40 +295,7 @@ def fetch_v2_next_link_href(xml_body)
end

# Returns the versions a v3 repository offers for the current dependency.
#
# Endpoint selection, HTTP access and JSON handling all live in
# NugetClient.get_package_versions_v3; this method only supplies the
# dependency name and passes the repository details through.
#
# @param repository_details [Hash] details of the v3 repository to query
# @return whatever NugetClient.get_package_versions_v3 returns
def versions_for_v3_repository(repository_details)
  NugetClient.get_package_versions_v3(dependency.name, repository_details)
end

def dependency_urls
Expand Down Expand Up @@ -356,12 +324,6 @@ def requirement_class
dependency.requirement_class
end

# Returns the text with one zero-width character (U+200B..U+200D or U+FEFF)
# removed from the start and one from the end, if present. The argument is
# re-tagged as UTF-8 in place before being processed.
def remove_wrapping_zero_width_chars(string)
  utf8_text = string.force_encoding("UTF-8").encode
  without_prefix = utf8_text.gsub(/\A[\u200B-\u200D\uFEFF]/, "")
  without_prefix.gsub(/[\u200B-\u200D\uFEFF]\Z/, "")
end

def excon_options
# For large JSON files we sometimes need a little longer than for
# other languages. For example, see:
Expand Down

0 comments on commit 763f444

Please sign in to comment.