diff --git a/chrome/browser/optimization_guide/page_content_annotations_service_browsertest.cc b/chrome/browser/optimization_guide/page_content_annotations_service_browsertest.cc index db08205a4745c8..88cb72daf960b5 100644 --- a/chrome/browser/optimization_guide/page_content_annotations_service_browsertest.cc +++ b/chrome/browser/optimization_guide/page_content_annotations_service_browsertest.cc @@ -243,10 +243,10 @@ IN_PROC_BROWSER_TEST_F(PageContentAnnotationsServicePageTopicsBrowserTest, run_loop->Quit(); }, &run_loop, &results), - std::vector{ - GURL("https://www.youtube.com/"), - GURL("https://www.chrome.com/"), - GURL("https://music.youtube.com/"), + std::vector{ + "youtube.com", + "chrome.com", + "music.youtube.com", }); run_loop.Run(); @@ -497,7 +497,7 @@ IN_PROC_BROWSER_TEST_F(PageContentAnnotationsServiceBrowserTest, run_loop->Quit(); }, &run_loop), - std::vector{GURL("https://www.chromium.org")}); + std::vector{"www.chromium.org"}); run_loop.Run(); } diff --git a/components/browsing_topics/BUILD.gn b/components/browsing_topics/BUILD.gn index 18e1135af467d6..0d0288131f977b 100644 --- a/components/browsing_topics/BUILD.gn +++ b/components/browsing_topics/BUILD.gn @@ -4,6 +4,8 @@ source_set("browsing_topics") { sources = [ + "browsing_topics_calculator.cc", + "browsing_topics_calculator.h", "browsing_topics_service.h", "browsing_topics_service_impl.cc", "browsing_topics_service_impl.h", @@ -20,16 +22,33 @@ source_set("browsing_topics") { deps = [ "//base", "//components/browsing_topics/common:common", + "//components/history/content/browser", + "//components/history/core/browser", "//components/keyed_service/core", + "//components/optimization_guide/content/browser", "//components/privacy_sandbox", + "//content/public/browser", "//crypto", "//third_party/blink/public/common", ] } +source_set("test_support") { + testonly = true + sources = [ + "test_util.cc", + "test_util.h", + ] + + public_deps = [ "//base" ] + + deps = [ ":browsing_topics" ] +} + 
source_set("unit_tests") { testonly = true sources = [ + "browsing_topics_calculator_unittest.cc", "browsing_topics_state_unittest.cc", "epoch_topics_unittest.cc", "topic_and_domains_unittest.cc", @@ -38,9 +57,21 @@ source_set("unit_tests") { deps = [ ":browsing_topics", + ":test_support", "//base", "//base/test:test_support", + "//components/content_settings/core/test:test_support", + "//components/history/core/browser:browser", + "//components/history/core/test", + "//components/optimization_guide/content/browser:browser", + "//components/optimization_guide/content/browser:test_support", + "//components/optimization_guide/core:test_support", "//components/prefs:test_support", + "//components/privacy_sandbox:privacy_sandbox", + "//components/privacy_sandbox:privacy_sandbox_prefs", + "//components/privacy_sandbox:test_support", + "//components/sync_preferences:test_support", + "//content/test:test_support", "//testing/gtest", "//third_party/blink/public/common", ] diff --git a/components/browsing_topics/DEPS b/components/browsing_topics/DEPS index dba046b8dff0d4..26ef4ba5ef54dd 100644 --- a/components/browsing_topics/DEPS +++ b/components/browsing_topics/DEPS @@ -1,6 +1,17 @@ include_rules = [ - "+crypto", - "+third_party/blink/public/common", + "+components/history", + "+components/optimization_guide", "+components/keyed_service", "+components/privacy_sandbox", + "+content/public/browser", + "+content/public/test", + "+crypto", + "+third_party/blink/public/common", ] + +specific_include_rules = { + ".*_unittest.cc": [ + "+components/content_settings/core", + "+components/sync_preferences", + ], +} diff --git a/components/browsing_topics/browsing_topics_calculator.cc b/components/browsing_topics/browsing_topics_calculator.cc new file mode 100644 index 00000000000000..88ade005a33f2c --- /dev/null +++ b/components/browsing_topics/browsing_topics_calculator.cc @@ -0,0 +1,370 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/browsing_topics/browsing_topics_calculator.h" + +#include "base/containers/contains.h" +#include "base/metrics/histogram_functions.h" +#include "base/rand_util.h" +#include "base/threading/thread_task_runner_handle.h" +#include "components/browsing_topics/util.h" +#include "components/history/core/browser/history_service.h" +#include "components/optimization_guide/content/browser/page_content_annotations_service.h" +#include "components/privacy_sandbox/canonical_topic.h" +#include "components/privacy_sandbox/privacy_sandbox_settings.h" +#include "content/public/browser/browsing_topics_site_data_manager.h" +#include "third_party/blink/public/common/features.h" + +namespace browsing_topics { + +namespace { + +// Derive the mapping from hosts to topics and the mapping from topics to hosts. +// Precondition: the annotation didn't fail in general (e.g. `ModelInfo` is +// valid). +void DeriveHostTopicsMapAndTopicHostsMap( + const std::vector& raw_hosts, + const std::vector& results, + std::map>& host_topics_map, + std::map>& topic_hosts_map) { + DCHECK(host_topics_map.empty()); + DCHECK(topic_hosts_map.empty()); + + DCHECK_EQ(raw_hosts.size(), results.size()); + + for (size_t i = 0; i < results.size(); ++i) { + const optimization_guide::BatchAnnotationResult& result = results[i]; + const std::string raw_host = raw_hosts[i]; + + // As long as the annotation didn't fail in general, the individual + // `result.topics()` should always be valid. + const std::vector& + annotation_result_topics = result.topics().value(); + + HashedHost host = HashMainFrameHostForStorage(raw_host); + + for (const optimization_guide::WeightedIdentifier& annotation_result_topic : + annotation_result_topics) { + // Note that `annotation_result_topic.weight()` is ignored. This is the + // intended use of the model for the Topics API. 
+ Topic topic = Topic(annotation_result_topic.value()); + + topic_hosts_map[topic].insert(host); + host_topics_map[host].insert(topic); + } + } +} + +// For `topic`, derive the context domains that observed it. This is done by +// first getting the hosts about `topic` from `topic_hosts_map`, and +// for each site, get the callers (context domains) that were on that site and +// add the callers to a result set. +std::set GetTopicObservationDomains( + const Topic& topic, + const std::map>& topic_hosts_map, + const std::map>& + host_context_domains_map) { + std::set topic_observation_domains; + + // If `topic` was padded, it may not exist in `topic_hosts_map`. In this + // case, return an empty set. + auto it = topic_hosts_map.find(topic); + if (it == topic_hosts_map.end()) + return std::set(); + + const std::set& hosts = it->second; + + for (const HashedHost& host : hosts) { + // `host` came from the history database, and it may not exist in the + // `host_context_domains_map` which came from the usage contexts + // database, due to e.g. per-context data deletion, database errors, etc. + // In this case, continue checking other hosts. + auto it = host_context_domains_map.find(host); + if (it == host_context_domains_map.end()) + continue; + + const std::vector& context_domains = it->second; + + for (const HashedDomain& context_domain : context_domains) { + topic_observation_domains.insert(context_domain); + + // To limit memory usage, cap the number of context domains to keep + // per-topic. The larger `HashedDomain`s will be kept. This is fair, as + // the hashing for context domains is per-user, so we are not + // prioritizing any domains in general. 
+ if (topic_observation_domains.size() > + static_cast( + blink::features:: + kBrowsingTopicsMaxNumberOfApiUsageContextDomainsToKeepPerTopic + .Get())) { + topic_observation_domains.erase(topic_observation_domains.begin()); + } + } + } + + return topic_observation_domains; +} + +} // namespace + +BrowsingTopicsCalculator::BrowsingTopicsCalculator( + privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings, + history::HistoryService* history_service, + content::BrowsingTopicsSiteDataManager* site_data_manager, + optimization_guide::PageContentAnnotationsService* annotations_service, + CalculateCompletedCallback callback) + : privacy_sandbox_settings_(privacy_sandbox_settings), + history_service_(history_service), + site_data_manager_(site_data_manager), + annotations_service_(annotations_service), + calculate_completed_callback_(std::move(callback)), + calculation_time_(base::Time::Now()) { + // Continue asynchronously so that `calculate_completed_callback_` isn't + // called synchronously while `this` is being constructed. + base::ThreadTaskRunnerHandle::Get()->PostTask( + FROM_HERE, base::BindOnce(&BrowsingTopicsCalculator::CheckCanCalculate, + weak_ptr_factory_.GetWeakPtr())); +} + +BrowsingTopicsCalculator::~BrowsingTopicsCalculator() = default; + +uint64_t BrowsingTopicsCalculator::GenerateRandUint64() { + return base::RandUint64(); +} + +void BrowsingTopicsCalculator::DeriveTopTopics( + const std::map& history_hosts_count, + const std::map>& host_topics_map, + size_t taxonomy_size, + std::vector& top_topics, + size_t& padded_top_topics_start_index) { + DCHECK(top_topics.empty()); + DCHECK_EQ(padded_top_topics_start_index, 0u); + + // Derive the frequency of each topic, by summing up the frequencies of the + // associated hosts. TODO(yaoxia): consider applying inverse frequency of + // topics (https://github.com/jkarlin/topics/issues/42). 
+ std::map topics_count; + for (auto const& [host, host_count] : history_hosts_count) { + const std::set& topics = host_topics_map.at(host); + for (const Topic& topic : topics) { + topics_count[topic] += host_count; + } + } + + DCHECK_LE( + static_cast( + blink::features::kBrowsingTopicsNumberOfTopTopicsPerEpoch.Get()), + taxonomy_size); + + // Get the top up to `kBrowsingTopicsNumberOfTopTopicsPerEpoch` topics, + // sorted by decreasing count. + std::vector> top_topics_count(std::min( + static_cast( + blink::features::kBrowsingTopicsNumberOfTopTopicsPerEpoch.Get()), + topics_count.size())); + + std::partial_sort_copy( + topics_count.begin(), topics_count.end(), top_topics_count.begin(), + top_topics_count.end(), + [](auto& left, auto& right) { return left.second > right.second; }); + + std::transform(top_topics_count.begin(), top_topics_count.end(), + std::back_inserter(top_topics), + [](auto& topic_count) { return topic_count.first; }); + + padded_top_topics_start_index = top_topics.size(); + + // Pad the top topics with distinct random topics until we have + // `kBrowsingTopicsNumberOfTopTopicsPerEpoch` topics. + while (top_topics.size() < + static_cast( + blink::features::kBrowsingTopicsNumberOfTopTopicsPerEpoch.Get())) { + Topic padded_topic(0); + + do { + int padded_topic_index = + base::checked_cast(GenerateRandUint64() % taxonomy_size); + padded_topic = Topic(padded_topic_index + 1); + } while (base::Contains(top_topics, padded_topic)); + + top_topics.emplace_back(std::move(padded_topic)); + } +} + +void BrowsingTopicsCalculator::CheckCanCalculate() { + if (!privacy_sandbox_settings_->IsTopicsAllowed()) { + OnCalculateCompleted(CalculatorResultStatus::kFailurePermissionDenied); + return; + } + + // Get the api usages context map (from the calling context domain to a + // set of history hosts) so that we can figure out which topics the APIs were + // called on. 
+ site_data_manager_->GetBrowsingTopicsApiUsage( + /*begin_time=*/DeriveApiUsageContextDataStartTime( + calculation_time_, + privacy_sandbox_settings_->TopicsDataAccessibleSince()), + /*end_time=*/calculation_time_, + base::BindOnce(&BrowsingTopicsCalculator:: + OnGetRecentBrowsingTopicsApiUsagesCompleted, + weak_ptr_factory_.GetWeakPtr())); +} + +void BrowsingTopicsCalculator::OnGetRecentBrowsingTopicsApiUsagesCompleted( + browsing_topics::ApiUsageContextQueryResult result) { + DCHECK(host_context_domains_map_.empty()); + + if (!result.success) { + OnCalculateCompleted( + CalculatorResultStatus::kFailureApiUsageContextQueryError); + return; + } + + for (const ApiUsageContext& usage_context : result.api_usage_contexts) { + host_context_domains_map_[usage_context.hashed_main_frame_host] + .emplace_back(usage_context.hashed_context_domain); + } + + // `ApiUsageContext::hashed_main_frame_host` is a hashed number. To get the + // topic associated with it, we will need to match it against a set of raw + // hosts with topics. Thus, here we query the history with the larger time + // range (from DeriveApiUsageContextDataStartTime() to `calculation_time_`) to + // get the raw hosts. 
+ history::QueryOptions options; + options.begin_time = DeriveApiUsageContextDataStartTime( + calculation_time_, + privacy_sandbox_settings_->TopicsDataAccessibleSince()); + options.end_time = calculation_time_; + options.duplicate_policy = history::QueryOptions::KEEP_ALL_DUPLICATES; + + history_service_->QueryHistory( + std::u16string(), options, + base::BindOnce( + &BrowsingTopicsCalculator::OnGetRecentlyVisitedURLsCompleted, + weak_ptr_factory_.GetWeakPtr()), + &history_task_tracker_); +} + +void BrowsingTopicsCalculator::OnGetRecentlyVisitedURLsCompleted( + history::QueryResults results) { + DCHECK(history_hosts_count_.empty()); + + std::set raw_hosts; + + for (const history::URLResult& url_result : results) { + if (!(url_result.content_annotations().annotation_flags & + history::VisitContentAnnotationFlag::kBrowsingTopicsEligible)) { + continue; + } + + std::string raw_host = url_result.url().host(); + raw_hosts.insert(raw_host); + + if (url_result.visit_time() >= + DeriveHistoryDataStartTime( + calculation_time_, + privacy_sandbox_settings_->TopicsDataAccessibleSince())) { + HashedHost host = HashMainFrameHostForStorage(raw_host); + history_hosts_count_[host]++; + } + } + + base::UmaHistogramCounts1000( + "BrowsingTopics.EpochTopicsCalculation.EligibleDistinctHistoryHostsCount", + history_hosts_count_.size()); + + std::vector raw_hosts_vector(raw_hosts.begin(), raw_hosts.end()); + + annotations_service_->BatchAnnotatePageTopics( + base::BindOnce(&BrowsingTopicsCalculator::OnGetTopicsForHostsCompleted, + weak_ptr_factory_.GetWeakPtr(), raw_hosts_vector), + raw_hosts_vector); +} + +void BrowsingTopicsCalculator::OnGetTopicsForHostsCompleted( + std::vector raw_hosts, + const std::vector& results) { + absl::optional model_info = + annotations_service_->GetModelInfoForType( + optimization_guide::AnnotationType::kPageTopics); + + if (!model_info) { + OnCalculateCompleted( + CalculatorResultStatus::kFailureAnnotationExecutionError); + return; + } + + absl::optional 
taxonomy_size = GetTaxonomySize(); + if (!taxonomy_size) { + OnCalculateCompleted( + CalculatorResultStatus::kFailureTaxonomyVersionNotSupportedInBinary); + return; + } + + const int model_version = base::checked_cast(model_info->GetVersion()); + DCHECK_GT(model_version, 0); + + std::map> host_topics_map; + std::map> topic_hosts_map; + DeriveHostTopicsMapAndTopicHostsMap(raw_hosts, results, host_topics_map, + topic_hosts_map); + + std::vector top_topics; + size_t padded_top_topics_start_index = 0u; + DeriveTopTopics(history_hosts_count_, host_topics_map, *taxonomy_size, + top_topics, padded_top_topics_start_index); + + base::UmaHistogramCounts100( + "BrowsingTopics.EpochTopicsCalculation.TopTopicsCountBeforePadding", + padded_top_topics_start_index); + + // For each top topic, derive the context domains that observed it + std::vector top_topics_and_observing_domains; + + for (const Topic& topic : top_topics) { + if (!privacy_sandbox_settings_->IsTopicAllowed( + privacy_sandbox::CanonicalTopic( + topic, + blink::features::kBrowsingTopicsTaxonomyVersion.Get()))) { + top_topics_and_observing_domains.emplace_back(TopicAndDomains()); + continue; + } + + std::set topic_observation_domains = + GetTopicObservationDomains(topic, topic_hosts_map, + host_context_domains_map_); + + base::UmaHistogramCounts1000( + "BrowsingTopics.EpochTopicsCalculation." 
+ "ObservationContextDomainsCountPerTopTopic", + topic_observation_domains.size()); + + top_topics_and_observing_domains.emplace_back( + TopicAndDomains(topic, std::move(topic_observation_domains))); + } + + OnCalculateCompleted( + CalculatorResultStatus::kSuccess, + EpochTopics(std::move(top_topics_and_observing_domains), + padded_top_topics_start_index, *taxonomy_size, + blink::features::kBrowsingTopicsTaxonomyVersion.Get(), + model_version, calculation_time_)); +} + +void BrowsingTopicsCalculator::OnCalculateCompleted( + CalculatorResultStatus status, + EpochTopics epoch_topics) { + DCHECK(status != CalculatorResultStatus::kSuccess || + epoch_topics.HasValidTopics()); + + base::UmaHistogramEnumeration( + "BrowsingTopics.EpochTopicsCalculation.CalculatorResultStatus", status); + + std::move(calculate_completed_callback_).Run(std::move(epoch_topics)); + + // Do not add code after this. BrowsingTopicsCalculator has been destroyed. +} + +} // namespace browsing_topics diff --git a/components/browsing_topics/browsing_topics_calculator.h b/components/browsing_topics/browsing_topics_calculator.h new file mode 100644 index 00000000000000..1232da3752e7ec --- /dev/null +++ b/components/browsing_topics/browsing_topics_calculator.h @@ -0,0 +1,137 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef COMPONENTS_BROWSING_TOPICS_BROWSING_TOPICS_CALCULATOR_H_ +#define COMPONENTS_BROWSING_TOPICS_BROWSING_TOPICS_CALCULATOR_H_ + +#include +#include + +#include "base/callback.h" +#include "base/task/cancelable_task_tracker.h" +#include "components/browsing_topics/common/common_types.h" +#include "components/browsing_topics/epoch_topics.h" +#include "components/history/core/browser/history_types.h" + +namespace privacy_sandbox { +class PrivacySandboxSettings; +} // namespace privacy_sandbox + +namespace history { +class HistoryService; +} // namespace history + +namespace content { +class BrowsingTopicsSiteDataManager; +} // namespace content + +namespace optimization_guide { +class PageContentAnnotationsService; +class BatchAnnotationResult; +} // namespace optimization_guide + +namespace browsing_topics { + +// Responsible for doing a one-off browsing topics calculation. It will: +// 1) Check the user settings for calculation permissions. +// 2) Query the `BrowsingTopicsSiteDataManager` for the contexts in which the +// Topics API was called. +// 3) Query the `HistoryService` for the hosts of the pages the API was called +// on. +// 4) Query the `PageContentAnnotationsService` with a set of hosts, to get the +// corresponding topics. +// 5) Derive `EpochTopics` (i.e. the top topics and their observed-by +// contexts), and return it as the final result. +class BrowsingTopicsCalculator { + public: + // These values are persisted to logs. Entries should not be renumbered and + // numeric values should never be reused. 
+ enum class CalculatorResultStatus { + kSuccess = 0, + kFailurePermissionDenied = 1, + kFailureApiUsageContextQueryError = 2, + kFailureAnnotationExecutionError = 3, + kFailureTaxonomyVersionNotSupportedInBinary = 4, + + kMaxValue = kFailureTaxonomyVersionNotSupportedInBinary, + }; + + using CalculateCompletedCallback = base::OnceCallback; + + BrowsingTopicsCalculator( + privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings, + history::HistoryService* history_service, + content::BrowsingTopicsSiteDataManager* site_data_manager, + optimization_guide::PageContentAnnotationsService* annotations_service, + CalculateCompletedCallback callback); + + BrowsingTopicsCalculator(const BrowsingTopicsCalculator&) = delete; + BrowsingTopicsCalculator& operator=(const BrowsingTopicsCalculator&) = delete; + BrowsingTopicsCalculator(BrowsingTopicsCalculator&&) = delete; + BrowsingTopicsCalculator& operator=(BrowsingTopicsCalculator&&) = delete; + + virtual ~BrowsingTopicsCalculator(); + + protected: + // This method exists for the purposes of overriding in tests. + virtual uint64_t GenerateRandUint64(); + + private: + // Get the top `kBrowsingTopicsNumberOfTopTopicsPerEpoch` topics. If there + // aren't enough topics, pad with random ones. Return the result topics, and + // the starting index of the padded topics (or + // `kBrowsingTopicsNumberOfTopTopicsPerEpoch` if there are no padded topics). + // Precondition: the hosts in `history_hosts_count` should exist in + // `host_topics_map`. 
+ void DeriveTopTopics( + const std::map& history_hosts_count, + const std::map>& host_topics_map, + size_t taxonomy_size, + std::vector& top_topics, + size_t& padded_top_topics_start_index); + + void CheckCanCalculate(); + + void OnGetRecentBrowsingTopicsApiUsagesCompleted( + browsing_topics::ApiUsageContextQueryResult result); + + void OnGetRecentlyVisitedURLsCompleted(history::QueryResults results); + + void OnGetTopicsForHostsCompleted( + std::vector raw_hosts, + const std::vector& results); + + void OnCalculateCompleted(CalculatorResultStatus status, + EpochTopics epoch_topics = EpochTopics()); + + // Those pointers are safe to hold and use throughout the lifetime of + // `BrowsingTopicsService`, which owns this object. + privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings_; + history::HistoryService* history_service_; + content::BrowsingTopicsSiteDataManager* site_data_manager_; + optimization_guide::PageContentAnnotationsService* annotations_service_; + + CalculateCompletedCallback calculate_completed_callback_; + + // The calculation start time. + base::Time calculation_time_; + + // The history hosts over + // `kBrowsingTopicsNumberOfEpochsOfObservationDataToUseForFiltering` epochs, + // and the calling context domains that used the Topics API in each main frame + // host. + std::map> host_context_domains_map_; + + // The hashed history hosts and their count over the last epoch. + std::map history_hosts_count_; + + // Used for the async tasks querying the HistoryService. 
+ base::CancelableTaskTracker history_task_tracker_; + + base::WeakPtrFactory weak_ptr_factory_{this}; +}; + +} // namespace browsing_topics + +#endif // COMPONENTS_BROWSING_TOPICS_BROWSING_TOPICS_CALCULATOR_H_ diff --git a/components/browsing_topics/browsing_topics_calculator_unittest.cc b/components/browsing_topics/browsing_topics_calculator_unittest.cc new file mode 100644 index 00000000000000..79fcfb0f729509 --- /dev/null +++ b/components/browsing_topics/browsing_topics_calculator_unittest.cc @@ -0,0 +1,789 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/browsing_topics/browsing_topics_calculator.h" + +#include "base/files/scoped_temp_dir.h" +#include "base/logging.h" +#include "base/test/bind.h" +#include "base/test/gtest_util.h" +#include "base/test/metrics/histogram_tester.h" +#include "base/test/scoped_feature_list.h" +#include "components/browsing_topics/test_util.h" +#include "components/browsing_topics/util.h" +#include "components/content_settings/core/browser/cookie_settings.h" +#include "components/content_settings/core/browser/host_content_settings_map.h" +#include "components/history/core/browser/history_database_params.h" +#include "components/history/core/browser/history_service.h" +#include "components/history/core/test/test_history_database.h" +#include "components/optimization_guide/content/browser/page_content_annotations_service.h" +#include "components/optimization_guide/content/browser/test_page_content_annotator.h" +#include "components/optimization_guide/core/test_model_info_builder.h" +#include "components/optimization_guide/core/test_optimization_guide_model_provider.h" +#include "components/privacy_sandbox/privacy_sandbox_prefs.h" +#include "components/privacy_sandbox/privacy_sandbox_settings.h" +#include "components/privacy_sandbox/privacy_sandbox_test_util.h" +#include 
"components/sync_preferences/testing_pref_service_syncable.h" +#include "content/public/test/browser_task_environment.h" +#include "content/public/test/browsing_topics_test_util.h" +#include "third_party/blink/public/common/features.h" + +namespace browsing_topics { + +namespace { + +const size_t kTaxonomySize = 349; +const int kTaxonomyVersion = 1; + +const std::string kHost1 = "www.foo1.com"; +const std::string kHost2 = "www.foo2.com"; +const std::string kHost3 = "www.foo3.com"; +const std::string kHost4 = "www.foo4.com"; +const std::string kHost5 = "www.foo5.com"; +const std::string kHost6 = "www.foo6.com"; + +const std::string kTokenizedHost1 = "foo1 com"; +const std::string kTokenizedHost2 = "foo2 com"; +const std::string kTokenizedHost3 = "foo3 com"; +const std::string kTokenizedHost4 = "foo4 com"; +const std::string kTokenizedHost5 = "foo5 com"; +const std::string kTokenizedHost6 = "foo6 com"; + +} // namespace + +class BrowsingTopicsCalculatorTest : public testing::Test { + public: + BrowsingTopicsCalculatorTest() + : task_environment_(base::test::TaskEnvironment::TimeSource::MOCK_TIME) { + EXPECT_TRUE(temp_dir_.CreateUniqueTempDir()); + + content_settings::CookieSettings::RegisterProfilePrefs(prefs_.registry()); + HostContentSettingsMap::RegisterProfilePrefs(prefs_.registry()); + privacy_sandbox::RegisterProfilePrefs(prefs_.registry()); + + host_content_settings_map_ = new HostContentSettingsMap( + &prefs_, /*is_off_the_record=*/false, /*store_last_modified=*/false, + /*restore_session=*/false); + cookie_settings_ = new content_settings::CookieSettings( + host_content_settings_map_.get(), &prefs_, false, "chrome-extension"); + privacy_sandbox_settings_ = std::make_unique< + privacy_sandbox::PrivacySandboxSettings>( + std::make_unique< + privacy_sandbox_test_util::MockPrivacySandboxSettingsDelegate>(), + host_content_settings_map_.get(), cookie_settings_, &prefs_, + /*incognito_profile=*/false); + + topics_site_data_manager_ = + std::make_unique( + 
temp_dir_.GetPath()); + + history_service_ = std::make_unique(); + history_service_->Init( + history::TestHistoryDatabaseParamsForPath(temp_dir_.GetPath())); + + optimization_guide_model_provider_ = std::make_unique< + optimization_guide::TestOptimizationGuideModelProvider>(); + page_content_annotations_service_ = + std::make_unique( + "en-US", optimization_guide_model_provider_.get(), + history_service_.get(), nullptr, base::FilePath(), nullptr); + + page_content_annotations_service_->OverridePageContentAnnotatorForTesting( + &test_page_content_annotator_); + + task_environment_.RunUntilIdle(); + } + + ~BrowsingTopicsCalculatorTest() override { + host_content_settings_map_->ShutdownOnUIThread(); + } + + EpochTopics CalculateTopics() { + EpochTopics result; + + base::RunLoop run_loop; + + TesterBrowsingTopicsCalculator topics_calculator = + TesterBrowsingTopicsCalculator( + privacy_sandbox_settings_.get(), history_service_.get(), + topics_site_data_manager_.get(), + page_content_annotations_service_.get(), + base::BindLambdaForTesting([&](EpochTopics epoch_topics) { + result = std::move(epoch_topics); + run_loop.Quit(); + }), + /*rand_uint64_queue=*/ + base::queue{{100, 101, 102, 103, 104}}); + + run_loop.Run(); + + return result; + } + + void AddHistoryEntries(const std::vector& hosts, + base::Time time) { + history::HistoryAddPageArgs add_page_args; + add_page_args.time = time; + add_page_args.context_id = reinterpret_cast(1); + + for (const std::string& host : hosts) { + static int nav_entry_id = 0; + ++nav_entry_id; + + add_page_args.url = GURL(base::StrCat({"https://", host})); + add_page_args.nav_entry_id = nav_entry_id; + + history_service_->AddPage(add_page_args); + history_service_->SetBrowsingTopicsAllowed( + add_page_args.context_id, nav_entry_id, add_page_args.url); + } + + task_environment_.RunUntilIdle(); + } + + void AddApiUsageContextEntries( + std::vector>> + main_frame_hosts_with_context_domains) { + for (auto& [main_frame_host, context_domains] : 
+ main_frame_hosts_with_context_domains) { + topics_site_data_manager_->OnBrowsingTopicsApiUsed( + HashMainFrameHostForStorage(main_frame_host), + base::flat_set(context_domains.begin(), + context_domains.end())); + } + + task_environment_.RunUntilIdle(); + } + + std::vector TopicsAndWeight( + std::vector topics, + double weight) { + std::vector result; + for (int32_t topic : topics) { + result.emplace_back( + optimization_guide::WeightedIdentifier(topic, weight)); + } + + return result; + } + + void ExpectResultTopicsEqual( + const std::vector& result, + std::vector>> expected) { + DCHECK_EQ(expected.size(), 5u); + EXPECT_EQ(result.size(), 5u); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(result[i].topic(), expected[i].first); + EXPECT_EQ(result[i].hashed_domains(), expected[i].second); + } + } + + protected: + content::BrowserTaskEnvironment task_environment_; + + sync_preferences::TestingPrefServiceSyncable prefs_; + scoped_refptr host_content_settings_map_; + scoped_refptr cookie_settings_; + std::unique_ptr + privacy_sandbox_settings_; + + std::unique_ptr + topics_site_data_manager_; + + std::unique_ptr history_service_; + + std::unique_ptr + optimization_guide_model_provider_; + std::unique_ptr + page_content_annotations_service_; + + optimization_guide::TestPageContentAnnotator test_page_content_annotator_; + + base::ScopedTempDir temp_dir_; +}; + +TEST_F(BrowsingTopicsCalculatorTest, PermissionDenied) { + base::HistogramTester histograms; + + privacy_sandbox_settings_->SetPrivacySandboxEnabled(false); + + EpochTopics result = CalculateTopics(); + EXPECT_FALSE(result.HasValidTopics()); + + histograms.ExpectUniqueSample( + "BrowsingTopics.EpochTopicsCalculation.CalculatorResultStatus", + /*kFailurePermissionDenied*/ 1, + /*expected_bucket_count=*/1); +} + +TEST_F(BrowsingTopicsCalculatorTest, ApiUsageContextQueryError) { + base::HistogramTester histograms; + + topics_site_data_manager_->SetQueryFailureOverride(); + + EpochTopics result = CalculateTopics(); + 
EXPECT_FALSE(result.HasValidTopics()); + + histograms.ExpectUniqueSample( + "BrowsingTopics.EpochTopicsCalculation.CalculatorResultStatus", + /*kFailureApiUsageContextQueryError*/ 2, + /*expected_bucket_count=*/1); +} + +TEST_F(BrowsingTopicsCalculatorTest, AnnotationExecutionError) { + base::HistogramTester histograms; + + EpochTopics result = CalculateTopics(); + EXPECT_FALSE(result.HasValidTopics()); + + histograms.ExpectUniqueSample( + "BrowsingTopics.EpochTopicsCalculation.CalculatorResultStatus", + /*kFailureAnnotationExecutionError*/ 3, + /*expected_bucket_count=*/1); +} + +class BrowsingTopicsCalculatorUnsupporedTaxonomyVersionTest + : public BrowsingTopicsCalculatorTest { + public: + BrowsingTopicsCalculatorUnsupporedTaxonomyVersionTest() { + feature_list_.InitAndEnableFeatureWithParameters( + blink::features::kBrowsingTopics, {{"taxonomy_version", "999"}}); + } + + private: + base::test::ScopedFeatureList feature_list_; +}; + +TEST_F(BrowsingTopicsCalculatorUnsupporedTaxonomyVersionTest, + TaxonomyVersionNotSupportedInBinary) { + base::HistogramTester histograms; + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), {}); + + EpochTopics result = CalculateTopics(); + EXPECT_FALSE(result.HasValidTopics()); + + histograms.ExpectUniqueSample( + "BrowsingTopics.EpochTopicsCalculation.CalculatorResultStatus", + /*kFailureTaxonomyVersionNotSupportedInBinary*/ 4, + /*expected_bucket_count=*/1); +} + +TEST_F(BrowsingTopicsCalculatorTest, TopicsMetadata) { + base::HistogramTester histograms; + base::Time begin_time = base::Time::Now(); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), {}); + + EpochTopics result1 = CalculateTopics(); + EXPECT_TRUE(result1.HasValidTopics()); + EXPECT_EQ(result1.taxonomy_size(), kTaxonomySize); + EXPECT_EQ(result1.taxonomy_version(), kTaxonomyVersion); + EXPECT_EQ(result1.model_version(), 1); + 
EXPECT_EQ(result1.calculation_time(), begin_time); + + histograms.ExpectUniqueSample( + "BrowsingTopics.EpochTopicsCalculation.CalculatorResultStatus", + /*kSuccess*/ 0, + /*expected_bucket_count=*/1); + + task_environment_.AdvanceClock(base::Seconds(2)); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(50).Build(), {}); + + EpochTopics result2 = CalculateTopics(); + EXPECT_TRUE(result2.HasValidTopics()); + EXPECT_EQ(result2.taxonomy_size(), kTaxonomySize); + EXPECT_EQ(result2.taxonomy_version(), kTaxonomyVersion); + EXPECT_EQ(result2.model_version(), 50); + EXPECT_EQ(result2.calculation_time(), begin_time + base::Seconds(2)); + + histograms.ExpectUniqueSample( + "BrowsingTopics.EpochTopicsCalculation.CalculatorResultStatus", + /*kSuccess*/ 0, + /*expected_bucket_count=*/2); +} + +TEST_F(BrowsingTopicsCalculatorTest, TopTopicsRankedByFrequency) { + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost1, kHost2, kHost3, kHost4, kHost5, kHost6}, + begin_time); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual(result.top_topics_and_observing_domains(), + {{Topic(6), {}}, + {Topic(5), {}}, + {Topic(4), {}}, + {Topic(3), {}}, + {Topic(2), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 5u); +} + +TEST_F(BrowsingTopicsCalculatorTest, + TopTopicsRankedByFrequency_AlsoAffectedByHostsCount) { + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost1, 
kHost1, kHost1, kHost1, kHost1, kHost1, kHost2, + kHost3, kHost4, kHost5, kHost6}, + begin_time); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual(result.top_topics_and_observing_domains(), + {{Topic(2), {}}, + {Topic(1), {}}, + {Topic(6), {}}, + {Topic(5), {}}, + {Topic(4), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 5u); +} + +TEST_F(BrowsingTopicsCalculatorTest, + TopTopicsRankingNotAffectedByAnnotationWeight) { + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost1, kHost2, kHost3, kHost4, kHost5, kHost6}, + begin_time); + + // Setting the weight for Topic(1) and Topic(2) to 0.9. This weight shouldn't + // affect the top topics ordering. 
+ test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2}, 0.9)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual(result.top_topics_and_observing_domains(), + {{Topic(6), {}}, + {Topic(5), {}}, + {Topic(4), {}}, + {Topic(3), {}}, + {Topic(2), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 5u); +} + +TEST_F(BrowsingTopicsCalculatorTest, AllTopTopicsRandomlyPadded) { + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual(result.top_topics_and_observing_domains(), + {{Topic(101), {}}, + {Topic(102), {}}, + {Topic(103), {}}, + {Topic(104), {}}, + {Topic(105), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 0u); +} + +TEST_F(BrowsingTopicsCalculatorTest, TopTopicsPartiallyPadded) { + base::HistogramTester histograms; + + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost4, kHost5, kHost6}, begin_time); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 
5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual(result.top_topics_and_observing_domains(), + {{Topic(6), {}}, + {Topic(5), {}}, + {Topic(4), {}}, + {Topic(101), {}}, + {Topic(102), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 3u); +} + +TEST_F(BrowsingTopicsCalculatorTest, TopTopicsAndObservingDomains) { + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost1, kHost2, kHost3, kHost4, kHost5, kHost6}, + begin_time); + + AddApiUsageContextEntries( + {{kHost1, {}}, + {kHost2, {}}, + {kHost3, {HashedDomain(2)}}, + {kHost4, {HashedDomain(3)}}, + {kHost5, {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}}); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual( + result.top_topics_and_observing_domains(), + {{Topic(6), {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}, + {Topic(5), {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}, + {Topic(4), {HashedDomain(2), HashedDomain(3)}}, + {Topic(3), {HashedDomain(2)}}, + {Topic(2), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 5u); +} + +TEST_F( + BrowsingTopicsCalculatorTest, + 
HistoryHostsBefore21DaysAgo_IgnoredForTopTopicsDecision_IgnoredForObservingDomainsDecision) { + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost1, kHost2, kHost3, kHost4, kHost5, kHost6}, + begin_time - base::Days(21)); + + AddApiUsageContextEntries( + {{kHost1, {}}, + {kHost2, {}}, + {kHost3, {HashedDomain(2)}}, + {kHost4, {HashedDomain(3)}}, + {kHost5, {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}}); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 103, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 103, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({103, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual(result.top_topics_and_observing_domains(), + {{Topic(101), {}}, + {Topic(102), {}}, + {Topic(103), {}}, + {Topic(104), {}}, + {Topic(105), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 0u); +} + +TEST_F( + BrowsingTopicsCalculatorTest, + HistoryHostsBetween7And21Days_IgnoredForTopTopicsDecision_ConsideredForObservingDomainsDecision) { + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost1, kHost2, kHost3, kHost4, kHost5, kHost6}, + begin_time - base::Days(20)); + + AddApiUsageContextEntries( + {{kHost1, {}}, + {kHost2, {}}, + {kHost3, {HashedDomain(2)}}, + {kHost4, {HashedDomain(3)}}, + {kHost5, {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}}); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 103, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 103, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({103, 
4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual(result.top_topics_and_observing_domains(), + {{Topic(101), {}}, + {Topic(102), {}}, + {Topic(103), {HashedDomain(2)}}, + {Topic(104), {}}, + {Topic(105), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 0u); +} + +TEST_F(BrowsingTopicsCalculatorTest, + DataQueryBoundedByTopicsDataAccessibleSince) { + base::Time begin_time = base::Time::Now(); + + prefs_.SetTime(prefs::kPrivacySandboxTopicsDataAccessibleSince, + begin_time + base::Days(6)); + + AddHistoryEntries({kHost1, kHost2}, begin_time); + AddApiUsageContextEntries({{kHost1, {}}, {kHost2, {}}}); + + task_environment_.AdvanceClock(base::Days(6)); + + AddHistoryEntries({kHost3, kHost4, kHost5, kHost6}, + begin_time + base::Days(6)); + AddApiUsageContextEntries( + {{kHost3, {HashedDomain(2)}}, + {kHost4, {HashedDomain(3)}}, + {kHost5, {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}}); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual( + result.top_topics_and_observing_domains(), + {{Topic(6), {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}, + {Topic(5), {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}, + {Topic(4), {HashedDomain(2), HashedDomain(3)}}, + {Topic(3), 
{HashedDomain(2)}}, + {Topic(101), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 4u); +} + +TEST_F(BrowsingTopicsCalculatorTest, + TopTopicsAndObservingDomains_DomainsSizeExceedsLimit) { + base::Time begin_time = base::Time::Now(); + + std::set large_size_domains; + for (int i = 1; i <= 1001; ++i) { + large_size_domains.insert(HashedDomain(i)); + } + + AddHistoryEntries({kHost1, kHost2, kHost3, kHost4, kHost5, kHost6}, + begin_time); + + AddApiUsageContextEntries({{kHost1, {}}, + {kHost2, {}}, + {kHost3, {HashedDomain(2)}}, + {kHost4, {HashedDomain(3)}}, + {kHost5, large_size_domains}}); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + std::set expected_domains_after_capping = large_size_domains; + expected_domains_after_capping.erase(HashedDomain(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual(result.top_topics_and_observing_domains(), + {{Topic(6), expected_domains_after_capping}, + {Topic(5), expected_domains_after_capping}, + {Topic(4), {HashedDomain(2), HashedDomain(3)}}, + {Topic(3), {HashedDomain(2)}}, + {Topic(2), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 5u); +} + +TEST_F(BrowsingTopicsCalculatorTest, TopicBlocked) { + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost1, kHost2, kHost3, kHost4, kHost5, kHost6}, + begin_time); + + AddApiUsageContextEntries( + {{kHost1, {}}, + {kHost2, {}}, + {kHost3, {HashedDomain(2)}}, + {kHost4, {HashedDomain(3)}}, + {kHost5, {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}}); + + 
test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + privacy_sandbox_settings_->SetTopicAllowed( + privacy_sandbox::CanonicalTopic(Topic(6), kTaxonomyVersion), + /*allowed=*/false); + privacy_sandbox_settings_->SetTopicAllowed( + privacy_sandbox::CanonicalTopic(Topic(4), kTaxonomyVersion), + /*allowed=*/false); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual( + result.top_topics_and_observing_domains(), + {{Topic(0), {}}, + {Topic(5), {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}, + {Topic(0), {}}, + {Topic(3), {HashedDomain(2)}}, + {Topic(2), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 5u); +} + +TEST_F(BrowsingTopicsCalculatorTest, PaddedTopicsDoNotDuplicate) { + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost4, kHost5, kHost6}, begin_time); + + AddApiUsageContextEntries( + {{kHost1, {}}, + {kHost2, {}}, + {kHost3, {HashedDomain(2)}}, + {kHost4, {HashedDomain(3)}}, + {kHost5, {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}}); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 3, 4, 5, 102}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 102}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 102}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 102}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 102}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({102}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + 
EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual( + result.top_topics_and_observing_domains(), + {{Topic(102), {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}, + {Topic(5), {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}, + {Topic(4), {HashedDomain(3)}}, + {Topic(101), {}}, + {Topic(103), {}}}); +} + +TEST_F(BrowsingTopicsCalculatorTest, Metrics) { + base::HistogramTester histograms; + + base::Time begin_time = base::Time::Now(); + + AddHistoryEntries({kHost4, kHost5, kHost6}, begin_time); + + AddApiUsageContextEntries( + {{kHost1, {}}, + {kHost2, {}}, + {kHost3, {HashedDomain(2)}}, + {kHost4, {HashedDomain(3)}}, + {kHost5, {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}}); + + test_page_content_annotator_.UsePageTopics( + *optimization_guide::TestModelInfoBuilder().SetVersion(1).Build(), + {{kTokenizedHost1, TopicsAndWeight({1, 2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost2, TopicsAndWeight({2, 3, 4, 5, 6}, 0.1)}, + {kTokenizedHost3, TopicsAndWeight({3, 4, 5, 6}, 0.1)}, + {kTokenizedHost4, TopicsAndWeight({4, 5, 6}, 0.1)}, + {kTokenizedHost5, TopicsAndWeight({5, 6}, 0.1)}, + {kTokenizedHost6, TopicsAndWeight({6}, 0.1)}}); + + task_environment_.AdvanceClock(base::Seconds(1)); + + EpochTopics result = CalculateTopics(); + ExpectResultTopicsEqual( + result.top_topics_and_observing_domains(), + {{Topic(6), {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}, + {Topic(5), {HashedDomain(1), HashedDomain(2), HashedDomain(3)}}, + {Topic(4), {HashedDomain(3)}}, + {Topic(101), {}}, + {Topic(102), {}}}); + + EXPECT_EQ(result.padded_top_topics_start_index(), 3u); + + histograms.ExpectUniqueSample( + "BrowsingTopics.EpochTopicsCalculation.EligibleDistinctHistoryHostsCount", + /*sample=*/3, + /*expected_bucket_count=*/1); + + histograms.ExpectUniqueSample( + "BrowsingTopics.EpochTopicsCalculation.TopTopicsCountBeforePadding", + /*sample=*/3, + /*expected_bucket_count=*/1); + + histograms.ExpectTotalCount( + "BrowsingTopics.EpochTopicsCalculation." 
+ "ObservationContextDomainsCountPerTopTopic", + /*count=*/5); + histograms.ExpectBucketCount( + "BrowsingTopics.EpochTopicsCalculation." + "ObservationContextDomainsCountPerTopTopic", + /*sample=*/0, + /*expected_count=*/2); + histograms.ExpectBucketCount( + "BrowsingTopics.EpochTopicsCalculation." + "ObservationContextDomainsCountPerTopTopic", + /*sample=*/1, + /*expected_count=*/1); + histograms.ExpectBucketCount( + "BrowsingTopics.EpochTopicsCalculation." + "ObservationContextDomainsCountPerTopTopic", + /*sample=*/3, + /*expected_count=*/2); +} + +} // namespace browsing_topics diff --git a/components/browsing_topics/common/common_types.h b/components/browsing_topics/common/common_types.h index ed471521ca60bf..4a59150635efa9 100644 --- a/components/browsing_topics/common/common_types.h +++ b/components/browsing_topics/common/common_types.h @@ -19,7 +19,7 @@ using Topic = base::StrongAlias; struct COMPONENT_EXPORT(BROWSING_TOPICS_COMMON) ApiUsageContext { HashedDomain hashed_context_domain; - HashedHost hashed_top_host; + HashedHost hashed_main_frame_host; base::Time time; }; diff --git a/components/browsing_topics/epoch_topics.h b/components/browsing_topics/epoch_topics.h index f16e030729ad49..d38a3c2bf16cbc 100644 --- a/components/browsing_topics/epoch_topics.h +++ b/components/browsing_topics/epoch_topics.h @@ -57,6 +57,16 @@ class EpochTopics { // reset `padded_top_topics_start_index_` to 0. 
void ClearTopics(); + const std::vector& top_topics_and_observing_domains() const { + return top_topics_and_observing_domains_; + } + + size_t padded_top_topics_start_index() const { + return padded_top_topics_start_index_; + } + + size_t taxonomy_size() const { return taxonomy_size_; } + int taxonomy_version() const { return taxonomy_version_; } int model_version() const { return model_version_; } diff --git a/components/browsing_topics/test_util.cc b/components/browsing_topics/test_util.cc new file mode 100644 index 00000000000000..e1630294ad3060 --- /dev/null +++ b/components/browsing_topics/test_util.cc @@ -0,0 +1,34 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/browsing_topics/test_util.h" + +namespace browsing_topics { + +TesterBrowsingTopicsCalculator::TesterBrowsingTopicsCalculator( + privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings, + history::HistoryService* history_service, + content::BrowsingTopicsSiteDataManager* site_data_manager, + optimization_guide::PageContentAnnotationsService* annotations_service, + CalculateCompletedCallback callback, + base::queue rand_uint64_queue) + : BrowsingTopicsCalculator(privacy_sandbox_settings, + history_service, + site_data_manager, + annotations_service, + std::move(callback)), + rand_uint64_queue_(std::move(rand_uint64_queue)) {} + +TesterBrowsingTopicsCalculator::~TesterBrowsingTopicsCalculator() = default; + +uint64_t TesterBrowsingTopicsCalculator::GenerateRandUint64() { + DCHECK(!rand_uint64_queue_.empty()); + + uint64_t next_rand_uint64 = rand_uint64_queue_.front(); + rand_uint64_queue_.pop(); + + return next_rand_uint64; +} + +} // namespace browsing_topics diff --git a/components/browsing_topics/test_util.h b/components/browsing_topics/test_util.h new file mode 100644 index 00000000000000..cc9c2351bd0dde --- /dev/null +++ 
b/components/browsing_topics/test_util.h @@ -0,0 +1,47 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_BROWSING_TOPICS_TEST_UTIL_H_ +#define COMPONENTS_BROWSING_TOPICS_TEST_UTIL_H_ + +#include "base/containers/queue.h" +#include "components/browsing_topics/browsing_topics_calculator.h" +#include "third_party/abseil-cpp/absl/types/optional.h" + +namespace browsing_topics { + +// A tester class that allows mocking the generated random numbers. +class TesterBrowsingTopicsCalculator : public BrowsingTopicsCalculator { + public: + // Initialize a regular `BrowsingTopicsCalculator` with an additional + // `rand_uint64_queue` member for generating random numbers. + TesterBrowsingTopicsCalculator( + privacy_sandbox::PrivacySandboxSettings* privacy_sandbox_settings, + history::HistoryService* history_service, + content::BrowsingTopicsSiteDataManager* site_data_manager, + optimization_guide::PageContentAnnotationsService* annotations_service, + CalculateCompletedCallback callback, + base::queue rand_uint64_queue); + + ~TesterBrowsingTopicsCalculator() override; + + TesterBrowsingTopicsCalculator(const TesterBrowsingTopicsCalculator&) = + delete; + TesterBrowsingTopicsCalculator& operator=( + const TesterBrowsingTopicsCalculator&) = delete; + TesterBrowsingTopicsCalculator(TesterBrowsingTopicsCalculator&&) = delete; + TesterBrowsingTopicsCalculator& operator=(TesterBrowsingTopicsCalculator&&) = + delete; + + // Pop and return the next number in `rand_uint64_queue_`. Precondition: + // `rand_uint64_queue_` is not empty. 
+ uint64_t GenerateRandUint64() override; + + private: + base::queue rand_uint64_queue_; +}; + +} // namespace browsing_topics + +#endif // COMPONENTS_BROWSING_TOPICS_TEST_UTIL_H_ diff --git a/components/browsing_topics/topic_and_domains.h b/components/browsing_topics/topic_and_domains.h index 16633784e34773..dd335ef7f22936 100644 --- a/components/browsing_topics/topic_and_domains.h +++ b/components/browsing_topics/topic_and_domains.h @@ -33,6 +33,8 @@ class TopicAndDomains { static TopicAndDomains FromDictValue(const base::Value::Dict& dict_value); base::Value::Dict ToDictValue() const; + bool IsValid() const { return topic_ != Topic(0); } + const Topic& topic() const { return topic_; } const std::set& hashed_domains() const { diff --git a/components/browsing_topics/topic_and_domains_unittest.cc b/components/browsing_topics/topic_and_domains_unittest.cc index c89bc198aff217..8424d97aeb08f0 100644 --- a/components/browsing_topics/topic_and_domains_unittest.cc +++ b/components/browsing_topics/topic_and_domains_unittest.cc @@ -14,6 +14,7 @@ TEST_F(TopicAndDomainsTest, FromEmptyDictionaryValue) { TopicAndDomains read_topic_and_domains = TopicAndDomains::FromDictValue(base::Value::Dict()); + EXPECT_FALSE(read_topic_and_domains.IsValid()); EXPECT_EQ(read_topic_and_domains.topic(), Topic(0)); EXPECT_TRUE(read_topic_and_domains.hashed_domains().empty()); } @@ -25,6 +26,7 @@ TEST_F(TopicAndDomainsTest, EmptyTopicAndDomains_ToAndFromDictValue) { TopicAndDomains read_topic_and_domains = TopicAndDomains::FromDictValue(dict_value); + EXPECT_FALSE(read_topic_and_domains.IsValid()); EXPECT_EQ(read_topic_and_domains.topic(), Topic(0)); EXPECT_TRUE(read_topic_and_domains.hashed_domains().empty()); } @@ -38,6 +40,7 @@ TEST_F(TopicAndDomainsTest, PopulatedTopicAndDomains_ToAndFromValue) { TopicAndDomains read_topic_and_domains = TopicAndDomains::FromDictValue(dict_value); + EXPECT_TRUE(read_topic_and_domains.IsValid()); EXPECT_EQ(read_topic_and_domains.topic(), Topic(2)); 
EXPECT_EQ(read_topic_and_domains.hashed_domains(), std::set({HashedDomain(123), HashedDomain(456)})); diff --git a/components/browsing_topics/util.cc b/components/browsing_topics/util.cc index 7de6513b2cda04..998fb095500db8 100644 --- a/components/browsing_topics/util.cc +++ b/components/browsing_topics/util.cc @@ -10,6 +10,7 @@ #include "base/rand_util.h" #include "crypto/hmac.h" #include "crypto/sha2.h" +#include "third_party/blink/public/common/features.h" namespace browsing_topics { @@ -28,7 +29,7 @@ const char kTopTopicIndexDecisionPrefix[] = "TopicsV1_TopTopicIndexDecision|"; const char kEpochSwitchTimeDecisionPrefix[] = "TopicsV1_EpochSwitchTimeDecision|"; const char kContextDomainStoragePrefix[] = "TopicsV1_ContextDomainStorage|"; -const char kTopHostStoragePrefix[] = "TopicsV1_TopHostStorage|"; +const char kMainFrameHostStoragePrefix[] = "TopicsV1_MainFrameHostStorage|"; uint64_t HmacHash(ReadOnlyHmacKey hmac_key, const std::string& use_case_prefix, @@ -48,6 +49,16 @@ base::LazyInstance::Leaky } // namespace +absl::optional GetTaxonomySize() { + if (blink::features::kBrowsingTopicsTaxonomyVersion.Get() == 1) { + // Taxonomy version 1 has 349 topics. 
+ // https://github.com/jkarlin/topics/blob/main/taxonomy_v1.md + return 349; + } + + return absl::nullopt; +} + HmacKey GenerateRandomHmacKey() { if (g_hmac_key_override_for_testing.IsCreated()) return g_hmac_key_override_for_testing.Get(); @@ -112,13 +123,32 @@ HashedDomain HashContextDomainForStorage(ReadOnlyHmacKey hmac_key, HmacHash(hmac_key, kContextDomainStoragePrefix, context_domain)); } -HashedHost HashTopHostForStorage(const std::string& top_host) { +HashedHost HashMainFrameHostForStorage(const std::string& main_frame_host) { int64_t result; - crypto::SHA256HashString(kTopHostStoragePrefix + top_host, &result, - sizeof(result)); + crypto::SHA256HashString(kMainFrameHostStoragePrefix + main_frame_host, + &result, sizeof(result)); return HashedHost(result); } +base::Time DeriveHistoryDataStartTime(base::Time calculation_time, + base::Time data_accessible_since) { + return std::max(data_accessible_since, + calculation_time - + blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()); +} + +base::Time DeriveApiUsageContextDataStartTime( + base::Time calculation_time, + base::Time data_accessible_since) { + return std::max( + data_accessible_since, + calculation_time - + blink::features:: + kBrowsingTopicsNumberOfEpochsOfObservationDataToUseForFiltering + .Get() * + blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get()); +} + void OverrideHmacKeyForTesting(ReadOnlyHmacKey hmac_key) { std::copy(hmac_key.begin(), hmac_key.end(), g_hmac_key_override_for_testing.Get().begin()); diff --git a/components/browsing_topics/util.h b/components/browsing_topics/util.h index 943ddd583b5fc2..b8f7e841f57485 100644 --- a/components/browsing_topics/util.h +++ b/components/browsing_topics/util.h @@ -8,12 +8,22 @@ #include "base/containers/span.h" #include "base/time/time.h" #include "components/browsing_topics/common/common_types.h" +#include "third_party/abseil-cpp/absl/types/optional.h" namespace browsing_topics { using HmacKey = std::array; using ReadOnlyHmacKey = 
base::span; +// Get the size of the taxonomy. This is used for generating random topics from +// [1, `GetTaxonomySize()`]. It returns nullopt if this Chrome binary does not +// support the finch configured taxonomy version +// `kBrowsingTopicsTaxonomyVersion`. +// +// TODO(yaoxia): this should be maintained by UX along with the string mappings. +// Consider moving to a UX component. +absl::optional GetTaxonomySize(); + // Generate a 256 bit random hmac key. HmacKey GenerateRandomHmacKey(); @@ -53,9 +63,21 @@ uint64_t HashTopDomainForEpochSwitchTimeDecision(ReadOnlyHmacKey hmac_key, HashedDomain HashContextDomainForStorage(ReadOnlyHmacKey hmac_key, const std::string& context_domain); -// Returns a hash of `top_host` to be stored more efficiently in disk and +// Returns a hash of `main_frame_host` to be stored more efficiently in disk and // memory. -HashedHost HashTopHostForStorage(const std::string& top_host); +HashedHost HashMainFrameHostForStorage(const std::string& main_frame_host); + +// Returns the maximum of |`calculation_time` - history data time range|, and +// |data_accessible_since|. +base::Time DeriveHistoryDataStartTime( + base::Time calculation_time, + base::Time data_accessible_since = base::Time()); + +// Returns the maximum of |`calculation_time` - api usage data time range|, +// and |data_accessible_since|. +base::Time DeriveApiUsageContextDataStartTime( + base::Time calculation_time, + base::Time data_accessible_since = base::Time()); // Override the key to be returned for subsequent invocations of // `GenerateRandomHmacKey()`. 
diff --git a/components/browsing_topics/util_unittest.cc b/components/browsing_topics/util_unittest.cc index ff8e2580a1e1e1..569ab818361287 100644 --- a/components/browsing_topics/util_unittest.cc +++ b/components/browsing_topics/util_unittest.cc @@ -247,11 +247,12 @@ TEST_F(BrowsingTopicsUtilTest, })); } -TEST_F(BrowsingTopicsUtilTest, HashTopHostForStorage) { +TEST_F(BrowsingTopicsUtilTest, HashMainFrameHostForStorage) { CheckUniformRandom(base::BindLambdaForTesting([&]() { - std::string top_host = GenerateRandomDomainOrHost(); + std::string main_frame_host = GenerateRandomDomainOrHost(); - return static_cast(HashTopHostForStorage(top_host).value()); + return static_cast( + HashMainFrameHostForStorage(main_frame_host).value()); })); } diff --git a/components/optimization_guide/content/browser/page_content_annotations_model_manager.h b/components/optimization_guide/content/browser/page_content_annotations_model_manager.h index dc43f852d23dfd..780fc08348b224 100644 --- a/components/optimization_guide/content/browser/page_content_annotations_model_manager.h +++ b/components/optimization_guide/content/browser/page_content_annotations_model_manager.h @@ -58,6 +58,9 @@ class PageContentAnnotationsModelManager : public PageContentAnnotator { const std::vector& inputs, AnnotationType annotation_type) override; + absl::optional GetModelInfoForType( + AnnotationType type) const override; + // Requests that the given model for |type| be loaded in the background and // then runs |callback| with true when the model is ready to execute. If the // model is ready now, the callback is run immediately. If the model file will @@ -66,10 +69,6 @@ class PageContentAnnotationsModelManager : public PageContentAnnotator { AnnotationType type, base::OnceCallback callback); - // Returns the model info associated with the given AnnotationType, if it is - // available and loaded. 
- absl::optional GetModelInfoForType(AnnotationType type) const; - // Returns the version of the page topics model that is currently being used // to annotate page content. Will return |absl::nullopt| if no model is being // used to annotate page topics for received page content. diff --git a/components/optimization_guide/content/browser/page_content_annotations_service.cc b/components/optimization_guide/content/browser/page_content_annotations_service.cc index 818f5f0551921e..96cb5dc7dd93c5 100644 --- a/components/optimization_guide/content/browser/page_content_annotations_service.cc +++ b/components/optimization_guide/content/browser/page_content_annotations_service.cc @@ -239,36 +239,37 @@ void PageContentAnnotationsService::OverridePageContentAnnotatorForTesting( } // static -std::string PageContentAnnotationsService::StringInputForPageTopicsDomain( - const GURL& url) { - std::string domain = base::ToLowerASCII(url.host()); +std::string PageContentAnnotationsService::StringInputForPageTopicsHost( + const std::string& host) { + std::string output = base::ToLowerASCII(host); // Strip the 'www.' if it exists. 
- if (base::StartsWith(domain, "www.")) { - domain = domain.substr(4); + if (base::StartsWith(output, "www.")) { + output = output.substr(4); } - for (char c : std::vector{'-', '_', '.', '+'}) { - std::replace(domain.begin(), domain.end(), c, ' '); + const char kCharsToReplaceWithSpace[] = {'-', '_', '.', '+'}; + for (char c : kCharsToReplaceWithSpace) { + std::replace(output.begin(), output.end(), c, ' '); } - return domain; + return output; } void PageContentAnnotationsService::BatchAnnotatePageTopics( BatchAnnotationCallback callback, - const std::vector& inputs) { - std::vector domains; - for (const GURL& url : inputs) { - domains.emplace_back(StringInputForPageTopicsDomain(url)); + const std::vector& hosts) { + std::vector tokenized_hosts; + for (const std::string& host : hosts) { + tokenized_hosts.emplace_back(StringInputForPageTopicsHost(host)); } if (!annotator_) { - std::move(callback).Run(CreateEmptyBatchAnnotationResults(domains)); + std::move(callback).Run(CreateEmptyBatchAnnotationResults(tokenized_hosts)); return; } - annotator_->Annotate(std::move(callback), domains, + annotator_->Annotate(std::move(callback), tokenized_hosts, AnnotationType::kPageTopics); } @@ -289,8 +290,8 @@ void PageContentAnnotationsService::BatchAnnotate( absl::optional PageContentAnnotationsService::GetModelInfoForType( AnnotationType type) const { #if BUILDFLAG(BUILD_WITH_TFLITE_LIB) - DCHECK(model_manager_); - return model_manager_->GetModelInfoForType(type); + DCHECK(annotator_); + return annotator_->GetModelInfoForType(type); #else return absl::nullopt; #endif @@ -482,11 +483,7 @@ void PageContentAnnotationsService::RunBatchAnnotationValidation() { return; } - std::vector urls; - for (const std::string& domain : dummy_inputs) { - urls.emplace_back(GURL("https://" + domain)); - } - BatchAnnotatePageTopics(base::DoNothing(), urls); + BatchAnnotatePageTopics(base::DoNothing(), dummy_inputs); } // static diff --git 
a/components/optimization_guide/content/browser/page_content_annotations_service.h b/components/optimization_guide/content/browser/page_content_annotations_service.h index fc3b6ddcc11cc2..76c64aa07e3e4b 100644 --- a/components/optimization_guide/content/browser/page_content_annotations_service.h +++ b/components/optimization_guide/content/browser/page_content_annotations_service.h @@ -107,10 +107,10 @@ class PageContentAnnotationsService : public KeyedService, const std::vector& inputs, AnnotationType annotation_type); - // Calls |BatchAnnotate| with pre-processing the urls into its domain string, - // all specific to PageTopics. + // Calls |BatchAnnotate| with pre-processing the hosts into tokens, all + // specific to PageTopics. void BatchAnnotatePageTopics(BatchAnnotationCallback callback, - const std::vector& inputs); + const std::vector& inputs); // Requests that the given model for |type| be loaded in the background and // then runs |callback| with true when the model is ready to execute. If the @@ -135,7 +135,7 @@ class PageContentAnnotationsService : public KeyedService, private: friend class PageContentAnnotationsServiceTest; - static std::string StringInputForPageTopicsDomain(const GURL& url); + static std::string StringInputForPageTopicsHost(const std::string& host); #if BUILDFLAG(BUILD_WITH_TFLITE_LIB) // Callback invoked when |visit| has been annotated. 
diff --git a/components/optimization_guide/content/browser/page_content_annotations_service_unittest.cc b/components/optimization_guide/content/browser/page_content_annotations_service_unittest.cc index 99a681e1f80308..5d493192a6fab1 100644 --- a/components/optimization_guide/content/browser/page_content_annotations_service_unittest.cc +++ b/components/optimization_guide/content/browser/page_content_annotations_service_unittest.cc @@ -8,7 +8,6 @@ #include #include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" namespace optimization_guide { @@ -17,29 +16,29 @@ class PageContentAnnotationsServiceTest : public testing::Test { PageContentAnnotationsServiceTest() = default; ~PageContentAnnotationsServiceTest() override = default; - std::string CallStringInputForPageTopicsDomain(const GURL& url) { - return PageContentAnnotationsService::StringInputForPageTopicsDomain(url); + std::string CallStringInputForPageTopicsHost(const std::string& host) { + return PageContentAnnotationsService::StringInputForPageTopicsHost(host); } }; -TEST_F(PageContentAnnotationsServiceTest, PageTopicsDomain) { - std::vector> tests = { - {GURL("https://www.chromium.org/path?q=a"), "chromium org"}, - {GURL("https://foo-bar.com/"), "foo bar com"}, - {GURL("https://foo_bar.com/"), "foo bar com"}, - {GURL("https://cats.co.uk/"), "cats co uk"}, - {GURL("https://cats+dogs.com"), "cats dogs com"}, - {GURL("https://www.foo-bar_.baz.com"), "foo bar baz com"}, - {GURL("https://www.foo-bar-baz.com"), "foo bar baz com"}, - {GURL("https://WwW.LOWER-CASE.com"), "lower case com"}, +TEST_F(PageContentAnnotationsServiceTest, PageTopicsHost) { + std::vector> tests = { + {"www.chromium.org", "chromium org"}, + {"foo-bar.com", "foo bar com"}, + {"foo_bar.com", "foo bar com"}, + {"cats.co.uk", "cats co uk"}, + {"cats+dogs.com", "cats dogs com"}, + {"www.foo-bar_.baz.com", "foo bar baz com"}, + {"www.foo-bar-baz.com", "foo bar baz com"}, + {"WwW.LOWER-CASE.com", "lower case com"}, }; for (const auto& 
test : tests) { - GURL url = test.first; + std::string host = test.first; std::string expected = test.second; - std::string got = CallStringInputForPageTopicsDomain(url); + std::string got = CallStringInputForPageTopicsHost(host); - EXPECT_EQ(expected, got) << url; + EXPECT_EQ(expected, got) << host; } } diff --git a/components/optimization_guide/content/browser/page_content_annotator.h b/components/optimization_guide/content/browser/page_content_annotator.h index bf86383a0888b9..f76b8708e86182 100644 --- a/components/optimization_guide/content/browser/page_content_annotator.h +++ b/components/optimization_guide/content/browser/page_content_annotator.h @@ -9,7 +9,9 @@ #include #include "base/callback.h" +#include "components/optimization_guide/core/model_info.h" #include "components/optimization_guide/core/page_content_annotations_common.h" +#include "third_party/abseil-cpp/absl/types/optional.h" namespace optimization_guide { @@ -28,6 +30,11 @@ class PageContentAnnotator { virtual void Annotate(BatchAnnotationCallback callback, const std::vector& inputs, AnnotationType annotation_type) = 0; + + // Returns the model info associated with the given AnnotationType, if it is + // available and loaded. 
+ virtual absl::optional GetModelInfoForType( + AnnotationType annotation_type) const = 0; }; } // namespace optimization_guide diff --git a/components/optimization_guide/content/browser/test_page_content_annotator.cc b/components/optimization_guide/content/browser/test_page_content_annotator.cc index f1c12bed09796d..62d7ba8f7b6239 100644 --- a/components/optimization_guide/content/browser/test_page_content_annotator.cc +++ b/components/optimization_guide/content/browser/test_page_content_annotator.cc @@ -53,20 +53,40 @@ void TestPageContentAnnotator::Annotate(BatchAnnotationCallback callback, std::move(callback).Run(results); } +absl::optional TestPageContentAnnotator::GetModelInfoForType( + AnnotationType annotation_type) const { + if (annotation_type == AnnotationType::kPageTopics) + return topics_model_info_; + + if (annotation_type == AnnotationType::kPageEntities) + return entities_model_info_; + + if (annotation_type == AnnotationType::kContentVisibility) + return visibility_scores_model_info_; + + return absl::nullopt; +} + void TestPageContentAnnotator::UsePageTopics( + const absl::optional& model_info, const base::flat_map>& topics_by_input) { + topics_model_info_ = model_info; topics_by_input_ = topics_by_input; } void TestPageContentAnnotator::UsePageEntities( + const absl::optional& model_info, const base::flat_map>& entities_by_input) { + entities_model_info_ = model_info; entities_by_input_ = entities_by_input; } void TestPageContentAnnotator::UseVisibilityScores( + const absl::optional& model_info, const base::flat_map& visibility_scores_for_input) { + visibility_scores_model_info_ = model_info; visibility_scores_for_input_ = visibility_scores_for_input; } diff --git a/components/optimization_guide/content/browser/test_page_content_annotator.h b/components/optimization_guide/content/browser/test_page_content_annotator.h index a38e1ccd5c440c..d91a5590cc2e03 100644 --- a/components/optimization_guide/content/browser/test_page_content_annotator.h +++
b/components/optimization_guide/content/browser/test_page_content_annotator.h @@ -23,18 +23,21 @@ class TestPageContentAnnotator : public PageContentAnnotator { // The given page topics are used for the matching BatchAnnotationResults by // input string. If the input is not found, the output is left as nullopt. void UsePageTopics( + const absl::optional& model_info, const base::flat_map>& topics_by_input); // The given page entities are used for the matching BatchAnnotationResults by // input string. If the input is not found, the output is left as nullopt. void UsePageEntities( + const absl::optional& model_info, const base::flat_map>& entities_by_input); // The given visibility score is used for the matching BatchAnnotationResults // by input string. If the input is not found, the output is left as nullopt. void UseVisibilityScores( + const absl::optional& model_info, const base::flat_map& visibility_scores_for_input); // PageContentAnnotator: @@ -42,10 +45,18 @@ class TestPageContentAnnotator : public PageContentAnnotator { const std::vector& inputs, AnnotationType annotation_type) override; + absl::optional GetModelInfoForType( + AnnotationType annotation_type) const override; + private: + absl::optional topics_model_info_; base::flat_map> topics_by_input_; + + absl::optional entities_model_info_; base::flat_map> entities_by_input_; + + absl::optional visibility_scores_model_info_; base::flat_map visibility_scores_for_input_; }; diff --git a/components/optimization_guide/core/page_content_annotations_common.h b/components/optimization_guide/core/page_content_annotations_common.h index 4f77386485cc57..0a7f14cb2a21dc 100644 --- a/components/optimization_guide/core/page_content_annotations_common.h +++ b/components/optimization_guide/core/page_content_annotations_common.h @@ -86,12 +86,12 @@ class BatchAnnotationResult { BatchAnnotationResult(const BatchAnnotationResult&); ~BatchAnnotationResult(); - std::string input() const { return input_; } + const 
std::string& input() const { return input_; } AnnotationType type() const { return type_; } - absl::optional> topics() const { + const absl::optional>& topics() const { return topics_; } - absl::optional> entities() const { + const absl::optional>& entities() const { return entities_; } absl::optional visibility_score() const { return visibility_score_; } diff --git a/content/browser/browsing_topics/browsing_topics_site_data_manager_impl.cc b/content/browser/browsing_topics/browsing_topics_site_data_manager_impl.cc index 63319aae2e4b1e..157ca860c95f10 100644 --- a/content/browser/browsing_topics/browsing_topics_site_data_manager_impl.cc +++ b/content/browser/browsing_topics/browsing_topics_site_data_manager_impl.cc @@ -53,11 +53,11 @@ void BrowsingTopicsSiteDataManagerImpl::GetBrowsingTopicsApiUsage( } void BrowsingTopicsSiteDataManagerImpl::OnBrowsingTopicsApiUsed( - const browsing_topics::HashedHost& hashed_top_host, + const browsing_topics::HashedHost& hashed_main_frame_host, const base::flat_set& hashed_context_domains) { storage_.AsyncCall(&BrowsingTopicsSiteDataStorage::OnBrowsingTopicsApiUsed) - .WithArgs(hashed_top_host, hashed_context_domains); + .WithArgs(hashed_main_frame_host, hashed_context_domains); } } // namespace content diff --git a/content/browser/browsing_topics/browsing_topics_site_data_manager_impl.h b/content/browser/browsing_topics/browsing_topics_site_data_manager_impl.h index d81c2bd741e846..10dc0a5401d34c 100644 --- a/content/browser/browsing_topics/browsing_topics_site_data_manager_impl.h +++ b/content/browser/browsing_topics/browsing_topics_site_data_manager_impl.h @@ -39,7 +39,7 @@ class CONTENT_EXPORT BrowsingTopicsSiteDataManagerImpl GetBrowsingTopicsApiUsageCallback callback) override; void OnBrowsingTopicsApiUsed( - const browsing_topics::HashedHost& hashed_top_host, + const browsing_topics::HashedHost& hashed_main_frame_host, const base::flat_set& hashed_context_domains) override; diff --git 
a/content/browser/browsing_topics/browsing_topics_site_data_manager_impl_unittest.cc b/content/browser/browsing_topics/browsing_topics_site_data_manager_impl_unittest.cc index 11f01618862c3a..43990e012c4576 100644 --- a/content/browser/browsing_topics/browsing_topics_site_data_manager_impl_unittest.cc +++ b/content/browser/browsing_topics/browsing_topics_site_data_manager_impl_unittest.cc @@ -36,7 +36,7 @@ TEST_F(BrowsingTopicsSiteDataManagerImplTest, GetBrowsingTopicsApiUsage) { base::Time initial_time = base::Time::Now(); topics_manager_->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(456)}); size_t query_result_count = 0; @@ -72,7 +72,7 @@ TEST_F(BrowsingTopicsSiteDataManagerImplTest, GetBrowsingTopicsApiUsage) { EXPECT_TRUE(result.success); EXPECT_EQ(result.api_usage_contexts.size(), 1u); - EXPECT_EQ(result.api_usage_contexts[0].hashed_top_host, + EXPECT_EQ(result.api_usage_contexts[0].hashed_main_frame_host, browsing_topics::HashedHost(123)); EXPECT_EQ(result.api_usage_contexts[0].hashed_context_domain, browsing_topics::HashedDomain(456)); diff --git a/content/browser/browsing_topics/browsing_topics_site_data_storage.cc b/content/browser/browsing_topics/browsing_topics_site_data_storage.cc index c6ef8eaadaa9e0..2454ae71941f65 100644 --- a/content/browser/browsing_topics/browsing_topics_site_data_storage.cc +++ b/content/browser/browsing_topics/browsing_topics_site_data_storage.cc @@ -68,7 +68,7 @@ BrowsingTopicsSiteDataStorage::GetBrowsingTopicsApiUsage(base::Time begin_time, static constexpr char kGetApiUsageSql[] = // clang-format off - "SELECT hashed_context_domain,hashed_top_host,last_usage_time " + "SELECT hashed_context_domain,hashed_main_frame_host,last_usage_time " "FROM browsing_topics_api_usages " "WHERE last_usage_time>=? 
AND last_usage_time& hashed_context_domains) { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); @@ -123,14 +123,14 @@ void BrowsingTopicsSiteDataStorage::OnBrowsingTopicsApiUsed( static constexpr char kInsertApiUsageSql[] = // clang-format off "INSERT OR REPLACE INTO browsing_topics_api_usages " - "(hashed_context_domain,hashed_top_host,last_usage_time) " + "(hashed_context_domain,hashed_main_frame_host,last_usage_time) " "VALUES (?,?,?)"; // clang-format on sql::Statement insert_api_usage_statement( db_->GetCachedStatement(SQL_FROM_HERE, kInsertApiUsageSql)); insert_api_usage_statement.BindInt64(0, hashed_context_domain.value()); - insert_api_usage_statement.BindInt64(1, hashed_top_host.value()); + insert_api_usage_statement.BindInt64(1, hashed_main_frame_host.value()); insert_api_usage_statement.BindTime(2, current_time); if (!insert_api_usage_statement.Run()) @@ -211,9 +211,9 @@ bool BrowsingTopicsSiteDataStorage::CreateSchema() { // clang-format off "CREATE TABLE IF NOT EXISTS browsing_topics_api_usages(" "hashed_context_domain INTEGER NOT NULL," - "hashed_top_host INTEGER NOT NULL," + "hashed_main_frame_host INTEGER NOT NULL," "last_usage_time INTEGER NOT NULL," - "PRIMARY KEY (hashed_context_domain,hashed_top_host))"; + "PRIMARY KEY (hashed_context_domain,hashed_main_frame_host))"; // clang-format on if (!db_->Execute(kBrowsingTopicsApiUsagesTableSql)) return false; diff --git a/content/browser/browsing_topics/browsing_topics_site_data_storage.h b/content/browser/browsing_topics/browsing_topics_site_data_storage.h index 5e0815c3024bee..590d3b583e6aff 100644 --- a/content/browser/browsing_topics/browsing_topics_site_data_storage.h +++ b/content/browser/browsing_topics/browsing_topics_site_data_storage.h @@ -56,7 +56,7 @@ class CONTENT_EXPORT BrowsingTopicsSiteDataStorage { // Persist the browsing topics api usage context to storage. Called when the // usage is detected in a context on a page. 
void OnBrowsingTopicsApiUsed( - const browsing_topics::HashedHost& hashed_top_host, + const browsing_topics::HashedHost& hashed_main_frame_host, const base::flat_set& hashed_context_domains); diff --git a/content/browser/browsing_topics/browsing_topics_site_data_storage_unittest.cc b/content/browser/browsing_topics/browsing_topics_site_data_storage_unittest.cc index 76e8d20465396d..1e3bec1ea902de 100644 --- a/content/browser/browsing_topics/browsing_topics_site_data_storage_unittest.cc +++ b/content/browser/browsing_topics/browsing_topics_site_data_storage_unittest.cc @@ -123,7 +123,7 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, // and [sqlite_autoindex_meta_1]. EXPECT_EQ(3u, sql::test::CountSQLIndices(&db)); - // `hashed_context_domain`, `hashed_top_host`, and `last_usage_time`. + // `hashed_context_domain`, `hashed_main_frame_host`, and `last_usage_time`. EXPECT_EQ(3u, sql::test::CountTableColumns(&db, "browsing_topics_api_usages")); @@ -206,7 +206,7 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, LoadFromFile_VersionTooNew_Failure) { TEST_F(BrowsingTopicsSiteDataStorageTest, OnBrowsingTopicsApiUsed_SingleEntry) { OpenDatabase(); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(456)}); CloseDatabase(); @@ -215,17 +215,18 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, OnBrowsingTopicsApiUsed_SingleEntry) { EXPECT_EQ(1u, CountApiUsagesEntries(db)); const char kGetAllEntriesSql[] = - "SELECT hashed_context_domain, hashed_top_host, last_usage_time FROM " + "SELECT hashed_context_domain, hashed_main_frame_host, last_usage_time " + "FROM " "browsing_topics_api_usages"; sql::Statement s(db.GetUniqueStatement(kGetAllEntriesSql)); EXPECT_TRUE(s.Step()); int64_t hashed_context_domain = s.ColumnInt64(0); - int64_t hashed_top_host = s.ColumnInt64(1); + int64_t hashed_main_frame_host = s.ColumnInt64(1); 
base::Time time = s.ColumnTime(2); EXPECT_EQ(hashed_context_domain, 456); - EXPECT_EQ(hashed_top_host, 123); + EXPECT_EQ(hashed_main_frame_host, 123); EXPECT_EQ(time, base::Time::Now()); EXPECT_FALSE(s.Step()); @@ -235,17 +236,17 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, OnBrowsingTopicsApiUsed_MultipleEntries) { OpenDatabase(); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(123)}); task_environment_.FastForwardBy(base::Seconds(1)); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(456), browsing_topics::HashedDomain(789)}); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(456), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(456), /*hashed_context_domains=*/{browsing_topics::HashedDomain(789)}); CloseDatabase(); @@ -254,9 +255,10 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, EXPECT_EQ(4u, CountApiUsagesEntries(db)); const char kGetAllEntriesSql[] = - "SELECT hashed_context_domain, hashed_top_host, last_usage_time FROM " + "SELECT hashed_context_domain, hashed_main_frame_host, last_usage_time " + "FROM " "browsing_topics_api_usages " - "ORDER BY last_usage_time, hashed_top_host, hashed_context_domain"; + "ORDER BY last_usage_time, hashed_main_frame_host, hashed_context_domain"; sql::Statement s(db.GetUniqueStatement(kGetAllEntriesSql)); @@ -264,11 +266,11 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, EXPECT_TRUE(s.Step()); int64_t hashed_context_domain = s.ColumnInt64(0); - int64_t hashed_top_host = s.ColumnInt64(1); + int64_t hashed_main_frame_host = s.ColumnInt64(1); base::Time time = s.ColumnTime(2); EXPECT_EQ(hashed_context_domain, 123); - EXPECT_EQ(hashed_top_host, 123); 
+ EXPECT_EQ(hashed_main_frame_host, 123); EXPECT_EQ(time, base::Time::Now() - base::Seconds(1)); } @@ -276,11 +278,11 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, EXPECT_TRUE(s.Step()); int64_t hashed_context_domain = s.ColumnInt64(0); - int64_t hashed_top_host = s.ColumnInt64(1); + int64_t hashed_main_frame_host = s.ColumnInt64(1); base::Time time = s.ColumnTime(2); EXPECT_EQ(hashed_context_domain, 456); - EXPECT_EQ(hashed_top_host, 123); + EXPECT_EQ(hashed_main_frame_host, 123); EXPECT_EQ(time, base::Time::Now()); } @@ -288,11 +290,11 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, EXPECT_TRUE(s.Step()); int64_t hashed_context_domain = s.ColumnInt64(0); - int64_t hashed_top_host = s.ColumnInt64(1); + int64_t hashed_main_frame_host = s.ColumnInt64(1); base::Time time = s.ColumnTime(2); EXPECT_EQ(hashed_context_domain, 789u); - EXPECT_EQ(hashed_top_host, 123); + EXPECT_EQ(hashed_main_frame_host, 123); EXPECT_EQ(time, base::Time::Now()); } @@ -300,11 +302,11 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, EXPECT_TRUE(s.Step()); int64_t hashed_context_domain = s.ColumnInt64(0); - int64_t hashed_top_host = s.ColumnInt64(1); + int64_t hashed_main_frame_host = s.ColumnInt64(1); base::Time time = s.ColumnTime(2); EXPECT_EQ(hashed_context_domain, 789u); - EXPECT_EQ(hashed_top_host, 456); + EXPECT_EQ(hashed_main_frame_host, 456); EXPECT_EQ(time, base::Time::Now()); } @@ -315,13 +317,13 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, GetBrowsingTopicsApiUsage) { OpenDatabase(); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(123)}); task_environment_.FastForwardBy(base::Seconds(1)); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(456)}); 
task_environment_.FastForwardBy(base::Seconds(1)); @@ -335,14 +337,14 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, GetBrowsingTopicsApiUsage) { EXPECT_TRUE(result.success); EXPECT_EQ(result.api_usage_contexts.size(), 2u); - EXPECT_EQ(result.api_usage_contexts[0].hashed_top_host, + EXPECT_EQ(result.api_usage_contexts[0].hashed_main_frame_host, browsing_topics::HashedHost(123)); EXPECT_EQ(result.api_usage_contexts[0].hashed_context_domain, browsing_topics::HashedDomain(456)); EXPECT_EQ(result.api_usage_contexts[0].time, base::Time::Now() - base::Seconds(1)); - EXPECT_EQ(result.api_usage_contexts[1].hashed_top_host, + EXPECT_EQ(result.api_usage_contexts[1].hashed_main_frame_host, browsing_topics::HashedHost(123)); EXPECT_EQ(result.api_usage_contexts[1].hashed_context_domain, browsing_topics::HashedDomain(123)); @@ -355,13 +357,13 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, OpenDatabase(); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(123)}); task_environment_.FastForwardBy(base::Seconds(1)); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(456)}); task_environment_.FastForwardBy(base::Seconds(1)); @@ -374,7 +376,7 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, EXPECT_TRUE(result.success); EXPECT_EQ(result.api_usage_contexts.size(), 1u); - EXPECT_EQ(result.api_usage_contexts[0].hashed_top_host, + EXPECT_EQ(result.api_usage_contexts[0].hashed_main_frame_host, browsing_topics::HashedHost(123)); EXPECT_EQ(result.api_usage_contexts[0].hashed_context_domain, browsing_topics::HashedDomain(456)); @@ -392,13 +394,13 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, ExpireDataBefore) { OpenDatabase(); topics_storage()->OnBrowsingTopicsApiUsed( - 
/*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(123)}); task_environment_.FastForwardBy(base::Seconds(1)); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(456)}); task_environment_.FastForwardBy(base::Seconds(1)); @@ -412,17 +414,18 @@ TEST_F(BrowsingTopicsSiteDataStorageTest, ExpireDataBefore) { // The `ExpireDataBefore()` should have deleted the first inserted entry. const char kGetAllEntriesSql[] = - "SELECT hashed_context_domain, hashed_top_host, last_usage_time FROM " + "SELECT hashed_context_domain, hashed_main_frame_host, last_usage_time " + "FROM " "browsing_topics_api_usages"; sql::Statement s(db.GetUniqueStatement(kGetAllEntriesSql)); EXPECT_TRUE(s.Step()); int64_t hashed_context_domain = s.ColumnInt64(0); - int64_t hashed_top_host = s.ColumnInt64(1); + int64_t hashed_main_frame_host = s.ColumnInt64(1); base::Time time = s.ColumnTime(2); EXPECT_EQ(hashed_context_domain, 456); - EXPECT_EQ(hashed_top_host, 123); + EXPECT_EQ(hashed_main_frame_host, 123); EXPECT_EQ(time, base::Time::Now() - base::Seconds(1)); EXPECT_FALSE(s.Step()); @@ -445,13 +448,13 @@ TEST_F(BrowsingTopicsSiteDataStorageMaxEntriesToLoadTest, MaxEntriesToLoad) { OpenDatabase(); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(123)}); task_environment_.FastForwardBy(base::Seconds(1)); topics_storage()->OnBrowsingTopicsApiUsed( - /*hashed_top_host=*/browsing_topics::HashedHost(123), + /*hashed_main_frame_host=*/browsing_topics::HashedHost(123), /*hashed_context_domains=*/{browsing_topics::HashedDomain(456)}); 
task_environment_.FastForwardBy(base::Seconds(1)); @@ -466,7 +469,7 @@ TEST_F(BrowsingTopicsSiteDataStorageMaxEntriesToLoadTest, MaxEntriesToLoad) { EXPECT_TRUE(result.success); EXPECT_EQ(result.api_usage_contexts.size(), 1u); - EXPECT_EQ(result.api_usage_contexts[0].hashed_top_host, + EXPECT_EQ(result.api_usage_contexts[0].hashed_main_frame_host, browsing_topics::HashedHost(123)); EXPECT_EQ(result.api_usage_contexts[0].hashed_context_domain, browsing_topics::HashedDomain(456)); diff --git a/content/public/browser/browsing_topics_site_data_manager.h b/content/public/browser/browsing_topics_site_data_manager.h index bb81bb2fe7a303..0b1948148ec05c 100644 --- a/content/public/browser/browsing_topics_site_data_manager.h +++ b/content/public/browser/browsing_topics_site_data_manager.h @@ -40,7 +40,7 @@ class CONTENT_EXPORT BrowsingTopicsSiteDataManager { // Persist the browsing topics api usage context to storage. Called when the // usage is detected in a context on a page. virtual void OnBrowsingTopicsApiUsed( - const browsing_topics::HashedHost& hashed_top_host, + const browsing_topics::HashedHost& hashed_main_frame_host, const base::flat_set& hashed_context_domains) = 0; }; diff --git a/content/public/test/browsing_topics_test_util.cc b/content/public/test/browsing_topics_test_util.cc new file mode 100644 index 00000000000000..bba7d29ce1fed8 --- /dev/null +++ b/content/public/test/browsing_topics_test_util.cc @@ -0,0 +1,44 @@ +// Copyright 2021 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "content/public/test/browsing_topics_test_util.h" + +#include "content/browser/browsing_topics/browsing_topics_site_data_manager_impl.h" + +namespace content { + +TesterBrowsingTopicsSiteDataManager::TesterBrowsingTopicsSiteDataManager( + const base::FilePath& user_data_directory) + : manager_impl_( + new BrowsingTopicsSiteDataManagerImpl(user_data_directory)) {} + +void TesterBrowsingTopicsSiteDataManager::ExpireDataBefore(base::Time time) { + manager_impl_->ExpireDataBefore(time); +} + +TesterBrowsingTopicsSiteDataManager::~TesterBrowsingTopicsSiteDataManager() = + default; + +void TesterBrowsingTopicsSiteDataManager::OnBrowsingTopicsApiUsed( + const browsing_topics::HashedHost& hashed_main_frame_host, + const base::flat_set& + hashed_context_domains) { + manager_impl_->OnBrowsingTopicsApiUsed(hashed_main_frame_host, + hashed_context_domains); +} + +void TesterBrowsingTopicsSiteDataManager::GetBrowsingTopicsApiUsage( + base::Time begin_time, + base::Time end_time, + GetBrowsingTopicsApiUsageCallback callback) { + if (!query_failure_override_) { + manager_impl_->GetBrowsingTopicsApiUsage(begin_time, end_time, + std::move(callback)); + return; + } + + std::move(callback).Run(browsing_topics::ApiUsageContextQueryResult()); +} + +} // namespace content diff --git a/content/public/test/browsing_topics_test_util.h b/content/public/test/browsing_topics_test_util.h new file mode 100644 index 00000000000000..eab0e193ca90f2 --- /dev/null +++ b/content/public/test/browsing_topics_test_util.h @@ -0,0 +1,60 @@ +// Copyright 2022 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file.
+ +#ifndef CONTENT_PUBLIC_TEST_BROWSING_TOPICS_TEST_UTIL_H_ +#define CONTENT_PUBLIC_TEST_BROWSING_TOPICS_TEST_UTIL_H_ + +#include "base/files/file_path.h" +#include "content/public/browser/browsing_topics_site_data_manager.h" + +namespace content { + +class BrowsingTopicsSiteDataManagerImpl; + +// A tester class that allows mocking a query failure (e.g. database error). +class TesterBrowsingTopicsSiteDataManager + : public BrowsingTopicsSiteDataManager { + public: + explicit TesterBrowsingTopicsSiteDataManager( + const base::FilePath& user_data_directory); + + ~TesterBrowsingTopicsSiteDataManager() override; + + TesterBrowsingTopicsSiteDataManager( + const TesterBrowsingTopicsSiteDataManager&) = delete; + TesterBrowsingTopicsSiteDataManager& operator=( + const TesterBrowsingTopicsSiteDataManager&) = delete; + TesterBrowsingTopicsSiteDataManager(TesterBrowsingTopicsSiteDataManager&&) = + delete; + TesterBrowsingTopicsSiteDataManager& operator=( + TesterBrowsingTopicsSiteDataManager&&) = delete; + + // Use the default handling from `BrowsingTopicsSiteDataManagerImpl`. + void ExpireDataBefore(base::Time time) override; + + // Use the default handling from `BrowsingTopicsSiteDataManagerImpl`. + void OnBrowsingTopicsApiUsed( + const browsing_topics::HashedHost& hashed_main_frame_host, + const base::flat_set& + hashed_context_domains) override; + + void SetQueryFailureOverride() { query_failure_override_ = true; } + + // Return a default/failed `ApiUsageContextQueryResult` if + // `query_failure_override_` is true; otherwise, use the default handling from + // `BrowsingTopicsSiteDataManagerImpl`.
+ void GetBrowsingTopicsApiUsage( + base::Time begin_time, + base::Time end_time, + GetBrowsingTopicsApiUsageCallback callback) override; + + private: + std::unique_ptr manager_impl_; + + bool query_failure_override_ = false; +}; + +} // namespace content + +#endif // CONTENT_PUBLIC_TEST_BROWSING_TOPICS_TEST_UTIL_H_ diff --git a/content/test/BUILD.gn b/content/test/BUILD.gn index f29ae3cea76f0a..cc5047cfa7d654 100644 --- a/content/test/BUILD.gn +++ b/content/test/BUILD.gn @@ -130,6 +130,8 @@ static_library("test_support") { "../public/test/browser_test_utils.h", "../public/test/browsing_data_remover_test_util.cc", "../public/test/browsing_data_remover_test_util.h", + "../public/test/browsing_topics_test_util.cc", + "../public/test/browsing_topics_test_util.h", "../public/test/commit_message_delayer.cc", "../public/test/commit_message_delayer.h", "../public/test/content_mock_cert_verifier.cc", diff --git a/content/test/data/browsing_topics/v0.init_too_old.sql b/content/test/data/browsing_topics/v0.init_too_old.sql index 62298240a1480b..1393ca290f551f 100644 --- a/content/test/data/browsing_topics/v0.init_too_old.sql +++ b/content/test/data/browsing_topics/v0.init_too_old.sql @@ -4,9 +4,9 @@ BEGIN TRANSACTION; CREATE TABLE browsing_topics_api_usages ( hashed_context_domain INTEGER NOT NULL, -hashed_top_host INTEGER NOT NULL, +hashed_main_frame_host INTEGER NOT NULL, last_usage_time INTEGER NOT NULL, -PRIMARY KEY (hashed_context_domain, hashed_top_host)); +PRIMARY KEY (hashed_context_domain, hashed_main_frame_host)); CREATE INDEX last_usage_time_idx ON browsing_topics_api_usages(last_usage_time); diff --git a/content/test/data/browsing_topics/v1.init_too_new.sql b/content/test/data/browsing_topics/v1.init_too_new.sql index 825043b85013fa..2284c94c2539bc 100644 --- a/content/test/data/browsing_topics/v1.init_too_new.sql +++ b/content/test/data/browsing_topics/v1.init_too_new.sql @@ -4,9 +4,9 @@ BEGIN TRANSACTION; CREATE TABLE browsing_topics_api_usages ( 
hashed_context_domain INTEGER NOT NULL, -hashed_top_host INTEGER NOT NULL, +hashed_main_frame_host INTEGER NOT NULL, last_usage_time INTEGER NOT NULL, -PRIMARY KEY (hashed_context_domain, hashed_top_host)); +PRIMARY KEY (hashed_context_domain, hashed_main_frame_host)); CREATE INDEX last_usage_time_idx ON browsing_topics_api_usages(last_usage_time); diff --git a/content/test/data/browsing_topics/v1.sql b/content/test/data/browsing_topics/v1.sql index 10d637320409d7..9d5ce992cb8014 100644 --- a/content/test/data/browsing_topics/v1.sql +++ b/content/test/data/browsing_topics/v1.sql @@ -4,9 +4,9 @@ BEGIN TRANSACTION; CREATE TABLE browsing_topics_api_usages ( hashed_context_domain INTEGER NOT NULL, -hashed_top_host INTEGER NOT NULL, +hashed_main_frame_host INTEGER NOT NULL, last_usage_time INTEGER NOT NULL, -PRIMARY KEY (hashed_context_domain, hashed_top_host)); +PRIMARY KEY (hashed_context_domain, hashed_main_frame_host)); CREATE INDEX last_usage_time_idx ON browsing_topics_api_usages(last_usage_time); diff --git a/third_party/blink/common/features.cc b/third_party/blink/common/features.cc index e8c1c48630f7b1..193c322bbe087f 100644 --- a/third_party/blink/common/features.cc +++ b/third_party/blink/common/features.cc @@ -1045,6 +1045,9 @@ const base::Feature kBrowsingTopics{"BrowsingTopics", // requesting contexts. const base::FeatureParam kBrowsingTopicsNumberOfEpochsToExpose{ &kBrowsingTopics, "number_of_epochs_to_expose", 3}; +// The periodic topics calculation interval. +const base::FeatureParam kBrowsingTopicsTimePeriodPerEpoch{ + &kBrowsingTopics, "time_period_per_epoch", base::Days(7)}; // The number of top topics to derive and to keep for each epoch (week). const base::FeatureParam kBrowsingTopicsNumberOfTopTopicsPerEpoch{ &kBrowsingTopics, "number_of_top_topics_per_epoch", 5}; @@ -1055,6 +1058,18 @@ const base::FeatureParam kBrowsingTopicsNumberOfTopTopicsPerEpoch{ // topic instead of one of the top topics. 
const base::FeatureParam kBrowsingTopicsUseRandomTopicProbabilityPercent{ &kBrowsingTopics, "use_random_topic_probability_percent", 5}; +// How many epochs (weeks) of API usage data (i.e. topics observations) will be +// used for filtering the topics for a calling context. +const base::FeatureParam + kBrowsingTopicsNumberOfEpochsOfObservationDataToUseForFiltering{ + &kBrowsingTopics, + "number_of_epochs_of_observation_data_to_use_for_filtering", 3}; +// The max number of observed-by context domains to keep for each top topic. +// The intent is to cap the in-use memory. +const base::FeatureParam + kBrowsingTopicsMaxNumberOfApiUsageContextDomainsToKeepPerTopic{ + &kBrowsingTopics, + "max_number_of_api_usage_context_domains_to_keep_per_topic", 1000}; // The max number of entries allowed to be retrieved from the // `BrowsingTopicsSiteDataStorage` database for each query for the API usage // contexts. The query will occur once per epoch (week) at topics calculation @@ -1063,11 +1078,15 @@ const base::FeatureParam kBrowsingTopicsMaxNumberOfApiUsageContextEntriesToLoadPerEpoch{ &kBrowsingTopics, "max_number_of_api_usage_context_entries_to_load_per_epoch", 100000}; -// Encodes the rest of the configuration parameters. Each version number should -// only be mapped to one configuration set. In practice, this can be guaranteed -// by always bumping up the version number whenever parameters are updated. +// Encodes the configuration parameters above. Each version number should only +// be mapped to one configuration set. In practice, this can be guaranteed by +// always bumping up the version number whenever parameters are updated. const base::FeatureParam kBrowsingTopicsConfigVersion{&kBrowsingTopics, "config_version", 1}; +// The taxonomy version. This only affects the topics classification that occurs +// during this browser session, and doesn't affect the pre-existing epochs. 
+const base::FeatureParam kBrowsingTopicsTaxonomyVersion{ + &kBrowsingTopics, "taxonomy_version", 1}; // Enable the ability to minimize processing in the WebRTC APM when all audio // tracks are disabled. If disabled, the APM in WebRTC will ignore attempts to diff --git a/third_party/blink/public/common/features.h b/third_party/blink/public/common/features.h index 16aa2edf91966e..f977c97e6bac4f 100644 --- a/third_party/blink/public/common/features.h +++ b/third_party/blink/public/common/features.h @@ -467,16 +467,24 @@ BLINK_COMMON_EXPORT extern const base::Feature kAllowURNsInIframes; BLINK_COMMON_EXPORT bool IsAllowURNsInIframeEnabled(); BLINK_COMMON_EXPORT extern const base::Feature kBrowsingTopics; +BLINK_COMMON_EXPORT extern const base::FeatureParam + kBrowsingTopicsTimePeriodPerEpoch; BLINK_COMMON_EXPORT extern const base::FeatureParam kBrowsingTopicsNumberOfEpochsToExpose; BLINK_COMMON_EXPORT extern const base::FeatureParam kBrowsingTopicsNumberOfTopTopicsPerEpoch; BLINK_COMMON_EXPORT extern const base::FeatureParam kBrowsingTopicsUseRandomTopicProbabilityPercent; +BLINK_COMMON_EXPORT extern const base::FeatureParam + kBrowsingTopicsNumberOfEpochsOfObservationDataToUseForFiltering; +BLINK_COMMON_EXPORT extern const base::FeatureParam + kBrowsingTopicsMaxNumberOfApiUsageContextDomainsToKeepPerTopic; BLINK_COMMON_EXPORT extern const base::FeatureParam kBrowsingTopicsMaxNumberOfApiUsageContextEntriesToLoadPerEpoch; BLINK_COMMON_EXPORT extern const base::FeatureParam kBrowsingTopicsConfigVersion; +BLINK_COMMON_EXPORT extern const base::FeatureParam + kBrowsingTopicsTaxonomyVersion; // Control switch for minimizing processing in the WebRTC APM when all audio // tracks are disabled. 
diff --git a/tools/metrics/histograms/enums.xml b/tools/metrics/histograms/enums.xml index 94e60a9496eda2..a13a17c825825e 100644 --- a/tools/metrics/histograms/enums.xml +++ b/tools/metrics/histograms/enums.xml @@ -11460,6 +11460,15 @@ Called by update_bad_message_reasons.py.--> + + + + + + + + Deprecated 12/2020 as it is no longer used for analysis. diff --git a/tools/metrics/histograms/metadata/browsing_topics/histograms.xml b/tools/metrics/histograms/metadata/browsing_topics/histograms.xml index 1e7dfbd0223f39..4b1da0070e6992 100644 --- a/tools/metrics/histograms/metadata/browsing_topics/histograms.xml +++ b/tools/metrics/histograms/metadata/browsing_topics/histograms.xml @@ -32,8 +32,60 @@ chromium-metrics-reviews@google.com. + + yaoxia@chromium.org + jkarlin@chromium.org + + Records the browsing topics calculation result status (i.e. success, or the + failure reason). Recorded at the end of each (weekly) topics calculation. + + + + + yaoxia@chromium.org + jkarlin@chromium.org + + Records the count of distinct history hosts that are eligible for topics + calculation. Recorded during each (weekly) topics calculation after the + observation domains are derived. In case of a calculation failure (e.g. + permission denied, etc.), this metric won't be recorded. + + + + + yaoxia@chromium.org + jkarlin@chromium.org + + Records the count of context domains for each of the calculated top topics. + This won't exceed the cap number + `kBrowsingTopicsMaxNumberOfApiUsageContextDomainsToKeepPerTopic`. Recorded + once for each calculated top topic, during each (weekly) topics calculation + after the observation domains are derived. In case of a calculation failure + (e.g. permission denied; candidate topic was blocked; etc.), this metric + won't be recorded. + + + + + yaoxia@chromium.org + jkarlin@chromium.org + + Records the count of derived top topics before random ones are padded. + Recorded during each (weekly) topics calculation after the top topics are + derived. 
In case of a calculation failure (e.g. permission denied, etc.), + this metric won't be recorded. + + + + enum="BooleanSuccess" expires_after="2023-03-14"> yaoxia@chromium.org jkarlin@chromium.org