Skip to content

Commit

Permalink
Plumb aliases to the keywords for journeys omnibox action
Browse files Browse the repository at this point in the history
This also removes the batching code in OnDeviceClusteringBackend for
simplicity of passing the entity metadata map around and moves the
params for anything that controls omnibox action keywords to the
omniboxaction feature for easier experimentation

(cherry picked from commit 0b6a73f)

Bug: 1324728
Change-Id: Ia582bd6c82bf87609f3c1620c26b46711b80b7ba
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3643460
Reviewed-by: Michael Crouse <mcrouse@chromium.org>
Reviewed-by: Tommy Li <tommycli@chromium.org>
Commit-Queue: Sophie Chang <sophiechang@chromium.org>
Cr-Original-Commit-Position: refs/heads/main@{#1003266}
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3652540
Cr-Commit-Position: refs/branch-heads/5060@{#65}
Cr-Branched-From: b83393d-refs/heads/main@{#1002911}
  • Loading branch information
Sophie Chang authored and Chromium LUCI CQ committed May 18, 2022
1 parent db97c93 commit fb2f9bd
Show file tree
Hide file tree
Showing 11 changed files with 157 additions and 207 deletions.
31 changes: 19 additions & 12 deletions components/history_clusters/core/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,25 @@ Config::Config() {
internal::kOmniboxAction, "omnibox_action_on_noisy_urls",
omnibox_action_on_noisy_urls);

keyword_filter_on_entity_aliases = base::GetFieldTrialParamByFeatureAsBool(
history_clusters::features::kOnDeviceClusteringKeywordFiltering,
"keyword_filter_on_entity_aliases", keyword_filter_on_entity_aliases);

max_entity_aliases_in_keywords = base::GetFieldTrialParamByFeatureAsInt(
history_clusters::features::kOnDeviceClusteringKeywordFiltering,
"max_entity_aliases_in_keywords", max_entity_aliases_in_keywords);
if (max_entity_aliases_in_keywords <= 0) {
max_entity_aliases_in_keywords = SIZE_MAX;
}

keyword_filter_on_categories = GetFieldTrialParamByFeatureAsBool(
history_clusters::features::kOnDeviceClusteringKeywordFiltering,
"keyword_filter_on_categories", keyword_filter_on_categories);

keyword_filter_on_noisy_visits = GetFieldTrialParamByFeatureAsBool(
history_clusters::features::kOnDeviceClusteringKeywordFiltering,
"keyword_filter_on_noisy_visits", keyword_filter_on_noisy_visits);

non_user_visible_debug =
base::FeatureList::IsEnabled(internal::kNonUserVisibleDebug);

Expand Down Expand Up @@ -197,18 +216,6 @@ Config::Config() {
"content_clustering_intersection_threshold",
cluster_interaction_threshold);

should_include_categories_in_keywords = GetFieldTrialParamByFeatureAsBool(
features::kOnDeviceClustering, "include_categories_in_keywords",
should_include_categories_in_keywords);

should_exclude_keywords_from_noisy_visits = GetFieldTrialParamByFeatureAsBool(
features::kOnDeviceClustering, "exclude_keywords_from_noisy_visits",
should_exclude_keywords_from_noisy_visits);

clustering_tasks_batch_size = GetFieldTrialParamByFeatureAsInt(
features::kSplitClusteringTasksToSmallerBatches,
"clustering_task_batch_size", clustering_tasks_batch_size);

split_clusters_at_search_visits = GetFieldTrialParamByFeatureAsBool(
features::kOnDeviceClustering, "split_clusters_at_search_visits",
split_clusters_at_search_visits);
Expand Down
23 changes: 16 additions & 7 deletions components/history_clusters/core/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ struct Config {
// visits. This does nothing if `omnibox_action_on_urls` is false.
bool omnibox_action_on_noisy_urls = true;

// If enabled, adds the keywords of aliases for detected entity names to a
// cluster.
bool keyword_filter_on_entity_aliases = false;

// If greater than 0, the max number of aliases to include in keywords. If <=
// 0, all aliases will be included.
size_t max_entity_aliases_in_keywords = 0;

// If enabled, adds the keywords of categories for detected entities to a
// cluster.
bool keyword_filter_on_categories = true;

// If enabled, adds the keywords of detected entities from noisy visits to a
// cluster.
bool keyword_filter_on_noisy_visits = true;

// Enables debug info in non-user-visible surfaces, like Chrome Inspector.
// Does nothing if `kJourneys` is disabled.
bool non_user_visible_debug = false;
Expand Down Expand Up @@ -191,13 +207,6 @@ struct Config {
// use when clustering based on intersection score.
int cluster_interaction_threshold = 2;

// Whether to include category names in the keywords for a cluster.
bool should_include_categories_in_keywords = true;

// Whether to exclude keywords from visits that may be considered "noisy" to
// the user (i.e. highly engaged, non-SRP).
bool should_exclude_keywords_from_noisy_visits = false;

// Returns the default batch size for annotating visits when clustering.
size_t clustering_tasks_batch_size = 250;

Expand Down
23 changes: 19 additions & 4 deletions components/history_clusters/core/keyword_cluster_finalizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,20 @@
#include "components/history_clusters/core/config.h"
#include "components/history_clusters/core/on_device_clustering_features.h"
#include "components/history_clusters/core/on_device_clustering_util.h"
#include "components/optimization_guide/core/entity_metadata.h"

namespace history_clusters {

KeywordClusterFinalizer::KeywordClusterFinalizer() = default;
KeywordClusterFinalizer::KeywordClusterFinalizer(
const base::flat_map<std::string, optimization_guide::EntityMetadata>&
entity_metadata_map)
: entity_metadata_map_(entity_metadata_map) {}
KeywordClusterFinalizer::~KeywordClusterFinalizer() = default;

void KeywordClusterFinalizer::FinalizeCluster(history::Cluster& cluster) {
base::flat_set<std::u16string> keywords_set;
for (const auto& visit : cluster.visits) {
if (GetConfig().should_exclude_keywords_from_noisy_visits &&
IsNoisyVisit(visit)) {
if (!GetConfig().keyword_filter_on_noisy_visits && IsNoisyVisit(visit)) {
// Do not put keywords if user visits the page a lot and it's not a
// search-like visit.
continue;
Expand All @@ -29,8 +32,20 @@ void KeywordClusterFinalizer::FinalizeCluster(history::Cluster& cluster) {
for (const auto& entity :
visit.annotated_visit.content_annotations.model_annotations.entities) {
keywords_set.insert(base::UTF8ToUTF16(entity.id));

if (GetConfig().keyword_filter_on_entity_aliases) {
auto it = entity_metadata_map_.find(entity.id);
if (it != entity_metadata_map_.end()) {
for (size_t i = 0; i < it->second.human_readable_aliases.size() &&
i < GetConfig().max_entity_aliases_in_keywords;
i++) {
keywords_set.insert(
base::UTF8ToUTF16(it->second.human_readable_aliases[i]));
}
}
}
}
if (GetConfig().should_include_categories_in_keywords) {
if (GetConfig().keyword_filter_on_categories) {
for (const auto& category : visit.annotated_visit.content_annotations
.model_annotations.categories) {
keywords_set.insert(base::UTF8ToUTF16(category.id));
Expand Down
17 changes: 16 additions & 1 deletion components/history_clusters/core/keyword_cluster_finalizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,33 @@
#ifndef COMPONENTS_HISTORY_CLUSTERS_CORE_KEYWORD_CLUSTER_FINALIZER_H_
#define COMPONENTS_HISTORY_CLUSTERS_CORE_KEYWORD_CLUSTER_FINALIZER_H_

#include <string>

#include "base/containers/flat_set.h"
#include "components/history_clusters/core/cluster_finalizer.h"

namespace optimization_guide {
struct EntityMetadata;
} // namespace optimization_guide

namespace history_clusters {

// A cluster finalizer that determines the set of keywords for a given cluster.
class KeywordClusterFinalizer : public ClusterFinalizer {
public:
KeywordClusterFinalizer();
explicit KeywordClusterFinalizer(
const base::flat_map<std::string, optimization_guide::EntityMetadata>&
entity_metadata_map);
~KeywordClusterFinalizer() override;

// ClusterFinalizer:
void FinalizeCluster(history::Cluster& cluster) override;

private:
// A map from human readable entity name to the metadata associated with that
// entity name.
const base::flat_map<std::string, optimization_guide::EntityMetadata>
entity_metadata_map_;
};

} // namespace history_clusters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "components/history_clusters/core/clustering_test_utils.h"
#include "components/history_clusters/core/config.h"
#include "components/history_clusters/core/on_device_clustering_features.h"
#include "components/optimization_guide/core/entity_metadata.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"

Expand All @@ -20,10 +21,17 @@ using ::testing::UnorderedElementsAre;
class KeywordClusterFinalizerTest : public ::testing::Test {
public:
void SetUp() override {
cluster_finalizer_ = std::make_unique<KeywordClusterFinalizer>();

config_.should_exclude_keywords_from_noisy_visits = true;
config_.should_include_categories_in_keywords = false;
optimization_guide::EntityMetadata github_md;
github_md.human_readable_aliases = {"git hub", "github llc"};
base::flat_map<std::string, optimization_guide::EntityMetadata>
entity_metadata_map;
entity_metadata_map["github"] = github_md;
cluster_finalizer_ =
std::make_unique<KeywordClusterFinalizer>(entity_metadata_map);

config_.keyword_filter_on_noisy_visits = false;
config_.keyword_filter_on_categories = false;
config_.keyword_filter_on_entity_aliases = false;
SetConfigForTesting(config_);
}

Expand Down Expand Up @@ -73,26 +81,21 @@ TEST_F(KeywordClusterFinalizerTest, IncludesKeywordsBasedOnFeatureParameters) {
UnorderedElementsAre(u"github", u"otherentity"));
}

class KeywordClusterFinalizerIncludeAllTest : public ::testing::Test {
class KeywordClusterFinalizerIncludeAllTest
: public KeywordClusterFinalizerTest {
public:
void SetUp() override {
cluster_finalizer_ = std::make_unique<KeywordClusterFinalizer>();
KeywordClusterFinalizerTest::SetUp();

config_.should_exclude_keywords_from_noisy_visits = false;
config_.should_include_categories_in_keywords = true;
config_.keyword_filter_on_noisy_visits = true;
config_.keyword_filter_on_categories = true;
config_.keyword_filter_on_entity_aliases = true;
config_.max_entity_aliases_in_keywords = 1;
SetConfigForTesting(config_);
}

void TearDown() override { cluster_finalizer_.reset(); }

void FinalizeCluster(history::Cluster& cluster) {
cluster_finalizer_->FinalizeCluster(cluster);
}

private:
Config config_;
std::unique_ptr<KeywordClusterFinalizer> cluster_finalizer_;
base::test::TaskEnvironment task_environment_;
};

TEST_F(KeywordClusterFinalizerIncludeAllTest,
Expand Down Expand Up @@ -128,7 +131,7 @@ TEST_F(KeywordClusterFinalizerIncludeAllTest,
FinalizeCluster(cluster);
EXPECT_THAT(cluster.keywords,
UnorderedElementsAre(u"github", u"category", u"onlyinnoisyvisit",
u"otherentity"));
u"otherentity", u"git hub"));
}

} // namespace
Expand Down
90 changes: 29 additions & 61 deletions components/history_clusters/core/on_device_clustering_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,18 +189,14 @@ void OnDeviceClusteringBackend::OnBatchEntityMetadataRetrieved(
std::vector<history::ClusterVisit> cluster_visits;
cluster_visits.reserve(annotated_visits.size());

ProcessBatchOfVisits(clustering_request_source,
/*num_batches_processed_so_far=*/0,
/*index_to_process=*/0, std::move(cluster_visits),
completed_task, std::move(annotated_visits),
entity_metadata_start, std::move(callback),
entity_metadata_map);
ProcessVisits(clustering_request_source, std::move(cluster_visits),
completed_task, std::move(annotated_visits),
entity_metadata_start, std::move(callback),
entity_metadata_map);
}

void OnDeviceClusteringBackend::ProcessBatchOfVisits(
void OnDeviceClusteringBackend::ProcessVisits(
ClusteringRequestSource clustering_request_source,
size_t num_batches_processed_so_far,
size_t index_to_process,
std::vector<history::ClusterVisit> cluster_visits,
optimization_guide::BatchEntityMetadataTask* completed_task,
std::vector<history::AnnotatedVisit> annotated_visits,
Expand All @@ -212,31 +208,9 @@ void OnDeviceClusteringBackend::ProcessBatchOfVisits(

base::ElapsedThreadTimer process_batch_timer;

// Entries in |annotated_visits| that have index greater than or equal to
// |index_stop_batch_processing| should not be processed in this task loop.
size_t index_stop_batch_processing =
index_to_process + GetConfig().clustering_tasks_batch_size;

// Process all entries in one go in certain cases. e.g., if
// |clustering_request_source| is user blocking.
if (!base::FeatureList::IsEnabled(
features::kSplitClusteringTasksToSmallerBatches) ||
clustering_request_source == ClusteringRequestSource::kJourneysPage ||
annotated_visits.size() <= 1) {
index_stop_batch_processing = annotated_visits.size();
}

// Avoid overflows.
index_stop_batch_processing =
std::min(index_stop_batch_processing, annotated_visits.size());

base::UmaHistogramCounts1000(
"History.Clusters.Backend.ProcessBatchOfVisits.BatchSize",
index_stop_batch_processing - index_to_process);

while (index_to_process < index_stop_batch_processing) {
const auto& visit = annotated_visits[index_to_process];
++index_to_process;
base::flat_map<std::string, optimization_guide::EntityMetadata>
human_readable_entity_name_to_metadata_map;
for (const auto& visit : annotated_visits) {
history::ClusterVisit cluster_visit;
cluster_visit.annotated_visit = visit;
const std::string& visit_host = visit.url_row.url().host();
Expand Down Expand Up @@ -303,6 +277,8 @@ void OnDeviceClusteringBackend::ProcessBatchOfVisits(
rewritten_entity.id = entity_metadata_it->second.human_readable_name;
cluster_visit.annotated_visit.content_annotations.model_annotations
.entities.push_back(rewritten_entity);
human_readable_entity_name_to_metadata_map[rewritten_entity.id] =
entity_metadata_it->second;

for (const auto& category :
entity_metadata_it->second.human_readable_categories) {
Expand Down Expand Up @@ -330,42 +306,29 @@ void OnDeviceClusteringBackend::ProcessBatchOfVisits(
cluster_visits.push_back(cluster_visit);
}

if (index_to_process >= annotated_visits.size()) {
RecordBatchUpdateProcessingTime(process_batch_timer.Elapsed());
OnAllVisitsFinishedProcessing(
clustering_request_source, num_batches_processed_so_far + 1,
completed_task, std::move(cluster_visits), std::move(callback));
return;
}

RecordBatchUpdateProcessingTime(process_batch_timer.Elapsed());
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::BindOnce(&OnDeviceClusteringBackend::ProcessBatchOfVisits,
weak_ptr_factory_.GetWeakPtr(), clustering_request_source,
num_batches_processed_so_far + 1, index_to_process,
std::move(cluster_visits), completed_task,
std::move(annotated_visits), entity_metadata_start,
std::move(callback), entity_metadata_map));
OnAllVisitsFinishedProcessing(
clustering_request_source, completed_task, std::move(cluster_visits),
std::move(human_readable_entity_name_to_metadata_map),
std::move(callback));
}

void OnDeviceClusteringBackend::OnAllVisitsFinishedProcessing(
ClusteringRequestSource clustering_request_source,
size_t num_batches_processed,
optimization_guide::BatchEntityMetadataTask* completed_task,
std::vector<history::ClusterVisit> cluster_visits,
base::flat_map<std::string, optimization_guide::EntityMetadata>
human_readable_entity_name_to_entity_metadata_map,
ClustersCallback callback) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

base::UmaHistogramCounts100(
"History.Clusters.Backend.NumBatchesProcessedForVisits",
num_batches_processed);

// Mark the task as completed, which will destruct |entity_metadata_map|.
// Mark the task as completed, as we are done with it and have moved
// everything adequately at this point.
if (completed_task) {
auto it = in_flight_batch_entity_metadata_tasks_.find(completed_task);
if (it != in_flight_batch_entity_metadata_tasks_.end())
if (it != in_flight_batch_entity_metadata_tasks_.end()) {
in_flight_batch_entity_metadata_tasks_.erase(it);
}
}

// Post the actual clustering work onto the thread pool, then reply on the
Expand All @@ -378,7 +341,8 @@ void OnDeviceClusteringBackend::OnAllVisitsFinishedProcessing(
FROM_HERE,
base::BindOnce(
&OnDeviceClusteringBackend::ClusterVisitsOnBackgroundThread,
engagement_score_provider_ != nullptr, std::move(cluster_visits)),
engagement_score_provider_ != nullptr, std::move(cluster_visits),
std::move(human_readable_entity_name_to_entity_metadata_map)),
std::move(callback));
return;
}
Expand All @@ -391,15 +355,18 @@ void OnDeviceClusteringBackend::OnAllVisitsFinishedProcessing(
FROM_HERE,
base::BindOnce(
&OnDeviceClusteringBackend::ClusterVisitsOnBackgroundThread,
engagement_score_provider_ != nullptr, std::move(cluster_visits)),
engagement_score_provider_ != nullptr, std::move(cluster_visits),
std::move(human_readable_entity_name_to_entity_metadata_map)),
std::move(callback));
}

// static
std::vector<history::Cluster>
OnDeviceClusteringBackend::ClusterVisitsOnBackgroundThread(
bool engagement_score_provider_is_valid,
std::vector<history::ClusterVisit> visits) {
std::vector<history::ClusterVisit> visits,
base::flat_map<std::string, optimization_guide::EntityMetadata>
human_readable_entity_name_to_entity_metadata_map) {
base::ElapsedThreadTimer cluster_visits_timer;

// TODO(crbug.com/1260145): All of these objects are "stateless" between
Expand Down Expand Up @@ -439,7 +406,8 @@ OnDeviceClusteringBackend::ClusterVisitsOnBackgroundThread(
GetConfig().should_filter_noisy_clusters) {
cluster_finalizers.push_back(std::make_unique<NoisyClusterFinalizer>());
}
cluster_finalizers.push_back(std::make_unique<KeywordClusterFinalizer>());
cluster_finalizers.push_back(std::make_unique<KeywordClusterFinalizer>(
human_readable_entity_name_to_entity_metadata_map));
if (GetConfig().should_label_clusters) {
cluster_finalizers.push_back(std::make_unique<LabelClusterFinalizer>());
}
Expand Down

0 comments on commit fb2f9bd

Please sign in to comment.