Skip to content

Commit

Permalink
Reland "Repurpose the BatchAnnotateValidation feature to be general f…
Browse files Browse the repository at this point in the history
…or all PCA"

This is a reland of commit e04d17b

Original change's description:
> Repurpose the BatchAnnotateValidation feature to be general for all PCA
>
> Changes the existing BatchAnnotateValidation feature to be fully
> configurable for all annotation types, changing its name in the process.
>
> This can be enabled via the command line for integration testing (where
> it will also output the inference via JSON), or by feature flag for
> performance testing in the wild.
>
> Also moves the feature to a separate class which is null whenever the
> feature is disabled.
>
> Bug: 1288877
> Change-Id: I85757e5e401d4f75898bc45d83b4e93ccd079175
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3635390
> Reviewed-by: Sophie Chang <sophiechang@chromium.org>
> Commit-Queue: Robert Ogden <robertogden@chromium.org>
> Reviewed-by: Yao Xiao <yaoxia@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#1001913}

Cq-Include-Trybots: luci.chromium.try:linux_chromium_chromeos_asan_rel_ng
Cq-Include-Trybots: luci.chromium.try:linux_chromium_tsan_rel_ng
Cq-Include-Trybots: luci.chromium.try:linux_chromium_asan_rel_ng
Bug: 1288877
Change-Id: I7d03cd387343857e7a7a2845fb0a63ff40a31be4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3640407
Reviewed-by: Sophie Chang <sophiechang@chromium.org>
Commit-Queue: Robert Ogden <robertogden@chromium.org>
Reviewed-by: Yao Xiao <yaoxia@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1002504}
  • Loading branch information
Robert Ogden authored and Chromium LUCI CQ committed May 12, 2022
1 parent a62a5a8 commit f7e2f97
Show file tree
Hide file tree
Showing 24 changed files with 861 additions and 171 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ class BrowsingTopicsDisabledInternalsBrowserTest
features::kPrivacySandboxAdsAPIsOverride,
privacy_sandbox::kPrivacySandboxSettings3,
optimization_guide::features::kPageContentAnnotations,
optimization_guide::features::kBatchAnnotationsValidation});
optimization_guide::features::kPageContentAnnotationsValidation,
});
}

protected:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ class PageContentAnnotationsServiceValidationBrowserTest
public:
PageContentAnnotationsServiceValidationBrowserTest() {
scoped_feature_list_.InitWithFeatures(
{features::kOptimizationHints, features::kBatchAnnotationsValidation},
{features::kOptimizationHints,
features::kPageContentAnnotationsValidation},
{features::kPageContentAnnotations});
}

Expand Down Expand Up @@ -907,34 +908,6 @@ IN_PROC_BROWSER_TEST_F(PageContentAnnotationsServiceModelNotLoadedOnStartupTest,
"OptimizationGuide.ModelExecutor.ExecutionStatus.PageVisibility", 2);
}

class PageContentAnnotationsServiceValidationTest
: public PageContentAnnotationsServiceBrowserTest {
public:
PageContentAnnotationsServiceValidationTest() {
scoped_feature_list_.InitAndEnableFeatureWithParameters(
features::kBatchAnnotationsValidation, {
{"startup_delay", "5"},
{"batch_size", "10"},
});
}
~PageContentAnnotationsServiceValidationTest() override = default;

private:
base::test::ScopedFeatureList scoped_feature_list_;
};

IN_PROC_BROWSER_TEST_F(PageContentAnnotationsServiceValidationTest,
StartsValidation) {
base::HistogramTester histogram_tester;

RetryForHistogramUntilCountReached(
&histogram_tester,
"OptimizationGuide.PageContentAnnotationsService.ValidationRun", 1);

histogram_tester.ExpectUniqueSample(
"OptimizationGuide.PageContentAnnotationsService.ValidationRun", 10, 1);
}

#endif // BUILDFLAG(BUILD_WITH_TFLITE_LIB)

} // namespace optimization_guide
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,10 @@ namespace {
bool ShouldEnablePageContentAnnotations() {
// Allow for the validation experiment and/or the Topics experiment to enable
// the PCAService without need to enable both features.
if (!optimization_guide::features::IsPageContentAnnotationEnabled() &&
!optimization_guide::features::BatchAnnotationsValidationEnabled() &&
!base::FeatureList::IsEnabled(blink::features::kBrowsingTopics)) {
return false;
}
return true;
return optimization_guide::features::IsPageContentAnnotationEnabled() ||
base::FeatureList::IsEnabled(
optimization_guide::features::kPageContentAnnotationsValidation) ||
base::FeatureList::IsEnabled(blink::features::kBrowsingTopics);
}

} // namespace
Expand Down
3 changes: 3 additions & 0 deletions components/optimization_guide/content/browser/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ static_library("browser") {
"optimization_guide_decider.h",
"page_content_annotations_service.cc",
"page_content_annotations_service.h",
"page_content_annotations_validator.cc",
"page_content_annotations_validator.h",
"page_content_annotations_web_contents_observer.cc",
"page_content_annotations_web_contents_observer.h",
"page_content_annotator.h",
Expand Down Expand Up @@ -76,6 +78,7 @@ source_set("unit_tests") {
testonly = true
sources = [
"page_content_annotations_service_unittest.cc",
"page_content_annotations_validator_unittest.cc",
"page_content_annotations_web_contents_observer_unittest.cc",
"page_text_dump_result_unittest.cc",
"page_text_observer_unittest.cc",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/default_tick_clock.h"
#include "base/timer/timer.h"
#include "components/history/core/browser/history_service.h"
#include "components/leveldb_proto/public/proto_database_provider.h"
#include "components/optimization_guide/content/browser/page_content_annotations_validator.h"
#include "components/optimization_guide/core/local_page_entities_metadata_provider.h"
#include "components/optimization_guide/core/noisy_metrics_recorder.h"
#include "components/optimization_guide/core/optimization_guide_enums.h"
Expand Down Expand Up @@ -105,14 +104,6 @@ void MaybeRecordVisibilityUKM(
}
#endif /* BUILDFLAG(BUILD_WITH_TFLITE_LIB) */

const char* kRandomWords[] = {
"interesting", "chunky", "maniacal", "tickle", "lettuce",
"obsequious", "stir", "bless", "colossal", "squealing",
"elegant", "ambitious", "eight", "frighten", "descriptive",
"pretty", "curly", "regular", "uneven", "heap",
};
const size_t kCountRandomWords = 20;

} // namespace

PageContentAnnotationsService::PageContentAnnotationsService(
Expand Down Expand Up @@ -154,22 +145,8 @@ PageContentAnnotationsService::PageContentAnnotationsService(
database_provider, database_dir, background_task_runner);
}

if (features::BatchAnnotationsValidationEnabled()) {
// Normally the caller would do this, but we are our own caller.
RequestAndNotifyWhenModelAvailable(
features::BatchAnnotationsValidationUsePageTopics()
? AnnotationType::kPageTopics
: AnnotationType::kContentVisibility,
base::DoNothing());

validation_timer_ = std::make_unique<base::OneShotTimer>(
base::DefaultTickClock::GetInstance());
validation_timer_->Start(
FROM_HERE, features::BatchAnnotationValidationStartupDelay(),
base::BindRepeating(
&PageContentAnnotationsService::RunBatchAnnotationValidation,
weak_ptr_factory_.GetWeakPtr()));
}
validator_ =
PageContentAnnotationsValidator::MaybeCreateAndStartTimer(annotator_);
}

PageContentAnnotationsService::~PageContentAnnotationsService() = default;
Expand Down Expand Up @@ -605,32 +582,6 @@ void PageContentAnnotationsService::PersistRemotePageEntities(
PageContentAnnotationsType::kModelAnnotations);
}

void PageContentAnnotationsService::RunBatchAnnotationValidation() {
DCHECK(features::BatchAnnotationsValidationEnabled());
DCHECK(validation_timer_);
validation_timer_.reset();

std::vector<std::string> dummy_inputs;
dummy_inputs.reserve(features::BatchAnnotationsValidationBatchSize());
for (size_t i = 0; i < features::BatchAnnotationsValidationBatchSize(); i++) {
const char* word1 = kRandomWords[base::RandGenerator(kCountRandomWords)];
const char* word2 = kRandomWords[base::RandGenerator(kCountRandomWords)];
dummy_inputs.emplace_back(base::StringPrintf("%s-%s.com", word1, word2));
}

LOCAL_HISTOGRAM_COUNTS_100(
"OptimizationGuide.PageContentAnnotationsService.ValidationRun",
dummy_inputs.size());

if (!features::BatchAnnotationsValidationUsePageTopics()) {
BatchAnnotate(base::DoNothing(), dummy_inputs,
AnnotationType::kContentVisibility);
return;
}

BatchAnnotatePageTopics(base::DoNothing(), dummy_inputs);
}

// static
HistoryVisit PageContentAnnotationsService::CreateHistoryVisitFromWebContents(
content::WebContents* web_contents,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/gurl.h"

namespace base {
class OneShotTimer;
} // namespace base

namespace content {
class WebContents;
} // namespace content
Expand All @@ -54,8 +50,9 @@ namespace optimization_guide {
class LocalPageEntitiesMetadataProvider;
class OptimizationGuideModelProvider;
class PageContentAnnotationsModelManager;
class PageContentAnnotationsServiceTest;
class PageContentAnnotationsServiceBrowserTest;
class PageContentAnnotationsServiceTest;
class PageContentAnnotationsValidator;
class PageContentAnnotationsWebContentsObserver;

// The information used by HistoryService to identify a visit to a URL.
Expand Down Expand Up @@ -255,10 +252,6 @@ class PageContentAnnotationsService : public KeyedService,
PageContentAnnotationsType annotation_type,
history::QueryURLResult url_result);

// Runs a batch annotation validation, that is calls |BatchAnnotate| with
// dummy input and discards the output.
void RunBatchAnnotationValidation();

// A metadata-only provider for page entities (as opposed to |model_manager_|
// which does both entity model execution and metadata providing) that uses a
// local database to provide the metadata for a given entity id. This is only
Expand Down Expand Up @@ -298,8 +291,9 @@ class PageContentAnnotationsService : public KeyedService,
// no visits are actively be annotated and a new batch can be started.
std::vector<HistoryVisit> current_visit_annotation_batch_;

// Is only ever set when the feature is enabled.
std::unique_ptr<base::OneShotTimer> validation_timer_;
// Set during this' ctor if the corresponding command line or feature flags
// are set.
std::unique_ptr<PageContentAnnotationsValidator> validator_;

base::WeakPtrFactory<PageContentAnnotationsService> weak_ptr_factory_{this};
};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright 2022 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/optimization_guide/content/browser/page_content_annotations_validator.h"

#include "base/bind.h"
#include "base/callback.h"
#include "base/logging.h"
#include "base/memory/ptr_util.h"
#include "base/rand_util.h"
#include "base/strings/strcat.h"
#include "base/strings/stringprintf.h"
#include "base/time/default_tick_clock.h"
#include "components/optimization_guide/content/browser/page_content_annotator.h"
#include "components/optimization_guide/core/optimization_guide_features.h"
#include "components/optimization_guide/core/optimization_guide_switches.h"
#include "components/optimization_guide/core/page_content_annotations_common.h"

namespace optimization_guide {

namespace {

const char* kRandomNouns[] = {
"Airplane", "Boat", "Book", "Dinosaur", "Earth",
"Football", "Fork", "Hummingbird", "Magic Wand", "Mailbox",
"Molecule", "Pizza", "Record Player", "Skeleton", "Soda",
"Sphere", "Strawberry", "Tiger", "Turkey", "Wolf",
};
const size_t kCountRandomNouns = 20;

void LogAnnotationResultToConsole(
const std::vector<BatchAnnotationResult>& results) {
if (!switches::LogPageContentAnnotationsValidationToConsole()) {
return;
}

LOG(ERROR) << "PageContentAnnotations Validation Complete:";
for (size_t i = 0; i < results.size(); i++) {
LOG(ERROR) << " " << i << ": " << results[i].ToJSON();
}
}

} // namespace

PageContentAnnotationsValidator::~PageContentAnnotationsValidator() = default;
PageContentAnnotationsValidator::PageContentAnnotationsValidator(
PageContentAnnotator* annotator)
: annotator_(annotator) {
DCHECK(annotator);
for (AnnotationType type : {
AnnotationType::kPageTopics,
AnnotationType::kPageEntities,
AnnotationType::kContentVisibility,
}) {
if (features::PageContentAnnotationValidationEnabledForType(type)) {
enabled_annotation_types_.push_back(type);
annotator_->RequestAndNotifyWhenModelAvailable(type, base::DoNothing());
}
}

timer_.Start(FROM_HERE,
features::PageContentAnnotationValidationStartupDelay(),
base::BindOnce(&PageContentAnnotationsValidator::Run,
weak_ptr_factory_.GetWeakPtr()));
}

// static
std::unique_ptr<PageContentAnnotationsValidator>
PageContentAnnotationsValidator::MaybeCreateAndStartTimer(
PageContentAnnotator* annotator) {
// This can happen with certain build/feature flags.
if (!annotator) {
return nullptr;
}

bool enabled_for_any_type = false;
for (AnnotationType type : {
AnnotationType::kPageTopics,
AnnotationType::kPageEntities,
AnnotationType::kContentVisibility,
}) {
enabled_for_any_type |=
features::PageContentAnnotationValidationEnabledForType(type);
}
if (!enabled_for_any_type) {
return nullptr;
}

// This is done because |PageContentAnnotationsValidator| has a private ctor.
return base::WrapUnique(new PageContentAnnotationsValidator(annotator));
}

void PageContentAnnotationsValidator::Run() {
for (AnnotationType type : enabled_annotation_types_) {
annotator_->Annotate(base::BindOnce(&LogAnnotationResultToConsole),
BuildInputsForType(type), type);
}
}

// static
std::vector<std::string> PageContentAnnotationsValidator::BuildInputsForType(
AnnotationType type) {
absl::optional<std::vector<std::string>> cmd_line_input =
switches::PageContentAnnotationsValidationInputForType(type);
if (cmd_line_input) {
return *cmd_line_input;
}

std::vector<std::string> inputs;
for (size_t i = 0; i < features::PageContentAnnotationsValidationBatchSize();
i++) {
const char* word1 = kRandomNouns[base::RandGenerator(kCountRandomNouns)];
const char* word2 = kRandomNouns[base::RandGenerator(kCountRandomNouns)];
inputs.emplace_back(base::StrCat({word1, " ", word2}));
}
return inputs;
}

} // namespace optimization_guide
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2022 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_CONTENT_ANNOTATIONS_VALIDATOR_H_
#define COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_CONTENT_ANNOTATIONS_VALIDATOR_H_

#include <string>
#include <vector>

#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/timer/timer.h"
#include "components/optimization_guide/core/page_content_annotation_type.h"

namespace optimization_guide {

class PageContentAnnotator;

// This class manages validation runs of the PageContentAnnotationsService,
// running the ML model for a given AnnotationType on dummy data after some
// delay from browser startup. This feature can be controlled by experimental
// feature flags and command line.
class PageContentAnnotationsValidator {
public:
~PageContentAnnotationsValidator();

// If the appropriate feature flag or command line switch is given, an
// instance of |this| is created, else nullptr.
static std::unique_ptr<PageContentAnnotationsValidator>
MaybeCreateAndStartTimer(PageContentAnnotator* annotator);

private:
explicit PageContentAnnotationsValidator(PageContentAnnotator* annotator);

// Runs the validation for all enabled AnnotationTypes.
void Run();

// Creates a set of dummy input data to run for the given |type|, either
// randomly generated off of experiment parameters or given on the command
// line.
static std::vector<std::string> BuildInputsForType(AnnotationType type);

std::vector<AnnotationType> enabled_annotation_types_;

// Out lives |this|, not owned.
raw_ptr<PageContentAnnotator> annotator_;

// Starts in the ctor, roughly on browser start, and calls |Run|.
base::OneShotTimer timer_;

base::WeakPtrFactory<PageContentAnnotationsValidator> weak_ptr_factory_{this};
};

} // namespace optimization_guide

#endif // COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_CONTENT_ANNOTATIONS_VALIDATOR_H_

0 comments on commit f7e2f97

Please sign in to comment.