Skip to content

Commit

Permalink
Rewrite domain blocking heuristics for WebGL and WebGPU.
Browse files Browse the repository at this point in the history
The new heuristics allow one context restoration per domain which had
a live WebGL context or WebGPU device when contexts / devices were
lost and/or the GPU process crashed. URLs are blocked together, in a
group, if the GPU process crashes. The presence of multiple groups of
URLs determines if all domains should be blocked from accessing 3D APIs.
Domain blocking expires after a time period. Unit tests are rewritten
and expanded.

Update WebGL's and WebGPU's context_lost tests for the new heuristics.
Update Pixel_WebGLSadCanvas and fix preexisting browser bugs affecting
it. Add Pixel_WebGLContextRestored. Mark both of these tests failing
until new images are triaged.

Bug: 1167246
Change-Id: Ib656284a3673e168b4f340d6c0bc4317ab881300
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3811145
Reviewed-by: Kai Ninomiya <kainino@chromium.org>
Reviewed-by: Austin Eng <enga@chromium.org>
Commit-Queue: Kenneth Russell <kbr@chromium.org>
Reviewed-by: Kyle Charbonneau <kylechar@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1033825}
  • Loading branch information
kenrussell authored and Chromium LUCI CQ committed Aug 11, 2022
1 parent 9b4306d commit 601ac28
Show file tree
Hide file tree
Showing 22 changed files with 737 additions and 263 deletions.
9 changes: 5 additions & 4 deletions components/viz/host/gpu_host_impl.cc
Expand Up @@ -195,9 +195,9 @@ void GpuHostImpl::AddConnectionErrorHandler(base::OnceClosure handler) {
void GpuHostImpl::BlockLiveOffscreenContexts() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

for (auto& url : urls_with_live_offscreen_contexts_) {
delegate_->BlockDomainFrom3DAPIs(url, gpu::DomainGuilt::kUnknown);
}
std::set<GURL> urls(urls_with_live_offscreen_contexts_.begin(),
urls_with_live_offscreen_contexts_.end());
delegate_->BlockDomainsFrom3DAPIs(urls, gpu::DomainGuilt::kUnknown);
}

void GpuHostImpl::ConnectFrameSinkManager(
Expand Down Expand Up @@ -549,7 +549,8 @@ void GpuHostImpl::DidLoseContext(bool offscreen,
return;
}

delegate_->BlockDomainFrom3DAPIs(active_url, guilt);
std::set<GURL> urls{active_url};
delegate_->BlockDomainsFrom3DAPIs(urls, guilt);
}

void GpuHostImpl::DisableGpuCompositing() {
Expand Down
4 changes: 2 additions & 2 deletions components/viz/host/gpu_host_impl.h
Expand Up @@ -82,8 +82,8 @@ class VIZ_HOST_EXPORT GpuHostImpl : public mojom::GpuHost
virtual void DidUpdateOverlayInfo(const gpu::OverlayInfo& overlay_info) = 0;
virtual void DidUpdateDXGIInfo(gfx::mojom::DXGIInfoPtr dxgi_info) = 0;
#endif
virtual void BlockDomainFrom3DAPIs(const GURL& url,
gpu::DomainGuilt guilt) = 0;
virtual void BlockDomainsFrom3DAPIs(const std::set<GURL>& urls,
gpu::DomainGuilt guilt) = 0;
virtual void DisableGpuCompositing() = 0;
virtual bool GpuAccessAllowed() const = 0;
virtual gpu::GpuDiskCacheFactory* GetGpuDiskCacheFactory() = 0;
Expand Down
6 changes: 3 additions & 3 deletions content/browser/gpu/gpu_data_manager_impl.cc
Expand Up @@ -341,10 +341,10 @@ void GpuDataManagerImpl::HandleGpuSwitch() {
private_->HandleGpuSwitch();
}

void GpuDataManagerImpl::BlockDomainFrom3DAPIs(const GURL& url,
gpu::DomainGuilt guilt) {
void GpuDataManagerImpl::BlockDomainsFrom3DAPIs(const std::set<GURL>& urls,
gpu::DomainGuilt guilt) {
base::AutoLock auto_lock(lock_);
private_->BlockDomainFrom3DAPIs(url, guilt);
private_->BlockDomainsFrom3DAPIs(urls, guilt);
}

bool GpuDataManagerImpl::Are3DAPIsBlocked(const GURL& top_origin_url,
Expand Down
11 changes: 7 additions & 4 deletions content/browser/gpu/gpu_data_manager_impl.h
Expand Up @@ -180,11 +180,14 @@ class CONTENT_EXPORT GpuDataManagerImpl : public GpuDataManager,
// using client-facing 3D APIs (WebGL, Pepper 3D), either because
// the domain has caused the GPU to reset, or because too many GPU
// resets have been observed globally recently, and system stability
// might be compromised.
// might be compromised. A set of URLs is passed because in the
// situation where the GPU process crashes, the implementation needs
// to know that these URLs all came from the same crash.
//
// The given URL may be a partial URL (including at least the host)
// or a full URL to a page.
void BlockDomainFrom3DAPIs(const GURL& url, gpu::DomainGuilt guilt);
// In the set, each URL may be a partial URL (including at least the
// host) or a full URL to a page.
void BlockDomainsFrom3DAPIs(const std::set<GURL>& urls,
gpu::DomainGuilt guilt);
bool Are3DAPIsBlocked(const GURL& top_origin_url,
ThreeDAPIType requester);
void UnblockDomainFrom3DAPIs(const GURL& url);
Expand Down
152 changes: 86 additions & 66 deletions content/browser/gpu/gpu_data_manager_impl_private.cc
Expand Up @@ -373,19 +373,6 @@ void DisplayReconfigCallback(CGDirectDisplayID display,
}
#endif // BUILDFLAG(IS_MAC)

// Block all domains' use of 3D APIs for this many milliseconds if
// approaching a threshold where system stability might be compromised.
const int64_t kBlockAllDomainsMs = 10000;
const int kNumResetsWithinDuration = 1;

// Enums for UMA histograms.
enum BlockStatusHistogram {
BLOCK_STATUS_NOT_BLOCKED,
BLOCK_STATUS_SPECIFIC_DOMAIN_BLOCKED,
BLOCK_STATUS_ALL_DOMAINS_BLOCKED,
BLOCK_STATUS_MAX
};

void OnVideoMemoryUsageStats(
GpuDataManager::VideoMemoryUsageStatsCallback callback,
const gpu::VideoMemoryUsageStats& stats) {
Expand Down Expand Up @@ -975,29 +962,35 @@ void GpuDataManagerImplPrivate::RemoveObserver(
}

void GpuDataManagerImplPrivate::UnblockDomainFrom3DAPIs(const GURL& url) {
// This method must do two things:
//
// 1. If the specific domain is blocked, then unblock it.
//
// 2. Reset our notion of how many GPU resets have occurred recently.
// This is necessary even if the specific domain was blocked.
// Otherwise, if we call Are3DAPIsBlocked with the same domain right
// after unblocking it, it will probably still be blocked because of
// the recent GPU reset caused by that domain.
//
// These policies could be refined, but at a certain point the behavior
// will become difficult to explain.
// Remove all instances of this domain from the recent domain
// blocking events. This may have the side-effect of removing the
// kAllDomainsBlocked status.

// Shortcut in the common case where no blocking has occurred. This
// is important to not regress navigation performance, since this is
// now called on every user-initiated navigation.
if (blocked_domains_.empty() && timestamps_of_gpu_resets_.empty())
if (blocked_domains_.empty())
return;

std::string domain = GetDomainFromURL(url);
auto iter = blocked_domains_.begin();
while (iter != blocked_domains_.end()) {
if (domain == iter->second.domain) {
iter = blocked_domains_.erase(iter);
} else {
++iter;
}
}

blocked_domains_.erase(domain);
timestamps_of_gpu_resets_.clear();
// If there have been enough context loss events spread over a
// long enough time period, it is possible that a given page will be
// blocked from using 3D APIs because of other domains' entries, and
// that reloading this page will not allow 3D APIs to run on this
// page. Compared to an earlier version of these heuristics, it's
// not clear whether unblocking a domain that doesn't exist in the
// blocked_domains_ list should clear out the list entirely.
// Currently, kBlockedDomainExpirationPeriod is set low enough that
// this should hopefully not be a problem in practice.
}

void GpuDataManagerImplPrivate::UpdateGpuInfo(
Expand Down Expand Up @@ -1552,9 +1545,10 @@ void GpuDataManagerImplPrivate::OnDisplayMetricsChanged(
}));
}

void GpuDataManagerImplPrivate::BlockDomainFrom3DAPIs(const GURL& url,
gpu::DomainGuilt guilt) {
BlockDomainFrom3DAPIsAtTime(url, guilt, base::Time::Now());
void GpuDataManagerImplPrivate::BlockDomainsFrom3DAPIs(
const std::set<GURL>& urls,
gpu::DomainGuilt guilt) {
BlockDomainsFrom3DAPIsAtTime(urls, guilt, base::Time::Now());
}

bool GpuDataManagerImplPrivate::Are3DAPIsBlocked(const GURL& top_origin_url,
Expand Down Expand Up @@ -1602,17 +1596,43 @@ std::string GpuDataManagerImplPrivate::GetDomainFromURL(const GURL& url) const {
return url.host();
}

void GpuDataManagerImplPrivate::BlockDomainFrom3DAPIsAtTime(
const GURL& url,
void GpuDataManagerImplPrivate::BlockDomainsFrom3DAPIsAtTime(
const std::set<GURL>& urls,
gpu::DomainGuilt guilt,
base::Time at_time) {
if (!domain_blocking_enabled_)
return;

std::string domain = GetDomainFromURL(url);
// The coalescing of multiple entries for the same blocking event is
// crucially important for the algorithm. Coalescing based on timestamp
// would introduce flakiness.
std::set<std::string> domains;
for (const auto& url : urls) {
domains.insert(GetDomainFromURL(url));
}

for (const auto& domain : domains) {
blocked_domains_.insert({at_time, {domain, guilt}});
}
}

static const base::TimeDelta kBlockedDomainExpirationPeriod = base::Minutes(2);

blocked_domains_[domain] = guilt;
timestamps_of_gpu_resets_.push_back(at_time);
// Drops every domain blocking entry whose timestamp is strictly older than
// |at_time| - kBlockedDomainExpirationPeriod. Entries recorded exactly at
// that cutoff, or later, are kept.
//
// This method is const so that it can be called from const query paths; it
// relies on "blocked_domains_" being declared mutable to perform the cleanup.
void GpuDataManagerImplPrivate::ExpireOldBlockedDomainsAtTime(
    base::Time at_time) const {
  const base::Time everything_expired_before =
      at_time - kBlockedDomainExpirationPeriod;

  // "blocked_domains_" is a multimap keyed by timestamp, so the expired
  // entries form a prefix of the container. Use the container's own
  // lower_bound() rather than std::lower_bound(): the generic algorithm has
  // to step bidirectional iterators one node at a time, whereas the member
  // function searches the tree directly. Both return the first entry whose
  // key is not less than the cutoff.
  blocked_domains_.erase(
      blocked_domains_.begin(),
      blocked_domains_.lower_bound(everything_expired_before));
}

GpuDataManagerImplPrivate::DomainBlockStatus
Expand All @@ -1623,51 +1643,51 @@ GpuDataManagerImplPrivate::Are3DAPIsBlockedAtTime(const GURL& url,

// Note: adjusting the policies in this code will almost certainly
// require adjusting the associated unit tests.
std::string domain = GetDomainFromURL(url);

{
if (blocked_domains_.find(domain) != blocked_domains_.end()) {
// Err on the side of caution, and assume that if a particular
// domain shows up in the block map, it's there for a good
// reason and don't let its presence there automatically expire.
return DomainBlockStatus::kBlocked;
}
}
// First expire old domain blocks.
ExpireOldBlockedDomainsAtTime(at_time);

// Look at the timestamps of the recent GPU resets to see if there are
// enough within the threshold which would cause us to blocklist all
// domains. This doesn't need to be overly precise -- if time goes
// backward due to a system clock adjustment, that's fine.
std::string domain = GetDomainFromURL(url);
size_t losses_for_domain = std::count_if(
blocked_domains_.begin(), blocked_domains_.end(),
[domain](const auto& entry) { return (entry.second.domain == domain); });
// Allow one context loss per domain, so block if there are two or more.
if (losses_for_domain > 1)
return DomainBlockStatus::kBlocked;

// Look at and cluster the timestamps of recent domain blocking events to
// see if there are more than the threshold which would cause us to
// blocklist all domains. GPU process crashes or TDR events are
// discovered because the blocked domain entries all have the same
// timestamp.
//
// TODO(kbr): make this pay attention to the TDR thresholds in the
// Windows registry, but make sure it continues to be testable.
{
auto iter = timestamps_of_gpu_resets_.begin();
int num_resets_within_timeframe = 0;
while (iter != timestamps_of_gpu_resets_.end()) {
base::Time time = *iter;
base::TimeDelta delta_t = at_time - time;

// If this entry has "expired", just remove it.
if (delta_t.InMilliseconds() > kBlockAllDomainsMs) {
iter = timestamps_of_gpu_resets_.erase(iter);
continue;
int num_event_clusters = 0;
base::Time last_time; // Initialized to the "zero" time.

// Relies on the domain blocking events being sorted by increasing
// timestamp.
for (const auto& elem : blocked_domains_) {
if (last_time.is_null() || elem.first != last_time) {
last_time = elem.first;
++num_event_clusters;
}

++num_resets_within_timeframe;
++iter;
}

if (num_resets_within_timeframe >= kNumResetsWithinDuration) {
const int kMaxNumResetsWithinDuration = 2;

if (num_event_clusters > kMaxNumResetsWithinDuration)
return DomainBlockStatus::kAllDomainsBlocked;
}
}

return DomainBlockStatus::kNotBlocked;
}

int64_t GpuDataManagerImplPrivate::GetBlockAllDomainsDurationInMs() const {
return kBlockAllDomainsMs;
base::TimeDelta GpuDataManagerImplPrivate::GetDomainBlockingExpirationPeriod()
const {
return kBlockedDomainExpirationPeriod;
}

gpu::GpuMode GpuDataManagerImplPrivate::GetGpuMode() const {
Expand Down
56 changes: 37 additions & 19 deletions content/browser/gpu/gpu_data_manager_impl_private.h
Expand Up @@ -125,7 +125,8 @@ class CONTENT_EXPORT GpuDataManagerImplPrivate {

void HandleGpuSwitch();

void BlockDomainFrom3DAPIs(const GURL& url, gpu::DomainGuilt guilt);
void BlockDomainsFrom3DAPIs(const std::set<GURL>& urls,
gpu::DomainGuilt guilt);
bool Are3DAPIsBlocked(const GURL& top_origin_url,
ThreeDAPIType requester);

Expand All @@ -152,19 +153,29 @@ class CONTENT_EXPORT GpuDataManagerImplPrivate {

private:
friend class GpuDataManagerImplPrivateTest;
friend class GpuDataManagerImplPrivateTestP;

FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTest,
GpuInfoUpdate);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTest,
BlockAllDomainsFrom3DAPIs);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTest,
UnblockGuiltyDomainFrom3DAPIs);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTest,
UnblockDomainOfUnknownGuiltFrom3DAPIs);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTest,
UnblockOtherDomainFrom3DAPIs);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTest,
UnblockThisDomainFrom3DAPIs);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP,
SingleContextLossDoesNotBlockDomain);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP,
TwoContextLossesBlockDomain);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP,
TwoSimultaneousContextLossesDoNotBlockDomain);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP, DomainBlockExpires);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP, UnblockDomain);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP,
Domain1DoesNotBlockDomain2);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP,
UnblockingDomain1DoesNotUnblockDomain2);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP,
SimultaneousContextLossDoesNotBlock);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP,
MultipleTDRsBlockAll);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP, MultipleTDRsExpire);
FRIEND_TEST_ALL_PREFIXES(GpuDataManagerImplPrivateTestP,
MultipleTDRsCanBeUnblocked);

// Indicates the reason that access to a given client API (like
// WebGL or Pepper 3D) was blocked or not. This state is distinct
Expand All @@ -175,8 +186,6 @@ class CONTENT_EXPORT GpuDataManagerImplPrivate {
kNotBlocked,
};

using DomainGuiltMap = std::map<std::string, gpu::DomainGuilt>;

using GpuDataManagerObserverList =
base::ObserverListThreadSafe<GpuDataManagerObserver>;

Expand Down Expand Up @@ -206,12 +215,13 @@ class CONTENT_EXPORT GpuDataManagerImplPrivate {

// Implementation functions for blocking of 3D graphics APIs, used
// for unit testing.
void BlockDomainFrom3DAPIsAtTime(const GURL& url,
gpu::DomainGuilt guilt,
base::Time at_time);
void BlockDomainsFrom3DAPIsAtTime(const std::set<GURL>& url,
gpu::DomainGuilt guilt,
base::Time at_time);
void ExpireOldBlockedDomainsAtTime(base::Time at_time) const;
DomainBlockStatus Are3DAPIsBlockedAtTime(const GURL& url,
base::Time at_time) const;
int64_t GetBlockAllDomainsDurationInMs() const;
base::TimeDelta GetDomainBlockingExpirationPeriod() const;

// Notify all observers whenever there is a GPU info update.
void NotifyGpuInfoUpdate();
Expand Down Expand Up @@ -277,8 +287,16 @@ class CONTENT_EXPORT GpuDataManagerImplPrivate {
// they cause random failures.
bool update_histograms_ = true;

DomainGuiltMap blocked_domains_;
mutable std::list<base::Time> timestamps_of_gpu_resets_;
// One recorded blocking event for a single domain. Instances are stored as
// the mapped values of "blocked_domains_", whose key is the time at which
// the event was recorded.
struct DomainBlockingEntry {
DomainBlockingEntry(const std::string& domain, gpu::DomainGuilt guilt)
: domain(domain), guilt(guilt) {}

// Domain string the event was recorded against.
std::string domain;
// Guilt value supplied by the caller when the domain was blocked.
gpu::DomainGuilt guilt;
};

// Implicitly sorted by increasing timestamp.
mutable std::multimap<base::Time, DomainBlockingEntry> blocked_domains_;
bool domain_blocking_enabled_ = true;

bool application_is_visible_ = true;
Expand Down

0 comments on commit 601ac28

Please sign in to comment.