Skip to content

Commit

Permalink
Introduce History.DomainCount*_V3 metrics
Browse files Browse the repository at this point in the history
The existing "_V2" metrics include foreign/synced visits, which was not
intentional. This CL introduces a new set of "_V3" metrics that
properly filter out foreign visits.
Note that there are two slightly different kinds of foreign visits:
- "Legacy"-style visits, coming from TYPED_URLS. These are identified
  by SOURCE_SYNCED.
- "New"-style visits, coming from HISTORY. These are identified by a
  non-empty originator_visit_id, and also by SOURCE_SYNCED.
This CL filters out visits by both conditions, even though only
checking for SOURCE_SYNCED would technically be sufficient. The
reasons are:
a) `visit_source` is barely used otherwise, and might get removed at
   some point in the future.
b) Once `kSyncEnableHistoryDataType` is fully rolled out, the check
   for `visit_source` will become unnecessary (and checking the
   `originator_cache_guid` is simpler; no need to access the separate
   visit_source table).

The existing "_V2" metrics are *not* deprecated yet - it's useful to
keep them for a while, for ease of analysis and comparing with the new
ones.

Bug: 1422210
Change-Id: I12cabf24049f63b05c80ec8745dd8f4442bde271
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4315807
Reviewed-by: Mark Pearson <mpearson@chromium.org>
Reviewed-by: Tommy Li <tommycli@chromium.org>
Commit-Queue: Marc Treib <treib@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1115601}
  • Loading branch information
Marc Treib authored and Chromium LUCI CQ committed Mar 10, 2023
1 parent 6de20cc commit 1cfcda2
Show file tree
Hide file tree
Showing 10 changed files with 422 additions and 117 deletions.
49 changes: 31 additions & 18 deletions components/history/core/browser/history_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1822,54 +1822,67 @@ HistoryCountResult HistoryBackend::CountUniqueHostsVisitedLastMonth() {
return {!!db_, db_ ? db_->CountUniqueHostsVisitedLastMonth() : 0};
}

DomainDiversityResults HistoryBackend::GetDomainDiversity(
std::pair<DomainDiversityResults, DomainDiversityResults>
HistoryBackend::GetDomainDiversity(
base::Time report_time,
int number_of_days_to_report,
DomainMetricBitmaskType metric_type_bitmask) {
DCHECK_GE(number_of_days_to_report, 0);
DomainDiversityResults result;
DomainDiversityResults local_result;
DomainDiversityResults all_result;

if (!db_)
return result;
return std::make_pair(local_result, all_result);

number_of_days_to_report =
std::min(number_of_days_to_report, kDomainDiversityMaxBacktrackedDays);

base::Time current_midnight = report_time.LocalMidnight();
SCOPED_UMA_HISTOGRAM_TIMER("History.DomainCountQueryTime_V2");
SCOPED_UMA_HISTOGRAM_TIMER("History.DomainCountQueryTime_V3");

for (int days_back = 0; days_back < number_of_days_to_report; ++days_back) {
DomainMetricSet single_metric_set;
single_metric_set.end_time = current_midnight;
DomainMetricSet local_metric_set;
local_metric_set.end_time = current_midnight;
DomainMetricSet all_metric_set;
all_metric_set.end_time = current_midnight;

if (metric_type_bitmask & kEnableLast1DayMetric) {
base::Time last_midnight = MidnightNDaysLater(current_midnight, -1);
single_metric_set.one_day_metric = DomainMetricCountType(
db_->CountUniqueDomainsVisited(last_midnight, current_midnight),
last_midnight);
auto [local_domains, all_domains] =
db_->CountUniqueDomainsVisited(last_midnight, current_midnight);
local_metric_set.one_day_metric =
DomainMetricCountType(local_domains, last_midnight);
all_metric_set.one_day_metric =
DomainMetricCountType(all_domains, last_midnight);
}

if (metric_type_bitmask & kEnableLast7DayMetric) {
base::Time seven_midnights_ago = MidnightNDaysLater(current_midnight, -7);
single_metric_set.seven_day_metric = DomainMetricCountType(
db_->CountUniqueDomainsVisited(seven_midnights_ago, current_midnight),
seven_midnights_ago);
auto [local_domains, all_domains] =
db_->CountUniqueDomainsVisited(seven_midnights_ago, current_midnight);
local_metric_set.seven_day_metric =
DomainMetricCountType(local_domains, seven_midnights_ago);
all_metric_set.seven_day_metric =
DomainMetricCountType(all_domains, seven_midnights_ago);
}

if (metric_type_bitmask & kEnableLast28DayMetric) {
base::Time twenty_eight_midnights_ago =
MidnightNDaysLater(current_midnight, -28);
single_metric_set.twenty_eight_day_metric = DomainMetricCountType(
db_->CountUniqueDomainsVisited(twenty_eight_midnights_ago,
current_midnight),
twenty_eight_midnights_ago);
auto [local_domains, all_domains] = db_->CountUniqueDomainsVisited(
twenty_eight_midnights_ago, current_midnight);
local_metric_set.twenty_eight_day_metric =
DomainMetricCountType(local_domains, twenty_eight_midnights_ago);
all_metric_set.twenty_eight_day_metric =
DomainMetricCountType(all_domains, twenty_eight_midnights_ago);
}
result.push_back(single_metric_set);
local_result.push_back(local_metric_set);
all_result.push_back(all_metric_set);

current_midnight = MidnightNDaysLater(current_midnight, -1);
}

return result;
return std::make_pair(local_result, all_result);
}

HistoryLastVisitResult HistoryBackend::GetLastVisitToHost(
Expand Down
7 changes: 6 additions & 1 deletion components/history/core/browser/history_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,12 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
// metrics measuring domain visit counts spanning the following date ranges
// (all dates are inclusive):
// {{10/30, 10/3~10/30}, {10/29, 10/2~10/29}, {10/28, 10/1~10/28}}
DomainDiversityResults GetDomainDiversity(
//
// The return value is a pair of results, where the first member counts only
// local visits, and the second counts both local and foreign (synced) visits.
// TODO(crbug.com/1422210): Once the "V2" domain diversity metrics are
// deprecated, return only a single result, the "local" one.
std::pair<DomainDiversityResults, DomainDiversityResults> GetDomainDiversity(
base::Time report_time,
int number_of_days_to_report,
DomainMetricBitmaskType metric_type_bitmask);
Expand Down
54 changes: 37 additions & 17 deletions components/history/core/browser/history_database.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "base/rand_util.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "components/history/core/browser/history_types.h"
#include "components/sync/base/features.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "sql/database.h"
Expand Down Expand Up @@ -317,37 +318,56 @@ int HistoryDatabase::CountUniqueHostsVisitedLastMonth() {
return hosts.size();
}

int HistoryDatabase::CountUniqueDomainsVisited(base::Time begin_time,
base::Time end_time) {
std::pair<int, int> HistoryDatabase::CountUniqueDomainsVisited(
base::Time begin_time,
base::Time end_time) {
// TODO(crbug.com/1365291): Once syncer::kSyncEnableHistoryDataType is fully
// rolled out, plus 3 months for old data to expire, the check for
// visit_source can be removed - it'll be enough to check for non-empty
// originator_cache_guid to detect foreign visits.
sql::Statement url_sql(db_.GetUniqueStatement(
"SELECT urls.url FROM urls JOIN visits "
"WHERE urls.id = visits.url "
"AND (transition & ?) != 0 " // CHAIN_END
"AND (transition & ?) NOT IN (?, ?, ?) " // NO SUBFRAME or
"SELECT urls.url, visits.originator_cache_guid, "
"IFNULL(visit_source.source, ?) " // SOURCE_BROWSED
"FROM urls "
"INNER JOIN visits ON urls.id = visits.url "
"LEFT JOIN visit_source ON visits.id = visit_source.id "
"WHERE (transition & ?) != 0 " // CHAIN_END
"AND (transition & ?) NOT IN (?, ?, ?) " // No *_SUBFRAME or
// KEYWORD_GENERATED
"AND hidden = 0 AND visit_time >= ? AND visit_time < ?"));

url_sql.BindInt64(0, ui::PAGE_TRANSITION_CHAIN_END);
url_sql.BindInt64(1, ui::PAGE_TRANSITION_CORE_MASK);
url_sql.BindInt64(2, ui::PAGE_TRANSITION_AUTO_SUBFRAME);
url_sql.BindInt64(3, ui::PAGE_TRANSITION_MANUAL_SUBFRAME);
url_sql.BindInt64(4, ui::PAGE_TRANSITION_KEYWORD_GENERATED);
url_sql.BindInt64(0, VisitSource::SOURCE_BROWSED);
url_sql.BindInt64(1, ui::PAGE_TRANSITION_CHAIN_END);
url_sql.BindInt64(2, ui::PAGE_TRANSITION_CORE_MASK);
url_sql.BindInt64(3, ui::PAGE_TRANSITION_AUTO_SUBFRAME);
url_sql.BindInt64(4, ui::PAGE_TRANSITION_MANUAL_SUBFRAME);
url_sql.BindInt64(5, ui::PAGE_TRANSITION_KEYWORD_GENERATED);

url_sql.BindTime(5, begin_time);
url_sql.BindTime(6, end_time);
url_sql.BindTime(6, begin_time);
url_sql.BindTime(7, end_time);

std::set<std::string> domains;
std::set<std::string> all_domains;
std::set<std::string> local_domains;
while (url_sql.Step()) {
GURL url(url_sql.ColumnString(0));
std::string domain = net::registry_controlled_domains::GetDomainAndRegistry(
url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);

// IP addresses, empty URLs, and URLs with empty or unregistered TLDs are
// all excluded.
if (!domain.empty())
domains.insert(domain);
if (domain.empty()) {
continue;
}

all_domains.insert(domain);

bool is_local = url_sql.ColumnString(1).empty() &&
url_sql.ColumnInt(2) == VisitSource::SOURCE_BROWSED;
if (is_local) {
local_domains.insert(domain);
}
}
return domains.size();
return std::make_pair(local_domains.size(), all_domains.size());
}

void HistoryDatabase::BeginExclusiveMode() {
Expand Down
6 changes: 5 additions & 1 deletion components/history/core/browser/history_database.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,11 @@ class HistoryDatabase : public DownloadDatabase,

// Counts the number of unique domains (eLTD+1) visited within
// [`begin_time`, `end_time`).
int CountUniqueDomainsVisited(base::Time begin_time, base::Time end_time);
// The return value is a pair of (local, all), where "local" only counts
// domains that were visited on this device, whereas "all" also counts
// foreign/synced visits.
std::pair<int, int> CountUniqueDomainsVisited(base::Time begin_time,
base::Time end_time);

// Call to set the mode on the database to exclusive. The default locking mode
// is "normal" but we want to run in exclusive mode for slightly better
Expand Down

0 comments on commit 1cfcda2

Please sign in to comment.