Skip to content

Commit

Permalink
[memories] [persist-clusters] Add history/core methods.
Browse files Browse the repository at this point in the history
This CL does 3 things:

1) Splits HB::ToAnnotatedVisits() from HB::GetAnnotatedVisits() so that
   the former can be reused.

2) Removes unused methods. Previous CLs added history/core methods in
   preparation for persistence work. But due to changes in how
   pagination and front-to-back traversal, some will no longer be used.
   Specifically, removes:
    - DB::GetRecentAnnotatedVisitIds()
    - DB::GetClusters()
    - DB::GetRecentClusterIds()
    - HB::GetClusters()
    - HB::GetRecentClusterIdsAndAnnotatedVisits()
    - HS::GetRecentClusterIdsAndAnnotatedVisits()
    - HS::GetClusters()

3) Adds the new history/core methods we plan to use. Specifically:
    - DB::GetMostRecentClusterIds()
    - DB::DeleteClusters()
    - HB::ReplaceClusters()
    - HB::GetMostRecentClusters()
    - HB::GetCluster()
    - HB::ReplaceClusters()
    - HS::ReplaceClusters()
    - HS::GetMostRecentClusters()

More details about cluster persistence:
https://docs.google.com/document/d/1bwDXx_p9jt_jLVzSVzLBSFg5gyYyk2QCu2zntQqP-xo/edit

Bug: 1184879, 1184875, 1171352
Change-Id: Iabc08e5dd5eecf88e2c0159a804c3e72baabb851
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3561325
Reviewed-by: Tommy Li <tommycli@chromium.org>
Commit-Queue: manuk hovanesian <manukh@chromium.org>
Cr-Commit-Position: refs/heads/main@{#988781}
  • Loading branch information
manukh authored and Chromium LUCI CQ committed Apr 5, 2022
1 parent 0d2e663 commit 602610e
Show file tree
Hide file tree
Showing 11 changed files with 347 additions and 479 deletions.
161 changes: 59 additions & 102 deletions components/history/core/browser/history_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "base/notreached.h"
#include "base/observer_list.h"
#include "base/rand_util.h"
#include "base/ranges/ranges.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/task/sequenced_task_runner.h"
Expand Down Expand Up @@ -1545,6 +1546,14 @@ std::vector<AnnotatedVisit> HistoryBackend::GetAnnotatedVisits(

DCHECK_LE(static_cast<int>(visit_rows.size()), options.EffectiveMaxCount());

return ToAnnotatedVisits(visit_rows);
}

std::vector<AnnotatedVisit> HistoryBackend::ToAnnotatedVisits(
const VisitVector& visit_rows) {
if (!db_)
return {};

VisitSourceMap sources;
GetVisitsSource(visit_rows, &sources);

Expand Down Expand Up @@ -1588,118 +1597,66 @@ std::vector<AnnotatedVisit> HistoryBackend::GetAnnotatedVisits(
return annotated_visits;
}

ClusterIdsAndAnnotatedVisitsResult
HistoryBackend::GetRecentClusterIdsAndAnnotatedVisits(base::Time minimum_time,
int max_results) {
TRACE_EVENT0("browser",
"HistoryBackend::GetRecentClusterIdsAndAnnotatedVisits");
std::vector<AnnotatedVisit> HistoryBackend::ToAnnotatedVisits(
const std::vector<VisitID>& visit_ids) {
if (!db_)
return {};

// Only interested in up to `max_results` unique `VisitID`s.
std::set<VisitID> recent_visit_ids;
const auto add_visit_ids = [&](std::vector<VisitID> visit_ids) {
for (const auto visit_id : visit_ids) {
if (recent_visit_ids.size() >= static_cast<size_t>(max_results))
break;
recent_visit_ids.insert(visit_id);
}
};

// Add recent visits.
add_visit_ids(db_->GetRecentAnnotatedVisitIds(minimum_time, max_results));

// Add visits in recent clusters.
std::vector<int64_t> recent_cluster_ids =
db_->GetRecentClusterIds(minimum_time);
for (const auto cluster_id : recent_cluster_ids) {
if (recent_visit_ids.size() >= static_cast<size_t>(max_results))
break;
// Request `max_results` visits instead of `max_results -
// recent_visit_ids.size()`, as some of the returned IDs may already be in
// `recent_visit_ids`.
add_visit_ids(db_->GetVisitIdsInCluster(cluster_id, max_results));
VisitVector visit_rows;
for (const auto visit_id : visit_ids) {
VisitRow visit_row;
if (db_->GetRowForVisit(visit_id, &visit_row))
visit_rows.push_back(visit_row);
}
return ToAnnotatedVisits(visit_rows);
}

// Convert the `VisitID`s to `AnnotatedVisitRow`s.
std::vector<AnnotatedVisitRow> recent_annotated_visit_rows;
base::ranges::transform(
recent_visit_ids, std::back_inserter(recent_annotated_visit_rows),
[&](const VisitID& visit_id) {
AnnotatedVisitRow row;
row.visit_id = visit_id;
// Deliberately ignore the return values. It's okay if the annotations
// don't exist and the structs are left unchanged.
db_->GetContentAnnotationsForVisit(visit_id, &row.content_annotations);
db_->GetContextAnnotationsForVisit(visit_id, &row.context_annotations);
return row;
});

return {recent_cluster_ids,
AnnotatedVisitsFromRows(recent_annotated_visit_rows)};
}

std::vector<Cluster> HistoryBackend::GetClusters(int max_results) {
TRACE_EVENT0("browser", "HistoryBackend::GetClusters");
void HistoryBackend::ReplaceClusters(
const std::vector<int64_t>& ids_to_delete,
const std::vector<Cluster>& clusters_to_add) {
TRACE_EVENT0("browser", "HistoryBackend::ReplaceClusters");
if (!db_)
return {};

std::vector<ClusterRow> cluster_rows = db_->GetClusters(max_results);
std::vector<AnnotatedVisitRow> annotated_visit_rows =
db_->GetClusteredAnnotatedVisits(max_results);
std::vector<AnnotatedVisit> annotated_visits =
AnnotatedVisitsFromRows(annotated_visit_rows);
return;
db_->DeleteClusters(ids_to_delete);
db_->AddClusters(clusters_to_add);
ScheduleCommit();
}

std::vector<Cluster> HistoryBackend::GetMostRecentClusters(
base::Time inclusive_min_time,
base::Time exclusive_max_time,
int max_clusters) {
TRACE_EVENT0("browser", "HistoryBackend::GetMostRecentClusters");
if (!db_)
return {};
const auto cluster_ids = db_->GetMostRecentClusterIds(
inclusive_min_time, exclusive_max_time, max_clusters);
std::vector<Cluster> clusters;

for (const auto& cluster_row : cluster_rows) {
std::vector<ClusterVisit> current_cluster_visits;
for (VisitID annotated_visit_id : cluster_row.visit_ids) {
const auto annotated_visits_it =
base::ranges::find(annotated_visits, annotated_visit_id,
[](const auto& annotated_visit) {
return annotated_visit.visit_row.visit_id;
});
// TODO(manukh): Add scores.
if (annotated_visits_it != annotated_visits.end()) {
ClusterVisit cluster_visit;
cluster_visit.annotated_visit = *annotated_visits_it;
current_cluster_visits.push_back(cluster_visit);
}
}
if (!current_cluster_visits.empty()) {
clusters.push_back({cluster_row.cluster_id, current_cluster_visits, {}});
}
}
base::ranges::transform(
cluster_ids, std::back_inserter(clusters),
[&](const auto& cluster_id) { return GetCluster(cluster_id); });
return clusters;
}

std::vector<AnnotatedVisit> HistoryBackend::AnnotatedVisitsFromRows(
const std::vector<AnnotatedVisitRow>& rows) {
std::vector<AnnotatedVisit> annotated_visits;
for (const auto& annotated_visit_row : rows) {
URLRow url_row;
VisitRow visit_row;
if (db_->GetRowForVisit(annotated_visit_row.visit_id, &visit_row) &&
db_->GetURLRow(visit_row.url_id, &url_row)) {
VisitSource source;
GetVisitSource(annotated_visit_row.visit_id, &source);
VisitRow redirect_start = GetRedirectChainStart(visit_row);
annotated_visits.push_back({url_row,
visit_row,
annotated_visit_row.context_annotations,
{},
redirect_start.referring_visit,
redirect_start.opener_visit,
source});
} else {
// Ignore corrupt data but do not crash, as user DBs can be in bad states.
DVLOG(0) << "HistoryBackend: AnnotatedVisit found with missing associated"
"URL or visit. visit_id = "
<< annotated_visit_row.visit_id;
}
}
return annotated_visits;
Cluster HistoryBackend::GetCluster(int64_t cluster_id) {
TRACE_EVENT0("browser", "HistoryBackend::GetCluster");
if (!db_)
return {};

// TODO(manukh): `Cluster`s and `ClusterRow`s have more fields that should be
// set here once we begin persisting them to DB.

Cluster cluster;
cluster.cluster_id = cluster_id;

const auto visit_ids = db_->GetVisitIdsInCluster(cluster_id);
const auto annotated_visits = ToAnnotatedVisits(visit_ids);
base::ranges::transform(annotated_visits, std::back_inserter(cluster.visits),
[&](const auto& annotated_visit) {
ClusterVisit cluster_visit;
cluster_visit.annotated_visit = annotated_visit;
return cluster_visit;
});
return cluster;
}

VisitRow HistoryBackend::GetRedirectChainStart(VisitRow visit) {
Expand Down
41 changes: 20 additions & 21 deletions components/history/core/browser/history_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
virtual void NotifyFaviconsChanged(const std::set<GURL>& page_urls,
const GURL& icon_url) = 0;

// Notify HistoryService that the user is visiting an URL. The event will
// Notify HistoryService that the user is visiting a URL. The event will
// be forwarded to the HistoryServiceObservers in the correct thread.
virtual void NotifyURLVisited(ui::PageTransition transition,
const URLRow& row,
Expand Down Expand Up @@ -221,7 +221,7 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
const HistoryDatabaseParams& history_database_params);

// Notification that the history system is shutting down. This will break
// the refs owned by the delegate and any pending transaction so it will
// the refs owned by the delegate and any pending transaction, so it will
// actually be deleted.
void Closing();

Expand All @@ -236,7 +236,7 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
// Clears all on-demand favicons.
void ClearAllOnDemandFavicons();

// Gets the counts and last last time of URLs that belong to `origins` in the
// Gets the counts and last time of URLs that belong to `origins` in the
// history database. Origins that are not in the history database will be in
// the map with a count and time of 0.
// Returns an empty map if db_ is not initialized.
Expand Down Expand Up @@ -456,21 +456,27 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
VisitID visit_id,
const VisitContextAnnotations& visit_context_annotations);

// Gets a vector of reverse-chronological `AnnotatedVisit` instances based on
// `options`. Uses the same de-duplication and visibility logic as
// `HistoryService::QueryHistory()`.
//
// If `limited_by_max_count` is non-nullptr, it will be set to true if the
// number of results was limited by `options.max_count`.
std::vector<AnnotatedVisit> GetAnnotatedVisits(
const QueryOptions& options,
bool* limited_by_max_count = nullptr);

ClusterIdsAndAnnotatedVisitsResult GetRecentClusterIdsAndAnnotatedVisits(
base::Time minimum_time,
int max_results);
// Utility method to Construct `AnnotatedVisit`s.
std::vector<AnnotatedVisit> ToAnnotatedVisits(const VisitVector& visit_rows);

// Like above, but will first construct `visit_rows` from each `VisitID`
// before delegating to the overloaded `ToAnnotatedVisits()` above.
std::vector<AnnotatedVisit> ToAnnotatedVisits(
const std::vector<VisitID>& visit_ids);

void ReplaceClusters(const std::vector<int64_t>& ids_to_delete,
const std::vector<Cluster>& clusters_to_add);

std::vector<Cluster> GetMostRecentClusters(base::Time inclusive_min_time,
base::Time exclusive_max_time,
int max_clusters);

std::vector<Cluster> GetClusters(int max_results);
// Get a `Cluster`.
Cluster GetCluster(int64_t cluster_id);

// Finds the 1st visit in the redirect chain containing `visit`. Similar to
// `GetRedirectsToSpecificVisit()`, except 1) only returns the 1st visit of
Expand Down Expand Up @@ -557,7 +563,7 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,

// Calls ExpireHistoryBetween() once for each element in the vector.
// The fields of `ExpireHistoryArgs` map directly to the arguments of
// of ExpireHistoryBetween().
// ExpireHistoryBetween().
void ExpireHistory(const std::vector<ExpireHistoryArgs>& expire_list);

// Expires all visits before and including the given time, updating the URLs
Expand Down Expand Up @@ -699,13 +705,6 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
// match the `host_name` string. Returns any matches.
URLRows GetMatchesForHost(const std::u16string& host_name);

// Clusters ------------------------------------------------------------------

// Convert `AnnotatedVisitRow`s to `AnnotatedVisit`s. Drops rows without
// associated URL or visit rows, though this shouldn't happen usually.
std::vector<AnnotatedVisit> AnnotatedVisitsFromRows(
const std::vector<AnnotatedVisitRow>& rows);

// Committing ----------------------------------------------------------------

// We always keep a transaction open on the history database so that multiple
Expand Down

0 comments on commit 602610e

Please sign in to comment.