Skip to content

Commit

Permalink
[Segmentation Platform] Don't compact signals for days that compacted
Browse files Browse the repository at this point in the history
before

This CL adds a pref to store when the last compaction happens, so that
we don't need to do compaction for signals before it.
If user adjust their clock to some time in the past, the pref will be
reset to that time so compaction can happen later on.

BUG=1360505

Change-Id: Ia74c327da234f363b18231bff2c3a4e8f165a0a2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3928476
Commit-Queue: Min Qin <qinmin@chromium.org>
Reviewed-by: Siddhartha S <ssid@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1055163}
  • Loading branch information
Min Qin authored and Chromium LUCI CQ committed Oct 5, 2022
1 parent e393bd2 commit 4292555
Show file tree
Hide file tree
Showing 11 changed files with 173 additions and 97 deletions.
3 changes: 3 additions & 0 deletions components/segmentation_platform/internal/constants.cc
Expand Up @@ -21,4 +21,7 @@ const char kSegmentationLastCollectionTimePref[] =
const char kSegmentationPlatformRefreshResultsSwitch[] =
"segmentation-platform-refresh-results";

const char kSegmentationLastDBCompactionTimePref[] =
"segmentation_platform.last_db_compaction_time";

} // namespace segmentation_platform
3 changes: 3 additions & 0 deletions components/segmentation_platform/internal/constants.h
Expand Up @@ -18,6 +18,9 @@ extern const char kSegmentationLastCollectionTimePref[];

extern const char kSegmentationPlatformRefreshResultsSwitch[];

// The timestamp before which all samples were compacted and future compactions
// need to only check for days after it.
extern const char kSegmentationLastDBCompactionTimePref[];
} // namespace segmentation_platform

#endif // COMPONENTS_SEGMENTATION_PLATFORM_INTERNAL_CONSTANTS_H_
Expand Up @@ -15,10 +15,13 @@
#include "base/callback.h"
#include "base/callback_forward.h"
#include "base/callback_helpers.h"
#include "base/check_is_test.h"
#include "base/containers/adapters.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/time/clock.h"
#include "base/time/time.h"
#include "components/prefs/pref_service.h"
#include "components/segmentation_platform/internal/constants.h"
#include "components/segmentation_platform/internal/database/segment_info_database.h"
#include "components/segmentation_platform/internal/database/signal_database.h"
#include "components/segmentation_platform/internal/database/signal_storage_config.h"
Expand All @@ -37,6 +40,12 @@ using SignalIdentifier = DatabaseMaintenanceImpl::SignalIdentifier;
using CleanupItem = DatabaseMaintenanceImpl::CleanupItem;

namespace {
// Gets the end of the UTC day time for `current_time`.
base::Time GetEndOfDayTime(base::Time current_time) {
base::Time day_start_time = current_time.UTCMidnight();
return day_start_time + base::Days(1) - base::Seconds(1);
}

std::set<SignalIdentifier> CollectAllSignalIdentifiers(
const DefaultModelManager::SegmentInfoList& segment_infos) {
std::set<SignalIdentifier> signal_ids;
Expand Down Expand Up @@ -93,13 +102,15 @@ DatabaseMaintenanceImpl::DatabaseMaintenanceImpl(
SegmentInfoDatabase* segment_info_database,
SignalDatabase* signal_database,
SignalStorageConfig* signal_storage_config,
DefaultModelManager* default_model_manager)
DefaultModelManager* default_model_manager,
PrefService* profile_prefs)
: segment_ids_(segment_ids),
clock_(clock),
segment_info_database_(segment_info_database),
signal_database_(signal_database),
signal_storage_config_(signal_storage_config),
default_model_manager_(default_model_manager) {}
default_model_manager_(default_model_manager),
profile_prefs_(profile_prefs) {}

DatabaseMaintenanceImpl::~DatabaseMaintenanceImpl() = default;

Expand Down Expand Up @@ -197,21 +208,36 @@ void DatabaseMaintenanceImpl::CleanupSignalStorageDone(
void DatabaseMaintenanceImpl::CompactSamples(
std::set<SignalIdentifier> signal_ids,
base::OnceClosure next_action) {
for (uint64_t days_ago = kFirstCompactionDay;
days_ago <= kMaxSignalStorageDays; ++days_ago) {
base::Time compaction_day = clock_->Now() - base::Days(days_ago);
base::Time last_compation_time = base::Time::Min();
if (profile_prefs_) {
last_compation_time =
profile_prefs_->GetTime(kSegmentationLastDBCompactionTimePref);
} else {
CHECK_IS_TEST();
}
base::Time end_of_day = GetEndOfDayTime(clock_->Now());
base::Time most_recent_day_to_compact =
end_of_day - base::Days(kFirstCompactionDay);
base::Time ealiest_day_to_compact =
end_of_day - base::Days(kMaxSignalStorageDays);
base::Time compaction_day = most_recent_day_to_compact;

while (compaction_day >= ealiest_day_to_compact &&
compaction_day > last_compation_time) {
for (auto signal_id : signal_ids) {
signal_database_->CompactSamplesForDay(
signal_id.second, signal_id.first, compaction_day,
base::BindOnce(&DatabaseMaintenanceImpl::RecordCompactionResult,
weak_ptr_factory_.GetWeakPtr(), signal_id.second,
signal_id.first));
}
compaction_day -= base::Days(1);
}
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::BindOnce(&DatabaseMaintenanceImpl::CompactSamplesDone,
weak_ptr_factory_.GetWeakPtr(), std::move(next_action)));
weak_ptr_factory_.GetWeakPtr(), std::move(next_action),
most_recent_day_to_compact));
}

void DatabaseMaintenanceImpl::RecordCompactionResult(
Expand All @@ -222,7 +248,11 @@ void DatabaseMaintenanceImpl::RecordCompactionResult(
}

void DatabaseMaintenanceImpl::CompactSamplesDone(
base::OnceClosure next_action) {
base::OnceClosure next_action,
base::Time last_compation_time) {
if (profile_prefs_)
profile_prefs_->SetTime(kSegmentationLastDBCompactionTimePref,
last_compation_time);
std::move(next_action).Run();
}

Expand Down
Expand Up @@ -20,6 +20,8 @@
#include "components/segmentation_platform/public/proto/segmentation_platform.pb.h"
#include "components/segmentation_platform/public/proto/types.pb.h"

class PrefService;

namespace base {
class Clock;
class Time;
Expand All @@ -45,7 +47,8 @@ class DatabaseMaintenanceImpl : public DatabaseMaintenance {
SegmentInfoDatabase* segment_info_database,
SignalDatabase* signal_database,
SignalStorageConfig* signal_storage_config,
DefaultModelManager* default_model_manager);
DefaultModelManager* default_model_manager,
PrefService* profile_prefs);
~DatabaseMaintenanceImpl() override;

// DatabaseMaintenance overrides.
Expand Down Expand Up @@ -84,7 +87,8 @@ class DatabaseMaintenanceImpl : public DatabaseMaintenance {
void RecordCompactionResult(proto::SignalType signal_type,
uint64_t name_hash,
bool success);
void CompactSamplesDone(base::OnceClosure next_action);
void CompactSamplesDone(base::OnceClosure next_action,
base::Time last_compation_time);

// Input.
base::flat_set<SegmentId> segment_ids_;
Expand All @@ -98,6 +102,9 @@ class DatabaseMaintenanceImpl : public DatabaseMaintenance {
// Default model provider.
raw_ptr<DefaultModelManager> default_model_manager_;

// PrefService from profile.
raw_ptr<PrefService> profile_prefs_;

base::WeakPtrFactory<DatabaseMaintenanceImpl> weak_ptr_factory_{this};
};

Expand Down
Expand Up @@ -15,6 +15,8 @@
#include "base/test/task_environment.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/time/time.h"
#include "components/prefs/testing_pref_service.h"
#include "components/segmentation_platform/internal/constants.h"
#include "components/segmentation_platform/internal/database/mock_signal_database.h"
#include "components/segmentation_platform/internal/database/mock_signal_storage_config.h"
#include "components/segmentation_platform/internal/database/signal_storage_config.h"
Expand All @@ -24,6 +26,7 @@
#include "components/segmentation_platform/public/proto/aggregation.pb.h"
#include "components/segmentation_platform/public/proto/segmentation_platform.pb.h"
#include "components/segmentation_platform/public/proto/types.pb.h"
#include "components/segmentation_platform/public/segmentation_platform_service.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"

Expand Down Expand Up @@ -98,12 +101,31 @@ class TestDefaultModelManager : public DefaultModelManager {
}
};

std::set<DatabaseMaintenanceImpl::SignalIdentifier> GetSignalIds(
std::vector<SignalData> signal_datas) {
std::set<DatabaseMaintenanceImpl::SignalIdentifier> signal_ids;
for (auto& sd : signal_datas)
signal_ids.emplace(sd.name_hash, sd.signal_type);

return signal_ids;
}

std::vector<CleanupItem> GetCleanupItems(std::vector<SignalData> signal_datas) {
std::vector<CleanupItem> cleanup_items;
for (auto& sd : signal_datas) {
cleanup_items.emplace_back(sd.name_hash, sd.signal_type,
sd.earliest_needed_timestamp);
}
return cleanup_items;
}

class DatabaseMaintenanceImplTest : public testing::Test {
public:
DatabaseMaintenanceImplTest() = default;
~DatabaseMaintenanceImplTest() override = default;

void SetUp() override {
SegmentationPlatformService::RegisterProfilePrefs(prefs_.registry());
segment_info_database_ = std::make_unique<test::TestSegmentInfoDatabase>();
signal_database_ = std::make_unique<MockSignalDatabase>();
signal_storage_config_ = std::make_unique<MockSignalStorageConfig>();
Expand All @@ -114,8 +136,7 @@ class DatabaseMaintenanceImplTest : public testing::Test {
database_maintenance_ = std::make_unique<DatabaseMaintenanceImpl>(
segment_ids, &clock_, segment_info_database_.get(),
signal_database_.get(), signal_storage_config_.get(),
default_model_manager_.get());

default_model_manager_.get(), &prefs_);
clock_.SetNow(base::Time::Now());
}

Expand Down Expand Up @@ -149,6 +170,64 @@ class DatabaseMaintenanceImplTest : public testing::Test {
}
}

void TestMaintenanceTasksScheduling(uint64_t earliest_days_ago) {
std::vector<SignalData> signal_datas = {
{SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_NEW_TAB,
SignalType::HISTOGRAM_VALUE, "Foo", base::HashMetricName("Foo"), 44, 1,
Aggregation::COUNT, clock_.Now() - base::Days(10), true},
{SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_SHARE,
SignalType::HISTOGRAM_ENUM, "Bar", base::HashMetricName("Bar"), 33, 1,
Aggregation::COUNT, clock_.Now() - base::Days(5), true},
{SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_SHARE,
SignalType::USER_ACTION, "Failed", base::HashMetricName("Failed"), 22,
1, Aggregation::COUNT, clock_.Now() - base::Days(1), false},
};

// Prepare test setup.
AddFeatures(signal_datas);
std::set<DatabaseMaintenanceImpl::SignalIdentifier> signal_ids =
GetSignalIds(signal_datas);
std::vector<CleanupItem> cleanup_items = GetCleanupItems(signal_datas);

// Ensure we return the correct signals.
ON_CALL(*signal_storage_config_, GetSignalsForCleanup(_, _))
.WillByDefault(SetArgReferee<1>(cleanup_items));

// We should try to delete each signal.
for (auto& sd : signal_datas) {
EXPECT_CALL(*signal_database_,
DeleteSamples(sd.signal_type, sd.name_hash,
sd.earliest_needed_timestamp, _))
.WillOnce(RunOnceCallback<3>(sd.success));
}

// The Failed signal failed to clean up, so we should not be updating it,
// but the rest should be updated.
cleanup_items.erase(
std::remove_if(cleanup_items.begin(), cleanup_items.end(),
[](CleanupItem item) {
return std::get<0>(item) ==
base::HashMetricName("Failed");
}),
cleanup_items.end());
EXPECT_CALL(*signal_storage_config_,
UpdateSignalsForCleanup(cleanup_items));

// Verify that for each of the signal data, we get a compaction request for
// each day within the correct range.
for (auto& sd : signal_datas) {
for (uint64_t days_ago = kLatestCompactionDaysAgo;
days_ago <= earliest_days_ago; ++days_ago) {
EXPECT_CALL(
*signal_database_,
CompactSamplesForDay(sd.signal_type, sd.name_hash,
clock_.Now().UTCMidnight() + base::Days(1) -
base::Seconds(1) - base::Days(days_ago),
_))
.WillOnce(RunOnceCallback<3>(/*success=*/sd.success));
}
}
}
base::test::TaskEnvironment task_environment_;

std::unique_ptr<Config> config_;
Expand All @@ -159,79 +238,20 @@ class DatabaseMaintenanceImplTest : public testing::Test {
std::unique_ptr<TestDefaultModelManager> default_model_manager_;

std::unique_ptr<DatabaseMaintenanceImpl> database_maintenance_;
TestingPrefServiceSimple prefs_;
};

std::set<DatabaseMaintenanceImpl::SignalIdentifier> GetSignalIds(
std::vector<SignalData> signal_datas) {
std::set<DatabaseMaintenanceImpl::SignalIdentifier> signal_ids;
for (auto& sd : signal_datas)
signal_ids.emplace(std::make_pair(sd.name_hash, sd.signal_type));

return signal_ids;
}

std::vector<CleanupItem> GetCleanupItems(std::vector<SignalData> signal_datas) {
std::vector<CleanupItem> cleanup_items;
for (auto& sd : signal_datas) {
cleanup_items.emplace_back(std::make_tuple(sd.name_hash, sd.signal_type,
sd.earliest_needed_timestamp));
}
return cleanup_items;
}

TEST_F(DatabaseMaintenanceImplTest, ExecuteMaintenanceTasks) {
std::vector<SignalData> signal_datas = {
{SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_NEW_TAB,
SignalType::HISTOGRAM_VALUE, "Foo", base::HashMetricName("Foo"), 44, 1,
Aggregation::COUNT, clock_.Now() - base::Days(10), true},
{SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_SHARE,
SignalType::HISTOGRAM_ENUM, "Bar", base::HashMetricName("Bar"), 33, 1,
Aggregation::COUNT, clock_.Now() - base::Days(5), true},
{SegmentId::OPTIMIZATION_TARGET_SEGMENTATION_SHARE,
SignalType::USER_ACTION, "Failed", base::HashMetricName("Failed"), 22, 1,
Aggregation::COUNT, clock_.Now() - base::Days(1), false},
};

// Prepare test setup.
AddFeatures(signal_datas);
std::set<DatabaseMaintenanceImpl::SignalIdentifier> signal_ids =
GetSignalIds(signal_datas);
std::vector<CleanupItem> cleanup_items = GetCleanupItems(signal_datas);

// Ensure we return the correct signals.
ON_CALL(*signal_storage_config_, GetSignalsForCleanup(_, _))
.WillByDefault(SetArgReferee<1>(cleanup_items));

// We should try to delete each signal.
for (auto& sd : signal_datas) {
EXPECT_CALL(*signal_database_,
DeleteSamples(sd.signal_type, sd.name_hash,
sd.earliest_needed_timestamp, _))
.WillOnce(RunOnceCallback<3>(sd.success));
}

// The Failed signal failed to clean up, so we should not be updating it, but
// the rest should be updated.
cleanup_items.erase(std::remove_if(cleanup_items.begin(), cleanup_items.end(),
[](CleanupItem item) {
return std::get<0>(item) ==
base::HashMetricName("Failed");
}),
cleanup_items.end());
EXPECT_CALL(*signal_storage_config_, UpdateSignalsForCleanup(cleanup_items));

// Verify that for each of the signal data, we get a compaction request for
// each day within the correct range.
for (auto& sd : signal_datas) {
for (uint64_t days_ago = kLatestCompactionDaysAgo;
days_ago <= kEarliestCompactionDaysAgo; ++days_ago) {
EXPECT_CALL(*signal_database_,
CompactSamplesForDay(sd.signal_type, sd.name_hash,
clock_.Now() - base::Days(days_ago), _))
.WillOnce(RunOnceCallback<3>(/*success=*/sd.success));
}
}
TestMaintenanceTasksScheduling(kEarliestCompactionDaysAgo);
// Kick off all tasks.
database_maintenance_->ExecuteMaintenanceTasks();
}

TEST_F(DatabaseMaintenanceImplTest, NoCompactionForDataAlreadyCompacted) {
// Simulate that a maintenance task has compacted all samples 4 days ago.
prefs_.SetTime(kSegmentationLastDBCompactionTimePref,
clock_.Now().UTCMidnight() - base::Days(3) - base::Seconds(1));
TestMaintenanceTasksScheduling(3);
// Kick off all tasks.
database_maintenance_->ExecuteMaintenanceTasks();
}
Expand Down
Expand Up @@ -233,14 +233,13 @@ void SignalDatabaseImpl::OnGetSamplesForDeletion(

void SignalDatabaseImpl::CompactSamplesForDay(proto::SignalType signal_type,
uint64_t name_hash,
base::Time day_start_time,
base::Time day_end_time,
SuccessCallback callback) {
TRACE_EVENT("segmentation_platform",
"SignalDatabaseImpl::CompactSamplesForDay");
DCHECK(initialized_);
// Compact the signals between 00:00:00AM to 23:59:59PM.
day_start_time = day_start_time.UTCMidnight();
base::Time day_end_time = day_start_time + base::Days(1) - base::Seconds(1);
base::Time day_start_time = day_end_time.UTCMidnight();
SignalKey compact_key(metadata_utils::SignalTypeToSignalKind(signal_type),
name_hash, day_end_time, day_start_time);
database_->LoadKeysAndEntriesWithFilter(
Expand Down

0 comments on commit 4292555

Please sign in to comment.