Skip to content

Commit

Permalink
Cache signal database samples
Browse files Browse the repository at this point in the history
Avoids copying the entries for every feature processing
request.

Bug: b/299529800
Change-Id: Ie7e8dc8b0bd3dd6c7f3986c68b1b43ad6df901f0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4938894
Reviewed-by: Ritika Gupta <ritikagup@google.com>
Commit-Queue: Siddhartha S <ssid@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1213051}
  • Loading branch information
ssiddhartha authored and Chromium LUCI CQ committed Oct 21, 2023
1 parent 5d81f22 commit 3a2c325
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 139 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ class MockSignalDatabase : public SignalDatabase {
base::Time,
SignalDatabase::SamplesCallback),
(override));
MOCK_METHOD(void,
MOCK_METHOD(const std::vector<SignalDatabase::DbEntry>*,
GetAllSamples,
(base::Time, base::Time, SignalDatabase::EntriesCallback),
(),
(override));
MOCK_METHOD(void,
DeleteSamples,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,10 @@ class SignalDatabase {
};
using EntriesCallback = base::OnceCallback<void(std::vector<DbEntry>)>;

// Called to fetch all entries from the signal database in the given time
// range.
virtual void GetAllSamples(base::Time start_time,
base::Time end_time,
EntriesCallback callback) = 0;
// Called to fetch all entries from the signal database. WARNING: This may
// return signals that are deleted from database but are still cached in
// memory. The caller should filter signals in time range as needed.
virtual const std::vector<DbEntry>* GetAllSamples() = 0;

// Called to delete database entries having end time earlier than |end_time|.
virtual void DeleteSamples(proto::SignalType signal_type,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ void SignalDatabaseImpl::WriteSample(proto::SignalType signal_type,
sample->set_time_sec_delta(midnight_delta.InSeconds());

recently_added_signals_[key] = signal_data;
all_signals_.emplace_back(DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp,
.value = (value ? *value : 0)});

// Write as a new db entry.
auto entries_to_save = std::make_unique<
Expand Down Expand Up @@ -209,26 +213,19 @@ void SignalDatabaseImpl::OnGetSamples(
std::move(callback).Run(out);
}

void SignalDatabaseImpl::GetAllSamples(base::Time start_time,
base::Time end_time,
EntriesCallback callback) {
const std::vector<SignalDatabase::DbEntry>*
SignalDatabaseImpl::GetAllSamples() {
TRACE_EVENT("segmentation_platform", "SignalDatabaseImpl::GetAllSamples");
DCHECK(initialized_);
DCHECK_LE(start_time, end_time);
database_->LoadKeysAndEntries(base::BindOnce(
&SignalDatabaseImpl::OnGetAllSamples, weak_ptr_factory_.GetWeakPtr(),
std::move(callback), start_time, end_time));
return &all_signals_;
}

void SignalDatabaseImpl::OnGetAllSamples(
EntriesCallback callback,
base::Time start_time,
base::Time end_time,
SuccessCallback callback,
bool success,
std::unique_ptr<std::map<std::string, proto::SignalData>> entries) {
std::vector<DbEntry> out;
IterateOverAllSamples(
start_time, end_time, success, std::move(entries),
base::Time::Min(), base::Time::Max(), success, std::move(entries),
base::BindRepeating(
[](std::vector<DbEntry>* out, const SignalKey& key,
base::Time timestamp, const proto::Sample& sample) {
Expand All @@ -238,8 +235,8 @@ void SignalDatabaseImpl::OnGetAllSamples(
.time = timestamp,
.value = sample.value()});
},
base::Unretained(&out)));
std::move(callback).Run(out);
base::Unretained(&all_signals_)));
std::move(callback).Run(success);
}

void SignalDatabaseImpl::DeleteSamples(proto::SignalType signal_type,
Expand All @@ -248,6 +245,10 @@ void SignalDatabaseImpl::DeleteSamples(proto::SignalType signal_type,
SuccessCallback callback) {
TRACE_EVENT("segmentation_platform", "SignalDatabaseImpl::DeleteSamples");
DCHECK(initialized_);
// TODO(ssid): Delete samples from `all_samples_` cache as well. It is not
// wrong to keep samples for longer since the UMA processor will filter only
// the samples that are needed. So, this would be memory saving optimization
// only.
SignalKey dummy_key(metadata_utils::SignalTypeToSignalKind(signal_type),
name_hash, base::Time(), base::Time());
std::string key_prefix = dummy_key.GetPrefixInBinary();
Expand Down Expand Up @@ -348,7 +349,13 @@ void SignalDatabaseImpl::OnDatabaseInitialized(
SuccessCallback callback,
leveldb_proto::Enums::InitStatus status) {
initialized_ = status == leveldb_proto::Enums::InitStatus::kOK;
std::move(callback).Run(status == leveldb_proto::Enums::InitStatus::kOK);
if (initialized_) {
database_->LoadKeysAndEntries(
base::BindOnce(&SignalDatabaseImpl::OnGetAllSamples,
weak_ptr_factory_.GetWeakPtr(), std::move(callback)));
} else {
std::move(callback).Run(false);
}
}

void SignalDatabaseImpl::CleanupStaleCachedEntries(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ class SignalDatabaseImpl : public SignalDatabase {
base::Time start_time,
base::Time end_time,
SamplesCallback callback) override;
void GetAllSamples(base::Time start_time,
base::Time end_time,
EntriesCallback callback) override;
const std::vector<DbEntry>* GetAllSamples() override;
void DeleteSamples(proto::SignalType signal_type,
uint64_t name_hash,
base::Time end_time,
Expand All @@ -76,9 +74,7 @@ class SignalDatabaseImpl : public SignalDatabase {
std::unique_ptr<std::map<std::string, proto::SignalData>> entries);

void OnGetAllSamples(
EntriesCallback callback,
base::Time start_time,
base::Time end_time,
SuccessCallback callback,
bool success,
std::unique_ptr<std::map<std::string, proto::SignalData>> entries);

Expand Down Expand Up @@ -106,6 +102,8 @@ class SignalDatabaseImpl : public SignalDatabase {
// Whether or not initialization has been completed.
bool initialized_{false};

std::vector<DbEntry> all_signals_;

// A cache of recently added signals. Used for avoiding collisions between two
// signals if they end up generating the same signal key, which can happen if
// the two WriteSample() calls are less than 1 second apart. In that case, the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class SignalDatabaseImplTest : public testing::Test {

signal_db_->Initialize(base::DoNothing());
db_->InitStatusCallback(leveldb_proto::Enums::InitStatus::kOK);
db_->LoadCallback(true);

test_clock_.SetNow(base::Time::Now().UTCMidnight() + base::Hours(8));
}
Expand Down Expand Up @@ -115,20 +116,9 @@ class SignalDatabaseImplTest : public testing::Test {
}

void ExpectGetAllSamples(
base::Time start_time,
const std::vector<SignalDatabase::DbEntry>& expected_list) {
ExpectGetAllSamples(start_time, test_clock_.Now(), expected_list);
}
void ExpectGetAllSamples(
base::Time start_time,
base::Time end_time,
const std::vector<SignalDatabase::DbEntry>& expected_list) {
signal_db_->GetAllSamples(
start_time, end_time,
base::BindOnce(&SignalDatabaseImplTest::OnAllGetSamples,
base::Unretained(this)));
db_->LoadCallback(true);
CheckVectorsEqual(expected_list, get_all_samples_result_);
const auto& samples = *signal_db_->GetAllSamples();
CheckVectorsEqual(expected_list, samples);
}

base::test::TaskEnvironment task_environment_;
Expand All @@ -149,7 +139,7 @@ TEST_F(SignalDatabaseImplTest, WriteSampleAndRead) {

// No entries to begin with.
ExpectGetSamples(signal_type, name_hash, now.UTCMidnight(), {});
ExpectGetAllSamples(now.UTCMidnight(), {});
ExpectGetAllSamples({});

// Write a sample.
int32_t value = 10;
Expand All @@ -161,8 +151,7 @@ TEST_F(SignalDatabaseImplTest, WriteSampleAndRead) {
// Read back the sample and verify.
ExpectGetSamples(signal_type, name_hash, now.UTCMidnight(),
{{timestamp, value}});
ExpectGetAllSamples(now.UTCMidnight(),
{SignalDatabase::DbEntry{.type = signal_type,
ExpectGetAllSamples({SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp,
.value = value}});
Expand All @@ -176,8 +165,7 @@ TEST_F(SignalDatabaseImplTest, WriteSampleAndRead) {

ExpectGetSamples(signal_type, name_hash, now.UTCMidnight(),
{{timestamp, value}, {timestamp, value2}});
ExpectGetAllSamples(now.UTCMidnight(),
{SignalDatabase::DbEntry{.type = signal_type,
ExpectGetAllSamples({SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp,
.value = value},
Expand Down Expand Up @@ -214,13 +202,12 @@ TEST_F(SignalDatabaseImplTest, WriteSampleAndReadWithPrefixMismatch) {
ExpectGetSamples(signal_type_3, name_hash_3, now.UTCMidnight(), {});
// Read samples for signal 4 and verify.
ExpectGetSamples(signal_type_4, name_hash_4, now.UTCMidnight(), {});
ExpectGetAllSamples(now.UTCMidnight(),
{
SignalDatabase::DbEntry{.type = signal_type_1,
.name_hash = name_hash_1,
.time = timestamp,
.value = value},
});
ExpectGetAllSamples({
SignalDatabase::DbEntry{.type = signal_type_1,
.name_hash = name_hash_1,
.time = timestamp,
.value = value},
});
}

TEST_F(SignalDatabaseImplTest, DeleteSamples) {
Expand Down Expand Up @@ -303,24 +290,27 @@ TEST_F(SignalDatabaseImplTest, WriteMultipleSamplesAndRunCompaction) {
signal_db_->WriteSample(signal_type, name_hash, absl::nullopt,
base::DoNothing());
db_->UpdateCallback(true);
std::vector<SignalDatabase::DbEntry> all_cached_samples = {
SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp_day1_1,
.value = 0},
SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp_day1_2,
.value = 0},
SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp_day2_1,
.value = 0}};

EXPECT_EQ(3u, db_entries_.size());

// Verify samples for the day1. There should be two of them.
ExpectGetSamples(signal_type, name_hash, day1.UTCMidnight(),
day2.UTCMidnight(),
{{timestamp_day1_1, 0}, {timestamp_day1_2, 0}});
ExpectGetAllSamples(day1.UTCMidnight(), day2.UTCMidnight(),
{
SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp_day1_1,
.value = 0},
SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp_day1_2,
.value = 0},
});
ExpectGetAllSamples(all_cached_samples);

// Compact samples for the day1 and verify. We will have two samples, but one
// less entry.
Expand All @@ -338,17 +328,7 @@ TEST_F(SignalDatabaseImplTest, WriteMultipleSamplesAndRunCompaction) {
ExpectGetSamples(signal_type, name_hash, day1.UTCMidnight(),
day2.UTCMidnight(),
{{timestamp_day1_1, 0}, {timestamp_day1_2, 0}});
ExpectGetAllSamples(day1.UTCMidnight(), day2.UTCMidnight(),
{
SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp_day1_1,
.value = 0},
SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp_day1_2,
.value = 0},
});
ExpectGetAllSamples(all_cached_samples);

EXPECT_EQ(2u, db_entries_.size());

Expand All @@ -359,13 +339,7 @@ TEST_F(SignalDatabaseImplTest, WriteMultipleSamplesAndRunCompaction) {

ExpectGetSamples(signal_type, name_hash, day2.UTCMidnight(),
day3.UTCMidnight(), {{timestamp_day2_1, 0}});
ExpectGetAllSamples(day2.UTCMidnight(), day3.UTCMidnight(),
{
SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp_day2_1,
.value = 0},
});
ExpectGetAllSamples(all_cached_samples);

EXPECT_EQ(2u, db_entries_.size());

Expand All @@ -380,22 +354,14 @@ TEST_F(SignalDatabaseImplTest, WriteMultipleSamplesAndRunCompaction) {

ExpectGetSamples(signal_type, name_hash, day3.UTCMidnight(),
day3.UTCMidnight() + base::Days(1), {});
ExpectGetAllSamples(day3.UTCMidnight(), day3.UTCMidnight() + base::Days(1),
{});
ExpectGetAllSamples(all_cached_samples);

EXPECT_EQ(2u, db_entries_.size());

// Read a range of samples not aligned to midnight.
ExpectGetSamples(signal_type, name_hash, timestamp_day1_1 + base::Hours(1),
timestamp_day2_1 - base::Hours(1), {{timestamp_day1_2, 0}});
ExpectGetAllSamples(timestamp_day1_1 + base::Hours(1),
timestamp_day2_1 - base::Hours(1),
{
SignalDatabase::DbEntry{.type = signal_type,
.name_hash = name_hash,
.time = timestamp_day1_2,
.value = 0},
});
ExpectGetAllSamples(all_cached_samples);
}

} // namespace segmentation_platform

0 comments on commit 3a2c325

Please sign in to comment.