Skip to content

Commit

Permalink
Finish archive entry inclusion heuristics
Browse files Browse the repository at this point in the history
This CL removes the special case for hidden entries, since it isn't
expected to be useful long-term. It adds special cases for an entry deep
in the directory structure, and for one wildcard executable.

Our inclusion criteria is now:
- Pick one encrypted entry, if it exists
- Pick one entry at the deepest folder depth, if it exists
- Pick one random executable
- Sort the remainder by folder depth and file type weight, and take the
  highest priority.

Fixed: 1373673
Change-Id: I1d824b7f115745b965b42722e9ff5fa07ff90c15
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4327153
Commit-Queue: Daniel Rubery <drubery@chromium.org>
Reviewed-by: Lily Chen <chlily@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1115933}
  • Loading branch information
Daniel Rubery authored and Chromium LUCI CQ committed Mar 10, 2023
1 parent 15e134f commit 33d21f3
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "base/hash/sha1.h"
#include "base/metrics/histogram_macros.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "components/safe_browsing/content/common/file_type_policies.h"
#include "net/cert/x509_util.h"
Expand Down Expand Up @@ -39,17 +40,54 @@ int ArchiveEntryWeight(const ClientDownloadRequest::ArchivedBinary& entry) {
.file_weight();
}

bool IsHidden(const ClientDownloadRequest::ArchivedBinary& entry) {
#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_MAC) || BUILDFLAG(IS_CHROMEOS)
for (const base::FilePath::StringType& component :
base::FilePath::FromUTF8Unsafe(entry.file_basename()).GetComponents()) {
if (component.size() >= 1 && component[0] == '.') {
return true;
}
size_t ArchiveEntryDepth(const ClientDownloadRequest::ArchivedBinary& entry) {
return base::FilePath::FromUTF8Unsafe(entry.file_basename())
.GetComponents()
.size();
}

void SelectEncryptedEntry(
std::vector<ClientDownloadRequest::ArchivedBinary>* considering,
google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>*
selected) {
auto it = base::ranges::find_if(
*considering, &ClientDownloadRequest::ArchivedBinary::is_encrypted);
if (it != considering->end()) {
*selected->Add() = *it;
considering->erase(it);
}
}

void SelectDeepestEntry(
std::vector<ClientDownloadRequest::ArchivedBinary>* considering,
google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>*
selected) {
auto it = base::ranges::max_element(*considering, {}, &ArchiveEntryDepth);
if (it != considering->end()) {
*selected->Add() = *it;
considering->erase(it);
}
#endif
}

return false;
void SelectWildcardEntry(
std::vector<ClientDownloadRequest::ArchivedBinary>* considering,
google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>*
selected) {
int remaining_executables = base::ranges::count_if(
*considering, &ClientDownloadRequest::ArchivedBinary::is_executable);
for (auto it = considering->begin(); it != considering->end(); ++it) {
if (it->is_executable()) {
// Choose the current entry with probability 1/remaining_executables. This
// leads to a uniform distribution over all executables.
if (remaining_executables * base::RandDouble() < 1) {
*selected->Add() = *it;
considering->erase(it);
return;
}

--remaining_executables;
}
}
}

} // namespace
Expand Down Expand Up @@ -121,36 +159,50 @@ GURL GetFileSystemAccessDownloadUrl(const GURL& frame_url) {
google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
SelectArchiveEntries(const google::protobuf::RepeatedPtrField<
ClientDownloadRequest::ArchivedBinary>& src_binaries) {
// Limit the number of entries so we don't clog the backend.
// We can expand this limit by pushing a new download_file_types update.
size_t limit =
FileTypePolicies::GetInstance()->GetMaxArchivedBinariesToReport();

google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
selected;

std::vector<ClientDownloadRequest::ArchivedBinary> considering;
bool has_encrypted = false, has_hidden = false;
for (const ClientDownloadRequest::ArchivedBinary& binary : src_binaries) {
if (!has_encrypted && binary.is_encrypted()) {
has_encrypted = true;
*selected.Add() = binary;
} else if (!has_hidden && IsHidden(binary)) {
has_hidden = true;
*selected.Add() = binary;
} else {
considering.push_back(binary);
for (const ClientDownloadRequest::ArchivedBinary& entry : src_binaries) {
if (entry.is_executable() || entry.is_archive()) {
considering.push_back(entry);
}
}

if (static_cast<size_t>(selected.size()) < limit) {
SelectEncryptedEntry(&considering, &selected);
}

if (static_cast<size_t>(selected.size()) < limit) {
SelectDeepestEntry(&considering, &selected);
}

// Only add the wildcard if we otherwise wouldn't be able to fit all the
// entries.
if (static_cast<size_t>(selected.size()) < limit &&
considering.size() + selected.size() > limit) {
SelectWildcardEntry(&considering, &selected);
}

std::sort(considering.begin(), considering.end(),
[](const ClientDownloadRequest::ArchivedBinary& lhs,
const ClientDownloadRequest::ArchivedBinary& rhs) {
// The comparator should return true if `lhs` should come before
// `rhs`. We want the first item to have the highest weight.
// `rhs`. We want the shallowest and highest-weight entries first.
if (ArchiveEntryDepth(lhs) != ArchiveEntryDepth(rhs)) {
return ArchiveEntryDepth(lhs) < ArchiveEntryDepth(rhs);
}

return ArchiveEntryWeight(lhs) > ArchiveEntryWeight(rhs);
});

// Limit the number of entries so we don't clog the backend.
// We can expand this limit by pushing a new download_file_types update.
int limit = FileTypePolicies::GetInstance()->GetMaxArchivedBinariesToReport();
for (const ClientDownloadRequest::ArchivedBinary& binary : considering) {
if (selected.size() >= limit) {
if (static_cast<size_t>(selected.size()) >= limit) {
break;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ TEST(DownloadProtectionUtilTest, HigherWeightArchivesSelectedFirst) {
// Setup fake file-type config so that this test is not dependent on the
// actual policy values.
auto fake_config = std::make_unique<DownloadFileTypeConfig>();
fake_config->set_max_archived_binaries_to_report(1);
fake_config->set_max_archived_binaries_to_report(10);
fake_config->mutable_default_file_type()
->add_platform_settings()
->set_file_weight(0);
Expand Down Expand Up @@ -141,140 +141,155 @@ TEST(DownloadProtectionUtilTest, HigherWeightArchivesSelectedFirst) {
google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
selected_binaries = SelectArchiveEntries(binaries);

ASSERT_EQ(selected_binaries.size(), 1);
EXPECT_EQ(selected_binaries[0].file_basename(), "a.msi");
// Selecting a single deepest entry leads to just one zip in front of the
// higher-weight files. So we expect this order.
ASSERT_EQ(selected_binaries.size(), 3);
EXPECT_EQ(selected_binaries[0].file_basename(), "a.zip");
EXPECT_EQ(selected_binaries[1].file_basename(), "a.msi");
EXPECT_EQ(selected_binaries[2].file_basename(), "a.zip");
}

#if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_MAC) || BUILDFLAG(IS_CHROMEOS)
TEST(DownloadProtectionUtilTest, HiddenFileSelectedFirst) {
TEST(DownloadProtectionUtilTest, EncryptedFileSelected) {
safe_browsing::FileTypePoliciesTestOverlay scoped_dangerous;
{
// Setup fake file-type config so that this test is not dependent on the
// actual policy values.
auto fake_config = std::make_unique<DownloadFileTypeConfig>();
fake_config->set_max_archived_binaries_to_report(10);
fake_config->mutable_default_file_type()->add_platform_settings();
fake_config->set_max_archived_binaries_to_report(1);
scoped_dangerous.SwapConfig(fake_config);
}

ClientDownloadRequest::ArchivedBinary zip;
zip.set_file_basename("a.zip");
zip.set_is_archive(true);

ClientDownloadRequest::ArchivedBinary hidden;
hidden.set_file_basename(".hidden.dangerous");
hidden.set_is_executable(true);
ClientDownloadRequest::ArchivedBinary encrypted;
encrypted.set_file_basename("encrypted.dll");
encrypted.set_is_executable(true);
encrypted.set_is_encrypted(true);

google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
binaries;
*binaries.Add() = zip;
*binaries.Add() = hidden;
*binaries.Add() = encrypted;

google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
selected_binaries = SelectArchiveEntries(binaries);

ASSERT_EQ(selected_binaries.size(), 1);
EXPECT_EQ(selected_binaries[0].file_basename(), ".hidden.dangerous");
ASSERT_EQ(selected_binaries.size(), 2);
EXPECT_EQ(selected_binaries[0].file_basename(), "encrypted.dll");
EXPECT_EQ(selected_binaries[1].file_basename(), "a.zip");
}

TEST(DownloadProtectionUtilTest, OnlyOneHiddenFileSelected) {
TEST(DownloadProtectionUtilTest, OnlyOneEncryptedFilePrioritized) {
safe_browsing::FileTypePoliciesTestOverlay scoped_dangerous;
{
// Setup fake file-type config so that this test is not dependent on the
// actual policy values.
auto fake_config = std::make_unique<DownloadFileTypeConfig>();
fake_config->set_max_archived_binaries_to_report(10);
fake_config->mutable_default_file_type()->add_platform_settings();
fake_config->set_max_archived_binaries_to_report(2);
scoped_dangerous.SwapConfig(fake_config);
}

ClientDownloadRequest::ArchivedBinary zip;
zip.set_file_basename("a.zip");
zip.set_is_archive(true);
ClientDownloadRequest::ArchivedBinary exe;
exe.set_file_basename("evil.exe");
exe.set_is_archive(true);

ClientDownloadRequest::ArchivedBinary hidden;
hidden.set_file_basename(".hidden.dangerous");
hidden.set_is_executable(true);
ClientDownloadRequest::ArchivedBinary encrypted;
encrypted.set_file_basename("encrypted.dll");
encrypted.set_is_executable(true);
encrypted.set_is_encrypted(true);

google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
binaries;
*binaries.Add() = zip;
*binaries.Add() = hidden;
*binaries.Add() = hidden;
*binaries.Add() = exe;
*binaries.Add() = encrypted;

encrypted.set_file_basename("other_encrypted.dll");
*binaries.Add() = encrypted;

google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
selected_binaries = SelectArchiveEntries(binaries);

ASSERT_EQ(selected_binaries.size(), 2);
EXPECT_EQ(selected_binaries[0].file_basename(), ".hidden.dangerous");
EXPECT_EQ(selected_binaries[1].file_basename(), "a.zip");
// Only one encrypted DLL is prioritized over the more relevant exe.
ASSERT_EQ(selected_binaries.size(), 3);
EXPECT_EQ(selected_binaries[0].file_basename(), "encrypted.dll");
EXPECT_EQ(selected_binaries[1].file_basename(), "evil.exe");
EXPECT_EQ(selected_binaries[2].file_basename(), "other_encrypted.dll");
}
#endif

TEST(DownloadProtectionUtilTest, EncryptedFileSelected) {
TEST(DownloadProtectionUtilTest, DeepestEntrySelected) {
safe_browsing::FileTypePoliciesTestOverlay scoped_dangerous;
{
// Setup fake file-type config so that this test is not dependent on the
// actual policy values.
auto fake_config = std::make_unique<DownloadFileTypeConfig>();
fake_config->set_max_archived_binaries_to_report(10);
fake_config->mutable_default_file_type()->add_platform_settings();
fake_config->set_max_archived_binaries_to_report(1);
scoped_dangerous.SwapConfig(fake_config);
}

ClientDownloadRequest::ArchivedBinary zip;
zip.set_file_basename("a.zip");
zip.set_is_archive(true);

ClientDownloadRequest::ArchivedBinary encrypted;
encrypted.set_file_basename("encrypted.dll");
encrypted.set_is_executable(true);
encrypted.set_is_encrypted(true);
ClientDownloadRequest::ArchivedBinary deep;
deep.set_file_basename("hidden/in/deep/path/file.exe");
deep.set_is_executable(true);

google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
binaries;
*binaries.Add() = zip;
*binaries.Add() = encrypted;
*binaries.Add() = deep;

google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
selected_binaries = SelectArchiveEntries(binaries);

ASSERT_EQ(selected_binaries.size(), 1);
EXPECT_EQ(selected_binaries[0].file_basename(), "encrypted.dll");
ASSERT_EQ(selected_binaries.size(), 2);
EXPECT_EQ(selected_binaries[0].file_basename(),
"hidden/in/deep/path/file.exe");
EXPECT_EQ(selected_binaries[1].file_basename(), "a.zip");
}

TEST(DownloadProtectionUtilTest, OnlyOneEncryptedFileSelected) {
TEST(DownloadProtectionUtilTest, OnlyOneDeepestEntryPrioritized) {
safe_browsing::FileTypePoliciesTestOverlay scoped_dangerous;
{
// Setup fake file-type config so that this test is not dependent on the
// actual policy values.
auto fake_config = std::make_unique<DownloadFileTypeConfig>();
fake_config->set_max_archived_binaries_to_report(10);
fake_config->mutable_default_file_type()->add_platform_settings();
fake_config->set_max_archived_binaries_to_report(2);
scoped_dangerous.SwapConfig(fake_config);
}

ClientDownloadRequest::ArchivedBinary zip;
zip.set_file_basename("a.zip");
zip.set_is_archive(true);
ClientDownloadRequest::ArchivedBinary exe;
exe.set_file_basename("evil.exe");
exe.set_is_executable(true);

ClientDownloadRequest::ArchivedBinary encrypted;
encrypted.set_file_basename("encrypted.dll");
encrypted.set_is_executable(true);
encrypted.set_is_encrypted(true);
ClientDownloadRequest::ArchivedBinary deep;
deep.set_file_basename("hidden/in/deep/path/random.dll");
deep.set_is_executable(true);

google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
binaries;
*binaries.Add() = zip;
*binaries.Add() = encrypted;
*binaries.Add() = encrypted;
*binaries.Add() = exe;
*binaries.Add() = deep;

deep.set_file_basename("hidden/in/deep/path/other.dll");
*binaries.Add() = deep;

google::protobuf::RepeatedPtrField<ClientDownloadRequest::ArchivedBinary>
selected_binaries = SelectArchiveEntries(binaries);

ASSERT_EQ(selected_binaries.size(), 2);
EXPECT_EQ(selected_binaries[0].file_basename(), "encrypted.dll");
EXPECT_EQ(selected_binaries[1].file_basename(), "a.zip");
// One deep entry is prioritized over the more relevant entry at the root.
ASSERT_EQ(selected_binaries.size(), 3);
EXPECT_EQ(selected_binaries[0].file_basename(),
"hidden/in/deep/path/random.dll");
EXPECT_EQ(selected_binaries[1].file_basename(), "evil.exe");
EXPECT_EQ(selected_binaries[2].file_basename(),
"hidden/in/deep/path/other.dll");
}

} // namespace safe_browsing

0 comments on commit 33d21f3

Please sign in to comment.