Skip to content

Commit

Permalink
[zip] Normalize paths
Browse files Browse the repository at this point in the history
When reading and extracting ZIP archives, ZipReader normalizes file
paths.

An absolute path (starting with "/") is normalized as a relative path
starting with "ROOT".

Components ".." in the path are replaced with "UP".

Components "." in the path are replaced with "DOT".

Duplicated path separators "/" are collapsed.

Characters that are not valid in Windows file names are replaced with
the replacement character U+FFFD �. This replacement is done even on
POSIX systems where these characters are valid.


BUG=chromium:953256
TEST=autoninja -C out/Default zlib_unittests && out/Default/zlib_unittests

Change-Id: I5062e0c58afaeec1b4e858eedda97e35d83316b1
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3538040
Reviewed-by: Alex Danilo <adanilo@chromium.org>
Commit-Queue: François Degros <fdegros@chromium.org>
Cr-Commit-Position: refs/heads/main@{#984716}
  • Loading branch information
fdegros authored and Chromium LUCI CQ committed Mar 24, 2022
1 parent e067f5d commit 6c8cfa8
Show file tree
Hide file tree
Showing 5 changed files with 243 additions and 86 deletions.
Binary file modified third_party/zlib/google/test/data/Mixed Paths.zip
Binary file not shown.
148 changes: 128 additions & 20 deletions third_party/zlib/google/zip_reader.cc
Expand Up @@ -59,6 +59,26 @@ std::ostream& operator<<(std::ostream& out, UnzipError error) {
#undef SWITCH_ERR
}

bool IsValidFileNameCharacterOnWindows(char16_t c) {
if (c < 32)
return false;

switch (c) {
case '<': // Less than
case '>': // Greater than
case ':': // Colon
case '"': // Double quote
case '|': // Vertical bar or pipe
case '?': // Question mark
case '*': // Asterisk
case '/': // Forward slash
case '\\': // Backslash
return false;
}

return true;
}

// A writer delegate that writes to a given string.
class StringWriterDelegate : public WriterDelegate {
public:
Expand Down Expand Up @@ -210,18 +230,10 @@ bool ZipReader::OpenEntry() {
return false;
}

entry_.path = base::FilePath::FromUTF16Unsafe(path_in_utf16);
entry_.original_size = info.uncompressed_size;

// Directory entries in ZIP have a path ending with "/".
entry_.is_directory = base::EndsWith(path_in_utf16, u"/");
// Normalize path.
Normalize(path_in_utf16);

// Check the entry path for directory traversal issues. We consider entry
// paths unsafe if they are absolute or if they contain "..". On Windows,
// IsAbsolute() returns false for paths starting with "/".
entry_.is_unsafe = entry_.path.ReferencesParent() ||
entry_.path.IsAbsolute() ||
base::StartsWith(path_in_utf16, u"/");
entry_.original_size = info.uncompressed_size;

// The file content of this entry is encrypted if flag bit 0 is set.
entry_.is_encrypted = info.flag & 1;
Expand Down Expand Up @@ -249,6 +261,73 @@ bool ZipReader::OpenEntry() {
return true;
}

void ZipReader::Normalize(base::StringPiece16 in) {
entry_.is_unsafe = true;

// Directory entries in ZIP have a path ending with "/".
entry_.is_directory = base::EndsWith(in, u"/");

std::u16string normalized_path;
if (base::StartsWith(in, u"/")) {
normalized_path = u"ROOT";
entry_.is_unsafe = false;
}

for (;;) {
// Consume initial path separators.
const base::StringPiece16::size_type i = in.find_first_not_of(u'/');
if (i == base::StringPiece16::npos)
break;

in.remove_prefix(i);
DCHECK(!in.empty());

// Isolate next path component.
const base::StringPiece16 part = in.substr(0, in.find_first_of(u'/'));
DCHECK(!part.empty());

in.remove_prefix(part.size());

if (!normalized_path.empty())
normalized_path += u'/';

if (part == u".") {
normalized_path += u"DOT";
entry_.is_unsafe = true;
continue;
}

if (part == u"..") {
normalized_path += u"UP";
entry_.is_unsafe = true;
continue;
}

// Windows has more restrictions than other systems when it comes to valid
// file paths. Replace Windows-invalid characters on all systems for
// consistency. In particular, this prevents a path component containing
// colon and backslash from being misinterpreted as an absolute path on
// Windows.
for (const char16_t c : part) {
normalized_path += IsValidFileNameCharacterOnWindows(c) ? c : 0xFFFD;
}

entry_.is_unsafe = false;
}

// If the entry is a directory, add the final path separator to the entry
// path.
if (entry_.is_directory && !normalized_path.empty()) {
normalized_path += u'/';
entry_.is_unsafe = false;
}

entry_.path = base::FilePath::FromUTF16Unsafe(normalized_path);

// By construction, we should always get a relative path.
DCHECK(!entry_.path.IsAbsolute()) << entry_.path;
}

bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate,
uint64_t num_bytes_to_extract) const {
DCHECK(zip_file_);
Expand Down Expand Up @@ -504,12 +583,26 @@ FileWriterDelegate::~FileWriterDelegate() {}

bool FileWriterDelegate::PrepareOutput() {
DCHECK(file_);
const bool ok = file_->IsValid();
if (ok) {
DCHECK_EQ(file_->GetLength(), 0)
<< " The output file should be initially empty";

if (!file_->IsValid()) {
LOG(ERROR) << "File is not valid";
return false;
}
return ok;

const int64_t length = file_->GetLength();
if (length < 0) {
PLOG(ERROR) << "Cannot get length of file handle "
<< file_->GetPlatformFile();
return false;
}

if (length > 0) {
PLOG(ERROR) << "File handle " << file_->GetPlatformFile()
<< " is not empty: Its length is " << length << " bytes";
return false;
}

return true;
}

bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) {
Expand Down Expand Up @@ -553,10 +646,25 @@ bool FilePathWriterDelegate::PrepareOutput() {

owned_file_.Initialize(output_file_path_,
base::File::FLAG_CREATE | base::File::FLAG_WRITE);
PLOG_IF(ERROR, !owned_file_.IsValid())
<< "Cannot create file " << Redact(output_file_path_) << ": "
<< base::File::ErrorToString(owned_file_.error_details());
return FileWriterDelegate::PrepareOutput();
if (!owned_file_.IsValid()) {
PLOG(ERROR) << "Cannot create file " << Redact(output_file_path_) << ": "
<< base::File::ErrorToString(owned_file_.error_details());
return false;
}

const int64_t length = owned_file_.GetLength();
if (length < 0) {
PLOG(ERROR) << "Cannot get length of file " << Redact(output_file_path_);
return false;
}

if (length > 0) {
PLOG(ERROR) << "File " << Redact(output_file_path_)
<< " is not empty: Its length is " << length << " bytes";
return false;
}

return true;
}

void FilePathWriterDelegate::OnError() {
Expand Down
19 changes: 14 additions & 5 deletions third_party/zlib/google/zip_reader.h
Expand Up @@ -94,9 +94,14 @@ class ZipReader {
// if it wants to interpret this path correctly.
std::string path_in_original_encoding;

// Path of the entry, converted to Unicode. This path is usually relative
// (eg "foo/bar.txt"), but it can also be absolute (eg "/foo/bar.txt") or
// parent-relative (eg "../foo/bar.txt"). See also |is_unsafe|.
// Path of the entry, converted to Unicode. This path is relative (eg
// "foo/bar.txt"). Absolute paths (eg "/foo/bar.txt") or paths containing
// ".." or "." components (eg "../foo/bar.txt") are converted to safe
// relative paths. Eg:
// (In ZIP) -> (Entry.path)
// /foo/bar -> ROOT/foo/bar
// ../a -> UP/a
// ./a -> DOT/a
base::FilePath path;

// Size of the original uncompressed file, or 0 if the entry is a directory.
Expand All @@ -122,8 +127,8 @@ class ZipReader {
// False if the entry is a file.
bool is_directory;

// True if the entry path is considered unsafe, ie if it is absolute or if
// it contains "..".
// True if the entry path cannot be converted to a safe relative path. This
// happens if a file entry (not a directory) has a filename "." or "..".
bool is_unsafe;

// True if the file content is encrypted.
Expand Down Expand Up @@ -257,6 +262,10 @@ class ZipReader {
// reset automatically as needed.
bool OpenEntry();

// Normalizes the given path passed as UTF-16 string piece. Sets entry_.path,
// entry_.is_directory and entry_.is_unsafe.
void Normalize(base::StringPiece16 in);

// Extracts a chunk of the file to the target. Will post a task for the next
// chunk and success/failure/progress callbacks as necessary.
void ExtractChunk(base::File target_file,
Expand Down
18 changes: 8 additions & 10 deletions third_party/zlib/google/zip_reader_unittest.cc
Expand Up @@ -308,19 +308,18 @@ TEST_F(ZipReaderTest, DotDotFile) {
ZipReader reader;
ASSERT_TRUE(reader.Open(data_dir_.AppendASCII("evil.zip")));
base::FilePath target_path(FILE_PATH_LITERAL(
"../levilevilevilevilevilevilevilevilevilevilevilevil"));
"UP/levilevilevilevilevilevilevilevilevilevilevilevil"));
const ZipReader::Entry* entry = LocateAndOpenEntry(&reader, target_path);
ASSERT_TRUE(entry);
EXPECT_EQ(target_path, entry->path);
// This file is unsafe because of ".." in the file name.
EXPECT_TRUE(entry->is_unsafe);
EXPECT_FALSE(entry->is_unsafe);
EXPECT_FALSE(entry->is_directory);
}

TEST_F(ZipReaderTest, InvalidUTF8File) {
ZipReader reader;
ASSERT_TRUE(reader.Open(data_dir_.AppendASCII("evil_via_invalid_utf8.zip")));
base::FilePath target_path = base::FilePath::FromUTF8Unsafe(".�.\\evil.txt");
base::FilePath target_path = base::FilePath::FromUTF8Unsafe(".�.evil.txt");
const ZipReader::Entry* entry = LocateAndOpenEntry(&reader, target_path);
ASSERT_TRUE(entry);
EXPECT_EQ(target_path, entry->path);
Expand All @@ -337,7 +336,7 @@ TEST_F(ZipReaderTest, EncodingSjisAsUtf8) {
EXPECT_THAT(
GetPaths(data_dir_.AppendASCII("SJIS Bug 846195.zip")),
ElementsAre(
base::FilePath::FromUTF8Unsafe("�V�����t�H���_/SJIS_835C_�\\.txt"),
base::FilePath::FromUTF8Unsafe("�V�����t�H���_/SJIS_835C_�.txt"),
base::FilePath::FromUTF8Unsafe(
"�V�����t�H���_/�V�����e�L�X�g �h�L�������g.txt")));
}
Expand All @@ -349,7 +348,7 @@ TEST_F(ZipReaderTest, EncodingSjisAs1252) {
EXPECT_THAT(
GetPaths(data_dir_.AppendASCII("SJIS Bug 846195.zip"), "windows-1252"),
ElementsAre(base::FilePath::FromUTF8Unsafe(
"\u0090V‚µ‚¢ƒtƒHƒ‹ƒ_/SJIS_835C_ƒ\\.txt"),
"\u0090V‚µ‚¢ƒtƒHƒ‹ƒ_/SJIS_835C_ƒ.txt"),
base::FilePath::FromUTF8Unsafe(
"\u0090V‚µ‚¢ƒtƒHƒ‹ƒ_/\u0090V‚µ‚¢ƒeƒLƒXƒg "
"ƒhƒLƒ…ƒ\u0081ƒ“ƒg.txt")));
Expand All @@ -361,7 +360,7 @@ TEST_F(ZipReaderTest, EncodingSjisAsIbm866) {
EXPECT_THAT(
GetPaths(data_dir_.AppendASCII("SJIS Bug 846195.zip"), "IBM866"),
ElementsAre(
base::FilePath::FromUTF8Unsafe("РVВ╡ВвГtГHГЛГ_/SJIS_835C_Г\\.txt"),
base::FilePath::FromUTF8Unsafe("РVВ╡ВвГtГHГЛГ_/SJIS_835C_Г.txt"),
base::FilePath::FromUTF8Unsafe(
"РVВ╡ВвГtГHГЛГ_/РVВ╡ВвГeГLГXГg ГhГLГЕГБГУГg.txt")));
}
Expand All @@ -380,12 +379,11 @@ TEST_F(ZipReaderTest, AbsoluteFile) {
ZipReader reader;
ASSERT_TRUE(
reader.Open(data_dir_.AppendASCII("evil_via_absolute_file_name.zip")));
base::FilePath target_path(FILE_PATH_LITERAL("/evil.txt"));
base::FilePath target_path(FILE_PATH_LITERAL("ROOT/evil.txt"));
const ZipReader::Entry* entry = LocateAndOpenEntry(&reader, target_path);
ASSERT_TRUE(entry);
EXPECT_EQ(target_path, entry->path);
// This file is unsafe because of the absolute file name.
EXPECT_TRUE(entry->is_unsafe);
EXPECT_FALSE(entry->is_unsafe);
EXPECT_FALSE(entry->is_directory);
}

Expand Down

0 comments on commit 6c8cfa8

Please sign in to comment.