Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add verification of the length of the protobuf message #6070

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 16 additions & 13 deletions dbms/src/Formats/ProtobufReader.cpp
Expand Up @@ -41,11 +41,12 @@ namespace
constexpr UInt64 END_OF_GROUP = static_cast<UInt64>(-2);

Int64 decodeZigZag(UInt64 n) { return static_cast<Int64>((n >> 1) ^ (~(n & 1) + 1)); }
}

[[noreturn]] void unknownFormat()
{
throw Exception("Protobuf messages are corrupted or don't match the provided schema. Please note that Protobuf stream is length-delimited: every message is prefixed by its length in varint.", ErrorCodes::UNKNOWN_PROTOBUF_FORMAT);
}

[[noreturn]] void ProtobufReader::SimpleReader::throwUnknownFormat() const
{
throw Exception("Protobuf messages are corrupted or don't match the provided schema. Please note that Protobuf stream is length-delimited: every message is prefixed by its length in varint.", ErrorCodes::UNKNOWN_PROTOBUF_FORMAT);
}


Expand All @@ -67,7 +68,10 @@ bool ProtobufReader::SimpleReader::startMessage()
if (unlikely(in.eof()))
return false;
size_t size_of_message = readVarint();
if (size_of_message == 0)
throwUnknownFormat();
Copy link
Member

@vitlibar vitlibar Jul 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should throw an exception in this case.
A message with all fields set by default doesn't seem to be incorrect.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in #6132.

current_message_end = cursor + size_of_message;
root_message_end = current_message_end;
}
else
{
Expand All @@ -91,7 +95,7 @@ void ProtobufReader::SimpleReader::endMessage()
else if (unlikely(cursor > current_message_end))
{
if (!parent_message_ends.empty())
unknownFormat();
throwUnknownFormat();
moveCursorBackward(cursor - current_message_end);
}
current_message_end = REACHED_END;
Expand Down Expand Up @@ -141,7 +145,7 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)

UInt64 varint = readVarint();
if (unlikely(varint & (static_cast<UInt64>(0xFFFFFFFF) << 32)))
unknownFormat();
throwUnknownFormat();
UInt32 key = static_cast<UInt32>(varint);
field_number = (key >> 3);
WireType wire_type = static_cast<WireType>(key & 0x07);
Expand Down Expand Up @@ -171,7 +175,7 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
case GROUP_END:
{
if (current_message_end != END_OF_GROUP)
unknownFormat();
throwUnknownFormat();
current_message_end = REACHED_END;
return false;
}
Expand All @@ -181,7 +185,7 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
return true;
}
}
unknownFormat();
throwUnknownFormat();
__builtin_unreachable();
}

Expand Down Expand Up @@ -257,7 +261,7 @@ void ProtobufReader::SimpleReader::ignore(UInt64 num_bytes)
void ProtobufReader::SimpleReader::moveCursorBackward(UInt64 num_bytes)
{
if (in.offset() < num_bytes)
unknownFormat();
throwUnknownFormat();
in.position() -= num_bytes;
cursor -= num_bytes;
}
Expand Down Expand Up @@ -294,7 +298,7 @@ UInt64 ProtobufReader::SimpleReader::continueReadingVarint(UInt64 first_byte)
PROTOBUF_READER_READ_VARINT_BYTE(10)
#undef PROTOBUF_READER_READ_VARINT_BYTE

unknownFormat();
throwUnknownFormat();
__builtin_unreachable();
}

Expand Down Expand Up @@ -327,7 +331,7 @@ void ProtobufReader::SimpleReader::ignoreVarint()
PROTOBUF_READER_IGNORE_VARINT_BYTE(10)
#undef PROTOBUF_READER_IGNORE_VARINT_BYTE

unknownFormat();
throwUnknownFormat();
}

void ProtobufReader::SimpleReader::ignoreGroup()
Expand Down Expand Up @@ -371,11 +375,10 @@ void ProtobufReader::SimpleReader::ignoreGroup()
break;
}
}
unknownFormat();
throwUnknownFormat();
}
}


// Implementation for a converter from any protobuf field type to any DB data type.
class ProtobufReader::ConverterBaseImpl : public ProtobufReader::IConverter
{
Expand Down
17 changes: 15 additions & 2 deletions dbms/src/Formats/ProtobufReader.h
Expand Up @@ -97,10 +97,19 @@ class ProtobufReader : private boost::noncopyable
bool readUInt(UInt64 & value);
template<typename T> bool readFixed(T & value);
bool readStringInto(PaddedPODArray<UInt8> & str);
bool ALWAYS_INLINE maybeCanReadValue() const { return field_end != REACHED_END; }

bool ALWAYS_INLINE maybeCanReadValue() const
{
if (field_end == REACHED_END)
return false;
if (cursor < root_message_end)
return true;

throwUnknownFormat();
Copy link
Member

@vitlibar vitlibar Jul 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bopohaa I am not sure if this code is useful because I think there are enough checks for the end of the message in the functions SimpleReader::read* and we don't need one more check. Have you met a case when the ProtobufReader didn't throw an exception while it was reading invalid data?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bopohaa I am not sure if this code is useful because I think there are enough checks for the end of the message in the functions SimpleReader::read* and we don't need one more check. Have you met a case when the ProtobufReader didn't throw an exception while it was reading invalid data?

SimpleReader :: read * reads the entire stream without splitting it into separate messages. Therefore, it can read for quite a long time if the messages arrive with sufficient intensity.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I understand your point. I agree it's better to detect errors as soon as possible. I've slightly changed your solution because it's faster to only compare cursor with field_end here (#6132)

}

private:
void readBinary(void* data, size_t size);
void readBinary(void * data, size_t size);
void ignore(UInt64 num_bytes);
void moveCursorBackward(UInt64 num_bytes);

Expand All @@ -119,13 +128,17 @@ class ProtobufReader : private boost::noncopyable
void ignoreVarint();
void ignoreGroup();

[[noreturn]] void throwUnknownFormat() const;

static constexpr UInt64 REACHED_END = 0;

ReadBuffer & in;
UInt64 cursor;
std::vector<UInt64> parent_message_ends;
UInt64 current_message_end;
UInt64 field_end;

UInt64 root_message_end;
};

class IConverter
Expand Down