From 928fddc20e5da7a9a0ae8f7fbf3e5ba28f3a0463 Mon Sep 17 00:00:00 2001 From: Ash Logan Date: Wed, 17 Jan 2024 16:47:22 +1100 Subject: [PATCH] Overhaul endian handling in ByteBuffer and FastNBT Rather than juggling "swapped" and "unswapped" versions of integers, different library functions, #defines, etc., simply always read everything byte-by-byte. This works regardless of host CPU endianness, and got optimised down to either a normal load or a byteswap on every compiler I tested - only 1 instruction on most CPU architectures. This commit introduces a "Bytes" array type to keep endian-sensitive data separate from host data, alongside the needed C++ template machinery for it to work seamlessly. This approach is a little bit safer as well since you get length- and type-checking for most callsites. --- CONTRIBUTORS | 1 + src/ByteBuffer.cpp | 78 +++++++-------- src/Endianness.h | 183 +++++++++++++++++++++++------------ src/WorldStorage/FastNBT.cpp | 55 +++++------ src/WorldStorage/FastNBT.h | 13 +-- 5 files changed, 194 insertions(+), 136 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 6607464457..a7cab1688a 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -9,6 +9,7 @@ as provided in the LICENSE file. 
9caihezi AirOne01 Altenius +ashquarky BasedDoge (Donated AlchemistVillage prefabs) bearbin (Alexander Harkness) beeduck diff --git a/src/ByteBuffer.cpp b/src/ByteBuffer.cpp index 8121fc3efe..c5fe0a7c08 100644 --- a/src/ByteBuffer.cpp +++ b/src/ByteBuffer.cpp @@ -285,10 +285,9 @@ bool cByteBuffer::ReadBEInt16(Int16 & a_Value) CHECK_THREAD CheckValid(); NEEDBYTES(2); - UInt16 val; - ReadBuf(&val, 2); - val = ntohs(val); - memcpy(&a_Value, &val, 2); + Bytes bytes; + ReadBuf(bytes.data(), bytes.size()); + a_Value = NetworkToHost(bytes); return true; } @@ -301,8 +300,9 @@ bool cByteBuffer::ReadBEUInt16(UInt16 & a_Value) CHECK_THREAD CheckValid(); NEEDBYTES(2); - ReadBuf(&a_Value, 2); - a_Value = ntohs(a_Value); + Bytes bytes; + ReadBuf(bytes.data(), bytes.size()); + a_Value = NetworkToHost(bytes); return true; } @@ -315,10 +315,9 @@ bool cByteBuffer::ReadBEInt32(Int32 & a_Value) CHECK_THREAD CheckValid(); NEEDBYTES(4); - UInt32 val; - ReadBuf(&val, 4); - val = ntohl(val); - memcpy(&a_Value, &val, 4); + Bytes bytes; + ReadBuf(bytes.data(), bytes.size()); + a_Value = NetworkToHost(bytes); return true; } @@ -331,8 +330,9 @@ bool cByteBuffer::ReadBEUInt32(UInt32 & a_Value) CHECK_THREAD CheckValid(); NEEDBYTES(4); - ReadBuf(&a_Value, 4); - a_Value = ntohl(a_Value); + Bytes bytes; + ReadBuf(bytes.data(), bytes.size()); + a_Value = NetworkToHost(bytes); return true; } @@ -345,8 +345,9 @@ bool cByteBuffer::ReadBEInt64(Int64 & a_Value) CHECK_THREAD CheckValid(); NEEDBYTES(8); - ReadBuf(&a_Value, 8); - a_Value = NetworkToHostLong8(&a_Value); + Bytes bytes; + ReadBuf(bytes.data(), bytes.size()); + a_Value = NetworkToHost(bytes); return true; } @@ -359,8 +360,9 @@ bool cByteBuffer::ReadBEUInt64(UInt64 & a_Value) CHECK_THREAD CheckValid(); NEEDBYTES(8); - ReadBuf(&a_Value, 8); - a_Value = NetworkToHostULong8(&a_Value); + Bytes bytes; + ReadBuf(bytes.data(), bytes.size()); + a_Value = NetworkToHost(bytes); return true; } @@ -373,8 +375,9 @@ bool cByteBuffer::ReadBEFloat(float & 
a_Value) CHECK_THREAD CheckValid(); NEEDBYTES(4); - ReadBuf(&a_Value, 4); - a_Value = NetworkToHostFloat4(&a_Value); + Bytes bytes; + ReadBuf(bytes.data(), bytes.size()); + a_Value = NetworkToHost(bytes); return true; } @@ -387,8 +390,9 @@ bool cByteBuffer::ReadBEDouble(double & a_Value) CHECK_THREAD CheckValid(); NEEDBYTES(8); - ReadBuf(&a_Value, 8); - a_Value = NetworkToHostDouble8(&a_Value); + Bytes bytes; + ReadBuf(bytes.data(), bytes.size()); + a_Value = NetworkToHost(bytes); return true; } @@ -629,10 +633,8 @@ bool cByteBuffer::WriteBEInt16(Int16 a_Value) CHECK_THREAD CheckValid(); PUTBYTES(2); - UInt16 val; - memcpy(&val, &a_Value, 2); - val = htons(val); - return WriteBuf(&val, 2); + auto Converted = HostToNetwork(a_Value); + return WriteBuf(Converted.data(), Converted.size()); } @@ -644,8 +646,8 @@ bool cByteBuffer::WriteBEUInt16(UInt16 a_Value) CHECK_THREAD CheckValid(); PUTBYTES(2); - a_Value = htons(a_Value); - return WriteBuf(&a_Value, 2); + auto Converted = HostToNetwork(a_Value); + return WriteBuf(Converted.data(), Converted.size()); } @@ -657,8 +659,8 @@ bool cByteBuffer::WriteBEInt32(Int32 a_Value) CHECK_THREAD CheckValid(); PUTBYTES(4); - UInt32 Converted = HostToNetwork4(&a_Value); - return WriteBuf(&Converted, 4); + auto Converted = HostToNetwork(a_Value); + return WriteBuf(Converted.data(), Converted.size()); } @@ -670,8 +672,8 @@ bool cByteBuffer::WriteBEUInt32(UInt32 a_Value) CHECK_THREAD CheckValid(); PUTBYTES(4); - UInt32 Converted = HostToNetwork4(&a_Value); - return WriteBuf(&Converted, 4); + auto Converted = HostToNetwork(a_Value); + return WriteBuf(Converted.data(), Converted.size()); } @@ -683,8 +685,8 @@ bool cByteBuffer::WriteBEInt64(Int64 a_Value) CHECK_THREAD CheckValid(); PUTBYTES(8); - UInt64 Converted = HostToNetwork8(&a_Value); - return WriteBuf(&Converted, 8); + auto Converted = HostToNetwork(a_Value); + return WriteBuf(Converted.data(), Converted.size()); } @@ -696,8 +698,8 @@ bool cByteBuffer::WriteBEUInt64(UInt64 a_Value) 
CHECK_THREAD CheckValid(); PUTBYTES(8); - UInt64 Converted = HostToNetwork8(&a_Value); - return WriteBuf(&Converted, 8); + auto Converted = HostToNetwork(a_Value); + return WriteBuf(Converted.data(), Converted.size()); } @@ -709,8 +711,8 @@ bool cByteBuffer::WriteBEFloat(float a_Value) CHECK_THREAD CheckValid(); PUTBYTES(4); - UInt32 Converted = HostToNetwork4(&a_Value); - return WriteBuf(&Converted, 4); + auto Converted = HostToNetwork(a_Value); + return WriteBuf(Converted.data(), Converted.size()); } @@ -722,8 +724,8 @@ bool cByteBuffer::WriteBEDouble(double a_Value) CHECK_THREAD CheckValid(); PUTBYTES(8); - UInt64 Converted = HostToNetwork8(&a_Value); - return WriteBuf(&Converted, 8); + auto Converted = HostToNetwork(a_Value); + return WriteBuf(Converted.data(), Converted.size()); } diff --git a/src/Endianness.h b/src/Endianness.h index 0e8bc8e996..c756985877 100644 --- a/src/Endianness.h +++ b/src/Endianness.h @@ -1,86 +1,143 @@ - #pragma once -#undef ntohll -#define ntohll(x) (((static_cast(ntohl(static_cast(x)))) << 32) + ntohl(x >> 32)) - - - - - -// Changes endianness -inline UInt64 HostToNetwork8(const void * a_Value) +#include +template +using Bytes = std::array; + +// bit_cast used for going between ulong, float, etc. It's a new C++20 feature +#ifdef __cpp_lib_bit_cast +#include +using std::bit_cast; + +// Fallback in case we're using C++17 +#else +// bit-for-bit convert one type to another. In C++ the only non-UB way to do this is *memcpy*. Nearly every other +// option is a strict aliasing violation. 
+template +std::enable_if_t< + sizeof(To) == sizeof(From), + To> +bit_cast(const From &src) noexcept { - UInt64 buf; - memcpy( &buf, a_Value, sizeof( buf)); - buf = (( ( static_cast(htonl(static_cast(buf)))) << 32) + htonl(buf >> 32)); - return buf; + To dst; + std::memcpy(&dst, &src, sizeof(To)); + return dst; } +#endif - - - -inline UInt32 HostToNetwork4(const void * a_Value) +/** Converts a 16-bit host integer or float value to bytes in big-endian (Network) order. +@tparam Value The host 16-bit type (Int16, UInt16). Usually inferred. +@param a_Value The input integer or float value. +@return The resulting bytes. */ +template = true> +inline Bytes HostToNetwork(Value a_Value) { - UInt32 buf; - memcpy( &buf, a_Value, sizeof( buf)); - buf = ntohl( buf); - return buf; + UInt16 Bits = bit_cast(a_Value); + return + { + std::byte(Bits >> 8), + std::byte(Bits) + }; } - - - - -inline double NetworkToHostDouble8(const void * a_Value) +/** Converts a 32-bit host integer or float value to bytes in big-endian (Network) order. +@tparam Value The host 32-bit type (Int32, UInt32, float). Usually inferred. +@param a_Value The input integer or float value. +@return The resulting bytes. */ +template = true> +inline Bytes HostToNetwork(Value a_Value) { - UInt64 buf = 0; - memcpy(&buf, a_Value, 8); - buf = ntohll(buf); - double x; - memcpy(&x, &buf, sizeof(double)); - return x; + UInt32 Bits = bit_cast(a_Value); + return + { + std::byte(Bits >> 24), + std::byte(Bits >> 16), + std::byte(Bits >> 8), + std::byte(Bits) + }; } - - - - -inline Int64 NetworkToHostLong8(const void * a_Value) +/** Converts a 64-bit host integer or float value to bytes in big-endian (Network) order. +@tparam Value The host 64-bit type (Int64, UInt64, double). Usually inferred. +@param a_Value The input integer or float value. +@return The resulting bytes. 
*/ +template = true> +inline Bytes HostToNetwork(Value a_Value) { - UInt64 buf; - memcpy(&buf, a_Value, 8); - buf = ntohll(buf); - return *reinterpret_cast(&buf); + UInt64 Bits = bit_cast(a_Value); + return + { + std::byte(Bits >> 56), + std::byte(Bits >> 48), + std::byte(Bits >> 40), + std::byte(Bits >> 32), + std::byte(Bits >> 24), + std::byte(Bits >> 16), + std::byte(Bits >> 8), + std::byte(Bits) + }; } - - - - -inline UInt64 NetworkToHostULong8(const void * a_Value) +/** Reads a 16-bit integer or float value from big-endian (Network) bytes. +@tparam Value The desired 16-bit type (Int16, UInt16) +@param a_Value The input bytes. +@return The resulting integer or float value. */ +template = true> +inline Value NetworkToHost(Bytes a_Value) { - UInt64 buf; - memcpy(&buf, a_Value, 8); - buf = ntohll(buf); - return buf; + UInt16 val = UInt16( + UInt16(a_Value[0]) << 8 | + UInt16(a_Value[1])); + return bit_cast(val); } - - - - -inline float NetworkToHostFloat4(const void * a_Value) +/** Reads a 32-bit integer or float value from big-endian (Network) bytes. +@tparam Value The desired 32-bit type (Int32, UInt32, float) +@param a_Value The input bytes. +@return The resulting integer or float value. */ +template = true> +inline Value NetworkToHost(Bytes a_Value) { - UInt32 buf; - float x; - memcpy(&buf, a_Value, 4); - buf = ntohl(buf); - memcpy(&x, &buf, sizeof(float)); - return x; + UInt32 val = UInt32( + UInt32(a_Value[0]) << 24 | + UInt32(a_Value[1]) << 16 | + UInt32(a_Value[2]) << 8 | + UInt32(a_Value[3])); + return bit_cast(val); } +/** Reads a 64-bit integer or float value from big-endian (Network) bytes. +@tparam Value The desired 64-bit type (Int64, UInt64, double) +@param a_Value The input bytes. +@return The resulting integer or float value. 
*/ +template = true> +inline Value NetworkToHost(Bytes a_Value) +{ + UInt64 val = UInt64( + UInt64(a_Value[0]) << 56 | + UInt64(a_Value[1]) << 48 | + UInt64(a_Value[2]) << 40 | + UInt64(a_Value[3]) << 32 | + UInt64(a_Value[4]) << 24 | + UInt64(a_Value[5]) << 16 | + UInt64(a_Value[6]) << 8 | + UInt64(a_Value[7])); + return bit_cast(val); +} +/** Reads an integer or float type from its big-endian (Network) bytes. +@tparam Value The desired result type (Int16 / 32 / 64, UInt16 / 32 / 64, float, double). +@param a_Mem A pointer to the first input byte. Length is inferred from the result type. +@return The resulting integer or float value. - +Consider using NetworkToHost when the data is owned since it provides additional type safety (length is known). */ +template +inline Value NetworkBufToHost(const std::byte* a_Mem) +{ + // Copy unfortunately needed to add the length information required by the rest of the API. + // Gets completely optimised out in my testing. + Bytes bytes; + std::copy(a_Mem, a_Mem + sizeof(Value), bytes.begin()); + return NetworkToHost(bytes); +} diff --git a/src/WorldStorage/FastNBT.cpp b/src/WorldStorage/FastNBT.cpp index df93e21e47..030300387b 100644 --- a/src/WorldStorage/FastNBT.cpp +++ b/src/WorldStorage/FastNBT.cpp @@ -189,7 +189,7 @@ eNBTParseError cParsedNBT::ReadString(size_t & a_StringStart, size_t & a_StringL { NEEDBYTES(2, eNBTParseError::npStringMissingLength); a_StringStart = m_Pos + 2; - a_StringLen = static_cast(GetBEShort(m_Data.data() + m_Pos)); + a_StringLen = static_cast(NetworkBufToHost(m_Data.data() + m_Pos)); NEEDBYTES(2 + a_StringLen, eNBTParseError::npStringInvalidLength); m_Pos += 2 + a_StringLen; return eNBTParseError::npSuccess; @@ -247,7 +247,7 @@ eNBTParseError cParsedNBT::ReadList(eTagType a_ChildrenType) // Read the count: NEEDBYTES(4, eNBTParseError::npListMissingLength); - int Count = GetBEInt(m_Data.data() + m_Pos); + int Count = NetworkBufToHost(m_Data.data() + m_Pos); m_Pos += 4; auto MinChildSize = 
GetMinTagSize(a_ChildrenType); if ((Count < 0) || (Count > static_cast((m_Data.size() - m_Pos) / MinChildSize))) @@ -311,7 +311,7 @@ eNBTParseError cParsedNBT::ReadTag(void) case TAG_ByteArray: { NEEDBYTES(4, eNBTParseError::npArrayMissingLength); - int len = GetBEInt(m_Data.data() + m_Pos); + int len = NetworkBufToHost(m_Data.data() + m_Pos); m_Pos += 4; if (len < 0) { @@ -343,7 +343,7 @@ eNBTParseError cParsedNBT::ReadTag(void) case TAG_IntArray: { NEEDBYTES(4, eNBTParseError::npArrayMissingLength); - int len = GetBEInt(m_Data.data() + m_Pos); + int len = NetworkBufToHost(m_Data.data() + m_Pos); m_Pos += 4; if (len < 0) { @@ -539,7 +539,8 @@ void cFastNBTWriter::EndList(void) ASSERT(m_Stack[m_CurrentStack].m_Type == TAG_List); // Update the list count: - SetBEInt(m_Result.data() + m_Stack[m_CurrentStack].m_Pos, m_Stack[m_CurrentStack].m_Count); + auto Value = HostToNetwork(m_Stack[m_CurrentStack].m_Count); + std::copy(Value.begin(), Value.end(), m_Result.data() + m_Stack[m_CurrentStack].m_Pos); --m_CurrentStack; } @@ -561,8 +562,8 @@ void cFastNBTWriter::AddByte(const AString & a_Name, unsigned char a_Value) void cFastNBTWriter::AddShort(const AString & a_Name, Int16 a_Value) { TagCommon(a_Name, TAG_Short); - UInt16 Value = htons(static_cast(a_Value)); - m_Result.append(reinterpret_cast(&Value), 2); + auto Value = HostToNetwork(a_Value); + m_Result.append(Value.begin(), Value.end()); } @@ -572,8 +573,8 @@ void cFastNBTWriter::AddShort(const AString & a_Name, Int16 a_Value) void cFastNBTWriter::AddInt(const AString & a_Name, Int32 a_Value) { TagCommon(a_Name, TAG_Int); - UInt32 Value = htonl(static_cast(a_Value)); - m_Result.append(reinterpret_cast(&Value), 4); + auto Value = HostToNetwork(a_Value); + m_Result.append(Value.begin(), Value.end()); } @@ -583,8 +584,8 @@ void cFastNBTWriter::AddInt(const AString & a_Name, Int32 a_Value) void cFastNBTWriter::AddLong(const AString & a_Name, Int64 a_Value) { TagCommon(a_Name, TAG_Long); - UInt64 Value = 
HostToNetwork8(&a_Value); - m_Result.append(reinterpret_cast(&Value), 8); + auto Value = HostToNetwork(a_Value); + m_Result.append(Value.begin(), Value.end()); } @@ -594,8 +595,8 @@ void cFastNBTWriter::AddLong(const AString & a_Name, Int64 a_Value) void cFastNBTWriter::AddFloat(const AString & a_Name, float a_Value) { TagCommon(a_Name, TAG_Float); - UInt32 Value = HostToNetwork4(&a_Value); - m_Result.append(reinterpret_cast(&Value), 4); + auto Value = HostToNetwork(a_Value); + m_Result.append(Value.begin(), Value.end()); } @@ -605,8 +606,8 @@ void cFastNBTWriter::AddFloat(const AString & a_Name, float a_Value) void cFastNBTWriter::AddDouble(const AString & a_Name, double a_Value) { TagCommon(a_Name, TAG_Double); - UInt64 Value = HostToNetwork8(&a_Value); - m_Result.append(reinterpret_cast(&Value), 8); + auto Value = HostToNetwork(a_Value); + m_Result.append(Value.begin(), Value.end()); } @@ -616,8 +617,8 @@ void cFastNBTWriter::AddDouble(const AString & a_Name, double a_Value) void cFastNBTWriter::AddString(const AString & a_Name, const std::string_view a_Value) { TagCommon(a_Name, TAG_String); - const UInt16 Length = htons(static_cast(a_Value.size())); - m_Result.append(reinterpret_cast(&Length), sizeof(Length)); + auto Length = HostToNetwork(static_cast(a_Value.size())); + m_Result.append(Length.begin(), Length.end()); m_Result.append({ reinterpret_cast(a_Value.data()), a_Value.size() }); } @@ -628,8 +629,8 @@ void cFastNBTWriter::AddString(const AString & a_Name, const std::string_view a_ void cFastNBTWriter::AddByteArray(const AString & a_Name, const char * a_Value, size_t a_NumElements) { TagCommon(a_Name, TAG_ByteArray); - UInt32 len = htonl(static_cast(a_NumElements)); - m_Result.append(reinterpret_cast(&len), 4); + auto Length = HostToNetwork(static_cast(a_NumElements)); + m_Result.append(Length.begin(), Length.end()); m_Result.append(reinterpret_cast(a_Value), a_NumElements); } @@ -640,8 +641,8 @@ void cFastNBTWriter::AddByteArray(const AString & a_Name, 
const char * a_Value, void cFastNBTWriter::AddByteArray(const AString & a_Name, size_t a_NumElements, unsigned char a_Value) { TagCommon(a_Name, TAG_ByteArray); - UInt32 len = htonl(static_cast(a_NumElements)); - m_Result.append(reinterpret_cast(&len), 4); + auto Length = HostToNetwork(static_cast(a_NumElements)); + m_Result.append(Length.begin(), Length.end()); m_Result.append(a_NumElements, std::byte(a_Value)); } @@ -652,18 +653,18 @@ void cFastNBTWriter::AddByteArray(const AString & a_Name, size_t a_NumElements, void cFastNBTWriter::AddIntArray(const AString & a_Name, const Int32 * a_Value, size_t a_NumElements) { TagCommon(a_Name, TAG_IntArray); - UInt32 len = htonl(static_cast(a_NumElements)); + auto Length = HostToNetwork(static_cast(a_NumElements)); size_t cap = m_Result.capacity(); size_t size = m_Result.length(); if ((cap - size) < (4 + a_NumElements * 4)) { m_Result.reserve(size + 4 + (a_NumElements * 4)); } - m_Result.append(reinterpret_cast(&len), sizeof(len)); + m_Result.append(Length.begin(), Length.end()); for (size_t i = 0; i < a_NumElements; i++) { - UInt32 Element = htonl(static_cast(a_Value[i])); - m_Result.append(reinterpret_cast(&Element), sizeof(Element)); + auto Element = HostToNetwork(a_Value[i]); + m_Result.append(Element.begin(), Element.end()); } } @@ -684,7 +685,7 @@ void cFastNBTWriter::Finish(void) void cFastNBTWriter::WriteString(const std::string_view a_Data) { // TODO check size <= short max - UInt16 Len = htons(static_cast(a_Data.size())); - m_Result.append(reinterpret_cast(&Len), sizeof(Len)); + auto Length = HostToNetwork(static_cast(a_Data.size())); + m_Result.append(Length.begin(), Length.end()); m_Result.append(reinterpret_cast(a_Data.data()), a_Data.size()); } diff --git a/src/WorldStorage/FastNBT.h b/src/WorldStorage/FastNBT.h index d9c3881792..b2eb851d7d 100644 --- a/src/WorldStorage/FastNBT.h +++ b/src/WorldStorage/FastNBT.h @@ -227,21 +227,21 @@ class cParsedNBT inline Int16 GetShort(int a_Tag) const { 
ASSERT(m_Tags[static_cast(a_Tag)].m_Type == TAG_Short); - return GetBEShort(GetData(a_Tag)); + return NetworkBufToHost(GetData(a_Tag)); } /** Returns the value stored in an Int tag. Not valid for any other tag type. */ inline Int32 GetInt(int a_Tag) const { ASSERT(m_Tags[static_cast(a_Tag)].m_Type == TAG_Int); - return GetBEInt(GetData(a_Tag)); + return NetworkBufToHost(GetData(a_Tag)); } /** Returns the value stored in a Long tag. Not valid for any other tag type. */ inline Int64 GetLong(int a_Tag) const { ASSERT(m_Tags[static_cast(a_Tag)].m_Type == TAG_Long); - return NetworkToHostLong8(GetData(a_Tag)); + return NetworkBufToHost(GetData(a_Tag)); } /** Returns the value stored in a Float tag. Not valid for any other tag type. */ @@ -256,10 +256,7 @@ class cParsedNBT UNUSED_VAR(Check1); UNUSED_VAR(Check2); - Int32 i = GetBEInt(GetData(a_Tag)); - float f; - memcpy(&f, &i, sizeof(f)); - return f; + return NetworkBufToHost(GetData(a_Tag)); } /** Returns the value stored in a Double tag. Not valid for any other tag type. */ @@ -273,7 +270,7 @@ class cParsedNBT UNUSED_VAR(Check2); ASSERT(m_Tags[static_cast(a_Tag)].m_Type == TAG_Double); - return NetworkToHostDouble8(GetData(a_Tag)); + return NetworkBufToHost(GetData(a_Tag)); } /** Returns the value stored in a String tag. Not valid for any other tag type. */