Skip to content

Commit

Permalink
ORC-1315: [C++] Fix byte to integer conversions fail on platforms wit…
Browse files Browse the repository at this point in the history
…h unsigned char type

### What changes were proposed in this pull request?

This patch adds static_cast for the char type cast in RLE/ColumnReader.

### Why are the changes needed?

For the context of [ORC-1315](https://issues.apache.org/jira/browse/ORC-1315), this patch fixes signed char to unsigned char conversions fail where char is by default unsigned.

### How was this patch tested?

No new tests were added.

Closes #1607 from wgtmac/branch-1.8.

Authored-by: Gang Wu <ustcwg@gmail.com>
Signed-off-by: Gang Wu <ustcwg@gmail.com>
  • Loading branch information
wgtmac committed Aug 31, 2023
1 parent 89398a0 commit 48b26d7
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 24 deletions.
6 changes: 3 additions & 3 deletions c++/src/ByteRLE.cc
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ namespace orc {
if (bufferStart == bufferEnd) {
nextBuffer();
}
return *(bufferStart++);
return static_cast<signed char>(*(bufferStart++));
}

void ByteRleDecoderImpl::readHeader() {
Expand All @@ -377,7 +377,7 @@ namespace orc {
} else {
remainingValues = static_cast<size_t>(ch) + MINIMUM_REPEAT;
repeating = true;
value = readByte();
value = static_cast<char>(readByte());
}
}

Expand Down Expand Up @@ -466,7 +466,7 @@ namespace orc {
if (notNull) {
for(uint64_t i=0; i < count; ++i) {
if (notNull[position + i]) {
data[position + i] = readByte();
data[position + i] = static_cast<char>(readByte());
consumed += 1;
}
}
Expand Down
2 changes: 1 addition & 1 deletion c++/src/ColumnReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ namespace orc {
*/
void expandBytesToLongs(int64_t* buffer, uint64_t numValues) {
for(size_t i=numValues - 1; i < numValues; --i) {
buffer[i] = reinterpret_cast<char *>(buffer)[i];
buffer[i] = reinterpret_cast<signed char *>(buffer)[i];
}
}

Expand Down
2 changes: 1 addition & 1 deletion c++/src/RLEv1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ signed char RleDecoderV1::readByte() {
bufferStart = static_cast<const char*>(bufferPointer);
bufferEnd = bufferStart + bufferLength;
}
return *(bufferStart++);
return static_cast<signed char>(*(bufferStart++));
}

uint64_t RleDecoderV1::readLong() {
Expand Down
31 changes: 15 additions & 16 deletions c++/test/TestByteRle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ TEST(ByteRle, nullTest) {
char buffer[258];
char notNull[266];
char result[266];
buffer[0] = -128;
buffer[129] = -128;
buffer[0] = static_cast<char>(-128);
buffer[129] = static_cast<char>(-128);
for(int i=0; i < 128; ++i) {
buffer[1 + i] = static_cast<char>(i);
buffer[130 + i] = static_cast<char>(128 + i);
Expand Down Expand Up @@ -186,7 +186,7 @@ TEST(ByteRle, testNulls) {
createByteRleDecoder(
std::unique_ptr<orc::SeekableInputStream>
(new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer), 3)));
std::vector<char> data(16, -1);
std::vector<char> data(16, static_cast<char>(-1));
std::vector<char> notNull(data.size());
for (size_t i = 0; i < data.size(); ++i) {
notNull[i] = (i + 1) % 2;
Expand All @@ -198,17 +198,16 @@ TEST(ByteRle, testNulls) {
EXPECT_EQ((i*data.size() + j)/2, data[j]) << "Output wrong at "
<< (i * data.size() + j);
} else {
EXPECT_EQ(-1, data[j]) << "Output wrong at "
<< (i * data.size() + j);
EXPECT_EQ(static_cast<char>(-1), data[j]) << "Output wrong at "
<< (i * data.size() + j);
}
}
}
for (size_t i = 0; i < 8; ++i) {
rle->next(data.data(), data.size(), notNull.data());
for (size_t j = 0; j < data.size(); ++j) {
EXPECT_EQ(j % 2 == 0 ? -36 : -1,
data[j])
<< "Output wrong at " << (i * data.size() + j + 32);
EXPECT_EQ(j % 2 == 0 ? static_cast<char>(-36) : static_cast<char>(-1), data[j])
<< "Output wrong at " << (i * data.size() + j + 32);
}
}
}
Expand All @@ -221,26 +220,26 @@ TEST(ByteRle, testAllNulls) {
createByteRleDecoder(
std::unique_ptr<orc::SeekableInputStream>
(new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer))));
std::vector<char> data(16, -1);
std::vector<char> data(16, static_cast<char>(-1));
std::vector<char> allNull(data.size(), 0);
std::vector<char> noNull(data.size(), 1);
rle->next(data.data(), data.size(), allNull.data());
for (size_t i = 0; i < data.size(); ++i) {
EXPECT_EQ(-1, data[i]) << "Output wrong at " << i;
EXPECT_EQ(static_cast<char>(-1), data[i]) << "Output wrong at " << i;
}
rle->next(data.data(), data.size(), noNull.data());
for (size_t i = 0; i < data.size(); ++i) {
EXPECT_EQ(i, data[i]) << "Output wrong at " << i;
data[i] = -1;
data[i] = static_cast<char>(-1);
}
rle->next(data.data(), data.size(), allNull.data());
for (size_t i = 0; i < data.size(); ++i) {
EXPECT_EQ(-1, data[i]) << "Output wrong at " << i;
EXPECT_EQ(static_cast<char>(-1), data[i]) << "Output wrong at " << i;
}
for (size_t i = 0; i < 4; ++i) {
rle->next(data.data(), data.size(), noNull.data());
for (size_t j = 0; j < data.size(); ++j) {
EXPECT_EQ(-36, data[j]) << "Output wrong at " << i;
EXPECT_EQ(static_cast<char>(-36), data[j]) << "Output wrong at " << i;
}
}
rle->next(data.data(), data.size(), allNull.data());
Expand Down Expand Up @@ -1107,7 +1106,7 @@ TEST(BooleanRle, skipTestWithNulls) {
someNull[1] = 1;
std::vector<char> allNull(data.size(), 0);
for (size_t i = 0; i < 16384; i += 5) {
data.assign(data.size(), -1);
data.assign(data.size(), static_cast<char>(-1));
rle->next(data.data(), data.size(), someNull.data());
EXPECT_EQ(0, data[0]) << "Output wrong at " << i;
EXPECT_EQ(0, data[2]) << "Output wrong at " << i;
Expand All @@ -1118,7 +1117,7 @@ TEST(BooleanRle, skipTestWithNulls) {
rle->skip(4);
}
rle->skip(0);
data.assign(data.size(), -1);
data.assign(data.size(), static_cast<char>(-1));
rle->next(data.data(), data.size(), allNull.data());
for (size_t j = 0; j < data.size(); ++j) {
EXPECT_EQ(0, data[j]) << "Output wrong at " << i << ", " << j;
Expand Down Expand Up @@ -1390,7 +1389,7 @@ TEST(BooleanRle, seekTestWithNulls) {
EXPECT_EQ(i < 8192 ? i & 1 : (i / 3) & 1,
data[i])
<< "Output wrong at " << i;
data[0] = -1;
data[0] = static_cast<char>(-1);
rle->next(data.data(), 1, allNull.data());
EXPECT_EQ(0, data[0]) << "Output wrong at " << i;
} while (i != 0);
Expand Down
4 changes: 2 additions & 2 deletions c++/test/TestColumnReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ TEST(TestColumnReader, testByteWithNulls) {
EXPECT_EQ(0, longBatch->notNull[i]) << "Wrong value at " << i;
} else {
EXPECT_EQ(1, longBatch->notNull[i]) << "Wrong value at " << i;
EXPECT_EQ(static_cast<char>(next++), longBatch->data[i])
EXPECT_EQ(static_cast<char>(next++), static_cast<char>(longBatch->data[i]))
<< "Wrong value at " << i;
}
}
Expand Down Expand Up @@ -337,7 +337,7 @@ TEST(TestColumnReader, testByteSkipsWithNulls) {
ASSERT_EQ(true, !batch.hasNulls);
ASSERT_EQ(5, longBatch->numElements);
ASSERT_EQ(true, longBatch->hasNulls);
EXPECT_EQ(static_cast<char>(-1), longBatch->data[0]);
EXPECT_EQ(static_cast<char>(-1), static_cast<char>(longBatch->data[0]));
EXPECT_EQ(true, !longBatch->notNull[1]);
EXPECT_EQ(true, !longBatch->notNull[2]);
EXPECT_EQ(true, !longBatch->notNull[3]);
Expand Down
2 changes: 1 addition & 1 deletion tools/test/TestMatch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ namespace orc {
ASSERT_EQ(true, expected.nextLine(expectedLine));
line.clear();
printer->printRow(i);
EXPECT_EQ(expectedLine, line)
ASSERT_EQ(expectedLine, line)
<< "wrong output at row " << (rowCount + i);
}
rowCount += batch->numElements;
Expand Down

0 comments on commit 48b26d7

Please sign in to comment.