Skip to content

Commit

Permalink
[CARBONDATA-2884] Rename the methods of ByteUtil class to avoid the m…
Browse files Browse the repository at this point in the history
…isuse

The toBytes methods of the ByteUtil class apply an XOR operation to the data,
so the result is not the plain byte array of the real value.
It is better to rename these methods (for example, toBytes to toXorBytes) to avoid misuse.

This closes #2657
  • Loading branch information
QiangCai authored and jackylk committed Aug 30, 2018
1 parent 612552e commit f012f5b
Show file tree
Hide file tree
Showing 12 changed files with 307 additions and 163 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
double doubleData = columnPage.getDouble(rowId);
if (srcDataType == DataTypes.FLOAT) {
float out = (float) doubleData;
return ByteUtil.toBytes(out);
return ByteUtil.toXorBytes(out);
} else {
return ByteUtil.toBytes(doubleData);
return ByteUtil.toXorBytes(doubleData);
}
} else if (DataTypes.isDecimal(srcDataType)) {
throw new RuntimeException("unsupported type: " + srcDataType);
Expand All @@ -95,22 +95,22 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
long longData = columnPage.getLong(rowId);
if ((srcDataType == DataTypes.BYTE)) {
byte out = (byte) longData;
return ByteUtil.toBytes(out);
return ByteUtil.toXorBytes(out);
} else if (srcDataType == DataTypes.BOOLEAN) {
byte out = (byte) longData;
return ByteUtil.toBytes(ByteUtil.toBoolean(out));
} else if (srcDataType == DataTypes.SHORT) {
short out = (short) longData;
return ByteUtil.toBytes(out);
return ByteUtil.toXorBytes(out);
} else if (srcDataType == DataTypes.SHORT_INT) {
int out = (int) longData;
return ByteUtil.toBytes(out);
return ByteUtil.toXorBytes(out);
} else if (srcDataType == DataTypes.INT) {
int out = (int) longData;
return ByteUtil.toBytes(out);
return ByteUtil.toXorBytes(out);
} else {
// timestamp and long
return ByteUtil.toBytes(longData);
return ByteUtil.toXorBytes(longData);
}
} else if ((targetDataType == DataTypes.STRING) || (targetDataType == DataTypes.VARCHAR) || (
targetDataType == DataTypes.BYTE_ARRAY)) {
Expand All @@ -126,7 +126,7 @@ public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int ch
} else if (srcDataType == DataTypes.BYTE_ARRAY) {
return columnPage.getBytes(rowId);
} else if (srcDataType == DataTypes.DOUBLE) {
return ByteUtil.toBytes(columnPage.getDouble(rowId));
return ByteUtil.toXorBytes(columnPage.getDouble(rowId));
} else {
throw new RuntimeException("unsupported type: " + targetDataType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,15 @@ public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
} else if (dt == DataTypes.BOOLEAN) {
vector.putBoolean(vectorRow, ByteUtil.toBoolean(data[currentDataOffset]));
} else if (dt == DataTypes.SHORT) {
vector.putShort(vectorRow, ByteUtil.toShort(data, currentDataOffset, length));
vector.putShort(vectorRow, ByteUtil.toXorShort(data, currentDataOffset, length));
} else if (dt == DataTypes.INT) {
vector.putInt(vectorRow, ByteUtil.toInt(data, currentDataOffset, length));
vector.putInt(vectorRow, ByteUtil.toXorInt(data, currentDataOffset, length));
} else if (dt == DataTypes.LONG) {
vector.putLong(vectorRow,
DataTypeUtil.getDataBasedOnRestructuredDataType(data, vector.getBlockDataType(),
currentDataOffset, length));
} else if (dt == DataTypes.TIMESTAMP) {
vector.putLong(vectorRow, ByteUtil.toLong(data, currentDataOffset, length) * 1000L);
vector.putLong(vectorRow, ByteUtil.toXorLong(data, currentDataOffset, length) * 1000L);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,15 @@ public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) {
} else if (dt == DataTypes.BOOLEAN) {
vector.putBoolean(vectorRow, ByteUtil.toBoolean(value[0]));
} else if (dt == DataTypes.SHORT) {
vector.putShort(vectorRow, ByteUtil.toShort(value, 0, length));
vector.putShort(vectorRow, ByteUtil.toXorShort(value, 0, length));
} else if (dt == DataTypes.INT) {
vector.putInt(vectorRow, ByteUtil.toInt(value, 0, length));
vector.putInt(vectorRow, ByteUtil.toXorInt(value, 0, length));
} else if (dt == DataTypes.LONG) {
vector.putLong(vectorRow,
DataTypeUtil.getDataBasedOnRestructuredDataType(value, vector.getBlockDataType(), 0,
length));
} else if (dt == DataTypes.TIMESTAMP) {
vector.putLong(vectorRow, ByteUtil.toLong(value, 0, length) * 1000L);
vector.putLong(vectorRow, ByteUtil.toXorLong(value, 0, length) * 1000L);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ private Object getDataObject(ByteBuffer dataBuffer, int size) {
actualData = null;
} else {
actualData = this.directDictGenForDate.getValueFromSurrogate(
ByteUtil.toInt(value, 0, CarbonCommonConstants.INT_SIZE_IN_BYTE));
ByteUtil.toXorInt(value, 0, CarbonCommonConstants.INT_SIZE_IN_BYTE));
}
} else {
actualData = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(value, this.dataType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ private static Object getNoDictionaryDefaultValue(DataType datatype, byte[] defa
value = new String(defaultValue, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET));
noDictionaryDefaultValue = Long.parseLong(value);
} else if (datatype == DataTypes.TIMESTAMP) {
long timestampValue = ByteUtil.toLong(defaultValue, 0, defaultValue.length);
long timestampValue = ByteUtil.toXorLong(defaultValue, 0, defaultValue.length);
noDictionaryDefaultValue = timestampValue * 1000L;
} else {
noDictionaryDefaultValue =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ private BitSet setFilterdIndexToBitSet(DimensionColumnPage dimensionColumnPage,
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toBytes(key);
defaultValue = ByteUtil.toXorBytes(key);
}
} else {
if (dimColEvaluatorInfo.getDimension().getDataType() == DataTypes.STRING) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toBytes(key);
defaultValue = ByteUtil.toXorBytes(key);
}
} else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() != DataTypes.STRING) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toBytes(key);
defaultValue = ByteUtil.toXorBytes(key);
}
} else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() != DataTypes.STRING) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
Expand Down
59 changes: 53 additions & 6 deletions core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,6 @@ public static boolean toBoolean(final byte b) {
* @return
*/
public static byte[] toBytes(short val) {
val = (short)(val ^ Short.MIN_VALUE);
byte[] b = new byte[SIZEOF_SHORT];
b[1] = (byte) val;
val >>= 8;
Expand Down Expand Up @@ -448,7 +447,7 @@ public static short toShort(byte[] bytes, int offset, final int length) {
n <<= 8;
n ^= bytes[offset + 1] & 0xFF;
}
return (short)(n ^ Short.MIN_VALUE);
return n;
}

/**
Expand All @@ -458,7 +457,6 @@ public static short toShort(byte[] bytes, int offset, final int length) {
* @return
*/
public static byte[] toBytes(int val) {
val = val ^ Integer.MIN_VALUE;
byte[] b = new byte[4];
for (int i = 3; i > 0; i--) {
b[i] = (byte) val;
Expand Down Expand Up @@ -519,7 +517,7 @@ public static int toInt(byte[] bytes, int offset, final int length) {
n ^= bytes[i] & 0xFF;
}
}
return n ^ Integer.MIN_VALUE;
return n;
}

public static int toInt(byte[] bytes, int offset) {
Expand Down Expand Up @@ -550,7 +548,6 @@ public static void setShort(byte[] data, int offset, int value) {
* @return
*/
public static byte[] toBytes(long val) {
val = val ^ Long.MIN_VALUE;
byte[] b = new byte[8];
for (int i = 7; i > 0; i--) {
b[i] = (byte) val;
Expand Down Expand Up @@ -589,7 +586,7 @@ public static long toLong(byte[] bytes, int offset, final int length) {
l ^= bytes[i] & 0xFF;
}
}
return l ^ Long.MIN_VALUE;
return l;
}

private static IllegalArgumentException explainWrongLengthOrOffset(final byte[] bytes,
Expand Down Expand Up @@ -669,4 +666,54 @@ public static byte[] flatten(byte[][] input) {
return flattenedData;
}

/**
 * Encodes a short with its sign bit flipped, then serializes it with {@code toBytes(short)}.
 *
 * <p>Why the sign bit is flipped: when a numeric column is part of sort_columns it is stored
 * as a no-dictionary column and its data is sorted by comparing byte arrays. The raw
 * two's-complement form breaks that ordering because the sign bit of a negative number is 1.
 * For example, for int values:
 * <pre>
 *    1 : 00000000 00000000 00000000 00000001
 *   -1 : 11111111 11111111 11111111 11111111
 * </pre>
 * A byte-array comparison of these two reports the wrong result (1 &lt; -1).
 * XOR-ing with MIN_VALUE changes only the sign bit:
 * <pre>
 *    1 ^ MIN_VALUE : 10000000 00000000 00000000 00000001
 *   -1 ^ MIN_VALUE : 01111111 11111111 11111111 11111111
 * </pre>
 * After this transform the byte-array comparison reports the right result (1 &gt; -1).
 *
 * @param val the real value to encode
 * @return the bytes produced by {@code toBytes(short)} for {@code val ^ Short.MIN_VALUE};
 *         decode with {@code toXorShort}
 */
public static byte[] toXorBytes(short val) {
  // The XOR is evaluated as an int; cast back so the short overload of toBytes is selected.
  final short signFlipped = (short) (val ^ Short.MIN_VALUE);
  return toBytes(signFlipped);
}

/**
 * Encodes an int with its sign bit flipped, then serializes it with {@code toBytes(int)}.
 * The flip (XOR with {@link Integer#MIN_VALUE}) is what distinguishes this from the plain
 * {@code toBytes(int)}; the result is not the byte array of the real value.
 *
 * @param val the real value to encode
 * @return the bytes produced by {@code toBytes(int)} for {@code val ^ Integer.MIN_VALUE};
 *         decode with {@code toXorInt}
 */
public static byte[] toXorBytes(int val) {
  final int signFlipped = val ^ Integer.MIN_VALUE;
  return toBytes(signFlipped);
}

/**
 * Encodes a long with its sign bit flipped, then serializes it with {@code toBytes(long)}.
 * The flip (XOR with {@link Long#MIN_VALUE}) is what distinguishes this from the plain
 * {@code toBytes(long)}; the result is not the byte array of the real value.
 *
 * @param val the real value to encode
 * @return the bytes produced by {@code toBytes(long)} for {@code val ^ Long.MIN_VALUE};
 *         decode with {@code toXorLong}
 */
public static byte[] toXorBytes(long val) {
  final long signFlipped = val ^ Long.MIN_VALUE;
  return toBytes(signFlipped);
}

/**
 * Encodes a double by taking its {@link Double#doubleToLongBits(double)} representation and
 * passing it to {@code toXorBytes(long)}, which flips the sign bit before serializing.
 *
 * <p>NOTE(review): only the sign bit is flipped. For negative doubles the remaining bits grow
 * with magnitude, so two negative values would presumably compare in reverse order under a
 * byte-wise comparison - confirm whether any caller relies on ordering of negative doubles.
 *
 * @param val the real value to encode
 * @return the sign-flipped byte encoding of the value's long bits; decode with
 *         {@code toXorDouble}
 */
public static byte[] toXorBytes(double val) {
  final long rawBits = Double.doubleToLongBits(val);
  return toXorBytes(rawBits);
}

/**
 * Converts a byte array written by {@code toXorBytes(short)} back to the real value.
 * Reads the stored short with {@code toShort} and flips the sign bit back by XOR-ing
 * with {@link Short#MIN_VALUE}.
 *
 * @param bytes  the array holding the encoded value
 * @param offset the position in {@code bytes} passed through to {@code toShort}
 * @param length the length passed through to {@code toShort}
 * @return the real (un-flipped) short value
 */
public static short toXorShort(byte[] bytes, int offset, final int length) {
  final short stored = toShort(bytes, offset, length);
  // The XOR is evaluated as an int; narrow the result back to short.
  return (short) (stored ^ Short.MIN_VALUE);
}

/**
 * Converts a byte array written by {@code toXorBytes(int)} back to the real value.
 * Reads the stored int with {@code toInt} and flips the sign bit back by XOR-ing
 * with {@link Integer#MIN_VALUE}.
 *
 * @param bytes  the array holding the encoded value
 * @param offset the position in {@code bytes} passed through to {@code toInt}
 * @param length the length passed through to {@code toInt}
 * @return the real (un-flipped) int value
 */
public static int toXorInt(byte[] bytes, int offset, final int length) {
  final int stored = toInt(bytes, offset, length);
  return stored ^ Integer.MIN_VALUE;
}

/**
 * Converts a byte array written by {@code toXorBytes(long)} back to the real value.
 * Reads the stored long with {@code toLong} and flips the sign bit back by XOR-ing
 * with {@link Long#MIN_VALUE}.
 *
 * @param bytes  the array holding the encoded value
 * @param offset the position in {@code bytes} passed through to {@code toLong}
 * @param length the length passed through to {@code toLong}
 * @return the real (un-flipped) long value
 */
public static long toXorLong(byte[] bytes, int offset, final int length) {
  final long stored = toLong(bytes, offset, length);
  return stored ^ Long.MIN_VALUE;
}

/**
 * Converts a byte array written by {@code toXorBytes(double)} back to the real value.
 * Decodes the sign-flipped long with {@code toXorLong} and reinterprets those bits as a
 * double via {@link Double#longBitsToDouble(long)}.
 *
 * @param value  the array holding the encoded value
 * @param offset the position in {@code value} passed through to {@code toXorLong}
 * @param length the length passed through to {@code toXorLong}
 * @return the real double value
 */
public static double toXorDouble(byte[] value, int offset, int length) {
  final long rawBits = toXorLong(value, offset, length);
  return Double.longBitsToDouble(rawBits);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -324,13 +324,13 @@ public static byte[] getBytesBasedOnDataTypeForNoDictionaryColumn(String dimensi
if (actualDataType == DataTypes.BOOLEAN) {
return ByteUtil.toBytes(BooleanConvert.parseBoolean(dimensionValue));
} else if (actualDataType == DataTypes.SHORT) {
return ByteUtil.toBytes(Short.parseShort(dimensionValue));
return ByteUtil.toXorBytes(Short.parseShort(dimensionValue));
} else if (actualDataType == DataTypes.INT) {
return ByteUtil.toBytes(Integer.parseInt(dimensionValue));
return ByteUtil.toXorBytes(Integer.parseInt(dimensionValue));
} else if (actualDataType == DataTypes.LONG) {
return ByteUtil.toBytes(Long.parseLong(dimensionValue));
return ByteUtil.toXorBytes(Long.parseLong(dimensionValue));
} else if (actualDataType == DataTypes.DOUBLE) {
return ByteUtil.toBytes(Double.parseDouble(dimensionValue));
return ByteUtil.toXorBytes(Double.parseDouble(dimensionValue));
} else if (DataTypes.isDecimal(actualDataType)) {
return bigDecimalToByte(new BigDecimal(dimensionValue));
} else if (actualDataType == DataTypes.TIMESTAMP) {
Expand All @@ -344,7 +344,7 @@ public static byte[] getBytesBasedOnDataTypeForNoDictionaryColumn(String dimensi
dateFormatter = timeStampformatter.get();
}
dateToStr = dateFormatter.parse(dimensionValue);
return ByteUtil.toBytes(dateToStr.getTime());
return ByteUtil.toXorBytes(dateToStr.getTime());
} catch (ParseException e) {
throw new NumberFormatException(e.getMessage());
}
Expand Down Expand Up @@ -401,13 +401,13 @@ public static byte[] getBytesDataDataTypeForNoDictionaryColumn(Object dimensionV
if (actualDataType == DataTypes.BOOLEAN) {
return ByteUtil.toBytes((Boolean) dimensionValue);
} else if (actualDataType == DataTypes.SHORT) {
return ByteUtil.toBytes((Short) dimensionValue);
return ByteUtil.toXorBytes((Short) dimensionValue);
} else if (actualDataType == DataTypes.INT) {
return ByteUtil.toBytes((Integer) dimensionValue);
return ByteUtil.toXorBytes((Integer) dimensionValue);
} else if (actualDataType == DataTypes.LONG) {
return ByteUtil.toBytes((Long) dimensionValue);
return ByteUtil.toXorBytes((Long) dimensionValue);
} else if (actualDataType == DataTypes.TIMESTAMP) {
return ByteUtil.toBytes((Long)dimensionValue);
return ByteUtil.toXorBytes((Long)dimensionValue);
} else {
// Default action for String/Varchar
return ByteUtil.toBytes(dimensionValue.toString());
Expand Down Expand Up @@ -465,31 +465,31 @@ public static Object getDataBasedOnDataTypeForNoDictionaryColumn(byte[] dataInBy
if (isEmptyByteArray(dataInBytes)) {
return null;
}
return ByteUtil.toShort(dataInBytes, 0, dataInBytes.length);
return ByteUtil.toXorShort(dataInBytes, 0, dataInBytes.length);
} else if (actualDataType == DataTypes.INT) {
if (isEmptyByteArray(dataInBytes)) {
return null;
}
return ByteUtil.toInt(dataInBytes, 0, dataInBytes.length);
return ByteUtil.toXorInt(dataInBytes, 0, dataInBytes.length);
} else if (actualDataType == DataTypes.LONG) {
if (isEmptyByteArray(dataInBytes)) {
return null;
}
return ByteUtil.toLong(dataInBytes, 0, dataInBytes.length);
return ByteUtil.toXorLong(dataInBytes, 0, dataInBytes.length);
} else if (actualDataType == DataTypes.TIMESTAMP) {
if (isEmptyByteArray(dataInBytes)) {
return null;
}
if (isTimeStampConversion) {
return ByteUtil.toLong(dataInBytes, 0, dataInBytes.length) * 1000L;
return ByteUtil.toXorLong(dataInBytes, 0, dataInBytes.length) * 1000L;
} else {
return ByteUtil.toLong(dataInBytes, 0, dataInBytes.length);
return ByteUtil.toXorLong(dataInBytes, 0, dataInBytes.length);
}
} else if (actualDataType == DataTypes.DOUBLE) {
if (isEmptyByteArray(dataInBytes)) {
return null;
}
return ByteUtil.toDouble(dataInBytes, 0, dataInBytes.length);
return ByteUtil.toXorDouble(dataInBytes, 0, dataInBytes.length);
} else if (DataTypes.isDecimal(actualDataType)) {
if (isEmptyByteArray(dataInBytes)) {
return null;
Expand Down Expand Up @@ -774,7 +774,7 @@ public static byte[] convertDataToBytesBasedOnDataType(String data, ColumnSchema
try {
timeStampformatter.remove();
Date dateToStr = timeStampformatter.get().parse(data);
return ByteUtil.toBytes(dateToStr.getTime());
return ByteUtil.toXorBytes(dateToStr.getTime());
} catch (ParseException e) {
LOGGER.error(
"Cannot convert value to Time/Long type value. Value is considered as null" + e
Expand Down Expand Up @@ -969,9 +969,9 @@ public static long getDataBasedOnRestructuredDataType(byte[] data, DataType rest
int currentDataOffset, int length) {
long value = 0L;
if (restructuredDataType == DataTypes.INT) {
value = ByteUtil.toInt(data, currentDataOffset, length);
value = ByteUtil.toXorInt(data, currentDataOffset, length);
} else if (restructuredDataType == DataTypes.LONG) {
value = ByteUtil.toLong(data, currentDataOffset, length);
value = ByteUtil.toXorLong(data, currentDataOffset, length);
}
return value;
}
Expand Down
Loading

0 comments on commit f012f5b

Please sign in to comment.