Skip to content

Commit

Permalink
DRILL-1874: Support casting empty string to null numeric based on con…
Browse files Browse the repository at this point in the history
…figuration parameter
  • Loading branch information
hsuanyi committed Jan 9, 2015
1 parent f820649 commit 487d98e
Show file tree
Hide file tree
Showing 12 changed files with 380 additions and 16 deletions.
Expand Up @@ -18,13 +18,18 @@
package org.apache.drill.common.expression.fn;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

import java.util.Set;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.TypeProtos.MinorType;

public class CastFunctions {

// Maps each target minor type to the name of its cast function (e.g. BIGINT -> "castBIGINT").
private static Map<MinorType, String> TYPE2FUNC = new HashMap<>();
// The cast functions which need to be replaced when "drill.exec.functions.cast_empty_string_to_null" is set to true.
private static Set<String> CAST_FUNC_REPLACEMENT_NEEDED = new HashSet<>();
// Maps each replaced cast function to its replacement for non-nullable varchar input.
private static Map<String, String> CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE = new HashMap<>();
// Maps each replaced cast function to its replacement for nullable varchar input.
private static Map<String, String> CAST_FUNC_REPLACEMENT_FROM_NULLABLE = new HashMap<>();

static {
TYPE2FUNC.put(MinorType.BIGINT, "castBIGINT");
Expand All @@ -49,8 +54,40 @@ public class CastFunctions {
TYPE2FUNC.put(MinorType.DECIMAL28DENSE, "castDECIMAL28DENSE");
TYPE2FUNC.put(MinorType.DECIMAL38SPARSE, "castDECIMAL38SPARSE");
TYPE2FUNC.put(MinorType.DECIMAL38DENSE, "castDECIMAL38DENSE");

CAST_FUNC_REPLACEMENT_NEEDED.add(TYPE2FUNC.get(MinorType.INT));
CAST_FUNC_REPLACEMENT_NEEDED.add(TYPE2FUNC.get(MinorType.BIGINT));
CAST_FUNC_REPLACEMENT_NEEDED.add(TYPE2FUNC.get(MinorType.FLOAT4));
CAST_FUNC_REPLACEMENT_NEEDED.add(TYPE2FUNC.get(MinorType.FLOAT8));
CAST_FUNC_REPLACEMENT_NEEDED.add(TYPE2FUNC.get(MinorType.DECIMAL9));
CAST_FUNC_REPLACEMENT_NEEDED.add(TYPE2FUNC.get(MinorType.DECIMAL18));
CAST_FUNC_REPLACEMENT_NEEDED.add(TYPE2FUNC.get(MinorType.DECIMAL28SPARSE));
CAST_FUNC_REPLACEMENT_NEEDED.add(TYPE2FUNC.get(MinorType.DECIMAL38SPARSE));

CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.put(TYPE2FUNC.get(MinorType.INT), "castEmptyStringVarCharToNullableINT");
CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.put(TYPE2FUNC.get(MinorType.BIGINT), "castEmptyStringVarCharToNullableBIGINT");
CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.put(TYPE2FUNC.get(MinorType.FLOAT4), "castEmptyStringVarCharToNullableFLOAT4");
CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.put(TYPE2FUNC.get(MinorType.FLOAT8), "castEmptyStringVarCharToNullableFLOAT8");
CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.put(TYPE2FUNC.get(MinorType.DECIMAL9), "castEmptyStringVarCharToNullableDECIMAL9");
CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.put(TYPE2FUNC.get(MinorType.DECIMAL18), "castEmptyStringVarCharToNullableDECIMAL18");
CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.put(TYPE2FUNC.get(MinorType.DECIMAL28SPARSE), "castEmptyStringVarCharToNullableDECIMAL28SPARSE");
CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.put(TYPE2FUNC.get(MinorType.DECIMAL38SPARSE), "castEmptyStringVarCharToNullableDECIMAL38SPARSE");

CAST_FUNC_REPLACEMENT_FROM_NULLABLE.put(TYPE2FUNC.get(MinorType.INT), "castEmptyStringNullableVarCharToNullableINT");
CAST_FUNC_REPLACEMENT_FROM_NULLABLE.put(TYPE2FUNC.get(MinorType.BIGINT), "castEmptyStringNullableVarCharToNullableBIGINT");
CAST_FUNC_REPLACEMENT_FROM_NULLABLE.put(TYPE2FUNC.get(MinorType.FLOAT4), "castEmptyStringNullableVarCharToNullableFLOAT4");
CAST_FUNC_REPLACEMENT_FROM_NULLABLE.put(TYPE2FUNC.get(MinorType.FLOAT8), "castEmptyStringNullableVarCharToNullableFLOAT8");
CAST_FUNC_REPLACEMENT_FROM_NULLABLE.put(TYPE2FUNC.get(MinorType.DECIMAL9), "castEmptyStringNullableVarCharToNullableDECIMAL9");
CAST_FUNC_REPLACEMENT_FROM_NULLABLE.put(TYPE2FUNC.get(MinorType.DECIMAL18), "castEmptyStringNullableVarCharToNullableDECIMAL18");
CAST_FUNC_REPLACEMENT_FROM_NULLABLE.put(TYPE2FUNC.get(MinorType.DECIMAL28SPARSE), "castEmptyStringNullableVarCharToNullableDECIMAL28SPARSE");
CAST_FUNC_REPLACEMENT_FROM_NULLABLE.put(TYPE2FUNC.get(MinorType.DECIMAL38SPARSE), "castEmptyStringNullableVarCharToNullableDECIMAL38SPARSE");
}

/**
* Given the target type, get the name of the appropriate cast function.
* @param targetMinorType the target data type
* @return the name of the cast function registered for the target type
*/
public static String getCastFunc(MinorType targetMinorType) {
String func = TYPE2FUNC.get(targetMinorType);
if (func != null) {
Expand All @@ -61,4 +98,49 @@ public static String getCastFunc(MinorType targetMinorType) {
String.format("cast function for type %s is not defined", targetMinorType.name()));
}

/**
* Looks up the cast function that should stand in for the original one,
* picking the variant that matches the nullability of the input data.
*
* @param originalCastFunction name of the cast function to be replaced
* @param dataMode data mode of the input data; OPTIONAL selects the replacement for
*                 nullable input, REQUIRED the replacement for non-nullable input
* @return name of the replacing cast function
* @throws RuntimeException if the data mode is neither OPTIONAL nor REQUIRED, or if
*                          no replacement is registered for the original function
*/
public static String getReplacingCastFunction(String originalCastFunction, org.apache.drill.common.types.TypeProtos.DataMode dataMode) {
  final boolean nullableInput = dataMode == TypeProtos.DataMode.OPTIONAL;
  final boolean requiredInput = dataMode == TypeProtos.DataMode.REQUIRED;

  // Any other mode (including null) has no replacement table.
  if (!nullableInput && !requiredInput) {
    throw new RuntimeException(
        String.format("replacing cast function for datatype %s is not defined", dataMode));
  }

  return nullableInput
      ? getReplacingCastFunctionFromNullable(originalCastFunction)
      : getReplacingCastFunctionFromNonNullable(originalCastFunction);
}

/**
* Tells whether a cast has to be swapped for a replacing cast function.
* This is the case only when the input is a VARCHAR and the original function
* is registered in CAST_FUNC_REPLACEMENT_NEEDED.
*
* @param inputType minor type of the cast input
* @param originalfunction name of the original cast function
* @return true if the original function should be replaced, false otherwise
*/
public static boolean isReplacementNeeded(MinorType inputType, String originalfunction) {
  // Only VARCHAR input can be replaced; the set is not consulted otherwise.
  if (inputType != MinorType.VARCHAR) {
    return false;
  }
  return CAST_FUNC_REPLACEMENT_NEEDED.contains(originalfunction);
}

/**
* Resolves the replacement for a cast function whose input is a non-nullable varchar.
*
* @param originalCastFunction name of the cast function to be replaced
* @return the replacement registered in CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE
* @throws RuntimeException if no replacement is registered for the given function
*/
private static String getReplacingCastFunctionFromNonNullable(String originalCastFunction) {
  final boolean registered = CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.containsKey(originalCastFunction);
  if (!registered) {
    throw new RuntimeException(
        String.format("replacing cast function for %s is not defined", originalCastFunction));
  }
  return CAST_FUNC_REPLACEMENT_FROM_NONNULLABLE.get(originalCastFunction);
}

/**
* Resolves the replacement for a cast function whose input is a nullable varchar.
*
* @param originalCastFunction name of the cast function to be replaced
* @return the replacement registered in CAST_FUNC_REPLACEMENT_FROM_NULLABLE
* @throws RuntimeException if no replacement is registered for the given function
*/
private static String getReplacingCastFunctionFromNullable(String originalCastFunction) {
  final boolean registered = CAST_FUNC_REPLACEMENT_FROM_NULLABLE.containsKey(originalCastFunction);
  if (!registered) {
    throw new RuntimeException(
        String.format("replacing cast function for %s is not defined", originalCastFunction));
  }
  return CAST_FUNC_REPLACEMENT_FROM_NULLABLE.get(originalCastFunction);
}
}
22 changes: 21 additions & 1 deletion exec/java-exec/src/main/codegen/data/Casts.tdd
Expand Up @@ -172,7 +172,27 @@

{from: "Decimal38Sparse", to: "Decimal28Sparse", major: "DownwardDecimalComplexDecimalComplex", arraySize: "5"},
{from: "Decimal38Sparse", to: "Decimal18", major: "DownwardDecimalComplexDecimalSimple", javatype: "long"},
{from: "Decimal38Sparse", to: "Decimal9", major: "DownwardDecimalComplexDecimalSimple", javatype: "int"}
{from: "Decimal38Sparse", to: "Decimal9", major: "DownwardDecimalComplexDecimalSimple", javatype: "int"},

{from: "VarChar", to: "Int", major: "EmptyString", javaType:"Integer", primeType:"int"},
{from: "VarChar", to: "BigInt", major: "EmptyString", javaType: "Long", primeType: "long"},
{from: "VarChar", to: "Float4", major: "EmptyString", javaType:"Float", parse:"Float"},
{from: "VarChar", to: "Float8", major: "EmptyString", javaType:"Double", parse:"Double"},

{from: "VarChar", to: "Decimal9", major: "EmptyStringVarCharDecimalSimple", javatype: "int"},
{from: "VarChar", to: "Decimal18", major: "EmptyStringVarCharDecimalSimple", javatype: "long"},
{from: "VarChar", to: "Decimal28Sparse", major: "EmptyStringVarCharDecimalComplex", arraySize: "5"},
{from: "VarChar", to: "Decimal38Sparse", major: "EmptyStringVarCharDecimalComplex", arraySize: "6"},

{from: "NullableVarChar", to: "Int", major: "EmptyString", javaType:"Integer", primeType:"int"},
{from: "NullableVarChar", to: "BigInt", major: "EmptyString", javaType: "Long", primeType: "long"},
{from: "NullableVarChar", to: "Float4", major: "EmptyString", javaType:"Float", parse:"Float"},
{from: "NullableVarChar", to: "Float8", major: "EmptyString", javaType:"Double", parse:"Double"},

{from: "NullableVarChar", to: "Decimal9", major: "EmptyStringVarCharDecimalSimple", javatype: "int"},
{from: "NullableVarChar", to: "Decimal18", major: "EmptyStringVarCharDecimalSimple", javatype: "long"},
{from: "NullableVarChar", to: "Decimal28Sparse", major: "EmptyStringVarCharDecimalComplex", arraySize: "5"},
{from: "NullableVarChar", to: "Decimal38Sparse", major: "EmptyStringVarCharDecimalComplex", arraySize: "6"},

]
}
@@ -0,0 +1,85 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
<@pp.dropOutputFile />

<#--
  doError: emits a Java block that copies the input bytes in [in.start, in.end)
  out of in.buffer into a byte array and throws a NumberFormatException whose
  message is those bytes decoded as UTF-8.
  NOTE(review): no invocation of this macro is visible in this template; confirm
  whether it is still needed.
-->
<#macro doError>
{
byte[] buf = new byte[in.end - in.start];
in.buffer.getBytes(in.start, buf, 0, in.end - in.start);
throw new NumberFormatException(new String(buf, com.google.common.base.Charsets.UTF_8));
}
</#macro>

<#--
  For every cast entry whose major is "EmptyString", generate one Java class
  CastEmptyString${type.from}ToNullable${type.to} implementing DrillSimpleFunc.
  The generated function is registered under the name
  castEmptyString${type.from}ToNullable${type.to?upper_case} and writes a
  Nullable${type.to}Holder: the output is marked unset (out.isSet = 0) when the
  input is zero-length (in.end == in.start) or, for NullableVarChar input, when
  the input itself is unset; otherwise the input is parsed into out.value.
-->
<#list cast.types as type>
<#if type.major == "EmptyString">

<#-- Redirect the output to a dedicated Java source file for this from/to pair. -->
<@pp.changeOutputFile name="/org/apache/drill/exec/expr/fn/impl/gcast/CastEmptyString${type.from}ToNullable${type.to}.java" />

<#include "/@includes/license.ftl" />

package org.apache.drill.exec.expr.fn.impl.gcast;

import org.apache.drill.exec.expr.DrillSimpleFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.holders.*;
import org.apache.drill.exec.record.RecordBatch;
import javax.inject.Inject;
import io.netty.buffer.DrillBuf;

<#-- nulls=NullHandling.INTERNAL: eval() below assigns out.isSet itself on every path. -->
@SuppressWarnings("unused")
@FunctionTemplate(name = "castEmptyString${type.from}ToNullable${type.to?upper_case}", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls=NullHandling.INTERNAL)
public class CastEmptyString${type.from}ToNullable${type.to} implements DrillSimpleFunc{

@Param ${type.from}Holder in;
@Output Nullable${type.to}Holder out;

public void setup(RecordBatch incoming) {}

public void eval() {
<#-- Float4 / Float8: copy the input bytes, decode them as UTF-8 and parse with ${type.javaType}.parse${type.parse}. -->
<#if type.to == "Float4" || type.to == "Float8">
if(<#if type.from == "NullableVarChar"> in.isSet == 0 || </#if> in.end == in.start) {
out.isSet = 0;
} else{
out.isSet = 1;
byte[]buf=new byte[in.end-in.start];
in.buffer.getBytes(in.start,buf,0,in.end-in.start);
out.value=${type.javaType}.parse${type.parse}(new String(buf,com.google.common.base.Charsets.UTF_8));
}
<#-- Int: parsing is delegated to StringFunctionHelpers.varCharToInt on the raw buffer range. -->
<#elseif type.to=="Int">
if(<#if type.from == "NullableVarChar"> in.isSet == 0 || </#if> in.end == in.start) {
out.isSet = 0;
} else {
out.isSet = 1;
out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.varCharToInt(in.start, in.end, in.buffer);
}
<#-- BigInt: parsing is delegated to StringFunctionHelpers.varCharToLong on the raw buffer range. -->
<#elseif type.to == "BigInt">
if(<#if type.from == "NullableVarChar"> in.isSet == 0 || </#if> in.end == in.start) {
out.isSet = 0;
} else {
out.isSet = 1;
out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.varCharToLong(in.start, in.end, in.buffer);
}
</#if>
}
}

</#if> <#-- type.major -->
</#list>
Expand Up @@ -19,8 +19,13 @@

<#list cast.types as type>

<#if type.major == "VarCharDecimalSimple"> <#-- Cast function template for conversion from VarChar to Decimal9, Decimal18 -->
<@pp.changeOutputFile name="/org/apache/drill/exec/expr/fn/impl/gcast/Cast${type.from}${type.to}.java" />
<#if type.major == "VarCharDecimalSimple" || type.major == "EmptyStringVarCharDecimalSimple"> <#-- Cast function template for conversion from VarChar to Decimal9, Decimal18 -->

<#if type.major == "VarCharDecimalSimple">
<@pp.changeOutputFile name="/org/apache/drill/exec/expr/fn/impl/gcast/Cast${type.from}${type.to}.java"/>
<#elseif type.major == "EmptyStringVarCharDecimalSimple">
<@pp.changeOutputFile name="/org/apache/drill/exec/expr/fn/impl/gcast/CastEmptyString${type.from}ToNullable${type.to}.java"/>
</#if>

<#include "/@includes/license.ftl" />

Expand All @@ -44,18 +49,35 @@
import java.nio.ByteBuffer;

@SuppressWarnings("unused")
@FunctionTemplate(name = "cast${type.to?upper_case}", scope = FunctionTemplate.FunctionScope.DECIMAL_CAST, nulls=NullHandling.NULL_IF_NULL)
<#if type.major == "VarCharDecimalSimple">
@FunctionTemplate(name ="cast${type.to?upper_case}", scope = FunctionTemplate.FunctionScope.DECIMAL_CAST, nulls=NullHandling.NULL_IF_NULL)
public class Cast${type.from}${type.to} implements DrillSimpleFunc {

<#elseif type.major == "EmptyStringVarCharDecimalSimple">
@FunctionTemplate(name ="castEmptyString${type.from}ToNullable${type.to?upper_case}", scope = FunctionTemplate.FunctionScope.DECIMAL_CAST, nulls=NullHandling.INTERNAL)
public class CastEmptyString${type.from}ToNullable${type.to} implements DrillSimpleFunc {
</#if>
@Param ${type.from}Holder in;
@Param BigIntHolder precision;
@Param BigIntHolder scale;
<#if type.major == "VarCharDecimalSimple">
@Output ${type.to}Holder out;
<#elseif type.major == "EmptyStringVarCharDecimalSimple">
@Output Nullable${type.to}Holder out;
</#if>
public void setup(RecordBatch incoming) {
}
public void eval() {
<#if type.major == "EmptyStringVarCharDecimalSimple">
// Check if the input is null or empty string
if(<#if type.from == "NullableVarChar"> in.isSet == 0 || </#if> in.end == in.start) {
out.isSet = 0;
return;
}
out.isSet = 1;
</#if>
// Assign the scale and precision
out.scale = (int) scale.value;
Expand All @@ -64,10 +86,13 @@ public void eval() {
int readIndex = in.start;
int endIndex = in.end;
<#if type.major == "VarCharDecimalSimple">
// Check if its an empty string
if (endIndex - readIndex == 0) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Empty String, cannot cast to Decimal");
}
</#if>
// Starting position of fractional part
int scaleIndex = -1;
// true if we have a negative sign at the beginning
Expand Down Expand Up @@ -167,8 +192,13 @@ public void eval() {
}
}
<#elseif type.major == "VarCharDecimalComplex"> <#-- Cast function template for conversion from VarChar to Decimal28, Decimal38 -->
<@pp.changeOutputFile name="/org/apache/drill/exec/expr/fn/impl/gcast/Cast${type.from}${type.to}.java" />
<#elseif type.major == "VarCharDecimalComplex" || type.major == "EmptyStringVarCharDecimalComplex"> <#-- Cast function template for conversion from VarChar to Decimal28, Decimal38 -->

<#if type.major == "VarCharDecimalComplex">
<@pp.changeOutputFile name="/org/apache/drill/exec/expr/fn/impl/gcast/Cast${type.from}${type.to}.java"/>
<#elseif type.major == "EmptyStringVarCharDecimalComplex">
<@pp.changeOutputFile name="/org/apache/drill/exec/expr/fn/impl/gcast/CastEmptyString${type.from}ToNullable${type.to}.java"/>
</#if>

<#include "/@includes/license.ftl" />

Expand All @@ -191,21 +221,39 @@ public void eval() {
import java.nio.ByteBuffer;

@SuppressWarnings("unused")
<#if type.major == "VarCharDecimalComplex">
@FunctionTemplate(name = "cast${type.to?upper_case}", scope = FunctionTemplate.FunctionScope.DECIMAL_CAST, nulls=NullHandling.NULL_IF_NULL)
public class Cast${type.from}${type.to} implements DrillSimpleFunc {
<#elseif type.major == "EmptyStringVarCharDecimalComplex">
@FunctionTemplate(name = "castEmptyString${type.from}ToNullable${type.to?upper_case}", scope = FunctionTemplate.FunctionScope.DECIMAL_CAST, nulls=NullHandling.INTERNAL)
public class CastEmptyString${type.from}ToNullable${type.to} implements DrillSimpleFunc {
</#if>
@Param ${type.from}Holder in;
@Inject DrillBuf buffer;
@Param BigIntHolder precision;
@Param BigIntHolder scale;
<#if type.major == "VarCharDecimalComplex">
@Output ${type.to}Holder out;
<#elseif type.major == "EmptyStringVarCharDecimalComplex">
@Output Nullable${type.to}Holder out;
</#if>
public void setup(RecordBatch incoming) {
int size = ${type.arraySize} * (org.apache.drill.exec.util.DecimalUtility.integerSize);
buffer = buffer.reallocIfNeeded(size);
}
public void eval() {
<#if type.major == "EmptyStringVarCharDecimalComplex">
// Check if the input is null or empty string
if(<#if type.from == "NullableVarChar"> in.isSet == 0 || </#if> in.end == in.start) {
out.isSet = 0;
return;
}
out.isSet = 1;
</#if>
out.buffer = buffer;
out.start = 0;
Expand Down Expand Up @@ -241,9 +289,12 @@ public void eval() {
scaleIndex = readIndex; // Fractional part starts at the first position
}
<#if type.major == "VarCharDecimalComplex">
// Check if its an empty string
if (in.end - readIndex == 0) {
throw new org.apache.drill.common.exceptions.DrillRuntimeException("Empty String, cannot cast to Decimal");
}
</#if>
// Store start index for the second pass
startIndex = readIndex;
Expand Down
Expand Up @@ -104,6 +104,9 @@ public interface ExecConstants {
public static final String SLICE_TARGET = "planner.slice_target";
public static final OptionValidator SLICE_TARGET_OPTION = new PositiveLongValidator(SLICE_TARGET, Long.MAX_VALUE, 100000);

public static final String CAST_TO_NULLABLE_NUMERIC = "drill.exec.functions.cast_empty_string_to_null";
public static final OptionValidator CAST_TO_NULLABLE_NUMERIC_OPTION = new BooleanValidator(CAST_TO_NULLABLE_NUMERIC, false);

/**
* HashTable runtime settings
*/
Expand Down
Expand Up @@ -520,8 +520,8 @@ public LogicalExpression visitCastExpression(CastExpression e, FunctionImplement
// if the cast is pointless, remove it.
LogicalExpression input = e.getInput().accept(this, value);

MajorType newMajor = e.getMajorType();
MinorType newMinor = input.getMajorType().getMinorType();
MajorType newMajor = e.getMajorType(); // Output type
MinorType newMinor = input.getMajorType().getMinorType(); // Input type

if (castEqual(e.getPosition(), input.getMajorType(), newMajor)) {
return input; // don't do pointless cast.
Expand Down Expand Up @@ -618,5 +618,4 @@ private boolean castEqual(ExpressionPosition pos, MajorType from, MajorType to)
}
}
}

}

0 comments on commit 487d98e

Please sign in to comment.