Merge pull request #541 from cwida/nan

Disallow NaN/INF in the system
duckdb · Apr 13, 2020 · ee09b60 · ee09b60
2 parents 4ab3d45 + 1cc0cfc
commit ee09b60
Show file tree

Hide file tree

Showing 25 changed files with 510 additions and 292 deletions.
diff --git a/src/common/operator/cast_operators.cpp b/src/common/operator/cast_operators.cpp
@@ -134,6 +134,26 @@ template <> int64_t Cast::Operation(double input) {
 	return cast_with_overflow_check<double, int64_t>(input);
 }
 
+//===--------------------------------------------------------------------===//
+// Double -> float casts
+//===--------------------------------------------------------------------===//
+template <> bool TryCast::Operation(double input, float &result) {
+	auto res = (float) input;
+	if (std::isnan(res) || std::isinf(res)) {
+		return false;
+	}
+	result = res;
+	return true;
+}
+
+template <> float Cast::Operation(double input) {
+	float result;
+	if (!TryCast::Operation(input, result)) {
+		throw ValueOutOfRangeException(input, GetTypeId<double>(), GetTypeId<float>());
+	}
+	return result;
+}
+
 //===--------------------------------------------------------------------===//
 // Cast String -> Numeric
 //===--------------------------------------------------------------------===//
@@ -319,6 +339,19 @@ template <class T, bool NEGATIVE> static bool DoubleCastLoop(const char *buf, T
 	return pos > (NEGATIVE ? 1 : 0);
 }
 
+template<class T>
+bool CheckDoubleValidity(T value);
+
+template<>
+bool CheckDoubleValidity(float value) {
+	return Value::FloatIsValid(value);
+}
+
+template<>
+bool CheckDoubleValidity(double value) {
+	return Value::DoubleIsValid(value);
+}
+
 template <class T> static bool TryDoubleCast(const char *buf, T &result) {
 	if (!*buf) {
 		return false;
@@ -331,10 +364,18 @@ template <class T> static bool TryDoubleCast(const char *buf, T &result) {
 
 	result = 0;
 	if (!negative) {
-		return DoubleCastLoop<T, false>(buf, result);
+		if (!DoubleCastLoop<T, false>(buf, result)) {
+			return false;
+		}
 	} else {
-		return DoubleCastLoop<T, true>(buf, result);
+		if (!DoubleCastLoop<T, true>(buf, result)) {
+			return false;
+		}
 	}
+	if (!CheckDoubleValidity<T>(result)) {
+		return false;
+	}
+	return true;
 }
 
 template <> bool TryCast::Operation(string_t input, float &result) {

diff --git a/src/common/types/value.cpp b/src/common/types/value.cpp
@@ -144,14 +144,28 @@ Value Value::BIGINT(int64_t value) {
 	return result;
 }
 
+bool Value::FloatIsValid(float value) {
+	return !(std::isnan(value) || std::isinf(value));
+}
+
+bool Value::DoubleIsValid(double value) {
+	return !(std::isnan(value) || std::isinf(value));
+}
+
 Value Value::FLOAT(float value) {
+	if (!Value::FloatIsValid(value)) {
+		throw OutOfRangeException("Invalid float value %f", value);
+	}
 	Value result(TypeId::FLOAT);
 	result.value_.float_ = value;
 	result.is_null = false;
 	return result;
 }
 
 Value Value::DOUBLE(double value) {
+	if (!Value::DoubleIsValid(value)) {
+		throw OutOfRangeException("Invalid double value %f", value);
+	}
 	Value result(TypeId::DOUBLE);
 	result.value_.double_ = value;
 	result.is_null = false;

diff --git a/src/common/types/vector.cpp b/src/common/types/vector.cpp
@@ -615,6 +615,30 @@ void Vector::Verify(const SelectionVector &sel, idx_t count) {
 			break;
 		}
 	}
+	if (type == TypeId::DOUBLE) {
+		// verify that there are no INF or NAN values
+		switch (vector_type) {
+		case VectorType::CONSTANT_VECTOR: {
+			auto dbl = ConstantVector::GetData<double>(*this);
+			if (!ConstantVector::IsNull(*this)) {
+				assert(Value::DoubleIsValid(*dbl));
+			}
+			break;
+		}
+		case VectorType::FLAT_VECTOR: {
+			auto doubles = FlatVector::GetData<double>(*this);
+			for (idx_t i = 0; i < count; i++) {
+				auto oidx = sel.get_index(i);
+				if (!nullmask[oidx]) {
+					assert(Value::DoubleIsValid(doubles[oidx]));
+				}
+			}
+			break;
+		}
+		default:
+			break;
+		}
+	}
 
 	if (type == TypeId::STRUCT) {
 		if (vector_type == VectorType::FLAT_VECTOR || vector_type == VectorType::CONSTANT_VECTOR) {

diff --git a/src/execution/operator/join/physical_piecewise_merge_join.cpp b/src/execution/operator/join/physical_piecewise_merge_join.cpp
@@ -265,30 +265,6 @@ static void templated_quicksort(VectorData &vdata, const SelectionVector &not_nu
 	templated_quicksort<T, duckdb::LessThanEquals>((T *)vdata.data, *vdata.sel, not_null_sel, not_null_count, result);
 }
 
-idx_t FilterNulls(VectorData &vdata, idx_t count, SelectionVector &not_null) {
-	idx_t not_null_count = 0;
-	for (idx_t i = 0; i < count; i++) {
-		auto idx = vdata.sel->get_index(i);
-		if (!(*vdata.nullmask)[idx]) {
-			not_null.set_index(not_null_count++, i);
-		}
-	}
-	return not_null_count;
-}
-
-template<class T>
-idx_t FilterNullsAndNaNs(VectorData &vdata, idx_t count, SelectionVector &not_null) {
-	auto data = (T *) vdata.data;
-	idx_t not_null_count = 0;
-	for (idx_t i = 0; i < count; i++) {
-		auto idx = vdata.sel->get_index(i);
-		if (!(*vdata.nullmask)[idx] && (!std::isnan(data[idx]))) {
-			not_null.set_index(not_null_count++, i);
-		}
-	}
-	return not_null_count;
-}
-
 void OrderVector(Vector &vector, idx_t count, MergeOrder &order) {
 	if (count == 0) {
 		order.count = 0;
@@ -299,18 +275,14 @@ void OrderVector(Vector &vector, idx_t count, MergeOrder &order) {
 
 	// first filter out all the non-null values
 	SelectionVector not_null(STANDARD_VECTOR_SIZE);
-	idx_t not_null_count;
-	switch(vector.type) {
-	case TypeId::FLOAT:
-		not_null_count = FilterNullsAndNaNs<float>(vdata, count, not_null);
-		break;
-	case TypeId::DOUBLE:
-		not_null_count = FilterNullsAndNaNs<double>(vdata, count, not_null);
-		break;
-	default:
-		not_null_count = FilterNulls(vdata, count, not_null);
-		break;
+	idx_t not_null_count = 0;
+	for (idx_t i = 0; i < count; i++) {
+		auto idx = vdata.sel->get_index(i);
+		if (!(*vdata.nullmask)[idx]) {
+			not_null.set_index(not_null_count++, i);
+		}
 	}
+
 	order.count = not_null_count;
 	order.order.Initialize(STANDARD_VECTOR_SIZE);
 	switch (vector.type) {

diff --git a/src/function/aggregate/algebraic/avg.cpp b/src/function/aggregate/algebraic/avg.cpp
@@ -37,7 +37,9 @@ struct AverageFunction {
 
 	template <class T, class STATE>
 	static void Finalize(Vector &result, STATE *state, T *target, nullmask_t &nullmask, idx_t idx) {
-		if (state->count == 0) {
+		if (!Value::DoubleIsValid(state->sum)) {
+			throw OutOfRangeException("AVG is out of range!");
+		} else if (state->count == 0) {
 			nullmask[idx] = true;
 		} else {
 			target[idx] = state->sum / state->count;

diff --git a/src/function/aggregate/distributive/minmax.cpp b/src/function/aggregate/distributive/minmax.cpp
@@ -20,6 +20,12 @@ struct MinMaxBase : public StandardDistributiveFunction {
 			OP::template Execute<INPUT_TYPE, STATE>(state, input[0]);
 		}
 	}
+
+	template <class T, class STATE>
+	static void Finalize(Vector &result, STATE *state, T *target, nullmask_t &nullmask, idx_t idx) {
+		nullmask[idx] = IsNullValue<T>(*state);
+		target[idx] = *state;
+	}
 };
 
 struct NumericMinMaxBase : public MinMaxBase {

diff --git a/src/function/aggregate/distributive/sum.cpp b/src/function/aggregate/distributive/sum.cpp
@@ -27,8 +27,23 @@ struct SumOperation : public StandardDistributiveFunction {
 		}
 		*state += input[0] * count;
 	}
+
+	template <class T, class STATE>
+	static void Finalize(Vector &result, STATE *state, T *target, nullmask_t &nullmask, idx_t idx) {
+		nullmask[idx] = IsNullValue<T>(*state);
+		target[idx] = *state;
+	}
 };
 
+template <>
+void SumOperation::Finalize(Vector &result, double *state, double *target, nullmask_t &nullmask, idx_t idx) {
+	if (!Value::DoubleIsValid(*state)) {
+		throw OutOfRangeException("SUM is out of range!");
+	}
+	nullmask[idx] = IsNullValue<double>(*state);
+	target[idx] = *state;
+}
+
 void SumFun::RegisterFunction(BuiltinFunctions &set) {
 	AggregateFunctionSet sum("sum");
 	// integer sums to bigint

diff --git a/src/function/scalar/math/CMakeLists.txt b/src/function/scalar/math/CMakeLists.txt
@@ -2,8 +2,7 @@ add_library_unity(duckdb_func_math
                   OBJECT
                   numeric.cpp
                   random.cpp
-                  setseed.cpp
-                  trigonometrics.cpp)
+                  setseed.cpp)
 set(ALL_OBJECT_FILES
     ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:duckdb_func_math>
     PARENT_SCOPE)