Merge pull request #779 from cwida/interval

Interval Type
duckdb · Jul 24, 2020 · 7e42c8b · 7e42c8b
2 parents de6073e + 8546329
commit 7e42c8b
Show file tree

Hide file tree

Showing 63 changed files with 1,466 additions and 402 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -59,6 +59,8 @@ if(FORCE_32_BIT)
   set(M32_FLAG " -m32 ")
 endif()
 
+option(DISABLE_UNITY "Disable unity builds." FALSE)
+
 option(FORCE_COLORED_OUTPUT
        "Always produce ANSI-colored output (GNU/Clang only)." FALSE)
 if(${FORCE_COLORED_OUTPUT})

diff --git a/src/common/operator/cast_operators.cpp b/src/common/operator/cast_operators.cpp
@@ -3,6 +3,7 @@
 #include "duckdb/common/exception.hpp"
 #include "duckdb/common/limits.hpp"
 #include "duckdb/common/types/date.hpp"
+#include "duckdb/common/types/interval.hpp"
 #include "duckdb/common/types/time.hpp"
 #include "duckdb/common/types/timestamp.hpp"
 #include "duckdb/common/types/vector.hpp"
@@ -160,23 +161,25 @@ template <> float Cast::Operation(double input) {
 template <class T> static T try_cast_string(string_t input) {
 	T result;
 	if (!TryCast::Operation<string_t, T>(input, result)) {
-		throw ConversionException("Could not convert string '%s' to numeric", input.GetData());
+		throw ConversionException("Could not convert string '%s' to %s", input.GetData(), TypeIdToString(GetTypeId<T>()).c_str());
 	}
 	return result;
 }
 
+
+
 template <class T> static T try_strict_cast_string(string_t input) {
 	T result;
 	if (!TryCast::Operation<string_t, T>(input, result, true)) {
-		throw ConversionException("Could not convert string '%s' to numeric", input.GetData());
+		throw ConversionException("Could not convert string '%s' to %s", input.GetData(), TypeIdToString(GetTypeId<T>()).c_str());
 	}
 	return result;
 }
 
 template <class T, bool NEGATIVE, bool ALLOW_EXPONENT>
-static bool IntegerCastLoop(const char *buf, T &result, bool strict) {
+static bool IntegerCastLoop(const char *buf, idx_t len, T &result, bool strict) {
 	idx_t pos = NEGATIVE || *buf == '+' ? 1 : 0;
-	while (buf[pos]) {
+	while(pos < len) {
 		if (!std::isdigit((unsigned char)buf[pos])) {
 			// not a digit!
 			if (buf[pos] == '.') {
@@ -187,7 +190,7 @@ static bool IntegerCastLoop(const char *buf, T &result, bool strict) {
 				// we just truncate them
 				// make sure everything after the period is a number
 				pos++;
-				while (buf[pos]) {
+				while(pos < len) {
 					if (!std::isdigit((unsigned char)buf[pos++])) {
 						return false;
 					}
@@ -196,7 +199,7 @@ static bool IntegerCastLoop(const char *buf, T &result, bool strict) {
 			}
 			if (std::isspace((unsigned char)buf[pos])) {
 				// skip any trailing spaces
-				while (buf[++pos]) {
+				while(++pos < len) {
 					if (!std::isspace((unsigned char)buf[pos])) {
 						return false;
 					}
@@ -209,11 +212,11 @@ static bool IntegerCastLoop(const char *buf, T &result, bool strict) {
 					int64_t exponent = 0;
 					int negative = buf[pos] == '-';
 					if (negative) {
-						if (!IntegerCastLoop<int64_t, true, false>(buf + pos, exponent, strict)) {
+						if (!IntegerCastLoop<int64_t, true, false>(buf + pos, len - pos, exponent, strict)) {
 							return false;
 						}
 					} else {
-						if (!IntegerCastLoop<int64_t, false, false>(buf + pos, exponent, strict)) {
+						if (!IntegerCastLoop<int64_t, false, false>(buf + pos, len - pos, exponent, strict)) {
 							return false;
 						}
 					}
@@ -243,21 +246,22 @@ static bool IntegerCastLoop(const char *buf, T &result, bool strict) {
 	return pos > (NEGATIVE ? 1 : 0);
 }
 
-template <class T, bool ALLOW_EXPONENT = true> static bool TryIntegerCast(const char *buf, T &result, bool strict) {
-	if (!*buf) {
-		return false;
-	}
+template <class T, bool ALLOW_EXPONENT = true> static bool TryIntegerCast(const char *buf, idx_t len, T &result, bool strict) { // parses the len bytes at buf into result; false if the input is empty/blank or IntegerCastLoop rejects it
 	// skip any spaces at the start
-	while (std::isspace((unsigned char)*buf)) {
+	while (len > 0 && std::isspace((unsigned char)*buf)) { // keep the unsigned char cast: std::isspace is undefined for negative char values
 		buf++;
+		len--;
+	}
+	if (len == 0) { // nothing left after skipping leading whitespace
+		return false;
 	}
 	int negative = *buf == '-';
 
 	result = 0;
 	if (!negative) {
-		return IntegerCastLoop<T, false, ALLOW_EXPONENT>(buf, result, strict);
+		return IntegerCastLoop<T, false, ALLOW_EXPONENT>(buf, len, result, strict);
 	} else {
-		return IntegerCastLoop<T, true, ALLOW_EXPONENT>(buf, result, strict);
+		return IntegerCastLoop<T, true, ALLOW_EXPONENT>(buf, len, result, strict);
 	}
 }
 
@@ -286,16 +290,16 @@ template <> bool TryCast::Operation(string_t input, bool &result, bool strict) {
 	return true;
 }
 template <> bool TryCast::Operation(string_t input, int8_t &result, bool strict) {
-	return TryIntegerCast<int8_t>(input.GetData(), result, strict);
+	return TryIntegerCast<int8_t>(input.GetData(), input.GetSize(), result, strict);
 }
 template <> bool TryCast::Operation(string_t input, int16_t &result, bool strict) {
-	return TryIntegerCast<int16_t>(input.GetData(), result, strict);
+	return TryIntegerCast<int16_t>(input.GetData(), input.GetSize(), result, strict);
 }
 template <> bool TryCast::Operation(string_t input, int32_t &result, bool strict) {
-	return TryIntegerCast<int32_t>(input.GetData(), result, strict);
+	return TryIntegerCast<int32_t>(input.GetData(), input.GetSize(), result, strict);
 }
 template <> bool TryCast::Operation(string_t input, int64_t &result, bool strict) {
-	return TryIntegerCast<int64_t>(input.GetData(), result, strict);
+	return TryIntegerCast<int64_t>(input.GetData(), input.GetSize(), result, strict);
 }
 
 template <class T, bool NEGATIVE> static void ComputeDoubleResult(T &result, idx_t decimal, idx_t decimal_factor) {
@@ -308,11 +312,11 @@ template <class T, bool NEGATIVE> static void ComputeDoubleResult(T &result, idx
 	}
 }
 
-template <class T, bool NEGATIVE> static bool DoubleCastLoop(const char *buf, T &result, bool strict) {
+template <class T, bool NEGATIVE> static bool DoubleCastLoop(const char *buf, idx_t len, T &result, bool strict) {
 	idx_t pos = NEGATIVE || *buf == '+' ? 1 : 0;
 	idx_t decimal = 0;
 	idx_t decimal_factor = 0;
-	while (buf[pos]) {
+	while (pos < len) {
 		if (!std::isdigit((unsigned char)buf[pos])) {
 			// not a digit!
 			if (buf[pos] == '.') {
@@ -326,7 +330,7 @@ template <class T, bool NEGATIVE> static bool DoubleCastLoop(const char *buf, T
 				continue;
 			} else if (std::isspace((unsigned char)buf[pos])) {
 				// skip any trailing spaces
-				while (buf[++pos]) {
+				while (++pos < len) {
 					if (!std::isspace((unsigned char)buf[pos])) {
 						return false;
 					}
@@ -338,7 +342,7 @@ template <class T, bool NEGATIVE> static bool DoubleCastLoop(const char *buf, T
 				// parse an integer, this time not allowing another exponent
 				pos++;
 				int64_t exponent;
-				if (!TryIntegerCast<int64_t, false>(buf + pos, exponent, strict)) {
+				if (!TryIntegerCast<int64_t, false>(buf + pos, len - pos, exponent, strict)) {
 					return false;
 				}
 				ComputeDoubleResult<T, NEGATIVE>(result, decimal, decimal_factor);
@@ -374,23 +378,24 @@ template <> bool CheckDoubleValidity(double value) {
 	return Value::DoubleIsValid(value);
 }
 
-template <class T> static bool TryDoubleCast(const char *buf, T &result, bool strict) {
-	if (!*buf) {
-		return false;
-	}
+template <class T> static bool TryDoubleCast(const char *buf, idx_t len, T &result, bool strict) { // parses the len bytes at buf into result; false if the input is empty/blank or DoubleCastLoop rejects it
 	// skip any spaces at the start
-	while (std::isspace((unsigned char)*buf)) {
+	while (len > 0 && std::isspace((unsigned char)*buf)) { // keep the unsigned char cast: std::isspace is undefined for negative char values
 		buf++;
+		len--;
+	}
+	if (len == 0) { // nothing left after skipping leading whitespace
+		return false;
 	}
 	int negative = *buf == '-';
 
 	result = 0;
 	if (!negative) {
-		if (!DoubleCastLoop<T, false>(buf, result, strict)) {
+		if (!DoubleCastLoop<T, false>(buf, len, result, strict)) {
 			return false;
 		}
 	} else {
-		if (!DoubleCastLoop<T, true>(buf, result, strict)) {
+		if (!DoubleCastLoop<T, true>(buf, len, result, strict)) {
 			return false;
 		}
 	}
@@ -401,10 +406,10 @@ template <class T> static bool TryDoubleCast(const char *buf, T &result, bool st
 }
 
 template <> bool TryCast::Operation(string_t input, float &result, bool strict) {
-	return TryDoubleCast<float>(input.GetData(), result, strict);
+	return TryDoubleCast<float>(input.GetData(), input.GetSize(), result, strict);
 }
 template <> bool TryCast::Operation(string_t input, double &result, bool strict) {
-	return TryDoubleCast<double>(input.GetData(), result, strict);
+	return TryDoubleCast<double>(input.GetData(), input.GetSize(), result, strict);
 }
 
 template <> bool Cast::Operation(string_t input) {
@@ -627,6 +632,12 @@ template <> string_t StringCast::Operation(double input, Vector &vector) {
 	return StringVector::AddString(vector, s);
 }
 
+template <> string_t StringCast::Operation(interval_t input, Vector &vector) {
+	std::string s = Interval::ToString(input);
+	return StringVector::AddString(vector, s);
+}
+
+
 //===--------------------------------------------------------------------===//
 // Cast From Date
 //===--------------------------------------------------------------------===//
@@ -917,4 +928,19 @@ template <> string_t CastToBlob::Operation(string_t input, Vector &vector) {
 	return result;
 }
 
+//===--------------------------------------------------------------------===//
+// Cast From Interval
+//===--------------------------------------------------------------------===//
+template <> bool TryCast::Operation(string_t input, interval_t &result, bool strict) { // string -> interval; returns whatever Interval::FromCString reports
+	return Interval::FromCString(input.GetData(), input.GetSize(), result); // NOTE(review): strict is not forwarded, so strict and non-strict casts take the same path here — confirm intended
+}
+
+template <> interval_t StrictCast::Operation(string_t input) {
+	return try_strict_cast_string<interval_t>(input);
+}
+
+template <> interval_t Cast::Operation(string_t input) {
+	return try_cast_string<interval_t>(input);
+}
+
 } // namespace duckdb
diff --git a/src/common/serializer/buffered_file_reader.cpp b/src/common/serializer/buffered_file_reader.cpp
@@ -3,6 +3,7 @@
 #include "duckdb/common/exception.hpp"
 
 #include <cstring>
+#include <algorithm>
 
 using namespace duckdb;
 using namespace std;

diff --git a/src/common/serializer/buffered_file_writer.cpp b/src/common/serializer/buffered_file_writer.cpp
@@ -1,6 +1,6 @@
 #include "duckdb/common/serializer/buffered_file_writer.hpp"
 #include "duckdb/common/exception.hpp"
-
+#include "duckdb/common/algorithm.hpp"
 #include <cstring>
 
 using namespace duckdb;
@@ -23,7 +23,7 @@ void BufferedFileWriter::WriteData(const_data_ptr_t buffer, uint64_t write_size)
 	// first copy anything we can from the buffer
 	const_data_ptr_t end_ptr = buffer + write_size;
 	while (buffer < end_ptr) {
-		idx_t to_write = std::min((idx_t)(end_ptr - buffer), FILE_BUFFER_SIZE - offset);
+		idx_t to_write = min((idx_t)(end_ptr - buffer), FILE_BUFFER_SIZE - offset);
 		assert(to_write > 0);
 		memcpy(data.get() + offset, buffer, to_write);
 		offset += to_write;

diff --git a/src/common/types.cpp b/src/common/types.cpp
@@ -28,6 +28,7 @@ const SQLType SQLType::VARCHAR = SQLType(SQLTypeId::VARCHAR);
 const SQLType SQLType::VARBINARY = SQLType(SQLTypeId::VARBINARY);
 
 const SQLType SQLType::BLOB = SQLType(SQLTypeId::BLOB);
+const SQLType SQLType::INTERVAL = SQLType(SQLTypeId::INTERVAL);
 
 // TODO these are incomplete and should maybe not exist as such
 const SQLType SQLType::STRUCT = SQLType(SQLTypeId::STRUCT);
@@ -44,7 +45,7 @@ const vector<SQLType> SQLType::INTEGRAL = {SQLType::TINYINT, SQLType::SMALLINT,
 const vector<SQLType> SQLType::ALL_TYPES = {
     SQLType::BOOLEAN, SQLType::TINYINT,   SQLType::SMALLINT, SQLType::INTEGER, SQLType::BIGINT,
     SQLType::DATE,    SQLType::TIMESTAMP, SQLType::DOUBLE,   SQLType::FLOAT,   SQLType(SQLTypeId::DECIMAL),
-    SQLType::VARCHAR, SQLType::BLOB};
+    SQLType::VARCHAR, SQLType::BLOB, SQLType::INTERVAL};
 // TODO add LIST/STRUCT here
 
 const TypeId ROW_TYPE = TypeId::INT64;
@@ -73,6 +74,8 @@ string TypeIdToString(TypeId type) {
 		return "VARCHAR";
 	case TypeId::VARBINARY:
 		return "VARBINARY";
+	case TypeId::INTERVAL:
+		return "INTERVAL";
 	case TypeId::STRUCT:
 		return "STRUCT<?>";
 	case TypeId::LIST:
@@ -104,6 +107,8 @@ idx_t GetTypeIdSize(TypeId type) {
 		return sizeof(uintptr_t);
 	case TypeId::VARCHAR:
 		return sizeof(string_t);
+	case TypeId::INTERVAL:
+		return sizeof(interval_t);
 	case TypeId::STRUCT:
 		return 0; // no own payload
 	case TypeId::LIST:
@@ -131,6 +136,8 @@ SQLType SQLTypeFromInternalType(TypeId type) {
 		return SQLType::FLOAT;
 	case TypeId::DOUBLE:
 		return SQLType::DOUBLE;
+	case TypeId::INTERVAL:
+		return SQLType::INTERVAL;
 	case TypeId::VARCHAR:
 		return SQLType::VARCHAR;
 	case TypeId::VARBINARY:
@@ -147,7 +154,7 @@ SQLType SQLTypeFromInternalType(TypeId type) {
 bool TypeIsConstantSize(TypeId type) {
 	return (type >= TypeId::BOOL && type <= TypeId::DOUBLE) ||
 	       (type >= TypeId::FIXED_SIZE_BINARY && type <= TypeId::DECIMAL) || type == TypeId::HASH ||
-	       type == TypeId::POINTER;
+	       type == TypeId::POINTER || type == TypeId::INTERVAL;
 }
 bool TypeIsIntegral(TypeId type) {
 	return (type >= TypeId::UINT8 && type <= TypeId::INT64) || type == TypeId::HASH || type == TypeId::POINTER;
@@ -206,6 +213,8 @@ string SQLTypeIdToString(SQLTypeId id) {
 		return "VARBINARY";
 	case SQLTypeId::CHAR:
 		return "CHAR";
+	case SQLTypeId::INTERVAL:
+		return "INTERVAL";
 	case SQLTypeId::SQLNULL:
 		return "NULL";
 	case SQLTypeId::ANY:
@@ -278,6 +287,8 @@ SQLType TransformStringToSQLType(string str) {
 		return SQLType::DATE;
 	} else if (lower_str == "time") {
 		return SQLType::TIME;
+	} else if (lower_str == "interval") {
+		return SQLType::INTERVAL;
 	} else {
 		throw NotImplementedException("DataType %s not supported yet...\n", str.c_str());
 	}
@@ -403,6 +414,8 @@ TypeId GetInternalType(SQLType type) {
 		return TypeId::VARCHAR;
 	case SQLTypeId::VARBINARY:
 		return TypeId::VARBINARY;
+	case SQLTypeId::INTERVAL:
+		return TypeId::INTERVAL;
 	case SQLTypeId::STRUCT:
 		return TypeId::STRUCT;
 	case SQLTypeId::LIST:

diff --git a/src/common/types/CMakeLists.txt b/src/common/types/CMakeLists.txt
@@ -5,6 +5,7 @@ add_library_unity(duckdb_common_types
                   date.cpp
                   hash.cpp
                   hyperloglog.cpp
+                  interval.cpp
                   null_value.cpp
                   selection_vector.cpp
                   string_heap.cpp

diff --git a/src/common/types/chunk_collection.cpp b/src/common/types/chunk_collection.cpp
@@ -144,6 +144,8 @@ static int32_t compare_value(Vector &left_vec, Vector &right_vec, idx_t vector_i
 		return templated_compare_value<double>(left_vec, right_vec, vector_idx_left, vector_idx_right);
 	case TypeId::VARCHAR:
 		return templated_compare_value<string_t>(left_vec, right_vec, vector_idx_left, vector_idx_right);
+	case TypeId::INTERVAL:
+		return templated_compare_value<interval_t>(left_vec, right_vec, vector_idx_left, vector_idx_right);
 	default:
 		throw NotImplementedException("Type for comparison");
 	}
@@ -381,6 +383,9 @@ void ChunkCollection::MaterializeSortedChunk(DataChunk &target, idx_t order[], i
 		case TypeId::VARCHAR:
 			templated_set_values<string_t>(this, target.data[col_idx], order, col_idx, start_offset, remaining_data);
 			break;
+		case TypeId::INTERVAL:
+			templated_set_values<interval_t>(this, target.data[col_idx], order, col_idx, start_offset, remaining_data);
+			break;
 
 		case TypeId::LIST:
 		case TypeId::STRUCT: {

diff --git a/src/common/types/hash.cpp b/src/common/types/hash.cpp
@@ -25,6 +25,10 @@ template <> hash_t Hash(double val) {
 	return std::hash<double>{}(val);
 }
 
+template <> hash_t Hash(interval_t val) { // hashes an interval by XOR-ing the hashes of its days, months and msecs fields
+	return Hash(val.days) ^ Hash(val.months) ^ Hash(val.msecs); // NOTE(review): XOR is commutative, so if two of these fields share a type, swapping their values yields the same hash — confirm this collision pattern is acceptable
+}
+
 template <> hash_t Hash(const char *str) {
 	hash_t hash = 5381;
 	hash_t c;