Skip to content

Commit

Permalink
Merge pull request #409 from cwida/vectortypes
Browse files Browse the repository at this point in the history
Vector Rework (Vector types, three buffer system, arrow compatible TypeIds)
  • Loading branch information
Mytherin committed Feb 4, 2020
2 parents 3391be8 + bac1e4f commit fa48e95
Show file tree
Hide file tree
Showing 182 changed files with 3,466 additions and 3,079 deletions.
19 changes: 19 additions & 0 deletions benchmark/tpch/lineitem_aggregate.cpp
Expand Up @@ -27,6 +27,25 @@ string BenchmarkInfo() override {
}
FINISH_BENCHMARK(LineitemSimpleAggregate)

// Benchmark: run a plain COUNT(*) over the TPC-H lineitem table.
DUCKDB_BENCHMARK(LineitemCount, "[aggregate]")
// Fill the benchmark database by invoking the TPC-H generator with SF.
void Load(DuckDBBenchmarkState *state) override {
	tpch::dbgen(SF, state->db);
}
// The SQL text that is executed for this benchmark.
string GetQuery() override {
	return "SELECT COUNT(*) FROM lineitem";
}
// Returns an empty string when the query succeeded, otherwise the error
// carried by the result. The returned row count itself is not inspected.
string VerifyResult(QueryResult *result) override {
	if (result->success) {
		return string();
	}
	return result->error;
}
// Human-readable description of what this benchmark runs.
string BenchmarkInfo() override {
	return "Execute the query \"SELECT COUNT(*) FROM lineitem\" on SF1";
}
FINISH_BENCHMARK(LineitemCount)

DUCKDB_BENCHMARK(LineitemGroupAggregate, "[aggregate]")
void Load(DuckDBBenchmarkState *state) override {
// load the data into the tpch schema
Expand Down
22 changes: 11 additions & 11 deletions examples/programmatic-querying/main.cpp
Expand Up @@ -33,15 +33,15 @@ void my_scan_function(ClientContext &context, DataChunk &input, DataChunk &outpu
size_t this_rows = std::min(data.nrow, (size_t)1024);
data.nrow -= this_rows;

auto int_data = (int32_t *)output.data[0].data;
auto int_data = (int32_t *)output.data[0].GetData();
for (size_t row = 0; row < this_rows; row++) {
int_data[row] = row % 10;
}
output.data[0].count = this_rows;
output.data[1].count = this_rows;
for (size_t row = 0; row < this_rows; row++) {
output.data[1].SetStringValue(row, ("hello_" + std::to_string(row)).c_str());
output.data[1].SetValue(row, Value("hello_" + std::to_string(row)));
}
output.data[1].count = this_rows;
}

class MyScanFunction : public TableFunction {
Expand Down Expand Up @@ -109,7 +109,7 @@ int main() {
TABLE_FUNCTION
*/

vector<TypeId> types{TypeId::INTEGER, TypeId::VARCHAR};
vector<TypeId> types{TypeId::INT32, TypeId::VARCHAR};

// TABLE_FUNCTION my_scan
vector<unique_ptr<ParsedExpression>> children; // empty
Expand All @@ -123,12 +123,12 @@ int main() {

auto lte_expr = make_unique_base<Expression, BoundComparisonExpression>(
ExpressionType::COMPARE_LESSTHANOREQUALTO,
make_unique_base<Expression, BoundReferenceExpression>(TypeId::INTEGER, 0),
make_unique_base<Expression, BoundReferenceExpression>(TypeId::INT32, 0),
make_unique_base<Expression, BoundConstantExpression>(Value::INTEGER(7)));

auto gte_expr = make_unique_base<Expression, BoundComparisonExpression>(
ExpressionType::COMPARE_GREATERTHANOREQUALTO,
make_unique_base<Expression, BoundReferenceExpression>(TypeId::INTEGER, 0),
make_unique_base<Expression, BoundReferenceExpression>(TypeId::INT32, 0),
make_unique_base<Expression, BoundConstantExpression>(Value::INTEGER(3)));

filter_expressions.push_back(move(lte_expr));
Expand All @@ -138,19 +138,19 @@ int main() {
filter->children.push_back(move(scan_function));

// HASH_GROUP_BY some_int aggregating COUNT(*)
vector<TypeId> aggr_types{TypeId::INTEGER, TypeId::BIGINT};
vector<TypeId> aggr_types{TypeId::INT32, TypeId::INT64};
vector<unique_ptr<Expression>> aggr_expressions;
aggr_expressions.push_back(resolve_aggregate(con, "count", {}));

vector<unique_ptr<Expression>> aggr_groups;
aggr_groups.push_back(make_unique_base<Expression, BoundReferenceExpression>(TypeId::INTEGER, 0));
aggr_groups.push_back(make_unique_base<Expression, BoundReferenceExpression>(TypeId::INT32, 0));

auto group_by = make_unique<PhysicalHashAggregate>(aggr_types, move(aggr_expressions), move(aggr_groups));
group_by->children.push_back(move(filter));

// PROJECTION[%(+(some_int, 42), 2) count()]
auto add_expr = resolve_function(con, "+", {SQLTypeId::INTEGER, SQLTypeId::INTEGER});
add_expr->children.push_back(make_unique_base<Expression, BoundReferenceExpression>(TypeId::INTEGER, 0));
add_expr->children.push_back(make_unique_base<Expression, BoundReferenceExpression>(TypeId::INT32, 0));
add_expr->children.push_back(make_unique_base<Expression, BoundConstantExpression>(Value::INTEGER(42)));

auto mod_expr = resolve_function(con, "%", {SQLTypeId::INTEGER, SQLTypeId::INTEGER});
Expand All @@ -159,14 +159,14 @@ int main() {

vector<unique_ptr<Expression>> proj_expressions;
proj_expressions.push_back(move(mod_expr));
proj_expressions.push_back(make_unique_base<Expression, BoundReferenceExpression>(TypeId::BIGINT, 1));
proj_expressions.push_back(make_unique_base<Expression, BoundReferenceExpression>(TypeId::INT64, 1));
auto projection = make_unique<PhysicalProjection>(aggr_types, move(proj_expressions));
projection->children.push_back(move(group_by));

// ORDER_BY 1
BoundOrderByNode order_by;
order_by.type = OrderType::ASCENDING;
order_by.expression = make_unique_base<Expression, BoundReferenceExpression>(TypeId::INTEGER, 0);
order_by.expression = make_unique_base<Expression, BoundReferenceExpression>(TypeId::INT32, 0);

vector<BoundOrderByNode> orders;
orders.push_back(move(order_by));
Expand Down
2 changes: 1 addition & 1 deletion src/catalog/catalog_entry/table_catalog_entry.cpp
Expand Up @@ -137,7 +137,7 @@ vector<TypeId> TableCatalogEntry::GetTypes(const vector<column_t> &column_ids) {
vector<TypeId> result;
for (auto &index : column_ids) {
if (index == COLUMN_IDENTIFIER_ROW_ID) {
result.push_back(TypeId::BIGINT);
result.push_back(TypeId::INT64);
} else {
result.push_back(GetInternalType(columns[index].type));
}
Expand Down
30 changes: 15 additions & 15 deletions src/common/limits.cpp
Expand Up @@ -10,13 +10,13 @@ namespace duckdb {
// hashtables
int64_t MinimumValue(TypeId type) {
switch (type) {
case TypeId::TINYINT:
case TypeId::INT8:
return MinimumValue<int8_t>();
case TypeId::SMALLINT:
case TypeId::INT16:
return MinimumValue<int16_t>();
case TypeId::INTEGER:
case TypeId::INT32:
return MinimumValue<int32_t>();
case TypeId::BIGINT:
case TypeId::INT64:
return MinimumValue<int64_t>();
case TypeId::HASH:
return MinimumValue<uint64_t>();
Expand All @@ -29,13 +29,13 @@ int64_t MinimumValue(TypeId type) {

uint64_t MaximumValue(TypeId type) {
switch (type) {
case TypeId::TINYINT:
case TypeId::INT8:
return MaximumValue<int8_t>();
case TypeId::SMALLINT:
case TypeId::INT16:
return MaximumValue<int16_t>();
case TypeId::INTEGER:
case TypeId::INT32:
return MaximumValue<int32_t>();
case TypeId::BIGINT:
case TypeId::INT64:
return MaximumValue<int64_t>();
case TypeId::HASH:
return MaximumValue<uint64_t>();
Expand All @@ -47,16 +47,16 @@ uint64_t MaximumValue(TypeId type) {
}

TypeId MinimalType(int64_t value) {
if (value >= MinimumValue(TypeId::TINYINT) && (uint64_t)value <= MaximumValue(TypeId::TINYINT)) {
return TypeId::TINYINT;
if (value >= MinimumValue(TypeId::INT8) && (uint64_t)value <= MaximumValue(TypeId::INT8)) {
return TypeId::INT8;
}
if (value >= MinimumValue(TypeId::SMALLINT) && (uint64_t)value <= MaximumValue(TypeId::SMALLINT)) {
return TypeId::SMALLINT;
if (value >= MinimumValue(TypeId::INT16) && (uint64_t)value <= MaximumValue(TypeId::INT16)) {
return TypeId::INT16;
}
if (value >= MinimumValue(TypeId::INTEGER) && (uint64_t)value <= MaximumValue(TypeId::INTEGER)) {
return TypeId::INTEGER;
if (value >= MinimumValue(TypeId::INT32) && (uint64_t)value <= MaximumValue(TypeId::INT32)) {
return TypeId::INT32;
}
return TypeId::BIGINT;
return TypeId::INT64;
}

} // namespace duckdb
62 changes: 32 additions & 30 deletions src/common/types.cpp
Expand Up @@ -34,20 +34,20 @@ const vector<SQLType> SQLType::ALL_TYPES = {
SQLType::DATE, SQLType::TIMESTAMP, SQLType::DOUBLE, SQLType::FLOAT, SQLType(SQLTypeId::DECIMAL),
SQLType::VARCHAR};

const TypeId ROW_TYPE = TypeId::BIGINT;
const TypeId ROW_TYPE = TypeId::INT64;

string TypeIdToString(TypeId type) {
switch (type) {
case TypeId::BOOLEAN:
return "BOOLEAN";
case TypeId::TINYINT:
return "TINYINT";
case TypeId::SMALLINT:
return "SMALLINT";
case TypeId::INTEGER:
return "INTEGER";
case TypeId::BIGINT:
return "BIGINT";
case TypeId::BOOL:
return "BOOL";
case TypeId::INT8:
return "INT8";
case TypeId::INT16:
return "INT16";
case TypeId::INT32:
return "INT32";
case TypeId::INT64:
return "INT64";
case TypeId::HASH:
return "HASH";
case TypeId::POINTER:
Expand All @@ -67,15 +67,15 @@ string TypeIdToString(TypeId type) {

index_t GetTypeIdSize(TypeId type) {
switch (type) {
case TypeId::BOOLEAN:
case TypeId::BOOL:
return sizeof(bool);
case TypeId::TINYINT:
case TypeId::INT8:
return sizeof(int8_t);
case TypeId::SMALLINT:
case TypeId::INT16:
return sizeof(int16_t);
case TypeId::INTEGER:
case TypeId::INT32:
return sizeof(int32_t);
case TypeId::BIGINT:
case TypeId::INT64:
return sizeof(int64_t);
case TypeId::FLOAT:
return sizeof(float);
Expand All @@ -96,15 +96,15 @@ index_t GetTypeIdSize(TypeId type) {

SQLType SQLTypeFromInternalType(TypeId type) {
switch (type) {
case TypeId::BOOLEAN:
case TypeId::BOOL:
return SQLType(SQLTypeId::BOOLEAN);
case TypeId::TINYINT:
case TypeId::INT8:
return SQLType::TINYINT;
case TypeId::SMALLINT:
case TypeId::INT16:
return SQLType::SMALLINT;
case TypeId::INTEGER:
case TypeId::INT32:
return SQLType::INTEGER;
case TypeId::BIGINT:
case TypeId::INT64:
return SQLType::BIGINT;
case TypeId::FLOAT:
return SQLType::FLOAT;
Expand All @@ -120,16 +120,18 @@ SQLType SQLTypeFromInternalType(TypeId type) {
}

bool TypeIsConstantSize(TypeId type) {
return type < TypeId::VARCHAR;
return (type >= TypeId::BOOL && type <= TypeId::DOUBLE) ||
(type >= TypeId::FIXED_SIZE_BINARY && type <= TypeId::DECIMAL) || type == TypeId::HASH ||
type == TypeId::POINTER;
}
bool TypeIsIntegral(TypeId type) {
return type >= TypeId::TINYINT && type <= TypeId::POINTER;
return (type >= TypeId::UINT8 && type <= TypeId::INT64) || type == TypeId::HASH || type == TypeId::POINTER;
}
bool TypeIsNumeric(TypeId type) {
return type >= TypeId::TINYINT && type <= TypeId::DOUBLE;
return type >= TypeId::UINT8 && type <= TypeId::DOUBLE;
}
bool TypeIsInteger(TypeId type) {
return type >= TypeId::TINYINT && type <= TypeId::BIGINT;
return type >= TypeId::UINT8 && type <= TypeId::INT64;
}

void SQLType::Serialize(Serializer &serializer) {
Expand Down Expand Up @@ -219,19 +221,19 @@ bool SQLType::IsNumeric() const {
TypeId GetInternalType(SQLType type) {
switch (type.id) {
case SQLTypeId::BOOLEAN:
return TypeId::BOOLEAN;
return TypeId::BOOL;
case SQLTypeId::TINYINT:
return TypeId::TINYINT;
return TypeId::INT8;
case SQLTypeId::SMALLINT:
return TypeId::SMALLINT;
return TypeId::INT16;
case SQLTypeId::SQLNULL:
case SQLTypeId::DATE:
case SQLTypeId::TIME:
case SQLTypeId::INTEGER:
return TypeId::INTEGER;
return TypeId::INT32;
case SQLTypeId::BIGINT:
case SQLTypeId::TIMESTAMP:
return TypeId::BIGINT;
return TypeId::INT64;
case SQLTypeId::FLOAT:
return TypeId::FLOAT;
case SQLTypeId::DOUBLE:
Expand Down
1 change: 1 addition & 0 deletions src/common/types/CMakeLists.txt
Expand Up @@ -9,6 +9,7 @@ add_library(duckdb_common_types OBJECT
timestamp.cpp
time.cpp
value.cpp
vector_buffer.cpp
vector.cpp)
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:duckdb_common_types>
Expand Down

0 comments on commit fa48e95

Please sign in to comment.