Skip to content

Commit

Permalink
disabling schema parsing and passing to parquet extension, since parq…
Browse files Browse the repository at this point in the history
…uet is not ready to deal with complex data types in supplied schema yet.
  • Loading branch information
Devendra committed Feb 23, 2024
1 parent 506efca commit d9e5d72
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 15 deletions.
3 changes: 0 additions & 3 deletions src/common/iceberg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ string IcebergSnapshot::ReadMetaData(const string &path, FileSystem &fs) {
metadata_file_path = path;
// Check whether the metadata file path indicates a gzip-compressed file.
if (metadata_file_path.find(".gz.metadata.") != string::npos) {
printf("it's gz!\n");
return IcebergUtils::GzFileToString(metadata_file_path, fs);
}
return IcebergUtils::FileToString(metadata_file_path, fs);
Expand All @@ -179,12 +178,10 @@ string IcebergSnapshot::ReadMetaData(const string &path, FileSystem &fs) {
auto meta_path = fs.JoinPath(path, "metadata");
metadata_file_path = fs.JoinPath(meta_path, "v" + table_version + ".metadata.json");

printf("trying! metadata_file_path:%s\n", metadata_file_path.c_str());
try {
// attempting to return file content as json string.
return IcebergUtils::FileToString(metadata_file_path, fs);
} catch(...) {
printf("re-trying! metadata_file_path:%s\n", metadata_file_path.c_str());
// try with gz metadata file
metadata_file_path = fs.JoinPath(meta_path, "v" + table_version + ".gz.metadata.json");
// attempting to return file content as gz compressed json string.
Expand Down
10 changes: 5 additions & 5 deletions src/common/schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,11 @@ static vector<IcebergColumnDefinition> ParseSchemaFromJson(yyjson_val *schema_js
yyjson_val *field;
size_t max, idx;
vector<IcebergColumnDefinition> ret;

auto fields = yyjson_obj_get(schema_json, "fields");
yyjson_arr_foreach(fields, idx, max, field) {
ret.push_back(IcebergColumnDefinition::ParseFromJson(field));
}
// TODO: parquet_scan does not yet support complex data types in a supplied schema, so the schema is not parsed here; we rely on parquet_scan to infer the schema automatically.
// auto fields = yyjson_obj_get(schema_json, "fields");
// yyjson_arr_foreach(fields, idx, max, field) {
// ret.push_back(IcebergColumnDefinition::ParseFromJson(field));
// }

return ret;
}
Expand Down
1 change: 0 additions & 1 deletion src/common/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ string IcebergUtils::GzFileToString(const string &path, FileSystem &fs) {
} while (zs.avail_out == 0);
inflateEnd(&zs);
string ds = decompressed.str();
printf("content=%s\n", ds.c_str());
return ds;
}

Expand Down
14 changes: 8 additions & 6 deletions src/iceberg_functions/iceberg_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,10 @@ static unique_ptr<TableRef> MakeScanExpression(vector<Value> &data_file_values,
table_function_ref_data->alias = "iceberg_scan_data";
vector<unique_ptr<ParsedExpression>> left_children;
left_children.push_back(make_uniq<ConstantExpression>(Value::LIST(data_file_values)));
left_children.push_back(
make_uniq<ComparisonExpression>(ExpressionType::COMPARE_EQUAL, make_uniq<ColumnRefExpression>("schema"),
make_uniq<ConstantExpression>(GetParquetSchemaParam(schema))));
// TODO: the schema is not passed to parquet_scan for now; re-enable this once the parquet extension supports complex data types.
// left_children.push_back(
// make_uniq<ComparisonExpression>(ExpressionType::COMPARE_EQUAL, make_uniq<ColumnRefExpression>("schema"),
// make_uniq<ConstantExpression>(GetParquetSchemaParam(schema))));
table_function_ref_data->function = make_uniq<FunctionExpression>("parquet_scan", std::move(left_children));
return std::move(table_function_ref_data);
}
Expand Down Expand Up @@ -169,9 +170,10 @@ static unique_ptr<TableRef> MakeScanExpression(vector<Value> &data_file_values,
left_children.push_back(make_uniq<ComparisonExpression>(ExpressionType::COMPARE_EQUAL,
make_uniq<ColumnRefExpression>("file_row_number"),
make_uniq<ConstantExpression>(Value(1))));
left_children.push_back(
make_uniq<ComparisonExpression>(ExpressionType::COMPARE_EQUAL, make_uniq<ColumnRefExpression>("schema"),
make_uniq<ConstantExpression>(GetParquetSchemaParam(schema))));
// TODO: the schema is not passed to parquet_scan for now; re-enable this once the parquet extension supports complex data types.
// left_children.push_back(
// make_uniq<ComparisonExpression>(ExpressionType::COMPARE_EQUAL, make_uniq<ColumnRefExpression>("schema"),
// make_uniq<ConstantExpression>(GetParquetSchemaParam(schema))));

table_function_ref_data->function = make_uniq<FunctionExpression>("parquet_scan", std::move(left_children));
join_node->left = std::move(table_function_ref_data);
Expand Down

0 comments on commit d9e5d72

Please sign in to comment.