diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index a4638c4dfa241a..cab36a7bb62251 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -195,7 +195,7 @@ public List<TParquetSchema> getParquetSchemas() {
         return parquetSchemas;
     }
 
-    public void analyze(List<Expr> resultExprs, List<String> colLabels) throws UserException {
+    public void analyze(List<Expr> resultExprs, List<String> colLabels, boolean needFormat) throws UserException {
         if (isAnalyzed) {
             // If the query stmt is rewritten, the whole stmt will be analyzed again.
             // But some of fields in this OutfileClause has been changed,
@@ -214,10 +214,16 @@ public void analyze(List<Expr> resultExprs, List<String> colLabels) throws UserE
         }
         isAnalyzed = true;
 
-        if (isParquetFormat()) {
-            analyzeForParquetFormat(resultExprs, colLabels);
-        } else if (isOrcFormat()) {
-            analyzeForOrcFormat(resultExprs, colLabels);
+        // This analyze() method will be called twice:
+        // once during normal query plan analysis, and
+        // once more when writing the success file after the outfile finishes on the FE side.
+        // The second call does not need to repeat the format-related analysis.
+        if (needFormat) {
+            if (isParquetFormat()) {
+                analyzeForParquetFormat(resultExprs, colLabels);
+            } else if (isOrcFormat()) {
+                analyzeForOrcFormat(resultExprs, colLabels);
+            }
         }
     }
 
@@ -750,3 +756,4 @@ public TResultFileSinkOptions toSinkOptions() {
         return sinkOptions;
     }
 }
+
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
index e061181b46658c..a68b0058709a9d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/LogicalPlanAdapter.java
@@ -79,7 +79,7 @@ public OutFileClause getOutFileClause() {
                 fileSink.getProperties()
         );
         try {
-            outFile.analyze(Lists.newArrayList(), Lists.newArrayList());
+            outFile.analyze(Lists.newArrayList(), Lists.newArrayList(), false);
         } catch (Exception e) {
             throw new AnalysisException(e.getMessage(), e.getCause());
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index bafbacfb032cf0..bb989cea88ef51 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -604,7 +604,7 @@ public PlanFragment visitPhysicalFileSink(PhysicalFileSink<? extends Plan> fileS
 
         // TODO: should not call legacy planner analyze in Nereids
         try {
-            outFile.analyze(outputExprs, labels);
+            outFile.analyze(outputExprs, labels, true);
         } catch (Exception e) {
             throw new AnalysisException(e.getMessage(), e.getCause());
         }
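Reviewer note: the intent of the new `needFormat` flag, in one self-contained sketch. `analyze()` now runs on two paths: the planning path (`PhysicalPlanTranslator`, real exprs and labels, `needFormat = true`) and the success-file path (`LogicalPlanAdapter`, empty lists, `needFormat = false`), where Parquet/ORC schema derivation would be meaningless. The class below is a hypothetical stand-in whose names only mimic the patch; it is not Doris code.

```java
import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for OutFileClause, illustrating the needFormat guard.
public class NeedFormatSketch {
    private boolean isAnalyzed = false;

    void analyze(List<String> resultExprs, List<String> colLabels, boolean needFormat) {
        if (isAnalyzed) {
            return; // a rewritten stmt can be analyzed again; do not redo field setup
        }
        isAnalyzed = true;
        if (needFormat) {
            // Planning path: this is where analyzeForParquetFormat/analyzeForOrcFormat
            // would derive the file schema from the output exprs.
            System.out.println("derive file schema for columns " + colLabels);
        }
        // Success-file path (needFormat == false): format analysis is skipped,
        // since that caller passes empty expr/label lists.
    }

    public static void main(String[] args) {
        new NeedFormatSketch().analyze(List.of("id", "name"), List.of("id", "name"), true);
        new NeedFormatSketch().analyze(new ArrayList<>(), new ArrayList<>(), false);
    }
}
```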
diff --git a/regression-test/data/export_p0/outfile/test_outfile_parquet_schema.out b/regression-test/data/export_p0/outfile/test_outfile_parquet_schema.out
new file mode 100644
index 00000000000000..7d849aada574ea
--- /dev/null
+++ b/regression-test/data/export_p0/outfile/test_outfile_parquet_schema.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_base1 --
+1	abc
+
+-- !select_tvf --
+1	abc
+
+-- !desc_s3 --
+id	int	Yes	false	\N	NONE
+name	text	Yes	false	\N	NONE
+
diff --git a/regression-test/suites/export_p0/outfile/test_outfile_parquet_schema.groovy b/regression-test/suites/export_p0/outfile/test_outfile_parquet_schema.groovy
new file mode 100644
index 00000000000000..793ce9c489167b
--- /dev/null
+++ b/regression-test/suites/export_p0/outfile/test_outfile_parquet_schema.groovy
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_outfile_parquet_schema", "p0") {
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String s3_endpoint = getS3Endpoint()
+    String region = getS3Region()
+    String bucket = getS3BucketName();
+
+    def export_table_name = "outfile_parquet_schema_test"
+    def outFilePath = "${bucket}/outfile/outfile_parquet_schema_test/exp_"
+
+    // create table to export data
+    sql """ DROP TABLE IF EXISTS ${export_table_name} """
+    sql """
+        CREATE TABLE IF NOT EXISTS ${export_table_name} (
+            `id` int(11) NULL,
+            `name` string NULL
+        )
+        DISTRIBUTED BY HASH(id) BUCKETS 3
+        PROPERTIES("replication_num" = "1");
+    """
+    // insert data
+    sql """insert into ${export_table_name} values(1, "abc");"""
+
+    def check_outfile_data = { outfile_url, outfile_format ->
+        order_qt_select_tvf """ SELECT * FROM S3 (
+                "uri" = "http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}0.${outfile_format}",
+                "ACCESS_KEY" = "${ak}",
+                "SECRET_KEY" = "${sk}",
+                "format" = "${outfile_format}",
+                "region" = "${region}"
+            );
+        """
+    }
+
+    def check_outfile_column_name = { outfile_url, outfile_format ->
+        order_qt_desc_s3 """ Desc function S3 (
+                "uri" = "http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}0.${outfile_format}",
+                "ACCESS_KEY" = "${ak}",
+                "SECRET_KEY" = "${sk}",
+                "format" = "${outfile_format}",
+                "region" = "${region}"
+            );
+        """
+    }
+
+    def test_q1 = { outfile_format ->
+        order_qt_select_base1 """ select * from ${export_table_name} """
+
+        // select ... into outfile ...
+        def res = sql """
+            SELECT id, name FROM ${export_table_name}
+            INTO OUTFILE "s3://${outFilePath}"
+            FORMAT AS ${outfile_format}
+            PROPERTIES (
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}",
+                "s3.secret_key" = "${sk}",
+                "s3.access_key" = "${ak}",
+                "schema" = "required,byte_array,id;required,byte_array,name"
+            );
+        """
+        def outfile_url = res[0][3]
+
+        check_outfile_data(outfile_url, outfile_format)
+        check_outfile_column_name(outfile_url, outfile_format)
+    }
+
+    // test parquet format
+    test_q1("parquet")
+}
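For context, the `schema` property exercised by the test encodes the target Parquet schema as semicolon-separated `repetition,physical_type,column_name` triples. Below is a minimal standalone sketch of that shape; the parser is hypothetical and does not reproduce Doris's actual validation in `OutFileClause`.

```java
import java.util.ArrayList;
import java.util.List;

// Minimal parser for the outfile "schema" property value used in the test:
// semicolon-separated triples of "repetition,physical_type,column_name".
public class ParquetSchemaPropertySketch {
    record Field(String repetition, String type, String name) {}

    static List<Field> parse(String schema) {
        List<Field> fields = new ArrayList<>();
        for (String part : schema.split(";")) {
            if (part.isEmpty()) {
                continue; // tolerate a trailing ';'
            }
            String[] triple = part.split(",");
            if (triple.length != 3) {
                throw new IllegalArgumentException("expected repetition,type,name but got: " + part);
            }
            fields.add(new Field(triple[0].trim(), triple[1].trim(), triple[2].trim()));
        }
        return fields;
    }

    public static void main(String[] args) {
        // The value from the regression test above.
        parse("required,byte_array,id;required,byte_array,name")
                .forEach(f -> System.out.println(f.name() + " -> " + f.repetition() + " " + f.type()));
    }
}
```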