File tree Expand file tree Collapse file tree 4 files changed +46
-6
lines changed
main/scala/org/apache/spark/sql/delta/commands
test/scala/org/apache/spark/sql/delta Expand file tree Collapse file tree 4 files changed +46
-6
lines changed Original file line number Diff line number Diff line change @@ -291,7 +291,7 @@ trait MergeIntoCommandBase extends LeafRunnableCommand
291291 fileIndex : TahoeFileIndex ,
292292 columnsToDrop : Seq [String ]): LogicalPlan = {
293293
294- val targetOutputCols = getTargetOutputCols(deltaTxn)
294+ val targetOutputCols = getTargetOutputCols(spark, deltaTxn)
295295
296296 val plan = {
297297
@@ -339,8 +339,16 @@ trait MergeIntoCommandBase extends LeafRunnableCommand
339339 * this transaction, since new columns will have a value of null for all existing rows.
340340 */
341341 protected def getTargetOutputCols (
342- txn : OptimisticTransaction , makeNullable : Boolean = false ): Seq [NamedExpression ] = {
343- txn.metadata.schema.map { col =>
342+ spark : SparkSession ,
343+ txn : OptimisticTransaction ,
344+ makeNullable : Boolean = false )
345+ : Seq [NamedExpression ] = {
346+ // Internal metadata attached to the table schema must not leak into the target plan to
347+ // prevent inconsistencies - e.g. metadata matters when comparing the data types of structs
348+ // with nested fields.
349+ val schema = DeltaColumnMapping .dropColumnMappingMetadata(
350+ DeltaTableUtils .removeInternalMetadata(spark, txn.metadata.schema))
351+ schema.map { col =>
344352 targetOutputAttributesMap
345353 .get(col.name)
346354 .map { a =>
Original file line number Diff line number Diff line change @@ -388,7 +388,7 @@ trait ClassicMergeExecutor extends MergeOutputGeneration {
388388
389389 // The target output columns need to be marked as nullable here, as they are going to be used
390390 // to reference the output of an outer join.
391- val targetOutputCols = getTargetOutputCols(deltaTxn, makeNullable = true )
391+ val targetOutputCols = getTargetOutputCols(spark, deltaTxn, makeNullable = true )
392392
393393 // If there are N columns in the target table, the full outer join output will have:
394394 // - N columns for target table
Original file line number Diff line number Diff line change @@ -96,7 +96,7 @@ trait InsertOnlyMergeExecutor extends MergeOutputGeneration {
9696 sourceDF
9797 }
9898
99- val outputDF = generateInsertsOnlyOutputDF(preparedSourceDF, deltaTxn)
99+ val outputDF = generateInsertsOnlyOutputDF(spark, preparedSourceDF, deltaTxn)
100100 logDebug(s " $extraOpType: output plan: \n " + outputDF.queryExecution)
101101
102102 val newFiles = writeFiles(spark, deltaTxn, outputDF)
@@ -142,10 +142,11 @@ trait InsertOnlyMergeExecutor extends MergeOutputGeneration {
142142 * and when there are multiple insert clauses.
143143 */
144144 private def generateInsertsOnlyOutputDF (
145+ spark : SparkSession ,
145146 preparedSourceDF : DataFrame ,
146147 deltaTxn : OptimisticTransaction ): DataFrame = {
147148
148- val targetOutputColNames = getTargetOutputCols(deltaTxn).map(_.name)
149+ val targetOutputColNames = getTargetOutputCols(spark, deltaTxn).map(_.name)
149150
150151 // When there is only one insert clause, there is no need for ROW_DROPPED_COL and
151152 // output df can be generated without CaseWhen.
Original file line number Diff line number Diff line change @@ -3154,6 +3154,37 @@ abstract class MergeIntoSuiteBase
31543154 expectedErrorMsgForDataSetTempView = null
31553155 )
31563156
3157+ test(" merge correctly handle field metadata" ) {
3158+ withTable(" source" , " target" ) {
3159+ // Create a target table with user metadata (comments) and internal metadata (column mapping
3160+ // information) on both a top-level column and a nested field.
3161+ sql(
3162+ """
3163+ |CREATE TABLE target(
3164+ | key int not null COMMENT 'data column',
3165+ | value int not null,
3166+ | cstruct struct<foo int COMMENT 'foo field'>)
3167+ |USING DELTA
3168+ |TBLPROPERTIES (
3169+ | 'delta.minReaderVersion' = '2',
3170+ | 'delta.minWriterVersion' = '5',
3171+ | 'delta.columnMapping.mode' = 'name')
3172+ """ .stripMargin
3173+ )
3174+ sql(s " INSERT INTO target VALUES (0, 0, null) " )
3175+
3176+ sql(" CREATE TABLE source (key int not null, value int not null) USING DELTA" )
3177+ sql(s " INSERT INTO source VALUES (1, 1) " )
3178+
3179+ executeMerge(
3180+ tgt = " target" ,
3181+ src = " source" ,
3182+ cond = " source.key = target.key" ,
3183+ update(condition = " target.key = 1" , set = " target.value = 42" ),
3184+ updateNotMatched(condition = " target.key = 100" , set = " target.value = 22" ))
3185+ }
3186+ }
3187+
31573188 test(" UDT Data Types - simple and nested" ) {
31583189 withTable(" source" ) {
31593190 withTable(" target" ) {
You can’t perform that action at this time.
0 commit comments