[CARBONDATA-4014] Support Change Column Comment
Why is this PR needed?
Currently, column comments are supported in "CREATE TABLE" and "ADD COLUMN", but there is no way to alter the comment of an existing column. This should be supported in 'CHANGE COLUMN'.

What changes were proposed in this PR?
Support "ALTER TABLE table_name CHANGE [COLUMN] col_name col_name data_type [COMMENT col_comment]"

Does this PR introduce any user interface change?
Yes. The CHANGE COLUMN explanation is added to the DDL document.

Is any new testcase added?
Yes
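A minimal usage sketch of the added syntax (hypothetical table, column, and comment text; assumes a running SparkSession named `spark` with CarbonData configured):

```scala
// Hypothetical example of the new COMMENT clause in CHANGE COLUMN.
spark.sql("CREATE TABLE IF NOT EXISTS demo (id INT, name STRING) STORED AS carbondata")
// Keep the column name and data type unchanged and only update the comment.
spark.sql("ALTER TABLE demo CHANGE COLUMN name name STRING COMMENT 'customer name'")
// DESCRIBE should now report the updated comment for the column.
spark.sql("DESCRIBE demo").show(100, false)
```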
marchpure committed Sep 29, 2020
1 parent 44b933b commit de35992
Showing 11 changed files with 200 additions and 90 deletions.
22 changes: 16 additions & 6 deletions docs/ddl-of-carbondata.md
@@ -49,7 +49,7 @@ CarbonData DDL statements are documented here,which includes:
* [ADD COLUMNS](#add-columns)
* [DROP COLUMNS](#drop-columns)
* [RENAME COLUMN](#change-column-nametype)
* [CHANGE COLUMN NAME/TYPE](#change-column-nametype)
* [CHANGE COLUMN NAME/TYPE/COMMENT](#change-column-nametype)
* [MERGE INDEXES](#merge-index)
* [SET/UNSET](#set-and-unset)
* [DROP TABLE](#drop-table)
@@ -719,18 +719,23 @@ Users can specify which columns to include and exclude for local dictionary generation
**NOTE:** Drop Complex child column is not supported.
- #### CHANGE COLUMN NAME/TYPE
- #### CHANGE COLUMN NAME/TYPE/COMMENT
This command is used to change column name and the data type from INT to BIGINT or decimal precision from lower to higher.
This command is used to change the column name and comment, and to change the data type from INT to BIGINT or the decimal precision from lower to higher.
Change of decimal data type from lower precision to higher precision will only be supported for cases where there is no data loss.
Change of comment is only supported for columns other than the partition column.
```
ALTER TABLE [db_name.]table_name CHANGE col_old_name col_new_name column_type
ALTER TABLE [db_name.]table_name CHANGE col_old_name col_new_name column_type [COMMENT 'col_comment']
```
Valid Scenarios
- Invalid scenario - Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same.
- Valid scenario - Change of decimal precision from (10,2) to (12,3) is valid as the total number of digits are increased by 2 but scale is increased only by 1 which will not lead to any data loss.
- Invalid scenarios
* Change of decimal precision from (10,2) to (10,5) is invalid as in this case only scale is increased but total number of digits remains the same.
* Change the comment of the partition column
- Valid scenarios
* Change of decimal precision from (10,2) to (12,3) is valid as the total number of digits are increased by 2 but scale is increased only by 1 which will not lead to any data loss.
* Change the comment of columns other than partition column
- **NOTE:** The allowed range is 38,38 (precision, scale) and is a valid upper case scenario which is not resulting in data loss.
Example1:Change column a1's name to a2 and its data type from INT to BIGINT.
@@ -750,6 +755,11 @@ Users can specify which columns to include and exclude for local dictionary generation
```
ALTER TABLE test_db.carbon CHANGE a3 a4 STRING
```
Example3: Change column a3's comment to "col_comment".
```
ALTER TABLE test_db.carbon CHANGE a3 a3 STRING COMMENT 'col_comment'
```
**NOTE:** Once the column is renamed, user has to take care about replacing the fileheader with the new name or changing the column header in csv file.
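To make the decimal rule above concrete, a small worked check with an illustrative value (not taken from the commit):

```scala
// decimal(10,2) can hold up to 8 integer digits, decimal(10,5) only 5, while decimal(12,3)
// can hold 9, so only the (12,3) target is guaranteed to fit every existing value.
val storedValue = BigDecimal("12345678.90")                    // fits decimal(10,2)
val integerDigits = storedValue.precision - storedValue.scale  // 8
val fitsValidTarget = integerDigits <= 12 - 3                  // true  -> (12,3) allowed
val fitsInvalidTarget = integerDigits <= 10 - 5                // false -> (10,5) would lose data
```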
@@ -1110,8 +1110,7 @@ object CarbonParserUtil
*/
def parseDataType(
dataType: String,
values: Option[List[(Int, Int)]],
isColumnRename: Boolean): DataTypeInfo = {
values: Option[List[(Int, Int)]]): DataTypeInfo = {
var precision: Int = 0
var scale: Int = 0
dataType match {
Expand All @@ -1135,11 +1134,7 @@ object CarbonParserUtil {
}
DataTypeInfo("decimal", precision, scale)
case _ =>
if (isColumnRename) {
DataTypeInfo(DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase)
} else {
throw new MalformedCarbonCommandException("Data type provided is invalid.")
}
DataTypeInfo(DataTypeConverterUtil.convertToCarbonType(dataType).getName.toLowerCase)
}
}
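With the isColumnRename flag removed, parseDataType converts any supported type name regardless of whether the statement renames the column, changes its type, or only updates the comment. A rough illustration of the simplified contract (the import path is assumed, and the converted type name comes from DataTypeConverterUtil):

```scala
import org.apache.spark.sql.catalyst.CarbonParserUtil  // assumed package of CarbonParserUtil

// Non-decimal types are no longer rejected when the operation is not a rename.
val bigintInfo = CarbonParserUtil.parseDataType("bigint", None)
// Decimal keeps its precision/scale handling: here precision 12, scale 3.
val decimalInfo = CarbonParserUtil.parseDataType("decimal", Some(List((12, 3))))
```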

@@ -169,7 +169,8 @@ case class AlterTableDataTypeChangeModel(dataTypeInfo: DataTypeInfo,
tableName: String,
columnName: String,
newColumnName: String,
isColumnRename: Boolean)
isColumnRename: Boolean,
newColumnComment: Option[String] = None)
extends AlterTableColumnRenameModel(columnName, newColumnName, isColumnRename)

case class AlterTableRenameModel(
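A sketch of how the parser layer is expected to populate the extended model when a COMMENT clause is present; the arguments follow the call site in DDLHelper further down, and the values are illustrative only:

```scala
// Roughly what ALTER TABLE test_db.carbon CHANGE a3 a3 STRING COMMENT 'col_comment'
// should produce (argument order matches the case class above).
val model = AlterTableDataTypeChangeModel(
  DataTypeInfo("string"),          // parsed data type (unchanged here)
  Some("test_db"),                 // database name
  "carbon",                        // table name
  "a3",                            // existing column name
  "a3",                            // new column name (same, so no rename)
  isColumnRename = false,
  newColumnComment = Some("col_comment"))
```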
@@ -17,16 +17,20 @@

package org.apache.spark.sql.execution.command.schema

import java.util

import scala.collection.JavaConverters._
import scala.collection.mutable

import org.apache.hadoop.hive.metastore.api.InvalidOperationException
import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.execution.command.{AlterTableDataTypeChangeModel, DataTypeInfo, MetadataCommand}
import org.apache.spark.sql.hive.CarbonSessionCatalogUtil
import org.apache.spark.util.{AlterTableUtil, SparkUtil}
import org.apache.spark.util.AlterTableUtil

import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.features.TableOperation
import org.apache.carbondata.core.locks.{ICarbonLock, LockUsage}
import org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl
@@ -57,20 +61,6 @@ abstract class CarbonAlterTableColumnRenameCommand(oldColumnName: String, newCol
s"column ${ oldCarbonColumn.getColName }")
}

// if column rename operation is on partition column, then fail the rename operation
if (null != carbonTable.getPartitionInfo) {
val partitionColumns = carbonTable.getPartitionInfo.getColumnSchemaList
partitionColumns.asScala.foreach {
col =>
if (col.getColumnName.equalsIgnoreCase(oldColumnName)) {
throw new MalformedCarbonCommandException(
s"Column Rename Operation failed. Renaming " +
s"the partition column $newColumnName is not " +
s"allowed")
}
}
}

// if column rename operation is on bucket column, then fail the rename operation
if (null != carbonTable.getBucketingInfo) {
val bucketColumns = carbonTable.getBucketingInfo.getListOfColumns
@@ -139,6 +129,8 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand(
.fireEvent(alterTableColRenameAndDataTypeChangePreEvent, operationContext)
val newColumnName = alterTableColRenameAndDataTypeChangeModel.newColumnName.toLowerCase
val oldColumnName = alterTableColRenameAndDataTypeChangeModel.columnName.toLowerCase
val isColumnRename = alterTableColRenameAndDataTypeChangeModel.isColumnRename
val newColumnComment = alterTableColRenameAndDataTypeChangeModel.newColumnComment
val carbonColumns = carbonTable.getCreateOrderColumn().asScala.filter(!_.isInvisible)
if (!carbonColumns.exists(_.getColName.equalsIgnoreCase(oldColumnName))) {
throwMetadataException(dbName, tableName, s"Column does not exist: $oldColumnName")
@@ -150,45 +142,48 @@
}
val newColumnPrecision = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.precision
val newColumnScale = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.scale
if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) {
// validate the columns to be renamed
validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable)
// if the datatype is source datatype, then it is just a column rename operation, else set
// the isDataTypeChange flag to true
if (oldCarbonColumn.head.getDataType.getName
.equalsIgnoreCase(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType)) {
val newColumnPrecision =
alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.precision
val newColumnScale = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.scale
// if the source datatype is decimal and there is change in precision and scale, then
// along with rename, datatype change is also required for the command, so set the
// isDataTypeChange flag to true in this case
if (oldCarbonColumn.head.getDataType.getName.equalsIgnoreCase("decimal") &&
(oldCarbonColumn.head.getDataType.asInstanceOf[DecimalType].getPrecision !=
newColumnPrecision ||
oldCarbonColumn.head.getDataType.asInstanceOf[DecimalType].getScale !=
newColumnScale)) {
isDataTypeChange = true
}
} else {
// set isDataTypeChange flag
if (oldCarbonColumn.head.getDataType.getName
.equalsIgnoreCase(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.dataType)) {
val newColumnPrecision =
alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.precision
val newColumnScale = alterTableColRenameAndDataTypeChangeModel.dataTypeInfo.scale
// if the source datatype is decimal and there is change in precision and scale, then
// along with rename, datatype change is also required for the command, so set the
// isDataTypeChange flag to true in this case
if (oldCarbonColumn.head.getDataType.getName.equalsIgnoreCase("decimal") &&
(oldCarbonColumn.head.getDataType.asInstanceOf[DecimalType].getPrecision !=
newColumnPrecision ||
oldCarbonColumn.head.getDataType.asInstanceOf[DecimalType].getScale !=
newColumnScale)) {
isDataTypeChange = true
}
} else {
isDataTypeChange = true
}
if (isDataTypeChange) {
// if column datatype change operation is on partition column, then fail the datatype change
// operation
if (null != carbonTable.getPartitionInfo) {
val partitionColumns = carbonTable.getPartitionInfo.getColumnSchemaList
partitionColumns.asScala.foreach {
col =>
if (col.getColumnName.equalsIgnoreCase(oldColumnName)) {
throw new MalformedCarbonCommandException(
s"Alter datatype of the partition column $newColumnName is not allowed")
}
}
// If there is no column rename, datatype change, or comment change,
// return directly without execution
if (!isColumnRename && !isDataTypeChange && !newColumnComment.isDefined) {
return Seq.empty
}
// if the change column operation is on a partition column, then fail the
// change column operation
if (null != carbonTable.getPartitionInfo) {
val partitionColumns = carbonTable.getPartitionInfo.getColumnSchemaList
partitionColumns.asScala.foreach {
col =>
if (col.getColumnName.equalsIgnoreCase(oldColumnName)) {
throw new InvalidOperationException(
s"Alter on partition column $newColumnName is not supported")
}
}
}
if (alterTableColRenameAndDataTypeChangeModel.isColumnRename) {
// validate the columns to be renamed
validColumnsForRenaming(carbonColumns, oldCarbonColumn.head, carbonTable)
}
if (isDataTypeChange) {
// validate the columns to change datatype
validateColumnDataType(alterTableColRenameAndDataTypeChangeModel.dataTypeInfo,
oldCarbonColumn.head)
}
@@ -222,6 +217,14 @@ private[sql] case class CarbonAlterTableColRenameDataTypeChangeCommand(
.setPrecision(newColumnPrecision)
columnSchema.setScale(newColumnScale)
}
if (newColumnComment.isDefined && columnSchema.getColumnProperties != null) {
columnSchema.getColumnProperties.put(
CarbonCommonConstants.COLUMN_COMMENT, newColumnComment.get)
} else if (newColumnComment.isDefined) {
val newColumnProperties = new util.HashMap[String, String]
newColumnProperties.put(CarbonCommonConstants.COLUMN_COMMENT, newColumnComment.get)
columnSchema.setColumnProperties(newColumnProperties)
}
addColumnSchema = columnSchema
timeStamp = System.currentTimeMillis()
// make a new schema evolution entry after column rename or datatype change
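The comment handling added above can be read as the following standalone sketch (the helper object is hypothetical, not part of the commit): the new comment is stored under the COLUMN_COMMENT key in the column schema's property map, and the map is created lazily when the column has no properties yet.

```scala
import java.util

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema

// Hypothetical helper equivalent to the inline property update above.
object ColumnCommentUpdater {
  def applyComment(columnSchema: ColumnSchema, newColumnComment: Option[String]): Unit = {
    newColumnComment.foreach { comment =>
      // Reuse the existing property map when present, otherwise attach a fresh one.
      val properties =
        Option(columnSchema.getColumnProperties).getOrElse(new util.HashMap[String, String]())
      properties.put(CarbonCommonConstants.COLUMN_COMMENT, comment)
      columnSchema.setColumnProperties(properties)
    }
  }
}
```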
@@ -31,12 +31,13 @@ import org.apache.spark.sql.execution.command.table._
import org.apache.spark.sql.execution.datasources.{LogicalRelation, RefreshResource, RefreshTable}
import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand
import org.apache.spark.sql.parser.{CarbonSpark2SqlParser, CarbonSparkSqlParserUtil}
import org.apache.spark.sql.types.DecimalType
import org.apache.spark.sql.types.{DecimalType, Metadata}
import org.apache.spark.sql.util.SparkSQLUtil
import org.apache.spark.util.{CarbonReflectionUtils, FileUtils}

import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo}
import org.apache.carbondata.core.util.{CarbonProperties, ThreadLocalSessionInfo}
import org.apache.carbondata.spark.util.DataTypeConverterUtil
@@ -218,6 +219,7 @@ object DDLHelper {
} else {
val columnName = changeColumnCommand.columnName
val newColumn = changeColumnCommand.newColumn
val newColumnMetaData = newColumn.metadata
val isColumnRename = !columnName.equalsIgnoreCase(newColumn.name)
val values = newColumn.dataType match {
case d: DecimalType => Some(List((d.precision, d.scale)))
@@ -228,16 +230,23 @@
.convertToCarbonType(newColumn.dataType.typeName)
.getName
.toLowerCase,
values,
isColumnRename)
values)
var newColumnComment: Option[String] = Option.empty
if (newColumnMetaData != null &&
newColumnMetaData.contains(CarbonCommonConstants.COLUMN_COMMENT)) {
newColumnComment =
Some(newColumnMetaData.getString(CarbonCommonConstants.COLUMN_COMMENT))
}

val alterTableColRenameAndDataTypeChangeModel =
AlterTableDataTypeChangeModel(
dataTypeInfo,
tableName.database.map(_.toLowerCase),
tableName.table.toLowerCase,
columnName.toLowerCase,
newColumn.name.toLowerCase,
isColumnRename)
isColumnRename,
newColumnComment)

CarbonAlterTableColRenameDataTypeChangeCommand(
alterTableColRenameAndDataTypeChangeModel
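A rough sketch of where the comment comes from on the Spark side. The assumptions here are that Spark's ChangeColumnCommand carries the COMMENT clause in the new StructField's metadata under the "comment" key and that CarbonCommonConstants.COLUMN_COMMENT resolves to the same key; the object and its names are illustrative.

```scala
import org.apache.spark.sql.types.{MetadataBuilder, StringType, StructField}

object CommentExtraction {
  // Assumed to match CarbonCommonConstants.COLUMN_COMMENT.
  val ColumnComment = "comment"

  def extractComment(newColumn: StructField): Option[String] = {
    val metadata = newColumn.metadata
    if (metadata != null && metadata.contains(ColumnComment)) {
      Some(metadata.getString(ColumnComment))
    } else {
      None
    }
  }

  def main(args: Array[String]): Unit = {
    // ALTER TABLE t CHANGE a3 a3 STRING COMMENT 'col_comment' surfaces roughly as:
    val newColumn = StructField("a3", StringType,
      metadata = new MetadataBuilder().putString(ColumnComment, "col_comment").build())
    assert(extractComment(newColumn).contains("col_comment"))
  }
}
```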
@@ -32,6 +32,7 @@ import org.apache.spark.util.{CarbonReflectionUtils, PartitionCacheKey, Partitio

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.converter.SparkDataTypeConverterImpl
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.schema.table.CarbonTable
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema
import org.apache.carbondata.core.util.CarbonUtil
@@ -142,14 +143,16 @@ object CarbonSessionUtil {
if (!column.isInvisible) {
val structFiled =
if (null != column.getColumnProperties &&
column.getColumnProperties.get("comment") != null) {
column.getColumnProperties.get(CarbonCommonConstants.COLUMN_COMMENT) != null) {
StructField(column.getColumnName,
SparkTypeConverter
.convertCarbonToSparkDataType(column,
carbonTable),
true,
// update the column comment if present in the schema
new MetadataBuilder().putString("comment", column.getColumnProperties.get("comment"))
new MetadataBuilder().putString(
CarbonCommonConstants.COLUMN_COMMENT,
column.getColumnProperties.get(CarbonCommonConstants.COLUMN_COMMENT))
.build())
} else {
StructField(column.getColumnName,
@@ -44,7 +44,7 @@ trait SqlAstBuilderHelper extends SparkSqlAstBuilder {

val alterTableColRenameAndDataTypeChangeModel =
AlterTableDataTypeChangeModel(
CarbonParserUtil.parseDataType(typeString, values, isColumnRename),
CarbonParserUtil.parseDataType(typeString, values),
CarbonParserUtil.convertDbNameToLowerCase(Option(ctx.tableIdentifier().db).map(_.getText)),
ctx.tableIdentifier().table.getText.toLowerCase,
ctx.identifier.getText.toLowerCase,
@@ -665,8 +665,7 @@ object CarbonSparkSqlParserUtil {
val alterTableColRenameAndDataTypeChangeModel =
AlterTableDataTypeChangeModel(
CarbonParserUtil.parseDataType(dataType.toLowerCase,
values,
isColumnRename),
values),
CarbonParserUtil.convertDbNameToLowerCase(dbName),
table.toLowerCase,
columnName.toLowerCase,
@@ -515,12 +515,10 @@ test("Creation of partition table should fail if the colname in table schema and
sql("alter table onlyPart drop columns(name)")
}
assert(ex1.getMessage.contains("alter table drop column is failed, cannot have the table with all columns as partition column"))
if (SparkUtil.isSparkVersionXAndAbove("2.2")) {
val ex2 = intercept[MalformedCarbonCommandException] {
sql("alter table onlyPart change age age bigint")
}
assert(ex2.getMessage.contains("Alter datatype of the partition column age is not allowed"))
val ex2 = intercept[MalformedCarbonCommandException] {
sql("alter table onlyPart change age age bigint")
}
assert(ex2.getMessage.contains("Alter on partition column age is not supported"))
sql("drop table if exists onlyPart")
}

@@ -366,7 +366,7 @@ class AlterTableValidationTestCase extends QueryTest with BeforeAndAfterAll {
sql("alter table default.restructure change decimalfield deciMalfield Decimal(11,3)")
sql("alter table default.restructure change decimalfield deciMalfield Decimal(12,3)")
intercept[ProcessMetaDataException] {
sql("alter table default.restructure change decimalfield deciMalfield Decimal(12,3)")
sql("alter table default.restructure change decimalfield deciMalfield Decimal(12,2)")
}
intercept[ProcessMetaDataException] {
sql("alter table default.restructure change decimalfield deciMalfield Decimal(13,1)")
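A hypothetical test in the style of the existing suites (test name, table, and comment text are illustrative and not taken from the commit; it assumes the suite's sql and checkExistence helpers), showing how the new clause could be exercised:

```scala
test("alter table change column comment only") {
  sql("DROP TABLE IF EXISTS comment_test")
  sql("CREATE TABLE comment_test (id INT, name STRING) STORED AS carbondata")
  // Same column name and data type; only the comment changes.
  sql("ALTER TABLE comment_test CHANGE name name STRING COMMENT 'customer name'")
  // DESCRIBE should surface the updated comment.
  checkExistence(sql("DESCRIBE comment_test"), true, "customer name")
  sql("DROP TABLE IF EXISTS comment_test")
}
```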
