From 4b6d362c9d843faff80f455a3e8dada13f7bf9a0 Mon Sep 17 00:00:00 2001
From: zhengruifeng
Date: Thu, 8 Nov 2018 17:31:03 +0800
Subject: [PATCH 1/5] init

---
 .../main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 748c869af4117..9d56201b6b84a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.ml.feature
 
+import java.util.Locale
+
 import org.apache.spark.annotation.Since
 import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param._
@@ -37,7 +39,7 @@ class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
 
   override protected def createTransformFunc: String => Seq[String] = {
     // scalastyle:off caselocale
-    _.toLowerCase.split("\\s")
+    _.toLowerCase(Locale.ROOT).split("\\s")
     // scalastyle:on caselocale
   }
 
@@ -143,7 +145,7 @@ class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   override protected def createTransformFunc: String => Seq[String] = { originStr =>
     val re = $(pattern).r
     // scalastyle:off caselocale
-    val str = if ($(toLowercase)) originStr.toLowerCase() else originStr
+    val str = if ($(toLowercase)) originStr.toLowerCase(Locale.ROOT) else originStr
     // scalastyle:on caselocale
     val tokens = if ($(gaps)) re.split(str).toSeq else re.findAllIn(str).toSeq
     val minLength = $(minTokenLength)
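
Note: every hunk in this series sits inside // scalastyle:off caselocale ... // scalastyle:on caselocale markers, Spark's style check that forces a conscious locale choice for toLowerCase/toUpperCase. The pitfall the check guards against is that the zero-argument forms use the JVM's default locale, so results can vary from machine to machine. A minimal standalone sketch of the classic Turkish-locale surprise (illustrative only, not part of the patches):

    import java.util.Locale

    object CaseLocaleDemo {
      def main(args: Array[String]): Unit = {
        val turkish = new Locale("tr", "TR")
        // Under the Turkish locale, 'I' lower-cases to dotless 'ı',
        // so default-locale folding is not reproducible across JVMs.
        println("TITLE".toLowerCase(turkish))      // prints "tıtle"
        // Locale.ROOT pins the mapping to locale-neutral rules.
        println("TITLE".toLowerCase(Locale.ROOT))  // prints "title"
      }
    }

This is why patch 1 rewrites _.toLowerCase.split("\\s") as _.toLowerCase(Locale.ROOT).split("\\s"): the tokenizer's output no longer depends on the default locale of the JVM running the job.
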
From ab90b3354396427a023190a94cfecfd6999002de Mon Sep 17 00:00:00 2001
From: zhengruifeng
Date: Thu, 8 Nov 2018 17:57:51 +0800
Subject: [PATCH 2/5] init

---
 .../apache/spark/unsafe/types/UTF8String.java   |  5 +++--
 .../types/UTF8StringPropertyCheckSuite.scala    |  6 ++++--
 project/SparkBuild.scala                        | 10 +++++++---
 .../analysis/higherOrderFunctions.scala         |  4 +++-
 .../sql/catalyst/csv/CSVHeaderChecker.scala     |  6 ++++--
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  2 +-
 .../apache/spark/sql/util/SchemaUtils.scala     |  4 +++-
 .../InsertIntoHadoopFsRelationCommand.scala     |  3 ++-
 .../datasources/PartitioningUtils.scala         |  4 ++--
 .../execution/datasources/csv/CSVUtils.scala    |  6 ++++--
 .../spark/sql/hive/HiveExternalCatalog.scala    | 18 +++++++++---------
 .../spark/sql/hive/HiveMetastoreCatalog.scala   | 12 +++++++-----
 12 files changed, 49 insertions(+), 31 deletions(-)

diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 3a3bfc4a94bb3..7f73790584ea7 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -23,6 +23,7 @@
 import java.nio.ByteOrder;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
+import java.util.Locale;
 import java.util.Map;
 
 import com.esotericsoftware.kryo.Kryo;
@@ -411,7 +412,7 @@ public UTF8String toUpperCase() {
   }
 
   private UTF8String toUpperCaseSlow() {
-    return fromString(toString().toUpperCase());
+    return fromString(toString().toUpperCase(Locale.ROOT));
   }
 
   /**
@@ -441,7 +442,7 @@ public UTF8String toLowerCase() {
   }
 
   private UTF8String toLowerCaseSlow() {
-    return fromString(toString().toLowerCase());
+    return fromString(toString().toLowerCase(Locale.ROOT));
   }
 
   /**
diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
index 9656951810daf..05faaf38930a7 100644
--- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
+++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.unsafe.types
 
+import java.util.Locale
+
 import org.apache.commons.lang3.StringUtils
 import org.scalacheck.{Arbitrary, Gen}
 import org.scalatest.prop.GeneratorDrivenPropertyChecks
@@ -66,13 +68,13 @@ class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenProperty
   // scalastyle:off caselocale
   test("toUpperCase") {
     forAll { (s: String) =>
-      assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase))
+      assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase(Locale.ROOT)))
     }
   }
 
   test("toLowerCase") {
     forAll { (s: String) =>
-      assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase))
+      assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase(Locale.ROOT)))
     }
   }
   // scalastyle:on caselocale
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index ca57df0e31a7f..5e034f9fe2a95 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -17,6 +17,7 @@
 
 import java.io._
 import java.nio.file.Files
+import java.util.Locale
 
 import scala.io.Source
 import scala.util.Properties
@@ -650,10 +651,13 @@ object Assembly {
     },
     jarName in (Test, assembly) := s"${moduleName.value}-test-${version.value}.jar",
     mergeStrategy in assembly := {
-      case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
-      case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
+      case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf")
+        => MergeStrategy.discard
+      case m if m.toLowerCase(Locale.ROOT).matches("meta-inf.*\\.sf$")
+        => MergeStrategy.discard
       case "log4j.properties" => MergeStrategy.discard
-      case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
+      case m if m.toLowerCase(Locale.ROOT).startsWith("meta-inf/services/")
+        => MergeStrategy.filterDistinctLines
       case "reference.conf" => MergeStrategy.concat
       case _ => MergeStrategy.first
     }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
index a8a7bbd9f9cd0..61a8bc71c0040 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
+import java.util.Locale
+
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
@@ -74,7 +76,7 @@ case class ResolveLambdaVariables(conf: SQLConf) extends Rule[LogicalPlan] {
   private val canonicalizer = {
     if (!conf.caseSensitiveAnalysis) {
       // scalastyle:off caselocale
-      s: String => s.toLowerCase
+      s: String => s.toLowerCase(Locale.ROOT)
       // scalastyle:on caselocale
     } else {
       s: String => s
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala
index c39f77e891ae1..b668f5dc23914 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.catalyst.csv
 
+import java.util.Locale
+
 import com.univocity.parsers.csv.CsvParser
 
 import org.apache.spark.internal.Logging
@@ -67,8 +69,8 @@ class CSVHeaderChecker(
       var (nameInSchema, nameInHeader) = (fieldNames(i), columnNames(i))
       if (!caseSensitive) {
         // scalastyle:off caselocale
-        nameInSchema = nameInSchema.toLowerCase
-        nameInHeader = nameInHeader.toLowerCase
+        nameInSchema = nameInSchema.toLowerCase(Locale.ROOT)
+        nameInHeader = nameInHeader.toLowerCase(Locale.ROOT)
         // scalastyle:on caselocale
       }
       if (nameInHeader != nameInSchema) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 672bffcfc0cad..1b52e3231ac47 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -679,7 +679,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
       unrequiredChildIndex = Nil,
       outer = ctx.OUTER != null,
       // scalastyle:off caselocale
-      Some(ctx.tblName.getText.toLowerCase),
+      Some(ctx.tblName.getText.toLowerCase(Locale.ROOT)),
       // scalastyle:on caselocale
       ctx.colName.asScala.map(_.getText).map(UnresolvedAttribute.apply),
       query)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
index 052014ab86744..44e67580b2744 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.util
 
+import java.util.Locale
+
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.types.StructType
@@ -78,7 +80,7 @@ private[spark] object SchemaUtils {
   def checkColumnNameDuplication(
       columnNames: Seq[String], colType: String, caseSensitiveAnalysis: Boolean): Unit = {
     // scalastyle:off caselocale
-    val names = if (caseSensitiveAnalysis) columnNames else columnNames.map(_.toLowerCase)
+    val names = if (caseSensitiveAnalysis) columnNames else columnNames.map(_.toLowerCase(Locale.ROOT))
     // scalastyle:on caselocale
     if (names.distinct.length != names.length) {
       val duplicateColumns = names.groupBy(identity).collect {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
index d43fa3893df1d..65e3492b7be45 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.execution.datasources
 
 import java.io.IOException
+import java.util.Locale
 
 import org.apache.hadoop.fs.{FileSystem, Path}
 
@@ -96,7 +97,7 @@ case class InsertIntoHadoopFsRelationCommand(
 
     val partitionOverwriteMode = parameters.get("partitionOverwriteMode")
       // scalastyle:off caselocale
-      .map(mode => PartitionOverwriteMode.withName(mode.toUpperCase))
+      .map(mode => PartitionOverwriteMode.withName(mode.toUpperCase(Locale.ROOT)))
       // scalastyle:on caselocale
       .getOrElse(sparkSession.sessionState.conf.partitionOverwriteMode)
     val enableDynamicOverwrite = partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 3183fd30e5e0d..c941c3d1209ef 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -129,7 +129,7 @@ object PartitioningUtils {
     //   "hdfs://host:9000/invalidPath"
     //   "hdfs://host:9000/path"
     // TODO: Selective case sensitivity.
-    val discoveredBasePaths = optDiscoveredBasePaths.flatten.map(_.toString.toLowerCase())
+    val discoveredBasePaths = optDiscoveredBasePaths.flatten.map(_.toString.toLowerCase(Locale.ROOT))
     assert(
       discoveredBasePaths.distinct.size == 1,
       "Conflicting directory structures detected. Suspicious paths:\b" +
@@ -324,7 +324,7 @@ object PartitioningUtils {
     } else {
       // TODO: Selective case sensitivity.
       val distinctPartColNames =
-        pathsWithPartitionValues.map(_._2.columnNames.map(_.toLowerCase())).distinct
+        pathsWithPartitionValues.map(_._2.columnNames.map(_.toLowerCase((Locale.ROOT)))).distinct
       assert(
         distinctPartColNames.size == 1,
         listConflictingPartitionColumns(pathsWithPartitionValues))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala
index 21fabac472f4b..4b9dacbbd4180 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.execution.datasources.csv
 
+import java.util.Locale
+
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.Dataset
 import org.apache.spark.sql.catalyst.csv.CSVExprUtils
@@ -70,7 +72,7 @@ object CSVUtils {
     val duplicates = {
       val headerNames = row.filter(_ != null)
         // scalastyle:off caselocale
-        .map(name => if (caseSensitive) name else name.toLowerCase)
+        .map(name => if (caseSensitive) name else name.toLowerCase(Locale.ROOT))
         // scalastyle:on caselocale
       headerNames.diff(headerNames.distinct).distinct
     }
@@ -81,7 +83,7 @@
         // index as the suffix.
         s"_c$index"
       // scalastyle:off caselocale
-      } else if (!caseSensitive && duplicates.contains(value.toLowerCase)) {
+      } else if (!caseSensitive && duplicates.contains(value.toLowerCase(Locale.ROOT))) {
       // scalastyle:on caselocale
        // When there are case-insensitive duplicates, put the index as the suffix.
        s"$value$index"
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index c1178ad4a84fb..b1fcc1fc93a28 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -40,7 +40,6 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.logical.ColumnStat
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions}
@@ -871,7 +870,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       // columns. Here we Lowercase the column names before passing the partition spec to Hive
       // client, to satisfy Hive.
       // scalastyle:off caselocale
-      orderedPartitionSpec.put(colName.toLowerCase, partition(colName))
+      orderedPartitionSpec.put(colName.toLowerCase(Locale.ROOT), partition(colName))
       // scalastyle:on caselocale
     }
 
@@ -901,7 +900,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       // columns. Here we Lowercase the column names before passing the partition spec to Hive
       // client, to satisfy Hive.
       // scalastyle:off caselocale
-      orderedPartitionSpec.put(colName.toLowerCase, partition(colName))
+      orderedPartitionSpec.put(colName.toLowerCase(Locale.ROOT), partition(colName))
       // scalastyle:on caselocale
     }
 
@@ -923,7 +922,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   // APIs, to match this behaviour.
   private def lowerCasePartitionSpec(spec: TablePartitionSpec): TablePartitionSpec = {
     // scalastyle:off caselocale
-    spec.map { case (k, v) => k.toLowerCase -> v }
+    spec.map { case (k, v) => k.toLowerCase(Locale.ROOT) -> v }
     // scalastyle:on caselocale
   }
 
@@ -931,7 +930,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   private def buildLowerCasePartColNameMap(table: CatalogTable): Map[String, String] = {
     val actualPartColNames = table.partitionColumnNames
     // scalastyle:off caselocale
-    actualPartColNames.map(colName => (colName.toLowerCase, colName)).toMap
+    actualPartColNames.map(colName => (colName.toLowerCase(Locale.ROOT), colName)).toMap
     // scalastyle:on caselocale
   }
 
@@ -942,7 +941,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       spec: TablePartitionSpec,
       partColMap: Map[String, String]): TablePartitionSpec = {
     // scalastyle:off caselocale
-    spec.map { case (k, v) => partColMap(k.toLowerCase) -> v }
+    spec.map { case (k, v) => partColMap(k.toLowerCase(Locale.ROOT)) -> v }
     // scalastyle:on caselocale
   }
 
@@ -1003,7 +1002,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     // a default path generate by the new spec with lower cased partition column names. This is
     // unexpected and we need to rename them manually and alter the partition location.
     // scalastyle:off caselocale
-    val hasUpperCasePartitionColumn = partitionColumnNames.exists(col => col.toLowerCase != col)
+    val hasUpperCasePartitionColumn = partitionColumnNames
+      .exists(col => col.toLowerCase(Locale.ROOT) != col)
     // scalastyle:on caselocale
     if (tableMeta.tableType == MANAGED && hasUpperCasePartitionColumn) {
       val tablePath = new Path(tableMeta.location)
@@ -1046,7 +1046,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         // just move `a=1/b=3` into `A=1` with new name `B=3`.
       } else {
         // scalastyle:off caselocale
-        val actualPartitionString = getPartitionPathString(col.toLowerCase, partValue)
+        val actualPartitionString = getPartitionPathString(col.toLowerCase(Locale.ROOT), partValue)
         // scalastyle:on caselocale
         val actualPartitionPath = new Path(currentFullPath, actualPartitionString)
         try {
@@ -1199,7 +1199,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     val partSpec = PartitioningUtils.parsePathFragmentAsSeq(partitionPath)
     partSpec.map { case (partName, partValue) =>
       // scalastyle:off caselocale
-      partColNameMap(partName.toLowerCase) + "=" + escapePathName(partValue)
+      partColNameMap(partName.toLowerCase(Locale.ROOT)) + "=" + escapePathName(partValue)
       // scalastyle:on caselocale
     }.mkString("/")
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index d047953327958..585b92bb5fec0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.hive
 
+import java.util.Locale
+
 import scala.util.control.NonFatal
 
 import com.google.common.util.concurrent.Striped
@@ -60,8 +62,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
   private[hive] def getCachedDataSourceTable(table: TableIdentifier): LogicalPlan = {
     val key = QualifiedTableName(
       // scalastyle:off caselocale
-      table.database.getOrElse(sessionState.catalog.getCurrentDatabase).toLowerCase,
-      table.table.toLowerCase)
+      table.database.getOrElse(sessionState.catalog.getCurrentDatabase).toLowerCase(Locale.ROOT),
+      table.table.toLowerCase(Locale.ROOT))
       // scalastyle:on caselocale
     catalogProxy.getCachedTable(key)
   }
@@ -277,14 +279,14 @@ private[hive] object HiveMetastoreCatalog {
       inferredSchema: StructType): StructType = try {
     // scalastyle:off caselocale
     // Find any nullable fields in mestastore schema that are missing from the inferred schema.
-    val metastoreFields = metastoreSchema.map(f => f.name.toLowerCase -> f).toMap
+    val metastoreFields = metastoreSchema.map(f => f.name.toLowerCase(Locale.ROOT) -> f).toMap
     val missingNullables = metastoreFields
-      .filterKeys(!inferredSchema.map(_.name.toLowerCase).contains(_))
+      .filterKeys(!inferredSchema.map(_.name.toLowerCase(Locale.ROOT)).contains(_))
       .values
       .filter(_.nullable)
     // Merge missing nullable fields to inferred schema and build a case-insensitive field map.
     val inferredFields = StructType(inferredSchema ++ missingNullables)
-      .map(f => f.name.toLowerCase -> f).toMap
+      .map(f => f.name.toLowerCase(Locale.ROOT) -> f).toMap
     // scalastyle:on caselocale
     StructType(metastoreSchema.map(f => f.copy(name = inferredFields(f.name).name)))
   } catch {
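
Note: patch 2 touches two kinds of call sites. UTF8String and the tests over it handle user data; almost everything else folds internal identifiers — column names, partition keys, merge-strategy paths — where a locale-stable fold is exactly what case-insensitive comparison needs. A rough standalone model of the SchemaUtils duplication check (names are illustrative, not Spark's API):

    import java.util.Locale

    object DuplicateColumnCheck {
      // Mirrors the folding step in checkColumnNameDuplication: fold with a
      // stable locale, then keep the names that survive de-duplication.
      def duplicates(columnNames: Seq[String], caseSensitive: Boolean): Seq[String] = {
        val names =
          if (caseSensitive) columnNames
          else columnNames.map(_.toLowerCase(Locale.ROOT))
        names.diff(names.distinct).distinct
      }

      def main(args: Array[String]): Unit = {
        // "ID" and "id" collide once case is folded.
        println(duplicates(Seq("ID", "id", "value"), caseSensitive = false)) // List(id)
      }
    }

The two follow-up patches are pure style fixes: the rewritten SchemaUtils and PartitioningUtils lines grew past scalastyle's line-length limit once (Locale.ROOT) was spliced in, so they get reflowed — first with a dangling else, then with full if/else braces.
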
From a2d91bc2d7affe430c5b0ab52f00e644f2668bf9 Mon Sep 17 00:00:00 2001
From: zhengruifeng
Date: Thu, 8 Nov 2018 18:14:15 +0800
Subject: [PATCH 3/5] style

---
 .../src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala | 3 ++-
 .../spark/sql/execution/datasources/PartitioningUtils.scala    | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
index 44e67580b2744..9eb8d7b8129c3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
@@ -80,7 +80,8 @@ private[spark] object SchemaUtils {
   def checkColumnNameDuplication(
       columnNames: Seq[String], colType: String, caseSensitiveAnalysis: Boolean): Unit = {
     // scalastyle:off caselocale
-    val names = if (caseSensitiveAnalysis) columnNames else columnNames.map(_.toLowerCase(Locale.ROOT))
+    val names = if (caseSensitiveAnalysis) columnNames else
+      columnNames.map(_.toLowerCase(Locale.ROOT))
     // scalastyle:on caselocale
     if (names.distinct.length != names.length) {
       val duplicateColumns = names.groupBy(identity).collect {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index c941c3d1209ef..2cbb380a980af 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -129,7 +129,8 @@ object PartitioningUtils {
     //   "hdfs://host:9000/invalidPath"
     //   "hdfs://host:9000/path"
     // TODO: Selective case sensitivity.
-    val discoveredBasePaths = optDiscoveredBasePaths.flatten.map(_.toString.toLowerCase(Locale.ROOT))
+    val discoveredBasePaths = optDiscoveredBasePaths.flatten
+      .map(_.toString.toLowerCase(Locale.ROOT))
     assert(
       discoveredBasePaths.distinct.size == 1,
       "Conflicting directory structures detected. Suspicious paths:\b" +

From 91a20e502e711cf79e9cb3012de27d357b5d8370 Mon Sep 17 00:00:00 2001
From: zhengruifeng
Date: Thu, 8 Nov 2018 18:26:10 +0800
Subject: [PATCH 4/5] style 2

---
 .../main/scala/org/apache/spark/sql/util/SchemaUtils.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
index 9eb8d7b8129c3..3a6d3170d011e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
@@ -80,8 +80,11 @@ private[spark] object SchemaUtils {
   def checkColumnNameDuplication(
       columnNames: Seq[String], colType: String, caseSensitiveAnalysis: Boolean): Unit = {
     // scalastyle:off caselocale
-    val names = if (caseSensitiveAnalysis) columnNames else
+    val names = if (caseSensitiveAnalysis) {
+      columnNames
+    } else {
       columnNames.map(_.toLowerCase(Locale.ROOT))
+    }
     // scalastyle:on caselocale
     if (names.distinct.length != names.length) {
       val duplicateColumns = names.groupBy(identity).collect {

From aa5aa8e2094ded81cf13e15bd3c59beac2886f7b Mon Sep 17 00:00:00 2001
From: zhengruifeng
Date: Fri, 9 Nov 2018 10:26:14 +0800
Subject: [PATCH 5/5] revert

---
 .../apache/spark/unsafe/types/UTF8String.java   |  5 ++---
 .../types/UTF8StringPropertyCheckSuite.scala    |  6 ++----
 .../apache/spark/ml/feature/Tokenizer.scala     |  6 ++----
 .../analysis/higherOrderFunctions.scala         |  4 +---
 .../sql/catalyst/csv/CSVHeaderChecker.scala     |  6 ++----
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  2 +-
 .../apache/spark/sql/util/SchemaUtils.scala     |  8 +-------
 .../InsertIntoHadoopFsRelationCommand.scala     |  3 +--
 .../datasources/PartitioningUtils.scala         |  5 ++---
 .../execution/datasources/csv/CSVUtils.scala    |  6 ++----
 .../spark/sql/hive/HiveExternalCatalog.scala    | 18 +++++++++---------
 .../spark/sql/hive/HiveMetastoreCatalog.scala   | 12 +++++-------
 12 files changed, 30 insertions(+), 51 deletions(-)

diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 7f73790584ea7..3a3bfc4a94bb3 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -23,7 +23,6 @@
 import java.nio.ByteOrder;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
-import java.util.Locale;
 import java.util.Map;
 
 import com.esotericsoftware.kryo.Kryo;
@@ -412,7 +411,7 @@ public UTF8String toUpperCase() {
   }
 
   private UTF8String toUpperCaseSlow() {
-    return fromString(toString().toUpperCase(Locale.ROOT));
+    return fromString(toString().toUpperCase());
   }
 
   /**
@@ -442,7 +441,7 @@ public UTF8String toLowerCase() {
   }
 
   private UTF8String toLowerCaseSlow() {
-    return fromString(toString().toLowerCase(Locale.ROOT));
+    return fromString(toString().toLowerCase());
   }
 
   /**
diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
index 05faaf38930a7..9656951810daf 100644
--- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
+++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.unsafe.types
 
-import java.util.Locale
-
 import org.apache.commons.lang3.StringUtils
 import org.scalacheck.{Arbitrary, Gen}
 import org.scalatest.prop.GeneratorDrivenPropertyChecks
@@ -68,13 +66,13 @@ class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenProperty
   // scalastyle:off caselocale
   test("toUpperCase") {
     forAll { (s: String) =>
-      assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase(Locale.ROOT)))
+      assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase))
     }
   }
 
   test("toLowerCase") {
     forAll { (s: String) =>
-      assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase(Locale.ROOT)))
+      assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase))
     }
   }
   // scalastyle:on caselocale
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 9d56201b6b84a..748c869af4117 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.ml.feature
 
-import java.util.Locale
-
 import org.apache.spark.annotation.Since
 import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param._
@@ -39,7 +37,7 @@ class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
 
   override protected def createTransformFunc: String => Seq[String] = {
     // scalastyle:off caselocale
-    _.toLowerCase(Locale.ROOT).split("\\s")
+    _.toLowerCase.split("\\s")
     // scalastyle:on caselocale
   }
 
@@ -145,7 +143,7 @@ class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   override protected def createTransformFunc: String => Seq[String] = { originStr =>
     val re = $(pattern).r
     // scalastyle:off caselocale
-    val str = if ($(toLowercase)) originStr.toLowerCase(Locale.ROOT) else originStr
+    val str = if ($(toLowercase)) originStr.toLowerCase() else originStr
     // scalastyle:on caselocale
     val tokens = if ($(gaps)) re.split(str).toSeq else re.findAllIn(str).toSeq
     val minLength = $(minTokenLength)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
index 61a8bc71c0040..a8a7bbd9f9cd0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
-import java.util.Locale
-
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
@@ -76,7 +74,7 @@ case class ResolveLambdaVariables(conf: SQLConf) extends Rule[LogicalPlan] {
   private val canonicalizer = {
     if (!conf.caseSensitiveAnalysis) {
       // scalastyle:off caselocale
-      s: String => s.toLowerCase(Locale.ROOT)
+      s: String => s.toLowerCase
       // scalastyle:on caselocale
     } else {
       s: String => s
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala
index b668f5dc23914..c39f77e891ae1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.catalyst.csv
 
-import java.util.Locale
-
 import com.univocity.parsers.csv.CsvParser
 
 import org.apache.spark.internal.Logging
@@ -69,8 +67,8 @@ class CSVHeaderChecker(
       var (nameInSchema, nameInHeader) = (fieldNames(i), columnNames(i))
       if (!caseSensitive) {
         // scalastyle:off caselocale
-        nameInSchema = nameInSchema.toLowerCase(Locale.ROOT)
-        nameInHeader = nameInHeader.toLowerCase(Locale.ROOT)
+        nameInSchema = nameInSchema.toLowerCase
+        nameInHeader = nameInHeader.toLowerCase
         // scalastyle:on caselocale
       }
       if (nameInHeader != nameInSchema) {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 1b52e3231ac47..672bffcfc0cad 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -679,7 +679,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
       unrequiredChildIndex = Nil,
       outer = ctx.OUTER != null,
       // scalastyle:off caselocale
-      Some(ctx.tblName.getText.toLowerCase(Locale.ROOT)),
+      Some(ctx.tblName.getText.toLowerCase),
       // scalastyle:on caselocale
       ctx.colName.asScala.map(_.getText).map(UnresolvedAttribute.apply),
       query)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
index 3a6d3170d011e..052014ab86744 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.util
 
-import java.util.Locale
-
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.types.StructType
@@ -80,11 +78,7 @@ private[spark] object SchemaUtils {
   def checkColumnNameDuplication(
       columnNames: Seq[String], colType: String, caseSensitiveAnalysis: Boolean): Unit = {
     // scalastyle:off caselocale
-    val names = if (caseSensitiveAnalysis) {
-      columnNames
-    } else {
-      columnNames.map(_.toLowerCase(Locale.ROOT))
-    }
+    val names = if (caseSensitiveAnalysis) columnNames else columnNames.map(_.toLowerCase)
     // scalastyle:on caselocale
     if (names.distinct.length != names.length) {
       val duplicateColumns = names.groupBy(identity).collect {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
index 65e3492b7be45..d43fa3893df1d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.sql.execution.datasources
 
 import java.io.IOException
-import java.util.Locale
 
 import org.apache.hadoop.fs.{FileSystem, Path}
 
@@ -97,7 +96,7 @@ case class InsertIntoHadoopFsRelationCommand(
 
     val partitionOverwriteMode = parameters.get("partitionOverwriteMode")
       // scalastyle:off caselocale
-      .map(mode => PartitionOverwriteMode.withName(mode.toUpperCase(Locale.ROOT)))
+      .map(mode => PartitionOverwriteMode.withName(mode.toUpperCase))
       // scalastyle:on caselocale
       .getOrElse(sparkSession.sessionState.conf.partitionOverwriteMode)
     val enableDynamicOverwrite = partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 2cbb380a980af..3183fd30e5e0d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -129,8 +129,7 @@ object PartitioningUtils {
     //   "hdfs://host:9000/invalidPath"
     //   "hdfs://host:9000/path"
     // TODO: Selective case sensitivity.
-    val discoveredBasePaths = optDiscoveredBasePaths.flatten
-      .map(_.toString.toLowerCase(Locale.ROOT))
+    val discoveredBasePaths = optDiscoveredBasePaths.flatten.map(_.toString.toLowerCase())
     assert(
       discoveredBasePaths.distinct.size == 1,
       "Conflicting directory structures detected. Suspicious paths:\b" +
@@ -325,7 +324,7 @@ object PartitioningUtils {
     } else {
       // TODO: Selective case sensitivity.
       val distinctPartColNames =
-        pathsWithPartitionValues.map(_._2.columnNames.map(_.toLowerCase((Locale.ROOT)))).distinct
+        pathsWithPartitionValues.map(_._2.columnNames.map(_.toLowerCase())).distinct
       assert(
         distinctPartColNames.size == 1,
         listConflictingPartitionColumns(pathsWithPartitionValues))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala
index 4b9dacbbd4180..21fabac472f4b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.execution.datasources.csv
 
-import java.util.Locale
-
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.Dataset
 import org.apache.spark.sql.catalyst.csv.CSVExprUtils
@@ -72,7 +70,7 @@ object CSVUtils {
     val duplicates = {
       val headerNames = row.filter(_ != null)
         // scalastyle:off caselocale
-        .map(name => if (caseSensitive) name else name.toLowerCase(Locale.ROOT))
+        .map(name => if (caseSensitive) name else name.toLowerCase)
         // scalastyle:on caselocale
       headerNames.diff(headerNames.distinct).distinct
     }
@@ -83,7 +81,7 @@
         // index as the suffix.
         s"_c$index"
       // scalastyle:off caselocale
-      } else if (!caseSensitive && duplicates.contains(value.toLowerCase(Locale.ROOT))) {
+      } else if (!caseSensitive && duplicates.contains(value.toLowerCase)) {
       // scalastyle:on caselocale
        // When there are case-insensitive duplicates, put the index as the suffix.
        s"$value$index"
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index b1fcc1fc93a28..c1178ad4a84fb 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -40,6 +40,7 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.plans.logical.ColumnStat
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions}
@@ -870,7 +871,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       // columns. Here we Lowercase the column names before passing the partition spec to Hive
       // client, to satisfy Hive.
       // scalastyle:off caselocale
-      orderedPartitionSpec.put(colName.toLowerCase(Locale.ROOT), partition(colName))
+      orderedPartitionSpec.put(colName.toLowerCase, partition(colName))
       // scalastyle:on caselocale
     }
 
@@ -900,7 +901,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       // columns. Here we Lowercase the column names before passing the partition spec to Hive
       // client, to satisfy Hive.
       // scalastyle:off caselocale
-      orderedPartitionSpec.put(colName.toLowerCase(Locale.ROOT), partition(colName))
+      orderedPartitionSpec.put(colName.toLowerCase, partition(colName))
       // scalastyle:on caselocale
     }
 
@@ -922,7 +923,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   // APIs, to match this behaviour.
   private def lowerCasePartitionSpec(spec: TablePartitionSpec): TablePartitionSpec = {
     // scalastyle:off caselocale
-    spec.map { case (k, v) => k.toLowerCase(Locale.ROOT) -> v }
+    spec.map { case (k, v) => k.toLowerCase -> v }
     // scalastyle:on caselocale
   }
 
@@ -930,7 +931,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
   private def buildLowerCasePartColNameMap(table: CatalogTable): Map[String, String] = {
     val actualPartColNames = table.partitionColumnNames
     // scalastyle:off caselocale
-    actualPartColNames.map(colName => (colName.toLowerCase(Locale.ROOT), colName)).toMap
+    actualPartColNames.map(colName => (colName.toLowerCase, colName)).toMap
     // scalastyle:on caselocale
   }
 
@@ -941,7 +942,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       spec: TablePartitionSpec,
       partColMap: Map[String, String]): TablePartitionSpec = {
     // scalastyle:off caselocale
-    spec.map { case (k, v) => partColMap(k.toLowerCase(Locale.ROOT)) -> v }
+    spec.map { case (k, v) => partColMap(k.toLowerCase) -> v }
     // scalastyle:on caselocale
   }
 
@@ -1002,8 +1003,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     // a default path generate by the new spec with lower cased partition column names. This is
     // unexpected and we need to rename them manually and alter the partition location.
     // scalastyle:off caselocale
-    val hasUpperCasePartitionColumn = partitionColumnNames
-      .exists(col => col.toLowerCase(Locale.ROOT) != col)
+    val hasUpperCasePartitionColumn = partitionColumnNames.exists(col => col.toLowerCase != col)
     // scalastyle:on caselocale
     if (tableMeta.tableType == MANAGED && hasUpperCasePartitionColumn) {
       val tablePath = new Path(tableMeta.location)
@@ -1046,7 +1046,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         // just move `a=1/b=3` into `A=1` with new name `B=3`.
       } else {
         // scalastyle:off caselocale
-        val actualPartitionString = getPartitionPathString(col.toLowerCase(Locale.ROOT), partValue)
+        val actualPartitionString = getPartitionPathString(col.toLowerCase, partValue)
         // scalastyle:on caselocale
         val actualPartitionPath = new Path(currentFullPath, actualPartitionString)
         try {
@@ -1199,7 +1199,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     val partSpec = PartitioningUtils.parsePathFragmentAsSeq(partitionPath)
     partSpec.map { case (partName, partValue) =>
       // scalastyle:off caselocale
-      partColNameMap(partName.toLowerCase(Locale.ROOT)) + "=" + escapePathName(partValue)
+      partColNameMap(partName.toLowerCase) + "=" + escapePathName(partValue)
       // scalastyle:on caselocale
     }.mkString("/")
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 585b92bb5fec0..d047953327958 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.hive
 
-import java.util.Locale
-
 import scala.util.control.NonFatal
 
 import com.google.common.util.concurrent.Striped
@@ -62,8 +60,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
   private[hive] def getCachedDataSourceTable(table: TableIdentifier): LogicalPlan = {
     val key = QualifiedTableName(
       // scalastyle:off caselocale
-      table.database.getOrElse(sessionState.catalog.getCurrentDatabase).toLowerCase(Locale.ROOT),
-      table.table.toLowerCase(Locale.ROOT))
+      table.database.getOrElse(sessionState.catalog.getCurrentDatabase).toLowerCase,
+      table.table.toLowerCase)
       // scalastyle:on caselocale
     catalogProxy.getCachedTable(key)
   }
@@ -279,14 +277,14 @@ private[hive] object HiveMetastoreCatalog {
       inferredSchema: StructType): StructType = try {
     // scalastyle:off caselocale
     // Find any nullable fields in mestastore schema that are missing from the inferred schema.
-    val metastoreFields = metastoreSchema.map(f => f.name.toLowerCase(Locale.ROOT) -> f).toMap
+    val metastoreFields = metastoreSchema.map(f => f.name.toLowerCase -> f).toMap
     val missingNullables = metastoreFields
-      .filterKeys(!inferredSchema.map(_.name.toLowerCase(Locale.ROOT)).contains(_))
+      .filterKeys(!inferredSchema.map(_.name.toLowerCase).contains(_))
      .values
       .filter(_.nullable)
     // Merge missing nullable fields to inferred schema and build a case-insensitive field map.
     val inferredFields = StructType(inferredSchema ++ missingNullables)
-      .map(f => f.name.toLowerCase(Locale.ROOT) -> f).toMap
+      .map(f => f.name.toLowerCase -> f).toMap
     // scalastyle:on caselocale
     StructType(metastoreSchema.map(f => f.copy(name = inferredFields(f.name).name)))
   } catch {
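
Note: patch 5 reverts the whole series. The one-word commit message does not record why, but one plausible reading is that every call site touched was already wrapped in scalastyle:off caselocale, i.e. each had already been audited and deliberately left with its current casing behavior, and switching user-facing paths such as UTF8String and Tokenizer to Locale.ROOT would silently change results for locales with non-trivial case rules. A minimal sketch of what is at stake for user data, using only the JDK (illustrative, not from the patches):

    import java.util.Locale

    object RootVsDefaultLocale {
      def main(args: Array[String]): Unit = {
        val turkish = new Locale("tr", "TR")
        // For user-visible text the locale-aware mapping can be the right one:
        // Turkish 'İ' (U+0130) lower-cases to a plain 'i' only under tr-TR.
        println("İSTANBUL".toLowerCase(turkish))      // "istanbul"
        // Under Locale.ROOT the same input keeps a combining dot above the i
        // ("i\u0307stanbul") — stable for identifiers, wrong for Turkish text.
        println("İSTANBUL".toLowerCase(Locale.ROOT))
      }
    }

So the series ends where it began: locale-neutral folding is the right tool for internal identifiers, but imposing it wholesale inside regions that had already opted out of the caselocale check is a behavior change, not just a style fix.
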