diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
index 36ef906439451..162f090f011c6 100644
--- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
+++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala
@@ -150,7 +150,7 @@ private[spark] object ResourceUtils extends Logging {
   def listResourceIds(sparkConf: SparkConf, componentName: String): Seq[ResourceID] = {
     sparkConf.getAllWithPrefix(s"$componentName.$RESOURCE_PREFIX.").map { case (key, _) =>
       key.substring(0, key.indexOf('.'))
-    }.toSet.toSeq.map(name => new ResourceID(componentName, name))
+    }.distinct.map(name => new ResourceID(componentName, name))
   }
 
   def parseAllResourceRequests(
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
index 857c89d7a98f5..15f2161fac39d 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala
@@ -69,7 +69,7 @@ private[spark] class ResultTask[T, U](
   with Serializable {
 
   @transient private[this] val preferredLocs: Seq[TaskLocation] = {
-    if (locs == null) Nil else locs.toSet.toSeq
+    if (locs == null) Nil else locs.distinct
   }
 
   override def runTask(context: TaskContext): U = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index 4c0c30a3caf67..a0ba9208ea647 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -71,7 +71,7 @@ private[spark] class ShuffleMapTask(
   }
 
   @transient private val preferredLocs: Seq[TaskLocation] = {
-    if (locs == null) Nil else locs.toSet.toSeq
+    if (locs == null) Nil else locs.distinct
   }
 
   override def runTask(context: TaskContext): MapStatus = {
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
index 7e2fbb43e3098..f0f84fe63d1cf 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -487,7 +487,7 @@ private[spark] class TaskSchedulerImpl(
         newExecAvail = true
       }
     }
-    val hosts = offers.map(_.host).toSet.toSeq
+    val hosts = offers.map(_.host).distinct
     for ((host, Some(rack)) <- hosts.zip(getRacksForHosts(hosts))) {
       hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += host
     }
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
index 9ee84a8179c77..b9a11e7f66c64 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala
@@ -761,7 +761,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B
       // that are explicitly blacklisted, plus those that have *any* executors blacklisted.
       val nodesForBlacklistedExecutors = offers.filter { offer =>
         execBlacklist.contains(offer.executorId)
-      }.map(_.host).toSet.toSeq
+      }.map(_.host).distinct
       val nodesWithAnyBlacklisting = (nodeBlacklist ++ nodesForBlacklistedExecutors).toSet
       // Similarly, figure out which executors have any blacklisting.  This means all executors
       // that are explicitly blacklisted, plus all executors on nodes that are blacklisted.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index e3c63881b07ac..e1e3e8e6722ef 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2455,7 +2455,7 @@ class Dataset[T] private[sql](
   def dropDuplicates(colNames: Seq[String]): Dataset[T] = withTypedPlan {
     val resolver = sparkSession.sessionState.analyzer.resolver
     val allColumns = queryExecution.analyzed.output
-    val groupCols = colNames.toSet.toSeq.flatMap { (colName: String) =>
+    val groupCols = colNames.distinct.flatMap { (colName: String) =>
      // It is possibly there are more than one columns with the same name,
      // so we call filter instead of find.
       val cols = allColumns.filter(col => resolver(col.name, colName))
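
Note (a minimal sketch, not part of the patch; names below are illustrative only): Seq#distinct removes duplicates while keeping the first occurrence of each element in input order and without allocating an intermediate Set, whereas toSet.toSeq round-trips through a Set and gives no ordering guarantee once the collection is large enough to be hash-based.

object DistinctVsToSetToSeq {
  // Hypothetical standalone example, not part of the Spark sources above.
  def main(args: Array[String]): Unit = {
    val hosts = Seq("host2", "host1", "host2", "host3")
    // distinct: duplicates removed, original ordering preserved
    assert(hosts.distinct == Seq("host2", "host1", "host3"))
    // toSet.toSeq: duplicates removed, but the order depends on the Set
    // implementation, so only the set of elements can be relied upon
    assert(hosts.toSet.toSeq.sorted == Seq("host1", "host2", "host3"))
  }
}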