[SPARK-31292][CORE][SQL] Replace toSet.toSeq with distinct for readability

### What changes were proposed in this pull request?

This PR replaces calls to `toSet.toSeq` with calls to `distinct`.

### Why are the changes needed?

`toSet.toSeq` is intended to make the elements of a collection unique, but it is a bit verbose. Using `distinct` instead is easier to understand and improves readability.
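
As a quick illustration (a sketch with made-up values, not code from this PR), both forms produce the same unique elements, but `distinct` also keeps the order of first occurrence, which a round trip through a `Set` does not guarantee:

```scala
val hosts = Seq("host1", "host2", "host1", "host3", "host2")

// Verbose: builds an intermediate Set, and the order of the resulting
// Seq is whatever the Set's iteration order happens to be.
val unique1: Seq[String] = hosts.toSet.toSeq

// Clearer: removes duplicates in one call and preserves the order of
// first occurrence.
val unique2: Seq[String] = hosts.distinct // Seq("host1", "host2", "host3")
```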

### Does this PR introduce any user-facing change?

No

### How was this patch tested?

Ran the existing unit tests and found no problems.

Closes #28062 from sekikn/SPARK-31292.

Authored-by: Kengo Seki <sekikn@apache.org>
Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
(cherry picked from commit 0b237bd)
Signed-off-by: Takeshi Yamamuro <yamamuro@apache.org>
sekikn authored and maropu committed Mar 28, 2020
1 parent 4e13ba9 commit 71dcf66
Showing 6 changed files with 6 additions and 6 deletions.

core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala

```diff
@@ -149,7 +149,7 @@ private[spark] object ResourceUtils extends Logging {
   def listResourceIds(sparkConf: SparkConf, componentName: String): Seq[ResourceID] = {
     sparkConf.getAllWithPrefix(s"$componentName.$RESOURCE_PREFIX.").map { case (key, _) =>
       key.substring(0, key.indexOf('.'))
-    }.toSet.toSeq.map(name => new ResourceID(componentName, name))
+    }.distinct.map(name => new ResourceID(componentName, name))
   }

   def parseAllResourceRequests(
```
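
To see why the deduplication matters here: several configuration keys can map to the same resource name (for example, both an amount key and a discovery-script key for one resource). A self-contained sketch of the key-parsing step, using hypothetical key suffixes rather than a real `SparkConf`:

```scala
// Hypothetical suffixes as getAllWithPrefix("spark.executor.resource.")
// might return them; both "gpu" keys must collapse to one resource name.
val entries = Seq(
  "gpu.amount" -> "2",
  "gpu.discoveryScript" -> "/opt/find-gpus.sh",
  "fpga.amount" -> "1")

val names = entries.map { case (key, _) =>
  key.substring(0, key.indexOf('.')) // "gpu", "gpu", "fpga"
}.distinct                           // Seq("gpu", "fpga")
```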

core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala

```diff
@@ -69,7 +69,7 @@ private[spark] class ResultTask[T, U](
   with Serializable {

   @transient private[this] val preferredLocs: Seq[TaskLocation] = {
-    if (locs == null) Nil else locs.toSet.toSeq
+    if (locs == null) Nil else locs.distinct
   }

   override def runTask(context: TaskContext): U = {
```
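
`ResultTask` here and `ShuffleMapTask` below guard against a null `locs` before deduplicating. A minimal sketch of the pattern, with plain strings standing in for `TaskLocation`:

```scala
def preferredLocations(locs: Seq[String]): Seq[String] =
  if (locs == null) Nil else locs.distinct

preferredLocations(null)                           // Nil
preferredLocations(Seq("host1", "host1", "host2")) // Seq("host1", "host2")
```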

core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala

```diff
@@ -71,7 +71,7 @@ private[spark] class ShuffleMapTask(
   }

   @transient private val preferredLocs: Seq[TaskLocation] = {
-    if (locs == null) Nil else locs.toSet.toSeq
+    if (locs == null) Nil else locs.distinct
   }

   override def runTask(context: TaskContext): MapStatus = {
```

core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala

```diff
@@ -408,7 +408,7 @@ private[spark] class TaskSchedulerImpl(
         newExecAvail = true
       }
     }
-    val hosts = offers.map(_.host).toSet.toSeq
+    val hosts = offers.map(_.host).distinct
     for ((host, Some(rack)) <- hosts.zip(getRacksForHosts(hosts))) {
       hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += host
     }
```
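
The deduplication here feeds a batched rack lookup: each unique host is zipped with its (optional) rack, and hosts without a rack are skipped by the `Some(rack)` pattern. A self-contained sketch of that pattern, with a hypothetical rack table standing in for `getRacksForHosts`:

```scala
import scala.collection.mutable.{HashMap, HashSet}

// Hypothetical rack table; host3 deliberately has no rack assignment.
val rackFor = Map("host1" -> "rackA", "host2" -> "rackA")
def getRacks(hosts: Seq[String]): Seq[Option[String]] = hosts.map(rackFor.get)

val hosts = Seq("host1", "host2", "host1", "host3").distinct // one entry per host
val hostsByRack = new HashMap[String, HashSet[String]]()
for ((host, Some(rack)) <- hosts.zip(getRacks(hosts))) {
  hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += host
}
// hostsByRack == Map("rackA" -> Set("host1", "host2")); host3 is skipped
```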

core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala

```diff
@@ -758,7 +758,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B
     // that are explicitly blacklisted, plus those that have *any* executors blacklisted.
     val nodesForBlacklistedExecutors = offers.filter { offer =>
       execBlacklist.contains(offer.executorId)
-    }.map(_.host).toSet.toSeq
+    }.map(_.host).distinct
     val nodesWithAnyBlacklisting = (nodeBlacklist ++ nodesForBlacklistedExecutors).toSet
     // Similarly, figure out which executors have any blacklisting. This means all executors
     // that are explicitly blacklisted, plus all executors on nodes that are blacklisted.
```

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala (1 addition, 1 deletion)

```diff
@@ -2455,7 +2455,7 @@ class Dataset[T] private[sql](
   def dropDuplicates(colNames: Seq[String]): Dataset[T] = withTypedPlan {
     val resolver = sparkSession.sessionState.analyzer.resolver
     val allColumns = queryExecution.analyzed.output
-    val groupCols = colNames.toSet.toSeq.flatMap { (colName: String) =>
+    val groupCols = colNames.distinct.flatMap { (colName: String) =>
       // It is possibly there are more than one columns with the same name,
       // so we call filter instead of find.
       val cols = allColumns.filter(col => resolver(col.name, colName))
```
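
For context, `dropDuplicates(colNames)` keeps one row per distinct combination of the named columns, so deduplicating the name list up front simply makes a repeated column name harmless. A short usage sketch with made-up data, assuming an active `SparkSession` named `spark`:

```scala
import spark.implicits._

val df = Seq(("a", 1, 10), ("a", 1, 20), ("b", 2, 30)).toDF("k", "v", "extra")

// colNames.distinct collapses Seq("k", "v", "k") to Seq("k", "v"),
// so this keeps one row per distinct (k, v) pair.
df.dropDuplicates(Seq("k", "v", "k")).show()
```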
