Skip to content

Commit

Permalink
DataFrame.withColumn can replace original column with identical colum…
Browse files Browse the repository at this point in the history
…n name.
  • Loading branch information
viirya committed Apr 16, 2015
1 parent de4fa6b commit 72f35b1
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
14 changes: 13 additions & 1 deletion sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,19 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] by adding a column, or by replacing an existing column that has the same name.
* @group dfops
*/
def withColumn(colName: String, col: Column): DataFrame = select(Column("*"), col.as(colName))
def withColumn(colName: String, col: Column): DataFrame = {
  // Name comparison is delegated to the analyzer's resolver rather than using
  // plain string equality.
  val resolver = sqlContext.analyzer.resolver
  def sameName(existing: String): Boolean = resolver(existing, colName)

  val existingNames = schema.map(_.name)
  if (!existingNames.exists(sameName)) {
    // No existing column matches: keep every current column and append the new one.
    select(Column("*"), col.as(colName))
  } else {
    // At least one existing column matches: rebuild the projection in schema
    // order, substituting the new expression wherever the name matches.
    val projection = existingNames.map {
      case existing if sameName(existing) => col.as(colName)
      case existing => Column(existing)
    }
    select(projection: _*)
  }
}

/**
* Returns a new [[DataFrame]] with a column renamed.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,10 @@ class DataFrameSuite extends QueryTest {
Row(key, value, key + 1)
}.toSeq)
assert(df.schema.map(_.name).toSeq === Seq("key", "value", "newCol"))

val df2 = TestSQLContext.sparkContext.parallelize(Array(1, 2, 3)).toDF("x")
val df3 = df2.withColumn("x", df2("x") + 1)
assert(df3.select("x").collect().toSeq === Seq(Row(2), Row(3), Row(4)))
}

test("withColumnRenamed") {
Expand Down

0 comments on commit 72f35b1

Please sign in to comment.