Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-41757][CONNECT] Fixing String representation for Column class #39296

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 4 additions & 7 deletions python/pyspark/sql/column.py
Expand Up @@ -182,9 +182,6 @@ def _(self: "Column", other: Union["LiteralType", "DecimalLiteral"]) -> "Column"
return _


# TODO(SPARK-41757): Compatibility of string representation for Column


class Column:

"""
Expand All @@ -203,16 +200,16 @@ class Column:
... [(2, "Alice"), (5, "Bob")], ["age", "name"])

Select a column out of a DataFrame
>>> df.name # doctest: +SKIP
>>> df.name
Column<'name'>
>>> df["name"] # doctest: +SKIP
>>> df["name"]
Column<'name'>

Create from an expression

>>> df.age + 1 # doctest: +SKIP
>>> df.age + 1
Column<'(age + 1)'>
>>> 1 / df.age # doctest: +SKIP
>>> 1 / df.age
Column<'(1 / age)'>
"""

Expand Down
12 changes: 9 additions & 3 deletions python/pyspark/sql/connect/expressions.py
Expand Up @@ -152,7 +152,7 @@ def to_plan(self, session: "SparkConnectClient") -> "proto.Expression":
return exp

def __repr__(self) -> str:
return f"Alias({self._parent}, ({','.join(self._alias)}))"
return f"{self._parent} AS {','.join(self._alias)}"


class LiteralExpression(Expression):
Expand Down Expand Up @@ -308,7 +308,7 @@ def to_plan(self, session: "SparkConnectClient") -> "proto.Expression":
return expr

def __repr__(self) -> str:
return f"Literal({self._value})"
return f"{self._value}"


class ColumnReference(Expression):
Expand All @@ -333,7 +333,7 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression:
return expr

def __repr__(self) -> str:
return f"ColumnReference({self._unparsed_identifier})"
return f"{self._unparsed_identifier}"


class SQLExpression(Expression):
Expand Down Expand Up @@ -414,6 +414,12 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression:
return fun

def __repr__(self) -> str:
# Special handling for certain infix operators that require slightly
# different printing.
if len(self._args) == 2 and len(self._name) == 1:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, what if we just use an allowlist and make it nicer? For example, there are only a fixed number of operators (20 ~ 30) that PySpark supports plus eqNullSafe, and very likely we won't add more.

Otherwise, I would just remove this new logic entirely and fix the doctest with `...`.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me just take it over at #39616 by doing a (very) simple version for now to make the tests pass.

return f"{self._args[0]} {self._name} {self._args[1]}"

# Default print handling:
if self._is_distinct:
return f"{self._name}(distinct {', '.join([str(arg) for arg in self._args])})"
else:
Expand Down