Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions python/datafusion/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,13 +409,30 @@ def select(self, *exprs: Expr | str) -> DataFrame:
def drop(self, *columns: str) -> DataFrame:
    """Drop arbitrary amount of columns.

    Column names are case-sensitive and do not require double quotes like
    other operations such as `select`. A single pair of enclosing double
    quotes is allowed and will be automatically stripped if present.

    Args:
        columns: Column names to drop from the dataframe. Both ``column_name``
            and ``"column_name"`` are accepted.

    Returns:
        DataFrame with those columns removed in the projection.

    Example Usage::

        df.drop('ID_For_Students')      # Works
        df.drop('"ID_For_Students"')    # Also works (quotes stripped)
    """
    # Remove exactly one enclosing pair of quotes. ``str.strip('"')`` would
    # also consume quotes that belong to the name itself (``'""a""'`` -> ``a``)
    # and would turn a lone ``'"'`` into an empty string, so slice instead and
    # require at least two characters before treating the name as quoted.
    normalized_columns = [
        col[1:-1]
        if len(col) >= 2 and col.startswith('"') and col.endswith('"')
        else col
        for col in columns
    ]
    return DataFrame(self.df.drop(*normalized_columns))

def filter(self, *predicates: Expr) -> DataFrame:
"""Return a DataFrame for which ``predicate`` evaluates to ``True``.
Expand Down
10 changes: 10 additions & 0 deletions python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,16 @@ def test_select(df):
assert result.column(1) == pa.array([1, 2, 3])


def test_drop_quoted_columns():
    """Dropping a column works with and without surrounding double quotes."""
    record_batch = pa.RecordBatch.from_arrays(
        [pa.array([1, 2, 3])],
        names=["ID_For_Students"],
    )
    df = SessionContext().create_dataframe([[record_batch]])

    # The quoted and the bare spelling must both remove the only column.
    for requested in ('"ID_For_Students"', "ID_For_Students"):
        remaining = df.drop(requested).schema().names
        assert remaining == []


def test_select_mixed_expr_string(df):
df = df.select(column("b"), "a")

Expand Down
Loading