From 4c695ecbeb3680a50f590575ac679029eb7387ce Mon Sep 17 00:00:00 2001 From: H0TB0X420 Date: Tue, 16 Sep 2025 15:34:56 -0400 Subject: [PATCH 1/3] Fix drop() method to handle quoted column names consistently - Strip quotes from column names in drop() method - Maintains consistency with other DataFrame operations - Both drop('col') and drop('"col"') now work Fixes #1212 --- python/datafusion/dataframe.py | 9 ++++++++- python/tests/test_dataframe.py | 11 ++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 181c29db4..2543675b1 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -415,7 +415,14 @@ def drop(self, *columns: str) -> DataFrame: Returns: DataFrame with those columns removed in the projection. """ - return DataFrame(self.df.drop(*columns)) + normalized_columns = [] + for col in columns: + if col.startswith('"') and col.endswith('"'): + normalized_columns.append(col.strip('"')) # Removes quotes from both sides of col + else: + normalized_columns.append(col) + + return DataFrame(self.df.drop(*normalized_columns)) def filter(self, *predicates: Expr) -> DataFrame: """Return a DataFrame for which ``predicate`` evaluates to ``True``. 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 343d32a92..c983fdbe3 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -216,7 +216,16 @@ def test_select(df): assert result.column(0) == pa.array([4, 5, 6]) assert result.column(1) == pa.array([1, 2, 3]) - +def test_drop_quoted_columns(): + ctx = SessionContext() + batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["ID_For_Students"]) + df = ctx.create_dataframe([[batch]]) + + # Both should work + assert df.drop('"ID_For_Students"').schema().names == [] + assert df.drop('ID_For_Students').schema().names == [] + + def test_select_mixed_expr_string(df): df = df.select(column("b"), "a") From b07f00b9128efe7f23b6721154b8f095e69d8344 Mon Sep 17 00:00:00 2001 From: H0TB0X420 Date: Sat, 20 Sep 2025 11:28:37 -0500 Subject: [PATCH 2/3] Update drop() method docstring to clarify quote handling - Document that column names are case-sensitive and don't require quotes - Clarify that both quoted and unquoted column names are accepted - Add examples showing both 'col' and '"col"' syntax work - Note difference from select() operation behavior --- python/datafusion/dataframe.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 2543675b1..33869dd25 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -409,11 +409,20 @@ def select(self, *exprs: Expr | str) -> DataFrame: def drop(self, *columns: str) -> DataFrame: """Drop arbitrary amount of columns. + Column names are case-sensitive and do not require double quotes like + other operations such as `select`. Leading and trailing double quotes + are allowed and will be automatically stripped if present. + Args: - columns: Column names to drop from the dataframe. + columns: Column names to drop from the dataframe. Both 'column_name' + and '"column_name"' are accepted. 
Returns: DataFrame with those columns removed in the projection. + + Example Usage: + df.drop('ID_For_Students') # Works + df.drop('"ID_For_Students"') # Also works (quotes stripped) """ normalized_columns = [] for col in columns: From b925ad7f251bdc4c625d09982ea2bc6be618a7c4 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 21 Sep 2025 07:58:21 -0400 Subject: [PATCH 3/3] Fix whitespace and documentation errors --- python/datafusion/dataframe.py | 17 +++++++++-------- python/tests/test_dataframe.py | 7 ++++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 33869dd25..357971275 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -409,28 +409,29 @@ def select(self, *exprs: Expr | str) -> DataFrame: def drop(self, *columns: str) -> DataFrame: """Drop arbitrary amount of columns. - Column names are case-sensitive and do not require double quotes like - other operations such as `select`. Leading and trailing double quotes + Column names are case-sensitive and do not require double quotes like + other operations such as `select`. Leading and trailing double quotes are allowed and will be automatically stripped if present. Args: - columns: Column names to drop from the dataframe. Both 'column_name' - and '"column_name"' are accepted. + columns: Column names to drop from the dataframe. Both ``column_name`` + and ``"column_name"`` are accepted. Returns: DataFrame with those columns removed in the projection. 
- - Example Usage: + + Example Usage:: + df.drop('ID_For_Students') # Works df.drop('"ID_For_Students"') # Also works (quotes stripped) """ normalized_columns = [] for col in columns: if col.startswith('"') and col.endswith('"'): - normalized_columns.append(col.strip('"')) # Removes quotes from both sides of col + normalized_columns.append(col.strip('"')) # Strip double quotes else: normalized_columns.append(col) - + return DataFrame(self.df.drop(*normalized_columns)) def filter(self, *predicates: Expr) -> DataFrame: diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index c983fdbe3..ba799f55e 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -216,16 +216,17 @@ def test_select(df): assert result.column(0) == pa.array([4, 5, 6]) assert result.column(1) == pa.array([1, 2, 3]) + def test_drop_quoted_columns(): ctx = SessionContext() batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["ID_For_Students"]) df = ctx.create_dataframe([[batch]]) - + # Both should work assert df.drop('"ID_For_Students"').schema().names == [] - assert df.drop('ID_For_Students').schema().names == [] + assert df.drop("ID_For_Students").schema().names == [] + - def test_select_mixed_expr_string(df): df = df.select(column("b"), "a")