Skip to content

Commit

Permalink
feat: add 'columns' as an alias for 'col_order' (#701)
Browse files Browse the repository at this point in the history
* feat: add 'columns' as an alias for 'col_order'

* Added test to verify alias correctness

* reformatted with black

* refactored to alias checking and testing

* Reformatted tests for columns alias

* feat: add 'columns' as an alias for 'col_order'

* Added test to verify alias correctness

* reformatted with black

* refactored to alias checking and testing

* Reformatted tests for columns alias

* Made col_order a keyword argument and added to-do

* Edit todo comment

* Fixed small error in docstring

* Fixed ValueError message

* reformatted with black

---------

Co-authored-by: Chalmer Lowe <chalmerlowe@google.com>
  • Loading branch information
kiraksi and chalmerlowe committed Dec 6, 2023
1 parent 5a558e4 commit e52e8f8
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 6 deletions.
2 changes: 1 addition & 1 deletion docs/reading.rst
Expand Up @@ -28,7 +28,7 @@ destination DataFrame as well as a preferred column order as follows:
'SELECT * FROM `test_dataset.test_table`',
project_id=projectid,
index_col='index_column_name',
col_order=['col1', 'col2', 'col3'])
columns=['col1', 'col2'])
Querying with legacy SQL syntax
-------------------------------
Expand Down
23 changes: 18 additions & 5 deletions pandas_gbq/gbq.py
Expand Up @@ -734,7 +734,7 @@ def read_gbq(
query_or_table,
project_id=None,
index_col=None,
col_order=None,
columns=None,
reauth=False,
auth_local_webserver=True,
dialect=None,
Expand All @@ -750,6 +750,8 @@ def read_gbq(
auth_redirect_uri=None,
client_id=None,
client_secret=None,
*,
col_order=None,
):
r"""Load data from Google BigQuery using google-cloud-python
Expand All @@ -773,7 +775,7 @@ def read_gbq(
the environment.
index_col : str, optional
Name of result column to use for index in results DataFrame.
col_order : list(str), optional
columns : list(str), optional
List of BigQuery column names in the desired order for results
DataFrame.
reauth : boolean, default False
Expand Down Expand Up @@ -888,6 +890,8 @@ def read_gbq(
client_secret : str
The Client Secret associated with the Client ID for the Google Cloud Project
the user is attempting to connect to.
col_order : list(str), optional
Alias for columns, retained for backwards compatibility.
Returns
-------
Expand Down Expand Up @@ -966,10 +970,19 @@ def read_gbq(
'Index column "{0}" does not exist in DataFrame.'.format(index_col)
)

# Using columns as an alias for col_order, raising an error if both provided
if col_order and not columns:
columns = col_order
elif col_order and columns:
raise ValueError(
"Must specify either columns (preferred) or col_order, not both"
)

# Change the order of columns in the DataFrame based on provided list
if col_order is not None:
if sorted(col_order) == sorted(final_df.columns):
final_df = final_df[col_order]
# TODO(kiraksi): allow columns to be a subset of all columns in the table, with follow up PR
if columns is not None:
if sorted(columns) == sorted(final_df.columns):
final_df = final_df[columns]
else:
raise InvalidColumnOrder("Column order does not match this DataFrame.")

Expand Down
34 changes: 34 additions & 0 deletions tests/system/test_gbq.py
Expand Up @@ -600,6 +600,40 @@ def test_tokyo(self, tokyo_dataset, tokyo_table, project_id):
)
assert df["max_year"][0] >= 2000

def test_columns_as_alias(self, project_id):
    """Check that ``columns`` reorders the columns of the returned DataFrame.

    A single-row query yielding ``string_1``, ``string_2`` and ``string_3``
    is read with ``columns`` listing those names in a different order; the
    resulting frame must equal a locally built frame indexed by that same
    list.
    """
    columns = ["string_2", "string_1", "string_3"]
    query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3"

    df = gbq.read_gbq(
        query,
        project_id=project_id,
        columns=columns,
        credentials=self.credentials,
        dialect="standard",
    )

    # Build the frame in query order, then select with the requested list
    # so the expectation carries exactly the ordering passed to read_gbq.
    unordered = DataFrame({"string_1": ["a"], "string_2": ["b"], "string_3": ["c"]})
    expected = unordered[columns]

    # The frame returned by read_gbq must match the expected DataFrame.
    tm.assert_frame_equal(df, expected)

def test_columns_and_col_order_raises_error(self, project_id):
    """Check that supplying both ``columns`` and ``col_order`` is rejected.

    ``col_order`` is kept as a backwards-compatible alias for ``columns``;
    passing both at once is ambiguous and must raise ``ValueError`` with
    the "Must specify either columns ..." message.
    """
    query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3"
    columns = ["string_2", "string_1"]
    col_order = ["string_3", "string_1", "string_2"]

    # Match on the message prefix so that only the "both arguments given"
    # error satisfies this test. ``columns`` here is a subset of the result
    # columns, so a bare ``ValueError`` check could also be satisfied by an
    # unrelated column-validation failure.
    # NOTE(review): presumably InvalidColumnOrder subclasses ValueError --
    # confirm against the exception definitions.
    # The prefix is used rather than the full message because ``match`` is
    # a regular expression and the full message contains parentheses.
    with pytest.raises(ValueError, match="Must specify either columns"):
        gbq.read_gbq(
            query,
            project_id=project_id,
            columns=columns,
            col_order=col_order,
            credentials=self.credentials,
            dialect="standard",
        )


class TestToGBQIntegration(object):
@pytest.fixture(autouse=True, scope="function")
Expand Down

0 comments on commit e52e8f8

Please sign in to comment.