From cac0b6e2683bf2fc100488a7f076ce3d0b566593 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 19 Nov 2024 11:38:17 +0100 Subject: [PATCH 1/7] Bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dec848bd..25186fa9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ ] dependencies = [ "databricks-labs-blueprint[yaml]>=0.4.2", - "databricks-sdk~=0.37", + "databricks-sdk~=0.38", "sqlglot>=22.3.1" ] From c382b962c5a3ded83e48d5538485dd14eff383f1 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 19 Nov 2024 11:50:26 +0100 Subject: [PATCH 2/7] Remove Dashboard.as_dict Handled in the SDK (https://github.com/databricks/databricks-sdk-py/releases/tag/v0.38.0) --- src/databricks/labs/lsql/dashboards.py | 4 ++-- tests/integration/test_dashboards.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/lsql/dashboards.py b/src/databricks/labs/lsql/dashboards.py index 091f00bf..3222ed5b 100644 --- a/src/databricks/labs/lsql/dashboards.py +++ b/src/databricks/labs/lsql/dashboards.py @@ -1133,9 +1133,9 @@ def create_dashboard( warehouse_id=warehouse_id, ) if dashboard_id is not None: - sdk_dashboard = self._ws.lakeview.update(dashboard_id, dashboard=dashboard_to_create.as_dict()) # type: ignore + sdk_dashboard = self._ws.lakeview.update(dashboard_id, dashboard=dashboard_to_create) else: - sdk_dashboard = self._ws.lakeview.create(dashboard=dashboard_to_create.as_dict()) # type: ignore + sdk_dashboard = self._ws.lakeview.create(dashboard=dashboard_to_create) if publish: assert sdk_dashboard.dashboard_id is not None self._ws.lakeview.publish(sdk_dashboard.dashboard_id, warehouse_id=warehouse_id) diff --git a/tests/integration/test_dashboards.py b/tests/integration/test_dashboards.py index f2eae100..4fbdf867 100644 --- a/tests/integration/test_dashboards.py +++ b/tests/integration/test_dashboards.py @@ -58,7 +58,7 @@ def create(*, display_name: str = "") -> SDKDashboard: display_name = f"created_by_lsql_{make_random()}" else: display_name = f"{display_name} ({make_random()})" - dashboard = ws.lakeview.create(dashboard=SDKDashboard(display_name=display_name).as_dict()) + dashboard = ws.lakeview.create(dashboard=SDKDashboard(display_name=display_name)) if is_in_debug(): dashboard_url = f"{ws.config.host}/sql/dashboardsv3/{dashboard.dashboard_id}" webbrowser.open(dashboard_url) @@ -117,7 +117,7 @@ def test_dashboards_creates_exported_dashboard_definition(ws, make_dashboard) -> dashboard_content = (Path(__file__).parent / "dashboards" / "dashboard.lvdash.json").read_text() dashboard_to_create = dataclasses.replace(sdk_dashboard, serialized_dashboard=dashboard_content) - ws.lakeview.update(sdk_dashboard.dashboard_id, dashboard=dashboard_to_create.as_dict()) + ws.lakeview.update(sdk_dashboard.dashboard_id, dashboard=dashboard_to_create) lakeview_dashboard = Dashboard.from_dict(json.loads(dashboard_content)) new_dashboard = dashboards.get_dashboard(sdk_dashboard.path) From a316172d7b0ec73c88d5594539887b4727ccd2eb Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 19 Nov 2024 12:13:12 +0100 Subject: [PATCH 3/7] Limit integration test query --- tests/integration/test_core.py | 89 ++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/tests/integration/test_core.py b/tests/integration/test_core.py index 4983a0d0..19c1f1e5 100644 --- a/tests/integration/test_core.py +++ b/tests/integration/test_core.py @@ -17,49 +17,56 @@ def test_sql_execution_chunked(ws, disposition): assert total == 1999999000000 -def test_sql_execution(ws, env_or_skip): - results = [] +NYC_TAXI_LIMITED_TRIPS = """ +WITH zipcodes AS ( + SELECT DISTINCT pickup_zip, dropoff_zip + FROM samples.nyctaxi.trips + WHERE pickup_zip = 10282 AND dropoff_zip <= 10005 +) + +SELECT + trips.pickup_zip, + trips.dropoff_zip, + trips.tpep_pickup_datetime, + trips.tpep_dropoff_datetime +FROM + zipcodes + JOIN + samples.nyctaxi.trips AS trips + ON zipcodes.pickup_zip = trips.pickup_zip AND zipcodes.dropoff_zip = trips.dropoff_zip +ORDER BY trips.dropoff_zip, trips.tpep_pickup_datetime, trips.tpep_dropoff_datetime +""" + + +def test_sql_execution(ws, env_or_skip) -> None: + results = set() see = StatementExecutionExt(ws, warehouse_id=env_or_skip("TEST_DEFAULT_WAREHOUSE_ID")) - for pickup_zip, dropoff_zip in see.fetch_all( - "SELECT pickup_zip, dropoff_zip FROM nyctaxi.trips LIMIT 10", catalog="samples" - ): - results.append((pickup_zip, dropoff_zip)) - assert results == [ - (10282, 10171), - (10110, 10110), - (10103, 10023), - (10022, 10017), - (10110, 10282), - (10009, 10065), - (10153, 10199), - (10112, 10069), - (10023, 10153), - (10012, 10003), - ] - - -def test_sql_execution_partial(ws, env_or_skip): - results = [] + for pickup_zip, dropoff_zip, *_ in see.fetch_all(NYC_TAXI_LIMITED_TRIPS, catalog="samples"): + results.add((pickup_zip, dropoff_zip)) + assert results == { + (10282, 7114), + (10282, 10001), + (10282, 10002), + (10282, 10003), + (10282, 10005), + } + + +def test_sql_execution_partial(ws, env_or_skip) -> None: + results = set() see = StatementExecutionExt(ws, warehouse_id=env_or_skip("TEST_DEFAULT_WAREHOUSE_ID"), catalog="samples") - for row in see("SELECT * FROM nyctaxi.trips LIMIT 10"): - pickup_time, dropoff_time = row[0], row[1] - pickup_zip = row.pickup_zip - dropoff_zip = row["dropoff_zip"] + for row in see(NYC_TAXI_LIMITED_TRIPS): + pickup_zip, dropoff_zip, pickup_time, dropoff_time = row[0], row[1], row[2], row[3] all_fields = row.asDict() logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}") - results.append((pickup_zip, dropoff_zip)) - assert results == [ - (10282, 10171), - (10110, 10110), - (10103, 10023), - (10022, 10017), - (10110, 10282), - (10009, 10065), - (10153, 10199), - (10112, 10069), - (10023, 10153), - (10012, 10003), - ] + results.add((pickup_zip, dropoff_zip)) + assert results == { + (10282, 7114), + (10282, 10001), + (10282, 10002), + (10282, 10003), + (10282, 10005), + } def test_fetch_one(ws): @@ -73,9 +80,9 @@ def test_fetch_one_fails_if_limit_is_bigger(ws): see.fetch_one("SELECT * FROM samples.nyctaxi.trips LIMIT 100") -def test_fetch_one_works(ws): +def test_fetch_one_works(ws) -> None: see = StatementExecutionExt(ws) - row = see.fetch_one("SELECT * FROM samples.nyctaxi.trips LIMIT 1") + row = see.fetch_one("SELECT * FROM samples.nyctaxi.trips WHERE pickup_zip == 10282 LIMIT 1") assert row.pickup_zip == 10282 From 35cf424f084ce8512ba961c09b72e837ffe0c57d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 19 Nov 2024 13:06:27 +0100 Subject: [PATCH 4/7] Fix type hint --- tests/integration/test_core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_core.py b/tests/integration/test_core.py index 19c1f1e5..b5ae46ee 100644 --- a/tests/integration/test_core.py +++ b/tests/integration/test_core.py @@ -82,7 +82,8 @@ def test_fetch_one_fails_if_limit_is_bigger(ws): def test_fetch_one_works(ws) -> None: see = StatementExecutionExt(ws) - row = see.fetch_one("SELECT * FROM samples.nyctaxi.trips WHERE pickup_zip == 10282 LIMIT 1") + row = see.fetch_one("SELECT pickup_zip FROM samples.nyctaxi.trips WHERE pickup_zip == 10282 LIMIT 1") + assert row is not None assert row.pickup_zip == 10282 From 4fe39111eca0cc59873a81f90e24858ad0c286af Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 19 Nov 2024 13:07:20 +0100 Subject: [PATCH 5/7] Remove .as_dict from unit tests --- tests/unit/test_dashboards.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_dashboards.py b/tests/unit/test_dashboards.py index 4a1064ec..22d42809 100644 --- a/tests/unit/test_dashboards.py +++ b/tests/unit/test_dashboards.py @@ -1478,7 +1478,7 @@ def test_dashboards_calls_create_without_dashboard_id() -> None: dashboards.create_dashboard(dashboard_metadata, parent_path="/non/existing/path", warehouse_id="warehouse") - ws.lakeview.create.assert_called_with(dashboard=sdk_dashboard.as_dict()) + ws.lakeview.create.assert_called_with(dashboard=sdk_dashboard) ws.lakeview.update.assert_not_called() ws.lakeview.publish.assert_not_called() @@ -1498,7 +1498,7 @@ def test_dashboards_calls_update_with_dashboard_id() -> None: dashboards.create_dashboard(dashboard_metadata, dashboard_id="id", warehouse_id="warehouse") ws.lakeview.create.assert_not_called() - ws.lakeview.update.assert_called_with("id", dashboard=sdk_dashboard.as_dict()) + ws.lakeview.update.assert_called_with("id", dashboard=sdk_dashboard) ws.lakeview.publish.assert_not_called() From e640d9cfffce1da40fa75f0897f902d69e6c3916 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 19 Nov 2024 13:29:52 +0100 Subject: [PATCH 6/7] Rename query --- tests/integration/test_core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_core.py b/tests/integration/test_core.py index b5ae46ee..595e01fc 100644 --- a/tests/integration/test_core.py +++ b/tests/integration/test_core.py @@ -17,7 +17,7 @@ def test_sql_execution_chunked(ws, disposition): assert total == 1999999000000 -NYC_TAXI_LIMITED_TRIPS = """ +NYC_TAXI_TRIPS_LIMITED = """ WITH zipcodes AS ( SELECT DISTINCT pickup_zip, dropoff_zip FROM samples.nyctaxi.trips @@ -41,7 +41,7 @@ def test_sql_execution_chunked(ws, disposition): def test_sql_execution(ws, env_or_skip) -> None: results = set() see = StatementExecutionExt(ws, warehouse_id=env_or_skip("TEST_DEFAULT_WAREHOUSE_ID")) - for pickup_zip, dropoff_zip, *_ in see.fetch_all(NYC_TAXI_LIMITED_TRIPS, catalog="samples"): + for pickup_zip, dropoff_zip, *_ in see.fetch_all(NYC_TAXI_TRIPS_LIMITED, catalog="samples"): results.add((pickup_zip, dropoff_zip)) assert results == { (10282, 7114), @@ -55,7 +55,7 @@ def test_sql_execution(ws, env_or_skip) -> None: def test_sql_execution_partial(ws, env_or_skip) -> None: results = set() see = StatementExecutionExt(ws, warehouse_id=env_or_skip("TEST_DEFAULT_WAREHOUSE_ID"), catalog="samples") - for row in see(NYC_TAXI_LIMITED_TRIPS): + for row in see(NYC_TAXI_TRIPS_LIMITED): pickup_zip, dropoff_zip, pickup_time, dropoff_time = row[0], row[1], row[2], row[3] all_fields = row.asDict() logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}") From 2efca99e103252ac5625016262f4dee10dca7aab Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Tue, 19 Nov 2024 13:37:16 +0100 Subject: [PATCH 7/7] Update integration tests to make more clear we expect at least two records --- tests/integration/test_core.py | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/tests/integration/test_core.py b/tests/integration/test_core.py index 595e01fc..ef1eb5cb 100644 --- a/tests/integration/test_core.py +++ b/tests/integration/test_core.py @@ -39,34 +39,22 @@ def test_sql_execution_chunked(ws, disposition): def test_sql_execution(ws, env_or_skip) -> None: - results = set() see = StatementExecutionExt(ws, warehouse_id=env_or_skip("TEST_DEFAULT_WAREHOUSE_ID")) - for pickup_zip, dropoff_zip, *_ in see.fetch_all(NYC_TAXI_TRIPS_LIMITED, catalog="samples"): - results.add((pickup_zip, dropoff_zip)) - assert results == { - (10282, 7114), - (10282, 10001), - (10282, 10002), - (10282, 10003), - (10282, 10005), - } - - -def test_sql_execution_partial(ws, env_or_skip) -> None: - results = set() + + records = see.fetch_all(NYC_TAXI_TRIPS_LIMITED, catalog="samples") + + assert len([True for _ in records]) > 1 + + +def test_sql_execution_as_iterator(ws, env_or_skip) -> None: + number_of_records = 0 see = StatementExecutionExt(ws, warehouse_id=env_or_skip("TEST_DEFAULT_WAREHOUSE_ID"), catalog="samples") for row in see(NYC_TAXI_TRIPS_LIMITED): pickup_zip, dropoff_zip, pickup_time, dropoff_time = row[0], row[1], row[2], row[3] all_fields = row.asDict() logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}") - results.add((pickup_zip, dropoff_zip)) - assert results == { - (10282, 7114), - (10282, 10001), - (10282, 10002), - (10282, 10003), - (10282, 10005), - } + number_of_records += 1 + assert number_of_records > 1 def test_fetch_one(ws):