From a1d0186bc91269048889dc1750a23d2f8722986a Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 15 Jul 2024 18:22:22 +0200 Subject: [PATCH 1/8] Added `databricks labs lsql fmt` command --- labs.yml | 5 +++ src/databricks/labs/lsql/cli.py | 14 +++++++- src/databricks/labs/lsql/dashboards.py | 48 +++++++++++++------------- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/labs.yml b/labs.yml index 897f6879..742f0d14 100644 --- a/labs.yml +++ b/labs.yml @@ -4,6 +4,11 @@ description: Lightweight SQL execution wrapper only on top of Databricks SDK entrypoint: src/databricks/labs/lsql/cli.py min_python: 3.10 commands: + - name: fmt + description: Format SQL files in the given folder + flags: + - name: folder + description: The folder with SQL files. By default, the current working directory. - name: create-dashboard description: Create an unpublished dashboard from code, see [docs](./docs/dashboards.md). flags: diff --git a/src/databricks/labs/lsql/cli.py b/src/databricks/labs/lsql/cli.py index acc6d64d..07a68b91 100644 --- a/src/databricks/labs/lsql/cli.py +++ b/src/databricks/labs/lsql/cli.py @@ -5,7 +5,7 @@ from databricks.labs.blueprint.entrypoint import get_logger from databricks.sdk import WorkspaceClient -from databricks.labs.lsql.dashboards import DashboardMetadata, Dashboards +from databricks.labs.lsql.dashboards import DashboardMetadata, Dashboards, QueryTile logger = get_logger(__file__) lsql = App(__file__) @@ -37,5 +37,17 @@ def create_dashboard( print(sdk_dashboard.dashboard_id) +@lsql.command +def fmt(folder: Path = Path.cwd()): + """Format SQL files in a folder""" + logger.debug("Formatting SQL files ...") + folder = Path(folder) + for sql_file in folder.glob("**/*.sql"): + sql = sql_file.read_text() + formatted_sql = QueryTile.format_query(sql) + sql_file.write_text(formatted_sql) + logger.debug(f"Formatted {sql_file}") + + if __name__ == "__main__": lsql() diff --git a/src/databricks/labs/lsql/dashboards.py b/src/databricks/labs/lsql/dashboards.py index 8796e048..c7badeb5 100644 --- a/src/databricks/labs/lsql/dashboards.py +++ b/src/databricks/labs/lsql/dashboards.py @@ -376,6 +376,29 @@ class QueryTile(Tile): _DIALECT = sqlglot.dialects.Databricks _FILTER_HEIGHT = 1 + @staticmethod + def format_query(query: str) -> str: + try: + parsed_query = sqlglot.parse(query) + except sqlglot.ParseError: + return query + statements = [] + for statement in parsed_query: + if statement is None: + continue + # see https://sqlglot.com/sqlglot/generator.html#Generator + statements.append( + statement.sql( + dialect="databricks", + normalize=True, # normalize identifiers to lowercase + pretty=True, # format the produced SQL string + normalize_functions="upper", # normalize function names to uppercase + max_text_width=80, # wrap text at 80 characters + ) + ) + formatted_query = ";\n".join(statements) + return formatted_query + def _get_abstract_syntax_tree(self) -> sqlglot.Expression | None: try: return sqlglot.parse_one(self.content, dialect=self._DIALECT) @@ -822,7 +845,7 @@ def save_to_folder(self, dashboard: Dashboard, local_path: Path) -> Dashboard: local_path.mkdir(parents=True, exist_ok=True) dashboard = self._with_better_names(dashboard) for dataset in dashboard.datasets: - query = self._format_query(dataset.query) + query = QueryTile.format_query(dataset.query) with (local_path / f"{dataset.name}.sql").open("w") as f: f.write(query) for page in dashboard.pages: @@ -830,29 +853,6 @@ def save_to_folder(self, dashboard: Dashboard, local_path: Path) -> Dashboard: yaml.safe_dump(page.as_dict(), f) return dashboard - @staticmethod - def _format_query(query: str) -> str: - try: - parsed_query = sqlglot.parse(query) - except sqlglot.ParseError: - return query - statements = [] - for statement in parsed_query: - if statement is None: - continue - # see https://sqlglot.com/sqlglot/generator.html#Generator - statements.append( - statement.sql( - dialect="databricks", - normalize=True, # normalize identifiers to lowercase - pretty=True, # format the produced SQL string - normalize_functions="upper", # normalize function names to uppercase - max_text_width=80, # wrap text at 80 characters - ) - ) - formatted_query = ";\n".join(statements) - return formatted_query - def deploy_dashboard( self, lakeview_dashboard: Dashboard, From 418e518b0479071869a8a4a85f0442d082a05e46 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 15 Jul 2024 18:28:19 +0200 Subject: [PATCH 2/8] .. --- labs.yml | 3 +++ src/databricks/labs/lsql/cli.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/labs.yml b/labs.yml index 742f0d14..d83c5e9d 100644 --- a/labs.yml +++ b/labs.yml @@ -2,9 +2,12 @@ name: lsql description: Lightweight SQL execution wrapper only on top of Databricks SDK entrypoint: src/databricks/labs/lsql/cli.py +install: + script: src/databricks/labs/lsql/__about__.py min_python: 3.10 commands: - name: fmt + is_unauthenticated: true description: Format SQL files in the given folder flags: - name: folder diff --git a/src/databricks/labs/lsql/cli.py b/src/databricks/labs/lsql/cli.py index 07a68b91..f66ee172 100644 --- a/src/databricks/labs/lsql/cli.py +++ b/src/databricks/labs/lsql/cli.py @@ -37,7 +37,7 @@ def create_dashboard( print(sdk_dashboard.dashboard_id) -@lsql.command +@lsql.command(is_unauthenticated=True) def fmt(folder: Path = Path.cwd()): """Format SQL files in a folder""" logger.debug("Formatting SQL files ...") From 50489cac0a6dc68d93c0baae7d4d9212c0af4ebb Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 15 Jul 2024 18:30:06 +0200 Subject: [PATCH 3/8] .. --- tests/integration/dashboards/one_counter/010_counter.sql | 3 ++- tests/integration/views/some.sql | 5 ++++- tests/unit/queries/counter.sql | 3 ++- tests/unit/views/some.sql | 5 ++++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/integration/dashboards/one_counter/010_counter.sql b/tests/integration/dashboards/one_counter/010_counter.sql index b2ab78e2..df9bd5e5 100644 --- a/tests/integration/dashboards/one_counter/010_counter.sql +++ b/tests/integration/dashboards/one_counter/010_counter.sql @@ -1 +1,2 @@ -SELECT 6217 AS count \ No newline at end of file +SELECT + 6217 AS count \ No newline at end of file diff --git a/tests/integration/views/some.sql b/tests/integration/views/some.sql index 6af77981..9f5ce717 100644 --- a/tests/integration/views/some.sql +++ b/tests/integration/views/some.sql @@ -1 +1,4 @@ -SELECT first AS name, 1 AS id FROM $inventory.foo \ No newline at end of file +SELECT + first AS name, + 1 AS id +FROM $inventory.foo \ No newline at end of file diff --git a/tests/unit/queries/counter.sql b/tests/unit/queries/counter.sql index b2ab78e2..df9bd5e5 100644 --- a/tests/unit/queries/counter.sql +++ b/tests/unit/queries/counter.sql @@ -1 +1,2 @@ -SELECT 6217 AS count \ No newline at end of file +SELECT + 6217 AS count \ No newline at end of file diff --git a/tests/unit/views/some.sql b/tests/unit/views/some.sql index 166a1660..c76c45a0 100644 --- a/tests/unit/views/some.sql +++ b/tests/unit/views/some.sql @@ -1 +1,4 @@ -SELECT id, name FROM $inventory.something \ No newline at end of file +SELECT + id, + name +FROM $inventory.something \ No newline at end of file From 8a698a494e6e055fa8227f02638fc8fda73ddc4f Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 15 Jul 2024 18:33:57 +0200 Subject: [PATCH 4/8] .. --- src/databricks/labs/lsql/dashboards.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/lsql/dashboards.py b/src/databricks/labs/lsql/dashboards.py index c7badeb5..449b2efe 100644 --- a/src/databricks/labs/lsql/dashboards.py +++ b/src/databricks/labs/lsql/dashboards.py @@ -377,7 +377,7 @@ class QueryTile(Tile): _FILTER_HEIGHT = 1 @staticmethod - def format_query(query: str) -> str: + def format_query(query: str, max_text_width: int = 120) -> str: try: parsed_query = sqlglot.parse(query) except sqlglot.ParseError: @@ -393,7 +393,7 @@ def format_query(query: str) -> str: normalize=True, # normalize identifiers to lowercase pretty=True, # format the produced SQL string normalize_functions="upper", # normalize function names to uppercase - max_text_width=80, # wrap text at 80 characters + max_text_width=max_text_width, # wrap text at 120 characters ) ) formatted_query = ";\n".join(statements) From a207391fc4bee3ab2f70382f6b512a7870865efe Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 15 Jul 2024 18:46:37 +0200 Subject: [PATCH 5/8] .. --- src/databricks/labs/lsql/cli.py | 2 +- src/databricks/labs/lsql/dashboards.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/lsql/cli.py b/src/databricks/labs/lsql/cli.py index f66ee172..70377a6f 100644 --- a/src/databricks/labs/lsql/cli.py +++ b/src/databricks/labs/lsql/cli.py @@ -44,7 +44,7 @@ def fmt(folder: Path = Path.cwd()): folder = Path(folder) for sql_file in folder.glob("**/*.sql"): sql = sql_file.read_text() - formatted_sql = QueryTile.format_query(sql) + formatted_sql = QueryTile.format(sql) sql_file.write_text(formatted_sql) logger.debug(f"Formatted {sql_file}") diff --git a/src/databricks/labs/lsql/dashboards.py b/src/databricks/labs/lsql/dashboards.py index 449b2efe..836acb30 100644 --- a/src/databricks/labs/lsql/dashboards.py +++ b/src/databricks/labs/lsql/dashboards.py @@ -377,7 +377,7 @@ class QueryTile(Tile): _FILTER_HEIGHT = 1 @staticmethod - def format_query(query: str, max_text_width: int = 120) -> str: + def format(query: str, max_text_width: int = 120) -> str: try: parsed_query = sqlglot.parse(query) except sqlglot.ParseError: @@ -386,6 +386,8 @@ def format_query(query: str, max_text_width: int = 120) -> str: for statement in parsed_query: if statement is None: continue + # TODO: CASE .. WHEN .. THEN .. formatting is a bit less readable after reformatting. + # See https://github.com/tobymao/sqlglot/issues/3770 # see https://sqlglot.com/sqlglot/generator.html#Generator statements.append( statement.sql( @@ -845,7 +847,7 @@ def save_to_folder(self, dashboard: Dashboard, local_path: Path) -> Dashboard: local_path.mkdir(parents=True, exist_ok=True) dashboard = self._with_better_names(dashboard) for dataset in dashboard.datasets: - query = QueryTile.format_query(dataset.query) + query = QueryTile.format(dataset.query) with (local_path / f"{dataset.name}.sql").open("w") as f: f.write(query) for page in dashboard.pages: From 5a837d02c7c054660ed63289d976c19d5e68027c Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 15 Jul 2024 19:23:43 +0200 Subject: [PATCH 6/8] .. --- src/databricks/labs/lsql/dashboards.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/lsql/dashboards.py b/src/databricks/labs/lsql/dashboards.py index 836acb30..0ec98beb 100644 --- a/src/databricks/labs/lsql/dashboards.py +++ b/src/databricks/labs/lsql/dashboards.py @@ -379,7 +379,7 @@ class QueryTile(Tile): @staticmethod def format(query: str, max_text_width: int = 120) -> str: try: - parsed_query = sqlglot.parse(query) + parsed_query = sqlglot.parse(query, dialect="databricks") except sqlglot.ParseError: return query statements = [] From c2dccb9412d5b6a1904462df0a061b14a53f559d Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 15 Jul 2024 19:42:39 +0200 Subject: [PATCH 7/8] .. --- src/databricks/labs/lsql/dashboards.py | 3 +++ .../one_table/databricks_office_locations.sql | 13 ++++++------- tests/unit/test_deployment.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/databricks/labs/lsql/dashboards.py b/src/databricks/labs/lsql/dashboards.py index 0ec98beb..c82af3e6 100644 --- a/src/databricks/labs/lsql/dashboards.py +++ b/src/databricks/labs/lsql/dashboards.py @@ -399,6 +399,9 @@ def format(query: str, max_text_width: int = 120) -> str: ) ) formatted_query = ";\n".join(statements) + if "$" in query: + # replace ${x} with $x, because we use it in UCX view definitions for now + formatted_query = re.sub(r"\${(\w+)}", r"$\1", formatted_query) return formatted_query def _get_abstract_syntax_tree(self) -> sqlglot.Expression | None: diff --git a/tests/integration/dashboards/one_table/databricks_office_locations.sql b/tests/integration/dashboards/one_table/databricks_office_locations.sql index fbe8a2e4..91c72d8b 100644 --- a/tests/integration/dashboards/one_table/databricks_office_locations.sql +++ b/tests/integration/dashboards/one_table/databricks_office_locations.sql @@ -1,13 +1,12 @@ SELECT - Address, - City, - State, + address, + city, + state, `Zip Code`, - Country -FROM -VALUES + country +FROM VALUES ('160 Spear St 15th Floor', 'San Francisco', 'CA', '94105', 'USA'), ('756 W Peachtree St NW, Suite 03W114', 'Atlanta', 'GA', '30308', 'USA'), ('500 108th Ave NE, Suite 1820', 'Bellevue', 'WA', '98004', 'USA'), ('125 High St, Suite 220', 'Boston', 'MA', '02110', 'USA'), - ('2120 University Ave, Suite 722', 'Berkeley', 'CA', '94704', 'USA') AS tab(Address, City, State, `Zip Code`, Country) + ('2120 University Ave, Suite 722', 'Berkeley', 'CA', '94704', 'USA') AS tab(address, city, state, `Zip Code`, country) \ No newline at end of file diff --git a/tests/unit/test_deployment.py b/tests/unit/test_deployment.py index cd6fcdbb..37fde4e2 100644 --- a/tests/unit/test_deployment.py +++ b/tests/unit/test_deployment.py @@ -17,7 +17,7 @@ def test_deploys_view(): deployment.deploy_view("some", "some.sql") assert mock_backend.queries == [ - "CREATE OR REPLACE VIEW hive_metastore.inventory.some AS SELECT id, name FROM hive_metastore.inventory.something" + "CREATE OR REPLACE VIEW hive_metastore.inventory.some AS SELECT\n id,\n name\nFROM inventory.something" ] From fbb3e1105827f4c7a239a79d349db5ac468070b2 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 15 Jul 2024 19:50:59 +0200 Subject: [PATCH 8/8] .. --- tests/unit/test_deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_deployment.py b/tests/unit/test_deployment.py index 37fde4e2..1e407a6f 100644 --- a/tests/unit/test_deployment.py +++ b/tests/unit/test_deployment.py @@ -17,7 +17,7 @@ def test_deploys_view(): deployment.deploy_view("some", "some.sql") assert mock_backend.queries == [ - "CREATE OR REPLACE VIEW hive_metastore.inventory.some AS SELECT\n id,\n name\nFROM inventory.something" + "CREATE OR REPLACE VIEW hive_metastore.inventory.some AS SELECT\n id,\n name\nFROM hive_metastore.inventory.something" ]