diff --git a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md index f1d6f0b8fcd53..05f948c488119 100644 --- a/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md +++ b/metadata-ingestion/docs/sources/powerbi/powerbi_pre.md @@ -133,7 +133,11 @@ PowerBI Source would be able to ingest below listed metadata of that particular - Dashboard's Tiles - Report's Pages -Lets consider user don't want (or doesn't have access) to add service principal as member in workspace then you can enable the `admin_apis_only: true` in recipe to use PowerBI Admin API only. if `admin_apis_only` is set to `true` then report's pages would not get ingested as page API is not available in PowerBI Admin API. +If you don't want to add a service principal as a member in your workspace, then you can enable the `admin_apis_only: true` in recipe to use PowerBI Admin API only. + +Caveats of setting `admin_apis_only` to `true`: + - Report's pages would not get ingested as page API is not available in PowerBI Admin API + - [PowerBI Parameters](https://learn.microsoft.com/en-us/power-query/power-query-query-parameters) would not get resolved to actual values while processing M-Query for table lineage ### Basic Ingestion: Service Principal As Member In Workspace diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py index b3619616a68f2..c156d8d69ce52 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py @@ -1,7 +1,7 @@ import functools import importlib.resources as pkg_resource import logging -from typing import Dict, List, Optional +from typing import Dict, List import lark from lark import Lark, Tree @@ -46,7 +46,7 @@ def get_upstream_tables( table: Table, reporter: PowerBiDashboardSourceReport, native_query_enabled: bool = True, - parameters: Optional[Dict[str, str]] = None, + parameters: Dict[str, str] = {}, ) -> List[resolver.DataPlatformTable]: if table.expression is None: logger.debug(f"Expression is none for table {table.full_name}") diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index aa507ce105cfb..548942324d195 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -9,6 +9,7 @@ from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport from datahub.ingestion.source.powerbi.m_query import native_sql_parser, tree_function from datahub.ingestion.source.powerbi.m_query.data_classes import ( + TRACE_POWERBI_MQUERY_PARSER, DataAccessFunctionDetail, IdentifierAccessor, ) @@ -190,7 +191,10 @@ def _process_invoke_expression( first_argument ) - logger.debug(f"Extracting token from tree {first_argument.pretty()}") + if TRACE_POWERBI_MQUERY_PARSER: + logger.debug(f"Extracting token from tree {first_argument.pretty()}") + else: + logger.debug(f"Extracting token from tree {first_argument}") if expression is None: expression = tree_function.first_type_expression_func(first_argument) if expression is None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py index 40a177130e7a6..a571691683adc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py @@ -70,33 +70,24 @@ def internal(node: Union[Tree, Token]) -> Optional[Tree]: return expression_tree -def token_values(tree: Tree, parameters: Optional[Dict[str, str]] = None) -> List[str]: +def token_values(tree: Tree, parameters: Dict[str, str] = {}) -> List[str]: """ - :param tree: Tree to traverse - :param parameters: If parameters is not None, it will try to resolve identifier variable references + :param parameters: If parameters is not an empty dict, it will try to resolve identifier variable references using the values in 'parameters'. :return: List of leaf token data """ values: List[str] = [] def internal(node: Union[Tree, Token]) -> None: - if ( - parameters is not None - and isinstance(node, Tree) - and node.data == "identifier" - and node.children[0].data == "quoted_identifier" - ): + if parameters and isinstance(node, Tree) and node.data == "identifier": # This is the case where they reference a variable using - # the `#"Name of variable"` syntax. + # the `#"Name of variable"` or `Variable` syntax. It can be + # a quoted_identifier or a regular_identifier. - identifier = node.children[0].children[0] - assert isinstance(identifier, Token) + ref = make_function_name(node) # For quoted_identifier, ref will have quotes around it. - # However, we'll probably need to expand this to all identifier types, - # which are not required to have quotes (using make_function_name). - ref = identifier.value if ref.startswith('"') and ref[1:-1] in parameters: resolved = parameters[ref[1:-1]] values.append(resolved) @@ -155,7 +146,7 @@ def get_all_function_name(tree: Tree) -> List[str]: for node in _filter: if TRACE_POWERBI_MQUERY_PARSER: - logger.debug(f"Tree = {node.pretty}") + logger.debug(f"Tree = {node.pretty()}") primary_expression_node: Optional[Tree] = first_primary_expression_func(node) if primary_expression_node is None: continue diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule index f89e70d080a11..321ffe1d3a21b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi-lexical-grammar.rule @@ -2,6 +2,9 @@ // - The let_expression definition has added the whitespace rule instead of the required newline. // This allows the parser to be less strict about whitespace. // - Add inline whitespace to item_selection and optional_item_selection. +// - Tweak unary_expression to allow arbitrary operators within it. +// This is necessary because unary_expression is the base for the +// whole relational_expression parse tree. lexical_unit: lexical_elements? @@ -302,6 +305,7 @@ unary_expression: type_expression | "+" unary_expression | "_" unary_expression | "not" unary_expression + | expression primary_expression: literal_expression | list_expression diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 9601820ceea2e..e2ff69e5fb6cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -151,7 +151,7 @@ def extract_lineage( mcps: List[MetadataChangeProposalWrapper] = [] # table.dataset should always be set, but we check it just in case. - parameters = table.dataset.parameters if table.dataset else None + parameters = table.dataset.parameters if table.dataset else {} upstreams: List[UpstreamClass] = [] upstream_tables: List[resolver.DataPlatformTable] = parser.get_upstream_tables( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py index d1adc28028943..17996b1f34872 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py @@ -66,7 +66,7 @@ class PowerBIDataset: description: str webUrl: Optional[str] workspace_id: str - parameters: Optional[Dict[str, str]] + parameters: Dict[str, str] # Table in datasets tables: List["Table"] @@ -194,7 +194,7 @@ def new_powerbi_dataset(workspace_id: str, raw_instance: dict) -> PowerBIDataset if raw_instance.get("webUrl") is not None else None, workspace_id=workspace_id, - parameters=None, + parameters={}, tables=[], tags=[], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py index b8bbb5807a392..40a047868e23b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py @@ -99,10 +99,11 @@ def get_dataset( ) -> Optional[PowerBIDataset]: pass + @abstractmethod def get_dataset_parameters( self, workspace_id: str, dataset_id: str - ) -> Optional[Dict[str, str]]: - return None + ) -> Dict[str, str]: + pass @abstractmethod def get_users(self, workspace_id: str, entity: str, entity_id: str) -> List[User]: @@ -401,7 +402,7 @@ def get_dataset( def get_dataset_parameters( self, workspace_id: str, dataset_id: str - ) -> Optional[Dict[str, str]]: + ) -> Dict[str, str]: dataset_get_endpoint: str = RegularAPIResolver.API_ENDPOINTS[ Constant.DATASET_GET ] @@ -420,16 +421,14 @@ def get_dataset_parameters( params_response.raise_for_status() params_dict = params_response.json() - params_values: Optional[List] = params_dict.get(Constant.VALUE) - if params_values: - logger.debug(f"dataset {dataset_id} parameters = {params_values}") - return { - value[Constant.NAME]: value[Constant.CURRENT_VALUE] - for value in params_values - } - else: - logger.debug(f"dataset {dataset_id} has no parameters") - return {} + params_values: List[dict] = params_dict.get(Constant.VALUE, []) + + logger.debug(f"dataset {dataset_id} parameters = {params_values}") + + return { + value[Constant.NAME]: value[Constant.CURRENT_VALUE] + for value in params_values + } def get_groups_endpoint(self) -> str: return DataResolverBase.BASE_URL @@ -741,3 +740,9 @@ def get_dataset( def _get_pages_by_report(self, workspace: Workspace, report_id: str) -> List[Page]: return [] # Report pages are not available in Admin API + + def get_dataset_parameters( + self, workspace_id: str, dataset_id: str + ) -> Dict[str, str]: + logger.debug("Get dataset parameter is unsupported in Admin API") + return {} diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json index a57317ea28e99..fbbe27d11ce2b 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json @@ -209,6 +209,54 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -513,6 +561,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, @@ -967,6 +1018,54 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -1173,6 +1272,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, @@ -1269,6 +1371,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json index 1cf3ce71e8f81..284a69d999c1b 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_disabled_ownership.json @@ -98,6 +98,39 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -297,6 +330,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json b/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json index 4ea05ea63ff15..d2a9c5de6b27a 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_endorsement.json @@ -155,6 +155,58 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:Promoted" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -441,6 +493,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index c15a8bb219394..4cdbb3b65c630 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -98,6 +98,39 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -327,6 +360,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json index 1aa88797e5af6..f4869c28fe98d 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lineage.json @@ -148,13 +148,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": {}, - "name": "snowflake native-query-with-join", + "name": "big-query-with-parameter", "description": "Library dataset description", "tags": [] } @@ -166,7 +166,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -183,27 +183,28 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "datasetProperties", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_ANALYST,PROD)", - "type": "TRANSFORMED" - }, - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sn-2.GSL_TEST_DB.PUBLIC.SALES_FORECAST,PROD)", - "type": "TRANSFORMED" - } - ] + "customProperties": {}, + "name": "snowflake native-query-with-join", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-lineage-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } }, "systemMetadata": { @@ -535,6 +536,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json index 04842b640dc0b..3ba925d98e854 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_lower_case_urn_ingest.json @@ -98,6 +98,39 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -327,6 +360,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json index 2095562b959bd..61fe59d3d096f 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json @@ -98,6 +98,39 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -327,6 +360,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, @@ -691,6 +727,39 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -852,6 +921,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, @@ -933,6 +1005,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json index 0b250a12e2f2b..8aea0c23f1d2c 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_scan_all_workspaces.json @@ -98,6 +98,39 @@ "runId": "powerbi-test" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "big-query-with-parameter", + "description": "Library dataset description", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-test" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", @@ -297,6 +330,9 @@ { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" }, diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 4a0b25e15bfbe..8d7c0d79b989a 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -33,6 +33,8 @@ 'let\n Source = Snowflake.Databases("xaa48144.snowflakecomputing.com","GSL_TEST_WH",[Role="ACCOUNTADMIN"]),\n GSL_TEST_DB_Database = Source{[Name="GSL_TEST_DB",Kind="Database"]}[Data],\n PUBLIC_Schema = GSL_TEST_DB_Database{[Name="PUBLIC",Kind="Schema"]}[Data],\n SALES_FORECAST_Table = PUBLIC_Schema{[Name="SALES_FORECAST",Kind="Table"]}[Data],\n SALES_ANALYST_Table = PUBLIC_Schema{[Name="SALES_ANALYST",Kind="Table"]}[Data],\n RESULT = Table.Combine({SALES_FORECAST_Table, SALES_ANALYST_Table})\n\nin\n RESULT', 'let\n Source = GoogleBigQuery.Database(),\n #"seraphic-music-344307" = Source{[Name="seraphic-music-344307"]}[Data],\n school_dataset_Schema = #"seraphic-music-344307"{[Name="school_dataset",Kind="Schema"]}[Data],\n first_Table = school_dataset_Schema{[Name="first",Kind="Table"]}[Data]\nin\n first_Table', 'let \nSource = GoogleBigQuery.Database([BillingProject = #"Parameter - Source"]),\n#"gcp-project" = Source{[Name=#"Parameter - Source"]}[Data],\ngcp_billing_Schema = #"gcp-project"{[Name=#"My bq project",Kind="Schema"]}[Data],\nF_GCP_COST_Table = gcp_billing_Schema{[Name="GCP_TABLE",Kind="Table"]}[Data]\nin\nF_GCP_COST_Table', + 'let\n Source = GoogleBigQuery.Database([BillingProject = #"Parameter - Source"]),\n#"gcp-project" = Source{[Name=#"Parameter - Source"]}[Data],\nuniversal_Schema = #"gcp-project"{[Name="universal",Kind="Schema"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name="D_WH_DATE",Kind="Table"]}[Data],\n#"Filtered Rows" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#"Filtered Rows1" = Table.SelectRows(#"Filtered Rows", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#"Filtered Rows1"', + 'let\n Source = GoogleBigQuery.Database([BillingProject="dwh-prod"]),\ngcp_project = Source{[Name="dwh-prod"]}[Data],\ngcp_billing_Schema = gcp_project {[Name="gcp_billing",Kind="Schema"]}[Data],\nD_GCP_CUSTOM_LABEL_Table = gcp_billing_Schema{[Name="D_GCP_CUSTOM_LABEL",Kind="Table"]}[Data] \n in \n D_GCP_CUSTOM_LABEL_Table', ] @@ -348,6 +350,61 @@ def test_google_bigquery_2(): ) +def test_for_each_expression_1(): + table: powerbi_data_classes.Table = powerbi_data_classes.Table( + expression=M_QUERIES[19], + name="D_WH_DATE", + full_name="my-test-project.universal.D_WH_DATE", + ) + + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, + reporter, + native_query_enabled=False, + parameters={ + "Parameter - Source": "my-test-project", + "My bq project": "gcp_billing", + }, + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == table.full_name.split(".")[2] + assert data_platform_tables[0].full_name == table.full_name + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.GOOGLE_BIGQUERY.value.powerbi_data_platform_name + ) + + +def test_for_each_expression_2(): + table: powerbi_data_classes.Table = powerbi_data_classes.Table( + expression=M_QUERIES[20], + name="D_GCP_CUSTOM_LABEL", + full_name="dwh-prod.gcp_billing.D_GCP_CUSTOM_LABEL", + ) + + reporter = PowerBiDashboardSourceReport() + + data_platform_tables: List[DataPlatformTable] = parser.get_upstream_tables( + table, + reporter, + native_query_enabled=False, + parameters={ + "dwh-prod": "originally-not-a-variable-ref-and-not-resolved", + }, + ) + + assert len(data_platform_tables) == 1 + assert data_platform_tables[0].name == table.full_name.split(".")[2] + assert data_platform_tables[0].full_name == table.full_name + assert ( + data_platform_tables[0].data_platform_pair.powerbi_data_platform_name + == SupportedDataPlatform.GOOGLE_BIGQUERY.value.powerbi_data_platform_name + ) + + @pytest.mark.integration def test_native_query_disabled(): table: powerbi_data_classes.Table = powerbi_data_classes.Table( diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index b36860ec4d195..29f949fe3799e 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -331,6 +331,19 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: } ], }, + { + "name": "big-query-with-parameter", + "source": [ + { + "expression": 'let\n Source = GoogleBigQuery.Database([BillingProject = #"Parameter - Source"]),\n#"gcp-project" = Source{[Name=#"Parameter - Source"]}[Data],\nuniversal_Schema = #"gcp-project"{[Name="universal",Kind="Schema"]}[Data],\nD_WH_DATE_Table = universal_Schema{[Name="D_WH_DATE",Kind="Table"]}[Data],\n#"Filtered Rows" = Table.SelectRows(D_WH_DATE_Table, each [D_DATE] > #datetime(2019, 9, 10, 0, 0, 0)),\n#"Filtered Rows1" = Table.SelectRows(#"Filtered Rows", each DateTime.IsInPreviousNHours([D_DATE], 87600))\n in \n#"Filtered Rows1"', + } + ], + "datasourceUsages": [ + { + "datasourceInstanceId": "DCE90B40-84D6-467A-9A5C-648E830E72D3", + } + ], + }, { "name": "snowflake native-query-with-join", "source": [ @@ -516,6 +529,26 @@ def register_mock_api(request_mock: Any, override_data: dict = {}) -> None: ] }, }, + "https://api.powerbi.com/v1.0/myorg/groups/64ED5CAD-7C10-4684-8180-826122881108/datasets/05169CD2-E713-41E6-9600-1D8066D95445/parameters": { + "method": "GET", + "status_code": 200, + "json": { + "value": [ + { + "name": "Parameter - Source", + "type": "Text", + "isRequired": True, + "currentValue": "my-test-project", + }, + { + "name": "My bq project", + "type": "Text", + "isRequired": True, + "currentValue": "gcp_billing", + }, + ] + }, + }, } api_vs_response.update(override_data)