Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest/powerbi): support each expression in m-query function invocation #7541

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion metadata-ingestion/docs/sources/powerbi/powerbi_pre.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,11 @@ PowerBI Source would be able to ingest below listed metadata of that particular
- Dashboard's Tiles
- Report's Pages

Lets consider user don't want (or doesn't have access) to add service principal as member in workspace then you can enable the `admin_apis_only: true` in recipe to use PowerBI Admin API only. if `admin_apis_only` is set to `true` then report's pages would not get ingested as page API is not available in PowerBI Admin API.
If you don't want to add a service principal as a member in your workspace, then you can enable the `admin_apis_only: true` in recipe to use PowerBI Admin API only.

Caveats of setting `admin_apis_only` to `true`:
- Report's pages would not get ingested as page API is not available in PowerBI Admin API
- [PowerBI Parameters](https://learn.microsoft.com/en-us/power-query/power-query-query-parameters) would not get resolved to actual values while processing M-Query for table lineage


### Basic Ingestion: Service Principal As Member In Workspace
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import functools
import importlib.resources as pkg_resource
import logging
from typing import Dict, List, Optional
from typing import Dict, List

import lark
from lark import Lark, Tree
Expand Down Expand Up @@ -46,7 +46,7 @@ def get_upstream_tables(
table: Table,
reporter: PowerBiDashboardSourceReport,
native_query_enabled: bool = True,
parameters: Optional[Dict[str, str]] = None,
parameters: Dict[str, str] = {},
) -> List[resolver.DataPlatformTable]:
if table.expression is None:
logger.debug(f"Expression is none for table {table.full_name}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceReport
from datahub.ingestion.source.powerbi.m_query import native_sql_parser, tree_function
from datahub.ingestion.source.powerbi.m_query.data_classes import (
TRACE_POWERBI_MQUERY_PARSER,
DataAccessFunctionDetail,
IdentifierAccessor,
)
Expand Down Expand Up @@ -190,7 +191,10 @@ def _process_invoke_expression(
first_argument
)

logger.debug(f"Extracting token from tree {first_argument.pretty()}")
if TRACE_POWERBI_MQUERY_PARSER:
logger.debug(f"Extracting token from tree {first_argument.pretty()}")
else:
logger.debug(f"Extracting token from tree {first_argument}")
if expression is None:
expression = tree_function.first_type_expression_func(first_argument)
if expression is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,33 +70,24 @@ def internal(node: Union[Tree, Token]) -> Optional[Tree]:
return expression_tree


def token_values(tree: Tree, parameters: Optional[Dict[str, str]] = None) -> List[str]:
def token_values(tree: Tree, parameters: Dict[str, str] = {}) -> List[str]:
"""

:param tree: Tree to traverse
:param parameters: If parameters is not None, it will try to resolve identifier variable references
:param parameters: If parameters is not an empty dict, it will try to resolve identifier variable references
using the values in 'parameters'.
:return: List of leaf token data
"""
values: List[str] = []

def internal(node: Union[Tree, Token]) -> None:
if (
parameters is not None
and isinstance(node, Tree)
and node.data == "identifier"
and node.children[0].data == "quoted_identifier"
):
if parameters and isinstance(node, Tree) and node.data == "identifier":
# This is the case where they reference a variable using
# the `#"Name of variable"` syntax.
# the `#"Name of variable"` or `Variable` syntax. It can be
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What was the previous assert doing here? No purpose?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this broadens it from quoted_identifier -> identifier (which includes regular_identifier)

# a quoted_identifier or a regular_identifier.

identifier = node.children[0].children[0]
assert isinstance(identifier, Token)
ref = make_function_name(node)

# For quoted_identifier, ref will have quotes around it.
# However, we'll probably need to expand this to all identifier types,
# which are not required to have quotes (using make_function_name).
ref = identifier.value
if ref.startswith('"') and ref[1:-1] in parameters:
resolved = parameters[ref[1:-1]]
values.append(resolved)
Expand Down Expand Up @@ -155,7 +146,7 @@ def get_all_function_name(tree: Tree) -> List[str]:

for node in _filter:
if TRACE_POWERBI_MQUERY_PARSER:
logger.debug(f"Tree = {node.pretty}")
logger.debug(f"Tree = {node.pretty()}")
primary_expression_node: Optional[Tree] = first_primary_expression_func(node)
if primary_expression_node is None:
continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// - The let_expression definition has added the whitespace rule instead of the required newline.
// This allows the parser to be less strict about whitespace.
// - Add inline whitespace to item_selection and optional_item_selection.
// - Tweak unary_expression to allow arbitrary operators within it.
// This is necessary because unary_expression is the base for the
// whole relational_expression parse tree.


lexical_unit: lexical_elements?
Expand Down Expand Up @@ -302,6 +305,7 @@ unary_expression: type_expression
| "+" unary_expression
| "_" unary_expression
| "not" unary_expression
| expression
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice - so this broadens the definition so that unary expression can be any type of expression?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep - this isn't strictly correct because it can cause incorrect order of operations in the AST (e.g. addition before multiplication or something), but we don't really care about that here


primary_expression: literal_expression
| list_expression
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def extract_lineage(
mcps: List[MetadataChangeProposalWrapper] = []

# table.dataset should always be set, but we check it just in case.
parameters = table.dataset.parameters if table.dataset else None
parameters = table.dataset.parameters if table.dataset else {}

upstreams: List[UpstreamClass] = []
upstream_tables: List[resolver.DataPlatformTable] = parser.get_upstream_tables(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class PowerBIDataset:
description: str
webUrl: Optional[str]
workspace_id: str
parameters: Optional[Dict[str, str]]
parameters: Dict[str, str]

# Table in datasets
tables: List["Table"]
Expand Down Expand Up @@ -194,7 +194,7 @@ def new_powerbi_dataset(workspace_id: str, raw_instance: dict) -> PowerBIDataset
if raw_instance.get("webUrl") is not None
else None,
workspace_id=workspace_id,
parameters=None,
parameters={},
tables=[],
tags=[],
)
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,11 @@ def get_dataset(
) -> Optional[PowerBIDataset]:
pass

@abstractmethod
def get_dataset_parameters(
self, workspace_id: str, dataset_id: str
) -> Optional[Dict[str, str]]:
return None
) -> Dict[str, str]:
pass

@abstractmethod
def get_users(self, workspace_id: str, entity: str, entity_id: str) -> List[User]:
Expand Down Expand Up @@ -401,7 +402,7 @@ def get_dataset(

def get_dataset_parameters(
self, workspace_id: str, dataset_id: str
) -> Optional[Dict[str, str]]:
) -> Dict[str, str]:
dataset_get_endpoint: str = RegularAPIResolver.API_ENDPOINTS[
Constant.DATASET_GET
]
Expand All @@ -420,16 +421,14 @@ def get_dataset_parameters(
params_response.raise_for_status()
params_dict = params_response.json()

params_values: Optional[List] = params_dict.get(Constant.VALUE)
if params_values:
logger.debug(f"dataset {dataset_id} parameters = {params_values}")
return {
value[Constant.NAME]: value[Constant.CURRENT_VALUE]
for value in params_values
}
else:
logger.debug(f"dataset {dataset_id} has no parameters")
return {}
params_values: List[dict] = params_dict.get(Constant.VALUE, [])

logger.debug(f"dataset {dataset_id} parameters = {params_values}")

return {
value[Constant.NAME]: value[Constant.CURRENT_VALUE]
for value in params_values
}

def get_groups_endpoint(self) -> str:
return DataResolverBase.BASE_URL
Expand Down Expand Up @@ -741,3 +740,9 @@ def get_dataset(

def _get_pages_by_report(self, workspace: Workspace, report_id: str) -> List[Page]:
return [] # Report pages are not available in Admin API

def get_dataset_parameters(
self, workspace_id: str, dataset_id: str
) -> Dict[str, str]:
logger.debug("Get dataset parameter is unsupported in Admin API")
return {}
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,54 @@
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {},
"name": "big-query-with-parameter",
"description": "Library dataset description",
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
"container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
Expand Down Expand Up @@ -513,6 +561,9 @@
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
},
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
},
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
},
Expand Down Expand Up @@ -967,6 +1018,54 @@
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {},
"name": "big-query-with-parameter",
"description": "Library dataset description",
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
"container": "urn:li:container:a4ed52f9abd3ff9cc34960c0c41f72e9"
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
Expand Down Expand Up @@ -1173,6 +1272,9 @@
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
},
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
},
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
},
Expand Down Expand Up @@ -1269,6 +1371,9 @@
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
},
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
},
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,39 @@
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"changeType": "UPSERT",
"aspectName": "datasetProperties",
"aspect": {
"json": {
"customProperties": {},
"name": "big-query-with-parameter",
"description": "Library dataset description",
"tags": []
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
"json": {
"removed": false
}
},
"systemMetadata": {
"lastObserved": 1643871600000,
"runId": "powerbi-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
Expand Down Expand Up @@ -297,6 +330,9 @@
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
},
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
},
{
"string": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
},
Expand Down
Loading