diff --git a/README.md b/README.md index 5526650af..658ca58a6 100644 --- a/README.md +++ b/README.md @@ -444,7 +444,7 @@ Other information such as report run, owner, chart name, query name is in separa It calls two APIs ([spaces API](https://mode.com/developer/api-reference/management/spaces/#listSpaces) and [reports API](https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace)) joining together. -You can create Databuilder job config like this. +You can create Databuilder job config like this. ```python task = DefaultTask(extractor=ModeDashboardExtractor(), loader=FsNeo4jCSVLoader(), ) @@ -608,6 +608,25 @@ job = DefaultJob(conf=job_config, job.launch() ``` +If your organization's Mode account supports the Discovery API (a paid feature), you can use [ModeDashboardChartsBatchExtractor](./databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py), which retrieves charts with a batch call to the Mode API and is therefore more performant. You need to generate a bearer token as described in the Mode Discovery API documentation. + +```python +extractor = ModeDashboardChartsBatchExtractor() +task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader()) + +job_config = ConfigFactory.from_dict({ + '{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization, + '{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token, + '{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password, + '{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token, +}) + +job = DefaultJob(conf=job_config, + task=task, + publisher=Neo4jCsvPublisher()) +job.launch() +``` + #### [ModeDashboardUserExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_user_extractor.py) A Extractor that extracts Mode user_id and then update User node. 
@@ -637,6 +656,7 @@ Note that this provides accumulated view count which does [not effectively show If you are fine with `accumulated usage`, you could use TemplateVariableSubstitutionTransformer to transform Dict payload from [ModeDashboardUsageExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_usage_extractor.py) to fit [DashboardUsage](./docs/models.md#dashboardusage) and transform Dict to [DashboardUsage](./docs/models.md#dashboardusage) by [TemplateVariableSubstitutionTransformer](./databuilder/transformer/template_variable_substitution_transformer.py), and [DictToModel](./databuilder/transformer/dict_to_model.py) transformers. ([Example](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_queries_extractor.py#L36) on how to combining these two transformers) + ### [RedashDashboardExtractor](./databuilder/extractor/dashboard/redash/redash_dashboard_extractor.py) The included `RedashDashboardExtractor` provides support for extracting basic metadata for Redash dashboards (dashboard name, owner, URL, created/updated timestamps, and a generated description) and their associated queries (query name, URL, and raw query). It can be extended with a configurable table parser function to also support extraction of `DashboardTable` metadata. (See below for example usage.) diff --git a/databuilder/extractor/dashboard/mode_analytics/batch/__init__.py b/databuilder/extractor/dashboard/mode_analytics/batch/__init__.py new file mode 100644 index 000000000..f3145d75b --- /dev/null +++ b/databuilder/extractor/dashboard/mode_analytics/batch/__init__.py @@ -0,0 +1,2 @@ +# Copyright Contributors to the Amundsen project. 
+# SPDX-License-Identifier: Apache-2.0
diff --git a/databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py b/databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py
new file mode 100644
index 000000000..379cd6930
--- /dev/null
+++ b/databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py
@@ -0,0 +1,98 @@
+# Copyright Contributors to the Amundsen project.
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+
+from pyhocon import ConfigTree, ConfigFactory
+from typing import Any
+
+from databuilder import Scoped
+from databuilder.extractor.base_extractor import Extractor
+from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils
+from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery
+from databuilder.rest_api.rest_api_query import RestApiQuery
+from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
+from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION
+from databuilder.transformer.dict_to_model import DictToModel, MODEL_CLASS
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+class ModeDashboardChartsBatchExtractor(Extractor):
+    """
+    Mode dashboard chart extractor leveraging batch / discovery endpoint.
+    The detail could be found in https://mode.com/help/articles/discovery-api/#list-charts-for-an-organization
+    """
+    # config to include the charts from all space
+    INCLUDE_ALL_SPACE = 'include_all_space'
+
+    def init(self, conf: ConfigTree) -> None:
+        """
+        Build the REST API extractor for the charts endpoint and the DictToModel transformer
+        that turns each extracted record into the configured model (DashboardChart by default).
+        :param conf: configuration scoped to this extractor
+        """
+        self._conf = conf
+        restapi_query = self._build_restapi_query()
+        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
+            restapi_query=restapi_query,
+            conf=self._conf
+        )
+
+        dict_to_model_transformer = DictToModel()
+        dict_to_model_transformer.init(
+            conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
+                ConfigFactory.from_dict(
+                    {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'})))
+        self._transformer = dict_to_model_transformer
+
+    def extract(self) -> Any:
+        """
+        :return: the next record after the DictToModel transformation, or None
+            when the underlying extractor has no record to return
+        """
+        record = self._extractor.extract()
+        if not record:
+            return None
+        return self._transformer.transform(record=record)
+
+    def get_scope(self) -> str:
+        return 'extractor.mode_dashboard_chart_batch'
+
+    def _build_restapi_query(self) -> RestApiQuery:
+        """
+        Build a paginated REST API query based on the Mode discovery API
+        :return: query whose records carry the fields listed in field_names
+        """
+        params = ModeDashboardUtils.get_auth_params(conf=self._conf, discover_auth=True)
+
+        seed_record = [{
+            'organization': self._conf.get_string(ORGANIZATION),
+            'is_active': None,
+            'updated_at': None,
+            'do_not_update_empty_attribute': True,
+        }]
+        seed_query = RestApiQuerySeed(seed_record=seed_record)
+
+        # Use HTTPS so the bearer token in the Authorization header is not sent in clear text
+        chart_url_template = 'https://app.mode.com/batch/{organization}/charts'
+        if self._conf.get_bool(ModeDashboardChartsBatchExtractor.INCLUDE_ALL_SPACE, default=False):
+            # NOTE(review): confirm ModePaginatedRestApiQuery builds a valid URL when a query string is present
+            chart_url_template += '?include_spaces=all'
+        json_path = '(charts[*].[space_token,report_token,query_token,token,chart_title,chart_type])'
+        # listed in the same order as the fields selected by json_path
+        field_names = ['dashboard_group_id',
+                       'dashboard_id',
+                       'query_id',
+                       'chart_id',
+                       'chart_name',
+                       'chart_type']
+        chart_batch_query = ModePaginatedRestApiQuery(query_to_join=seed_query,
+                                                      url=chart_url_template,
+                                                      params=params,
+                                                      json_path=json_path,
+                                                      pagination_json_path=json_path,
+                                                      field_names=field_names,
+                                                      skip_no_result=True)
+        return chart_batch_query
diff --git a/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_constants.py b/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_constants.py
index b54a3cffe..0b82ddf42 100644
--- a/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_constants.py
+++ b/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_constants.py
@@ -4,3 +4,7 @@
 ORGANIZATION = 'organization'
 MODE_ACCESS_TOKEN = 'mode_user_token'
 MODE_PASSWORD_TOKEN = 'mode_password_token'
+
+# this token is needed to access the batch discovery endpoint
+# see https://mode.com/developer/discovery-api/introduction/
+MODE_BEARER_TOKEN = 'mode_bearer_token'
diff --git a/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_utils.py b/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_utils.py
index 80d918f4e..68b1a3b0d 100644
--- a/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_utils.py
+++ b/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_utils.py
@@ -7,7 +7,7 @@
 
 from databuilder import Scoped
 from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION, MODE_ACCESS_TOKEN, \
-    MODE_PASSWORD_TOKEN
+    MODE_PASSWORD_TOKEN, MODE_BEARER_TOKEN
 from databuilder.extractor.restapi.rest_api_extractor import RestAPIExtractor, REST_API_QUERY, STATIC_RECORD_DICT
 from databuilder.rest_api.base_rest_api_query import BaseRestApiQuery
 from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
@@ -44,11 +44,27 @@ def get_spaces_query_api(conf: ConfigTree) -> BaseRestApiQuery:
         return spaces_query
 
     @staticmethod
-    def get_auth_params(conf: ConfigTree) -> Dict[str, Any]:
-        params = {'auth': HTTPBasicAuth(conf.get_string(MODE_ACCESS_TOKEN),
-                                        conf.get_string(MODE_PASSWORD_TOKEN)
-                                        )
-                  }
+    def get_auth_params(conf: ConfigTree, discover_auth: bool = False) -> Dict[str, Any]:
+        """
+        Build the authentication parameters passed to the Mode REST API queries.
+        :param conf: configuration holding the Mode credentials
+        :param discover_auth: True to authenticate with the bearer token (discovery API) instead of basic auth
+        :return: dict with either a 'headers' entry (Authorization) or an 'auth' entry (HTTPBasicAuth)
+        """
+        if discover_auth:
+            # Mode discovery API needs custom token set in header
+            # https://mode.com/developer/discovery-api/introduction/
+            params = {
+                "headers": {
+                    "Authorization": conf.get_string(MODE_BEARER_TOKEN),
+                }
+            }  # type: Dict[str, Any]
+        else:
+            params = {
+                'auth': HTTPBasicAuth(conf.get_string(MODE_ACCESS_TOKEN),
+                                      conf.get_string(MODE_PASSWORD_TOKEN)
+                                      )
+            }
         return params
 
     @staticmethod
diff --git a/tests/unit/extractor/dashboard/mode_analytics/__init__.py b/tests/unit/extractor/dashboard/mode_analytics/__init__.py
new file mode 100644
index 000000000..f3145d75b
--- /dev/null
+++ b/tests/unit/extractor/dashboard/mode_analytics/__init__.py
@@ -0,0 +1,2 @@
+# Copyright Contributors to the Amundsen project.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/tests/unit/extractor/dashboard/mode_analytics/batch/__init__.py b/tests/unit/extractor/dashboard/mode_analytics/batch/__init__.py
new file mode 100644
index 000000000..f3145d75b
--- /dev/null
+++ b/tests/unit/extractor/dashboard/mode_analytics/batch/__init__.py
@@ -0,0 +1,2 @@
+# Copyright Contributors to the Amundsen project.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/tests/unit/extractor/dashboard/mode_analytics/batch/test_mode_dashboard_charts_batch_extractor.py b/tests/unit/extractor/dashboard/mode_analytics/batch/test_mode_dashboard_charts_batch_extractor.py
new file mode 100644
index 000000000..c5011d3c8
--- /dev/null
+++ b/tests/unit/extractor/dashboard/mode_analytics/batch/test_mode_dashboard_charts_batch_extractor.py
@@ -0,0 +1,58 @@
+# Copyright Contributors to the Amundsen project.
+# SPDX-License-Identifier: Apache-2.0
+
+import unittest
+from mock import patch
+from pyhocon import ConfigFactory
+
+from databuilder import Scoped
+from databuilder.extractor.dashboard.mode_analytics.batch.\
+    mode_dashboard_charts_batch_extractor import ModeDashboardChartsBatchExtractor
+
+
+class TestModeDashboardChartsBatchExtractor(unittest.TestCase):
+    def setUp(self) -> None:
+        config = ConfigFactory.from_dict({
+            'extractor.mode_dashboard_chart_batch.organization': 'amundsen',
+            'extractor.mode_dashboard_chart_batch.mode_user_token': 'amundsen_user_token',
+            'extractor.mode_dashboard_chart_batch.mode_password_token': 'amundsen_password_token',
+            'extractor.mode_dashboard_chart_batch.mode_bearer_token': 'amundsen_bearer_token',
+        })
+        self.config = config
+
+    def test_dashboard_chart_extractor_empty_record(self) -> None:
+        extractor = ModeDashboardChartsBatchExtractor()
+        extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope()))
+        # requests.get is mocked so no real HTTP call is made; extract() is expected to yield nothing
+        with patch('databuilder.rest_api.rest_api_query.requests.get'):
+            record = extractor.extract()
+            self.assertIsNone(record)
+
+    def test_dashboard_chart_extractor_actual_record(self) -> None:
+        extractor = ModeDashboardChartsBatchExtractor()
+        extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope()))
+        # stub RestAPIExtractor.extract so the test controls the record handed to the transformer
+        with patch('databuilder.extractor.restapi.rest_api_extractor.RestAPIExtractor.extract') as mock_get:
+            mock_get.return_value = {
+                'organization': 'amundsen',
+                'is_active': None,
+                'updated_at': None,
+                'do_not_update_empty_attribute': True,
+                'dashboard_group_id': 'ggg',
+                'dashboard_id': 'ddd',
+                'query_id': 'yyy',
+                'chart_id': 'xxx',
+                'chart_name': 'some chart',
+                'chart_type': 'bigNumber',
+                'product': 'mode'
+            }
+
+            record = extractor.extract()
+            self.assertEqual(record._dashboard_group_id, 'ggg')
+            self.assertEqual(record._dashboard_id, 'ddd')
+            self.assertEqual(record._chart_name, 'some chart')
+            self.assertEqual(record._product, 'mode')
+
+
+if __name__ == '__main__':
+    unittest.main()