Skip to content

Commit

Permalink
feat: Mode Batch dashboard chart API (#362)
Browse files Browse the repository at this point in the history
Signed-off-by: Tao Feng <fengtao04@gmail.com>
  • Loading branch information
feng-tao committed Sep 9, 2020
1 parent c3e713e commit 87213c5
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 7 deletions.
22 changes: 21 additions & 1 deletion README.md
Expand Up @@ -444,7 +444,7 @@ Other information such as report run, owner, chart name, query name is in separa

It calls two APIs ([spaces API](https://mode.com/developer/api-reference/management/spaces/#listSpaces) and [reports API](https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace)) joining together.

You can create Databuilder job config like this.
You can create Databuilder job config like this.
```python
task = DefaultTask(extractor=ModeDashboardExtractor(),
loader=FsNeo4jCSVLoader(), )
Expand Down Expand Up @@ -608,6 +608,25 @@ job = DefaultJob(conf=job_config,
job.launch()
```

If your organization's Mode account supports the discovery feature (a paid feature), you can leverage [ModeDashboardChartsBatchExtractor](./databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py), which makes a batch call to the Mode API and is more performant. You need to generate a bearer token following the API instructions.

```python
extractor = ModeDashboardChartsBatchExtractor()
task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader())

job_config = ConfigFactory.from_dict({
'{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization,
'{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token,
'{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password,
'{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token,
})

job = DefaultJob(conf=job_config,
task=task,
publisher=Neo4jCsvPublisher())
job.launch()
```

#### [ModeDashboardUserExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_user_extractor.py)
An Extractor that extracts the Mode user_id and then updates the User node.

Expand Down Expand Up @@ -637,6 +656,7 @@ Note that this provides accumulated view count which does [not effectively show

If you are fine with `accumulated usage`, you could use TemplateVariableSubstitutionTransformer to transform Dict payload from [ModeDashboardUsageExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_usage_extractor.py) to fit [DashboardUsage](./docs/models.md#dashboardusage) and transform Dict to [DashboardUsage](./docs/models.md#dashboardusage) by [TemplateVariableSubstitutionTransformer](./databuilder/transformer/template_variable_substitution_transformer.py), and [DictToModel](./databuilder/transformer/dict_to_model.py) transformers. ([Example](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_queries_extractor.py#L36) on how to combine these two transformers)


### [RedashDashboardExtractor](./databuilder/extractor/dashboard/redash/redash_dashboard_extractor.py)

The included `RedashDashboardExtractor` provides support for extracting basic metadata for Redash dashboards (dashboard name, owner, URL, created/updated timestamps, and a generated description) and their associated queries (query name, URL, and raw query). It can be extended with a configurable table parser function to also support extraction of `DashboardTable` metadata. (See below for example usage.)
Expand Down
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,87 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import logging

from pyhocon import ConfigTree, ConfigFactory
from typing import Any

from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils
from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery
from databuilder.rest_api.rest_api_query import RestApiQuery
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION
from databuilder.transformer.dict_to_model import DictToModel, MODEL_CLASS


LOGGER = logging.getLogger(__name__)


class ModeDashboardChartsBatchExtractor(Extractor):
    """
    Mode dashboard chart extractor leveraging the batch / discovery endpoint.

    Records pulled from the endpoint are passed through a DictToModel transformer
    that is configured (by default) to build DashboardChart models.
    The details of the endpoint can be found in
    https://mode.com/help/articles/discovery-api/#list-charts-for-an-organization
    """
    # config to include the charts from all space
    INCLUDE_ALL_SPACE = 'include_all_space'

    def init(self, conf: ConfigTree) -> None:
        """
        Set up the underlying REST API extractor and the DictToModel transformer.

        :param conf: configuration scoped to this extractor
        """
        self._conf = conf
        restapi_query = self._build_restapi_query()
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=restapi_query,
            conf=self._conf
        )

        dict_to_model_transformer = DictToModel()
        # DashboardChart is only the fallback model class; a value supplied in the
        # transformer's own scope of the configuration takes precedence.
        dict_to_model_transformer.init(
            conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict(
                    {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'})))
        self._transformer = dict_to_model_transformer

    def extract(self) -> Any:
        """
        Return the next record transformed by DictToModel.

        :return: the transformed record, or None when the underlying extractor
            yields a falsy record
        """
        record = self._extractor.extract()
        if not record:
            return None
        return self._transformer.transform(record=record)

    def get_scope(self) -> str:
        """
        :return: the configuration scope of this extractor
        """
        return 'extractor.mode_dashboard_chart_batch'

    def _build_restapi_query(self) -> RestApiQuery:
        """
        Build a paginated REST API query based on the Mode discovery API.

        :return: query yielding one record per chart of the organization
        """
        # The discovery endpoint authenticates through a bearer token header
        # instead of the basic auth used by the regular Mode API.
        params = ModeDashboardUtils.get_auth_params(conf=self._conf, discover_auth=True)

        # NOTE(review): the seed presumably supplies {organization} for the URL
        # template and the remaining static fields for each record - confirm
        # against RestApiQuery.
        seed_record = [{
            'organization': self._conf.get_string(ORGANIZATION),
            'is_active': None,
            'updated_at': None,
            'do_not_update_empty_attribute': True,
        }]
        seed_query = RestApiQuerySeed(seed_record=seed_record)

        # Use HTTPS so that the bearer token in the Authorization header is never
        # sent in clear text.
        chart_url_template = 'https://app.mode.com/batch/{organization}/charts'
        if self._conf.get_bool(ModeDashboardChartsBatchExtractor.INCLUDE_ALL_SPACE, default=False):
            chart_url_template += '?include_spaces=all'
        json_path = '(charts[*].[space_token,report_token,query_token,token,chart_title,chart_type])'
        # field_names are listed in the same order as the attributes selected by json_path
        field_names = ['dashboard_group_id',
                       'dashboard_id',
                       'query_id',
                       'chart_id',
                       'chart_name',
                       'chart_type']
        chart_batch_query = ModePaginatedRestApiQuery(query_to_join=seed_query,
                                                      url=chart_url_template,
                                                      params=params,
                                                      json_path=json_path,
                                                      pagination_json_path=json_path,
                                                      field_names=field_names,
                                                      skip_no_result=True)
        return chart_batch_query
Expand Up @@ -4,3 +4,7 @@
# Configuration keys read by the Mode dashboard extractors.
ORGANIZATION = 'organization'
MODE_ACCESS_TOKEN = 'mode_user_token'
MODE_PASSWORD_TOKEN = 'mode_password_token'

# This token is needed to access the batch discovery endpoint,
# see https://mode.com/developer/discovery-api/introduction/
MODE_BEARER_TOKEN = 'mode_bearer_token'
Expand Up @@ -7,7 +7,7 @@

from databuilder import Scoped
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION, MODE_ACCESS_TOKEN, \
MODE_PASSWORD_TOKEN
MODE_PASSWORD_TOKEN, MODE_BEARER_TOKEN
from databuilder.extractor.restapi.rest_api_extractor import RestAPIExtractor, REST_API_QUERY, STATIC_RECORD_DICT
from databuilder.rest_api.base_rest_api_query import BaseRestApiQuery
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
Expand Down Expand Up @@ -44,11 +44,21 @@ def get_spaces_query_api(conf: ConfigTree) -> BaseRestApiQuery:
return spaces_query

@staticmethod
def get_auth_params(conf: ConfigTree) -> Dict[str, Any]:
params = {'auth': HTTPBasicAuth(conf.get_string(MODE_ACCESS_TOKEN),
conf.get_string(MODE_PASSWORD_TOKEN)
)
}
def get_auth_params(conf: ConfigTree, discover_auth: bool = False) -> Dict[str, Any]:
    """
    Build the authentication related keyword arguments for a Mode API request.

    :param conf: configuration holding the Mode tokens
    :param discover_auth: when True, authenticate with the bearer token header
        (discovery API); otherwise use HTTP basic auth with the access / password tokens
    :return: dict with either a 'headers' or an 'auth' entry
    """
    if not discover_auth:
        basic_auth = HTTPBasicAuth(conf.get_string(MODE_ACCESS_TOKEN),
                                   conf.get_string(MODE_PASSWORD_TOKEN))
        return {'auth': basic_auth}

    # Mode discovery API needs custom token set in header
    # https://mode.com/developer/discovery-api/introduction/
    bearer_headers = {"Authorization": conf.get_string(MODE_BEARER_TOKEN)}
    return {"headers": bearer_headers}

@staticmethod
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/extractor/dashboard/mode_analytics/__init__.py
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,58 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import unittest
from mock import patch
from pyhocon import ConfigFactory

from databuilder import Scoped
from databuilder.extractor.dashboard.mode_analytics.batch.\
mode_dashboard_charts_batch_extractor import ModeDashboardChartsBatchExtractor


class TestModeDashboardChartsBatchExtractor(unittest.TestCase):
def setUp(self) -> None:
config = ConfigFactory.from_dict({
'extractor.mode_dashboard_chart_batch.organization': 'amundsen',
'extractor.mode_dashboard_chart_batch.mode_user_token': 'amundsen_user_token',
'extractor.mode_dashboard_chart_batch.mode_password_token': 'amundsen_password_token',
'extractor.mode_dashboard_chart_batch.mode_bearer_token': 'amundsen_bearer_token',
})
self.config = config

def test_dashboard_chart_extractor_empty_record(self) -> None:
extractor = ModeDashboardChartsBatchExtractor()
extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope()))

with patch('databuilder.rest_api.rest_api_query.requests.get'):
record = extractor.extract()
self.assertIsNone(record)

def test_dashboard_chart_extractor_actual_record(self) -> None:
extractor = ModeDashboardChartsBatchExtractor()
extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope()))

with patch('databuilder.extractor.restapi.rest_api_extractor.RestAPIExtractor.extract') as mock_get:
mock_get.return_value = {
'organization': 'amundsen',
'is_active': None,
'updated_at': None,
'do_not_update_empty_attribute': True,
'dashboard_group_id': 'ggg',
'dashboard_id': 'ddd',
'query_id': 'yyy',
'chart_id': 'xxx',
'chart_name': 'some chart',
'chart_type': 'bigNumber',
'product': 'mode'
}

record = extractor.extract()
self.assertEquals(record._dashboard_group_id, 'ggg')
self.assertEquals(record._dashboard_id, 'ddd')
self.assertEquals(record._chart_name, 'some chart')
self.assertEquals(record._product, 'mode')


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 87213c5

Please sign in to comment.