Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Mode Batch dashboard chart extractor using discovery API #362

Merged
merged 1 commit into from
Sep 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ Other information such as report run, owner, chart name, query name is in separa

It calls two APIs ([spaces API](https://mode.com/developer/api-reference/management/spaces/#listSpaces) and [reports API](https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace)) joining together.

You can create Databuilder job config like this.
You can create Databuilder job config like this.
```python
task = DefaultTask(extractor=ModeDashboardExtractor(),
loader=FsNeo4jCSVLoader(), )
Expand Down Expand Up @@ -608,6 +608,25 @@ job = DefaultJob(conf=job_config,
job.launch()
```

If your organization's Mode account supports the discovery feature (a paid feature), you can use [ModeDashboardChartsBatchExtractor](./databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py), which makes a batch call to the Mode API and is therefore more performant. You need to generate a bearer token by following the Mode discovery API instructions.

```python
# Extractor that pulls charts through Mode's batch (discovery) endpoint.
extractor = ModeDashboardChartsBatchExtractor()
task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader())

# Every config key is prefixed with the extractor's scope.
job_config = ConfigFactory.from_dict({
'{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization,
'{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token,
'{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password,
# Bearer token sent in the Authorization header of discovery API requests.
'{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token,
})

# Wire the task to a Neo4j CSV publisher and run the job.
job = DefaultJob(conf=job_config,
task=task,
publisher=Neo4jCsvPublisher())
job.launch()
```

#### [ModeDashboardUserExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_user_extractor.py)
An Extractor that extracts Mode user_id and then updates the User node.

Expand Down Expand Up @@ -637,6 +656,7 @@ Note that this provides accumulated view count which does [not effectively show

If you are fine with `accumulated usage`, you could use TemplateVariableSubstitutionTransformer to transform the Dict payload from [ModeDashboardUsageExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_usage_extractor.py) to fit [DashboardUsage](./docs/models.md#dashboardusage) and transform Dict to [DashboardUsage](./docs/models.md#dashboardusage) by [TemplateVariableSubstitutionTransformer](./databuilder/transformer/template_variable_substitution_transformer.py), and [DictToModel](./databuilder/transformer/dict_to_model.py) transformers. ([Example](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_queries_extractor.py#L36) on how to combine these two transformers)


### [RedashDashboardExtractor](./databuilder/extractor/dashboard/redash/redash_dashboard_extractor.py)

The included `RedashDashboardExtractor` provides support for extracting basic metadata for Redash dashboards (dashboard name, owner, URL, created/updated timestamps, and a generated description) and their associated queries (query name, URL, and raw query). It can be extended with a configurable table parser function to also support extraction of `DashboardTable` metadata. (See below for example usage.)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import logging

from pyhocon import ConfigTree, ConfigFactory
from typing import Any

from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils
from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery
from databuilder.rest_api.rest_api_query import RestApiQuery
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION
from databuilder.transformer.dict_to_model import DictToModel, MODEL_CLASS


LOGGER = logging.getLogger(__name__)


class ModeDashboardChartsBatchExtractor(Extractor):
    """
    Mode dashboard chart extractor leveraging batch / discovery endpoint.
    The detail could be found in https://mode.com/help/articles/discovery-api/#list-charts-for-an-organization
    """
    # config to include the charts from all space
    INCLUDE_ALL_SPACE = 'include_all_space'

    def init(self, conf: ConfigTree) -> None:
        """
        Prepare the underlying REST API extractor and the dict-to-model transformer.

        :param conf: configuration scoped to this extractor
        """
        self._conf = conf
        restapi_query = self._build_restapi_query()
        self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor(
            restapi_query=restapi_query,
            conf=self._conf
        )

        dict_to_model_transformer = DictToModel()
        # MODEL_CLASS is supplied as a fallback only, so a value present in the
        # transformer's scoped configuration takes precedence over DashboardChart.
        dict_to_model_transformer.init(
            conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
                ConfigFactory.from_dict(
                    {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'})))
        self._transformer = dict_to_model_transformer

    def extract(self) -> Any:
        """
        Fetch the next record from the REST API extractor and transform it.

        :return: the transformed record, or None when the underlying extractor
            returns a falsy record
        """
        record = self._extractor.extract()
        if not record:
            return None
        return self._transformer.transform(record=record)

    def get_scope(self) -> str:
        """
        :return: the configuration scope of this extractor
        """
        return 'extractor.mode_dashboard_chart_batch'

    def _build_restapi_query(self) -> RestApiQuery:
        """
        Build a paginated REST API based on Mode discovery API
        :return: the paginated query listing the organization's charts
        """
        params = ModeDashboardUtils.get_auth_params(conf=self._conf, discover_auth=True)

        seed_record = [{
            'organization': self._conf.get_string(ORGANIZATION),
            'is_active': None,
            'updated_at': None,
            'do_not_update_empty_attribute': True,
        }]
        seed_query = RestApiQuerySeed(seed_record=seed_record)

        # Use HTTPS: the request carries the bearer token in its Authorization
        # header, which must not travel over a cleartext connection.
        chart_url_template = 'https://app.mode.com/batch/{organization}/charts'
        if self._conf.get_bool(ModeDashboardChartsBatchExtractor.INCLUDE_ALL_SPACE, default=False):
            chart_url_template += '?include_spaces=all'
        json_path = '(charts[*].[space_token,report_token,query_token,token,chart_title,chart_type])'
        # field_names are listed in the same order as the fields in json_path.
        field_names = ['dashboard_group_id',
                       'dashboard_id',
                       'query_id',
                       'chart_id',
                       'chart_name',
                       'chart_type']
        chart_batch_query = ModePaginatedRestApiQuery(query_to_join=seed_query,
                                                      url=chart_url_template,
                                                      params=params,
                                                      json_path=json_path,
                                                      pagination_json_path=json_path,
                                                      field_names=field_names,
                                                      skip_no_result=True)
        return chart_batch_query
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@
ORGANIZATION = 'organization'
MODE_ACCESS_TOKEN = 'mode_user_token'
MODE_PASSWORD_TOKEN = 'mode_password_token'

# This token is needed to access the batch discovery endpoint,
# e.g. https://mode.com/developer/discovery-api/introduction/
MODE_BEARER_TOKEN = 'mode_bearer_token'
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from databuilder import Scoped
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION, MODE_ACCESS_TOKEN, \
MODE_PASSWORD_TOKEN
MODE_PASSWORD_TOKEN, MODE_BEARER_TOKEN
from databuilder.extractor.restapi.rest_api_extractor import RestAPIExtractor, REST_API_QUERY, STATIC_RECORD_DICT
from databuilder.rest_api.base_rest_api_query import BaseRestApiQuery
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
Expand Down Expand Up @@ -44,11 +44,21 @@ def get_spaces_query_api(conf: ConfigTree) -> BaseRestApiQuery:
return spaces_query

@staticmethod
def get_auth_params(conf: ConfigTree) -> Dict[str, Any]:
params = {'auth': HTTPBasicAuth(conf.get_string(MODE_ACCESS_TOKEN),
conf.get_string(MODE_PASSWORD_TOKEN)
)
}
def get_auth_params(conf: ConfigTree, discover_auth: bool = False) -> Dict[str, Any]:
    """
    Build the credential-carrying parameters for a Mode API request.

    :param conf: configuration holding the Mode tokens
    :param discover_auth: when True, authenticate with the bearer token header
        instead of HTTP basic auth
    :return: dict holding either a 'headers' entry (bearer token) or an
        'auth' entry (HTTP basic auth)
    """
    if not discover_auth:
        basic_auth = HTTPBasicAuth(conf.get_string(MODE_ACCESS_TOKEN),
                                   conf.get_string(MODE_PASSWORD_TOKEN))
        return {'auth': basic_auth}

    # Mode discovery API needs custom token set in header
    # https://mode.com/developer/discovery-api/introduction/
    bearer_token = conf.get_string(MODE_BEARER_TOKEN)
    return {'headers': {'Authorization': bearer_token}}

@staticmethod
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/extractor/dashboard/mode_analytics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

import unittest
from mock import patch
from pyhocon import ConfigFactory

from databuilder import Scoped
from databuilder.extractor.dashboard.mode_analytics.batch.\
mode_dashboard_charts_batch_extractor import ModeDashboardChartsBatchExtractor


class TestModeDashboardChartsBatchExtractor(unittest.TestCase):
    """Unit tests for ModeDashboardChartsBatchExtractor."""

    def setUp(self) -> None:
        """Build a job configuration scoped to the batch chart extractor."""
        config = ConfigFactory.from_dict({
            'extractor.mode_dashboard_chart_batch.organization': 'amundsen',
            'extractor.mode_dashboard_chart_batch.mode_user_token': 'amundsen_user_token',
            'extractor.mode_dashboard_chart_batch.mode_password_token': 'amundsen_password_token',
            'extractor.mode_dashboard_chart_batch.mode_bearer_token': 'amundsen_bearer_token',
        })
        self.config = config

    def test_dashboard_chart_extractor_empty_record(self) -> None:
        """The extractor returns None when the patched HTTP call yields no record."""
        extractor = ModeDashboardChartsBatchExtractor()
        extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope()))

        with patch('databuilder.rest_api.rest_api_query.requests.get'):
            record = extractor.extract()
            self.assertIsNone(record)

    def test_dashboard_chart_extractor_actual_record(self) -> None:
        """A dict returned by the REST API extractor is transformed into a model."""
        extractor = ModeDashboardChartsBatchExtractor()
        extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope()))

        with patch('databuilder.extractor.restapi.rest_api_extractor.RestAPIExtractor.extract') as mock_get:
            mock_get.return_value = {
                'organization': 'amundsen',
                'is_active': None,
                'updated_at': None,
                'do_not_update_empty_attribute': True,
                'dashboard_group_id': 'ggg',
                'dashboard_id': 'ddd',
                'query_id': 'yyy',
                'chart_id': 'xxx',
                'chart_name': 'some chart',
                'chart_type': 'bigNumber',
                'product': 'mode'
            }

            record = extractor.extract()
            # assertEqual replaces the deprecated assertEquals alias, which was
            # removed from unittest in Python 3.12.
            self.assertEqual(record._dashboard_group_id, 'ggg')
            self.assertEqual(record._dashboard_id, 'ddd')
            self.assertEqual(record._chart_name, 'some chart')
            self.assertEqual(record._product, 'mode')


if __name__ == '__main__':
unittest.main()