-
Notifications
You must be signed in to change notification settings - Fork 209
/
tableau_dashboard_extractor.py
135 lines (114 loc) · 5.91 KB
/
tableau_dashboard_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import logging
from typing import Any, Dict, Iterator, List
from pyhocon import ConfigFactory, ConfigTree
import databuilder.extractor.dashboard.tableau.tableau_dashboard_constants as const
from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.dashboard.tableau.tableau_dashboard_utils import TableauGraphQLApiExtractor,\
TableauDashboardUtils
from databuilder.extractor.restapi.rest_api_extractor import STATIC_RECORD_DICT
from databuilder.transformer.base_transformer import ChainedTransformer
from databuilder.transformer.base_transformer import Transformer
from databuilder.transformer.dict_to_model import DictToModel, MODEL_CLASS
from databuilder.transformer.timestamp_string_to_epoch import TimestampStringToEpoch, FIELD_NAME
LOGGER = logging.getLogger(__name__)
class TableauGraphQLApiMetadataExtractor(TableauGraphQLApiExtractor):
"""
Implements the extraction-time logic for parsing the GraphQL result and transforming into a dict
that fills the DashboardMetadata model. Allows workbooks to be exlcuded based on their project.
"""
CLUSTER = const.CLUSTER
EXCLUDED_PROJECTS = const.EXCLUDED_PROJECTS
TABLEAU_BASE_URL = const.TABLEAU_BASE_URL
def execute(self) -> Iterator[Dict[str, Any]]:
response = self.execute_query()
workbooks_data = [workbook for workbook in response['workbooks']
if workbook['projectName'] not in
self._conf.get_list(TableauGraphQLApiMetadataExtractor.EXCLUDED_PROJECTS)]
for workbook in workbooks_data:
data = {
'dashboard_group': workbook['projectName'],
'dashboard_name': TableauDashboardUtils.sanitize_workbook_name(workbook['name']),
'description': workbook.get('description', ''),
'created_timestamp': workbook['createdAt'],
'dashboard_group_url': '{}/#/projects/{}'.format(
self._conf.get(TableauGraphQLApiMetadataExtractor.TABLEAU_BASE_URL),
workbook['projectVizportalUrlId']
),
'dashboard_url': '{}/#/workbooks/{}/views'.format(
self._conf.get(TableauGraphQLApiMetadataExtractor.TABLEAU_BASE_URL),
workbook['vizportalUrlId']
),
'cluster': self._conf.get_string(TableauGraphQLApiMetadataExtractor.CLUSTER)
}
yield data
class TableauDashboardExtractor(Extractor):
"""
Extracts core metadata about Tableau "dashboards".
For the purposes of this extractor, Tableau "workbooks" are mapped to Amundsen dashboards, and the
top-level project in which these workbooks preside is the dashboard group. The metadata it gathers is:
Dashboard name (Workbook name)
Dashboard description (Workbook description)
Dashboard creation timestamp (Workbook creationstamp)
Dashboard group name (Workbook top-level folder name)
Uses the Metadata API: https://help.tableau.com/current/api/metadata_api/en-us/index.html
"""
API_BASE_URL = const.API_BASE_URL
API_VERSION = const.API_VERSION
CLUSTER = const.CLUSTER
EXCLUDED_PROJECTS = const.EXCLUDED_PROJECTS
SITE_NAME = const.SITE_NAME
TABLEAU_BASE_URL = const.TABLEAU_BASE_URL
TABLEAU_ACCESS_TOKEN_NAME = const.TABLEAU_ACCESS_TOKEN_NAME
TABLEAU_ACCESS_TOKEN_SECRET = const.TABLEAU_ACCESS_TOKEN_SECRET
VERIFY_REQUEST = const.VERIFY_REQUEST
def init(self, conf: ConfigTree) -> None:
self._conf = conf
self.query = """query {
workbooks {
id
name
createdAt
description
projectName
projectVizportalUrlId
vizportalUrlId
}
}"""
self._extractor = self._build_extractor()
transformers: List[Transformer] = []
timestamp_str_to_epoch_transformer = TimestampStringToEpoch()
timestamp_str_to_epoch_transformer.init(
conf=Scoped.get_scoped_conf(self._conf, timestamp_str_to_epoch_transformer.get_scope()).with_fallback(
ConfigFactory.from_dict({FIELD_NAME: 'created_timestamp', })))
transformers.append(timestamp_str_to_epoch_transformer)
dict_to_model_transformer = DictToModel()
dict_to_model_transformer.init(
conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback(
ConfigFactory.from_dict(
{MODEL_CLASS: 'databuilder.models.dashboard.dashboard_metadata.DashboardMetadata'})))
transformers.append(dict_to_model_transformer)
self._transformer = ChainedTransformer(transformers=transformers)
def extract(self) -> Any:
record = self._extractor.extract()
if not record:
return None
return self._transformer.transform(record=record)
def get_scope(self) -> str:
return 'extractor.tableau_dashboard_metadata'
def _build_extractor(self) -> TableauGraphQLApiMetadataExtractor:
"""
Builds a TableauGraphQLApiMetadataExtractor. All data required can be retrieved with a single GraphQL call.
:return: A TableauGraphQLApiMetadataExtractor that provides core dashboard metadata.
"""
extractor = TableauGraphQLApiMetadataExtractor()
tableau_extractor_conf = \
Scoped.get_scoped_conf(self._conf, extractor.get_scope())\
.with_fallback(self._conf)\
.with_fallback(ConfigFactory.from_dict({TableauGraphQLApiExtractor.QUERY: self.query,
STATIC_RECORD_DICT: {'product': 'tableau'}
}
)
)
extractor.init(conf=tableau_extractor_conf)
return extractor