Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

chore: using f-string instead of string.format #428

Merged
merged 4 commits into from
Dec 15, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 18 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
root = true

[*]
charset = utf-8
end_of_line = lf
indent_size = 2
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true

[*.py]
indent_size = 4

[{*.mk,*.make,Makefile}]
indent_style = tab

[*.md]
trim_trailing_whitespace = false
11 changes: 9 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ lint:
mypy:
mypy .

.PHONY: test
test: test_unit lint mypy
.PHONY: isort
isort:
isort .

.PHONY: isort_check
isort_check:
isort ./ --check --diff

.PHONY: test
test: test_unit lint mypy isort_check
2 changes: 1 addition & 1 deletion databuilder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import abc

from pyhocon import ConfigTree, ConfigFactory
from pyhocon import ConfigFactory, ConfigTree


class Scoped(object, metaclass=abc.ABCMeta):
Expand Down
1 change: 0 additions & 1 deletion databuilder/callback/call_back.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import abc
import logging

from typing import List, Optional

LOGGER = logging.getLogger(__name__)
Expand Down
12 changes: 7 additions & 5 deletions databuilder/extractor/athena_metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@

import logging
from collections import namedtuple
from itertools import groupby
from typing import (
Any, Dict, Iterator, Union,
)

from pyhocon import ConfigFactory, ConfigTree
from typing import Iterator, Union, Dict, Any

from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.sql_alchemy_extractor import SQLAlchemyExtractor
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata
from itertools import groupby
from databuilder.models.table_metadata import ColumnMetadata, TableMetadata

TableKey = namedtuple('TableKey', ['schema', 'table_name'])

Expand Down Expand Up @@ -45,14 +47,14 @@ class AthenaMetadataExtractor(Extractor):

def init(self, conf: ConfigTree) -> None:
conf = conf.with_fallback(AthenaMetadataExtractor.DEFAULT_CONFIG)
self._cluster = '{}'.format(conf.get_string(AthenaMetadataExtractor.CATALOG_KEY))
self._cluster = conf.get_string(AthenaMetadataExtractor.CATALOG_KEY)

self.sql_stmt = AthenaMetadataExtractor.SQL_STATEMENT.format(
where_clause_suffix=conf.get_string(AthenaMetadataExtractor.WHERE_CLAUSE_SUFFIX_KEY),
catalog_source=self._cluster
)

LOGGER.info('SQL for Athena metadata: {}'.format(self.sql_stmt))
LOGGER.info('SQL for Athena metadata: %s', self.sql_stmt)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we should be using an f-string here as well, for consistency

Copy link
Contributor Author

@dungdm93 dungdm93 Dec 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@allisonsuarez `string.format` and f-strings are eagerly evaluated, which means the logging message is still formatted even when it is never emitted to the output.
On the other hand, `LOGGER.info(format, args)` is lazily evaluated — the message is only formatted if the record is actually logged. So in that sense, it's better.
What do you think?


self._alchemy_extractor = SQLAlchemyExtractor()
sql_alch_conf = Scoped.get_scoped_conf(conf, self._alchemy_extractor.get_scope())\
Expand Down
7 changes: 4 additions & 3 deletions databuilder/extractor/base_bigquery_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@
import json
import logging
from collections import namedtuple
from typing import (
Any, Dict, Iterator, List,
)

import google.oauth2.service_account
import google_auth_httplib2
from googleapiclient.discovery import build
import httplib2
from googleapiclient.discovery import build
from pyhocon import ConfigTree
from typing import Any, Dict, Iterator, List

from databuilder.extractor.base_extractor import Extractor


DatasetRef = namedtuple('DatasetRef', ['datasetId', 'projectId'])
TableKey = namedtuple('TableKey', ['schema', 'table_name'])

Expand Down
2 changes: 1 addition & 1 deletion databuilder/extractor/base_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

import abc
from typing import Any

from pyhocon import ConfigTree
from typing import Any

from databuilder import Scoped

Expand Down
13 changes: 7 additions & 6 deletions databuilder/extractor/base_postgres_metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@
import abc
import logging
from collections import namedtuple
from itertools import groupby
from typing import (
Any, Dict, Iterator, Union,
)

from pyhocon import ConfigFactory, ConfigTree
from typing import Iterator, Union, Dict, Any

from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.sql_alchemy_extractor import SQLAlchemyExtractor
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata
from itertools import groupby

from databuilder.models.table_metadata import ColumnMetadata, TableMetadata

TableKey = namedtuple('TableKey', ['schema', 'table_name'])

Expand Down Expand Up @@ -47,7 +48,7 @@ def get_sql_statement(self, use_catalog_as_cluster_name: bool, where_clause_suff

def init(self, conf: ConfigTree) -> None:
conf = conf.with_fallback(BasePostgresMetadataExtractor.DEFAULT_CONFIG)
self._cluster = '{}'.format(conf.get_string(BasePostgresMetadataExtractor.CLUSTER_KEY))
self._cluster = conf.get_string(BasePostgresMetadataExtractor.CLUSTER_KEY)

self._database = conf.get_string(BasePostgresMetadataExtractor.DATABASE_KEY, default='postgres')

Expand All @@ -62,7 +63,7 @@ def init(self, conf: ConfigTree) -> None:

self.sql_stmt = sql_alch_conf.get_string(SQLAlchemyExtractor.EXTRACT_SQL)

LOGGER.info('SQL for postgres metadata: {}'.format(self.sql_stmt))
LOGGER.info('SQL for postgres metadata: %s', self.sql_stmt)

self._alchemy_extractor.init(sql_alch_conf)
self._extract_iter: Union[None, Iterator] = None
Expand Down
9 changes: 5 additions & 4 deletions databuilder/extractor/bigquery_metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
# SPDX-License-Identifier: Apache-2.0

import logging
from typing import (
Any, Dict, List, Set, cast,
)

from pyhocon import ConfigTree
from typing import cast, Any, Dict, List, Set

from databuilder.extractor.base_bigquery_extractor import BaseBigQueryExtractor, DatasetRef
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata

from databuilder.models.table_metadata import ColumnMetadata, TableMetadata

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -91,7 +92,7 @@ def _iterate_over_cols(self,
cols: List[ColumnMetadata],
total_cols: int) -> int:
if len(parent) > 0:
col_name = '{parent}.{field}'.format(parent=parent, field=column['name'])
col_name = f'{parent}.{column["name"]}'
else:
col_name = column['name']

Expand Down
22 changes: 10 additions & 12 deletions databuilder/extractor/bigquery_usage_extractor.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from collections import namedtuple
from datetime import date, timedelta
import logging
import re
from collections import namedtuple
from datetime import date, timedelta
from time import sleep
from typing import (
Any, Dict, Iterator, List, Optional, Tuple,
)

from pyhocon import ConfigTree
from typing import Any, Iterator, Dict, Optional, Tuple, List

from databuilder.extractor.base_bigquery_extractor import BaseBigQueryExtractor

Expand Down Expand Up @@ -47,7 +49,7 @@ def _count_usage(self) -> None: # noqa: C901
for entry in self._retrieve_records():
count += 1
if count % self.pagesize == 0:
LOGGER.info('Aggregated {} records'.format(count))
LOGGER.info(f'Aggregated %i records', count)

if entry is None:
continue
Expand Down Expand Up @@ -93,9 +95,7 @@ def _create_records(self, refResources: List[dict], resourcesProcessed: int, ema
return

if len(refResources) != resourcesProcessed:
LOGGER.warn(
'The number of tables listed in job {job_id} is not consistent'
.format(job_id=jobId))
LOGGER.warning(f'The number of tables listed in job {jobId} is not consistent')
return

for refResource in refResources:
Expand All @@ -117,17 +117,15 @@ def _retrieve_records(self) -> Iterator[Optional[Dict]]:
:return: Provides a record or None if no more to extract
"""
body = {
'resourceNames': [
'projects/{project_id}'.format(project_id=self.project_id)
],
'resourceNames': [f'projects/{self.project_id}'],
'pageSize': self.pagesize,
'filter': 'resource.type="bigquery_resource" AND '
'protoPayload.methodName="jobservice.jobcompleted" AND '
'timestamp >= "{timestamp}"'.format(timestamp=self.timestamp)
f'timestamp >= "{self.timestamp}"'
}
for page in self._page_over_results(body):
for entry in page['entries']:
yield(entry)
yield entry

def extract(self) -> Optional[Tuple[Any, int]]:
try:
Expand Down
17 changes: 9 additions & 8 deletions databuilder/extractor/bigquery_watermark_extractor.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from collections import namedtuple

import logging
import datetime
import logging
import textwrap
from collections import namedtuple
from typing import (
Any, Dict, Iterator, List, Tuple, Union,
)

from pyhocon import ConfigTree
from typing import Any, Dict, Iterator, List, Tuple, Union

from databuilder.extractor.base_bigquery_extractor import BaseBigQueryExtractor, DatasetRef
from databuilder.models.watermark import Watermark
Expand Down Expand Up @@ -70,7 +71,7 @@ def _retrieve_tables(self,
'bigquery',
tableRef['datasetId'],
prefix,
'__table__={partition_id}'.format(partition_id=td['low']),
f'__table__={td["low"]}',
part_type="low_watermark",
cluster=tableRef['projectId']
)
Expand All @@ -80,7 +81,7 @@ def _retrieve_tables(self,
'bigquery',
tableRef['datasetId'],
prefix,
'__table__={partition_id}'.format(partition_id=td['high']),
f'__table__={td["high"]}',
part_type="high_watermark",
cluster=tableRef['projectId']
)
Expand Down Expand Up @@ -129,7 +130,7 @@ def _get_partition_watermarks(self,
'bigquery',
tableRef['datasetId'],
tableRef['tableId'],
'{field}={partition_id}'.format(field=field, partition_id=low.partition_id),
f'{field}={low.partition_id}',
part_type="low_watermark",
cluster=tableRef['projectId']
)
Expand All @@ -140,7 +141,7 @@ def _get_partition_watermarks(self,
'bigquery',
tableRef['datasetId'],
tableRef['tableId'],
'{field}={partition_id}'.format(field=field, partition_id=high.partition_id),
f'{field}={high.partition_id}',
part_type="high_watermark",
cluster=tableRef['projectId']
)
Expand Down
12 changes: 7 additions & 5 deletions databuilder/extractor/cassandra_extractor.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from cassandra.cluster import Cluster
import cassandra.metadata
from typing import (
Dict, Iterator, Union,
)

import cassandra.metadata
from cassandra.cluster import Cluster
from pyhocon import ConfigFactory, ConfigTree
from typing import Iterator, Union, Dict

from databuilder.extractor.base_extractor import Extractor
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata
from databuilder.models.table_metadata import ColumnMetadata, TableMetadata


class CassandraExtractor(Extractor):
Expand Down Expand Up @@ -38,7 +40,7 @@ class CassandraExtractor(Extractor):

def init(self, conf: ConfigTree) -> None:
conf = conf.with_fallback(CassandraExtractor.DEFAULT_CONFIG)
self._cluster = '{}'.format(conf.get_string(CassandraExtractor.CLUSTER_KEY))
self._cluster = conf.get_string(CassandraExtractor.CLUSTER_KEY)
self._filter = conf.get(CassandraExtractor.FILTER_FUNCTION_KEY)
ips = conf.get_list(CassandraExtractor.IPS_KEY)
kwargs = conf.get(CassandraExtractor.KWARGS_KEY)
Expand Down
4 changes: 2 additions & 2 deletions databuilder/extractor/csv_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
import csv
import importlib
from collections import defaultdict
from typing import Any

from pyhocon import ConfigTree
from typing import Any

from databuilder.extractor.base_extractor import Extractor
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata
from databuilder.models.badge import Badge, BadgeMetadata
from databuilder.models.table_metadata import ColumnMetadata, TableMetadata


class CsvExtractor(Extractor):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,18 @@
# SPDX-License-Identifier: Apache-2.0

import logging

from pyhocon import ConfigTree, ConfigFactory
from typing import Any

from pyhocon import ConfigFactory, ConfigTree

from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery
from databuilder.rest_api.rest_api_query import RestApiQuery
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION
from databuilder.transformer.dict_to_model import DictToModel, MODEL_CLASS

from databuilder.transformer.dict_to_model import MODEL_CLASS, DictToModel

LOGGER = logging.getLogger(__name__)

Expand Down