Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

chore: using f-string instead of string.format #428

Merged
merged 4 commits into from
Dec 15, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 18 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
root = true

[*]
charset = utf-8
end_of_line = lf
indent_size = 2
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true

[*.py]
indent_size = 4

[{*.mk,*.make,Makefile}]
indent_style = tab

[*.md]
trim_trailing_whitespace = false
11 changes: 9 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ lint:
mypy:
mypy .

.PHONY: test
test: test_unit lint mypy
.PHONY: isort
isort:
isort .

.PHONY: isort_check
isort_check:
isort ./ --check --diff

.PHONY: test
test: test_unit lint mypy isort_check
2 changes: 1 addition & 1 deletion databuilder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import abc

from pyhocon import ConfigTree, ConfigFactory
from pyhocon import ConfigFactory, ConfigTree


class Scoped(object, metaclass=abc.ABCMeta):
Expand Down
1 change: 0 additions & 1 deletion databuilder/callback/call_back.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import abc
import logging

from typing import List, Optional

LOGGER = logging.getLogger(__name__)
Expand Down
12 changes: 7 additions & 5 deletions databuilder/extractor/athena_metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@

import logging
from collections import namedtuple
from itertools import groupby
from typing import (
Any, Dict, Iterator, Union,
)

from pyhocon import ConfigFactory, ConfigTree
from typing import Iterator, Union, Dict, Any

from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.sql_alchemy_extractor import SQLAlchemyExtractor
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata
from itertools import groupby
from databuilder.models.table_metadata import ColumnMetadata, TableMetadata

TableKey = namedtuple('TableKey', ['schema', 'table_name'])

Expand Down Expand Up @@ -45,14 +47,14 @@ class AthenaMetadataExtractor(Extractor):

def init(self, conf: ConfigTree) -> None:
conf = conf.with_fallback(AthenaMetadataExtractor.DEFAULT_CONFIG)
self._cluster = '{}'.format(conf.get_string(AthenaMetadataExtractor.CATALOG_KEY))
self._cluster = conf.get_string(AthenaMetadataExtractor.CATALOG_KEY)

self.sql_stmt = AthenaMetadataExtractor.SQL_STATEMENT.format(
where_clause_suffix=conf.get_string(AthenaMetadataExtractor.WHERE_CLAUSE_SUFFIX_KEY),
catalog_source=self._cluster
)

LOGGER.info('SQL for Athena metadata: {}'.format(self.sql_stmt))
LOGGER.info('SQL for Athena metadata: %s', self.sql_stmt)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we should be using an f-string here as well, for consistency

Copy link
Contributor Author

@dungdm93 dungdm93 Dec 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@allisonsuarez `string.format` and f-strings are eagerly evaluated, which means the logging message is still formatted even when it is never emitted to the output.
On the other hand, `LOGGER.info(format, args)` is lazily evaluated — the message is only formatted if the record is actually logged. So in that sense, it's better.
What do you think?


self._alchemy_extractor = SQLAlchemyExtractor()
sql_alch_conf = Scoped.get_scoped_conf(conf, self._alchemy_extractor.get_scope())\
Expand Down
7 changes: 4 additions & 3 deletions databuilder/extractor/base_bigquery_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@
import json
import logging
from collections import namedtuple
from typing import (
Any, Dict, Iterator, List,
)

import google.oauth2.service_account
import google_auth_httplib2
from googleapiclient.discovery import build
import httplib2
from googleapiclient.discovery import build
from pyhocon import ConfigTree
from typing import Any, Dict, Iterator, List

from databuilder.extractor.base_extractor import Extractor


DatasetRef = namedtuple('DatasetRef', ['datasetId', 'projectId'])
TableKey = namedtuple('TableKey', ['schema', 'table_name'])

Expand Down
2 changes: 1 addition & 1 deletion databuilder/extractor/base_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

import abc
from typing import Any

from pyhocon import ConfigTree
from typing import Any

from databuilder import Scoped

Expand Down
13 changes: 7 additions & 6 deletions databuilder/extractor/base_postgres_metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@
import abc
import logging
from collections import namedtuple
from itertools import groupby
from typing import (
Any, Dict, Iterator, Union,
)

from pyhocon import ConfigFactory, ConfigTree
from typing import Iterator, Union, Dict, Any

from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.sql_alchemy_extractor import SQLAlchemyExtractor
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata
from itertools import groupby

from databuilder.models.table_metadata import ColumnMetadata, TableMetadata

TableKey = namedtuple('TableKey', ['schema', 'table_name'])

Expand Down Expand Up @@ -47,7 +48,7 @@ def get_sql_statement(self, use_catalog_as_cluster_name: bool, where_clause_suff

def init(self, conf: ConfigTree) -> None:
conf = conf.with_fallback(BasePostgresMetadataExtractor.DEFAULT_CONFIG)
self._cluster = '{}'.format(conf.get_string(BasePostgresMetadataExtractor.CLUSTER_KEY))
self._cluster = conf.get_string(BasePostgresMetadataExtractor.CLUSTER_KEY)

self._database = conf.get_string(BasePostgresMetadataExtractor.DATABASE_KEY, default='postgres')

Expand All @@ -62,7 +63,7 @@ def init(self, conf: ConfigTree) -> None:

self.sql_stmt = sql_alch_conf.get_string(SQLAlchemyExtractor.EXTRACT_SQL)

LOGGER.info('SQL for postgres metadata: {}'.format(self.sql_stmt))
LOGGER.info('SQL for postgres metadata: %s', self.sql_stmt)

self._alchemy_extractor.init(sql_alch_conf)
self._extract_iter: Union[None, Iterator] = None
Expand Down
9 changes: 5 additions & 4 deletions databuilder/extractor/bigquery_metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
# SPDX-License-Identifier: Apache-2.0

import logging
from typing import (
Any, Dict, List, Set, cast,
)

from pyhocon import ConfigTree
from typing import cast, Any, Dict, List, Set

from databuilder.extractor.base_bigquery_extractor import BaseBigQueryExtractor, DatasetRef
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata

from databuilder.models.table_metadata import ColumnMetadata, TableMetadata

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -91,7 +92,7 @@ def _iterate_over_cols(self,
cols: List[ColumnMetadata],
total_cols: int) -> int:
if len(parent) > 0:
col_name = '{parent}.{field}'.format(parent=parent, field=column['name'])
col_name = f'{parent}.{column["name"]}'
else:
col_name = column['name']

Expand Down
22 changes: 10 additions & 12 deletions databuilder/extractor/bigquery_usage_extractor.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from collections import namedtuple
from datetime import date, timedelta
import logging
import re
from collections import namedtuple
from datetime import date, timedelta
from time import sleep
from typing import (
Any, Dict, Iterator, List, Optional, Tuple,
)

from pyhocon import ConfigTree
from typing import Any, Iterator, Dict, Optional, Tuple, List

from databuilder.extractor.base_bigquery_extractor import BaseBigQueryExtractor

Expand Down Expand Up @@ -47,7 +49,7 @@ def _count_usage(self) -> None: # noqa: C901
for entry in self._retrieve_records():
count += 1
if count % self.pagesize == 0:
LOGGER.info('Aggregated {} records'.format(count))
LOGGER.info(f'Aggregated %i records', count)

if entry is None:
continue
Expand Down Expand Up @@ -93,9 +95,7 @@ def _create_records(self, refResources: List[dict], resourcesProcessed: int, ema
return

if len(refResources) != resourcesProcessed:
LOGGER.warn(
'The number of tables listed in job {job_id} is not consistent'
.format(job_id=jobId))
LOGGER.warning(f'The number of tables listed in job {jobId} is not consistent')
return

for refResource in refResources:
Expand All @@ -117,17 +117,15 @@ def _retrieve_records(self) -> Iterator[Optional[Dict]]:
:return: Provides a record or None if no more to extract
"""
body = {
'resourceNames': [
'projects/{project_id}'.format(project_id=self.project_id)
],
'resourceNames': [f'projects/{self.project_id}'],
'pageSize': self.pagesize,
'filter': 'resource.type="bigquery_resource" AND '
'protoPayload.methodName="jobservice.jobcompleted" AND '
'timestamp >= "{timestamp}"'.format(timestamp=self.timestamp)
f'timestamp >= "{self.timestamp}"'
}
for page in self._page_over_results(body):
for entry in page['entries']:
yield(entry)
yield entry

def extract(self) -> Optional[Tuple[Any, int]]:
try:
Expand Down
17 changes: 9 additions & 8 deletions databuilder/extractor/bigquery_watermark_extractor.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from collections import namedtuple

import logging
import datetime
import logging
import textwrap
from collections import namedtuple
from typing import (
Any, Dict, Iterator, List, Tuple, Union,
)

from pyhocon import ConfigTree
from typing import Any, Dict, Iterator, List, Tuple, Union

from databuilder.extractor.base_bigquery_extractor import BaseBigQueryExtractor, DatasetRef
from databuilder.models.watermark import Watermark
Expand Down Expand Up @@ -70,7 +71,7 @@ def _retrieve_tables(self,
'bigquery',
tableRef['datasetId'],
prefix,
'__table__={partition_id}'.format(partition_id=td['low']),
f'__table__={td["low"]}',
part_type="low_watermark",
cluster=tableRef['projectId']
)
Expand All @@ -80,7 +81,7 @@ def _retrieve_tables(self,
'bigquery',
tableRef['datasetId'],
prefix,
'__table__={partition_id}'.format(partition_id=td['high']),
f'__table__={td["high"]}',
part_type="high_watermark",
cluster=tableRef['projectId']
)
Expand Down Expand Up @@ -129,7 +130,7 @@ def _get_partition_watermarks(self,
'bigquery',
tableRef['datasetId'],
tableRef['tableId'],
'{field}={partition_id}'.format(field=field, partition_id=low.partition_id),
f'{field}={low.partition_id}',
part_type="low_watermark",
cluster=tableRef['projectId']
)
Expand All @@ -140,7 +141,7 @@ def _get_partition_watermarks(self,
'bigquery',
tableRef['datasetId'],
tableRef['tableId'],
'{field}={partition_id}'.format(field=field, partition_id=high.partition_id),
f'{field}={high.partition_id}',
part_type="high_watermark",
cluster=tableRef['projectId']
)
Expand Down
12 changes: 7 additions & 5 deletions databuilder/extractor/cassandra_extractor.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0

from cassandra.cluster import Cluster
import cassandra.metadata
from typing import (
Dict, Iterator, Union,
)

import cassandra.metadata
from cassandra.cluster import Cluster
from pyhocon import ConfigFactory, ConfigTree
from typing import Iterator, Union, Dict

from databuilder.extractor.base_extractor import Extractor
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata
from databuilder.models.table_metadata import ColumnMetadata, TableMetadata


class CassandraExtractor(Extractor):
Expand Down Expand Up @@ -38,7 +40,7 @@ class CassandraExtractor(Extractor):

def init(self, conf: ConfigTree) -> None:
conf = conf.with_fallback(CassandraExtractor.DEFAULT_CONFIG)
self._cluster = '{}'.format(conf.get_string(CassandraExtractor.CLUSTER_KEY))
self._cluster = conf.get_string(CassandraExtractor.CLUSTER_KEY)
self._filter = conf.get(CassandraExtractor.FILTER_FUNCTION_KEY)
ips = conf.get_list(CassandraExtractor.IPS_KEY)
kwargs = conf.get(CassandraExtractor.KWARGS_KEY)
Expand Down
4 changes: 2 additions & 2 deletions databuilder/extractor/csv_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
import csv
import importlib
from collections import defaultdict
from typing import Any

from pyhocon import ConfigTree
from typing import Any

from databuilder.extractor.base_extractor import Extractor
from databuilder.models.table_metadata import TableMetadata, ColumnMetadata
from databuilder.models.badge import Badge, BadgeMetadata
from databuilder.models.table_metadata import ColumnMetadata, TableMetadata


class CsvExtractor(Extractor):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,18 @@
# SPDX-License-Identifier: Apache-2.0

import logging

from pyhocon import ConfigTree, ConfigFactory
from typing import Any

from pyhocon import ConfigFactory, ConfigTree

from databuilder import Scoped
from databuilder.extractor.base_extractor import Extractor
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery
from databuilder.rest_api.rest_api_query import RestApiQuery
from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed
from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_constants import ORGANIZATION
from databuilder.transformer.dict_to_model import DictToModel, MODEL_CLASS

from databuilder.transformer.dict_to_model import MODEL_CLASS, DictToModel

LOGGER = logging.getLogger(__name__)

Expand Down