-
Notifications
You must be signed in to change notification settings - Fork 4.5k
[BEAM-11986] Spanner write metric #15294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
348f8b3
4de1538
ceebe85
37f5927
616b45d
faaef14
e0c5842
d5380be
98b88af
df2e077
dd6624c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -177,7 +177,10 @@ | |
| from apache_beam import Flatten | ||
| from apache_beam import ParDo | ||
| from apache_beam import Reshuffle | ||
| from apache_beam.internal.metrics.metric import ServiceCallMetric | ||
| from apache_beam.io.gcp import resource_identifiers | ||
| from apache_beam.metrics import Metrics | ||
| from apache_beam.metrics import monitoring_infos | ||
| from apache_beam.pvalue import AsSingleton | ||
| from apache_beam.pvalue import PBegin | ||
| from apache_beam.pvalue import TaggedOutput | ||
|
|
@@ -189,12 +192,17 @@ | |
| from apache_beam.typehints import with_output_types | ||
| from apache_beam.utils.annotations import experimental | ||
|
|
||
| # Protect against environments where spanner library is not available. | ||
| # pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports | ||
| # pylint: disable=unused-import | ||
| try: | ||
| from google.cloud.spanner import Client | ||
| from google.cloud.spanner import KeySet | ||
| from google.cloud.spanner_v1 import batch | ||
| from google.cloud.spanner_v1.database import BatchSnapshot | ||
| from google.cloud.spanner_v1.proto.mutation_pb2 import Mutation | ||
| from google.api_core.exceptions import ClientError, GoogleAPICallError | ||
| from apitools.base.py.exceptions import HttpError | ||
| except ImportError: | ||
| Client = None | ||
| KeySet = None | ||
|
|
@@ -284,6 +292,8 @@ class _BeamSpannerConfiguration(namedtuple("_BeamSpannerConfiguration", | |
| ["project", | ||
| "instance", | ||
| "database", | ||
| "table", | ||
| "query_name", | ||
| "credentials", | ||
| "pool", | ||
| "snapshot_read_timestamp", | ||
|
|
@@ -320,6 +330,42 @@ def __init__(self, spanner_configuration): | |
| self._spanner_configuration = spanner_configuration | ||
| self._snapshot = None | ||
| self._session = None | ||
| self.base_labels = { | ||
| monitoring_infos.SERVICE_LABEL: 'Spanner', | ||
| monitoring_infos.METHOD_LABEL: 'Read', | ||
| monitoring_infos.SPANNER_PROJECT_ID: ( | ||
| self._spanner_configuration.project), | ||
| monitoring_infos.SPANNER_DATABASE_ID: ( | ||
| self._spanner_configuration.database), | ||
| } | ||
|
|
||
| def _table_metric(self, table_id, status): | ||
| database_id = self._spanner_configuration.database | ||
| project_id = self._spanner_configuration.project | ||
| resource = resource_identifiers.SpannerTable( | ||
| project_id, database_id, table_id) | ||
| labels = { | ||
| **self.base_labels, | ||
| monitoring_infos.RESOURCE_LABEL: resource, | ||
| monitoring_infos.SPANNER_TABLE_ID: table_id | ||
| } | ||
| service_call_metric = ServiceCallMetric( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ServiceCallMetric, by design, must be instantiated before the Spanner IO API call is made to the IO source/sink. The .call() method should then be invoked once the Spanner IO API call returns.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done |
||
| request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN, | ||
| base_labels=labels) | ||
| service_call_metric.call(str(status)) | ||
|
|
||
| def _query_metric(self, query_name, status): | ||
| project_id = self._spanner_configuration.project | ||
| resource = resource_identifiers.SpannerSqlQuery(project_id, query_name) | ||
| labels = { | ||
| **self.base_labels, | ||
| monitoring_infos.RESOURCE_LABEL: resource, | ||
| monitoring_infos.SPANNER_QUERY_NAME: query_name | ||
| } | ||
| service_call_metric = ServiceCallMetric( | ||
| request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN, | ||
| base_labels=labels) | ||
| service_call_metric.call(str(status)) | ||
|
|
||
| def _get_session(self): | ||
| if self._session is None: | ||
|
|
@@ -357,16 +403,32 @@ def process(self, element, spanner_transaction): | |
| # getting the transaction from the snapshot's session to run read operation. | ||
| # with self._snapshot.session().transaction() as transaction: | ||
| with self._get_session().transaction() as transaction: | ||
| table_id = self._spanner_configuration.table | ||
| query_name = self._spanner_configuration.query_name or '' | ||
|
|
||
| if element.is_sql is True: | ||
| transaction_read = transaction.execute_sql | ||
| metric_action = self._query_metric | ||
| metric_id = query_name | ||
| elif element.is_table is True: | ||
| transaction_read = transaction.read | ||
| metric_action = self._table_metric | ||
| metric_id = table_id | ||
| else: | ||
| raise ValueError( | ||
| "ReadOperation is improperly configure: %s" % str(element)) | ||
|
|
||
| for row in transaction_read(**element.kwargs): | ||
| yield row | ||
| try: | ||
| for row in transaction_read(**element.kwargs): | ||
| yield row | ||
|
|
||
| metric_action(metric_id, 'ok') | ||
| except (ClientError, GoogleAPICallError) as e: | ||
| metric_action(metric_id, e.code.value) | ||
| raise | ||
| except HttpError as e: | ||
| metric_action(metric_id, e) | ||
| raise | ||
|
|
||
|
|
||
| @with_input_types(ReadOperation) | ||
|
|
@@ -523,6 +585,43 @@ class _ReadFromPartitionFn(DoFn): | |
| """ | ||
| def __init__(self, spanner_configuration): | ||
| self._spanner_configuration = spanner_configuration | ||
| self.base_labels = { | ||
| monitoring_infos.SERVICE_LABEL: 'Spanner', | ||
| monitoring_infos.METHOD_LABEL: 'Read', | ||
| monitoring_infos.SPANNER_PROJECT_ID: ( | ||
| self._spanner_configuration.project), | ||
| monitoring_infos.SPANNER_DATABASE_ID: ( | ||
| self._spanner_configuration.database), | ||
| } | ||
| self.service_metric = None | ||
|
|
||
| def _table_metric(self, table_id): | ||
| database_id = self._spanner_configuration.database | ||
| project_id = self._spanner_configuration.project | ||
| resource = resource_identifiers.SpannerTable( | ||
| project_id, database_id, table_id) | ||
| labels = { | ||
| **self.base_labels, | ||
| monitoring_infos.RESOURCE_LABEL: resource, | ||
| monitoring_infos.SPANNER_TABLE_ID: table_id | ||
| } | ||
| service_call_metric = ServiceCallMetric( | ||
| request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN, | ||
| base_labels=labels) | ||
| return service_call_metric | ||
|
|
||
| def _query_metric(self, query_name): | ||
| project_id = self._spanner_configuration.project | ||
| resource = resource_identifiers.SpannerSqlQuery(project_id, query_name) | ||
| labels = { | ||
| **self.base_labels, | ||
| monitoring_infos.RESOURCE_LABEL: resource, | ||
| monitoring_infos.SPANNER_QUERY_NAME: query_name | ||
| } | ||
| service_call_metric = ServiceCallMetric( | ||
| request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN, | ||
| base_labels=labels) | ||
| return service_call_metric | ||
|
|
||
| def setup(self): | ||
| spanner_client = Client(self._spanner_configuration.project) | ||
|
|
@@ -537,16 +636,30 @@ def process(self, element): | |
| self._snapshot = BatchSnapshot.from_dict( | ||
| self._database, element['transaction_info']) | ||
|
|
||
| table_id = self._spanner_configuration.table | ||
| query_name = self._spanner_configuration.query_name or '' | ||
|
|
||
| if element['is_sql'] is True: | ||
| read_action = self._snapshot.process_query_batch | ||
| self.service_metric = self._query_metric(query_name) | ||
| elif element['is_table'] is True: | ||
| read_action = self._snapshot.process_read_batch | ||
| self.service_metric = self._table_metric(table_id) | ||
| else: | ||
| raise ValueError( | ||
| "ReadOperation is improperly configure: %s" % str(element)) | ||
|
|
||
| for row in read_action(element['partitions']): | ||
| yield row | ||
| try: | ||
| for row in read_action(element['partitions']): | ||
| yield row | ||
|
|
||
| self.service_metric.call('ok') | ||
| except (ClientError, GoogleAPICallError) as e: | ||
| self.service_metric(str(e.code.value)) | ||
| raise | ||
| except HttpError as e: | ||
| self.service_metric(str(e)) | ||
| raise | ||
|
|
||
| def teardown(self): | ||
| if self._snapshot: | ||
|
|
@@ -563,7 +676,8 @@ class ReadFromSpanner(PTransform): | |
| def __init__(self, project_id, instance_id, database_id, pool=None, | ||
| read_timestamp=None, exact_staleness=None, credentials=None, | ||
| sql=None, params=None, param_types=None, # with_query | ||
| table=None, columns=None, index="", keyset=None, # with_table | ||
| table=None, query_name=None, columns=None, index="", | ||
| keyset=None, # with_table | ||
| read_operations=None, # for read all | ||
| transaction=None | ||
| ): | ||
|
|
@@ -611,6 +725,8 @@ def __init__(self, project_id, instance_id, database_id, pool=None, | |
| project=project_id, | ||
| instance=instance_id, | ||
| database=database_id, | ||
| table=table, | ||
| query_name=query_name, | ||
| credentials=credentials, | ||
| pool=pool, | ||
| snapshot_read_timestamp=read_timestamp, | ||
|
|
@@ -725,6 +841,8 @@ def __init__( | |
| project=project_id, | ||
| instance=instance_id, | ||
| database=database_id, | ||
| table=None, | ||
| query_name=None, | ||
| credentials=credentials, | ||
| pool=pool, | ||
| snapshot_read_timestamp=None, | ||
|
|
@@ -1068,6 +1186,28 @@ def __init__(self, spanner_configuration): | |
| self._spanner_configuration = spanner_configuration | ||
| self._db_instance = None | ||
| self.batches = Metrics.counter(self.__class__, 'SpannerBatches') | ||
| self.base_labels = { | ||
| monitoring_infos.SERVICE_LABEL: 'Spanner', | ||
| monitoring_infos.METHOD_LABEL: 'Write', | ||
| monitoring_infos.SPANNER_PROJECT_ID: spanner_configuration.project, | ||
| monitoring_infos.SPANNER_DATABASE_ID: spanner_configuration.database, | ||
| } | ||
| self.service_metric = None | ||
|
|
||
| def _table_metric(self, table_id): | ||
| database_id = self._spanner_configuration.database | ||
| project_id = self._spanner_configuration.project | ||
| resource = resource_identifiers.SpannerTable( | ||
| project_id, database_id, table_id) | ||
| labels = { | ||
| **self.base_labels, | ||
| monitoring_infos.RESOURCE_LABEL: resource, | ||
| monitoring_infos.SPANNER_TABLE_ID: table_id | ||
| } | ||
| service_call_metric = ServiceCallMetric( | ||
| request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN, | ||
| base_labels=labels) | ||
| return service_call_metric | ||
|
|
||
| def setup(self): | ||
| spanner_client = Client(self._spanner_configuration.project) | ||
|
|
@@ -1078,22 +1218,33 @@ def setup(self): | |
|
|
||
| def process(self, element): | ||
| self.batches.inc() | ||
| with self._db_instance.batch() as b: | ||
| for m in element: | ||
| if m.operation == WriteMutation._OPERATION_DELETE: | ||
| batch_func = b.delete | ||
| elif m.operation == WriteMutation._OPERATION_REPLACE: | ||
| batch_func = b.replace | ||
| elif m.operation == WriteMutation._OPERATION_INSERT_OR_UPDATE: | ||
| batch_func = b.insert_or_update | ||
| elif m.operation == WriteMutation._OPERATION_INSERT: | ||
| batch_func = b.insert | ||
| elif m.operation == WriteMutation._OPERATION_UPDATE: | ||
| batch_func = b.update | ||
| else: | ||
| raise ValueError("Unknown operation action: %s" % m.operation) | ||
|
|
||
| batch_func(**m.kwargs) | ||
| try: | ||
| with self._db_instance.batch() as b: | ||
| for m in element: | ||
| table_id = m.kwargs['table'] | ||
| self.service_metric = self._table_metric(table_id) | ||
|
|
||
| if m.operation == WriteMutation._OPERATION_DELETE: | ||
| batch_func = b.delete | ||
| elif m.operation == WriteMutation._OPERATION_REPLACE: | ||
| batch_func = b.replace | ||
| elif m.operation == WriteMutation._OPERATION_INSERT_OR_UPDATE: | ||
| batch_func = b.insert_or_update | ||
| elif m.operation == WriteMutation._OPERATION_INSERT: | ||
| batch_func = b.insert | ||
| elif m.operation == WriteMutation._OPERATION_UPDATE: | ||
| batch_func = b.update | ||
| else: | ||
| raise ValueError("Unknown operation action: %s" % m.operation) | ||
| batch_func(**m.kwargs) | ||
|
|
||
| self.service_metric.call('ok') | ||
| except (ClientError, GoogleAPICallError) as e: | ||
| self.service_metric.call(str(e.code.value)) | ||
| raise | ||
| except HttpError as e: | ||
| self.service_metric.call(str(e)) | ||
| raise | ||
|
|
||
|
|
||
| @with_input_types(typing.Union[MutationGroup, _Mutator]) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use `table_id` as the name here to be consistent with the other `table_id` variable names.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is consistent with the parameters in this namedtuple; otherwise, there would be
`project_id`, `instance_id`, `database_id`, etc. What do you think?