-
Notifications
You must be signed in to change notification settings - Fork 4.4k
[BEAM-11984] Request count metrics for GCSIO #14770
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a4d9603
920553e
2d90287
f264f02
aff97a7
f0d0dd5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -33,15 +33,20 @@ | |||
|
|
||||
| # Protect against environments where apitools library is not available. | ||||
| # pylint: disable=wrong-import-order, wrong-import-position | ||||
| from apache_beam.metrics import monitoring_infos | ||||
| from apache_beam.metrics.execution import MetricsEnvironment | ||||
| from apache_beam.metrics.metricbase import MetricName | ||||
|
|
||||
| try: | ||||
| from apache_beam.io.gcp import gcsio | ||||
| from apache_beam.io.gcp import gcsio, resource_identifiers | ||||
| from apache_beam.io.gcp.internal.clients import storage | ||||
| from apitools.base.py.exceptions import HttpError | ||||
| except ImportError: | ||||
| HttpError = None | ||||
| # pylint: enable=wrong-import-order, wrong-import-position | ||||
|
|
||||
| DEFAULT_GCP_PROJECT = 'apache-beam-testing' | ||||
| DEFAULT_PROJECT_NUMBER = 1 | ||||
|
|
||||
|
|
||||
| class FakeGcsClient(object): | ||||
|
|
@@ -50,6 +55,7 @@ class FakeGcsClient(object): | |||
|
|
||||
| def __init__(self): | ||||
| self.objects = FakeGcsObjects() | ||||
| self.buckets = FakeGcsBuckets() | ||||
| # Referenced in GcsIO.copy_batch() and GcsIO.delete_batch(). | ||||
| self._http = object() | ||||
|
|
||||
|
|
@@ -79,6 +85,17 @@ def get_metadata(self): | |||
| updated=last_updated_datetime) | ||||
|
|
||||
|
|
||||
| class FakeGcsBuckets(object): | ||||
| def __init__(self): | ||||
| pass | ||||
|
|
||||
| def get_bucket(self, bucket): | ||||
| return storage.Bucket(name=bucket, projectNumber=DEFAULT_PROJECT_NUMBER) | ||||
|
|
||||
| def Get(self, get_request): | ||||
| return self.get_bucket(get_request.bucket) | ||||
|
|
||||
|
|
||||
| class FakeGcsObjects(object): | ||||
| def __init__(self): | ||||
| self.files = {} | ||||
|
|
@@ -751,6 +768,53 @@ def test_mime_binary_encoding(self): | |||
| generator._handle_text(message) | ||||
| self.assertEqual(test_msg.encode('ascii'), output_buffer.getvalue()) | ||||
|
|
||||
| def test_downloader_monitoring_info(self): | ||||
| file_name = 'gs://gcsio-metrics-test/dummy_mode_file' | ||||
| file_size = 5 * 1024 * 1024 + 100 | ||||
| random_file = self._insert_random_file(self.client, file_name, file_size) | ||||
| self.gcs.open(file_name, 'r') | ||||
|
|
||||
| resource = resource_identifiers.GoogleCloudStorageBucket(random_file.bucket) | ||||
| labels = { | ||||
| monitoring_infos.SERVICE_LABEL: 'Storage', | ||||
| monitoring_infos.METHOD_LABEL: 'Objects.get', | ||||
| monitoring_infos.RESOURCE_LABEL: resource, | ||||
| monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket, | ||||
| monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER, | ||||
| monitoring_infos.STATUS_LABEL: 'ok' | ||||
| } | ||||
|
|
||||
| metric_name = MetricName( | ||||
| None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels) | ||||
| metric_value = MetricsEnvironment.process_wide_container().get_counter( | ||||
| metric_name).get_cumulative() | ||||
|
|
||||
| self.assertEqual(metric_value, 2) | ||||
|
|
||||
| def test_uploader_monitoring_info(self): | ||||
| file_name = 'gs://gcsio-metrics-test/dummy_mode_file' | ||||
| file_size = 5 * 1024 * 1024 + 100 | ||||
| random_file = self._insert_random_file(self.client, file_name, file_size) | ||||
| f = self.gcs.open(file_name, 'w') | ||||
|
|
||||
| resource = resource_identifiers.GoogleCloudStorageBucket(random_file.bucket) | ||||
| labels = { | ||||
| monitoring_infos.SERVICE_LABEL: 'Storage', | ||||
| monitoring_infos.METHOD_LABEL: 'Objects.insert', | ||||
| monitoring_infos.RESOURCE_LABEL: resource, | ||||
| monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket, | ||||
| monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER, | ||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these labels supposed to map to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's right, I converted this value to str() and it worked 👍 |
||||
| monitoring_infos.STATUS_LABEL: 'ok' | ||||
| } | ||||
|
|
||||
| f.close() | ||||
| metric_name = MetricName( | ||||
| None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels) | ||||
| metric_value = MetricsEnvironment.process_wide_container().get_counter( | ||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm assuming this process_wide_container is a global container that ends up being shared between tests, because we often run many tests in one process. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes the proper pattern it to reset it before each test MetricsEnvironment.set_process_wide_container(new MetricsContainerImpl())
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a MetricsEnvironment.process_wide_container().reset() at the begining of the tests
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The failing test does have a reset() call:
Shouldn't that have prevented this flake? |
||||
| metric_name).get_cumulative() | ||||
|
|
||||
| self.assertEqual(metric_value, 1) | ||||
|
|
||||
|
|
||||
| if __name__ == '__main__': | ||||
| logging.getLogger().setLevel(logging.INFO) | ||||
|
|
||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was the project not available on the API response after the first call? When you ran it on a pipeline were you able to see it populated on the API responses with logging?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I debugged it and also logged it but it seems that the
aclis empty, so I couldn't get the project.<Object acl: [] bucket: 'gcsio-metrics-test' generation: 1 name: 'dummy_mode_file' size: 5242980>