-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Add matchers for python MetricResults, add labels to MetricResults and populate them for dataflow system metrics. #7936
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,6 +51,11 @@ def _get_match(proto, filter_fn): | |
| return query[0] | ||
|
|
||
|
|
||
| # V1b3 MetricStructuredName keys to accept and copy to the MetricKey labels. | ||
| STRUCTURED_NAME_LABELS = [ | ||
| 'execution_step', 'original_name', 'output_user_name', 'step'] | ||
|
|
||
|
|
||
| class DataflowMetrics(MetricResults): | ||
| """Implementation of MetricResults class for the Dataflow runner.""" | ||
|
|
||
|
|
@@ -97,7 +102,9 @@ def _translate_step_name(self, internal_name): | |
|
|
||
| def _get_metric_key(self, metric): | ||
| """Populate the MetricKey object for a queried metric result.""" | ||
| try: | ||
| step = "" | ||
| name = metric.name.name # Always extract a name | ||
| try: # Try to extract the user step name. | ||
| # If ValueError is thrown within this try-block, it is because of | ||
| # one of the following: | ||
| # 1. Unable to translate the step name. Only happening with improperly | ||
|
|
@@ -108,23 +115,39 @@ def _get_metric_key(self, metric): | |
| step = _get_match(metric.name.context.additionalProperties, | ||
| lambda x: x.key == 'step').value | ||
| step = self._translate_step_name(step) | ||
| except ValueError: | ||
| pass | ||
|
|
||
| namespace = "dataflow/v1b3" # Try to extract namespace or add a default. | ||
| try: | ||
| namespace = _get_match(metric.name.context.additionalProperties, | ||
| lambda x: x.key == 'namespace').value | ||
| name = metric.name.name | ||
| except ValueError: | ||
| return None | ||
|
|
||
| return MetricKey(step, MetricName(namespace, name)) | ||
|
|
||
| def _populate_metric_results(self, response): | ||
| """Take a list of metrics, and convert it to a list of MetricResult.""" | ||
| user_metrics = [metric | ||
| for metric in response.metrics | ||
| if metric.name.origin == 'user'] | ||
| pass | ||
|
|
||
| labels = dict() | ||
| for kv in metric.name.context.additionalProperties: | ||
| if kv.key in STRUCTURED_NAME_LABELS: | ||
| labels[kv.key] = kv.value | ||
| # Package everything besides namespace and name the labels as well, | ||
| # including unmodified step names to assist in integration the exact | ||
| # unmodified values which come from dataflow. | ||
| return MetricKey(step, MetricName(namespace, name), labels=labels) | ||
|
|
||
| def _populate_metrics(self, response, result, user_metrics=False): | ||
| """Move metrics from response to results as MetricResults.""" | ||
| if user_metrics: | ||
| metrics = [metric | ||
| for metric in response.metrics | ||
| if metric.name.origin == 'user'] | ||
| else: | ||
| metrics = [metric | ||
| for metric in response.metrics | ||
| if metric.name.origin == 'dataflow/v1b3'] | ||
|
|
||
| # Get the tentative/committed versions of every metric together. | ||
| metrics_by_name = defaultdict(lambda: {}) | ||
| for metric in user_metrics: | ||
| for metric in metrics: | ||
| if (metric.name.name.endswith('[MIN]') or | ||
| metric.name.name.endswith('[MAX]') or | ||
| metric.name.name.endswith('[MEAN]') or | ||
|
|
@@ -148,7 +171,6 @@ def _populate_metric_results(self, response): | |
| metrics_by_name[metric_key][tentative_or_committed] = metric | ||
|
|
||
| # Now we create the MetricResult elements. | ||
| result = [] | ||
| for metric_key, metric in iteritems(metrics_by_name): | ||
| attempted = self._get_metric_value(metric['tentative']) | ||
| committed = self._get_metric_value(metric['committed']) | ||
|
|
@@ -158,8 +180,6 @@ def _populate_metric_results(self, response): | |
| attempted=attempted, | ||
| committed=committed)) | ||
|
|
||
| return result | ||
|
|
||
| def _get_metric_value(self, metric): | ||
| """Get a metric result object from a MetricUpdate from Dataflow API.""" | ||
| if metric is None: | ||
|
|
@@ -200,9 +220,18 @@ def _get_metrics_from_dataflow(self): | |
| self._cached_metrics = job_metrics | ||
| return job_metrics | ||
|
|
||
| def all_metrics(self): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we expand query() and __populate_metrics() with "query_user" and "query_v1b3" (based on code, can be query_system) parameters. This will keep only 1 method that will be more flexible than two of currently existing. Default values should keep code backwards compatible.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, we need to discuss that kind of change on the dev list. This PR introduces he bare minimum to support integration testing. To enhance this API we need to design on the dev list. |
||
| """Return all user and system metrics from the dataflow service.""" | ||
| metric_results = [] | ||
| response = self._get_metrics_from_dataflow() | ||
| self._populate_metrics(response, metric_results, user_metrics=True) | ||
| self._populate_metrics(response, metric_results, user_metrics=False) | ||
| return metric_results | ||
|
|
||
| def query(self, filter=None): | ||
| metric_results = [] | ||
| response = self._get_metrics_from_dataflow() | ||
| metric_results = self._populate_metric_results(response) | ||
| self._populate_metrics(response, metric_results, user_metrics=True) | ||
| return {self.COUNTERS: [elm for elm in metric_results | ||
| if self.matches(filter, elm.key) | ||
| and DataflowMetrics._is_counter(elm)], | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ajamato @Ardagan - We did not resolve this issue: It seems to me that
labelsare supposed to be a Python set, but a dict is being used here (see line 66).If labels are a dictionary, then you need to do
frozenset(self.labels.items())to include the values in the hash and not just the keys.It seems that labels should be a set. In that case, line 66 should be
self.labels = labels if labels else set()