dbt-labs · jtcohen6 · Mar 22, 2021 · Mar 4, 2021 · Mar 4, 2021 · Mar 4, 2021
diff --git a/core/dbt/contracts/connection.py b/core/dbt/contracts/connection.py
@@ -9,7 +9,7 @@
 from dbt.logger import GLOBAL_LOGGER as logger
 from typing_extensions import Protocol
 from dbt.dataclass_schema import (
-    dbtClassMixin, StrEnum, ExtensibleDbtClassMixin,
+    dbtClassMixin, StrEnum, ExtensibleDbtClassMixin, HyphenatedDbtClassMixin,
     ValidatedStringMixin, register_pattern
 )
 from dbt.contracts.util import Replaceable
@@ -212,9 +212,10 @@ def to_target_dict(self):
 
 
 @dataclass
-class QueryComment(dbtClassMixin):
+class QueryComment(HyphenatedDbtClassMixin):
+    # NOTE(review): switching to the hyphenated mixin presumably lets the new
+    # field be spelled `job-label` in config -- confirm against
+    # HyphenatedDbtClassMixin.__pre_deserialize__.
     comment: str = DEFAULT_QUERY_COMMENT
     append: bool = False
+    # Read by the BigQuery connection manager's raw_execute: when true, the
+    # query comment is converted into labels on the submitted job.
+    job_label: bool = False
 
 
 class AdapterRequiredConfig(HasCredentials, Protocol):

diff --git a/core/dbt/dataclass_schema.py b/core/dbt/dataclass_schema.py
@@ -73,7 +73,9 @@ def __post_serialize__(self, dct):
     # performing the conversion to a dict
     @classmethod
     def __pre_deserialize__(cls, data):
-        if cls._hyphenated:
+        # `data` might not be a dict, e.g. for `query_comment`, which accepts
+        # a dict or a string; only snake-case for dict values.
+        if cls._hyphenated and isinstance(data, dict):
             new_dict = {}
             for key in data:
                 if '-' in key:

diff --git a/plugins/bigquery/dbt/adapters/bigquery/connections.py b/plugins/bigquery/dbt/adapters/bigquery/connections.py
@@ -1,3 +1,5 @@
+import json
+import re
 from contextlib import contextmanager
 from dataclasses import dataclass
 from functools import lru_cache
@@ -305,12 +307,16 @@ def raw_execute(self, sql, fetch=False, *, use_legacy_sql=False):
 
         logger.debug('On {}: {}', conn.name, sql)
 
-        job_params = {'use_legacy_sql': use_legacy_sql}
+        if self.profile.query_comment.job_label:
+            query_comment = self.query_header.comment.query_comment
+            labels = self._labels_from_query_comment(query_comment)
+        else:
+            labels = {}
 
         if active_user:
-            job_params['labels'] = {
-                'dbt_invocation_id': active_user.invocation_id
-            }
+            labels['dbt_invocation_id'] = active_user.invocation_id
+
+        job_params = {'use_legacy_sql': use_legacy_sql, 'labels': labels}
 
         priority = conn.credentials.priority
         if priority == Priority.Batch:
@@ -544,6 +550,16 @@ def _retry_generator(self):
             initial=self.DEFAULT_INITIAL_DELAY,
             maximum=self.DEFAULT_MAXIMUM_DELAY)
 
+    def _labels_from_query_comment(self, comment: str) -> Dict:
+        """Translate a query comment into BigQuery job labels.
+
+        :param comment: The query comment text, either a JSON object or
+            arbitrary text.
+        :return: One sanitized label per key when the comment is a JSON
+            object; otherwise a single ``query_comment`` label holding the
+            sanitized text. A ``None`` comment yields no labels.
+        """
+        if comment is None:
+            return {}
+        try:
+            parsed = json.loads(comment)
+        except (TypeError, ValueError):
+            parsed = None
+        # Valid JSON that is not an object (a number, string, list, null)
+        # has no `.items()`, so it is labelled the same way as plain text.
+        if not isinstance(parsed, dict):
+            return {'query_comment': _sanitize_label(str(comment))}
+        return {
+            _sanitize_label(key): _sanitize_label(str(value))
+            for key, value in parsed.items()
+        }
+
 
 class _ErrorCounter(object):
     """Counts errors seen up to a threshold then raises the next error."""
@@ -573,3 +589,14 @@ def _is_retryable(error):
             e['reason'] == 'rateLimitExceeded' for e in error.errors):
         return True
     return False
+
+
+_SANITIZE_LABEL_PATTERN = re.compile(r"[^a-z0-9_-]")
+
+
+def _sanitize_label(value: str, max_length: int = 63) -> str:
+    """Return a legal value for a BigQuery label.
+
+    The value is lower-cased, every character outside ``[a-z0-9_-]`` is
+    replaced with an underscore, and the result is truncated to at most
+    ``max_length`` characters.
+    """
+    value = _SANITIZE_LABEL_PATTERN.sub("_", value.lower())
+    # Slice at `max_length` itself: the previous `max_length - 1` dropped one
+    # permitted character, and turned into a negative slice for a limit of 0.
+    return value[:max(max_length, 0)]
diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py
@@ -1,5 +1,6 @@
 import agate
 import decimal
+import json
 import re
 import unittest
 from contextlib import contextmanager
@@ -588,7 +589,6 @@ def test_query_and_results(self, mock_bq):
         self.mock_client.query.assert_called_once_with(
           'sql', job_config=mock_bq.QueryJobConfig())
 
-
     def test_copy_bq_table_appends(self):
         self._copy_table(
             write_disposition=dbt.adapters.bigquery.impl.WRITE_APPEND)
@@ -615,12 +615,20 @@ def test_copy_bq_table_truncates(self):
             kwargs['job_config'].write_disposition,
             dbt.adapters.bigquery.impl.WRITE_TRUNCATE)
 
+    def test_job_labels_valid_json(self):
+        # A JSON-object comment should come back as one label per key.
+        wanted = {"key": "value"}
+        comment = json.dumps(wanted)
+        self.assertEqual(
+            self.connections._labels_from_query_comment(comment), wanted)
+
+    def test_job_labels_invalid_json(self):
+        # Text that is not JSON collapses into a single sanitized label.
+        wanted = {"query_comment": "not_json"}
+        self.assertEqual(
+            self.connections._labels_from_query_comment("not json"), wanted)
+
     def _table_ref(self, proj, ds, table, conn):
         return google.cloud.bigquery.table.TableReference.from_string(
             '{}.{}.{}'.format(proj, ds, table))
 
     def _copy_table(self, write_disposition):
-
         self.connections.table_ref = self._table_ref
         source = BigQueryRelation.create(
             database='project', schema='dataset', identifier='table1')