Skip to content
Permalink
Browse files
fix: disambiguate missing policy tags from explicitly unset policy tags (#983)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #981 
Fixes #982
Towards googleapis/python-bigquery-pandas#387
🦕
  • Loading branch information
tswast committed Sep 24, 2021
1 parent ee1e25c commit f83c00acead70fc0ce9959eefb133a672d816277
@@ -128,6 +128,7 @@ Schema
:toctree: generated

schema.SchemaField
schema.PolicyTagList


Query
@@ -88,6 +88,7 @@
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.routine import RoutineType
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.schema import PolicyTagList
from google.cloud.bigquery.table import PartitionRange
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import Row
@@ -140,6 +141,7 @@
"RoutineReference",
# Shared helpers
"SchemaField",
"PolicyTagList",
"UDFResource",
"ExternalConfig",
"BigtableOptions",
@@ -15,12 +15,12 @@
"""Schemas for BigQuery tables / queries."""

import collections
from typing import Optional
import enum
from typing import Iterable, Union

from google.cloud.bigquery_v2 import types


_DEFAULT_VALUE = object()
_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
@@ -49,47 +49,62 @@
"""String names of the legacy SQL types to integer codes of Standard SQL types."""


class _DefaultSentinel(enum.Enum):
"""Object used as 'sentinel' indicating default value should be used.

Uses enum so that pytype/mypy knows that this is the only possible value.
https://stackoverflow.com/a/60605919/101923

Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8.
https://docs.python.org/3/library/typing.html#typing.Literal
"""

# The only member of this enum. The module aliases it as ``_DEFAULT_VALUE``
# and checks for it by identity (``is`` / ``is not``), never by equality.
DEFAULT_VALUE = object()


_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE


class SchemaField(object):
"""Describe a single field within a table schema.
Args:
name (str): The name of the field.
name: The name of the field.
field_type (str): The type of the field. See
field_type:
The type of the field. See
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
mode (Optional[str]): The mode of the field. See
mode:
Defaults to ``'NULLABLE'``. The mode of the field. See
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
description (Optional[str]): Description for the field.
description: Description for the field.
fields (Optional[Tuple[google.cloud.bigquery.schema.SchemaField]]):
Subfields (requires ``field_type`` of 'RECORD').
fields: Subfields (requires ``field_type`` of 'RECORD').
policy_tags (Optional[PolicyTagList]): The policy tag list for the field.
policy_tags: The policy tag list for the field.
precision (Optional[int]):
precision:
Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.
scale (Optional[int]):
scale:
Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
max_length (Optional[int]):
Maximum length of fields with STRING or BYTES type.
max_length: Maximum length of fields with STRING or BYTES type.
"""

def __init__(
self,
name,
field_type,
mode="NULLABLE",
description=_DEFAULT_VALUE,
fields=(),
policy_tags=None,
precision=_DEFAULT_VALUE,
scale=_DEFAULT_VALUE,
max_length=_DEFAULT_VALUE,
name: str,
field_type: str,
mode: str = "NULLABLE",
description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
fields: Iterable["SchemaField"] = (),
policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
):
self._properties = {
"name": name,
@@ -105,28 +120,12 @@ def __init__(
self._properties["scale"] = scale
if max_length is not _DEFAULT_VALUE:
self._properties["maxLength"] = max_length
if policy_tags is not _DEFAULT_VALUE:
self._properties["policyTags"] = (
policy_tags.to_api_repr() if policy_tags is not None else None
)
self._fields = tuple(fields)

self._policy_tags = self._determine_policy_tags(field_type, policy_tags)

@staticmethod
def _determine_policy_tags(
field_type: str, given_policy_tags: Optional["PolicyTagList"]
) -> Optional["PolicyTagList"]:
"""Return the given policy tags, or their suitable representation if `None`.

Args:
field_type: The type of the schema field.
given_policy_tags: The policy tags to maybe adjust.

Returns:
``given_policy_tags`` unchanged when it is not ``None``. Otherwise
``None`` when ``field_type``, upper-cased, is one of ``_STRUCT_TYPES``,
and a default-constructed ``PolicyTagList`` for any other
``field_type`` (including ``None``).
"""
# Explicitly supplied policy tags are passed through untouched.
if given_policy_tags is not None:
return given_policy_tags

# RECORD / STRUCT fields get no policy tag list at all.
if field_type is not None and field_type.upper() in _STRUCT_TYPES:
return None

# Every remaining case falls back to a default-constructed PolicyTagList.
return PolicyTagList()

@staticmethod
def __get_int(api_repr, name):
v = api_repr.get(name, _DEFAULT_VALUE)
@@ -152,10 +151,10 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
mode = api_repr.get("mode", "NULLABLE")
description = api_repr.get("description", _DEFAULT_VALUE)
fields = api_repr.get("fields", ())
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)

policy_tags = cls._determine_policy_tags(
field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags"))
)
if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
policy_tags = PolicyTagList.from_api_repr(policy_tags)

return cls(
field_type=field_type,
@@ -230,7 +229,8 @@ def policy_tags(self):
"""Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list
definition for this field.
"""
return self._policy_tags
resource = self._properties.get("policyTags")
return PolicyTagList.from_api_repr(resource) if resource is not None else None

def to_api_repr(self) -> dict:
"""Return a dictionary representing this schema field.
@@ -244,10 +244,6 @@ def to_api_repr(self) -> dict:
# add this to the serialized representation.
if self.field_type.upper() in _STRUCT_TYPES:
answer["fields"] = [f.to_api_repr() for f in self.fields]
else:
# Explicitly include policy tag definition (we must not do it for RECORD
# fields, because those are not leaf fields).
answer["policyTags"] = self.policy_tags.to_api_repr()

# Done; return the serialized dictionary.
return answer
@@ -272,7 +268,7 @@ def _key(self):
field_type = f"{field_type}({self.precision})"

policy_tags = (
() if self._policy_tags is None else tuple(sorted(self._policy_tags.names))
() if self.policy_tags is None else tuple(sorted(self.policy_tags.names))
)

return (
@@ -673,14 +673,15 @@ def test_unset_table_schema_attributes(self):
mode=old_field.mode,
description=None,
fields=old_field.fields,
policy_tags=None,
policy_tags=PolicyTagList(),
)

table.schema = new_schema
updated_table = Config.CLIENT.update_table(table, ["schema"])

self.assertFalse(updated_table.schema[1].description) # Empty string or None.
self.assertEqual(updated_table.schema[1].policy_tags.names, ())
# policyTags key expected to be missing from response.
self.assertIsNone(updated_table.schema[1].policy_tags)

def test_update_table_clustering_configuration(self):
dataset = self.temp_dataset(_make_dataset_id("update_table"))
@@ -484,13 +484,11 @@ def test_schema_setter_fields(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"policyTags": {"names": []},
}
age_repr = {
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
"policyTags": {"names": []},
}
self.assertEqual(
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
@@ -503,13 +501,11 @@ def test_schema_setter_valid_mappings_list(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"policyTags": {"names": []},
}
age_repr = {
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
"policyTags": {"names": []},
}
schema = [full_name_repr, age_repr]
config.schema = schema
Loading

0 comments on commit f83c00a

Please sign in to comment.