Skip to content
Browse files
fix: disambiguate missing policy tags from explicitly unset policy tags (#983)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a bug/issue before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #981 
Fixes #982
Towards googleapis/python-bigquery-pandas#387
  • Loading branch information
tswast committed Sep 24, 2021
1 parent ee1e25c commit f83c00a
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 219 deletions.
@@ -128,6 +128,7 @@ Schema
:toctree: generated


@@ -88,6 +88,7 @@
from import RoutineReference
from import RoutineType
from import SchemaField
from import PolicyTagList
from import PartitionRange
from import RangePartitioning
from import Row
@@ -140,6 +141,7 @@
# Shared helpers
@@ -15,12 +15,12 @@
"""Schemas for BigQuery tables / queries."""

import collections
from typing import Optional
import enum
from typing import Iterable, Union

from import types

_DEFAULT_VALUE = object()

# SQL types reference:
@@ -49,47 +49,62 @@
"""String names of the legacy SQL types to integer codes of Standard SQL types."""

class _DefaultSentinel(enum.Enum):
"""Object used as 'sentinel' indicating default value should be used.
Uses enum so that pytype/mypy knows that this is the only possible value.
Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8.

DEFAULT_VALUE = object()


class SchemaField(object):
"""Describe a single field within a table schema.
name (str): The name of the field.
name: The name of the field.
field_type (str): The type of the field. See
The type of the field. See
mode (Optional[str]): The mode of the field. See
Defaults to ``'NULLABLE'``. The mode of the field. See
description (Optional[str]): Description for the field.
description: Description for the field.
fields (Optional[Tuple[]]):
Subfields (requires ``field_type`` of 'RECORD').
fields: Subfields (requires ``field_type`` of 'RECORD').
policy_tags (Optional[PolicyTagList]): The policy tag list for the field.
policy_tags: The policy tag list for the field.
precision (Optional[int]):
Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.
scale (Optional[int]):
Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
max_length (Optional[int]):
Maximim length of fields with STRING or BYTES type.
max_length: Maximum length of fields with STRING or BYTES type.

def __init__(
name: str,
field_type: str,
mode: str = "NULLABLE",
description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
fields: Iterable["SchemaField"] = (),
policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
self._properties = {
"name": name,
@@ -105,28 +120,12 @@ def __init__(
self._properties["scale"] = scale
if max_length is not _DEFAULT_VALUE:
self._properties["maxLength"] = max_length
if policy_tags is not _DEFAULT_VALUE:
self._properties["policyTags"] = (
policy_tags.to_api_repr() if policy_tags is not None else None
self._fields = tuple(fields)

self._policy_tags = self._determine_policy_tags(field_type, policy_tags)

def _determine_policy_tags(
field_type: str, given_policy_tags: Optional["PolicyTagList"]
) -> Optional["PolicyTagList"]:
"""Return the given policy tags, or their suitable representation if `None`.
field_type: The type of the schema field.
given_policy_tags: The policy tags to maybe adjust.
if given_policy_tags is not None:
return given_policy_tags

if field_type is not None and field_type.upper() in _STRUCT_TYPES:
return None

return PolicyTagList()

def __get_int(api_repr, name):
v = api_repr.get(name, _DEFAULT_VALUE)
@@ -152,10 +151,10 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
mode = api_repr.get("mode", "NULLABLE")
description = api_repr.get("description", _DEFAULT_VALUE)
fields = api_repr.get("fields", ())
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)

policy_tags = cls._determine_policy_tags(
field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags"))
if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
policy_tags = PolicyTagList.from_api_repr(policy_tags)

return cls(
@@ -230,7 +229,8 @@ def policy_tags(self):
"""Optional[]: Policy tag list
definition for this field.
return self._policy_tags
resource = self._properties.get("policyTags")
return PolicyTagList.from_api_repr(resource) if resource is not None else None

def to_api_repr(self) -> dict:
"""Return a dictionary representing this schema field.
@@ -244,10 +244,6 @@ def to_api_repr(self) -> dict:
# add this to the serialized representation.
if self.field_type.upper() in _STRUCT_TYPES:
answer["fields"] = [f.to_api_repr() for f in self.fields]
# Explicitly include policy tag definition (we must not do it for RECORD
# fields, because those are not leaf fields).
answer["policyTags"] = self.policy_tags.to_api_repr()

# Done; return the serialized dictionary.
return answer
@@ -272,7 +268,7 @@ def _key(self):
field_type = f"{field_type}({self.precision})"

policy_tags = (
() if self._policy_tags is None else tuple(sorted(self._policy_tags.names))
() if self.policy_tags is None else tuple(sorted(self.policy_tags.names))

return (
@@ -673,14 +673,15 @@ def test_unset_table_schema_attributes(self):

table.schema = new_schema
updated_table = Config.CLIENT.update_table(table, ["schema"])

self.assertFalse(updated_table.schema[1].description) # Empty string or None.
self.assertEqual(updated_table.schema[1].policy_tags.names, ())
# policyTags key expected to be missing from response.

def test_update_table_clustering_configuration(self):
dataset = self.temp_dataset(_make_dataset_id("update_table"))
@@ -484,13 +484,11 @@ def test_schema_setter_fields(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"policyTags": {"names": []},
age_repr = {
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
"policyTags": {"names": []},
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
@@ -503,13 +501,11 @@ def test_schema_setter_valid_mappings_list(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"policyTags": {"names": []},
age_repr = {
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
"policyTags": {"names": []},
schema = [full_name_repr, age_repr]
config.schema = schema

0 comments on commit f83c00a

Please sign in to comment.