Skip to content
Permalink
Browse files
feat: add support for decimal target types (#735)
* feat: add support for decimal target types

* Add decimal target types support to ExternalConfig

* Remove ambiguous parts of DecimalTargetType docs.
  • Loading branch information
plamut committed Jul 10, 2021
1 parent cd2f09e commit 7d2d3e9
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 0 deletions.
@@ -38,6 +38,7 @@
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery import enums
from google.cloud.bigquery.enums import AutoRowIDs
from google.cloud.bigquery.enums import DecimalTargetType
from google.cloud.bigquery.enums import KeyResultStatementKind
from google.cloud.bigquery.enums import SqlTypeNames
from google.cloud.bigquery.enums import StandardSqlDataTypes
@@ -148,6 +149,7 @@
"AutoRowIDs",
"Compression",
"CreateDisposition",
"DecimalTargetType",
"DestinationFormat",
"DeterminismLevel",
"ExternalSourceFormat",
@@ -49,6 +49,24 @@ class Compression(object):
"""Specifies no compression."""


class DecimalTargetType:
    """Names of the SQL types usable as a target when converting decimal values.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#DecimalTargetType

    .. versionadded:: 2.21.0
    """

    NUMERIC = "NUMERIC"
    """Allow decimal values to be converted to the NUMERIC type."""

    BIGNUMERIC = "BIGNUMERIC"
    """Allow decimal values to be converted to the BIGNUMERIC type."""

    STRING = "STRING"
    """Allow decimal values to be converted to the STRING type."""


class CreateDisposition(object):
"""Specifies whether the job is allowed to create new tables. The default
value is :attr:`CREATE_IF_NEEDED`.
@@ -22,6 +22,7 @@

import base64
import copy
from typing import FrozenSet, Iterable, Optional

from google.cloud.bigquery._helpers import _to_bytes
from google.cloud.bigquery._helpers import _bytes_to_json
@@ -693,6 +694,28 @@ def compression(self):
def compression(self, value):
self._properties["compression"] = value

@property
def decimal_target_types(self) -> Optional[FrozenSet[str]]:
    """Possible SQL data types to which the source decimal values are converted.

    The value stored under ``decimalTargetTypes`` in the resource properties
    is returned as a frozenset; :data:`None` is returned when nothing is
    stored there.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types

    .. versionadded:: 2.21.0
    """
    target_types = self._properties.get("decimalTargetTypes")
    return None if target_types is None else frozenset(target_types)

@decimal_target_types.setter
def decimal_target_types(self, value: Optional[Iterable[str]]):
    if value is None:
        # Unsetting removes the key altogether; an already-missing key is fine.
        self._properties.pop("decimalTargetTypes", None)
        return
    # Stored as a list so the properties dict holds a plain sequence.
    self._properties["decimalTargetTypes"] = list(value)

@property
def hive_partitioning(self):
"""Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
@@ -14,6 +14,8 @@

"""Classes for load jobs."""

from typing import FrozenSet, Iterable, Optional

from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery.external_config import HivePartitioningOptions
from google.cloud.bigquery.format_options import ParquetOptions
@@ -121,6 +123,27 @@ def create_disposition(self):
def create_disposition(self, value):
self._set_sub_prop("createDisposition", value)

@property
def decimal_target_types(self) -> Optional[FrozenSet[str]]:
    """Possible SQL data types to which the source decimal values are converted.

    The ``decimalTargetTypes`` sub-property is returned as a frozenset;
    :data:`None` is returned when the sub-property lookup yields ``None``.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.decimal_target_types

    .. versionadded:: 2.21.0
    """
    target_types = self._get_sub_prop("decimalTargetTypes")
    if target_types is None:
        return None
    return frozenset(target_types)

@decimal_target_types.setter
def decimal_target_types(self, value: Optional[Iterable[str]]):
    if value is None:
        # Assigning None clears the sub-property instead of storing a null.
        self._del_sub_prop("decimalTargetTypes")
    else:
        self._set_sub_prop("decimalTargetTypes", list(value))

@property
def destination_encryption_configuration(self):
"""Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom
Binary file not shown.
@@ -864,6 +864,60 @@ def test_load_table_from_local_avro_file_then_dump_table(self):
sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength)
)

def test_load_table_from_local_parquet_file_decimal_types(self):
    """Load a local parquet file of decimals under different target types.

    With NUMERIC, BIGNUMERIC and STRING all allowed the load is expected to
    succeed and return the expected rows. With only NUMERIC allowed, waiting
    on the job is expected to raise ``BadRequest``.
    """
    from google.cloud.bigquery.enums import DecimalTargetType
    from google.cloud.bigquery.job import SourceFormat
    from google.cloud.bigquery.job import WriteDisposition

    table_name = "test_table_parquet"

    expected_rows = [
        (decimal.Decimal("123.999999999999"),),
        (decimal.Decimal("99999999999999999999999999.999999999999"),),
    ]

    dataset = self.temp_dataset(_make_dataset_id("load_local_parquet_then_dump"))
    table_ref = dataset.table(table_name)
    table = Table(table_ref)
    self.to_delete.insert(0, table)

    def start_load(config):
        # Start a load job from the parquet fixture; the file is closed
        # again before the caller waits on the returned job.
        with open(DATA_PATH / "numeric_38_12.parquet", "rb") as source_file:
            return Config.CLIENT.load_table_from_file(
                source_file, table_ref, job_config=config
            )

    job_config = bigquery.LoadJobConfig()
    job_config.source_format = SourceFormat.PARQUET
    job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    job_config.decimal_target_types = [
        DecimalTargetType.NUMERIC,
        DecimalTargetType.BIGNUMERIC,
        DecimalTargetType.STRING,
    ]

    load_job = start_load(job_config)
    load_job.result(timeout=JOB_TIMEOUT)  # Retry until done.

    self.assertEqual(load_job.output_rows, len(expected_rows))

    table = Config.CLIENT.get_table(table)
    fetched = self._fetch_single_page(table)
    fetched_tuples = [row.values() for row in fetched]
    self.assertEqual(sorted(fetched_tuples), sorted(expected_rows))

    # Forcing the NUMERIC type, however, should result in an error.
    job_config.decimal_target_types = [DecimalTargetType.NUMERIC]

    load_job = start_load(job_config)

    with self.assertRaises(BadRequest) as exc_info:
        load_job.result(timeout=JOB_TIMEOUT)

    self.assertIn("out of valid NUMERIC range", str(exc_info.exception))

def test_load_table_from_json_basic_use(self):
table_schema = (
bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
@@ -122,6 +122,45 @@ def test_create_disposition_setter(self):
config.create_disposition = disposition
self.assertEqual(config._properties["load"]["createDisposition"], disposition)

def test_decimal_target_types_miss(self):
    """A newly constructed config reports ``None`` for decimal target types."""
    job_config = self._get_target_class()()
    self.assertIsNone(job_config.decimal_target_types)

def test_decimal_target_types_hit(self):
    """Types stored in the raw ``load`` properties are read back as a frozenset."""
    from google.cloud.bigquery.enums import DecimalTargetType

    raw_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING]
    job_config = self._get_target_class()()
    job_config._properties["load"]["decimalTargetTypes"] = raw_types

    self.assertEqual(job_config.decimal_target_types, frozenset(raw_types))

def test_decimal_target_types_setter(self):
    """Assigning a tuple stores the same items, as a list, in the raw properties."""
    from google.cloud.bigquery.enums import DecimalTargetType

    job_config = self._get_target_class()()
    requested = (DecimalTargetType.NUMERIC, DecimalTargetType.BIGNUMERIC)

    job_config.decimal_target_types = requested

    stored = job_config._properties["load"]["decimalTargetTypes"]
    self.assertEqual(stored, list(requested))

def test_decimal_target_types_setter_w_none(self):
    """Assigning ``None`` removes the stored value, and doing so twice is harmless."""
    from google.cloud.bigquery.enums import DecimalTargetType

    job_config = self._get_target_class()()
    job_config._properties["load"]["decimalTargetTypes"] = [
        DecimalTargetType.BIGNUMERIC
    ]

    job_config.decimal_target_types = None

    self.assertIsNone(job_config.decimal_target_types)
    self.assertNotIn("decimalTargetTypes", job_config._properties["load"])

    # No error if unsetting an unset property.
    job_config.decimal_target_types = None

def test_destination_encryption_configuration_missing(self):
config = self._get_target_class()()
self.assertIsNone(config.destination_encryption_configuration)
@@ -532,6 +532,64 @@ def test_to_api_repr_parquet(self):

self.assertEqual(got_resource, exp_resource)

def test_from_api_repr_decimal_target_types(self):
    """Round-trip a resource through ``from_api_repr``/``to_api_repr``.

    Checked once with ``decimalTargetTypes`` present and once after the key
    has been removed from the resource.
    """
    from google.cloud.bigquery.enums import DecimalTargetType

    overrides = {
        "sourceFormat": "FORMAT_FOO",
        "decimalTargetTypes": [DecimalTargetType.NUMERIC],
    }
    resource = _copy_and_update(self.BASE_RESOURCE, overrides)

    config = external_config.ExternalConfig.from_api_repr(resource)

    self._verify_base(config)
    self.assertEqual(config.source_format, "FORMAT_FOO")
    self.assertEqual(
        config.decimal_target_types, frozenset([DecimalTargetType.NUMERIC])
    )

    # converting back to API representation should yield the same result
    self.assertEqual(config.to_api_repr(), resource)

    del resource["decimalTargetTypes"]
    config = external_config.ExternalConfig.from_api_repr(resource)
    self.assertIsNone(config.decimal_target_types)

    self.assertEqual(config.to_api_repr(), resource)

def test_to_api_repr_decimal_target_types(self):
    """Types assigned through the property appear in the API representation."""
    from google.cloud.bigquery.enums import DecimalTargetType

    config = external_config.ExternalConfig("FORMAT_FOO")
    config.decimal_target_types = [
        DecimalTargetType.NUMERIC,
        DecimalTargetType.STRING,
    ]

    expected_resource = {
        "sourceFormat": "FORMAT_FOO",
        "decimalTargetTypes": [DecimalTargetType.NUMERIC, DecimalTargetType.STRING],
    }
    self.assertEqual(config.to_api_repr(), expected_resource)

def test_to_api_repr_decimal_target_types_unset(self):
    """Assigning ``None`` removes the key from the API representation."""
    from google.cloud.bigquery.enums import DecimalTargetType

    config = external_config.ExternalConfig("FORMAT_FOO")
    config._properties["decimalTargetTypes"] = [DecimalTargetType.NUMERIC]
    config.decimal_target_types = None

    self.assertEqual(config.to_api_repr(), {"sourceFormat": "FORMAT_FOO"})

    # No error if unsetting when already unset.
    config.decimal_target_types = None


def _copy_and_update(d, u):
d = copy.deepcopy(d)

0 comments on commit 7d2d3e9

Please sign in to comment.