Skip to content

Commit 95a27f7

Browse files
authored
feat(bigquery): support decimalTargetType prioritization (#4343)
* feat(bigquery): support decimalTargetType prioritization Adds support to govern how values from external formats are converted to a corresponding BigQuery type in load jobs and federated table definitions.
1 parent c355eb8 commit 95a27f7

File tree

5 files changed

+71
-6
lines changed

5 files changed

+71
-6
lines changed

Diff for: bigquery/external.go

+14
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,14 @@ type ExternalDataConfig struct {
9696
// HivePartitioningOptions allows use of Hive partitioning based on the
9797
// layout of objects in Google Cloud Storage.
9898
HivePartitioningOptions *HivePartitioningOptions
99+
100+
// DecimalTargetTypes allows selection of how decimal values are converted when
101+
// processed in BigQuery, subject to the value type having sufficient precision/scale
102+
// to support the values. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is
103+
// selected if it is present in the list and if it supports the necessary precision and scale.
104+
//
105+
// StringTargetType supports all precision and scale values.
106+
DecimalTargetTypes []DecimalTargetType
99107
}
100108

101109
func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
@@ -114,6 +122,9 @@ func (e *ExternalDataConfig) toBQ() bq.ExternalDataConfiguration {
114122
if e.Options != nil {
115123
e.Options.populateExternalDataConfig(&q)
116124
}
125+
for _, v := range e.DecimalTargetTypes {
126+
q.DecimalTargetTypes = append(q.DecimalTargetTypes, string(v))
127+
}
117128
return q
118129
}
119130

@@ -128,6 +139,9 @@ func bqToExternalDataConfig(q *bq.ExternalDataConfiguration) (*ExternalDataConfi
128139
Schema: bqToSchema(q.Schema),
129140
HivePartitioningOptions: bqToHivePartitioningOptions(q.HivePartitioningOptions),
130141
}
142+
for _, v := range q.DecimalTargetTypes {
143+
e.DecimalTargetTypes = append(e.DecimalTargetTypes, DecimalTargetType(v))
144+
}
131145
switch {
132146
case q.CsvOptions != nil:
133147
e.Options = bqToCSVOptions(q.CsvOptions)

Diff for: bigquery/external_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ func TestExternalDataConfig(t *testing.T) {
8787
EnableListInference: true,
8888
},
8989
},
90+
{
91+
SourceFormat: Parquet,
92+
DecimalTargetTypes: []DecimalTargetType{BigNumericTargetType, NumericTargetType, StringTargetType},
93+
},
9094
} {
9195
q := want.toBQ()
9296
got, err := bqToExternalDataConfig(&q)

Diff for: bigquery/integration_test.go

+8-6
Original file line numberDiff line numberDiff line change
@@ -2208,9 +2208,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) {
22082208

22092209
err := autoTable.Create(ctx, &TableMetadata{
22102210
ExternalDataConfig: &ExternalDataConfig{
2211-
SourceFormat: Parquet,
2212-
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"},
2213-
AutoDetect: true,
2211+
SourceFormat: Parquet,
2212+
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"},
2213+
AutoDetect: true,
2214+
DecimalTargetTypes: []DecimalTargetType{StringTargetType},
22142215
HivePartitioningOptions: &HivePartitioningOptions{
22152216
Mode: AutoHivePartitioningMode,
22162217
SourceURIPrefix: "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/",
@@ -2225,9 +2226,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) {
22252226

22262227
err = customTable.Create(ctx, &TableMetadata{
22272228
ExternalDataConfig: &ExternalDataConfig{
2228-
SourceFormat: Parquet,
2229-
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*"},
2230-
AutoDetect: true,
2229+
SourceFormat: Parquet,
2230+
SourceURIs: []string{"gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/*"},
2231+
AutoDetect: true,
2232+
DecimalTargetTypes: []DecimalTargetType{NumericTargetType, StringTargetType},
22312233
HivePartitioningOptions: &HivePartitioningOptions{
22322234
Mode: CustomHivePartitioningMode,
22332235
SourceURIPrefix: "gs://cloud-samples-data/bigquery/hive-partitioning-samples/customlayout/{pkey:STRING}/",

Diff for: bigquery/load.go

+28
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ type LoadConfig struct {
6969
// HivePartitioningOptions allows use of Hive partitioning based on the
7070
// layout of objects in Cloud Storage.
7171
HivePartitioningOptions *HivePartitioningOptions
72+
73+
// DecimalTargetTypes allows selection of how decimal values are converted when
74+
// processed in BigQuery, subject to the value type having sufficient precision/scale
75+
// to support the values. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is
76+
// selected if it is present in the list and if it supports the necessary precision and scale.
77+
//
78+
// StringTargetType supports all precision and scale values.
79+
DecimalTargetTypes []DecimalTargetType
7280
}
7381

7482
func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
@@ -88,6 +96,9 @@ func (l *LoadConfig) toBQ() (*bq.JobConfiguration, io.Reader) {
8896
HivePartitioningOptions: l.HivePartitioningOptions.toBQ(),
8997
},
9098
}
99+
for _, v := range l.DecimalTargetTypes {
100+
config.Load.DecimalTargetTypes = append(config.Load.DecimalTargetTypes, string(v))
101+
}
91102
media := l.Src.populateLoadConfig(config.Load)
92103
return config, media
93104
}
@@ -107,6 +118,9 @@ func bqToLoadConfig(q *bq.JobConfiguration, c *Client) *LoadConfig {
107118
ProjectionFields: q.Load.ProjectionFields,
108119
HivePartitioningOptions: bqToHivePartitioningOptions(q.Load.HivePartitioningOptions),
109120
}
121+
for _, v := range q.Load.DecimalTargetTypes {
122+
lc.DecimalTargetTypes = append(lc.DecimalTargetTypes, DecimalTargetType(v))
123+
}
110124
var fc *FileConfig
111125
if len(q.Load.SourceUris) == 0 {
112126
s := NewReaderSource(nil)
@@ -168,3 +182,17 @@ func (l *Loader) newJob() (*bq.Job, io.Reader) {
168182
Configuration: config,
169183
}, media
170184
}
185+
186+
// DecimalTargetType is used to express preference ordering for converting
// values from external formats. Each value names a target type that a decimal
// value may be converted to.
type DecimalTargetType string

// The supported DecimalTargetType values. They are declared as constants
// rather than variables so that importing packages cannot reassign them.
const (
	// NumericTargetType indicates the preferred type is NUMERIC when supported.
	NumericTargetType DecimalTargetType = "NUMERIC"

	// BigNumericTargetType indicates the preferred type is BIGNUMERIC when supported.
	BigNumericTargetType DecimalTargetType = "BIGNUMERIC"

	// StringTargetType indicates the preferred type is STRING when supported.
	StringTargetType DecimalTargetType = "STRING"
)

Diff for: bigquery/load_test.go

+17
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,23 @@ func TestLoad(t *testing.T) {
367367
return j
368368
}(),
369369
},
370+
{
371+
dst: c.Dataset("dataset-id").Table("table-id"),
372+
src: func() *GCSReference {
373+
g := NewGCSReference("uri")
374+
g.SourceFormat = Parquet
375+
return g
376+
}(),
377+
config: LoadConfig{
378+
DecimalTargetTypes: []DecimalTargetType{BigNumericTargetType, NumericTargetType, StringTargetType},
379+
},
380+
want: func() *bq.Job {
381+
j := defaultLoadJob()
382+
j.Configuration.Load.SourceFormat = "PARQUET"
383+
j.Configuration.Load.DecimalTargetTypes = []string{"BIGNUMERIC", "NUMERIC", "STRING"}
384+
return j
385+
}(),
386+
},
370387
}
371388

372389
for i, tc := range testCases {

0 commit comments

Comments
 (0)