Skip to content

Commit c9cd984

Browse files
authored
feat(bigquery): expose identifiers using a variety of formats (#5017)
* feat(bigquery): expose identifiers using a variety of formats. This PR adds an Identifier() method to common BQ resources so that users can get an identifier that is formatted appropriately for their use case (legacy SQL, standard SQL, referencing in the storage API, etc.). Existing instances of FullyQualifiedName() have been migrated to the new method. Fixes: #1955
1 parent f58a9f7 commit c9cd984

9 files changed

+365
-19
lines changed

Diff for: bigquery/dataset.go

+20
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"context"
1919
"errors"
2020
"fmt"
21+
"strings"
2122
"time"
2223

2324
"cloud.google.com/go/internal/optional"
@@ -88,6 +89,25 @@ func (c *Client) DatasetInProject(projectID, datasetID string) *Dataset {
8889
}
8990
}
9091

92+
// Identifier returns the ID of the dataset in the requested format.
93+
//
94+
// For Standard SQL format, the identifier will be quoted if the
95+
// ProjectID contains dash (-) characters.
96+
func (d *Dataset) Identifier(f IdentifierFormat) (string, error) {
97+
switch f {
98+
case LegacySQLID:
99+
return fmt.Sprintf("%s:%s", d.ProjectID, d.DatasetID), nil
100+
case StandardSQLID:
101+
// Quote project identifiers if they have a dash character.
102+
if strings.Contains(d.ProjectID, "-") {
103+
return fmt.Sprintf("`%s`.%s", d.ProjectID, d.DatasetID), nil
104+
}
105+
return fmt.Sprintf("%s.%s", d.ProjectID, d.DatasetID), nil
106+
default:
107+
return "", ErrUnknownIdentifierFormat
108+
}
109+
}
110+
91111
// Create creates a dataset in the BigQuery service. An error will be returned if the
92112
// dataset already exists. Pass in a DatasetMetadata value to configure the dataset.
93113
func (d *Dataset) Create(ctx context.Context, md *DatasetMetadata) (err error) {

Diff for: bigquery/dataset_test.go

+60
Original file line numberDiff line numberDiff line change
@@ -476,3 +476,63 @@ func TestConvertAccessEntry(t *testing.T) {
476476
t.Error("got nil, want error")
477477
}
478478
}
479+
480+
func TestDatasetIdentifiers(t *testing.T) {
481+
testDataset := &Dataset{
482+
ProjectID: "p",
483+
DatasetID: "d",
484+
c: nil,
485+
}
486+
for _, tc := range []struct {
487+
description string
488+
in *Dataset
489+
format IdentifierFormat
490+
want string
491+
wantErr bool
492+
}{
493+
{
494+
description: "empty format string",
495+
in: testDataset,
496+
format: "",
497+
wantErr: true,
498+
},
499+
{
500+
description: "legacy",
501+
in: testDataset,
502+
format: LegacySQLID,
503+
want: "p:d",
504+
},
505+
{
506+
description: "standard unquoted",
507+
in: testDataset,
508+
format: StandardSQLID,
509+
want: "p.d",
510+
},
511+
{
512+
description: "standard w/quoting",
513+
in: &Dataset{ProjectID: "p-p", DatasetID: "d"},
514+
format: StandardSQLID,
515+
want: "`p-p`.d",
516+
},
517+
{
518+
description: "api resource",
519+
in: testDataset,
520+
format: StorageAPIResourceID,
521+
wantErr: true,
522+
},
523+
} {
524+
got, err := tc.in.Identifier(tc.format)
525+
if tc.wantErr && err == nil {
526+
t.Errorf("case %q: wanted err, was success", tc.description)
527+
}
528+
if !tc.wantErr {
529+
if err != nil {
530+
t.Errorf("case %q: wanted success, got err: %v", tc.description, err)
531+
} else {
532+
if got != tc.want {
533+
t.Errorf("case %q: got %s, want %s", tc.description, got, tc.want)
534+
}
535+
}
536+
}
537+
}
538+
}

Diff for: bigquery/integration_test.go

+22-16
Original file line numberDiff line numberDiff line change
@@ -371,12 +371,12 @@ func TestIntegration_TableCreateView(t *testing.T) {
371371
}
372372
ctx := context.Background()
373373
table := newTable(t, schema)
374+
tableIdentifier, _ := table.Identifier(StandardSQLID)
374375
defer table.Delete(ctx)
375376

376377
// Test that standard SQL views work.
377378
view := dataset.Table("t_view_standardsql")
378-
query := fmt.Sprintf("SELECT APPROX_COUNT_DISTINCT(name) FROM `%s.%s.%s`",
379-
dataset.ProjectID, dataset.DatasetID, table.TableID)
379+
query := fmt.Sprintf("SELECT APPROX_COUNT_DISTINCT(name) FROM %s", tableIdentifier)
380380
err := view.Create(context.Background(), &TableMetadata{
381381
ViewQuery: query,
382382
UseStandardSQL: true,
@@ -936,10 +936,11 @@ func TestIntegration_DatasetUpdateAccess(t *testing.T) {
936936
// Create a sample UDF so we can verify adding authorized UDFs
937937
routineID := routineIDs.New()
938938
routine := dataset.Routine(routineID)
939+
routineSQLID, _ := routine.Identifier(StandardSQLID)
939940

940941
sql := fmt.Sprintf(`
941-
CREATE FUNCTION `+"`%s`"+`(x INT64) AS (x * 3);`,
942-
routine.FullyQualifiedName())
942+
CREATE FUNCTION %s(x INT64) AS (x * 3);`,
943+
routineSQLID)
943944
if _, _, err := runQuerySQL(ctx, sql); err != nil {
944945
t.Fatal(err)
945946
}
@@ -1348,13 +1349,14 @@ func TestIntegration_RoutineStoredProcedure(t *testing.T) {
13481349
// Define a simple stored procedure via DDL.
13491350
routineID := routineIDs.New()
13501351
routine := dataset.Routine(routineID)
1352+
routineSQLID, _ := routine.Identifier(StandardSQLID)
13511353
sql := fmt.Sprintf(`
1352-
CREATE OR REPLACE PROCEDURE `+"`%s`"+`(val INT64)
1354+
CREATE OR REPLACE PROCEDURE %s(val INT64)
13531355
BEGIN
13541356
SELECT CURRENT_TIMESTAMP() as ts;
13551357
SELECT val * 2 as f2;
13561358
END`,
1357-
routine.FullyQualifiedName())
1359+
routineSQLID)
13581360

13591361
if _, _, err := runQuerySQL(ctx, sql); err != nil {
13601362
t.Fatal(err)
@@ -1363,8 +1365,8 @@ func TestIntegration_RoutineStoredProcedure(t *testing.T) {
13631365

13641366
// Invoke the stored procedure.
13651367
sql = fmt.Sprintf(`
1366-
CALL `+"`%s`"+`(5)`,
1367-
routine.FullyQualifiedName())
1368+
CALL %s(5)`,
1369+
routineSQLID)
13681370

13691371
q := client.Query(sql)
13701372
it, err := q.Read(ctx)
@@ -2354,8 +2356,10 @@ func TestIntegration_QueryExternalHivePartitioning(t *testing.T) {
23542356
}
23552357
defer customTable.Delete(ctx)
23562358

2359+
customTableSQLID, _ := customTable.Identifier(StandardSQLID)
2360+
23572361
// Issue a test query that prunes based on the custom hive partitioning key, and verify the result is as expected.
2358-
sql := fmt.Sprintf("SELECT COUNT(*) as ct FROM `%s`.%s.%s WHERE pkey=\"foo\"", customTable.ProjectID, customTable.DatasetID, customTable.TableID)
2362+
sql := fmt.Sprintf("SELECT COUNT(*) as ct FROM %s WHERE pkey=\"foo\"", customTableSQLID)
23592363
q := client.Query(sql)
23602364
it, err := q.Read(ctx)
23612365
if err != nil {
@@ -3227,10 +3231,10 @@ func TestIntegration_ModelLifecycle(t *testing.T) {
32273231
// Create a model via a CREATE MODEL query
32283232
modelID := modelIDs.New()
32293233
model := dataset.Model(modelID)
3230-
modelRef := fmt.Sprintf("%s.%s.%s", dataset.ProjectID, dataset.DatasetID, modelID)
3234+
modelSQLID, _ := model.Identifier(StandardSQLID)
32313235

32323236
sql := fmt.Sprintf(`
3233-
CREATE MODEL `+"`%s`"+`
3237+
CREATE MODEL %s
32343238
OPTIONS (
32353239
model_type='linear_reg',
32363240
max_iteration=1,
@@ -3240,7 +3244,7 @@ func TestIntegration_ModelLifecycle(t *testing.T) {
32403244
SELECT 'a' AS f1, 2.0 AS label
32413245
UNION ALL
32423246
SELECT 'b' AS f1, 3.8 AS label
3243-
)`, modelRef)
3247+
)`, modelSQLID)
32443248
if _, _, err := runQuerySQL(ctx, sql); err != nil {
32453249
t.Fatal(err)
32463250
}
@@ -3417,13 +3421,14 @@ func TestIntegration_RoutineComplexTypes(t *testing.T) {
34173421

34183422
routineID := routineIDs.New()
34193423
routine := dataset.Routine(routineID)
3424+
routineSQLID, _ := routine.Identifier(StandardSQLID)
34203425
sql := fmt.Sprintf(`
3421-
CREATE FUNCTION `+"`%s`("+`
3426+
CREATE FUNCTION %s(
34223427
arr ARRAY<STRUCT<name STRING, val INT64>>
34233428
) AS (
34243429
(SELECT SUM(IF(elem.name = "foo",elem.val,null)) FROM UNNEST(arr) AS elem)
34253430
)`,
3426-
routine.FullyQualifiedName())
3431+
routineSQLID)
34273432
if _, _, err := runQuerySQL(ctx, sql); err != nil {
34283433
t.Fatal(err)
34293434
}
@@ -3480,10 +3485,11 @@ func TestIntegration_RoutineLifecycle(t *testing.T) {
34803485
// Create a scalar UDF routine via a CREATE FUNCTION query
34813486
routineID := routineIDs.New()
34823487
routine := dataset.Routine(routineID)
3488+
routineSQLID, _ := routine.Identifier(StandardSQLID)
34833489

34843490
sql := fmt.Sprintf(`
3485-
CREATE FUNCTION `+"`%s`"+`(x INT64) AS (x * 3);`,
3486-
routine.FullyQualifiedName())
3491+
CREATE FUNCTION %s(x INT64) AS (x * 3);`,
3492+
routineSQLID)
34873493
if _, _, err := runQuerySQL(ctx, sql); err != nil {
34883494
t.Fatal(err)
34893495
}

Diff for: bigquery/model.go

+24-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package bigquery
1717
import (
1818
"context"
1919
"fmt"
20+
"strings"
2021
"time"
2122

2223
"cloud.google.com/go/internal/optional"
@@ -41,9 +42,31 @@ type Model struct {
4142
c *Client
4243
}
4344

45+
// Identifier returns the ID of the model in the requested format.
46+
//
47+
// For Standard SQL format, the identifier will be quoted if the
48+
// ProjectID contains dash (-) characters.
49+
func (m *Model) Identifier(f IdentifierFormat) (string, error) {
50+
switch f {
51+
case LegacySQLID:
52+
return fmt.Sprintf("%s:%s.%s", m.ProjectID, m.DatasetID, m.ModelID), nil
53+
case StandardSQLID:
54+
// Per https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-create#model_name
55+
// we quote the entire identifier.
56+
out := fmt.Sprintf("%s.%s.%s", m.ProjectID, m.DatasetID, m.ModelID)
57+
if strings.Contains(out, "-") {
58+
out = fmt.Sprintf("`%s`", out)
59+
}
60+
return out, nil
61+
default:
62+
return "", ErrUnknownIdentifierFormat
63+
}
64+
}
65+
4466
// FullyQualifiedName returns the ID of the model in projectID:datasetID.modelid format.
4567
func (m *Model) FullyQualifiedName() string {
46-
return fmt.Sprintf("%s:%s.%s", m.ProjectID, m.DatasetID, m.ModelID)
68+
s, _ := m.Identifier(LegacySQLID)
69+
return s
4770
}
4871

4972
func (m *Model) toBQ() *bq.ModelReference {

Diff for: bigquery/model_test.go

+61
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,64 @@ func TestModelMetadataUpdateToBQ(t *testing.T) {
120120
}
121121
}
122122
}
123+
124+
func TestModelIdentifiers(t *testing.T) {
125+
testModel := &Model{
126+
ProjectID: "p",
127+
DatasetID: "d",
128+
ModelID: "m",
129+
c: nil,
130+
}
131+
for _, tc := range []struct {
132+
description string
133+
in *Model
134+
format IdentifierFormat
135+
want string
136+
wantErr bool
137+
}{
138+
{
139+
description: "empty format string",
140+
in: testModel,
141+
format: "",
142+
wantErr: true,
143+
},
144+
{
145+
description: "legacy",
146+
in: testModel,
147+
format: LegacySQLID,
148+
want: "p:d.m",
149+
},
150+
{
151+
description: "standard unquoted",
152+
in: testModel,
153+
format: StandardSQLID,
154+
want: "p.d.m",
155+
},
156+
{
157+
description: "standard w/dash",
158+
in: &Model{ProjectID: "p-p", DatasetID: "d", ModelID: "m"},
159+
format: StandardSQLID,
160+
want: "`p-p.d.m`",
161+
},
162+
{
163+
description: "api resource",
164+
in: testModel,
165+
format: StorageAPIResourceID,
166+
wantErr: true,
167+
},
168+
} {
169+
got, err := tc.in.Identifier(tc.format)
170+
if tc.wantErr && err == nil {
171+
t.Errorf("case %q: wanted err, was success", tc.description)
172+
}
173+
if !tc.wantErr {
174+
if err != nil {
175+
t.Errorf("case %q: wanted success, got err: %v", tc.description, err)
176+
} else {
177+
if got != tc.want {
178+
t.Errorf("case %q: got %s, want %s", tc.description, got, tc.want)
179+
}
180+
}
181+
}
182+
}
183+
}

Diff for: bigquery/routine.go

+19-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"context"
1919
"errors"
2020
"fmt"
21+
"strings"
2122
"time"
2223

2324
"cloud.google.com/go/internal/optional"
@@ -44,9 +45,26 @@ func (r *Routine) toBQ() *bq.RoutineReference {
4445
}
4546
}
4647

48+
// Identifier returns the ID of the routine in the requested format.
49+
//
50+
// For Standard SQL format, the identifier will be quoted if the
51+
// ProjectID contains dash (-) characters.
52+
func (r *Routine) Identifier(f IdentifierFormat) (string, error) {
53+
switch f {
54+
case StandardSQLID:
55+
if strings.Contains(r.ProjectID, "-") {
56+
return fmt.Sprintf("`%s`.%s.%s", r.ProjectID, r.DatasetID, r.RoutineID), nil
57+
}
58+
return fmt.Sprintf("%s.%s.%s", r.ProjectID, r.DatasetID, r.RoutineID), nil
59+
default:
60+
return "", ErrUnknownIdentifierFormat
61+
}
62+
}
63+
4764
// FullyQualifiedName returns an identifer for the routine in project.dataset.routine format.
4865
func (r *Routine) FullyQualifiedName() string {
49-
return fmt.Sprintf("%s.%s.%s", r.ProjectID, r.DatasetID, r.RoutineID)
66+
s, _ := r.Identifier(StandardSQLID)
67+
return s
5068
}
5169

5270
// Create creates a Routine in the BigQuery service.

0 commit comments

Comments
 (0)