Skip to content

Commit

Permalink
fix(bigquery): strip whitespace from bigquery field names (#9160)
Browse files Browse the repository at this point in the history
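Strips surrounding whitespace from each token of the generated BigQuery field name before the tokens are joined with underscores, so an alias such as `Cast_value_ binary` becomes `Cast_value_binary`.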

Related to #9112, although this change does not fully resolve that issue.
  • Loading branch information
gforsyth committed May 10, 2024
1 parent 9caa552 commit 8e5cc3b
Show file tree
Hide file tree
Showing 36 changed files with 48 additions and 35 deletions.
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ def visit_HashBytes(self, op, *, arg, how):

@staticmethod
def _gen_valid_name(name: str) -> str:
return "_".join(_NAME_REGEX.findall(name)) or "tmp"
return "_".join(map(str.strip, _NAME_REGEX.findall(name))) or "tmp"

def visit_CountStar(self, op, *, arg, where):
if where is not None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
approx_quantiles(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL), IF(`t0`.`month` > 0, 2, NULL))[offset(1)] AS `ApproxMedian_double_col_ Greater_month_ 0`
approx_quantiles(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL), IF(`t0`.`month` > 0, 2, NULL))[offset(1)] AS `ApproxMedian_double_col_Greater_month_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
APPROX_COUNT_DISTINCT(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL)) AS `ApproxCountDistinct_double_col_ Greater_month_ 0`
APPROX_COUNT_DISTINCT(IF(`t0`.`month` > 0, `t0`.`double_col`, NULL)) AS `ApproxCountDistinct_double_col_Greater_month_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CAST(`t0`.`value` AS BYTES) AS `Cast_value_ binary`
CAST(`t0`.`value` AS BYTES) AS `Cast_value_binary`
FROM `t` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
bit_and(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitAnd_int_col_ Greater_bigint_col_ 0`
bit_and(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitAnd_int_col_Greater_bigint_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
bit_or(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitOr_int_col_ Greater_bigint_col_ 0`
bit_or(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitOr_int_col_Greater_bigint_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
bit_xor(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitXor_int_col_ Greater_bigint_col_ 0`
bit_xor(IF(`t0`.`bigint_col` > 0, `t0`.`int_col`, NULL)) AS `BitXor_int_col_Greater_bigint_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ SELECT
CAST(`t0`.`bool_col` AS INT64),
NULL
)
) AS `Sum_bool_col_ And_Greater_month_ 6_ Less_month_ 10`
) AS `Sum_bool_col_And_Greater_month_6_Less_month_10`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
AVG(IF(`t0`.`month` > 6, CAST(`t0`.`bool_col` AS INT64), NULL)) AS `Mean_bool_col_ Greater_month_ 6`
AVG(IF(`t0`.`month` > 6, CAST(`t0`.`bool_col` AS INT64), NULL)) AS `Mean_bool_col_Greater_month_6`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CAST(trunc(`t0`.`double_col`) AS INT64) AS `Cast_double_col_ int64`
CAST(trunc(`t0`.`double_col`) AS INT64) AS `Cast_double_col_int64`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
COVAR_POP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_ double_col`
COVAR_POP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_double_col`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
COVAR_SAMP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_ double_col`
COVAR_SAMP(`t0`.`double_col`, `t0`.`double_col`) AS `Covariance_double_col_double_col`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1`
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1`
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1`
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1`
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
MOD(EXTRACT(dayofweek FROM datetime('2017-01-01T04:55:59')) + 5, 7) AS `DayOfWeekIndex_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
INITCAP(CAST(datetime('2017-01-01T04:55:59') AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_ 1_ 1`
MOD(EXTRACT(dayofweek FROM DATE(2017, 1, 1)) + 5, 7) AS `DayOfWeekIndex_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_ 1_ 1`
INITCAP(CAST(DATE(2017, 1, 1) AS STRING FORMAT 'DAY')) AS `DayOfWeekName_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CAST(FLOOR(ieee_divide(`t0`.`double_col`, 0)) AS INT64) AS `FloorDivide_double_col_ 0`
CAST(FLOOR(ieee_divide(`t0`.`double_col`, 0)) AS INT64) AS `FloorDivide_double_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
ieee_divide(`t0`.`double_col`, 0) AS `Divide_double_col_ 0`
ieee_divide(`t0`.`double_col`, 0) AS `Divide_double_col_0`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_ 1_ 1`
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_ 1_ 1`
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_ 1_ 1_ 4_ 55_ 59`
EXTRACT(year FROM datetime('2017-01-01T04:55:59')) AS `ExtractYear_datetime_datetime_2017_1_1_4_55_59`
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_ 1_ 1`
EXTRACT(year FROM DATE(2017, 1, 1)) AS `ExtractYear_datetime_date_2017_1_1`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
parse_timestamp('%F', `t0`.`date_string_col`, 'UTC') AS `StringToTimestamp_date_string_col_ '%F'`
parse_timestamp('%F', `t0`.`date_string_col`, 'UTC') AS `StringToTimestamp_date_string_col_'%F'`
FROM `functional_alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
parse_timestamp('%F %Z', CONCAT(`t0`.`date_string_col`, ' America/New_York'), 'UTC') AS `StringToTimestamp_StringConcat_ '%F %Z'`
parse_timestamp('%F %Z', CONCAT(`t0`.`date_string_col`, ' America/New_York'), 'UTC') AS `StringToTimestamp_StringConcat_'%F %Z'`
FROM `functional_alltypes` AS `t0`
13 changes: 13 additions & 0 deletions ibis/backends/bigquery/tests/unit/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis import _
from ibis.backends.bigquery.compiler import BigQueryCompiler
from ibis.common.annotations import ValidationError

to_sql = ibis.bigquery.compile
Expand Down Expand Up @@ -633,3 +634,15 @@ def test_unnest(snapshot):
).select(level_two=lambda t: t.level_one.unnest())
)
snapshot.assert_match(result, "out_two_unnests.sql")


@pytest.mark.parametrize(
"fieldname, expected",
[
("TryCast(b, Float64)", "TryCast_b_Float64"),
("Cast(b, Int64)", "Cast_b_Int64"),
("that, is, a, lot, of, spaces", "that_is_a_lot_of_spaces"),
],
)
def test_field_names_strip_whitespace(fieldname, expected):
assert BigQueryCompiler._gen_valid_name(fieldname) == expected
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SELECT
farm_fingerprint(CAST('48656c6c6f2c20576f726c6421' AS BYTES FORMAT 'HEX')) AS `farm_fingerprint_0_b'Hello_ World_'`
farm_fingerprint(CAST('48656c6c6f2c20576f726c6421' AS BYTES FORMAT 'HEX')) AS `farm_fingerprint_0_b'Hello_World_'`

0 comments on commit 8e5cc3b

Please sign in to comment.