# BigQuery

## Supported data types
<table>
  <thead>
    <tr>
      <th>Name</th>
      <th>Data type</th>
      <th>Description</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types">Integer</a></td>
      <td><code translate="no" dir="ltr">INT64</code></td>
      <td>Numeric values without fractional components</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types">Floating point</a></td>
      <td><code translate="no" dir="ltr">FLOAT64</code></td>
      <td>Approximate numeric values with fractional components</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric_type">Numeric</a></td>
      <td><code translate="no" dir="ltr">NUMERIC</code></td>
      <td>Exact numeric values with fractional components</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bignumeric_type">BigNumeric</a></td>
      <td><code translate="no" dir="ltr">BIGNUMERIC</code></td>
      <td>Exact numeric values with fractional components</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type">Boolean</a></td>
      <td><code translate="no" dir="ltr">BOOL</code></td>
      <td>TRUE or FALSE (case-insensitive)</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type">String</a></td>
      <td><code translate="no" dir="ltr">STRING</code></td>
      <td>Variable-length character (Unicode) data</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type">Bytes</a></td>
      <td><code translate="no" dir="ltr">BYTES</code></td>
      <td>Variable-length binary data</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type">Date</a></td>
      <td><code translate="no" dir="ltr">DATE</code></td>
      <td>A logical calendar date</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type">Date/Time</a></td>
      <td><code translate="no" dir="ltr">DATETIME</code></td>
      <td>A year, month, day, hour, minute, second, and subsecond</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type">Time</a></td>
      <td><code translate="no" dir="ltr">TIME</code></td>
      <td>A time, independent of a specific date</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type">Timestamp</a></td>
      <td><code translate="no" dir="ltr">TIMESTAMP</code></td>
      <td>An absolute point in time, with microsecond precision</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type">Struct (Record)</a></td>
      <td><code translate="no" dir="ltr">STRUCT</code></td>
      <td>Container of ordered fields each with a type (required) and field name (optional)</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#geography_type">Geography</a></td>
      <td><code translate="no" dir="ltr">GEOGRAPHY</code></td>
      <td>A pointset on the Earth's surface (a set of points, lines and polygons on the <a href="http://earth-info.nga.mil/GandG/update/index.php">WGS84</a>
reference spheroid, with geodesic edges)</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#json_type">JSON</a></td>
      <td><code translate="no" dir="ltr">JSON</code></td>
      <td>Represents JSON, a lightweight data-interchange format</td>
    </tr>
    <tr>
      <td><a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type">Range</a></td>
      <td><code translate="no" dir="ltr">RANGE</code></td>
      <td>A range of <code translate="no" dir="ltr">DATE</code>, <code translate="no" dir="ltr">DATETIME</code>, or <code translate="no" dir="ltr">TIMESTAMP</code> values</td>
    </tr>
  </tbody>
</table>

## Create a dataset


In [None]:
%%bigquery
-- Create the dataset (declared with CREATE SCHEMA) used by the tables in this notebook.
-- IF NOT EXISTS lets the cell be re-run without failing when the dataset is already there.
CREATE SCHEMA IF NOT EXISTS `bq_ds`;

## Create a table

In [None]:
%%bigquery
-- Demo table with one column per data type; each column is named after its type.
-- OR REPLACE recreates the table on every run, so the cell can be re-executed.
CREATE OR REPLACE TABLE `bq_ds.all_types` (
  int_col         INT64,
  float_col       FLOAT64,
  numeric_col     NUMERIC,
  bignumeric_col  BIGNUMERIC,
  bool_col        BOOL,
  string_col      STRING,
  bytes_col       BYTES,
  date_col        DATE,
  datetime_col    DATETIME,
  time_col        TIME,
  timestamp_col   TIMESTAMP,
  struct_col      STRUCT<name STRING, age INT64>,  -- two named fields
  geography_col   GEOGRAPHY,
  json_col        JSON,
  range_col       RANGE<DATE>                      -- range over DATE values
);

In [None]:
%%bigquery
-- Insert one sample row that supplies a value for every column of `bq_ds.all_types`.
-- The explicit column list ties each value to its column by name, so the statement
-- keeps working if the table definition is later reordered or extended.
INSERT INTO `bq_ds.all_types` (
  int_col,
  float_col,
  numeric_col,
  bignumeric_col,
  bool_col,
  string_col,
  bytes_col,
  date_col,
  datetime_col,
  time_col,
  timestamp_col,
  struct_col,
  geography_col,
  json_col,
  range_col
)
VALUES (
  1,                                                 -- integer
  1.1,                                               -- float
  1,                                                 -- numeric
  1,                                                 -- big numeric
  true,                                              -- boolean
  'hello',                                           -- string
  b'abc\n',                                          -- bytes
  DATE '2024-09-18',                                 -- date
  DATETIME '2024-09-17 12:30:00.45',                 -- date with time
  TIME '22:59:00.45',                                -- time
  TIMESTAMP '2024-08-27T12:30:00.45 Europe/Berlin',  -- timestamp
  ('Peter', 25),                                     -- struct (name, age)
  ST_GEOGFROMTEXT('POINT(32 90)'),                   -- geography
  JSON '{"hello": "world", "b": [1,2,3]}',           -- json
  RANGE(DATE '2023-12-01', DATE '2023-12-31')        -- range
);

In [None]:
%%bigquery
-- Read back the full contents of the table, one output column per data type.
SELECT *
FROM `bq_ds.all_types`;

## Work with arrays
After creating a small sample table, the queries below filter the values inside each array (`WHERE x <= 5`), multiply the remaining values by 2 (`x * 2`), and build a new array from the result with the `ARRAY()` function.

In [None]:
%%bigquery
-- Build a three-row sample table from inline STRUCT literals.
-- UNNEST turns the array of structs into rows with the columns `id` and `list`.
CREATE OR REPLACE TABLE `bq_ds.arrays` AS
SELECT *
FROM UNNEST([
  STRUCT(1 AS id, [1, 2, 3] AS list),
  STRUCT(2 AS id, [4, 5, 6] AS list),
  STRUCT(3 AS id, [7, 8, 9] AS list)
]);

-- Display the rows that were just created.
SELECT *
FROM `bq_ds.arrays`
ORDER BY id ASC;

In [None]:
%%bigquery
-- For each row, derive `doubled`: keep only the list elements <= 5
-- and multiply each remaining element by 2.
SELECT
  id,
  list,
  ARRAY(
    SELECT elem * 2
    FROM UNNEST(list) AS elem
    WHERE elem <= 5
  ) AS doubled
FROM `bq_ds.arrays`
ORDER BY id ASC;

In [None]:
%%bigquery
-- Rewrite every list in place: drop elements greater than 5 and triple the rest.
-- BigQuery requires a WHERE clause on UPDATE; `WHERE true` deliberately targets all rows.
UPDATE `bq_ds.arrays`
SET
  list = ARRAY(
    SELECT elem * 3
    FROM UNNEST(list) AS elem
    WHERE elem <= 5
  )
WHERE true;

-- Show the table after the update.
SELECT *
FROM `bq_ds.arrays`
ORDER BY id ASC;

In [None]:
%%bigquery
-- Flatten the arrays: one output row per list element, exposed as `item`.
-- Rows whose list is empty contribute no output rows to the cross join.
SELECT *
FROM `bq_ds.arrays`
CROSS JOIN UNNEST(list) AS item
ORDER BY id ASC;

In [None]:
%%bigquery
-- UNNEST over an array of STRUCTs yields one row per struct
-- and one column per struct field (here: a, b, c).
SELECT *
FROM UNNEST([
  STRUCT(1 AS a, 2 AS b, 3 AS c)
]);

## Work with structs

In [None]:
%%bigquery
-- Demo table with four STRUCT shapes:
--   simple_col   : struct with a single scalar field
--   advanced_col : struct that combines a scalar with an array
--   nested_col   : struct that contains another struct
--   repeated_col : array of structs
CREATE OR REPLACE TABLE `bq_ds.structs` (
  simple_col    STRUCT<val INT64>,
  advanced_col  STRUCT<val INT64, names ARRAY<STRING>>,
  nested_col    STRUCT<n STRUCT<a INT64, b INT64>>,
  repeated_col  ARRAY<STRUCT<val INT64>>
);

In [None]:
%%bigquery
-- Insert three rows; each row builds its structs with a different construction syntax.
INSERT INTO `bq_ds.structs` (simple_col, advanced_col, nested_col, repeated_col)
VALUES
  -- Row 1: typeless syntax (bare tuples and STRUCT(...) without field names or types).
  (
    STRUCT(2),
    (23, ['Peter', 'Loki']),
    STRUCT((2, 3)),
    [STRUCT(2), STRUCT(3)]
  ),
  -- Row 2: typed syntax (every STRUCT and ARRAY spells out its element types).
  (
    STRUCT<INT64>(3),
    STRUCT<INT64, ARRAY<STRING>>(33, ['Bob']),
    STRUCT<STRUCT<INT64, INT64>>((STRUCT<INT64, INT64>(3, 4))),
    ARRAY<STRUCT<INT64>>[STRUCT<INT64>(2), STRUCT<INT64>(3)]
  ),
  -- Row 3: named-field syntax (fields labelled with AS).
  (
    STRUCT(4 AS val),
    STRUCT(44 AS val, ['Justus'] AS names),
    STRUCT(STRUCT(4, 5) AS n),
    [STRUCT(4 AS val), STRUCT(5)]
  );


In [None]:
%%bigquery
-- Read struct fields with dot notation and take the first entry of the nested array.
-- SAFE_OFFSET returns NULL when `names` has no element at that position, whereas
-- OFFSET would abort the whole query with an out-of-bounds error.
SELECT
  advanced_col.val,
  advanced_col.names[SAFE_OFFSET(0)] AS name
FROM `bq_ds.structs`;

In [None]:
%%bigquery
-- Return `val` for every row whose `names` array contains 'Loki'.
-- `IN UNNEST(...)` tests membership of a value in an array.
SELECT advanced_col.val
FROM `bq_ds.structs`
WHERE 'Loki' IN UNNEST(advanced_col.names);

## Work with JSON

In [None]:
%%bigquery
-- Demo table pairing a numeric id with a JSON-typed column.
-- OR REPLACE recreates the table on every run.
CREATE OR REPLACE TABLE `bq_ds.json` (
  id        INT64,
  json_col  JSON
);

In [None]:
%%bigquery
-- Insert four rows, each producing its JSON value in a different way.
-- The explicit column list keeps the statement valid if the table gains or reorders columns.
INSERT INTO `bq_ds.json` (id, json_col)
VALUES
  (1, JSON '{"a": 1, "b": 2}'),                                  -- JSON literal
  (2, JSON_ARRAY(10, 'peter', true, [20, 30])),                  -- JSON array
  (3, JSON_OBJECT('name','justus','friends',['bob','peter'])),   -- JSON object
  (4, TO_JSON(STRUCT('Gartenweg' AS street, 11 AS number)));     -- JSON from a STRUCT

-- Read the rows back; ORDER BY makes the displayed order deterministic.
SELECT *
FROM `bq_ds.json`
ORDER BY id ASC;

In [None]:
%%bigquery
-- Orders table: each row stores a customer's cart as a JSON document with a
-- "name" and an "items" array of {"product", "price"} objects.
CREATE OR REPLACE TABLE `bq_ds.orders` (
  id    INT64,
  cart  JSON
);

-- Triple-quoted JSON literals allow the documents to span several lines.
-- The explicit column list ties each value to its column by name.
INSERT INTO `bq_ds.orders` (id, cart)
VALUES
(1, JSON """{
        "name": "Alice",
        "items": [
            {"product": "book", "price": 10},
            {"product": "food", "price": 5}
        ]
    }"""),
(2, JSON """{
        "name": "Bob",
        "items": [
            {"product": "pen", "price": 20}
        ]
    }""");

-- Read the rows back; ORDER BY makes the displayed order deterministic.
SELECT *
FROM `bq_ds.orders`
ORDER BY id ASC;

In [None]:
%%bigquery
-- Sum the item prices of each order's cart.
-- JSON_QUERY_ARRAY turns the JSON "items" array into an ARRAY<JSON>, UNNEST iterates it,
-- and JSON_VALUE extracts each "price" as a STRING that is cast to INT64 before summing.
-- The alias gives the result column a readable name instead of an auto-generated one.
SELECT
  (
    SELECT SUM(CAST(JSON_VALUE(item, '$.price') AS INT64))
    FROM UNNEST(JSON_QUERY_ARRAY(cart, '$.items')) AS item
  ) AS cart_total
FROM `bq_ds.orders`;

## Work with timestamps

In [None]:
%%bigquery
-- Timestamp handling examples: current time, time-zone-aware extraction,
-- formatting, and parsing.
SELECT
  -- The moment the query runs.
  CURRENT_TIMESTAMP() AS now,
  -- A UTC instant, with the hour read in Berlin local time.
  EXTRACT(HOUR FROM TIMESTAMP('2024-06-25 20:30:00+00') AT TIME ZONE 'Europe/Berlin') AS hour_a,
  -- A literal given in Berlin local time; no AT TIME ZONE, so the hour is read in the default zone (UTC).
  EXTRACT(HOUR FROM TIMESTAMP '2024-06-25 20:30:00 Europe/Berlin') AS hour_b,
  -- Same format string, rendered in the default zone versus Europe/Berlin.
  FORMAT_TIMESTAMP("%b %Y %H", TIMESTAMP "2020-12-25 15:30:55+00") AS month_year_a,
  FORMAT_TIMESTAMP("%b %Y %H", TIMESTAMP "2020-12-25 15:30:55Z", 'Europe/Berlin') AS month_year_b,
  -- Parse a formatted string; the weekday matches the date (24 Dec 2024 is a Tuesday).
  PARSE_TIMESTAMP("%a %b %e %I:%M:%S %Y", "Tue Dec 24 07:30:00 2024") AS parsed;

## Load data

## Copy data

## Clone data

## Remote function SDP

```shell
pip install -r /path/to/requirements.txt
```