refactor(bigquery): update code samples to use strings for table and dataset IDs #9974

Merged
16 commits merged on Dec 26, 2019
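The gist of the refactor: the samples stop building references with client.dataset() and instead pass fully qualified IDs as plain strings. A minimal sketch of the before/after pattern, assuming a google-cloud-bigquery version that accepts string IDs (the project, dataset, and table names below are placeholders):

from google.cloud import bigquery

client = bigquery.Client()

# Old style: build a TableReference through client.dataset().
table_ref = client.dataset("my_dataset").table("my_table")
table = client.get_table(table_ref)  # API request

# New style: pass the fully qualified table ID as a string.
table_id = "my-project.my_dataset.my_table"
table = client.get_table(table_id)  # API request
print("{} has {} rows.".format(table.full_table_id, table.num_rows))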
346 changes: 0 additions & 346 deletions bigquery/docs/snippets.py
@@ -228,78 +228,6 @@ def test_create_partitioned_table(client, to_delete):
    assert table.time_partitioning.expiration_ms == 7776000000


def test_load_and_query_partitioned_table(client, to_delete):
    dataset_id = "load_partitioned_table_dataset_{}".format(_millis())
    dataset = bigquery.Dataset(client.dataset(dataset_id))
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_load_table_partitioned]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'
    table_id = "us_states_by_date"

    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
        bigquery.SchemaField("date", "DATE"),
    ]
    job_config.skip_leading_rows = 1
    job_config.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="date",  # name of column to use for partitioning
        expiration_ms=7776000000,
    )  # 90 days
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv"

    load_job = client.load_table_from_uri(
        uri, dataset_ref.table(table_id), job_config=job_config
    )  # API request

    assert load_job.job_type == "load"

    load_job.result()  # Waits for table load to complete.

    table = client.get_table(dataset_ref.table(table_id))
    print("Loaded {} rows to table {}".format(table.num_rows, table_id))
    # [END bigquery_load_table_partitioned]
    assert table.num_rows == 50

    project_id = client.project

    # [START bigquery_query_partitioned_table]
    import datetime

    # from google.cloud import bigquery
    # client = bigquery.Client()
    # project_id = 'my-project'
    # dataset_id = 'my_dataset'
    table_id = "us_states_by_date"

    sql_template = """
        SELECT *
        FROM `{}.{}.{}`
        WHERE date BETWEEN @start_date AND @end_date
    """
    sql = sql_template.format(project_id, dataset_id, table_id)
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = [
        bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)),
        bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)),
    ]

    # API request
    query_job = client.query(sql, job_config=job_config)

    rows = list(query_job)
    print("{} states were admitted to the US in the 1800s".format(len(rows)))
    # [END bigquery_query_partitioned_table]
    assert len(rows) == 29


@pytest.mark.skip(
    reason=(
        "update_table() is flaky "
@@ -1327,35 +1255,6 @@ def test_extract_table_compressed(client, to_delete):
    to_delete.insert(0, blob)


def test_client_query_legacy_sql(client):
"""Run a query with Legacy SQL explicitly set"""
# [START bigquery_query_legacy]
# from google.cloud import bigquery
# client = bigquery.Client()

query = (
"SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] "
'WHERE state = "TX" '
"LIMIT 100"
)

# Set use_legacy_sql to True to use legacy SQL syntax.
job_config = bigquery.QueryJobConfig()
job_config.use_legacy_sql = True

query_job = client.query(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
job_config=job_config,
) # API request - starts the query

# Print the results.
for row in query_job: # API request - fetches results
print(row)
# [END bigquery_query_legacy]


def test_client_query_total_rows(client, capsys):
"""Run a query and just check for how many rows."""
# [START bigquery_query_total_rows]
Expand Down Expand Up @@ -1420,251 +1319,6 @@ def test_manage_job(client):
# [END bigquery_get_job]


def test_client_query_w_named_params(client, capsys):
    """Run a query using named query parameters"""

    # [START bigquery_query_params_named]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = @corpus
        AND word_count >= @min_word_count
        ORDER BY word_count DESC;
    """
    query_params = [
        bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
        bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
    ]
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = query_params
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
        job_config=job_config,
    )  # API request - starts the query

    # Print the results
    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))

    assert query_job.state == "DONE"
    # [END bigquery_query_params_named]

    out, _ = capsys.readouterr()
    assert "the" in out


def test_client_query_w_positional_params(client, capsys):
    """Run a query using query parameters"""

    # [START bigquery_query_params_positional]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = ?
        AND word_count >= ?
        ORDER BY word_count DESC;
    """
    # Set the name to None to use positional parameters.
    # Note that you cannot mix named and positional parameters.
    query_params = [
        bigquery.ScalarQueryParameter(None, "STRING", "romeoandjuliet"),
        bigquery.ScalarQueryParameter(None, "INT64", 250),
    ]
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = query_params
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
        job_config=job_config,
    )  # API request - starts the query

    # Print the results
    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))

    assert query_job.state == "DONE"
    # [END bigquery_query_params_positional]

    out, _ = capsys.readouterr()
    assert "the" in out


def test_client_query_w_timestamp_params(client, capsys):
    """Run a query using query parameters"""

    # [START bigquery_query_params_timestamps]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    import datetime
    import pytz

    query = "SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);"
    query_params = [
        bigquery.ScalarQueryParameter(
            "ts_value",
            "TIMESTAMP",
            datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
        )
    ]
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = query_params
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
        job_config=job_config,
    )  # API request - starts the query

    # Print the results
    for row in query_job:
        print(row)

    assert query_job.state == "DONE"
    # [END bigquery_query_params_timestamps]

    out, _ = capsys.readouterr()
    assert "2016, 12, 7, 9, 0" in out


def test_client_query_w_array_params(client, capsys):
    """Run a query using array query parameters"""
    # [START bigquery_query_params_arrays]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    query = """
        SELECT name, sum(number) as count
        FROM `bigquery-public-data.usa_names.usa_1910_2013`
        WHERE gender = @gender
        AND state IN UNNEST(@states)
        GROUP BY name
        ORDER BY count DESC
        LIMIT 10;
    """
    query_params = [
        bigquery.ScalarQueryParameter("gender", "STRING", "M"),
        bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
    ]
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = query_params
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
        job_config=job_config,
    )  # API request - starts the query

    # Print the results
    for row in query_job:
        print("{}: \t{}".format(row.name, row.count))

    assert query_job.state == "DONE"
    # [END bigquery_query_params_arrays]

    out, _ = capsys.readouterr()
    assert "James" in out


def test_client_query_w_struct_params(client, capsys):
    """Run a query using struct query parameters"""
    # [START bigquery_query_params_structs]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    query = "SELECT @struct_value AS s;"
    query_params = [
        bigquery.StructQueryParameter(
            "struct_value",
            bigquery.ScalarQueryParameter("x", "INT64", 1),
            bigquery.ScalarQueryParameter("y", "STRING", "foo"),
        )
    ]
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = query_params
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
        job_config=job_config,
    )  # API request - starts the query

    # Print the results
    for row in query_job:
        print(row.s)

    assert query_job.state == "DONE"
    # [END bigquery_query_params_structs]

    out, _ = capsys.readouterr()
    assert "1" in out
    assert "foo" in out


def test_query_no_cache(client):
    # [START bigquery_query_no_cache]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    job_config = bigquery.QueryJobConfig()
    job_config.use_query_cache = False
    sql = """
        SELECT corpus
        FROM `bigquery-public-data.samples.shakespeare`
        GROUP BY corpus;
    """
    query_job = client.query(
        sql,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
        job_config=job_config,
    )  # API request

    # Print the results.
    for row in query_job:  # API request - fetches results
        print(row)
    # [END bigquery_query_no_cache]


def test_query_external_gcs_temporary_table(client):
    # [START bigquery_query_external_gcs_temp]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    # Configure the external data source and query job
    external_config = bigquery.ExternalConfig("CSV")
    external_config.source_uris = [
        "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    ]
    external_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    external_config.options.skip_leading_rows = 1  # optionally skip header row
    table_id = "us_states"
    job_config = bigquery.QueryJobConfig()
    job_config.table_definitions = {table_id: external_config}

    # Example query to find states starting with 'W'
    sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)

    query_job = client.query(sql, job_config=job_config)  # API request

    w_states = list(query_job)  # Waits for query to finish
    print("There are {} states with names starting with W.".format(len(w_states)))
    # [END bigquery_query_external_gcs_temp]
    assert len(w_states) == 4


def test_query_external_gcs_permanent_table(client, to_delete):
dataset_id = "query_external_gcs_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
Expand Down
2 changes: 1 addition & 1 deletion bigquery/docs/usage/queries.rst
@@ -43,7 +43,7 @@ Run a query using a named query parameter
See BigQuery documentation for more information on
`parameterized queries <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/client_query_w_named_params.py
   :language: python
   :dedent: 4
   :start-after: [START bigquery_query_params_named]
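For context, a sketch of what the standalone sample referenced above might contain, adapted from the snippet removed from snippets.py (the actual contents of samples/client_query_w_named_params.py in this PR may differ):

def client_query_w_named_params():

    # [START bigquery_query_params_named]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = @corpus
        AND word_count >= @min_word_count
        ORDER BY word_count DESC;
    """
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = [
        bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
        bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
    ]
    query_job = client.query(query, job_config=job_config)  # API request

    # Print the results.
    for row in query_job:
        print("{}: \t{}".format(row.word, row.word_count))
    # [END bigquery_query_params_named]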