docs: add loading data from Firestore backup sample (#737)
Follow-up to #736

To be included here: https://cloud.google.com/bigquery/docs/loading-data-cloud-firestore

Also

* Use `google-cloud-testutils` for cleanup as described in googleapis/python-test-utils#39 (see the sketch below)
tswast committed Jul 16, 2021
1 parent 4ff8bed commit 22fd848cae4af1148040e1faa31dd15a4d674687
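
Context for the cleanup change: `test_utils.prefixer.Prefixer` stamps generated resource IDs so that leaked test resources can be recognized and deleted later. A minimal sketch of the pattern, assembled from the conftest.py diff below (the sketch itself is not part of the commit; `should_cleanup` semantics are per googleapis/python-test-utils#39):

    from google.cloud import bigquery
    import test_utils.prefixer

    # Same constructor arguments as in conftest.py: repo name and samples path.
    prefixer = test_utils.prefixer.Prefixer("python-bigquery", "samples/snippets")

    client = bigquery.Client()

    # create_prefix() returns a fresh, uniquely prefixed resource ID.
    dataset_id = prefixer.create_prefix()
    client.create_dataset(bigquery.Dataset(f"{client.project}.{dataset_id}"))

    # should_cleanup() matches only stale IDs that this Prefixer created,
    # so routine cleanup never touches unrelated datasets.
    for dataset in client.list_datasets():
        if prefixer.should_cleanup(dataset.dataset_id):
            client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)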
samples/snippets/conftest.py
@@ -12,38 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import datetime
-import random
-
 from google.cloud import bigquery
 import pytest
+import test_utils.prefixer
 
 
-RESOURCE_PREFIX = "python_bigquery_samples_snippets"
-RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S"
-RESOURCE_DATE_LENGTH = 4 + 2 + 2 + 1 + 2 + 2 + 2
-
-
-def resource_prefix() -> str:
-    timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT)
-    random_string = hex(random.randrange(1000000))[2:]
-    return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}"
-
-
-def resource_name_to_date(resource_name: str):
-    start_date = len(RESOURCE_PREFIX) + 1
-    date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH]
-    return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT)
+prefixer = test_utils.prefixer.Prefixer("python-bigquery", "samples/snippets")
 
 
 @pytest.fixture(scope="session", autouse=True)
 def cleanup_datasets(bigquery_client: bigquery.Client):
-    yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1)
     for dataset in bigquery_client.list_datasets():
-        if (
-            dataset.dataset_id.startswith(RESOURCE_PREFIX)
-            and resource_name_to_date(dataset.dataset_id) < yesterday
-        ):
+        if prefixer.should_cleanup(dataset.dataset_id):
             bigquery_client.delete_dataset(
                 dataset, delete_contents=True, not_found_ok=True
             )
@@ -62,14 +42,25 @@ def project_id(bigquery_client):
 
 @pytest.fixture(scope="session")
 def dataset_id(bigquery_client: bigquery.Client, project_id: str):
-    dataset_id = resource_prefix()
+    dataset_id = prefixer.create_prefix()
     full_dataset_id = f"{project_id}.{dataset_id}"
     dataset = bigquery.Dataset(full_dataset_id)
     bigquery_client.create_dataset(dataset)
     yield dataset_id
     bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)
 
 
+@pytest.fixture
+def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
+    """Create a new table ID each time, so random_table_id can be used as
+    a target for load jobs.
+    """
+    random_table_id = prefixer.create_prefix()
+    full_table_id = f"{project_id}.{dataset_id}.{random_table_id}"
+    yield full_table_id
+    bigquery_client.delete_table(full_table_id, not_found_ok=True)
+
+
 @pytest.fixture
 def bigquery_client_patch(monkeypatch, bigquery_client):
     monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client)
samples/snippets/load_table_uri_firestore.py
@@ -0,0 +1,55 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def load_table_uri_firestore(table_id):
+    orig_table_id = table_id
+    # [START bigquery_load_table_gcs_firestore]
+    # TODO(developer): Set table_id to the ID of the table to create.
+    table_id = "your-project.your_dataset.your_table_name"
+
+    # TODO(developer): Set uri to the path of the kind export metadata
+    uri = (
+        "gs://cloud-samples-data/bigquery/us-states"
+        "/2021-07-02T16:04:48_70344/all_namespaces/kind_us-states"
+        "/all_namespaces_kind_us-states.export_metadata"
+    )
+
+    # TODO(developer): Set projection_fields to a list of document properties
+    # to import. Leave unset or set to `None` for all fields.
+    projection_fields = ["name", "post_abbr"]
+
+    # [END bigquery_load_table_gcs_firestore]
+    table_id = orig_table_id
+
+    # [START bigquery_load_table_gcs_firestore]
+    from google.cloud import bigquery
+
+    # Construct a BigQuery client object.
+    client = bigquery.Client()
+
+    job_config = bigquery.LoadJobConfig(
+        source_format=bigquery.SourceFormat.DATASTORE_BACKUP,
+        projection_fields=projection_fields,
+    )
+
+    load_job = client.load_table_from_uri(
+        uri, table_id, job_config=job_config
+    )  # Make an API request.
+
+    load_job.result()  # Waits for the job to complete.
+
+    destination_table = client.get_table(table_id)
+    print("Loaded {} rows.".format(destination_table.num_rows))
+    # [END bigquery_load_table_gcs_firestore]
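
For a quick sanity check after the load (not part of the commit; the table ID below is the sample's placeholder), the loaded rows can be read back with the regular client, assuming the two projected Firestore properties:

    from google.cloud import bigquery

    client = bigquery.Client()
    table_id = "your-project.your_dataset.your_table_name"

    # Fetch a handful of rows; the us-states export exposes the projected
    # "name" and "post_abbr" properties as columns.
    for row in client.list_rows(table_id, max_results=5):
        print(row["name"], row["post_abbr"])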
samples/snippets/load_table_uri_firestore_test.py
@@ -0,0 +1,21 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import load_table_uri_firestore
+
+
+def test_load_table_uri_firestore(capsys, random_table_id):
+    load_table_uri_firestore.load_table_uri_firestore(random_table_id)
+    out, _ = capsys.readouterr()
+    assert "Loaded 50 rows." in out
samples/snippets/requirements-test.txt
@@ -1,2 +1,3 @@
+google-cloud-testutils==0.3.0
 pytest==6.2.4
 mock==4.0.3
samples/snippets/update_with_dml_test.py
@@ -15,13 +15,13 @@
 from google.cloud import bigquery
 import pytest
 
-from conftest import resource_prefix
+from conftest import prefixer
 import update_with_dml
 
 
 @pytest.fixture
 def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
-    table_id = f"{resource_prefix()}_update_with_dml"
+    table_id = f"{prefixer.create_prefix()}_update_with_dml"
     yield table_id
     full_table_id = f"{project_id}.{dataset_id}.{table_id}"
     bigquery_client.delete_table(full_table_id, not_found_ok=True)
