In [None]:
# One-time environment setup: uncomment the lines below to install the
# BigQuery client libraries used by this notebook.
#!pip install --upgrade google-cloud-bigquery[bqstorage,pandas]
#!pip install google-cloud-bigquery-storage

In [79]:
import os
from google.cloud import bigquery
from google.cloud import bigquery_storage

In [2]:
# Display the installed google-cloud-bigquery client version.
bigquery.__version__

'2.13.1'

#### Requirements
To run this lab, you need to create a service account with the following roles:
- BigQuery Data Editor
- BigQuery Job User
- BigQuery Read Session User
- Storage Admin

In [3]:
# '__file__' is deliberately a quoted string: notebooks do not define __file__,
# so realpath() resolves the literal name against the current working directory
# and dirname() then yields that directory.
notebook_dir = os.path.dirname(os.path.realpath('__file__'))

# Point the Google client libraries at the key file stored next to the notebook.
service_account = notebook_dir + "/service-account.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account

In [None]:
# BigQuery client instance. No arguments are passed, so credentials are
# expected to come from GOOGLE_APPLICATION_CREDENTIALS (set in the cell above).
bigquery_client = bigquery.Client()

#### Create dataset and table

In [None]:
# Create the lab dataset in the client's default project.
# exists_ok=True keeps the cell re-runnable: if the dataset already exists the
# existing one is returned instead of raising a 409 Conflict.
dataset = bigquery_client.create_dataset(dataset="bigquery_lab", exists_ok=True)

In [65]:
# Create the `Person` table inside the dataset created in the previous cell.
table_ref = dataset.table(table_id="Person")

# Two required columns: a person's name and age.
schema = [
    bigquery.SchemaField(name="name", field_type="STRING", mode="REQUIRED"),
    bigquery.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED")
]

table = bigquery.Table(table_ref=table_ref, schema=schema)
# exists_ok=True keeps the cell re-runnable: an already existing table is
# returned instead of raising a 409 Conflict.
table = bigquery_client.create_table(table=table, exists_ok=True)
print(table.table_id)

Person


#### Import CSV from Google Cloud Storage

In [62]:
# Load the housing CSV from Cloud Storage into `bigquery_lab.housing`
# using an explicit schema.
uri = "gs://bigquery-lab/housing.csv"

try:
    dataset = bigquery_client.dataset(dataset_id="bigquery_lab")
    # Reference to the destination table
    table = dataset.table(table_id="housing")

    # Load configuration: explicit schema plus CSV parsing options.
    # The options are passed as keyword arguments; writing them as
    # `job_config.<attr> = ...` inside the call is a syntax error.
    job_config = bigquery.job.LoadJobConfig(
        schema=[
            bigquery.SchemaField("CRIM", "Float"),
            bigquery.SchemaField("ZN", "Float"),
            bigquery.SchemaField("INDUS", "Float"),
            bigquery.SchemaField("CHAS", "Integer"),
            bigquery.SchemaField("NOX", "Float"),
            bigquery.SchemaField("RM", "Float"),
            bigquery.SchemaField("AGE", "FLOAT"),
            bigquery.SchemaField("DIS", "Float"),
            bigquery.SchemaField("RAD", "Float"),
            bigquery.SchemaField("TAX", "Float"),
            bigquery.SchemaField("PTRATIO", "Float"),
            bigquery.SchemaField("B", "FLOAT"),
            bigquery.SchemaField("LSTAT", "Float"),
            bigquery.SchemaField("MEDV", "Float")
        ],
        source_format=bigquery.SourceFormat.CSV,
        field_delimiter=",",
        skip_leading_rows=1,  # skip the header row
    )

    # The target table goes in `destination`; `location` is the job's
    # geographic location and must not receive a table reference.
    load_job = bigquery_client.load_table_from_uri(
        source_uris=uri, destination=table, job_config=job_config
    )

    # Block until the load job finishes; raises if the job failed.
    load_job.result()
except Exception as err:
    # Best-effort cell: report the failure without stopping the notebook.
    print(err)

In [54]:
# Load the titanic CSV from Cloud Storage into `bigquery_lab.titanic`,
# letting BigQuery detect the schema.
uri = "gs://bigquery-lab/titanic.csv"

try:
    dataset = bigquery_client.dataset(dataset_id="bigquery_lab")
    table = dataset.table(table_id="titanic")
    job_config = bigquery.job.LoadJobConfig(
        autodetect=True,
        field_delimiter=";",  # this file is semicolon-separated
        source_format=bigquery.SourceFormat.CSV
    )

    # Use `uri` (the original `ruri` was an undefined name) and pass the target
    # table as `destination`; `location` is the job's geographic location.
    load_job = bigquery_client.load_table_from_uri(
        source_uris=uri, destination=table, job_config=job_config
    )

    # Block until the load job finishes; raises if the job failed.
    load_job.result()
except Exception as err:
    # Best-effort cell: report the failure without stopping the notebook.
    print(err)

#### Querying data with the BigQuery client

In [67]:
# Fetch the first 10 rows of the titanic table into a pandas DataFrame.
# The project is taken from the client rather than hard-coded, so the query
# targets the same project the other cells use for `bigquery_lab`.
query = f"""
    SELECT *
    FROM `{bigquery_client.project}.bigquery_lab.titanic`
    LIMIT 10
"""

# Disable the query cache so the query is actually executed each time.
job_config = bigquery.job.QueryJobConfig(use_query_cache=False)
query_job = bigquery_client.query(query=query, job_config=job_config)
results = query_job.to_dataframe()

In [68]:
# Preview the first rows of the query result.
results.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,180,0,3,"Leonard, Mr. Lionel",male,36.0,0,0,LINE,0.0,,S
1,264,0,1,"Harrison, Mr. William",male,40.0,0,0,112059,0.0,B94,S
2,278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0.0,,S
3,303,0,3,"Johnson, Mr. William Cahoone Jr",male,19.0,0,0,LINE,0.0,,S
4,414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0.0,,S


#### Export data to Cloud Storage

In [96]:
# Source: the `titanic` table in the lab dataset.
dataset = bigquery_client.dataset(dataset_id="bigquery_lab")
table = dataset.table("titanic")

# Target: an object in the lab bucket.
destination_uri = "gs://bigquery-lab/titanic_backup.csv"

# Start the extract job, then wait for it to complete.
extract_job = bigquery_client.extract_table(
    source=table,
    destination_uris=destination_uri,
)
extract_job.result()