In [19]:
import pandas as pd
import numpy as np
import google.auth
from google.auth import compute_engine
from google.cloud import bigquery

# Create the BQ client. This will ask you to log in the first time.

project_id = 'data-engineering-prod'
client = bigquery.Client(project=project_id)

# List all datasets in project
client.list_datasets()

<google.api_core.page_iterator.HTTPIterator at 0x1036d02d0>

In [22]:
# list datasets and number of views/tables
datasets = []
for dataset in client.list_datasets():
    views = list(client.list_tables(dataset=dataset.reference))
    datasets.append(views)
    print(dataset.dataset_id + " contains {:d} views".format(len(views)))
    
print("\n" + project_id + " contains {:d} datasets".format(len(datasets)))

landing_andromeda contains 107 views
landing_andromeda_secure contains 660 views
landing_identity_secure contains 4 views
landing_orex_secure contains 16 views
landing_orion_migration_secure contains 51 views
landing_orion_secure contains 3 views
landing_pace_secure contains 1 views
landing_payments_secure contains 23 views
product_andromeda_secure contains 195 views
product_identity_secure contains 1 views
product_orion contains 31 views
product_orion_secure contains 99 views
product_payments_secure contains 1 views
raw_andromeda_secure contains 418 views
raw_identity_secure contains 2 views
raw_orex_secure contains 11 views
raw_orion_migration_secure contains 13 views
raw_orion_secure contains 1 views
raw_pace_secure contains 1 views
raw_payments_secure contains 5 views

data-engineering-prod contains 20 datasets


In [23]:
# Collect data on all views within datasets
print("Collecting data on all views in " + project_id + ". This may take some time...\n")
views = []

for dataset in client.list_datasets():
    print("Collecting views for: " + dataset.dataset_id)
    
    for view in list(client.list_tables(dataset=dataset.reference)):
        try:
            dataset_ref = bigquery.DatasetReference(project_id, dataset.dataset_id)
            view_ref = dataset_ref.table(view.table_id)
            v = client.get_table(view_ref)
            views.append(v)

        except Exception, err:
            print(err)
            pass
        
print("\nDone!")

Collecting data on all views in data-engineering-prod. This may take some time...
Collecting views for: landing_andromeda
Collecting views for: landing_andromeda_secure
Collecting views for: landing_identity_secure
Collecting views for: landing_orex_secure
Collecting views for: landing_orion_migration_secure
Collecting views for: landing_orion_secure
Collecting views for: landing_pace_secure
Collecting views for: landing_payments_secure
Collecting views for: product_andromeda_secure
Collecting views for: product_identity_secure
Collecting views for: product_orion
Collecting views for: product_orion_secure
Collecting views for: product_payments_secure
Collecting views for: raw_andromeda_secure
Collecting views for: raw_identity_secure
Collecting views for: raw_orex_secure
Collecting views for: raw_orion_migration_secure
Collecting views for: raw_orion_secure
Collecting views for: raw_pace_secure
Collecting views for: raw_payments_secure

Done!


In [24]:
# Example data from table/view
print("Example table/view and query:")
print("\nView: {}".format(views[0].full_table_id))
print("\nView Query:\n{}".format(views[0].view_query))
print("\nType: {}".format(views[0].table_type))

Example table/view and query:

View: data-engineering-prod:landing_andromeda.energy_contracts_consumption_charge_generated_v2

View Query:
#standardSQL
SELECT
  `kafkaData`,
  `unionRecord`,
  `_PARTITIONTIME` `PARTITIONTIME`
FROM
  `data-engineering-prod.auto_capture_v2.energy_contracts_consumption_charge_generated_v2`

Type: VIEW


In [25]:
data = []
for view in views:
    # if the table is created with a query I am assuming it is a view (https://cloud.google.com/bigquery/docs/view-metadata)
    d = [view.dataset_id, view.table_id, view.table_type, view.view_query, view.location, view.num_bytes, view.modified.strftime('%Y-%m-%d %H:%M:%S'), view.created.strftime('%Y-%m-%d %H:%M:%S'), view.self_link]
    data.append(d)
    
df = pd.DataFrame(data).to_json("views.json")
pd.read_json("views.json")

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,landing_andromeda,energy_contracts_consumption_charge_generated_v2,VIEW,"#standardSQL\nSELECT\n `kafkaData`,\n `union...",EU,0,2018-10-01 23:45:33,2018-08-16 16:48:02,https://bigquery.googleapis.com/bigquery/v2/pr...
1,landing_andromeda,energy_contracts_consumption_charge_generated_v3,VIEW,"#standardSQL\nSELECT\n `kafkaData`,\n `union...",EU,0,2018-10-01 23:45:36,2018-08-16 16:48:08,https://bigquery.googleapis.com/bigquery/v2/pr...
10,landing_andromeda,energy_contracts_electricity_consumption_charg...,VIEW,"#standardSQL\nSELECT\n `metadata`,\n `supply...",EU,0,2018-10-01 23:45:54,2018-08-16 16:48:38,https://bigquery.googleapis.com/bigquery/v2/pr...
100,landing_andromeda,rac_settlement_candidates_selected_v3,VIEW,"#standardSQL\nSELECT\n `metadata`,\n `msn`,\...",EU,0,2018-10-01 23:52:18,2018-06-12 17:01:59,https://bigquery.googleapis.com/bigquery/v2/pr...
1000,product_andromeda_secure,prod_support_final_statement_ready_V2,TABLE,,EU,100608,2020-09-08 12:26:08,2020-07-24 10:07:08,https://bigquery.googleapis.com/bigquery/v2/pr...
1001,product_andromeda_secure,prod_support_work_items_prep,TABLE,,EU,362208,2020-09-08 04:21:19,2020-07-21 16:05:20,https://bigquery.googleapis.com/bigquery/v2/pr...
1002,product_andromeda_secure,rac_d10_simplified,VIEW,"SELECT \n header.fileIdentifier\n ,header.f...",EU,0,2019-02-05 15:45:21,2019-02-05 15:45:04,https://bigquery.googleapis.com/bigquery/v2/pr...
1003,product_andromeda_secure,rac_d86_simplified,VIEW,"SELECT\n header.fileIdentifier\n ,header.fr...",EU,0,2019-02-05 15:36:56,2019-02-05 15:36:56,https://bigquery.googleapis.com/bigquery/v2/pr...
1004,product_andromeda_secure,rac_elec_replay_service_performance,TABLE,,EU,249095325,2020-09-08 05:36:17,2019-11-12 11:01:23,https://bigquery.googleapis.com/bigquery/v2/pr...
1005,product_andromeda_secure,rac_electricity_HISTORIC_contractChangeDateEst...,VIEW,"SELECT MSN AS msn\n , 'EstimatedReading' ...",EU,0,2020-04-06 14:53:47,2020-04-06 14:53:47,https://bigquery.googleapis.com/bigquery/v2/pr...
