In [1]:
from google.cloud import bigquery as bq
import os

# Export the path of the service-account JSON file through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable
# (presumably picked up by bq.Client() further down - confirm).
service_credentials = 'Service_Credentials/big-query-horse-play-f37757d450b8.json'
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS=service_credentials)

In [2]:
# Various utility functions
def get_dataset_ids(client_name):
    """Print and return the IDs of every dataset the client lists.

    Args:
        client_name: BigQuery client; must provide ``list_datasets()`` and a
            ``project`` attribute.

    Returns:
        list: The ``dataset_id`` of each listed dataset, in listing order.
        An empty list (rather than an implicit ``None``) when nothing is
        listed, so callers can always iterate or take ``len()`` of the result.
    """
    dataset_ids = [dataset.dataset_id for dataset in client_name.list_datasets()]

    if not dataset_ids:
        print(f'No datasets in {client_name.project}')
        return dataset_ids

    print(f'Datasets in project - {client_name.project}:')
    for dataset_id in dataset_ids:
        print(dataset_id)

    return dataset_ids

def get_table_ids(client_name, dataset):
    """Print and return the IDs of every table the client lists for a dataset.

    Args:
        client_name: BigQuery client; must provide ``list_tables()``.
        dataset: Dataset object handed to ``list_tables()``; its
            ``dataset_id`` attribute is used in the printed messages.

    Returns:
        list: The ``table_id`` of each listed table, in listing order.
        An empty list (rather than an implicit ``None``) when nothing is
        listed, so callers can always iterate or take ``len()`` of the result.
    """
    table_ids = [table.table_id for table in client_name.list_tables(dataset)]

    if not table_ids:
        print(f'No tables in {dataset.dataset_id}')
        return table_ids

    print(f'Tables in {dataset.dataset_id}:')
    for table_id in table_ids:
        print(f'{table_id}')

    return table_ids
        
def get_dataset(client_name, dataset_id, table_ids=False):
    """Fetch a dataset by ID, print a summary of it, and return it.

    Args:
        client_name: BigQuery client; must provide ``dataset()``,
            ``get_dataset()`` and a ``project`` attribute.
        dataset_id: ID of the dataset to look up through the client.
        table_ids: Pass ``True`` to additionally list the dataset's tables
            (via ``get_table_ids``) and return their IDs.

    Returns:
        The dataset object on its own, or a ``(dataset, table_id_list)``
        tuple when ``table_ids`` is ``True`` and the dataset is truthy.
        When the fetched dataset is falsy it is returned on its own even if
        ``table_ids`` was requested.
    """
    separator = '---------------------------------------'

    dataset_ref = client_name.dataset(dataset_id)
    dataset = client_name.get_dataset(dataset_ref)

    # NOTE(review): the BigQuery client presumably raises for a missing
    # dataset instead of returning a falsy value, which would make this
    # branch unreachable - confirm against the client documentation.
    if not dataset:
        print(f'No dataset matching dataset_id {dataset_id} in project {client_name.project}')
        return dataset

    print(separator)
    print(f'Dataset ID: {dataset.dataset_id}')
    print(f'Friendly Name: {dataset.friendly_name}')
    print(f'Full ID: {dataset.full_dataset_id}')
    print(f'Labels: {dataset.labels}')
    print(f'Project: {dataset.project}')
    print(f'Ref: {dataset.reference}')

    # The explicit comparison is kept so that exactly the same arguments as
    # before trigger the listing (e.g. a non-empty list does not).
    if table_ids == True:  # noqa: E712
        # Separate local name: the flag parameter is no longer overwritten.
        listed_table_ids = get_table_ids(client_name, dataset)
        # Close the summary block on this path too; the early return used to
        # skip the closing separator.
        print(separator)
        return dataset, listed_table_ids

    print(separator)
    return dataset

def get_table(client_name, dataset, table_id, incl_schema=False):
    """Fetch a table from a dataset, print a summary of it, and return it.

    Args:
        client_name: BigQuery client; must provide ``get_table()``.
        dataset: Dataset object; its ``table()`` method builds the reference
            that is handed to the client.
        table_id: ID of the table to fetch.
        incl_schema: Pass ``True`` to also print the table's schema.

    Returns:
        Whatever ``client_name.get_table()`` returned, truthy or not.
    """
    table = client_name.get_table(dataset.table(table_id))

    # Falsy result: report it and hand it straight back to the caller.
    if not table:
        print(f'{table_id} not present in dataset {dataset}')
        return table

    print('---------------------------------------')
    print(f'Table ID: {table.table_id}')
    print(f'Friendly Name: {table.friendly_name}')
    print(f'Full ID: {table.full_table_id}')
    print(f'Type: {table.table_type}')
    print(f'Rows: {table.num_rows}')
    if incl_schema == True:
        print(f'\nSchema:\n{table.schema}')

    return table

def create_dataset(client_name, dataset_id, dataset_location=None, project=None):
    """Create a dataset through the client and return a confirmation message.

    Args:
        client_name: BigQuery client; must provide ``dataset()`` and
            ``create_dataset()``.
        dataset_id: ID for the new dataset.
        dataset_location: Optional value assigned to the dataset's
            ``location`` before creation; left untouched when falsy.
        project: Optional project forwarded to ``client_name.dataset()``
            when building the dataset reference.

    Returns:
        str: ``'<dataset_id> created in project - <project>'``, where the
        project is read from the dataset object returned by the create call.
    """
    # Build the reference, then the dataset object that will be submitted.
    dataset_reference = client_name.dataset(dataset_id, project)
    dataset = bq.Dataset(dataset_reference)

    # Only set a location when one was supplied.
    if dataset_location:
        dataset.location = dataset_location

    # Issue the actual create request.
    dataset = client_name.create_dataset(dataset)

    # Report the project of the created dataset: when an explicit ``project``
    # was passed, ``client_name.project`` would name the wrong project.
    return f'{dataset_id} created in project - {dataset.project}'

In [None]:
# TODO: write a function that fetches table metadata from the
# __tables__ summary metadata table.

In [3]:
# Instantiate the BigQuery client (a project argument is optional and omitted here).
bq_client = bq.Client()

In [4]:
# List the client's datasets and keep their IDs for the cells below.
dataset_ids = get_dataset_ids(bq_client)
dataset_ids

Datasets in project - big-query-horse-play:
names_dataset
second_test


['names_dataset', 'second_test']

In [6]:
# Fetch the first listed dataset together with the IDs of its tables.
# The keyword has to match get_dataset's `table_ids` parameter; the former
# `tables=True` raises TypeError against the current definition.
names_dataset, names_table_ids = get_dataset(bq_client, dataset_ids[0], table_ids=True)

---------------------------------------
Dataset ID: names_dataset
Friendly Name: None
Full ID: big-query-horse-play:names_dataset
Labels: {}
Project: big-query-horse-play
Ref: DatasetReference('big-query-horse-play', 'names_dataset')
Tables in names_dataset:
names2017


In [7]:
# List the tables of names_dataset again, this time from the dataset object.
names_table_ids = get_table_ids(bq_client, names_dataset)

Tables in names_dataset:
names2017


In [29]:
# NOTE(review): this cell defines get_table a second time; the utility cell
# near the top of the notebook already defines it with the same behaviour,
# and whichever definition runs last is the one in effect.
def get_table(client_name, dataset, table_id, incl_schema=False):
    """Fetch a table from a dataset, print a summary of it, and return it.

    Args:
        client_name: BigQuery client; must provide ``get_table()``.
        dataset: Dataset object; its ``table()`` method builds the reference
            that is handed to the client.
        table_id: ID of the table to fetch.
        incl_schema: Pass ``True`` to also print the table's schema.

    Returns:
        Whatever ``client_name.get_table()`` returned, truthy or not.
    """
    table = client_name.get_table(dataset.table(table_id))

    # Falsy result: report it and hand it straight back to the caller.
    if not table:
        print(f'{table_id} not present in dataset {dataset}')
        return table

    print('---------------------------------------')
    print(f'Table ID: {table.table_id}')
    print(f'Friendly Name: {table.friendly_name}')
    print(f'Full ID: {table.full_table_id}')
    print(f'Type: {table.table_type}')
    print(f'Rows: {table.num_rows}')
    if incl_schema == True:
        print(f'\nSchema:\n{table.schema}')

    return table

In [30]:
# Display the dataset object fetched earlier.
names_dataset

Dataset(DatasetReference('big-query-horse-play', 'names_dataset'))

In [31]:
# Display the table IDs collected for names_dataset.
names_table_ids

['names2017']

In [33]:
# Fetch the first listed table, printing its summary together with the schema.
test_table = get_table(bq_client, names_dataset, names_table_ids[0], incl_schema=True)

test_table

---------------------------------------
Table ID: names2017
Friendly Name: None
Full ID: big-query-horse-play:names_dataset.names2017
Type: TABLE
Rows: 32469

Schema:
[SchemaField('name', 'STRING', 'NULLABLE', None, ()), SchemaField('gender', 'STRING', 'NULLABLE', None, ()), SchemaField('count', 'INTEGER', 'NULLABLE', None, ())]


Table(TableReference(DatasetReference('big-query-horse-play', 'names_dataset'), 'names2017'))

In [12]:
# Build a reference to the first listed table by hand; the following cells
# pass it to bq_client.get_table().

table_ref = names_dataset.table(names_table_ids[0])

table_ref

TableReference(DatasetReference('big-query-horse-play', 'names_dataset'), 'names2017')

In [13]:
# Fetch the table through the client using the reference built above.
bq_client.get_table(table_ref)

Table(TableReference(DatasetReference('big-query-horse-play', 'names_dataset'), 'names2017'))

In [14]:
# Keep the fetched table object so its attributes can be inspected.
actual_table = bq_client.get_table(table_ref)
actual_table

Table(TableReference(DatasetReference('big-query-horse-play', 'names_dataset'), 'names2017'))

In [19]:
# Display the schema attribute of the fetched table.
actual_table.schema

[SchemaField('name', 'STRING', 'NULLABLE', None, ()),
 SchemaField('gender', 'STRING', 'NULLABLE', None, ()),
 SchemaField('count', 'INTEGER', 'NULLABLE', None, ())]