In [None]:
# Example notebook: runs a sample BigQuery query and writes the result to an output table.
# Note: this first cell is expected to carry a "parameters" tag so that the values
# below can be overridden by parameter injection (presumably via papermill - TODO confirm).
# Keep the cell to plain literal assignments.

# Project used both for running queries and for writing the output table.
project_id = 'elife-data-pipeline'
# Dataset the sample query reads from.
source_dataset = 'prod'
# Dataset the output table is written to.
output_dataset = 'de_dev'
# Prefix prepended to the name of the table written by this notebook.
output_table_prefix = 'data_science_'

In [None]:
import logging
import sys

import pandas as pd

from IPython.display import display, Markdown

import data_science_pipeline.configure_warnings  # pylint: disable=unused-import
import data_science_pipeline.configure_notebook_logging  # pylint: disable=unused-import

from data_science_pipeline.utils.bq import to_gbq

In [None]:
# Echo the effective parameter values so the cell output records what this run used.
for param_label, param_value in (
        ('source_dataset:', source_dataset),
        ('output_dataset:', output_dataset),
        ('output_table_prefix:', output_table_prefix)):
    print(param_label, param_value)

In [None]:
def do_test_logging():
    """Emit one test message on the root logger at each of debug, info, warning and error."""
    root_logger = logging.getLogger()
    level_messages = (
        (root_logger.debug, 'test debug logging'),
        (root_logger.info, 'test info logging'),
        (root_logger.warning, 'test warning logging'),
        (root_logger.error, 'test error logging'),
    )
    for log_at_level, message in level_messages:
        log_at_level(message)

# Emit one message per level to check which levels show up in the cell output.
print('testing logging..')
do_test_logging()
# Disabled fallback, kept for reference: uncomment the lines below to configure
# root logging (INFO and above, written to stdout) and repeat the test.
# NOTE(review): presumably only needed when logging has not already been set up
# by the imports at the top of the notebook - confirm.
# print('initialising logging (if not already initialised)')
# logging.basicConfig(level='INFO', stream=sys.stdout)
# do_test_logging()

In [None]:
# Placeholder values shared by the query templates in this notebook.
default_query_props = {
    'project': project_id,
    'source_dataset': source_dataset,
}

In [None]:
def printmd(text: str):
    """Show ``text`` in the cell output as rendered Markdown."""
    rendered = Markdown(text)
    display(rendered)


def read_big_query(query, show_query=True):
    """Run ``query`` against BigQuery and return the result of ``pd.read_gbq``.

    Args:
        query: SQL text, executed with the standard SQL dialect in the
            notebook-level ``project_id`` project.
        show_query: when true, the stripped query is first displayed as a
            quoted ``sql`` code block via ``printmd``.

    Returns:
        Whatever ``pd.read_gbq`` returns for the query.
    """
    if show_query:
        fenced_sql = '```sql\n%s\n```' % query.strip()
        # Every line needs its own '> ' marker: a fenced code block inside a
        # blockquote is not continued by unmarked ("lazy") lines, so marking
        # only the first line breaks the rendering of multi-line queries in
        # CommonMark-compliant renderers.
        printmd('\n'.join('> ' + line for line in fenced_sql.splitlines()))
    # NOTE(review): pandas deprecated pd.read_gbq in 2.2 in favour of
    # pandas_gbq.read_gbq; migrate once pandas_gbq is imported directly.
    return pd.read_gbq(
        query,
        project_id=project_id,
        dialect='standard'
    )

In [None]:
# Count the rows of the editorial person view in the source dataset.
sample_df = read_big_query('''
    SELECT COUNT(*) AS count
    FROM `{project}.{source_dataset}.mv_Editorial_Person`
    '''.format_map(default_query_props))
sample_df.head()

In [None]:
# "<dataset>.<table>" name of the table that receives sample_df.
destination_table = '{dataset}.{table_prefix}sample_table'.format_map({
    'dataset': output_dataset,
    'table_prefix': output_table_prefix,
})
print('writing to: %s' % destination_table)
# if_exists='replace' is passed so the write targets the same table on every run.
to_gbq(sample_df, destination_table=destination_table, project_id=project_id, if_exists='replace')