diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 5d7d4dd7..7864d81e 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -7,6 +7,9 @@ Changelog
 - Project ID parameter is optional in ``read_gbq`` and ``to_gbq`` when it can
   be inferred from the environment. Note: you must still pass in a project ID
   when using user-based authentication. (:issue:`103`)
+- Progress bar added for ``to_gbq`` when the optional ``tqdm`` package is
+  installed. (:issue:`162`)
+
 
 Internal changes
 ~~~~~~~~~~~~~~~~
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index b7447074..5da85e82 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -16,6 +16,11 @@
 BIGQUERY_INSTALLED_VERSION = None
 SHOW_VERBOSE_DEPRECATION = False
 
+try:
+    import tqdm  # noqa
+except ImportError:
+    tqdm = None
+
 
 def _check_google_client_version():
     global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION
@@ -563,16 +568,19 @@ def run_query(self, query, **kwargs):
 
     def load_data(
             self, dataframe, dataset_id, table_id, chunksize=None,
-            schema=None):
+            schema=None, progress_bar=True):
         from pandas_gbq import load
 
         total_rows = len(dataframe)
         logger.info("\n\n")
 
         try:
-            for remaining_rows in load.load_chunks(
-                    self.client, dataframe, dataset_id, table_id,
-                    chunksize=chunksize, schema=schema):
+            chunks = load.load_chunks(self.client, dataframe, dataset_id,
+                                      table_id, chunksize=chunksize,
+                                      schema=schema)
+            if progress_bar and tqdm:
+                chunks = tqdm.tqdm(chunks)
+            for remaining_rows in chunks:
                 logger.info("\rLoad is {0}% Complete".format(
                     ((total_rows - remaining_rows) * 100) / total_rows))
         except self.http_error as ex:
@@ -870,7 +878,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
 
 def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
            verbose=None, reauth=False, if_exists='fail', private_key=None,
-           auth_local_webserver=False, table_schema=None):
+           auth_local_webserver=False, table_schema=None, progress_bar=True):
     """Write a DataFrame to a Google BigQuery table.
 
     The main method a user calls to export pandas DataFrame contents to
@@ -935,6 +943,8 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
         names of a field.
         .. versionadded:: 0.3.1
     verbose : None, deprecated
+    progress_bar : boolean, default True. Uses the ``tqdm`` library to show
+        the upload progress, chunk by chunk.
     """
 
     _test_google_api_imports()
@@ -987,7 +997,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
 
     connector.load_data(
         dataframe, dataset_id, table_id, chunksize=chunksize,
-        schema=table_schema)
+        schema=table_schema, progress_bar=progress_bar)
 
 
 def generate_bq_schema(df, default_type='STRING'):
diff --git a/requirements.txt b/requirements.txt
index 88cf967a..7b3ede97 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ pandas
 google-auth
 google-auth-oauthlib
 google-cloud-bigquery
+tqdm
diff --git a/setup.py b/setup.py
index ebe147c3..40cfa427 100644
--- a/setup.py
+++ b/setup.py
@@ -24,6 +24,9 @@ def readme():
     'google-cloud-bigquery>=0.29.0',
 ]
 
+extras = {
+    'tqdm': 'tqdm>=4.23.0',
+}
 
 setup(
     name=NAME,
@@ -50,6 +53,7 @@ def readme():
     ],
     keywords='data',
     install_requires=INSTALL_REQUIRES,
+    extras_require=extras,
     packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
     test_suite='tests',
 )
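
A minimal usage sketch of the new ``progress_bar`` option, assuming this patch is applied and the optional extra is installed (``pip install pandas-gbq[tqdm]``, per the ``extras_require`` entry above). The DataFrame, project ID, and destination table below are placeholders, not part of the patch.

    import pandas as pd
    import pandas_gbq

    df = pd.DataFrame({"name": ["alpha", "beta"], "value": [1, 2]})

    # progress_bar defaults to True; a tqdm bar wraps the chunk iterator only
    # when tqdm is importable, otherwise the existing percentage logging runs.
    pandas_gbq.to_gbq(
        df,
        "my_dataset.my_table",    # placeholder destination table
        project_id="my-project",  # placeholder project ID
        if_exists="replace",
        progress_bar=True,
    )

Passing ``progress_bar=False`` skips the bar even when ``tqdm`` is installed, which keeps the option opt-out while leaving the library dependency optional.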