From ea7fc462a9ee6e97874d4060619cf17a3ac2fd6f Mon Sep 17 00:00:00 2001
From: timifasubaa <30888507+timifasubaa@users.noreply.github.com>
Date: Fri, 13 Jul 2018 15:51:16 -0700
Subject: [PATCH 1/4] quote hive column names (#5368)

---
 superset/migrations/versions/6400c588de5e_.py | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 superset/migrations/versions/6400c588de5e_.py

diff --git a/superset/migrations/versions/6400c588de5e_.py b/superset/migrations/versions/6400c588de5e_.py
new file mode 100644
index 000000000000..f84616b14c3c
--- /dev/null
+++ b/superset/migrations/versions/6400c588de5e_.py
@@ -0,0 +1,21 @@
+"""empty message
+
+Revision ID: 6400c588de5e
+Revises: bddc498dd179
+Create Date: 2018-07-13 17:10:00.156708
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '6400c588de5e'
+down_revision = 'bddc498dd179'
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    op.add_column('dbs', sa.Column('allow_csv_upload', sa.Boolean(), nullable=True))
+
+def downgrade():
+    op.drop_column('dbs', 'allow_csv_upload')

From 03edce5c9df337141de0b18b4f677f0f7ae8ef7c Mon Sep 17 00:00:00 2001
From: Timi Fasubaa
Date: Thu, 19 Jul 2018 10:42:55 -0700
Subject: [PATCH 2/4] create db migration

---
 superset/migrations/versions/6400c588de5e_.py | 21 -------------------
 superset/sql_lab.py                           | 12 ++++++-----
 2 files changed, 7 insertions(+), 26 deletions(-)
 delete mode 100644 superset/migrations/versions/6400c588de5e_.py

diff --git a/superset/migrations/versions/6400c588de5e_.py b/superset/migrations/versions/6400c588de5e_.py
deleted file mode 100644
index f84616b14c3c..000000000000
--- a/superset/migrations/versions/6400c588de5e_.py
+++ /dev/null
@@ -1,21 +0,0 @@
-"""empty message
-
-Revision ID: 6400c588de5e
-Revises: bddc498dd179
-Create Date: 2018-07-13 17:10:00.156708
-
-"""
-
-# revision identifiers, used by Alembic.
-revision = '6400c588de5e'
-down_revision = 'bddc498dd179'
-
-from alembic import op
-import sqlalchemy as sa
-
-
-def upgrade():
-    op.add_column('dbs', sa.Column('allow_csv_upload', sa.Boolean(), nullable=True))
-
-def downgrade():
-    op.drop_column('dbs', 'allow_csv_upload')
diff --git a/superset/sql_lab.py b/superset/sql_lab.py
index 4612b4e0ec8c..abb51ef3e15c 100644
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -100,8 +100,9 @@ def execute_sql(
     user_name=None, session=None,
 ):
     """Executes the sql query returns the results."""
-
+    time_at_worker = utils.now_as_float()
     query = get_query(query_id, session)
+    query.time_at_worker = time_at_worker
     payload = dict(query_id=query_id)
 
     database = query.database
@@ -157,7 +158,6 @@ def handle_error(msg):
 
     query.executed_sql = executed_sql
     query.status = QueryStatus.RUNNING
-    query.start_running_time = utils.now_as_float()
     session.merge(query)
     session.commit()
     logging.info("Set query to 'running'")
@@ -172,11 +172,13 @@ def handle_error(msg):
         cursor = conn.cursor()
         logging.info('Running query: \n{}'.format(executed_sql))
         logging.info(query.executed_sql)
+        query.time_at_db = utils.now_as_float()
         db_engine_spec.execute(cursor, query.executed_sql, async_=True)
         logging.info('Handling cursor')
         db_engine_spec.handle_cursor(cursor, query, session)
         logging.info('Fetching data: {}'.format(query.to_dict()))
         data = db_engine_spec.fetch_data(cursor, query.limit)
+        query.time_at_db_result = utils.now_as_float()
     except SoftTimeLimitExceeded as e:
         logging.exception(e)
         if conn is not None:
@@ -199,7 +201,7 @@ def handle_error(msg):
     if query.status == utils.QueryStatus.STOPPED:
         return handle_error('The query has been stopped')
 
-    cdf = dataframe.SupersetDataFrame(data, cursor_description, db_engine_spec)
+    cdf = dataframe.SupersetDataFrame(data, cursor.description, db_engine_spec)
 
     query.rows = cdf.size
     query.progress = 100
@@ -212,7 +214,6 @@ def handle_error(msg):
             schema=database.force_ctas_schema,
             show_cols=False,
             latest_partition=False))
-    query.end_time = utils.now_as_float()
     session.merge(query)
     session.flush()
 
@@ -225,6 +226,7 @@ def handle_error(msg):
     if store_results:
         key = '{}'.format(uuid.uuid4())
         logging.info('Storing results in results backend, key: {}'.format(key))
+        query.time_at_results_backend_write = utils.now_as_float()
         json_payload = json.dumps(
             payload, default=utils.json_iso_dttm_ser, ignore_nan=True)
         cache_timeout = database.cache_timeout
@@ -232,7 +234,7 @@ def handle_error(msg):
             cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
         results_backend.set(key, utils.zlib_compress(json_payload), cache_timeout)
         query.results_key = key
-        query.end_result_backend_time = utils.now_as_float()
+        query.time_after_results_backend_write = utils.now_as_float()
 
     session.merge(query)
     session.commit()

From 298940970224ac36eb0f70dcc0faa98e2ea29317 Mon Sep 17 00:00:00 2001
From: Timi Fasubaa
Date: Mon, 10 Sep 2018 01:28:53 -0700
Subject: [PATCH 3/4] use stats_logger timing

---
 superset/sql_lab.py    | 29 ++++++++++++++++++-----------
 superset/views/core.py |  8 +++++++-
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/superset/sql_lab.py b/superset/sql_lab.py
index abb51ef3e15c..fff42718c0f3 100644
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -76,14 +76,14 @@ def session_scope(nullpool):
 @celery_app.task(bind=True, soft_time_limit=SQLLAB_TIMEOUT)
 def get_sql_results(
         ctask, query_id, rendered_query, return_results=True, store_results=False,
-        user_name=None):
+        user_name=None, start_time=None):
     """Executes the sql query returns the results."""
     with session_scope(not ctask.request.called_directly) as session:
         try:
             return execute_sql(
                 ctask, query_id, rendered_query, return_results, store_results,
                 user_name,
-                session=session)
+                session=session, start_time=start_time)
         except Exception as e:
             logging.exception(e)
             stats_logger.incr('error_sqllab_unhandled')
@@ -97,12 +97,14 @@ def get_sql_results(
 
 def execute_sql(
     ctask, query_id, rendered_query, return_results=True, store_results=False,
-    user_name=None, session=None,
+    user_name=None, session=None, start_time=None,
 ):
     """Executes the sql query returns the results."""
-    time_at_worker = utils.now_as_float()
+    if store_results:
+        # only asynchronous queries
+        stats_logger.timing(
+            'sqllab.query.time_pending', utils.now_as_float() - start_time)
     query = get_query(query_id, session)
-    query.time_at_worker = time_at_worker
     payload = dict(query_id=query_id)
 
     database = query.database
@@ -158,6 +160,7 @@ def handle_error(msg):
 
     query.executed_sql = executed_sql
     query.status = QueryStatus.RUNNING
+    query.start_running_time = utils.now_as_float()
     session.merge(query)
     session.commit()
     logging.info("Set query to 'running'")
@@ -172,13 +175,15 @@ def handle_error(msg):
         cursor = conn.cursor()
         logging.info('Running query: \n{}'.format(executed_sql))
         logging.info(query.executed_sql)
-        query.time_at_db = utils.now_as_float()
+        query_start_time = utils.now_as_float()
         db_engine_spec.execute(cursor, query.executed_sql, async_=True)
         logging.info('Handling cursor')
         db_engine_spec.handle_cursor(cursor, query, session)
         logging.info('Fetching data: {}'.format(query.to_dict()))
         data = db_engine_spec.fetch_data(cursor, query.limit)
-        query.time_at_db_result = utils.now_as_float()
+        stats_logger.timing(
+            'sqllab.query.time_executing_query',
+            utils.now_as_float() - query_start_time)
     except SoftTimeLimitExceeded as e:
         logging.exception(e)
         if conn is not None:
@@ -201,7 +206,7 @@ def handle_error(msg):
     if query.status == utils.QueryStatus.STOPPED:
         return handle_error('The query has been stopped')
 
-    cdf = dataframe.SupersetDataFrame(data, cursor.description, db_engine_spec)
+    cdf = dataframe.SupersetDataFrame(data, cursor_description, db_engine_spec)
 
     query.rows = cdf.size
     query.progress = 100
@@ -214,6 +219,7 @@ def handle_error(msg):
             schema=database.force_ctas_schema,
             show_cols=False,
             latest_partition=False))
+    query.end_time = utils.now_as_float()
     session.merge(query)
     session.flush()
 
@@ -226,7 +232,7 @@ def handle_error(msg):
     if store_results:
         key = '{}'.format(uuid.uuid4())
         logging.info('Storing results in results backend, key: {}'.format(key))
-        query.time_at_results_backend_write = utils.now_as_float()
+        write_to_results_backend_start = utils.now_as_float()
         json_payload = json.dumps(
             payload, default=utils.json_iso_dttm_ser, ignore_nan=True)
         cache_timeout = database.cache_timeout
@@ -234,8 +240,9 @@ def handle_error(msg):
             cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
         results_backend.set(key, utils.zlib_compress(json_payload), cache_timeout)
         query.results_key = key
-        query.time_after_results_backend_write = utils.now_as_float()
-
+        stats_logger.timing(
+            'sqllab.query.results_backend_write',
+            utils.now_as_float() - write_to_results_backend_start)
     session.merge(query)
     session.commit()
 
diff --git a/superset/views/core.py b/superset/views/core.py
index fc9e366bf19b..e0260d17bfc1 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -2334,7 +2334,12 @@ def results(self, key):
         if not results_backend:
             return json_error_response("Results backend isn't configured")
 
+        read_from_results_backend_start = utils.now_as_float()
         blob = results_backend.get(key)
+        stats_logger.timing(
+            'sqllab.query.results_backend_read',
+            utils.now_as_float() - read_from_results_backend_start,
+        )
         if not blob:
             return json_error_response(
                 'Data could not be retrieved. '
@@ -2446,7 +2451,8 @@ def sql_json(self):
                 rendered_query,
                 return_results=False,
                 store_results=not query.select_as_cta,
-                user_name=g.user.username)
+                user_name=g.user.username,
+                start_time=utils.now_as_float())
         except Exception as e:
             logging.exception(e)
             msg = (

From e479606af12da72cd9a8b1572c28740e7e31cc98 Mon Sep 17 00:00:00 2001
From: Timi Fasubaa
Date: Tue, 11 Sep 2018 06:17:11 -0700
Subject: [PATCH 4/4] trigger build
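
The timing changes above all follow one pattern: take a float timestamp before a stage of
SQL Lab query handling, then report the elapsed delta to stats_logger.timing() under a
dotted metric key (sqllab.query.time_pending, sqllab.query.time_executing_query,
sqllab.query.results_backend_write, sqllab.query.results_backend_read). The sketch below
is a minimal, self-contained illustration of that convention, not Superset code:
DummyStatsLogger is a hypothetical stand-in for the configured stats logger, and
now_as_float() is assumed here to return epoch milliseconds like utils.now_as_float().

    import logging
    import time

    logging.basicConfig(level=logging.INFO)


    def now_as_float():
        # Assumed stand-in for superset.utils.now_as_float(): an epoch-based
        # float, treated here as milliseconds.
        return time.time() * 1000.0


    class DummyStatsLogger(object):
        # Illustrative only; a StatsD-backed logger would forward the value to
        # a metrics backend instead of writing a log line.
        def timing(self, key, value):
            logging.info('[stats_logger] %s: %.1f ms', key, value)


    stats_logger = DummyStatsLogger()

    # Caller side (cf. sql_json in views/core.py): stamp the enqueue time and
    # hand it to the worker as start_time.
    start_time = now_as_float()

    # Worker side (cf. execute_sql in sql_lab.py): time spent waiting in the
    # queue, then time spent running the query against the database.
    stats_logger.timing('sqllab.query.time_pending', now_as_float() - start_time)
    query_start_time = now_as_float()
    time.sleep(0.05)  # placeholder for db_engine_spec.execute() / fetch_data()
    stats_logger.timing(
        'sqllab.query.time_executing_query', now_as_float() - query_start_time)

Emitting deltas this way keeps per-stage latency off the Query object (the earlier
query.time_at_* attributes are dropped in patch 3) and routes the measurements to
whatever stats backend the deployment has configured.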