Skip to content

Commit

Permalink
Optimize deferrable mode execution for BigQueryInsertJobOperator (#…
Browse files Browse the repository at this point in the history
…31249)

* Optimize deferred mode for BigQueryInsertJobOperator

* Apply review suggestion

* Add job state to the log

* capture and assert job state log in test
  • Loading branch information
phanikumv committed May 25, 2023
1 parent 64b0872 commit 28f2e70
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 10 deletions.
23 changes: 13 additions & 10 deletions airflow/providers/google/cloud/operators/bigquery.py
Expand Up @@ -2709,16 +2709,19 @@ def execute(self, context: Any):
self._handle_job_error(job)

return self.job_id
self.defer(
timeout=self.execution_timeout,
trigger=BigQueryInsertJobTrigger(
conn_id=self.gcp_conn_id,
job_id=self.job_id,
project_id=self.project_id,
poll_interval=self.poll_interval,
),
method_name="execute_complete",
)
else:
if job.running():
self.defer(
timeout=self.execution_timeout,
trigger=BigQueryInsertJobTrigger(
conn_id=self.gcp_conn_id,
job_id=self.job_id,
project_id=self.project_id,
poll_interval=self.poll_interval,
),
method_name="execute_complete",
)
self.log.info("Current state of job %s is %s", job.job_id, job.state)

def execute_complete(self, context: Context, event: dict[str, Any]):
"""
Expand Down
29 changes: 29 additions & 0 deletions tests/providers/google/cloud/operators/test_bigquery.py
Expand Up @@ -1310,6 +1310,35 @@ def test_execute_no_force_rerun(self, mock_hook):
with pytest.raises(AirflowException):
op.execute(context=MagicMock())

@mock.patch("airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator.defer")
@mock.patch("airflow.providers.google.cloud.operators.bigquery.BigQueryHook")
def test_bigquery_insert_job_operator_async_finish_before_deferred(self, mock_hook, mock_defer, caplog):
job_id = "123456"
hash_ = "hash"
real_job_id = f"{job_id}_{hash_}"

configuration = {
"query": {
"query": "SELECT * FROM any",
"useLegacySql": False,
}
}
mock_hook.return_value.insert_job.return_value = MagicMock(job_id=real_job_id, error_result=False)
mock_hook.return_value.insert_job.return_value.running.return_value = False

op = BigQueryInsertJobOperator(
task_id="insert_query_job",
configuration=configuration,
location=TEST_DATASET_LOCATION,
job_id=job_id,
project_id=TEST_GCP_PROJECT_ID,
deferrable=True,
)

op.execute(MagicMock())
assert not mock_defer.called
assert "Current state of job" in caplog.text

@mock.patch("airflow.providers.google.cloud.operators.bigquery.BigQueryHook")
def test_bigquery_insert_job_operator_async(self, mock_hook, create_task_instance_of_operator):
"""
Expand Down

0 comments on commit 28f2e70

Please sign in to comment.