feat(airflow): allow data to be a callable #1318

Merged · 5 commits · May 8, 2024
Changes shown from 1 commit.
18 changes: 15 additions & 3 deletions dlt/helpers/airflow_helper.py
@@ -179,7 +179,10 @@ def run(

Args:
pipeline (Pipeline): The pipeline to run
data (Any): The data to run the pipeline with
data (Any):
The data to run the pipeline with. If a non-resource
callable is given, it is called before the load to get
the data.
table_name (str, optional): The name of the table to
which the data should be loaded within the `dataset`.
write_disposition (TWriteDispositionConfig, optional): Same as
@@ -221,7 +224,10 @@ def _run(

Args:
pipeline (Pipeline): The pipeline to run
data (Any): The data to run the pipeline with
data (Any):
The data to run the pipeline with. If a non-resource
callable is given, it is called before the load to get
the data.
table_name (str, optional): The name of the
table to which the data should be loaded
within the `dataset`.
@@ -271,6 +277,9 @@ def log_after_attempt(retry_state: RetryCallState) -> None:
)

try:
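# a data source passed as a callable is evaluated once here, inside the
# running task and before the retry loop, so the Airflow context is available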
if callable(data):
data = data()

# retry with given policy on selected pipeline steps
for attempt in self.retry_policy.copy(
retry=retry_if_exception(
@@ -338,7 +347,10 @@ def add_run(

Args:
pipeline (Pipeline): An instance of pipeline used to run the source
data (Any): Any data supported by `run` method of the pipeline
data (Any):
Any data supported by the `run` method of the pipeline.
If a non-resource callable is given, it is called before
the load to get the data.
decompose (Literal["none", "serialize", "parallel"], optional):
A source decomposition strategy into Airflow tasks:
none - no decomposition, default value.
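The new contract in one sketch: when `data` is a non-resource callable, the helper invokes it inside the running task, right before the load. Below is a minimal, hypothetical usage example — the `daily_rows` source, its fields, and the DAG/group parameters are illustrative and assume the `PipelineTasksGroup` defaults suffice:

import dlt
import pendulum
from airflow.decorators import dag
from airflow.operators.python import get_current_context

from dlt.helpers.airflow_helper import PipelineTasksGroup


def daily_rows():
    # Runs inside the task, so the live Airflow context is available
    # and the resource can use templated dates such as "ds".
    @dlt.resource
    def rows():
        context = get_current_context()
        yield {"id": 1, "day": context["ds"]}

    return rows


@dag(schedule=None, start_date=pendulum.datetime(2024, 1, 1), catchup=False)
def sketch_dag():
    tasks = PipelineTasksGroup("sketch_group")
    pipeline = dlt.pipeline(
        pipeline_name="sketch", destination="duckdb", dataset_name="sketch_data"
    )
    # Pass the callable itself, not its result; the helper defers the call.
    tasks.run(pipeline, daily_rows)


sketch_dag()

Deferring the call to task runtime is what makes `get_current_context()` usable inside the resource; the test added below exercises exactly this pattern.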
47 changes: 46 additions & 1 deletion tests/helpers/airflow_tests/test_airflow_wrapper.py
@@ -4,7 +4,7 @@
from typing import List
from airflow import DAG
from airflow.decorators import dag
from airflow.operators.python import PythonOperator
from airflow.operators.python import PythonOperator, get_current_context
from airflow.models import TaskInstance
from airflow.utils.state import DagRunState
from airflow.utils.types import DagRunType
@@ -917,3 +917,48 @@ def dag_parallel():
dag_def = dag_parallel()
assert len(tasks_list) == 1
dag_def.test()


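# A callable data source: the helper invokes it inside the running task,
# where get_current_context() returns the live Airflow context.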
def callable_source():
@dlt.resource
def test_res():
context = get_current_context()
yield [
{"id": 1, "tomorrow": context["tomorrow_ds"]},
{"id": 2, "tomorrow": context["tomorrow_ds"]},
{"id": 3, "tomorrow": context["tomorrow_ds"]},
]

return test_res


def test_run_callable() -> None:
quackdb_path = os.path.join(TEST_STORAGE_ROOT, "callable_dag.duckdb")

@dag(schedule=None, start_date=DEFAULT_DATE, catchup=False, default_args=default_args)
def dag_regular():
tasks = PipelineTasksGroup(
"callable_dag_group", local_data_folder=TEST_STORAGE_ROOT, wipe_local_data=False
)

call_dag = dlt.pipeline(
pipeline_name="callable_dag",
dataset_name="mock_data_" + uniq_id(),
destination="duckdb",
credentials=quackdb_path,
)
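# pass the callable itself; it is invoked inside the task, not at DAG parse time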
tasks.run(call_dag, callable_source)

dag_def: DAG = dag_regular()
dag_def.test()

pipeline_dag = dlt.attach(pipeline_name="callable_dag")

with pipeline_dag.sql_client() as client:
with client.execute_query("SELECT * FROM test_res") as result:
results = result.fetchall()

assert len(results) == 3

for row in results:
assert row[1] == pendulum.tomorrow().format("YYYY-MM-DD")