Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix run_raw_sql() operator #1700

Merged
merged 3 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions python-sdk/src/astro/sql/operators/base_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,12 @@ def execute(self, context: Context) -> None:
# Find and load dataframes from op_arg and op_kwarg into Table
self.create_output_table_if_needed()
self.op_args = load_op_arg_dataframes_into_sql( # type: ignore
conn_id=self.conn_id,
op_args=self.op_args, # type: ignore
target_table=self.output_table.create_similar_table(),
conn_id=self.conn_id, op_args=self.op_args, output_table=self.output_table # type: ignore
)
self.op_kwargs = load_op_kwarg_dataframes_into_sql(
conn_id=self.conn_id,
op_kwargs=self.op_kwargs,
target_table=self.output_table.create_similar_table(),
output_table=self.output_table,
)

# The transform decorator doesn't explicitly pass output_table as a
Expand Down Expand Up @@ -290,19 +288,20 @@ def get_source_code(self, py_callable: Callable) -> str | None:
return None


def load_op_arg_dataframes_into_sql(conn_id: str, op_args: tuple, target_table: BaseTable) -> tuple:
def load_op_arg_dataframes_into_sql(conn_id: str, op_args: tuple, output_table: BaseTable) -> tuple:
"""
Identify dataframes in op_args and load them to the table.

:param conn_id: Connection identifier to be used to load content to the target_table
:param op_args: user-defined decorator's kwargs
:param target_table: Table where the dataframe content will be written to
    :param output_table: Template table; for each dataframe a similar table is created and the dataframe content written to it
:return: New op_args, in which dataframes are replaced by tables
"""
final_args = []
final_args: list[Table | BaseTable] = []
database = create_database(conn_id=conn_id)
for arg in op_args:
if isinstance(arg, pd.DataFrame):
target_table = output_table.create_similar_table()
database.load_pandas_dataframe_to_table(source_dataframe=arg, target_table=target_table)
final_args.append(target_table)
elif isinstance(arg, BaseTable):
Expand All @@ -313,19 +312,20 @@ def load_op_arg_dataframes_into_sql(conn_id: str, op_args: tuple, target_table:
return tuple(final_args)


def load_op_kwarg_dataframes_into_sql(conn_id: str, op_kwargs: dict, target_table: BaseTable) -> dict:
def load_op_kwarg_dataframes_into_sql(conn_id: str, op_kwargs: dict, output_table: BaseTable) -> dict:
"""
Identify dataframes in op_kwargs and load them to a table.

:param conn_id: Connection identifier to be used to load content to the target_table
:param op_kwargs: user-defined decorator's kwargs
:param target_table: Table where the dataframe content will be written to
    :param output_table: Template table; for each dataframe a similar table is created and the dataframe content written to it
:return: New op_kwargs, in which dataframes are replaced by tables
"""
final_kwargs = {}
database = create_database(conn_id=conn_id, table=target_table)
database = create_database(conn_id=conn_id, table=output_table)
for key, value in op_kwargs.items():
if isinstance(value, pd.DataFrame):
target_table = output_table.create_similar_table()
df_table = cast(BaseTable, target_table.create_similar_table())
database.load_pandas_dataframe_to_table(source_dataframe=value, target_table=df_table)
final_kwargs[key] = df_table
Expand Down
40 changes: 39 additions & 1 deletion python-sdk/tests/sql/operators/test_base_decorator.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from unittest import mock

import pandas as pd
import pytest

from astro.sql import RawSQLOperator
from astro.sql.operators.base_decorator import BaseSQLDecoratedOperator
from astro.sql.operators.base_decorator import (
BaseSQLDecoratedOperator,
load_op_arg_dataframes_into_sql,
load_op_kwarg_dataframes_into_sql,
)
from astro.table import BaseTable, Table


def test_base_sql_decorated_operator_template_fields_with_parameters():
Expand All @@ -22,3 +28,35 @@ def test_get_source_code_handle_exception(mock_getsource, exception):
RawSQLOperator(task_id="test", sql="select * from 1", python_callable=lambda: 1).get_source_code(
py_callable=None
)


def test_load_op_arg_dataframes_into_sql():
    """Each dataframe in op_args is loaded into its own table; non-dataframe args pass through."""
    frame_a = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
    frame_b = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
    args_in = (frame_a, frame_b, Table(conn_id="sqlite_default"), "str")

    args_out = load_op_arg_dataframes_into_sql(
        conn_id="sqlite_default",
        op_args=args_in,
        output_table=Table(conn_id="sqlite_default"),
    )

    first, second, third, fourth = args_out
    # Both dataframes were converted to tables, each with a distinct name.
    assert isinstance(first, BaseTable)
    assert isinstance(second, BaseTable)
    assert first.name != second.name
    # The pre-existing table and the plain string were passed through untouched.
    assert isinstance(third, BaseTable)
    assert isinstance(fourth, str)


def test_load_op_kwarg_dataframes_into_sql():
    """Each dataframe in op_kwargs is loaded into its own table; non-dataframe values pass through."""
    frame_a = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
    frame_b = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
    kwargs_in = {
        "df_1": frame_a,
        "df_2": frame_b,
        "table": Table(conn_id="sqlite_default"),
        "some_str": "str",
    }

    kwargs_out = load_op_kwarg_dataframes_into_sql(
        conn_id="sqlite_default",
        op_kwargs=kwargs_in,
        output_table=Table(conn_id="sqlite_default"),
    )

    # Both dataframes were converted to tables, each with a distinct name.
    assert isinstance(kwargs_out["df_1"], BaseTable)
    assert isinstance(kwargs_out["df_2"], BaseTable)
    assert kwargs_out["df_1"].name != kwargs_out["df_2"].name
    # The pre-existing table and the plain string were passed through untouched.
    assert isinstance(kwargs_out["table"], BaseTable)
    assert isinstance(kwargs_out["some_str"], str)
43 changes: 40 additions & 3 deletions python-sdk/tests_integration/sql/operators/test_raw_sql.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import logging
import pathlib

import pandas
import pandas as pd
import pytest
from airflow.decorators import task

from astro import sql as aql
from astro.constants import Database
from astro.dataframes.pandas import PandasDataframe
from astro.files import File
from astro.table import BaseTable

from ..operators import utils as test_utils

Expand Down Expand Up @@ -166,8 +168,8 @@ def raw_sql_query(input_table):

@task
def assert_num_rows(result):
assert isinstance(result, pandas.DataFrame)
assert result.equals(pandas.read_csv(DATA_FILEPATH))
assert isinstance(result, pd.DataFrame)
assert result.equals(pd.read_csv(DATA_FILEPATH))
assert result.shape == (3, 2)

with sample_dag:
Expand Down Expand Up @@ -208,5 +210,40 @@ def assert_num_rows(result):
with sample_dag:
results = raw_sql_query(input_table=test_table)
assert_num_rows(results)
test_utils.run_dag(sample_dag)


@pytest.mark.integration
@pytest.mark.parametrize(
    "database_table_fixture",
    [{"database": Database.SQLITE, "file": File(path=str(DATA_FILEPATH))}],
    indirect=True,
    ids=["sqlite"],
)
def test_run_raw_sql_handle_multiple_tables(sample_dag, database_table_fixture):
    """
    Passing multiple dataframes through run_raw_sql() operators must load each
    into its own, distinctly named table.
    """
    _, test_table = database_table_fixture

    def _fetch_df(result):
        # Materialize the SQL result cursor into a pandas-backed dataframe.
        return PandasDataframe(result.fetchall(), columns=result.keys())

    @aql.run_raw_sql(handler=_fetch_df)
    def raw_sql_query_1(input_table: BaseTable):
        return "SELECT * from {{input_table}}"

    @aql.run_raw_sql(handler=_fetch_df)
    def raw_sql_query_2(input_table: BaseTable):
        return "SELECT * from {{input_table}}"

    @aql.run_raw_sql(handler=_fetch_df, conn_id="sqlite_default")
    def raw_sql_query_3(table_1: BaseTable, table_2: BaseTable):
        # The two upstream dataframes must not have collapsed into one table.
        assert table_1.name != table_2.name
        return "SELECT 1 + 1"

    with sample_dag:
        results_1 = raw_sql_query_1(input_table=test_table)
        results_2 = raw_sql_query_2(input_table=test_table)
        _ = raw_sql_query_3(table_1=results_1, table_2=results_2)
    test_utils.run_dag(sample_dag)