Skip to content

Commit

Permalink
fix(sqlalchemy): reflect most recent schema when view is replaced
Browse files Browse the repository at this point in the history
This may be too coarse of a method for this.

This is a fix for creating a table or view with the same name as an
existing table or view.  This is most likely to occur when using the
various `parquet` and `csv` registration features, because we do a lot
of `CREATE OR REPLACE VIEW`.

Before this, the ibis table would reflect the schema of the initial
registration, but would not update if you try to re-use the same view
name for a different file (with a different schema).

Other fixes I tried:
`sa.table(..., extend_existing=True)`
This works if the subsequently registered schema is a superset of the
existing schema, but if it's a subset, then you end up with a union of
the two registration runs.

Calling `reflect_table`
This is actually already happening, but it refers to the contents of
`self.meta` -- this oddly seems to update the _types_ of any columns if
that is merited, but doesn't drop columns that are no longer present in
the most recent view.

Manually popping out columns
It's called an ImmutableColumnCollection for a reason, I guess.
  • Loading branch information
gforsyth authored and cpcloud committed Feb 21, 2023
1 parent 79cb445 commit 62c8dea
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 2 deletions.
4 changes: 4 additions & 0 deletions ibis/backends/base/sql/alchemy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ def _log(self, sql):
def _get_sqla_table(
self, name: str, schema: str | None = None, autoload: bool = True, **kwargs: Any
) -> sa.Table:
# If the underlying table (or more likely, view) has changed, remove it
# to ensure a correct reflection
if autoload and self.inspector.has_table(name):
self.meta.remove(sa.Table(name, self.meta))
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", message="Did not recognize type", category=sa.exc.SAWarning
Expand Down
15 changes: 15 additions & 0 deletions ibis/backends/duckdb/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,21 @@ def test_read_in_memory():
assert "df_pandas" in con.list_tables()


def test_re_read_in_memory_overwrite():
con = ibis.duckdb.connect()

df_pandas_1 = pd.DataFrame({"a": ["a"], "b": [1], "d": ["hi"]})
df_pandas_2 = pd.DataFrame({"a": [1], "c": [1.4]})

table = con.read_in_memory(df_pandas_1, table_name="df")
assert len(table.columns) == 3
assert table.schema() == ibis.schema([("a", "str"), ("b", "int"), ("d", "str")])

table = con.read_in_memory(df_pandas_2, table_name="df")
assert len(table.columns) == 2
assert table.schema() == ibis.schema([("a", "int"), ("c", "float")])


def test_memtable_with_nullable_dtypes():
data = pd.DataFrame(
{
Expand Down
6 changes: 4 additions & 2 deletions ibis/backends/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,9 @@ def test_csv_reregister_schema(con, tmp_path):
)

# For a full file scan, expect correct schema based on final row
foo_table = con.register(foo)
# We also use the same `table_name` for both tests to ensure that
# the table is re-reflected in sqlalchemy
foo_table = con.register(foo, table_name="same")
result_schema = foo_table.schema()

assert result_schema.names == ("cola", "colb", "colc")
Expand All @@ -339,7 +341,7 @@ def test_csv_reregister_schema(con, tmp_path):

# If file scan is limited to first two rows, should be all some kind of integer.
# The specific type isn't so important, and may vary across backends/versions
foo_table = con.register(foo, SAMPLE_SIZE=2)
foo_table = con.register(foo, SAMPLE_SIZE=2, table_name="same")
result_schema = foo_table.schema()
assert result_schema.names == ("cola", "colb", "colc")
assert result_schema["cola"].is_integer()
Expand Down

0 comments on commit 62c8dea

Please sign in to comment.