Skip to content

Commit

Permalink
Don't drop tables without data
Browse files Browse the repository at this point in the history
  • Loading branch information
steinitzu committed Apr 18, 2024
1 parent 9223abc commit ac3baa5
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
4 changes: 3 additions & 1 deletion dlt/pipeline/drop.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ def drop_resources(
resource_pattern = None

if resource_pattern:
-data_tables = {t["name"]: t for t in schema.data_tables()} # Don't remove _dlt tables
+data_tables = {
+t["name"]: t for t in schema.data_tables(seen_data_only=True)
+} # Don't remove _dlt tables
resource_tables = group_tables_by_resource(data_tables, pattern=resource_pattern)
resource_names = list(resource_tables.keys())
# TODO: If drop_tables
Expand Down
13 changes: 12 additions & 1 deletion tests/pipeline/test_refresh_modes.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ def some_data_3():
yield {"id": 5, "name": "Jack"}
yield {"id": 6, "name": "Jill"}

-return [some_data_1, some_data_2, some_data_3]
+@dlt.resource
+def some_data_4():
+yield []
+
+return [some_data_1, some_data_2, some_data_3, some_data_4]

# First run pipeline with load to destination so tables are created
pipeline = dlt.pipeline(
Expand All @@ -70,6 +74,13 @@ def some_data_3():
# pipeline.normalize()
# pipeline.load()

+assert set(t["name"] for t in pipeline.default_schema.data_tables(include_incomplete=True)) == {
+"some_data_1",
+"some_data_2",
+# Table has never seen data and is not dropped
+"some_data_4",
+}
+
# Confirm resource tables not selected on second run got wiped
with pytest.raises(DatabaseUndefinedRelation):
with pipeline.sql_client() as client:
Expand Down

0 comments on commit ac3baa5

Please sign in to comment.