Skip to content

Commit 979f728

Browse files
committed
Adapting catalog versioning test
1 parent 09ec000 commit 979f728

File tree

1 file changed

+29
-11
lines changed

1 file changed

+29
-11
lines changed

tests/test_catalog.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,11 @@ def test_catalog_get_databases(glue_database):
157157
assert db["Description"] == "AWS Data Wrangler Test Arena - Glue Database"
158158

159159

160-
def test_catalog_versioning(path, glue_database, glue_table):
160+
def test_catalog_versioning(path, glue_database, glue_table, glue_table2):
161161
wr.catalog.delete_table_if_exists(database=glue_database, table=glue_table)
162162
wr.s3.delete_objects(path=path)
163163

164-
# Version 0
164+
# Version 1 - Parquet
165165
df = pd.DataFrame({"c0": [1, 2]})
166166
wr.s3.to_parquet(df=df, path=path, dataset=True, database=glue_database, table=glue_table, mode="overwrite")[
167167
"paths"
@@ -172,7 +172,7 @@ def test_catalog_versioning(path, glue_database, glue_table):
172172
assert len(df.columns) == 1
173173
assert str(df.c0.dtype).startswith("Int")
174174

175-
# Version 1
175+
# Version 2 - Parquet
176176
df = pd.DataFrame({"c1": ["foo", "boo"]})
177177
wr.s3.to_parquet(
178178
df=df,
@@ -189,38 +189,56 @@ def test_catalog_versioning(path, glue_database, glue_table):
189189
assert len(df.columns) == 1
190190
assert str(df.c1.dtype) == "string"
191191

192-
# Version 2
192+
# Version 1 - CSV
193193
df = pd.DataFrame({"c1": [1.0, 2.0]})
194194
wr.s3.to_csv(
195195
df=df,
196196
path=path,
197197
dataset=True,
198198
database=glue_database,
199-
table=glue_table,
199+
table=glue_table2,
200200
mode="overwrite",
201201
catalog_versioning=True,
202202
index=False,
203203
)
204-
assert wr.catalog.get_table_number_of_versions(table=glue_table, database=glue_database) == 3
205-
df = wr.athena.read_sql_table(table=glue_table, database=glue_database)
204+
assert wr.catalog.get_table_number_of_versions(table=glue_table2, database=glue_database) == 1
205+
df = wr.athena.read_sql_table(table=glue_table2, database=glue_database)
206206
assert len(df.index) == 2
207207
assert len(df.columns) == 1
208208
assert str(df.c1.dtype).startswith("float")
209209

210-
# Version 3 (removing version 2)
210+
# Version 1 - CSV (No evolution)
211211
df = pd.DataFrame({"c1": [True, False]})
212212
wr.s3.to_csv(
213213
df=df,
214214
path=path,
215215
dataset=True,
216216
database=glue_database,
217-
table=glue_table,
217+
table=glue_table2,
218218
mode="overwrite",
219219
catalog_versioning=False,
220220
index=False,
221221
)
222-
assert wr.catalog.get_table_number_of_versions(table=glue_table, database=glue_database) == 3
223-
df = wr.athena.read_sql_table(table=glue_table, database=glue_database)
222+
assert wr.catalog.get_table_number_of_versions(table=glue_table2, database=glue_database) == 1
223+
df = wr.athena.read_sql_table(table=glue_table2, database=glue_database)
224+
assert len(df.index) == 2
225+
assert len(df.columns) == 1
226+
assert str(df.c1.dtype).startswith("boolean")
227+
228+
# Version 2 - CSV
229+
df = pd.DataFrame({"c1": [True, False]})
230+
wr.s3.to_csv(
231+
df=df,
232+
path=path,
233+
dataset=True,
234+
database=glue_database,
235+
table=glue_table2,
236+
mode="overwrite",
237+
catalog_versioning=True,
238+
index=False,
239+
)
240+
assert wr.catalog.get_table_number_of_versions(table=glue_table2, database=glue_database) == 2
241+
df = wr.athena.read_sql_table(table=glue_table2, database=glue_database)
224242
assert len(df.index) == 2
225243
assert len(df.columns) == 1
226244
assert str(df.c1.dtype).startswith("boolean")

0 commit comments

Comments
 (0)