-
Notifications
You must be signed in to change notification settings - Fork 294
--min-age
and --max-age
should be computed relative to database session time #284
Description
Describe the bug
I expect the --min-age
and --max-age
command-line arguments to be relative to the database session time, rather than relative to the time on the machine where data-diff
is run (eg consider a database in a different time zone to a local machine running data-diff
). I believe that this gives a user a potentially misleading impression on what data they have analysed with data-diff
.
I think the easiest way to see this situation is via TestCLI
in test_cli.py
, by changing the timezone in the database session during the test setup (self.conn.query("SET @@session.time_zone='-01:00'")
) :
class TestCLI(unittest.TestCase):
def setUp(self) -> None:
self.conn = get_conn(MySQL)
self.conn.query("drop table if exists test_cli")
self.conn.query("drop table if exists test_cli_2")
table_src_name = "test_cli"
table_dst_name = "test_cli_2"
src_table = table(table_src_name, schema={"id": int, "datetime": datetime, "text_comment": str})
self.conn.query(src_table.create())
self.conn.query("SET @@session.time_zone='-01:00'") # change time zone from +00:00
db_time = self.conn.query("select now()", datetime)
self.now = now = arrow.get(db_time)
rows = [
(now, "now"),
(self.now.shift(seconds=-10), "a"),
(self.now.shift(seconds=-7), "b"),
(self.now.shift(seconds=-6), "c"),
]
self.conn.query(src_table.insert_rows((i, ts.datetime, s) for i, (ts, s) in enumerate(rows)))
_commit(self.conn)
self.conn.query(f"CREATE TABLE {table_dst_name} AS SELECT * FROM {table_src_name}")
_commit(self.conn)
self.conn.query(src_table.insert_row(len(rows), self.now.shift(seconds=-3).datetime, "3 seconds ago"))
_commit(self.conn)
def tearDown(self) -> None:
self.conn.query("drop table if exists test_cli")
self.conn.query("drop table if exists test_cli_2")
_commit(self.conn)
return super().tearDown()
def test_basic(self):
diff = run_datadiff_cli(TEST_MYSQL_CONN_STRING, "test_cli", TEST_MYSQL_CONN_STRING, "test_cli_2")
assert len(diff) == 1
def test_options(self):
diff = run_datadiff_cli(
TEST_MYSQL_CONN_STRING,
"test_cli",
TEST_MYSQL_CONN_STRING,
"test_cli_2",
"--bisection-factor",
"16",
"--bisection-threshold",
"10000",
"--limit",
"5",
"-t",
"datetime",
"--max-age",
"1h", # can make test pass again by changing to 2h
)
assert len(diff) == 1
The tests should now fail, and can only be fixed by changing the --max-age
value.
I've opened a draft PR (#283) with a sketch of a fix. If you think it has potential, please let me know what can be edited/improved, and then I will update the PR with test coverage (and possibly support for other databases if required).