diff --git a/data_diff/sqeleton/databases/mysql.py b/data_diff/sqeleton/databases/mysql.py
index fd4bc295..a901c0c6 100644
--- a/data_diff/sqeleton/databases/mysql.py
+++ b/data_diff/sqeleton/databases/mysql.py
@@ -1,3 +1,4 @@
+import logging
 from ..abcs.database_types import (
     Datetime,
     Timestamp,
@@ -21,6 +22,8 @@
 from .base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, TIMESTAMP_PRECISION_POS, Mixin_Schema, Mixin_RandomSample
 from ..queries.ast_classes import BinBoolOp
 
+logger = logging.getLogger("mysql")
+
 
 @import_helper("mysql")
 def import_mysql():
@@ -127,6 +130,7 @@ def __init__(self, *, thread_count, **kw):
         self._args = kw
 
         super().__init__(thread_count=thread_count)
+        self.check_charset()
 
         # In MySQL schema and database are synonymous
         try:
@@ -134,10 +138,33 @@ def __init__(self, *, thread_count, **kw):
         except KeyError:
             raise ValueError("MySQL URL must specify a database")
 
+    def check_charset(self) -> None:
+        """Log the connection's client charset and collation at DEBUG level.
+
+        Diagnostic only: nothing is queried unless DEBUG is enabled for this
+        module's logger, and a failing query is reported as a warning rather
+        than raised, so a failed check does not abort the caller.
+        """
+        # Ask the module logger instead of comparing the root logger's level
+        # to DEBUG exactly: this honours per-logger configuration, levels
+        # below DEBUG and logging.disable().
+        if not logger.isEnabledFor(logging.DEBUG):
+            return
+        try:
+            char_set_result = self.query("SELECT @@character_set_client;")
+            logger.debug(f"charset: {char_set_result.rows}")
+            collation_result = self.query("SELECT @@collation_connection;")
+            logger.debug(f"collation: {collation_result.rows}")
+        except Exception as ex:  # best-effort diagnostics; never fatal
+            logger.warning(f"Failed to check the charset: {ex}")
+
     def create_connection(self):
         mysql = import_mysql()
         try:
-            return mysql.connect(charset="utf8", use_unicode=True, **self._args)
+            conn = mysql.connect(charset="utf8mb4", use_unicode=True, **self._args)
+            conn.set_charset_collation(charset="utf8mb4", collation="utf8mb4_0900_ai_ci")
+            return conn
+
         except mysql.Error as e:
             if e.errno == mysql.errorcode.ER_ACCESS_DENIED_ERROR:
                 raise ConnectError("Bad user name or password") from e