From 77953b87f9ced06aeda296290438b60c103aba05 Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 22 Sep 2023 15:15:53 -0600 Subject: [PATCH 1/3] add additional debug logs for mysql charset --- data_diff/sqeleton/databases/mysql.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/data_diff/sqeleton/databases/mysql.py b/data_diff/sqeleton/databases/mysql.py index fd4bc295..4590799d 100644 --- a/data_diff/sqeleton/databases/mysql.py +++ b/data_diff/sqeleton/databases/mysql.py @@ -1,3 +1,4 @@ +import logging from ..abcs.database_types import ( Datetime, Timestamp, @@ -21,6 +22,8 @@ from .base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, TIMESTAMP_PRECISION_POS, Mixin_Schema, Mixin_RandomSample from ..queries.ast_classes import BinBoolOp +logger = logging.getLogger("mysql") + @import_helper("mysql") def import_mysql(): @@ -127,6 +130,7 @@ def __init__(self, *, thread_count, **kw): self._args = kw super().__init__(thread_count=thread_count) + self.check_charset() # In MySQL schema and database are synonymous try: @@ -134,6 +138,13 @@ def __init__(self, *, thread_count, **kw): except KeyError: raise ValueError("MySQL URL must specify a database") + def check_charset(self): + try: + result = self.query("SELECT @@character_set_client;") + logger.debug(f"MYSQL Charset: {result.rows}") + except Exception as ex: + logger.warning(f"Failed to check the charset: {ex}") + def create_connection(self): mysql = import_mysql() try: From fad734263d0cfed04105dd4169ae06feccf2a4ff Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 22 Sep 2023 19:29:38 -0600 Subject: [PATCH 2/3] set collation explicitly --- data_diff/sqeleton/databases/mysql.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/data_diff/sqeleton/databases/mysql.py b/data_diff/sqeleton/databases/mysql.py index 4590799d..e3187a6f 100644 --- a/data_diff/sqeleton/databases/mysql.py +++ b/data_diff/sqeleton/databases/mysql.py @@ -138,17 +138,23 @@ def __init__(self, *, thread_count, **kw): except KeyError: raise ValueError("MySQL URL must specify a database") - def check_charset(self): - try: - result = self.query("SELECT @@character_set_client;") - logger.debug(f"MYSQL Charset: {result.rows}") - except Exception as ex: - logger.warning(f"Failed to check the charset: {ex}") + def check_charset(self) -> None: + if logging.getLogger().level == logging.DEBUG: + try: + char_set_result = self.query("SELECT @@character_set_client;") + logger.debug(f"charset: {char_set_result.rows}") + collation_result = self.query("SELECT @@collation_connection;") + logger.debug(f"collation: {collation_result.rows}") + except Exception as ex: + logger.warning(f"Failed to check the charset: {ex}") def create_connection(self): mysql = import_mysql() try: - return mysql.connect(charset="utf8", use_unicode=True, **self._args) + conn = mysql.connect(charset="utf8mb4", use_unicode=True, **self._args) + conn.set_charset_collation(charset="utf8mb4", collation='utf8mb4_0900_ai_ci') + return conn + except mysql.Error as e: if e.errno == mysql.errorcode.ER_ACCESS_DENIED_ERROR: raise ConnectError("Bad user name or password") from e From a878f13c7d882e900e088dc6a447470db5760672 Mon Sep 17 00:00:00 2001 From: Dan Lawin Date: Mon, 25 Sep 2023 10:51:12 -0600 Subject: [PATCH 3/3] format Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- data_diff/sqeleton/databases/mysql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_diff/sqeleton/databases/mysql.py b/data_diff/sqeleton/databases/mysql.py index e3187a6f..a901c0c6 100644 --- a/data_diff/sqeleton/databases/mysql.py +++ b/data_diff/sqeleton/databases/mysql.py @@ -152,7 +152,7 @@ def create_connection(self): mysql = import_mysql() try: conn = mysql.connect(charset="utf8mb4", use_unicode=True, **self._args) - conn.set_charset_collation(charset="utf8mb4", collation='utf8mb4_0900_ai_ci') + conn.set_charset_collation(charset="utf8mb4", collation="utf8mb4_0900_ai_ci") return conn except mysql.Error as e: