From c002014cef5310d5df13d69242cd10a392255802 Mon Sep 17 00:00:00 2001 From: Filipp Ozinov Date: Sun, 29 Jun 2025 20:42:15 +0400 Subject: [PATCH] Fix parsing bug with multiple spaces in CREATE TABLE - Handle multiple consecutive spaces in field definitions - Add test case to reproduce issue #160 - Fixes 'unknown mysql type' error during realtime replication --- mysql_ch_replicator/enum/ddl_parser.py | 13 +++++--- test_mysql_ch_replicator.py | 46 ++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/mysql_ch_replicator/enum/ddl_parser.py b/mysql_ch_replicator/enum/ddl_parser.py index 504efcf..b11db8a 100644 --- a/mysql_ch_replicator/enum/ddl_parser.py +++ b/mysql_ch_replicator/enum/ddl_parser.py @@ -36,8 +36,9 @@ def find_enum_or_set_definition_end(line: str) -> Tuple[int, str, str]: return end_pos, field_type, field_parameters # Fallback to splitting by space if we can't find the end - definition = line.split(' ') - field_type = definition[0] + # Use split() instead of split(' ') to handle multiple consecutive spaces + definition = line.split() + field_type = definition[0] if definition else "" field_parameters = ' '.join(definition[1:]) if len(definition) > 1 else '' return -1, field_type, field_parameters @@ -62,12 +63,14 @@ def parse_enum_or_set_field(line: str, field_name: str, is_backtick_quoted: bool if line.lower().startswith('enum(') or line.lower().startswith('set('): end_pos, field_type, field_parameters = find_enum_or_set_definition_end(line) else: - definition = line.split(' ') - field_type = definition[0] + # Use split() instead of split(' ') to handle multiple consecutive spaces + definition = line.split() + field_type = definition[0] if definition else "" field_parameters = ' '.join(definition[1:]) if len(definition) > 1 else '' else: # For non-backtick quoted fields - definition = line.split(' ') + # Use split() instead of split(' ') to handle multiple consecutive spaces + definition = line.split() definition = definition[1:] # Skip the field name which was already extracted if definition and ( diff --git a/test_mysql_ch_replicator.py b/test_mysql_ch_replicator.py index 30759eb..3153285 100644 --- a/test_mysql_ch_replicator.py +++ b/test_mysql_ch_replicator.py @@ -2607,3 +2607,49 @@ def test_ignore_deletes(): finally: # Clean up the temporary config file os.unlink(config_file) + +def test_issue_160_unknown_mysql_type_bug(): + """ + Test to reproduce the bug from issue #160. + + Bug Description: Replication fails when adding a new table during realtime replication + with Exception: unknown mysql type "" + + This test should FAIL until the bug is fixed. + When the bug is present: parsing will fail with unknown mysql type and the test will FAIL + When the bug is fixed: parsing will succeed and the test will PASS + """ + # The exact CREATE TABLE statement from the bug report + create_table_query = """create table test_table +( + id bigint not null, + col_a datetime(6) not null, + col_b datetime(6) null, + col_c varchar(255) not null, + col_d varchar(255) not null, + col_e int not null, + col_f decimal(20, 10) not null, + col_g decimal(20, 10) not null, + col_h datetime(6) not null, + col_i date not null, + col_j varchar(255) not null, + col_k varchar(255) not null, + col_l bigint not null, + col_m varchar(50) not null, + col_n bigint null, + col_o decimal(20, 1) null, + col_p date null, + primary key (id, col_e) +);""" + + # Create a converter instance + converter = MysqlToClickhouseConverter() + + # This should succeed when the bug is fixed + # When the bug is present, this will raise "unknown mysql type """ and the test will FAIL + mysql_structure, ch_structure = converter.parse_create_table_query(create_table_query) + + # Verify the parsing worked correctly + assert mysql_structure.table_name == 'test_table' + assert len(mysql_structure.fields) == 17 # All columns should be parsed + assert mysql_structure.primary_keys == ['id', 'col_e']