diff --git a/workshops/modernizr/backend/src/database/implementations/dynamodb/DynamoDBProductRepository.ts b/workshops/modernizr/backend/src/database/implementations/dynamodb/DynamoDBProductRepository.ts
index 69959fb5..0ea61f8b 100644
--- a/workshops/modernizr/backend/src/database/implementations/dynamodb/DynamoDBProductRepository.ts
+++ b/workshops/modernizr/backend/src/database/implementations/dynamodb/DynamoDBProductRepository.ts
@@ -298,7 +298,7 @@ export class DynamoDBProductRepository extends BaseDynamoDBRepository implements
 
   private async transformFromDynamoDB(item: any): Promise<ProductWithDetails> {
     const product: ProductWithDetails = {
-      id: parseInt(item.id),
+      id: parseInt(item.PK),
       seller_id: parseInt(item.seller_id),
       category_id: parseInt(item.category_id),
       name: item.product_name || item.name,
diff --git a/workshops/modernizr/core-outputs/stage-02/migrationContract.json b/workshops/modernizr/core-outputs/stage-02/migrationContract.json
index 7d8cac74..2ff27cbc 100644
--- a/workshops/modernizr/core-outputs/stage-02/migrationContract.json
+++ b/workshops/modernizr/core-outputs/stage-02/migrationContract.json
@@ -473,8 +473,8 @@
           "condition": "parent_id IS NULL",
           "target_table": "categories",
           "join_condition": "categories.id = categories.parent_id",
-          "select_column": "categories.name",
-          "else_value": "ROOT"
+          "select_column": "'ROOT'",
+          "else_value": "parent.name"
         }
       },
       {
@@ -521,7 +521,7 @@
         "join": {
           "type": "self-join",
           "join_alias": "cat_path",
-          "join_condition": "cat_path.parent_id = categories.id",
+          "join_condition": "cat_path.id = categories.parent_id",
           "select_column": "cat_path.name",
           "null_value": ""
         },
@@ -583,8 +583,8 @@
           "condition": "parent_id IS NULL",
           "target_table": "categories",
           "join_condition": "categories.id = categories.parent_id",
-          "select_column": "categories.id",
-          "else_value": "ROOT"
+          "select_column": "'ROOT'",
+          "else_value": "parent.id"
         }
       },
       {
diff --git a/workshops/modernizr/tools/contract_driven_migration_glue_mcp.py b/workshops/modernizr/tools/contract_driven_migration_glue_mcp.py
index 814662af..0cf98284 100644
--- a/workshops/modernizr/tools/contract_driven_migration_glue_mcp.py
+++ b/workshops/modernizr/tools/contract_driven_migration_glue_mcp.py
@@ -51,10 +51,9 @@ def execute_mysql_views_via_mcp(view_sql, mysql_config, config):
     mysql_defaults = config.get('mysql', {})
 
-    # Parse SQL statements
+    # Parse SQL statements - improved logic
     statements = []
     current_statement = ""
-    in_view = False
 
     for line in view_sql.split('\n'):
         line = line.strip()
 
@@ -65,16 +64,15 @@
             if current_statement:
                 statements.append(current_statement.strip())
             current_statement = line
-            in_view = True
-        elif in_view:
+        else:
             current_statement += '\n' + line
-            if line.endswith(';'):
-                statements.append(current_statement.strip())
-                current_statement = ""
-                in_view = False
+
+        if line.endswith(';'):
+            statements.append(current_statement.strip())
+            current_statement = ""
 
-    if current_statement and in_view:
-        statements.append(current_statement.strip() + ';')
+    if current_statement:
+        statements.append(current_statement.strip())
 
     print(f"   📋 Found {len(statements)} view statements to execute")
 
diff --git a/workshops/modernizr/tools/generate_mysql_views.py b/workshops/modernizr/tools/generate_mysql_views.py
index 27b12136..a49eecce 100644
--- a/workshops/modernizr/tools/generate_mysql_views.py
+++ b/workshops/modernizr/tools/generate_mysql_views.py
@@ -1,305 +1,539 @@
 #!/usr/bin/env python3
-
 import json
-import sys
-import re
-
-def parse_template(template, source_table, entity_attrs=None):
-    """Parse template strings like '{email}' or '#META'"""
-    if template.startswith('#'):
-        return f"'{template}'"
-    elif '{' in template and '}' in template:
-        matches = re.findall(r'\{([^}]+)\}', template)
-        if matches and entity_attrs:
-            # Look for the attribute in entity to see if it has a join
-            attr_name = matches[0]
-            for attr in entity_attrs:
-                if attr['name'] == 'PK' and attr_name in ['user_email', 'email']:
-                    if 'join' in attr:
-                        return attr['join']['select_column']
-            # Handle complex templates like CART#{product_id}
-            if len(matches) == 1 and matches[0] not in ['user_email', 'email']:
-                return f"{source_table}.{matches[0]}"
-            return f"{source_table}.{matches[0]}"
-        elif matches:
-            return f"{source_table}.{matches[0]}"
-    return f"'{template}'"
+import argparse
 
-def handle_transformation(attr, source_table):
-    """Handle attribute transformations"""
-    if 'transformation' in attr:
-        transform = attr['transformation']
-        if transform['type'] == 'string-format':
-            format_str = transform['format']
-            if format_str.startswith('#'):
-                return f"'{format_str}'"
-            elif '{' in format_str:
-                # Handle complex templates like CART#{product_id} or ORDER#{created_at}#{id}
-                result = format_str
-                matches = re.findall(r'\{([^}]+)\}', format_str)
-                for match in matches:
-                    result = result.replace(f'{{{match}}}', f"', {source_table}.{match}, '")
-                return f"CONCAT('{result}')"
-
-    source_col = attr['source_column']
-    if 'join' in attr:
-        join_config = attr['join']
-        if join_config['type'] == 'foreign-key':
-            return join_config['select_column']
-
-    return f"{source_table}.{source_col}"
+def load_contract(file_path):
+    with open(file_path, 'r') as f:
+        contract = json.load(f)
+    if isinstance(contract, dict) and 'tables' in contract:
+        return contract['tables']
+    return contract
 
-def generate_categories_view():
-    """Generate Categories view with correct structure"""
-    return """CREATE VIEW ddb_categories_view AS
-SELECT
-    CASE
-        WHEN c.parent_id IS NULL THEN 'ROOT'
-        ELSE parent.name
-    END AS PK,
-    c.name AS SK,
-    c.id AS category_id,
-    c.name AS category_name,
-    CASE
-        WHEN c.parent_id IS NULL THEN c.name
-        ELSE CONCAT(parent.name, '/', c.name)
-    END AS category_path,
-    COALESCE(child_count.children_count, 0) AS children_count,
-    c.created_at AS created_at,
-    CASE
-        WHEN c.parent_id IS NULL THEN 'ROOT'
-        ELSE c.parent_id
-    END AS GSI1PK,
-    c.id AS GSI1SK,
-    CASE
-        WHEN c.parent_id IS NULL THEN 0
-        ELSE 1
-    END AS level,
-    c.parent_id AS parent_id,
-    CASE
-        WHEN c.parent_id IS NULL THEN 'null'
-        ELSE parent.name
-    END AS parent_name,
-    COALESCE(product_count.product_count, 0) AS product_count
-FROM categories c
-LEFT JOIN categories parent ON parent.id = c.parent_id
-LEFT JOIN (
-    SELECT parent_id, COUNT(*) as children_count
-    FROM categories
-    WHERE parent_id IS NOT NULL
-    GROUP BY parent_id
-) child_count ON child_count.parent_id = c.id
-LEFT JOIN (
-    SELECT category_id, COUNT(*) as product_count
-    FROM products
-    GROUP BY category_id
-) product_count ON product_count.category_id = c.id"""
+def get_table_config(contract, table_name):
+    tables = contract if isinstance(contract, list) else contract.get('tables', [])
+    return next((table for table in tables if table['table'] == table_name), None)
 
-def generate_single_entity_view(table_config):
-    """Generate view for single-entity tables"""
-    source_table = table_config['source_table']
-    table_name = table_config['table']
+def qualify_column_references(condition, source_table_name):
+    """Qualify column 
references in conditions to avoid ambiguity""" + if not condition: + return condition - select_parts = [] - join_parts = [] + # Only qualify if there are potential ambiguities from self-joins + # For categories table, we need to be careful about parent_id references + if source_table_name == 'categories' and 'parent_id' in condition: + # For conditions like "parent_id IS NULL", we want the main table's parent_id + import re + # Only qualify parent_id if it's not already qualified + pattern = r'\bparent_id\b(?!\s*\.)' + qualified_condition = re.sub(pattern, f'{source_table_name}.parent_id', condition) + return qualified_condition - # Find PK and SK from attributes (not table-level config) - pk_attr = None - sk_attr = None - other_attrs = [] + return condition + +def build_join_expression(attr, source_table_name): + """Build SQL expression for join patterns""" + join_config = attr.get('join', {}) + join_type = join_config.get('type') - for attr in table_config['attributes']: - if attr['name'] == 'PK': - pk_attr = attr - elif attr['name'] == 'SK': - sk_attr = attr + if join_type == 'foreign-key': + return join_config.get('select_column', f"{source_table_name}.{attr['source_column']}") + elif join_type == 'self-join': + alias = join_config.get('join_alias', 'parent_cat') + select_col = join_config.get('select_column') + null_value = join_config.get('null_value', 'NULL') + return f"COALESCE({select_col}, '{null_value}')" + elif join_type == 'conditional': + condition = join_config.get('condition') + select_col = join_config.get('select_column') + else_value = join_config.get('else_value', 'NULL') + # Qualify column references in condition to avoid ambiguity + qualified_condition = qualify_column_references(condition, source_table_name) + # Check if else_value is a column reference (contains a dot) or a literal + if '.' 
in else_value and not else_value.startswith("'"): + return f"CASE WHEN {qualified_condition} THEN {select_col} ELSE {else_value} END" else: - other_attrs.append(attr) + return f"CASE WHEN {qualified_condition} THEN {select_col} ELSE '{else_value}' END" + elif join_type == 'chain': + separator = join_config.get('chain_separator', ' > ') + joins = join_config.get('joins', []) + if len(joins) >= 2: + return f"CONCAT_WS('{separator}', {joins[0]['select_column']}, {joins[1]['select_column']})" + elif join_type == 'json-construction': + construction = join_config.get('construction', {}) + if construction.get('type') == 'object': + select_cols = construction.get('select_columns', {}) + json_pairs = [f"'{k}', {v}" for k, v in select_cols.items()] + return f"JSON_OBJECT({', '.join(json_pairs)})" + elif construction.get('type') == 'array': + select_cols = construction.get('select_columns', {}) + json_pairs = [f"'{k}', {v}" for k, v in select_cols.items()] + return f"JSON_ARRAYAGG(JSON_OBJECT({', '.join(json_pairs)}))" - # Handle PK - if pk_attr: - pk_expr = handle_transformation(pk_attr, source_table) - select_parts.append(f"{pk_expr} AS PK") + source_column = attr.get('source_column') + if isinstance(source_column, list): + return f"{source_table_name}.{source_column[0]}" + return f"{source_table_name}.{source_column}" + +def build_calculation_expression(attr, source_table_name): + """Build SQL expression for calculations""" + calc_config = attr.get('calculation', {}) + calc_type = calc_config.get('type') - # Handle SK - if sk_attr: - sk_expr = handle_transformation(sk_attr, source_table) - select_parts.append(f"{sk_expr} AS SK") + if calc_type == 'aggregate': + operation = calc_config.get('operation', 'COUNT') + select_col = calc_config.get('select_column', '*') + return f"{operation}({select_col})" + elif calc_type == 'case': + cases = calc_config.get('cases', []) + else_clause = calc_config.get('else', '0') + case_parts = [] + for case in cases: + when_clause = case.get('when') + then_clause = case.get('then') + case_parts.append(f"WHEN {when_clause} THEN {then_clause}") + return f"CASE {' '.join(case_parts)} ELSE {else_clause} END" - # Process other attributes - for attr in other_attrs: - attr_name = attr['name'] + source_column = attr.get('source_column') + if isinstance(source_column, list): + return f"{source_table_name}.{source_column[0]}" + return f"{source_table_name}.{source_column}" + +def build_transformation_expression(attr, source_table_name): + """Build SQL expression for transformations""" + transform_config = attr.get('transformation', {}) + transform_type = transform_config.get('type') + + if transform_type == 'static': + return f"'{transform_config.get('value', '')}'" + elif transform_type == 'string-format': + format_str = transform_config.get('format', '') + source_column = attr.get('source_column') - if 'join' in attr: - join_config = attr['join'] - if join_config['type'] == 'foreign-key': - target_table = join_config['target_table'] - condition = join_config['join_condition'] - select_col = join_config['select_column'] + # Generic format string parser - extract all column references + import re + column_refs = re.findall(r'\{([^}]+)\}', format_str) + + if column_refs: + # Build CONCAT expression by parsing the format string + concat_parts = [] + remaining_format = format_str + + for col_ref in column_refs: + # Split on the current column reference + parts = remaining_format.split('{' + col_ref + '}', 1) - join_clause = f"LEFT JOIN {target_table} ON {condition}" - if 
join_clause not in join_parts: - join_parts.append(join_clause) - select_parts.append(f"{select_col} AS {attr_name}") - elif join_config['type'] == 'self-join': - alias = join_config['join_alias'] - condition = join_config['join_condition'] - select_col = join_config['select_column'] - null_val = join_config.get('null_value', '') + # Add the literal text before this column reference + if parts[0]: + concat_parts.append(f"'{parts[0]}'") - join_clause = f"LEFT JOIN {source_table} {alias} ON {condition}" - if join_clause not in join_parts: - join_parts.append(join_clause) - select_parts.append(f"COALESCE({select_col}, '{null_val}') AS {attr_name}") + # Add the column reference + concat_parts.append(f"{source_table_name}.{col_ref}") + + # Update remaining format for next iteration + remaining_format = parts[1] if len(parts) > 1 else '' + + # Add any remaining literal text + if remaining_format: + concat_parts.append(f"'{remaining_format}'") + + return f"CONCAT({', '.join(concat_parts)})" else: - source_col = attr['source_column'] - select_parts.append(f"{source_table}.{source_col} AS {attr_name}") + # No column references found, treat as literal + return f"'{format_str}'" + elif transform_type == 'template': + template = transform_config.get('template', '') + # Legacy template handling + if 'product_id' in template: + return f"CONCAT('CART#', {source_table_name}.product_id)" + elif 'created_at' in template and 'order_id' in template: + return f"CONCAT('ORDER#', {source_table_name}.created_at, '#', {source_table_name}.id)" + elif template.startswith('USER#'): + if '{user_email}' in template: + return f"CONCAT('USER#', users.email)" + elif '{email}' in template: + return f"CONCAT('USER#', {source_table_name}.email)" + return f"'{template}'" - # Build SQL - sql = f"CREATE VIEW ddb_{table_name.lower()}_view AS\n" - sql += f"SELECT\n " + ",\n ".join(select_parts) + "\n" - sql += f"FROM {source_table}" + # Fallback - handle source_column as array or string + source_column = attr.get('source_column') + if isinstance(source_column, list): + return f"{source_table_name}.{source_column[0]}" # Use first column as fallback + return f"{source_table_name}.{source_column}" + +def collect_join_tables(attributes): + """Collect all tables that need to be joined""" + join_info = {} - if join_parts: - sql += "\n" + "\n".join(join_parts) + for attr in attributes: + if 'join' in attr: + join_config = attr['join'] + join_type = join_config.get('type') + + if join_type == 'foreign-key': + target_table = join_config.get('target_table') + join_condition = join_config.get('join_condition') + if target_table and join_condition: + join_info[target_table] = join_condition + + elif join_type == 'self-join': + # Handle self-joins with aliases - need to include base table name + join_alias = join_config.get('join_alias', 'parent_cat') + join_condition = join_config.get('join_condition') + if join_condition: + # Extract base table from join condition + base_table = 'categories' # Default for self-joins + join_info[f"{base_table} {join_alias}"] = join_condition + + elif join_type == 'conditional': + # Handle conditional joins that may reference other tables in else_value + target_table = join_config.get('target_table') + join_condition = join_config.get('join_condition') + else_value = join_config.get('else_value', 'NULL') + + if target_table and join_condition: + join_info[target_table] = join_condition + + # Check if else_value references another table (e.g., "parent.name") + if '.' 
in else_value and not else_value.startswith("'"): + table_alias = else_value.split('.')[0] + if table_alias == 'parent': + # Add parent table join for categories + join_info['categories parent'] = 'parent.id = categories.parent_id' + + elif join_type == 'chain': + # Handle chain joins (like category hierarchy) + joins = join_config.get('joins', []) + for chain_join in joins: + target_table = chain_join.get('target_table') + join_condition = chain_join.get('join_condition') + if target_table and join_condition: + # For chain joins, we need to handle table aliases + if 'parent_categories' in join_condition: + join_info['categories parent_categories'] = join_condition + else: + join_info[target_table] = join_condition + + elif join_type == 'json-construction': + target_table = join_config.get('target_table') + join_condition = join_config.get('join_condition') + if target_table and join_condition: + join_info[target_table] = join_condition + + if 'calculation' in attr: + calc_config = attr['calculation'] + if calc_config.get('type') == 'aggregate': + target_table = calc_config.get('target_table') + join_condition = calc_config.get('join_condition') + if target_table and join_condition: + join_info[target_table] = join_condition - return sql + return join_info -def generate_multi_entity_view(table_config): - """Generate unified view for multi-entity tables using UNION""" - table_name = table_config['table'] - union_parts = [] - all_columns = set() +def collect_all_attributes(entities): + """Collect all unique attributes across all entities for UNION compatibility""" + all_attrs = {} + for entity in entities: + for attr in entity.get('attributes', []): + attr_name = attr['name'] + attr_type = attr.get('type', 'S') + all_attrs[attr_name] = attr_type + return all_attrs + +def generate_single_entity_sql(entity, table_name, all_attributes): + """Generate SQL SELECT for a single entity (for use in UNION)""" + entity_type = entity.get('entity_type', 'UNKNOWN') + source_table = entity.get('source_table') + attributes = entity.get('attributes', []) - # First pass: collect all possible columns from all entities - all_columns_set = set() - for entity in table_config['entities']: - for attr in entity['attributes']: - if attr['name'] not in ['PK', 'SK']: - all_columns_set.add(attr['name']) + if not attributes or not source_table: + return None - # Sort columns for consistent ordering - all_columns = sorted(all_columns_set) + # Create a map of this entity's attributes + entity_attrs = {attr['name']: attr for attr in attributes} - for entity in table_config['entities']: - source_table = entity['source_table'] - entity_type = entity['entity_type'] - - select_parts = [] - join_parts = [] - entity_columns = set() - entity_select_parts = [] + select_parts = [] + join_info = collect_join_tables(attributes) + + # Generate SELECT for all attributes (including NULLs for missing ones) + for attr_name, attr_type in all_attributes.items(): + if attr_name in entity_attrs: + # This entity has this attribute + attr = entity_attrs[attr_name] + source_table_name = attr.get('source_table', source_table) + + # Build the expression based on attribute type + if 'transformation' in attr: + expr = build_transformation_expression(attr, source_table_name) + elif 'join' in attr: + expr = build_join_expression(attr, source_table_name) + elif 'calculation' in attr: + expr = build_calculation_expression(attr, source_table_name) + else: + if 'source_column' in attr: + source_column = attr['source_column'] + if isinstance(source_column, 
list): + expr = f"{source_table_name}.{source_column[0]}" # Use first column as fallback + else: + expr = f"{source_table_name}.{source_column}" + else: + expr = "NULL" + else: + # This entity doesn't have this attribute - use NULL + if attr_type == 'S': + expr = "NULL" + elif attr_type == 'N': + expr = "NULL" + elif attr_type == 'L': + expr = "NULL" + elif attr_type == 'BOOL': + expr = "NULL" + else: + expr = "NULL" - # Parse PK and SK templates - pk_template = entity['pk_template'] - sk_template = entity['sk_template'] + # Apply type casting + if attr_type == 'S': + cast_expr = f"CAST({expr} AS CHAR)" + elif attr_type == 'N': + cast_expr = f"CAST({expr} AS DECIMAL(38,10))" + elif attr_type == 'L': + cast_expr = f"CAST({expr} AS JSON)" + elif attr_type == 'BOOL': + cast_expr = f"CAST({expr} AS UNSIGNED)" + else: + cast_expr = expr - pk_expr = parse_template(pk_template, source_table, entity['attributes']) - # Handle SK template parsing with transformations - sk_expr = sk_template - if sk_template.startswith('#'): - sk_expr = f"'{sk_template}'" - elif 'CART#' in sk_template or 'ORDER#' in sk_template: - # Handle complex SK templates - matches = re.findall(r'\{([^}]+)\}', sk_template) - if matches: - result = sk_template - for match in matches: - result = result.replace(f'{{{match}}}', f"', {source_table}.{match}, '") - sk_expr = f"CONCAT('{result}')" + select_parts.append(f"{cast_expr} AS {attr_name}") + + # Build FROM and JOIN clauses + from_clause = f"FROM {source_table}" + join_clauses = [] + + for join_table, join_condition in join_info.items(): + if join_table != source_table: + join_clauses.append(f"LEFT JOIN {join_table} ON {join_condition}") + + # Handle self-joins for categories + if source_table == 'categories': + join_clauses.append("LEFT JOIN categories parent_cat ON parent_cat.id = categories.parent_id") + join_clauses.append("LEFT JOIN categories child_cat ON child_cat.parent_id = categories.id") + + sql_parts = [ + "SELECT", + ",\n ".join(select_parts), + from_clause + ] + + if join_clauses: + sql_parts.extend(join_clauses) + + # Check if we need GROUP BY for aggregate functions + has_aggregate = any('JSON_ARRAYAGG' in part or 'COUNT(' in part for part in select_parts) + if has_aggregate: + # For categories table with COUNT aggregates, add GROUP BY for all non-aggregate columns + if source_table == 'categories': + group_by_cols = [ + 'categories.id', + 'categories.name', + 'categories.parent_id', + 'categories.created_at' + ] + sql_parts.append(f"GROUP BY {', '.join(group_by_cols)}") else: - sk_expr = parse_template(sk_template, source_table, entity['attributes']) + # Add GROUP BY for non-aggregate columns from the main source table + group_by_cols = [] + for attr in attributes: + if 'calculation' not in attr or attr.get('calculation', {}).get('type') != 'aggregate': + source_column = attr.get('source_column') + source_table_name = attr.get('source_table', source_table) + if source_column and source_table_name == source_table: + if isinstance(source_column, list): + group_by_cols.extend([f"{source_table_name}.{col}" for col in source_column]) + else: + group_by_cols.append(f"{source_table_name}.{source_column}") + + # Add joined table columns that are used in SELECT + for attr in attributes: + if 'join' in attr and attr['join'].get('type') == 'foreign-key': + join_config = attr['join'] + select_column = join_config.get('select_column', '') + if select_column and 'users.email' in select_column: + group_by_cols.append(select_column) + + if group_by_cols: + 
sql_parts.append(f"GROUP BY {', '.join(set(group_by_cols))}") + + return "\n".join(sql_parts) + +def generate_view_sql(table_config): + if not table_config: + return None + + table_name = table_config['table'] + table_type = table_config.get('type', 'single-entity') + + if table_type == 'multi-entity' and 'entities' in table_config: + # Generate single view with UNION for all entities + entities = table_config['entities'] + all_attributes = collect_all_attributes(entities) - select_parts.append(f"{pk_expr} AS PK") - select_parts.append(f"{sk_expr} AS SK") + entity_sqls = [] + for entity in entities: + entity_sql = generate_single_entity_sql(entity, table_name, all_attributes) + if entity_sql: + entity_sqls.append(entity_sql) - # Check if PK needs a join (for user_email references) - pk_needs_join = False - for attr in entity['attributes']: - if attr['name'] == 'PK' and 'join' in attr: - join_config = attr['join'] - if join_config['type'] == 'foreign-key': - target_table = join_config['target_table'] - condition = join_config['join_condition'] - join_clause = f"LEFT JOIN {target_table} ON {condition}" - if join_clause not in join_parts: - join_parts.append(join_clause) - pk_needs_join = True + if entity_sqls: + union_sql = "\nUNION ALL\n".join(entity_sqls) + return f"CREATE OR REPLACE VIEW ddb_{table_name.lower()}_view AS\n{union_sql};" + return f"-- No valid entities found for table {table_name}" + + elif 'attributes' in table_config: + # Single entity table + source_table = table_config.get('source_table', table_name.lower()) + attributes = table_config['attributes'] - # Add entity attributes first - for attr in entity['attributes']: + select_parts = [] + join_info = collect_join_tables(attributes) + + for attr in attributes: attr_name = attr['name'] - if attr_name in ['PK', 'SK']: - continue - - entity_columns.add(attr_name) + attr_type = attr.get('type', 'S') + source_table_name = attr.get('source_table', source_table) - if 'join' in attr: - join_config = attr['join'] - if join_config['type'] == 'foreign-key': - target_table = join_config['target_table'] - condition = join_config['join_condition'] - select_col = join_config['select_column'] - - join_clause = f"LEFT JOIN {target_table} ON {condition}" - if join_clause not in join_parts: - join_parts.append(join_clause) - select_parts.append(f"{select_col} AS {attr_name}") + # Build the expression + if 'transformation' in attr: + expr = build_transformation_expression(attr, source_table_name) + elif 'join' in attr: + expr = build_join_expression(attr, source_table_name) + elif 'calculation' in attr: + expr = build_calculation_expression(attr, source_table_name) else: - source_col = attr['source_column'] - select_parts.append(f"{source_table}.{source_col} AS {attr_name}") + source_column = attr.get('source_column') + if isinstance(source_column, list): + expr = f"{source_table_name}.{source_column[0]}" # Use first column as fallback + else: + expr = f"{source_table_name}.{source_column}" + + # Apply type casting + if attr_type == 'S': + cast_expr = f"CAST({expr} AS CHAR)" + elif attr_type == 'N': + cast_expr = f"CAST({expr} AS DECIMAL(38,10))" + elif attr_type == 'BOOL': + cast_expr = f"CAST({expr} AS UNSIGNED)" + else: + cast_expr = expr + + select_parts.append(f"{cast_expr} AS {attr_name}") - # Add NULL for missing columns in consistent order - for col in all_columns: - if col not in entity_columns: - select_parts.append(f"NULL AS {col}") + # Build FROM and JOIN clauses + from_clause = f"FROM {source_table}" + join_clauses = [] - # Build 
entity SELECT - entity_sql = f"SELECT\n " + ",\n ".join(select_parts) + "\n" - entity_sql += f" FROM {source_table}" + for join_table, join_condition in join_info.items(): + if join_table != source_table: + # Handle table aliases (e.g., "categories parent_categories") + if ' ' in join_table: + join_clauses.append(f"LEFT JOIN {join_table} ON {join_condition}") + else: + join_clauses.append(f"LEFT JOIN {join_table} ON {join_condition}") - if join_parts: - entity_sql += "\n " + "\n ".join(join_parts) + # All joins are now handled generically through collect_join_tables - union_parts.append(entity_sql) - - # Build unified view with UNION - view_name = f"ddb_{table_name.lower()}_view" - sql = f"CREATE VIEW {view_name} AS\n" - sql += "\nUNION ALL\n\n".join(union_parts) + sql_parts = [ + f"CREATE OR REPLACE VIEW ddb_{table_name.lower()}_view AS", + "SELECT", + ",\n ".join(select_parts), + from_clause + ] + + if join_clauses: + sql_parts.extend(join_clauses) + + # Check if we need GROUP BY for aggregate functions + has_aggregate = any('JSON_ARRAYAGG' in part or 'COUNT(' in part for part in select_parts) + if has_aggregate: + # For categories table with COUNT aggregates, add GROUP BY for all non-aggregate columns + if source_table == 'categories': + group_by_cols = [ + 'categories.id', + 'categories.name', + 'categories.parent_id', + 'categories.created_at', + 'cat_path.name' # Add joined table column + ] + sql_parts.append(f"GROUP BY {', '.join(group_by_cols)}") + + return "\n".join(sql_parts) + ";" - return sql + return f"-- No processable structure found for table {table_name}" def main(): - if len(sys.argv) != 2: - print("Usage: python generate_mysql_views_final.py ") - sys.exit(1) + parser = argparse.ArgumentParser(description='Generate MySQL views for DynamoDB migration') + parser.add_argument('--contract', '-c', required=True, help='Path to the migration contract JSON file') + parser.add_argument('--table', '-t', help='Name of the DynamoDB table to generate view for') + parser.add_argument('--output', '-o', help='Output SQL file path (if not specified, prints to stdout)') + parser.add_argument('--all', '-a', action='store_true', help='Generate views for all tables') + + args = parser.parse_args() - contract_file = sys.argv[1] + if not args.all and not args.table: + parser.error("either --table or --all is required") try: - with open(contract_file, 'r') as f: - contract = json.load(f) + contract = load_contract(args.contract) + sql_statements = [] - print("-- Generated MySQL Views from Migration Contract") - print("-- Final version with correct template parsing\n") + # Add header comment + table_count = len(contract if isinstance(contract, list) else contract.get('tables', [])) + header = [ + "-- MySQL Views for DynamoDB Migration", + "-- Generated from migration contract with join patterns", + f"-- Creates {table_count} views (one per DynamoDB table)", + "-- Multi-entity tables use UNION ALL to combine entities", + "-- WARNING: Complex join patterns may require manual review", + "" + ] + sql_statements.extend(header) + + if args.all: + # Generate SQL for all DynamoDB tables (one view per table) + tables = contract if isinstance(contract, list) else contract.get('tables', []) + for table in tables: + table_name = table.get('table') + sql = generate_view_sql(table) + if sql and not sql.startswith('--'): + sql_statements.append(f"-- View for DynamoDB table: {table_name}") + sql_statements.append(sql) + else: + # Generate SQL for specific DynamoDB table + table_config = 
get_table_config(contract, args.table) + if not table_config: + print(f"Error: DynamoDB table {args.table} not found in contract") + return 1 + sql = generate_view_sql(table_config) + if sql and not sql.startswith('--'): + sql_statements.append(f"-- View for DynamoDB table: {args.table}") + sql_statements.append(sql) + + # Combine all SQL statements + final_sql = "\n\n".join(sql_statements) + + # Output handling + if args.output: + with open(args.output, 'w') as f: + f.write(final_sql) + print(f"SQL written to {args.output}") + else: + print(final_sql) + + return 0 - for table in contract['tables']: - if table['table'] == 'Categories': - sql = generate_categories_view() - print(sql) - elif table['type'] == 'single-entity': - sql = generate_single_entity_view(table) - print(sql) - elif table['type'] == 'multi-entity': - sql = generate_multi_entity_view(table) - print(sql) - - print("\n" + "="*50 + "\n") - except Exception as e: - print(f"Error: {e}") - sys.exit(1) + print(f"Error: {str(e)}") + import traceback + traceback.print_exc() + return 1 if __name__ == "__main__": - main() \ No newline at end of file + exit(main()) \ No newline at end of file
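Reviewer note (illustration only, not part of the patch): a minimal, hypothetical smoke test for the rewritten generator. It assumes the new generate_mysql_views.py is importable as a module from the working directory and that the stage-02 contract still names the categories table "Categories", as the old generator's special case did. The generated SQL text may differ in detail, so the checks are rough expectations rather than guarantees.

    # hypothetical_smoke_test.py -- sketch, not part of this change
    from generate_mysql_views import load_contract, get_table_config, generate_view_sql

    # Contract path taken from the file touched in this patch; adjust to your checkout.
    contract = load_contract("workshops/modernizr/core-outputs/stage-02/migrationContract.json")
    table_config = get_table_config(contract, "Categories")  # table name assumed from the old generator
    sql = generate_view_sql(table_config)

    # With the corrected contract entries (select_column "'ROOT'", else_value "parent.name"),
    # the PK expression should come out roughly as:
    #   CASE WHEN categories.parent_id IS NULL THEN 'ROOT' ELSE parent.name END
    assert "CREATE OR REPLACE VIEW ddb_categories_view" in sql
    print(sql)

Running the generated statement through the patched execute_mysql_views_via_mcp parser should then yield exactly one statement per CREATE VIEW, since statements are now closed on any line ending in ';' rather than only while an in_view flag is set.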