In [0]:
from pyspark.sql import SparkSession
import html

# CONFIG
# Name of the schema whose tables are listed and diagrammed further down.
# The USE statement also makes it the session's current schema.
# NOTE(review): `spark` is not defined in this cell — presumably the
# notebook-provided SparkSession; confirm before running outside a notebook.
schema_name = "fpl_bronze_dev"
spark.sql(f"USE {schema_name}")

# Helper: Clean data types for Mermaid
def clean_type(dtype_str: str) -> str:
    """
    Convert a data type string into a token Mermaid accepts as an attribute type.

    HTML entities are unescaped and angle brackets become parentheses. Any
    remaining character outside Mermaid's attribute-type alphabet (ASCII
    letters, digits, hyphens, underscores, parentheses, square brackets) is
    replaced with an underscore, e.g. ``decimal(10,2)`` -> ``decimal(10_2)``
    and ``map<string,bigint>`` -> ``map(string_bigint)``. Without this, commas,
    colons and spaces in the type would make the generated diagram unparsable.

    Args:
        dtype_str: Type description, e.g. the ``simpleString()`` of a Spark
            data type.

    Returns:
        The sanitized type, or ``"complex_type"`` when the type is longer than
        80 characters.
    """
    # Unescape HTML entities (e.g. "&lt;" -> "<")
    cleaned = html.unescape(dtype_str)

    # Replace angle brackets with parentheses for Mermaid compatibility
    cleaned = cleaned.replace("<", "(").replace(">", ")")

    # Very long nested types are collapsed to a placeholder to keep the
    # diagram readable. Sanitizing below is 1:1, so the length is unaffected.
    if len(cleaned) > 80:
        return "complex_type"

    # Punctuation Mermaid allows inside an attribute type, besides alphanumerics
    allowed_punctuation = "-_()[]"
    return "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in allowed_punctuation else "_"
        for ch in cleaned
    )

# Helper: Format Mermaid table block
def format_mermaid_table(table_name, schema):
    """
    Render one table as a Mermaid erDiagram entity block.

    The entity name is the upper-cased table name. Each entry of
    ``schema.fields`` becomes one ``<type> <name>`` attribute line, with the
    type taken from ``dataType.simpleString()`` and passed through
    ``clean_type``.

    Returns:
        The block as a single newline-joined string (no trailing newline).
    """
    opening = f"    {table_name.upper()} {{"
    attributes = [
        f"        {clean_type(column.dataType.simpleString())} {column.name}"
        for column in schema.fields
    ]
    return "\n".join([opening, *attributes, "    }"])

# MAIN: Build the Mermaid ERD, one entity block per table in the schema
tables = [entry.name for entry in spark.catalog.listTables(schema_name)]

# Opening code fence and diagram keyword; entity blocks are appended after these
mermaid_lines = ["```mermaid", "erDiagram"]

for table in tables:
    try:
        df = spark.table(f"{schema_name}.{table}")
        schema = df.schema
        entity_block = format_mermaid_table(table, schema)
    except Exception as err:
        # Best effort: a table that cannot be read or rendered is reported
        # and left out of the diagram instead of aborting the whole run.
        print(f"Skipping {table}: {err}")
    else:
        mermaid_lines.append(entity_block)

# Closing code fence
mermaid_lines += ["```"]

# OUTPUT: Emit the finished Mermaid block
erd_diagram = "\n".join(mermaid_lines)
print(erd_diagram)

```mermaid
erDiagram
    CHIPS {
        string chip_type
        bigint id
        string name
        bigint number
        complex_type overrides
        bigint start_event
        bigint stop_event
        timestamp last_updated
    }
    ELEMENT_STATS {
        string label
        string name
        timestamp last_updated
    }
    ELEMENT_TYPES {
        bigint element_count
        bigint id
        string plural_name
        string plural_name_short
        string singular_name
        string singular_name_short
        bigint squad_max_play
        string squad_max_select
        bigint squad_min_play
        string squad_min_select
        bigint squad_select
        array(bigint) sub_positions_locked
        boolean ui_shirt_specific
        timestamp last_updated
    }
    ELEMENTS {
        bigint assists
        string birth_date
        bigint bonus
        bigint bps
        boolean can_select
        boolean can_transact
        bigint chance_of_playing_next_round
        bigint chance_of_playing_this_round
        bigint clean_sheets
        double clean_sheets_per_90
        bigint clearances_blocks_interceptions
        bigint code
        bigint corners_and_indirect_freekicks_order
        string corners_and_indirect_freekicks_text
        bigint cost_change_event
        bigint cost_change_event_fall
        bigint cost_change_start
        bigint cost_change_start_fall
        string creativity
        bigint creativity_rank
        bigint creativity_rank_type
        bigint defensive_contribution
        double defensive_contribution_per_90
        bigint direct_freekicks_order
        string direct_freekicks_text
        bigint dreamteam_count
        bigint element_type
        string ep_next
        string ep_this
        bigint event_points
        string expected_assists
        double expected_assists_per_90
        string expected_goal_involvements
        double expected_goal_involvements_per_90
        string expected_goals
        string expected_goals_conceded
        double expected_goals_conceded_per_90
        double expected_goals_per_90
        string first_name
        string form
        bigint form_rank
        bigint form_rank_type
        bigint goals_conceded
        double goals_conceded_per_90
        bigint goals_scored
        boolean has_temporary_code
        string ict_index
        bigint ict_index_rank
        bigint ict_index_rank_type
        bigint id
        boolean in_dreamteam
        string influence
        bigint influence_rank
        bigint influence_rank_type
        bigint minutes
        string news
        string news_added
        bigint now_cost
        bigint now_cost_rank
        bigint now_cost_rank_type
        string opta_code
        bigint own_goals
        bigint penalties_missed
        bigint penalties_order
        bigint penalties_saved
        string penalties_text
        string photo
        string points_per_game
        bigint points_per_game_rank
        bigint points_per_game_rank_type
        bigint recoveries
        bigint red_cards
        bigint region
        boolean removed
        bigint saves
        double saves_per_90
        string second_name
        string selected_by_percent
        bigint selected_rank
        bigint selected_rank_type
        boolean special
        string squad_number
        bigint starts
        double starts_per_90
        string status
        bigint tackles
        bigint team
        bigint team_code
        string team_join_date
        string threat
        bigint threat_rank
        bigint threat_rank_type
        bigint total_points
        bigint transfers_in
        bigint transfers_in_event
        bigint transfers_out
        bigint transfers_out_event
        string value_form
        string value_season
        string web_name
        bigint yellow_cards
        timestamp last_updated
    }
    EVENTS {
        bigint average_entry_score
        boolean can_enter
        boolean can_manage
        boolean cup_leagues_created
        boolean data_checked
        string deadline_time
        bigint deadline_time_epoch
        bigint deadline_time_game_offset
        boolean finished
        boolean h2h_ko_matches_created
        bigint highest_score
        bigint highest_scoring_entry
        bigint id
        boolean is_current
        boolean is_next
        boolean is_previous
        bigint most_captained
        bigint most_selected
        bigint most_transferred_in
        bigint most_vice_captained
        string name
        bigint ranked_count
        string release_time
        boolean released
        bigint top_element
        bigint transfers_made
        timestamp last_updated
    }
    FIXTURES {
        bigint code
        bigint event
        boolean finished
        boolean finished_provisional
        bigint id
        string kickoff_time
        bigint minutes
        boolean provisional_start_time
        bigint pulse_id
        boolean started
        complex_type stats
        bigint team_a
        bigint team_a_difficulty
        bigint team_a_score
        bigint team_h
        bigint team_h_difficulty
        bigint team_h_score
        timestamp last_updated
    }
    GAME_CONFIG_RULES {
        complex_type rules
        timestamp last_updated
    }
    GAME_CONFIG_SCORING {
        complex_type scoring
        timestamp last_updated
    }
    PHASES {
        bigint highest_score
        bigint id
        string name
        bigint start_event
        bigint stop_event
        timestamp last_updated
    }
    TEAMS {
        bigint code
        bigint draw
        string form
        bigint id
        bigint loss
        string name
        bigint played
        bigint points
        bigint position
        bigint pulse_id
        string short_name
        bigint strength
        bigint strength_attack_away
        bigint strength_attack_home
        bigint strength_defence_away
        bigint strength_defence_home
        bigint strength_overall_away
        bigint strength_overall_home
        string team_division
        boolean unavailable
        bigint win
        timestamp last_updated
    }
```