# In [None]:
from typing import Dict, Optional, Any
from datetime import datetime

try:
    import yaml
    YAML_AVAILABLE = True
except ImportError:
    YAML_AVAILABLE = False

class DVUtils:
    """YAML export/import helpers for a Data Vault manager.

    The methods read the attributes ``_default_load_datetime_column``,
    ``_registered_hubs``, ``_registered_links``, ``_hubs``, ``_links`` and
    ``_sats`` and call ``register_hub``, ``register_link``, ``add_hub``,
    ``add_link`` and ``add_satellite``. None of those are defined in this
    class, so it is presumably meant to be a base/mixin of
    ``DataVaultManager`` (TODO confirm against the rest of the notebook).

    ``yaml``, ``notebookutils``, ``Hub``, ``Link``, ``LinkAnchor``,
    ``LinkHubJoin`` and ``Satellite`` are resolved as globals at call time.
    """

    # Number of bytes requested from ``notebookutils.fs.head`` when loading.
    # A read that reaches this size is treated as truncated.
    _MAX_CONFIG_BYTES = 1_000_000

    def export_config(self, filepath: str) -> None:
        """
        Export vault configuration to YAML file.

        Args:
            filepath: Path to save YAML file (e.g., 'Files/vault_config.yaml')

        Raises:
            ImportError: If PyYAML is not installed.
        """

        if not YAML_AVAILABLE:
            raise ImportError("PyYAML required. Install with: pip install pyyaml")

        config = self._build_config()

        # safe_dump is the counterpart of the safe_load used by from_config:
        # it only emits standard YAML tags (tuples are written as plain
        # lists), so everything written here can be read back.
        yaml_content = yaml.safe_dump(
            config,
            default_flow_style=False,
            sort_keys=False,  # keep the section order defined in _build_config
            allow_unicode=True,
        )
        notebookutils.fs.put(filepath, yaml_content, overwrite=True)

        print(f"✅ Configuration exported to: {filepath}")
        print(f"   └─ {len(self._hubs)} hubs, {len(self._links)} links, {len(self._sats)} satellites")

    def _build_config(self) -> Dict[str, Any]:
        """Build the plain dictionary that export_config writes as YAML.

        Returns:
            Dictionary with the keys ``metadata``, ``registered_hubs``,
            ``registered_links``, ``hubs``, ``links`` and ``satellites``,
            in that order.
        """

        return {
            "metadata": {
                "default_load_datetime_column": self._default_load_datetime_column,
                "version": "1.0",
            },
            "registered_hubs": [
                {
                    "name": hub.name,
                    "schema_name": hub.schema_name,
                    "business_key_columns": hub.business_key_columns,
                }
                for hub in self._registered_hubs.values()
            ],
            "registered_links": [
                {"name": link.name, "schema_name": link.schema_name}
                for link in self._registered_links.values()
            ],
            "hubs": [self._serialize_hub(hub) for hub in self._hubs],
            "links": [self._serialize_link(link) for link in self._links],
            "satellites": [self._serialize_satellite(sat) for sat in self._sats],
        }

    @staticmethod
    def _serialize_hub(hub: "Hub") -> Dict[str, Any]:
        """Serialize a Hub to dictionary for YAML export."""

        entry = {
            "name": hub.name,
            "schema_name": hub.schema_name,
            "business_key_columns": hub.business_key_columns,
            "source_table": hub.source_table,
        }
        # Optional field: written only when it has a truthy value.
        if hub.load_datetime_column:
            entry["load_datetime_column"] = hub.load_datetime_column
        return entry

    @staticmethod
    def _serialize_satellite(sat: "Satellite") -> Dict[str, Any]:
        """Serialize a Satellite to dictionary for YAML export."""

        entry = {
            "name": sat.name,
            "schema_name": sat.schema_name,
            "parent_hub_or_link": sat.parent_hub_or_link,
            "descriptive_columns": sat.descriptive_columns,
            "source_table": sat.source_table,
            "include_mode": sat.include_mode,
        }
        # Optional fields: written only when they have a truthy value.
        if sat.hash_column:
            entry["hash_column"] = sat.hash_column
        if sat.load_datetime_column:
            entry["load_datetime_column"] = sat.load_datetime_column
        return entry

    @staticmethod
    def _config_section(config: Dict[str, Any], key: str) -> list:
        """Return the list stored under ``key`` in a loaded config.

        A missing key and a key with a null value (``hubs:`` followed by
        nothing in the YAML) are both treated as an empty section.

        Raises:
            ValueError: If the section is present but is not a list.
        """

        section = config.get(key)
        if section is None:
            return []
        if not isinstance(section, list):
            raise ValueError(
                f"Config section '{key}' must be a list, got {type(section).__name__}"
            )
        return section

    @classmethod
    def from_config(cls, filepath: str) -> "DataVaultManager":
        """
        Load vault configuration from YAML file.

        Args:
            filepath: Path to YAML config file

        Returns:
            DataVaultManager instance with loaded configuration

        Raises:
            ImportError: If PyYAML is not installed.
            ValueError: If the file reaches the read limit (and is therefore
                probably truncated), if the document is not a YAML mapping,
                or if a section is not a list.
            KeyError: If a required field is missing from an entry.
        """

        if not YAML_AVAILABLE:
            raise ImportError("PyYAML required. Install with: pip install pyyaml")

        yaml_content = notebookutils.fs.head(filepath, cls._MAX_CONFIG_BYTES)

        # head() returns at most the requested number of bytes. Truncated
        # block-style YAML frequently still parses, which would silently drop
        # the trailing entries, so refuse to continue instead.
        raw = yaml_content.encode("utf-8") if isinstance(yaml_content, str) else yaml_content
        if len(raw) >= cls._MAX_CONFIG_BYTES:
            raise ValueError(
                f"Configuration file {filepath} reaches the {cls._MAX_CONFIG_BYTES} byte "
                "read limit and may be truncated"
            )

        config = yaml.safe_load(yaml_content)
        if config is None:
            # safe_load returns None for an empty document.
            config = {}
        if not isinstance(config, dict):
            raise ValueError(
                f"Invalid configuration in {filepath}: expected a YAML mapping, "
                f"got {type(config).__name__}"
            )

        # Create manager with metadata
        metadata = config.get("metadata") or {}
        default_ldts = metadata.get("default_load_datetime_column", "load_datetime")

        dv = cls(default_ldts)

        # Load registered hubs first (dependencies for satellites/links)
        for hub_cfg in cls._config_section(config, "registered_hubs"):
            dv.register_hub(
                name=hub_cfg["name"],
                schema_name=hub_cfg.get("schema_name", "silver"),
                business_key_columns=hub_cfg.get("business_key_columns"),
            )

        # Load registered links
        for link_cfg in cls._config_section(config, "registered_links"):
            dv.register_link(
                name=link_cfg["name"],
                schema_name=link_cfg.get("schema_name", "silver"),
            )

        # Load hubs
        for hub_cfg in cls._config_section(config, "hubs"):
            dv.add_hub(Hub(
                name=hub_cfg["name"],
                schema_name=hub_cfg.get("schema_name", "silver"),
                business_key_columns=hub_cfg["business_key_columns"],
                source_table=hub_cfg["source_table"],
                load_datetime_column=hub_cfg.get("load_datetime_column"),
            ))

        # Load links
        for link_cfg in cls._config_section(config, "links"):
            dv.add_link(cls._deserialize_link(link_cfg))

        # Load satellites
        for sat_cfg in cls._config_section(config, "satellites"):
            dv.add_satellite(Satellite(
                name=sat_cfg["name"],
                schema_name=sat_cfg.get("schema_name", "silver"),
                parent_hub_or_link=sat_cfg["parent_hub_or_link"],
                descriptive_columns=sat_cfg["descriptive_columns"],
                source_table=sat_cfg["source_table"],
                include_mode=sat_cfg.get("include_mode", True),
                hash_column=sat_cfg.get("hash_column"),
                load_datetime_column=sat_cfg.get("load_datetime_column"),
            ))

        print(f"✅ Configuration loaded from: {filepath}")
        print(f"   └─ {len(dv._hubs)} hubs, {len(dv._links)} links, {len(dv._sats)} satellites")

        return dv

    @staticmethod
    def _deserialize_link(link_cfg: Dict[str, Any]) -> "Link":
        """Deserialize a Link from YAML dictionary.

        Args:
            link_cfg: One entry of the ``links`` section.

        Returns:
            Link built from the entry, with defaults applied for the
            optional fields.

        Raises:
            KeyError: If ``name``, the anchor ``table`` or a required
                hub-mapping field is missing.
        """

        # ``or`` also covers keys that are present with a null value.
        anchor_cfg = link_cfg.get("anchor") or {}
        anchor = LinkAnchor(
            table=anchor_cfg["table"],
            hub=anchor_cfg.get("hub"),
            bk_columns=anchor_cfg.get("bk_columns", []),
        )

        hub_mapping = [
            LinkHubJoin(
                hub=m["hub"],
                table=m["table"],
                bk_columns=m["bk_columns"],
                join_on=m.get("join_on"),
            )
            for m in (link_cfg.get("hub_mapping") or [])
        ]

        return Link(
            name=link_cfg["name"],
            schema_name=link_cfg.get("schema_name", "silver"),
            staging_schema=link_cfg.get("staging_schema", "staging"),
            anchor=anchor,
            hub_mapping=hub_mapping,
            load_datetime_column=link_cfg.get("load_datetime_column"),
            source_columns=link_cfg.get("source_columns"),
        )

    def _serialize_link(self, link: "Link") -> Dict[str, Any]:
        """Serialize a Link to dictionary for YAML export.

        Optional fields (anchor ``hub`` and ``bk_columns``, mapping
        ``join_on``, ``load_datetime_column``, ``source_columns``) are
        written only when they have a truthy value.
        """

        anchor = {"table": link.anchor.table}
        if link.anchor.hub:
            anchor["hub"] = link.anchor.hub
        if link.anchor.bk_columns:
            anchor["bk_columns"] = link.anchor.bk_columns

        hub_mapping = []
        for mapping in link.hub_mapping:
            item = {
                "hub": mapping.hub,
                "table": mapping.table,
                "bk_columns": mapping.bk_columns,
            }
            if mapping.join_on:
                item["join_on"] = mapping.join_on
            hub_mapping.append(item)

        entry = {
            "name": link.name,
            "schema_name": link.schema_name,
            "staging_schema": link.staging_schema,
            "anchor": anchor,
            "hub_mapping": hub_mapping,
        }
        if link.load_datetime_column:
            entry["load_datetime_column"] = link.load_datetime_column
        if link.source_columns:
            entry["source_columns"] = link.source_columns
        return entry
