<div style="display:flex; align-items:flex-start; margin-bottom:1rem;">
  <!-- Left: Book cover -->
  <img
    src="https://adb-1376134742576436.16.azuredatabricks.net/files/Images/book_cover.JPG"
    style="width:35%; margin-right:1rem; border-radius:4px; box-shadow:0 2px 6px rgba(0,0,0,0.1);"
    alt="Book Cover"/>
  <!-- Right: Metadata -->
  <div style="flex:1;">
    <!-- O'Reilly logo above title -->
    <div style="display:flex; flex-direction:column; align-items:flex-start; margin-bottom:0.75rem;">
      <img
        src="https://cdn.oreillystatic.com/images/sitewide-headers/oreilly_logo_mark_red.svg"
        style="height:2rem; margin-bottom:0.25rem;"
        alt="O'Reilly"/>
      <span style="font-size:1.75rem; font-weight:bold; line-height:1.2;">
        AI, ML and GenAI in the Lakehouse
      </span>
    </div>
    <!-- Details, now each on its own line -->
    <div style="font-size:0.9rem; color:#555; margin-bottom:1rem; line-height:1.4;">
      <div><strong>Name:</strong> 09-00-Common-Code</div>
      <div><strong>Author:</strong> Bennie Haelen</div>
      <div><strong>Date:</strong> 7-26-2025</div>
    </div>
    <!-- Purpose -->
    <div style="font-weight:600; margin-bottom:0.75rem;">
      Purpose: This notebook contains the common code for this chapter
    </div>
    <!-- Outline -->
    <div style="margin-top:0;">
      <h3 style="margin:0 0 0.25rem;">Table of Contents</h3>
      <ol style="padding-left:1.25rem; margin:0; color:#333;">
        <li>Fetch Wikipedia articles and load them into a DataFrame</li>
        <li>Extract and clean the text content, then split it into manageable chunks</li>
        <li>Calculate the embeddings</li>
        <li>Store the embeddings in a Delta file</li>
      </ol>
    </div>
  </div>
</div>


# Constants

In [0]:
# Notebook-wide constants. None of them is referenced by the helper functions
# visible in this notebook; presumably the notebooks that %run this one pass
# them in as arguments -- TODO confirm against the callers.

# Presumably supplied as catalog_name / schema_name to create_or_sync_vector_index.
CATALOG_NAME = "book_ai_ml_lakehouse"
SCHEMA_NAME  = "rag"
# Presumably the vector search endpoint shared by the chapter's notebooks -- verify.
VECTOR_SEARCH_ENDPOINT_NAME = "book_ml_and_genai"
# NOTE(review): hard-coded personal e-mail address; consider deriving the
# current user from the workspace context instead of committing it here.
USER_NAME = "bhaelen@gmail.com"
# Same value as the default `endpoint_prefix` of create_endpoint_with_fallback
# and configure_vector_search_endpoint.
ENDPOINT_PREFIX = "vs_endpoint_"

# Imports

In [0]:
import time
from typing import Dict, Any, List
from enum import Enum
from dataclasses import dataclass

# Endpoint functions

In [0]:
import time
from typing import Dict, Any, List
from enum import Enum


class EndpointType(Enum):
    """Supported vector search endpoint types.

    The member's ``.value`` string is what create_vector_search_endpoint
    forwards to ``client.create_endpoint`` as ``endpoint_type``.
    """
    # Default type used by the endpoint helpers in this notebook.
    STANDARD = "STANDARD"
    # NOTE(review): confirm the vector search service accepts this string as
    # an endpoint type; it is not exercised anywhere in the visible code.
    DATABRICKS_MANAGED_EMBEDDINGS = "DATABRICKS_MANAGED_EMBEDDINGS"


def create_vector_search_endpoint(
    client,
    endpoint_name: str,
    endpoint_type: EndpointType = EndpointType.STANDARD,
    wait_for_ready: bool = True,
    max_wait_time: int = 1800,  # 30 minutes
    **kwargs
) -> Dict[str, Any]:
    """
    Create a new vector search endpoint with the specified configuration.

    The name is validated and checked for collisions first; both checks raise
    ValueError before any create call is made. When ``wait_for_ready`` is
    True the endpoint is polled every 10 seconds until it reports ONLINE.

    Args:
        client: VectorSearchClient instance
        endpoint_name: Name for the new endpoint
        endpoint_type: Type of endpoint to create (default: STANDARD)
        wait_for_ready: Whether to wait for endpoint to become ONLINE (default: True)
        max_wait_time: Maximum time to wait for endpoint readiness in seconds
            (default: 1800). At least one status check is always performed.
        **kwargs: Additional endpoint configuration parameters, forwarded
            verbatim to ``client.create_endpoint``

    Returns:
        Dict[str, Any]: The ONLINE endpoint configuration when
        ``wait_for_ready`` is True, otherwise whatever
        ``client.create_endpoint`` returned.

    Raises:
        ValueError: If endpoint name is invalid or already exists
        TimeoutError: If endpoint doesn't become ready within max_wait_time
        RuntimeError: If endpoint creation fails or enters error state

    Example:
        >>> endpoint = create_vector_search_endpoint(
        ...     client=vector_client,
        ...     endpoint_name="my_endpoint",
        ...     endpoint_type=EndpointType.STANDARD
        ... )
    """
    # Validate endpoint name
    if not _is_valid_endpoint_name(endpoint_name):
        raise ValueError(f"Invalid endpoint name '{endpoint_name}'. Must contain only letters, numbers, and underscores.")

    # Check if endpoint already exists
    if _endpoint_exists(client, endpoint_name):
        raise ValueError(f"Endpoint '{endpoint_name}' already exists. Use a different name or delete the existing endpoint.")

    poll_interval_seconds = 10

    try:
        print(f"Creating vector search endpoint: '{endpoint_name}' (type: {endpoint_type.value})")

        # Prepare endpoint configuration
        endpoint_config = {
            "name": endpoint_name,
            "endpoint_type": endpoint_type.value,
            **kwargs
        }

        # Create the endpoint
        created_endpoint = client.create_endpoint(**endpoint_config)
        print(f"✓ Endpoint '{endpoint_name}' creation initiated successfully")

        if not wait_for_ready:
            return created_endpoint

        # Wait for endpoint to become ready
        print(f"Waiting for endpoint '{endpoint_name}' to become ONLINE...")
        ready_endpoint = poll_vector_search_endpoint_status(
            client=client,
            endpoint_name=endpoint_name,
            # Never poll zero times: a max_wait_time below the poll interval
            # would otherwise time out without a single status check.
            max_attempts=max(1, max_wait_time // poll_interval_seconds),
            poll_interval_seconds=poll_interval_seconds
        )
        print(f"✓ Endpoint '{endpoint_name}' is now ready for use")
        return ready_endpoint

    except TimeoutError:
        # Propagate unchanged, as documented. The endpoint was created and is
        # merely slow; converting this into RuntimeError would make
        # create_endpoint_with_fallback create a second endpoint.
        print(f"✗ Timed out waiting for endpoint '{endpoint_name}' to become ONLINE")
        raise
    except Exception as e:
        error_msg = f"Failed to create endpoint '{endpoint_name}': {str(e)}"
        print(f"✗ {error_msg}")
        raise RuntimeError(error_msg) from e


def create_endpoint_with_fallback(
    client,
    username: str,
    endpoint_prefix: str = "vs_endpoint_",
    endpoint_type: EndpointType = EndpointType.STANDARD,
    wait_for_ready: bool = True,
    **kwargs
) -> tuple[Dict[str, Any], str]:
    """
    Create a vector search endpoint, retrying under alternative names.

    Up to three names are tried in order:
      1. the username-based primary name,
      2. the generic ``<prefix>fallback`` name,
      3. the fallback name with a Unix-timestamp suffix.

    A ValueError or RuntimeError from attempts 1 and 2 moves on to the next
    name; any exception from the third attempt propagates to the caller.

    Args:
        client: VectorSearchClient instance
        username: Username to base the endpoint naming on
        endpoint_prefix: Prefix for endpoint names (default: "vs_endpoint_")
        endpoint_type: Type of endpoint to create (default: STANDARD)
        wait_for_ready: Whether to wait for endpoint to become ONLINE
        **kwargs: Additional endpoint configuration parameters

    Returns:
        tuple[Dict[str, Any], str]: (created_endpoint, endpoint_name_used)

    Example:
        >>> endpoint, name = create_endpoint_with_fallback(
        ...     client=vector_client,
        ...     username="john.doe@company.com"
        ... )
        >>> print(f"Created endpoint: {name}")
    """
    primary_name, fallback_name = configure_vector_search_endpoint(username, endpoint_prefix)

    def _attempt(candidate_name: str) -> Dict[str, Any]:
        # Every attempt uses identical settings; only the name varies.
        return create_vector_search_endpoint(
            client=client,
            endpoint_name=candidate_name,
            endpoint_type=endpoint_type,
            wait_for_ready=wait_for_ready,
            **kwargs
        )

    # Attempt 1: username-based name
    try:
        return _attempt(primary_name), primary_name
    except (ValueError, RuntimeError) as primary_error:
        print(f"Primary endpoint name '{primary_name}' failed: {primary_error}")
        print(f"Attempting fallback endpoint name '{fallback_name}'...")

    # Attempt 2: generic fallback name
    try:
        return _attempt(fallback_name), fallback_name
    except (ValueError, RuntimeError) as fallback_error:
        # The timestamp is taken only now, at the moment the fallback failed.
        timestamp_name = f"{fallback_name}_{int(time.time())}"
        print(f"Fallback also failed: {fallback_error}")
        print(f"Final attempt with timestamp: '{timestamp_name}'...")

    # Attempt 3: timestamped name; failures are not handled here
    return _attempt(timestamp_name), timestamp_name


def configure_vector_search_endpoint(username: str, endpoint_prefix: str = "vs_endpoint_") -> tuple[str, str]:
    """
    Derive the primary and fallback endpoint names for a user.

    The primary name is the prefix followed by the sanitized username; the
    fallback name is the prefix followed by the literal ``fallback`` and does
    not depend on the user.

    Args:
        username: The username to base the endpoint naming on
        endpoint_prefix: Prefix to use for endpoint names (default: "vs_endpoint_")

    Returns:
        tuple[str, str]: (primary_endpoint_name, fallback_endpoint_name)

    Example:
        >>> primary, fallback = configure_vector_search_endpoint("john.doe")
        >>> print(f"Primary: {primary}, Fallback: {fallback}")
        Primary: vs_endpoint_john_doe, Fallback: vs_endpoint_fallback
    """
    user_slug = _sanitize_username_for_endpoint(username)
    return f"{endpoint_prefix}{user_slug}", f"{endpoint_prefix}fallback"


def _sanitize_username_for_endpoint(username: str) -> str:
    """
    Turn a username into a fragment usable inside an endpoint name.

    Steps, in order:
    - lowercase the input
    - replace every dot with an underscore
    - drop (not replace) every character other than a-z, 0-9 and underscore,
      so "user@example.com" becomes "userexample_com"
    - prefix "user_" when the result starts with a digit
    - fall back to "anonymous" when nothing is left

    Args:
        username: Raw username string

    Returns:
        str: Sanitized username suitable for endpoint naming
    """
    import re

    dots_replaced = username.lower().replace(".", "_")
    cleaned = re.sub(r'[^a-z0-9_]', '', dots_replaced)

    if not cleaned:
        return "anonymous"
    if cleaned[0].isdigit():
        return f"user_{cleaned}"
    return cleaned


def _is_valid_endpoint_name(name: str) -> bool:
    """
    Validate endpoint name format.

    A valid name is 1 to 255 characters long and consists solely of ASCII
    letters, digits and underscores.

    Args:
        name: Endpoint name to validate

    Returns:
        bool: True if name is valid, False otherwise
    """
    import re

    # Empty (or None) and over-long names are rejected up front.
    if not name or len(name) > 255:
        return False

    # fullmatch is required here: with re.match, a pattern ending in `$`
    # also matches just before a trailing newline, so "name\n" was accepted.
    return re.fullmatch(r'[a-zA-Z0-9_]+', name) is not None


def _endpoint_exists(client, endpoint_name: str) -> bool:
    """
    Report whether ``client.get_endpoint`` succeeds for the given name.

    Any exception raised by the lookup is treated as "does not exist", so
    connectivity or permission failures are indistinguishable from a missing
    endpoint here.

    Args:
        client: VectorSearchClient instance
        endpoint_name: Name to check

    Returns:
        bool: True if the lookup succeeded, False if it raised
    """
    try:
        client.get_endpoint(endpoint_name)
    except Exception:
        return False
    else:
        return True


def poll_vector_search_endpoint_status(
    client,
    endpoint_name: str,
    max_attempts: int = 180,
    poll_interval_seconds: int = 10,
    log_frequency: int = 10
) -> Dict[str, Any]:
    """
    Poll a vector search endpoint until it reports the ONLINE state.

    Each attempt fetches the endpoint via ``client.get_endpoint`` and reads
    the state from ``endpoint_status`` (or ``status`` when the former is
    absent). The state is upper-cased before comparison. While the state is
    PROVISIONING or empty, or during the first six attempts regardless of
    state, the function sleeps and tries again. Any other state raises.

    Args:
        client: VectorSearchClient instance used to query endpoint status
        endpoint_name: Name of the vector search endpoint to monitor
        max_attempts: Maximum number of polling attempts before timeout (default: 180)
        poll_interval_seconds: Time to wait between status checks in seconds (default: 10)
        log_frequency: How often to log status updates (every N attempts, default: 10)

    Returns:
        Dict[str, Any]: The endpoint configuration object when status becomes ONLINE

    Raises:
        RuntimeError: When the endpoint is in an unexpected state after the
            first six attempts, or when the status payload cannot be parsed
        TimeoutError: When max_attempts is exceeded without reaching ONLINE status

    Example:
        >>> endpoint = poll_vector_search_endpoint_status(
        ...     client=vector_client,
        ...     endpoint_name="my-search-endpoint",
        ...     max_attempts=120,
        ...     poll_interval_seconds=15
        ... )
    """
    last_known_state = ""

    for attempt_number in range(max_attempts):
        try:
            endpoint_config = client.get_endpoint(endpoint_name)

            # Status lives under "endpoint_status", with "status" as the
            # alternative key (handles different SDK versions).
            alternate_status = endpoint_config.get("status", {})
            status_info = endpoint_config.get("endpoint_status", alternate_status)
            current_state = status_info.get("state", "").upper()

            # Remembered for the timeout message
            last_known_state = current_state

            if current_state == "ONLINE":
                print(f"✓ Endpoint '{endpoint_name}' is now ONLINE after {attempt_number + 1} attempts")
                return endpoint_config

            # Any state is tolerated during the first six attempts; after
            # that only PROVISIONING or a missing state keeps the loop going.
            keep_waiting = attempt_number < 6 or current_state in ("PROVISIONING", "")
            if not keep_waiting:
                raise RuntimeError(
                    f"Endpoint '{endpoint_name}' entered unexpected state '{current_state}'. "
                    f"Full endpoint info: {endpoint_config}"
                )

            # Progress is logged on every log_frequency-th attempt only
            if attempt_number % log_frequency == 0:
                status_display = current_state or "UNKNOWN"
                print(f"[{attempt_number + 1:>3}/{max_attempts}] "
                      f"Endpoint '{endpoint_name}' status: {status_display}, waiting...")

            time.sleep(poll_interval_seconds)

        except (KeyError, AttributeError) as e:
            # The endpoint payload did not have the expected structure
            raise RuntimeError(
                f"Failed to parse endpoint status for '{endpoint_name}': {e}"
            ) from e

    # Attempt budget exhausted without reaching ONLINE
    raise TimeoutError(
        f"Timeout after {max_attempts} attempts ({max_attempts * poll_interval_seconds}s) "
        f"waiting for endpoint '{endpoint_name}' to become ONLINE. "
        f"Last known state: '{last_known_state}'"
    )


# Additional utility functions
def list_vector_search_endpoints(client) -> List[Dict[str, Any]]:
    """
    Return the vector search endpoints reported by ``client.list_endpoints``.

    When the client's response exposes a ``get`` method (dict-like), the
    value under its ``"endpoints"`` key is returned, or an empty list if the
    key is missing. Any other response is returned unchanged. Failures are
    printed and reported as an empty list rather than raised.

    Args:
        client: VectorSearchClient instance

    Returns:
        List[Dict[str, Any]]: List of endpoint configurations
    """
    try:
        listing = client.list_endpoints()
        if hasattr(listing, 'get'):
            return listing.get("endpoints", [])
        return listing
    except Exception as e:
        print(f"Failed to list endpoints: {e}")
        return []


def delete_vector_search_endpoint(client, endpoint_name: str, force: bool = False) -> bool:
    """
    Delete a vector search endpoint, asking for confirmation unless forced.

    Without ``force`` the user is prompted via ``input``; only "y" or "yes"
    (case-insensitive) proceeds. A failed delete is printed, not raised.

    Args:
        client: VectorSearchClient instance
        endpoint_name: Name of the endpoint to delete
        force: Whether to force deletion without confirmation

    Returns:
        bool: True if deletion was successful, False if it was cancelled or failed
    """
    if not force:
        answer = input(f"Are you sure you want to delete endpoint '{endpoint_name}'? (y/N): ")
        confirmed = answer.lower() in ('y', 'yes')
        if not confirmed:
            print("Deletion cancelled.")
            return False

    try:
        client.delete_endpoint(endpoint_name)
        print(f"✓ Endpoint '{endpoint_name}' deleted successfully")
    except Exception as e:
        print(f"✗ Failed to delete endpoint '{endpoint_name}': {e}")
        return False
    return True

# Index Management Utilities

In [0]:
from dataclasses import dataclass
from typing import List, Dict, Any
import time


class IndexPipelineType(Enum):
    """Supported vector index pipeline types.

    The member's ``.value`` string is what create_or_sync_vector_index passes
    to ``client.create_delta_sync_index`` as ``pipeline_type``.
    """
    # Default used by IndexConfiguration and create_or_sync_vector_index.
    TRIGGERED = "TRIGGERED"
    # Presumably keeps the index continuously in sync with the source table -- verify against the SDK docs.
    CONTINUOUS = "CONTINUOUS"


class IndexStatus(Enum):
    """Vector index status states.

    NOTE(review): not referenced by any function visible in this notebook;
    wait_for_index_to_be_ready compares against string literals instead.
    """
    PROVISIONING = "PROVISIONING"
    ONLINE = "ONLINE"
    OFFLINE = "OFFLINE"
    FAILED = "FAILED"
    SYNCING = "SYNCING"


@dataclass
class IndexConfiguration:
    """Validated bundle of the settings needed to create a vector index.

    Construction fails with ValueError when the embedding dimension is not
    positive or when any of the string fields is empty.
    """
    # Name of the source table (create_or_sync_vector_index passes the
    # catalog.schema.table form).
    source_table_name: str
    # Name of the index (create_or_sync_vector_index passes the
    # catalog.schema.index form).
    index_name: str
    # Vector search endpoint associated with the index.
    endpoint_name: str
    # Primary key column name.
    primary_key: str
    # Column holding the embedding vectors.
    embedding_vector_column: str
    # Embedding dimension; must be greater than zero.
    embedding_dimension: int
    pipeline_type: IndexPipelineType = IndexPipelineType.TRIGGERED
    sync_computed_embeddings: bool = False

    def __post_init__(self):
        """Reject a non-positive dimension or any empty required field."""
        if self.embedding_dimension <= 0:
            raise ValueError("Embedding dimension must be positive")

        required_values = (
            self.source_table_name,
            self.index_name,
            self.endpoint_name,
            self.primary_key,
            self.embedding_vector_column,
        )
        if any(not value for value in required_values):
            raise ValueError("All required configuration fields must be provided")


def create_or_sync_vector_index(
    client,
    catalog_name: str,
    schema_name: str,
    table_name: str,
    index_name: str,
    endpoint_name: str,
    primary_key: str = "id",
    embedding_dimension: int = 1024,
    embedding_vector_column: str = "embedding",
    pipeline_type: IndexPipelineType = IndexPipelineType.TRIGGERED,
    sync_computed_embeddings: bool = False,
    wait_for_ready: bool = True,
    max_wait_time: int = 3600  # 1 hour
) -> Dict[str, Any]:
    """
    Create a new vector index, or trigger a sync if it already exists.

    The source table and index names are qualified as
    ``catalog.schema.name``. The parameters are validated by constructing an
    IndexConfiguration. If the index cannot be retrieved from the endpoint, a
    delta sync index is created; otherwise ``sync()`` is called on the
    existing index. Optionally waits until the index reports ready.

    Args:
        client: VectorSearchClient instance
        catalog_name: Unity Catalog name
        schema_name: Schema name within the catalog
        table_name: Source table name containing embeddings
        index_name: Name for the vector search index
        endpoint_name: Vector search endpoint to host the index
        primary_key: Primary key column name (default: "id")
        embedding_dimension: Dimension of embedding vectors (default: 1024)
        embedding_vector_column: Column containing embedding vectors (default: "embedding")
        pipeline_type: Index pipeline type (default: TRIGGERED)
        sync_computed_embeddings: Whether to sync computed embeddings (default: False)
        wait_for_ready: Whether to wait for index to become ready (default: True)
        max_wait_time: Maximum time to wait for readiness in seconds (default: 3600)

    Returns:
        Dict[str, Any]: The ready index status when ``wait_for_ready`` is
        True, otherwise the index object returned by ``get_index``.

    Raises:
        ValueError: If configuration parameters are invalid
        TimeoutError: If index doesn't become ready within max_wait_time
        RuntimeError: If index creation/sync fails

    Example:
        >>> index = create_or_sync_vector_index(
        ...     client=vector_client,
        ...     catalog_name="my_catalog",
        ...     schema_name="my_schema",
        ...     table_name="embeddings_table",
        ...     index_name="my_vector_index",
        ...     endpoint_name="my_endpoint",
        ...     embedding_dimension=1536
        ... )
    """
    print(f"Catalog name: {catalog_name}")
    print(f"Schema name: {schema_name}")
    print(f"endpoint_name name: {endpoint_name}")

    # Construct full table and index names
    source_table_fullname = f"{catalog_name}.{schema_name}.{table_name}"
    index_fullname = f"{catalog_name}.{schema_name}.{index_name}"

    # Constructed only for its __post_init__ validation; raises ValueError
    # on bad parameters before any service call is made.
    IndexConfiguration(
        source_table_name=source_table_fullname,
        index_name=index_fullname,
        endpoint_name=endpoint_name,
        primary_key=primary_key,
        embedding_vector_column=embedding_vector_column,
        embedding_dimension=embedding_dimension,
        pipeline_type=pipeline_type,
        sync_computed_embeddings=sync_computed_embeddings
    )

    print(f"Managing vector index: {index_fullname}")
    print(f"  Source table: {source_table_fullname}")
    print(f"  Target endpoint: {endpoint_name}")
    print(f"  Embedding dimension: {embedding_dimension}")

    try:
        if not index_exists(client, endpoint_name, index_fullname):
            print(f"Creating new vector index '{index_fullname}' on endpoint '{endpoint_name}'...")

            # Create the delta sync index
            client.create_delta_sync_index(
                endpoint_name=endpoint_name,
                index_name=index_fullname,
                source_table_name=source_table_fullname,
                pipeline_type=pipeline_type.value,
                primary_key=primary_key,
                embedding_dimension=embedding_dimension,
                embedding_vector_column=embedding_vector_column,
                sync_computed_embeddings=sync_computed_embeddings
            )

            print(f"✓ Index creation initiated successfully")

        else:
            print(f"Index '{index_fullname}' already exists, triggering sync...")

            # Get existing index and trigger sync
            existing_index = get_index(client, endpoint_name, index_fullname)
            sync_result = existing_index.sync()

            # sync() may return nothing; a missing payload must not turn a
            # successfully triggered sync into a reported failure.
            sync_id = sync_result.get('sync_id', 'N/A') if hasattr(sync_result, 'get') else 'N/A'

            print(f"✓ Index sync triggered successfully")
            print(f"  Sync operation ID: {sync_id}")

        if not wait_for_ready:
            # Return current index state without waiting
            return get_index(client, endpoint_name, index_fullname)

        print(f"Waiting for index '{index_fullname}' to be ready...")
        ready_index = wait_for_index_to_be_ready(
            client=client,
            endpoint_name=endpoint_name,
            index_name=index_fullname,
            max_wait_time=max_wait_time
        )
        print(f"✓ Index '{index_fullname}' is now ready for queries")
        return ready_index

    except TimeoutError:
        # Propagate unchanged, as documented: the create/sync itself
        # succeeded and the index is merely not ready yet.
        print(f"✗ Timed out waiting for index '{index_fullname}' to become ready")
        raise
    except Exception as e:
        error_msg = f"Failed to create or sync index '{index_fullname}': {str(e)}"
        print(f"✗ {error_msg}")
        raise RuntimeError(error_msg) from e


def index_exists(client, endpoint_name: str, index_name: str) -> bool:
    """
    Report whether ``client.get_index`` succeeds for the given index.

    Any exception from the lookup (not found, access denied, etc.) yields
    False, so "missing" and "inaccessible" are not distinguished.

    Args:
        client: VectorSearchClient instance
        endpoint_name: Name of the vector search endpoint
        index_name: Full name of the index to check

    Returns:
        bool: True if the lookup succeeded, False if it raised

    Example:
        >>> exists = index_exists(client, "my_endpoint", "catalog.schema.my_index")
        >>> print(f"Index exists: {exists}")
    """
    try:
        client.get_index(endpoint_name, index_name)
    except Exception:
        return False
    else:
        return True


def get_index(client, endpoint_name: str, index_name: str) -> Any:
    """
    Fetch a vector search index object, wrapping failures in RuntimeError.

    Args:
        client: VectorSearchClient instance
        endpoint_name: Name of the vector search endpoint
        index_name: Full name of the index to retrieve

    Returns:
        Whatever ``client.get_index(endpoint_name, index_name)`` returns

    Raises:
        RuntimeError: If the client raises for any reason; the original
            exception is attached as the cause

    Example:
        >>> index = get_index(client, "my_endpoint", "catalog.schema.my_index")
        >>> status = index.describe()
    """
    try:
        return client.get_index(endpoint_name, index_name)
    except Exception as e:
        raise RuntimeError(
            f"Failed to retrieve index '{index_name}' from endpoint '{endpoint_name}'. "
            f"Verify that both the endpoint and index exist and are accessible. Error: {str(e)}"
        ) from e


def wait_for_index_to_be_ready(
    client,
    endpoint_name: str,
    index_name: str,
    max_wait_time: int = 3600,  # 1 hour default
    poll_interval_seconds: int = 30,
    log_frequency: int = 4  # Log every 4th attempt (every 2 minutes with 30s intervals)
) -> Dict[str, Any]:
    """
    Poll a vector search index until it reaches a ready state for querying.

    Each attempt fetches the index, calls ``describe()`` and reads the
    ``ready`` flag and ``detailed_state`` from its status. The function
    returns as soon as ``ready`` is truthy. Otherwise the upper-cased state
    decides what happens next:

    - contains "FAILED" or starts with "OFFLINE": raise immediately
    - starts with "PROVISIONING", "SYNCING" or "ONLINE": keep waiting
    - anything else: tolerated for the first 10 attempts, then raise

    States are matched by family rather than exact string so that suffixed
    values such as "PROVISIONING_INITIAL_SNAPSHOT" or "OFFLINE_FAILED" are
    classified correctly.

    Args:
        client: VectorSearchClient instance
        endpoint_name: Name of the vector search endpoint hosting the index
        index_name: Full name of the index to monitor
        max_wait_time: Maximum time to wait in seconds (default: 3600). At
            least one status check is always performed.
        poll_interval_seconds: Time between status checks in seconds (default: 30)
        log_frequency: How often to log status updates (every N attempts, default: 4)

    Returns:
        Dict[str, Any]: The index status information when ready

    Raises:
        RuntimeError: If index enters an error state or cannot be accessed
        TimeoutError: If index doesn't become ready within max_wait_time

    Example:
        >>> ready_index = wait_for_index_to_be_ready(
        ...     client=vector_client,
        ...     endpoint_name="my_endpoint",
        ...     index_name="catalog.schema.my_index",
        ...     max_wait_time=1800  # 30 minutes
        ... )
    """
    # Always check at least once, even when max_wait_time is shorter than
    # one poll interval.
    max_attempts = max(1, max_wait_time // poll_interval_seconds)
    last_known_status = ""

    print(f"Monitoring index readiness (max wait: {max_wait_time}s, poll interval: {poll_interval_seconds}s)")

    for attempt_number in range(max_attempts):
        try:
            # Get current index status
            index_obj = get_index(client, endpoint_name, index_name)
            index_status = index_obj.describe()

            # Extract status information (handle different response formats)
            if hasattr(index_status, 'get'):
                status_section = index_status.get("status", {})
                current_status = status_section.get("ready", False)
                detailed_status = status_section.get("detailed_state", "UNKNOWN")
            else:
                # Fallback for different SDK versions
                current_status = getattr(index_status, 'ready', False)
                detailed_status = getattr(index_status, 'detailed_state', "UNKNOWN")

            last_known_status = detailed_status

            # Check if index is ready for queries
            if current_status:
                print(f"✓ Index '{index_name}' is now READY after {attempt_number + 1} attempts")
                print(f"  Final status: {detailed_status}")
                return index_status

            # str() keeps a non-string state (None, enum) from crashing the
            # classification below; it then falls into the unknown branch.
            normalized_state = str(detailed_status).upper()

            # Failure is checked first so that a state belonging to both
            # families (e.g. "ONLINE_PIPELINE_FAILED") is reported as an
            # error instead of being waited on.
            is_error_state = (
                "FAILED" in normalized_state
                or normalized_state.startswith("OFFLINE")
            )
            is_transitional_state = normalized_state.startswith(
                ("PROVISIONING", "SYNCING", "ONLINE")
            )

            if is_error_state:
                raise RuntimeError(
                    f"Index '{index_name}' entered error state: {detailed_status}. "
                    f"Check the index configuration and source table. "
                    f"Full status: {index_status}"
                )

            if is_transitional_state:
                # Expected while the index is being built or refreshed
                if attempt_number % log_frequency == 0:
                    elapsed_time = (attempt_number + 1) * poll_interval_seconds
                    remaining_time = max_wait_time - elapsed_time
                    print(f"[{attempt_number + 1:>3}/{max_attempts}] "
                          f"Index status: {detailed_status}, "
                          f"elapsed: {elapsed_time}s, remaining: {remaining_time}s")

                time.sleep(poll_interval_seconds)

            elif attempt_number < 10:
                # Unknown state - give it a few attempts to settle
                if attempt_number % log_frequency == 0:
                    print(f"[{attempt_number + 1:>3}/{max_attempts}] "
                          f"Unknown status: {detailed_status}, continuing...")
                time.sleep(poll_interval_seconds)

            else:
                raise RuntimeError(
                    f"Index '{index_name}' in unknown state: {detailed_status}. "
                    f"Full status: {index_status}"
                )

        except RuntimeError:
            # Raised above or by get_index; propagate unchanged.
            raise
        except Exception as e:
            # Unexpected errors (e.g. from describe()) are retried a few
            # times in case they are transient.
            if attempt_number < 5:
                print(f"Warning: Error checking index status (attempt {attempt_number + 1}): {e}")
                time.sleep(poll_interval_seconds)
                continue
            else:
                raise RuntimeError(
                    f"Persistent error checking status for index '{index_name}': {e}"
                ) from e

    # Timeout occurred
    elapsed_time = max_attempts * poll_interval_seconds
    raise TimeoutError(
        f"Timeout after {elapsed_time}s waiting for index '{index_name}' to become ready. "
        f"Last known status: {last_known_status}. "
        f"Consider increasing max_wait_time or checking the index configuration."
    )


def list_indexes_on_endpoint(client, endpoint_name: str) -> List[Dict[str, Any]]:
    """
    Return the ``indexes`` entry of an endpoint's ``endpoint_status``.

    The endpoint is fetched with ``client.get_endpoint``; a missing
    ``endpoint_status`` or ``indexes`` key yields an empty list. Failures are
    printed and reported as an empty list rather than raised.

    NOTE(review): this relies on the endpoint payload carrying an
    ``endpoint_status.indexes`` list -- confirm against the SDK, which may
    expose index listings through a dedicated call instead.

    Args:
        client: VectorSearchClient instance
        endpoint_name: Name of the vector search endpoint

    Returns:
        List[Dict[str, Any]]: List of index information dictionaries

    Example:
        >>> indexes = list_indexes_on_endpoint(client, "my_endpoint")
        >>> print(f"Found {len(indexes)} indexes")
    """
    try:
        endpoint_info = client.get_endpoint(endpoint_name)
        status_section = endpoint_info.get("endpoint_status", {})
        return status_section.get("indexes", [])
    except Exception as e:
        print(f"Failed to list indexes for endpoint '{endpoint_name}': {e}")
        return []


def delete_vector_index(client, endpoint_name: str, index_name: str, force: bool = False) -> bool:
    """
    Delete a vector search index, asking for confirmation unless forced.

    Without ``force`` the user is prompted via ``input``; only "y" or "yes"
    (case-insensitive) proceeds. A failed delete is printed, not raised.

    Args:
        client: VectorSearchClient instance
        endpoint_name: Name of the vector search endpoint
        index_name: Full name of the index to delete
        force: Whether to skip confirmation prompt (default: False)

    Returns:
        bool: True if deletion successful, False if it was cancelled or failed

    Example:
        >>> deleted = delete_vector_index(client, "my_endpoint", "catalog.schema.my_index")
    """
    if not force:
        answer = input(f"Are you sure you want to delete index '{index_name}'? (y/N): ")
        confirmed = answer.lower() in ('y', 'yes')
        if not confirmed:
            print("Deletion cancelled.")
            return False

    try:
        client.delete_index(endpoint_name, index_name)
        print(f"✓ Index '{index_name}' deleted successfully")
    except Exception as e:
        print(f"✗ Failed to delete index '{index_name}': {e}")
        return False
    return True
