In [None]:
import pandas as pd
import sempy.fabric as fab

def get_workspace_metadata(workspace_id: str) -> pd.DataFrame:
    """
    Consolidate table/column and measure metadata for every dataset in a
    workspace into a single DataFrame.

    Collection is best-effort: if the tables or the measures of one dataset
    cannot be listed, a warning is printed and processing continues with the
    remaining data. Progress and errors are reported with ``print``.

    Args:
        workspace_id: The ID of the Microsoft Fabric workspace.

    Returns:
        A pandas DataFrame with the columns ``DatasetName``, ``DatasetId``,
        ``MetadataType``, ``TableName``, ``Column``, ``Type``,
        ``MeasureName`` and ``Expression`` (in that order). ``MetadataType``
        is ``'Table/Column'`` or ``'Measure'``; fields that do not apply to a
        row are left as missing values. When the datasets cannot be listed,
        or nothing could be collected, the frame is empty but still carries
        these columns so callers can rely on a stable schema.
    """
    # Single source of truth for the output schema, used on every return path.
    final_columns = [
        'DatasetName', 'DatasetId', 'MetadataType',
        'TableName',
        'Column', 'Type',            # Specific to Tables/Columns
        'MeasureName', 'Expression'  # Specific to Measures
    ]

    # 1. List all datasets in the workspace
    try:
        datasets = fab.list_datasets(workspace=workspace_id)
    except Exception as e:
        # Deliberately broad: any failure to reach the workspace is reported
        # and turned into an empty result instead of aborting the notebook.
        print(f"Error listing datasets in workspace {workspace_id}: {e}")
        return pd.DataFrame(columns=final_columns)

    all_metadata = []

    # 2. Iterate through each dataset
    for _, dataset_row in datasets.iterrows():
        dataset_id = dataset_row['Dataset ID']
        dataset_name = dataset_row['Dataset Name']

        print(f"Processing Dataset: {dataset_name} ({dataset_id})")

        # 3. Get Tables and Columns for the current dataset
        try:
            tables = fab.list_tables(
                dataset=dataset_id,
                workspace=workspace_id,
                include_columns=True
            ).assign(
                MetadataType='Table/Column',
                DatasetId=dataset_id,
                DatasetName=dataset_name
            ).rename(columns={'Name': 'TableName'})
        except Exception as e:
            print(f"  Warning: Could not list tables/columns for {dataset_name}. {e}")
        else:
            # Empty frames add no rows and only disturb dtype inference in
            # pd.concat (pandas >= 2.1 emits a FutureWarning for them).
            if not tables.empty:
                all_metadata.append(tables)

        # 4. Get Measures for the current dataset
        try:
            measures = fab.list_measures(
                dataset=dataset_id,
                workspace=workspace_id
            ).assign(
                MetadataType='Measure',
                DatasetId=dataset_id,
                DatasetName=dataset_name
            ).rename(columns={
                'Table Name': 'TableName',
                'Measure Name': 'MeasureName',
                'Measure Expression': 'Expression'
            })
        except Exception as e:
            print(f"  Warning: Could not list measures for {dataset_name}. {e}")
        else:
            if not measures.empty:
                all_metadata.append(measures)

    # 5. Concatenate everything that was collected
    if not all_metadata:
        return pd.DataFrame(columns=final_columns)

    # reindex both drops the extra source columns and creates any expected
    # column that no source frame provided, so no manual padding is needed.
    return pd.concat(all_metadata, ignore_index=True).reindex(columns=final_columns)


# --- USAGE EXAMPLE ---

# Placeholder workspace GUID: substitute the ID of your own Fabric workspace.
WORKSPACE_ID = "b1c1beef-4463-41b6-a24b-764acf8972f2"

# Collect table/column and measure metadata for every dataset in that workspace.
metadata_df = get_workspace_metadata(workspace_id=WORKSPACE_ID)

# Print a heading, then show the result.
# NOTE(review): `display` is not defined in this file; it is presumably the
# helper injected by the notebook environment - confirm before running as a script.
print("\n--- Consolidated Metadata DataFrame ---")
display(metadata_df)