# Exporting Dataplex Metadata

You can run a **metadata export job** to get a snapshot of your Dataplex Universal Catalog metadata (which consists of entries and aspects) for use in external systems.

### Defining the Export Scope

Every export job requires a **job scope** to define exactly what metadata to export. You must choose one of the following primary scopes:

- `Organization`: Export all metadata belonging to your organization.
- `Projects`: Export metadata from one or more specified projects.
- `Entry groups`: Export metadata from one or more specified entry groups.

You can further refine the scope by specifying the entry types or aspect types to include, ensuring the job only exports the specific entries and aspects you need.

In [1]:
import json
import os
from typing import Any, Dict, Optional

import google.auth
from google.api_core.exceptions import Conflict
from google.auth.transport.requests import AuthorizedSession
from google.cloud import bigquery, storage
from google.cloud.exceptions import NotFound
from requests import HTTPError

In [2]:
# --- Configuration ---
# @title Metadata Export Configuration { display-mode: "form" }

# GCP Settings
# The project falls back to the lab default when GOOGLE_CLOUD_PROJECT is unset.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "bq-sme-governance-build") #@param {type:"string"}
LOCATION = "us-central1" #@param ["us-central1", "us-east1", "us-west1", "europe-west1", "asia-southeast1"]

# Export Settings
# The export bucket name is derived from the project ID.
EXPORT_BUCKET_NAME = f"{PROJECT_ID}-lab-data-export" #@param {type:"string"}

# BigQuery Configuration
DATASET_ID = "dataplex_metadata" #@param {type:"string"}
TABLE_ID = "metadata_export" #@param {type:"string"}
DATASET_LOCATION = "us-central1" #@param ["US", "EU", "us-central1", "us-east1", "europe-west1"]

# Echo the effective settings, one "label: value" line per setting.
print("Configuration loaded:")
print(
    "\n".join(
        f"   {label}: {value}"
        for label, value in (
            ("Project", PROJECT_ID),
            ("Location", LOCATION),
            ("Export Bucket", EXPORT_BUCKET_NAME),
            ("BigQuery Dataset", DATASET_ID),
            ("BigQuery Table", TABLE_ID),
            ("Dataset Location", DATASET_LOCATION),
        )
    )
)

Configuration loaded:
   Project: bq-sme-governance-build
   Location: us-central1
   Export Bucket: bq-sme-governance-build-lab-data-export
   BigQuery Dataset: dataplex_metadata
   BigQuery Table: metadata_export
   Dataset Location: us-central1


In [3]:
def call_google_api(
    url: str,
    http_verb: str,
    request_body: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Makes authenticated API calls to Google Cloud services.

    Credentials are resolved with ``google.auth.default`` using the
    ``cloud-platform`` scope, and the request is sent through an
    ``AuthorizedSession`` that is closed once the response has been received.

    Args:
        url: The complete API endpoint URL
        http_verb: HTTP method (GET, POST, PUT, DELETE, etc.)
        request_body: Optional request payload as a dictionary; sent as JSON

    Returns:
        Response data as a dictionary (empty dict for 204 responses and for
        any other successful response that carries no body)

    Raises:
        RuntimeError: If the API call fails with an HTTP error status. The
            message contains the status code and the response text, and the
            original ``HTTPError`` is attached as the cause.
    """
    # Only the credentials are needed here; the project detected by
    # google.auth.default() is intentionally ignored.
    creds, _ = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )

    # The context manager closes the session (and its connection pool) even
    # when the request fails, so repeated calls do not leak sessions.
    with AuthorizedSession(creds) as authed_session:
        try:
            response = authed_session.request(
                method=http_verb,
                url=url,
                json=request_body
            )
            response.raise_for_status()
        except HTTPError as e:
            error_message = (
                f"API call failed with status {e.response.status_code}: "
                f"{e.response.text}"
            )
            print(error_message)
            raise RuntimeError(error_message) from e

    # Handle no-content responses: an explicit 204, or a success status with
    # an empty body, neither of which can be decoded as JSON.
    if response.status_code == 204 or not response.content:
        return {}

    return response.json()

In [4]:
def create_storage_bucket() -> None:
    """
    Ensures the GCS bucket used for metadata exports exists.

    Reads the module-level PROJECT_ID, EXPORT_BUCKET_NAME and LOCATION settings.
    The bucket is looked up first; only when the lookup reports NotFound is it
    created in LOCATION. A status line is printed for every outcome, and any
    unexpected creation error is printed and re-raised.
    """
    gcs = storage.Client(project=PROJECT_ID)
    already_exists_msg = f"Bucket {EXPORT_BUCKET_NAME} already exists."

    try:
        gcs.get_bucket(EXPORT_BUCKET_NAME)
        print(already_exists_msg)
    except NotFound:
        try:
            new_bucket = gcs.create_bucket(EXPORT_BUCKET_NAME, location=LOCATION)
            print(f"Bucket {new_bucket.name} created in {LOCATION}.")
        except Conflict:
            # The bucket appeared between the lookup and the create call.
            print(already_exists_msg)
        except Exception as err:
            print(f"Error creating bucket: {err}")
            raise


# Make sure the export bucket exists; the export request defined further down
# uses gs://{EXPORT_BUCKET_NAME}/ as its output path.
create_storage_bucket()

Bucket bq-sme-governance-build-lab-data-export already exists.


In [5]:
# Define the export scope - choose exactly one of the following options.
# Each option builds the request body for the metadata job; only the active
# (uncommented) assignment is sent when the job is created.

# Option 1: Export metadata for specific entry groups
# Entry groups are given as full resource names
# (projects/<project>/locations/<location>/entryGroups/<id>).
# request_body = {
#     "type": "EXPORT",
#     "export_spec": {
#         "output_path": f"gs://{EXPORT_BUCKET_NAME}/",
#         "scope": {
#             "entryGroups": [
#                 f"projects/{PROJECT_ID}/locations/{LOCATION}/entryGroups/@bigquery",
#                 # Add additional entry groups as needed
#             ],
#         },
#     }
# }

# Option 2: Export metadata for specific projects
# request_body = {
#     "type": "EXPORT",
#     "export_spec": {
#         "output_path": f"gs://{EXPORT_BUCKET_NAME}/",
#         "scope": {
#             "projects": [
#                 f"projects/{PROJECT_ID}"
#             ]
#         }
#     }
# }

# Option 3: Export metadata for the entire organization (currently active)
request_body = {
    "type": "EXPORT",
    "export_spec": {
        "output_path": f"gs://{EXPORT_BUCKET_NAME}/",
        "scope": {
            # organizationLevel is a boolean field, so send a real JSON
            # boolean rather than the string "true".
            "organizationLevel": True,
        },
    }
}

In [6]:
# Create and trigger the metadata export job
# Submit the export request to the metadataJobs collection of the configured
# project and location.
url = (
    "https://dataplex.googleapis.com/v1/"
    f"projects/{PROJECT_ID}/locations/{LOCATION}/metadataJobs"
)
response = call_google_api(url=url, http_verb="POST", request_body=request_body)

# Keep the job's resource name; the status check reads it later.
metadata_job_target = response["metadata"]["target"]

# Show the full creation response as sorted, indented JSON.
pretty_json = json.dumps(response, sort_keys=True, indent=4)
print(pretty_json)


{
    "done": false,
    "metadata": {
        "@type": "type.googleapis.com/google.cloud.dataplex.v1.OperationMetadata",
        "apiVersion": "v1",
        "createTime": "2025-10-09T05:24:59.554690089Z",
        "requestedCancellation": false,
        "target": "projects/bq-sme-governance-build/locations/us-central1/metadataJobs/metadata-job-1bd77134-ae32-47f0-ab05-b1779eb53621",
        "verb": "create"
    },
    "name": "projects/bq-sme-governance-build/locations/us-central1/operations/operation-1759987498643-640b305aa6ff6-8f2e5ecb-a89e0cb3"
}


The metadata export takes approximately 20-25 minutes to complete. You can re-run the status-check cell below to monitor the progress.

Feel free to move on to the next section of the notebook; because of time constraints, a complete export is provided for that section of the lab.

In [8]:
# Check the status of the metadata export job
# Fetch the current state of the export job, addressed by the resource name
# captured when the job was created.
status_url = f"https://dataplex.googleapis.com/v1/{metadata_job_target}"
response = call_google_api(url=status_url, http_verb="GET")

# Show the job resource (including its status) as sorted, indented JSON.
pretty_json = json.dumps(response, sort_keys=True, indent=4)
print(pretty_json)

{
    "createTime": "2025-10-09T05:24:59.550448919Z",
    "exportResult": {},
    "exportSpec": {
        "outputPath": "gs://bq-sme-governance-build-lab-data-export/",
        "scope": {
            "organizationLevel": true
        }
    },
    "name": "projects/bq-sme-governance-build/locations/us-central1/metadataJobs/metadata-job-1bd77134-ae32-47f0-ab05-b1779eb53621",
    "status": {
        "message": "Logs for this MetadataJob can be found at: https://console.cloud.google.com/logs/query;query=resource.type=\"dataplex.googleapis.com/MetadataJob\"\nresource.labels.location=\"us-central1\"\nresource.labels.metadata_job_id=\"metadata-job-1bd77134-ae32-47f0-ab05-b1779eb53621\";?project=184517388310\n",
        "state": "QUEUED"
    },
    "type": "EXPORT",
    "uid": "330d2007-2092-4e77-9d0c-460a8ecb021d",
    "updateTime": "2025-10-09T05:25:03.946494513Z"
}


## Analyzing Dataplex Metadata in BigQuery

We've just exported our Dataplex metadata to GCS. When you want to analyze this metadata in BigQuery, you can create an external table. This lets you query the data directly from its exported location without needing to load or transform it first.

### Why a Business Would Import Dataplex Metadata into BigQuery

There are several key reasons why a business would want to bring its Dataplex metadata into BigQuery for analysis:

* **Advanced Querying and Analysis**: By having the metadata in BigQuery, you can run SQL queries to gain deeper insights.
    * *Example*: Count the number of entries by entry group, or find all entries that have a specific aspect (like data quality scores).
    ```sql
    -- Example: Count entries per entry group
    SELECT
      entry_group,
      COUNT(entry_id) AS number_of_entries
    FROM
      `your_project.your_dataset.dataplex_metadata_external_table`
    GROUP BY
      entry_group
    ORDER BY
      number_of_entries DESC;
    ```

* **Integration with Analytics Tools**: Importing the metadata to BigQuery allows you to analyze your metadata alongside other business data, or visualize it in tools like Looker Studio.

* **Programmatic Processing**: For businesses that need to process large volumes of metadata, exporting it allows for programmatic manipulation using SQL. This processed metadata can then be imported back into Dataplex via API if needed.

* **Custom Applications and Third-Party Tools**: You can integrate your metadata into custom-built applications (like a data governance dashboard) or other third-party tools that connect with BigQuery, extending the functionality and use of your metadata.

In [9]:
def _metadata_export_schema() -> list:
    """Builds the BigQuery schema for one exported metadata record (a single `entry` RECORD)."""

    def nullable(name: str, field_type: str = "STRING") -> bigquery.SchemaField:
        # Every scalar column in the export schema is NULLABLE.
        return bigquery.SchemaField(name, field_type, "NULLABLE")

    ancestors = bigquery.SchemaField(
        "ancestors", "RECORD", "REPEATED",
        fields=[
            nullable("name"),
            nullable("type"),
        ],
    )

    entry_source = bigquery.SchemaField(
        "entrySource", "RECORD", "NULLABLE",
        fields=[
            nullable("resource"),
            nullable("system"),
            nullable("platform"),
            nullable("displayName"),
            nullable("description"),
            nullable("labels", "JSON"),
            ancestors,
            nullable("createTime"),
            nullable("updateTime"),
            nullable("location"),
        ],
    )

    return [
        bigquery.SchemaField(
            "entry", "RECORD", "NULLABLE",
            fields=[
                nullable("name"),
                nullable("entryType"),
                nullable("createTime"),
                nullable("updateTime"),
                nullable("aspects", "JSON"),
                nullable("parentEntry"),
                nullable("fullyQualifiedName"),
                entry_source,
            ],
        )
    ]


def create_hive_partitioned_external_table(
    project_id: str,
    export_bucket_name: str
) -> None:
    """
    Creates a Hive-partitioned external table in BigQuery pointing to exported metadata.

    This function is idempotent - it can be run multiple times safely. The dataset
    is created if missing; if the table already exists, its schema and external
    data configuration are updated in place, otherwise the table is created.

    The table reads newline-delimited JSON files from ``gs://<bucket>/*`` and
    lets BigQuery detect Hive partition keys automatically (mode ``AUTO``) using
    ``gs://<bucket>/`` as the source URI prefix.

    The dataset ID, table ID and dataset location are taken from the
    module-level DATASET_ID, TABLE_ID and DATASET_LOCATION settings.

    Args:
        project_id: Google Cloud project ID
        export_bucket_name: GCS bucket name containing the exported metadata files
    """
    client = bigquery.Client(project=project_id)

    # Client.dataset() is deprecated; build the reference explicitly against
    # the client's project, which is what Client.dataset() resolved to.
    dataset_ref = bigquery.DatasetReference(client.project, DATASET_ID)
    table_ref = dataset_ref.table(TABLE_ID)

    # Ensure dataset exists (idempotent)
    dataset = bigquery.Dataset(dataset_ref)
    dataset.location = DATASET_LOCATION
    client.create_dataset(dataset, exists_ok=True)
    print(f"Dataset '{DATASET_ID}' ready (created or already exists).")

    # Define table schema matching Dataplex metadata export format
    schema = _metadata_export_schema()

    # Configure external data source with Hive partitioning
    external_config = bigquery.ExternalConfig("NEWLINE_DELIMITED_JSON")
    external_config.source_uris = [f"gs://{export_bucket_name}/*"]

    hive_partitioning_options = bigquery.HivePartitioningOptions()
    hive_partitioning_options.mode = "AUTO"
    hive_partitioning_options.source_uri_prefix = f"gs://{export_bucket_name}/"
    external_config.hive_partitioning = hive_partitioning_options

    # Create or replace the table (idempotent)
    try:
        # Table exists: push the current schema and external configuration.
        existing_table = client.get_table(table_ref)
        existing_table.schema = schema
        existing_table.external_data_configuration = external_config
        updated_table = client.update_table(
            existing_table,
            ["schema", "external_data_configuration"]
        )
        print(
            f"Updated existing external table: "
            f"{updated_table.project}.{updated_table.dataset_id}.{updated_table.table_id}"
        )
    except NotFound:
        # Table doesn't exist: the Table object is only needed on this path.
        table = bigquery.Table(table_ref, schema=schema)
        table.external_data_configuration = external_config
        created_table = client.create_table(table)
        print(
            f"Created new external table: "
            f"{created_table.project}.{created_table.dataset_id}.{created_table.table_id}"
        )


def create_aspect_extraction_udf() -> None:
    """
    Creates (or replaces) the persistent JavaScript UDF `extract_aspect_types`.

    The UDF takes the aspects JSON object of an entry, walks its own keys,
    collects the `aspectType` of every aspect that has one, and returns the
    unique values sorted and joined with ', '. It returns NULL when the input is
    missing or when the JavaScript body throws.

    The routine is created in the module-level PROJECT_ID / DATASET_ID through
    the module-level `bq_client`. Errors are printed and re-raised.
    """
    routine_id = f"{PROJECT_ID}.{DATASET_ID}.extract_aspect_types"

    # Doubled braces are f-string escapes; the SQL sent to BigQuery contains
    # single braces, and the JavaScript body is a raw triple-quoted SQL string.
    udf_sql = f"""
    CREATE OR REPLACE FUNCTION `{routine_id}`(aspects_json JSON)
    RETURNS STRING
    LANGUAGE js AS r\"\"\"
      if (!aspects_json) return null;

      try {{
        const aspectTypes = [];

        // Iterate through all keys in the aspects object
        for (const aspectId in aspects_json) {{
          if (aspects_json.hasOwnProperty(aspectId)) {{
            const aspect = aspects_json[aspectId];
            if (aspect && aspect.aspectType) {{
              aspectTypes.push(aspect.aspectType);
            }}
          }}
        }}

        // Return unique, sorted aspect types as comma-separated string
        return [...new Set(aspectTypes)].sort().join(', ');
      }} catch (e) {{
        return null;
      }}
    \"\"\";
    """

    try:
        ddl_job = bq_client.query(udf_sql)
        ddl_job.result()  # block until the DDL statement has finished
        print(f"Created/updated UDF: {routine_id}")
    except Exception as err:
        print(f"Error creating UDF: {err}")
        raise


def create_unnested_metadata_view() -> None:
    """
    Creates (or replaces) a flattened view over the metadata export table.

    The view exposes the nested `entry` record as plain columns, parses the
    string timestamps into TIMESTAMP values, keeps labels and aspects as JSON,
    adds the aspect types computed by the `extract_aspect_types` UDF, and
    LEFT JOINs the unnested ancestors array, so an entry appears once per
    ancestor (or once with NULL ancestor columns). The partition columns
    project, year, month and day are passed through.

    Names come from the module-level PROJECT_ID, DATASET_ID and TABLE_ID; the
    statement is run through the module-level `bq_client`. Errors are printed
    and re-raised.
    """
    dataset_path = f"{PROJECT_ID}.{DATASET_ID}"
    view_id = f"{dataset_path}.vw_{TABLE_ID}_unnested"

    # DDL for the view; it depends on the external table and the UDF existing.
    view_sql = f"""
    CREATE OR REPLACE VIEW `{view_id}` AS
    SELECT
      -- Entry identification
      entry.name AS entry_name,
      entry.entryType AS entry_type,
      entry.fullyQualifiedName AS fully_qualified_name,
      entry.parentEntry AS parent_entry,

      -- Entry metadata (parse timestamps)
      PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*SZ', entry.createTime) AS entry_create_time,
      PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*SZ', entry.updateTime) AS entry_update_time,

      -- Entry Source: Resource information
      entry.entrySource.resource AS resource,
      entry.entrySource.system AS system,
      entry.entrySource.platform AS platform,
      entry.entrySource.displayName AS display_name,
      entry.entrySource.description AS description,
      entry.entrySource.location AS resource_location,
      PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*SZ', entry.entrySource.createTime) AS resource_create_time,
      PARSE_TIMESTAMP('%Y-%m-%dT%H:%M:%E*SZ', entry.entrySource.updateTime) AS resource_update_time,

      -- Labels (keep as JSON for flexibility)
      entry.entrySource.labels AS labels_json,

      -- Aspects: Keep full JSON for detailed analysis
      entry.aspects AS aspects_json,

      -- Extract aspect types using the UDF
      `{dataset_path}.extract_aspect_types`(entry.aspects) AS aspect_types,

      -- Ancestor information (unnest the ancestors array)
      ancestor.name AS ancestor_name,
      ancestor.type AS ancestor_type,

      -- Partition columns for efficient filtering
      project,
      year,
      month,
      day

    FROM
      `{dataset_path}.{TABLE_ID}`
    LEFT JOIN
      UNNEST(entry.entrySource.ancestors) AS ancestor
    """

    try:
        ddl_job = bq_client.query(view_sql)
        ddl_job.result()  # block until the DDL statement has finished
        print(f"Created/updated unnested view: {view_id}")
    except Exception as err:
        print(f"Error creating view: {err}")
        raise


# Initialize BigQuery client
# (module-level: create_aspect_extraction_udf, create_unnested_metadata_view
# and the query cells below all use this `bq_client`)
bq_client = bigquery.Client(project=PROJECT_ID)

# Create the external table
# (must run first: the view selects from this table)
create_hive_partitioned_external_table(PROJECT_ID, EXPORT_BUCKET_NAME)

# Create the UDF for aspect extraction
# (must exist before the view, which calls it)
create_aspect_extraction_udf()

# Create the unnested view
create_unnested_metadata_view()

Dataset 'dataplex_metadata' ready (created or already exists).
Created new external table: bq-sme-governance-build.dataplex_metadata.metadata_export
Created/updated UDF: bq-sme-governance-build.dataplex_metadata.extract_aspect_types
Created/updated unnested view: bq-sme-governance-build.dataplex_metadata.vw_metadata_export_unnested


In [None]:
# Query the unnested view for easier analysis
# This view flattens the nested structure making queries simpler
#
# The view returns one row per (entry, ancestor) pair, so the counts below use
# COUNT(DISTINCT ...) to avoid counting an entry once per ancestor.
# The WHERE clause restricts the scan to partitions of the current calendar year.

query = f"""
-- List the top 10 projects with the most resources
SELECT
    project,
    COUNT(DISTINCT resource) AS unique_resources,
    COUNT(DISTINCT entry_name) AS total_entries,
    COUNT(DISTINCT entry_type) AS entry_type_count
FROM
    `{PROJECT_ID}.{DATASET_ID}.vw_{TABLE_ID}_unnested`
WHERE
    year = EXTRACT(YEAR FROM CURRENT_DATE())
GROUP BY
    project
ORDER BY
    unique_resources DESC
LIMIT 10;
"""

# Run the query and render the result as a DataFrame.
# NOTE(review): `display` is not imported in this file - presumably the
# notebook (IPython) built-in; confirm if this is run outside a notebook.
df = bq_client.query(query).to_dataframe()
display(df)

In [10]:
# Analyze aspect types across all entries using the unnested view
# The view has already extracted aspect types, making this query much simpler
#
# `aspect_types` is the string produced by the extract_aspect_types UDF, which
# joins the types with ', '; the query splits on the same separator to get one
# row per aspect type. COUNT(DISTINCT ...) is used because the view repeats an
# entry once per ancestor.

query = f"""
SELECT
    TRIM(aspect_type) AS aspect_type,
    COUNT(DISTINCT entry_name) AS entry_count,
    COUNT(DISTINCT project) AS project_count,
    COUNT(DISTINCT system) AS system_count
FROM
    `{PROJECT_ID}.{DATASET_ID}.vw_{TABLE_ID}_unnested`,
    UNNEST(SPLIT(aspect_types, ', ')) AS aspect_type
WHERE
    aspect_type IS NOT NULL
    AND aspect_type != ''
GROUP BY
    aspect_type
ORDER BY
    entry_count DESC;
"""

# Run the query and render the result as a DataFrame.
# NOTE(review): `display` is not imported in this file - presumably the
# notebook (IPython) built-in; confirm if this is run outside a notebook.
df = bq_client.query(query).to_dataframe()
display(df)

Unnamed: 0,aspect_type,entry_count,project_count,system_count
0,projects/655216118709/locations/global/aspectT...,261,13,3
1,projects/655216118709/locations/global/aspectT...,210,9,2
2,projects/655216118709/locations/global/aspectT...,187,6,1
3,projects/655216118709/locations/global/aspectT...,187,6,1
4,projects/655216118709/locations/global/aspectT...,141,11,1
5,projects/655216118709/locations/global/aspectT...,93,17,1
6,projects/655216118709/locations/global/aspectT...,73,9,2
7,projects/655216118709/locations/global/aspectT...,71,4,1
8,projects/655216118709/locations/global/aspectT...,37,6,1
9,projects/655216118709/locations/global/aspectT...,35,5,1


---

## Interesting Questions to Explore in Data Canvas

Once your metadata is loaded, here are some compelling questions you can investigate and visualize:

### **Data Governance Dashboard**
- **Metadata Coverage**: What percentage of resources have descriptions?
- **Freshness Analysis**: Resources that haven't been updated in 6+ months (potential for cleanup)
- **Documentation Gaps**: Entry types missing critical aspects (schema, data quality, ownership)

### **Data Estate Insights**
- **Growth Trends**: New resources created per month by project/system (line chart)
- **Platform Distribution**: Pie chart of resources by platform (BigQuery, GCS, Dataform, etc.)
- **Entry Type Breakdown**: Bar chart showing most common entry types across the organization

Some examples:

![Entities created by system over time](https://raw.githubusercontent.com/haneyr/bq-academy-datagov-lab/main/media/metadata01.png)


![Resources created over time by project](https://raw.githubusercontent.com/haneyr/bq-academy-datagov-lab/main/media/metadata02.png)


Try these queries in Data Canvas to get started!

---

## Cleanup

The following cells will help you clean up resources created by this notebook. Run these only when you're done with the lab and want to remove all created resources.

In [None]:
def cleanup_bigquery_resources(delete_dataset: bool = True) -> None:
    """
    Removes the BigQuery resources this notebook created.

    Deletion is best-effort: errors are caught and reported with print rather
    than raised, and resources that do not exist are not treated as errors.

    Args:
        delete_dataset: If True, deletes the whole dataset together with its
                        contents. If False, deletes only the unnested view, the
                        export table and the UDF (in that order) and prints a
                        summary of what was deleted and what failed.
    """
    client = bigquery.Client(project=PROJECT_ID)
    dataset_path = f"{PROJECT_ID}.{DATASET_ID}"

    if delete_dataset:
        # Dropping the dataset removes the table, view and UDF with it.
        try:
            client.delete_dataset(dataset_path, delete_contents=True, not_found_ok=True)
            print(f"Deleted dataset '{dataset_path}' and all its contents.")
        except Exception as err:
            print(f"Error deleting dataset '{dataset_path}': {err}")
        return

    # (label, fully-qualified ID, delete call) for each individual resource.
    targets = (
        ("View", f"{dataset_path}.vw_{TABLE_ID}_unnested", client.delete_table),
        ("Table", f"{dataset_path}.{TABLE_ID}", client.delete_table),
        ("UDF", f"{dataset_path}.extract_aspect_types", client.delete_routine),
    )

    succeeded = []
    failed = []
    for label, resource_id, delete in targets:
        try:
            delete(resource_id, not_found_ok=True)
            succeeded.append(f"{label}: {resource_id}")
        except Exception as err:
            failed.append(f"{label}: {resource_id} - {err}")

    # Report the outcome
    if succeeded:
        print("Successfully deleted resources:")
        for entry in succeeded:
            print(f"  - {entry}")

    if failed:
        print("\nFailed to delete:")
        for entry in failed:
            print(f"  - {entry}")


def cleanup_gcs_bucket(delete_bucket: bool = True, delete_contents_only: bool = False) -> None:
    """
    Empties the export bucket and optionally deletes it.

    Every object in EXPORT_BUCKET_NAME is deleted regardless of the flags; the
    flags only decide what happens to the (now empty) bucket afterwards. A
    missing bucket and any other error are reported with print, not raised.

    Args:
        delete_bucket: If True (and delete_contents_only is False), the bucket
                       itself is deleted after its contents.
        delete_contents_only: If True, the bucket is kept even when
                              delete_bucket is True.
    """
    gcs = storage.Client(project=PROJECT_ID)

    try:
        target_bucket = gcs.get_bucket(EXPORT_BUCKET_NAME)

        # Remove every object currently in the bucket
        objects = list(target_bucket.list_blobs())
        if not objects:
            print(f"Bucket '{EXPORT_BUCKET_NAME}' is already empty.")
        else:
            for obj in objects:
                obj.delete()
            print(f"Deleted {len(objects)} file(s) from bucket '{EXPORT_BUCKET_NAME}'.")

        # Decide the fate of the bucket itself
        if delete_contents_only:
            print(f"Bucket '{EXPORT_BUCKET_NAME}' contents deleted, bucket retained.")
        elif delete_bucket:
            target_bucket.delete()
            print(f"Deleted bucket '{EXPORT_BUCKET_NAME}'.")

    except NotFound:
        print(f"Bucket '{EXPORT_BUCKET_NAME}' not found (already deleted).")
    except Exception as err:
        print(f"Error cleaning up bucket '{EXPORT_BUCKET_NAME}': {err}")


def cleanup_all_resources(
    delete_bigquery_dataset: bool = True,
    delete_gcs_bucket: bool = True
) -> None:
    """
    Cleans up all resources created by this notebook.

    Args:
        delete_bigquery_dataset: If True, deletes the entire BigQuery dataset.
            If False, the dataset is kept and only the table, view and UDF
            are deleted (see cleanup_bigquery_resources).
        delete_gcs_bucket: If True, deletes the GCS bucket and all contents.
            If False, the bucket and its contents are left untouched.
    """
    banner = "=" * 80

    print(banner)
    print("CLEANING UP RESOURCES")
    print(banner)

    print("\n1. BigQuery Resources:")
    cleanup_bigquery_resources(delete_dataset=delete_bigquery_dataset)

    print("\n2. GCS Bucket:")
    if delete_gcs_bucket:
        cleanup_gcs_bucket(delete_bucket=True)
    else:
        # cleanup_gcs_bucket always empties the bucket, even with
        # delete_bucket=False, so it must not be called at all when the
        # caller asked to keep the bucket.
        print("Skipped (delete_gcs_bucket=False); bucket and contents retained.")

    print("\n" + banner)
    print("CLEANUP COMPLETE")
    print(banner)


# Example usage - UNCOMMENT to run cleanup
# The calls below delete resources, so they are left commented out: running
# this cell as-is only defines nothing new and prints the reminder at the end.
# Cleanup options:

# Option 1: Delete everything (dataset, bucket, and all contents)
# cleanup_all_resources(delete_bigquery_dataset=True, delete_gcs_bucket=True)

# Option 2: Delete only BigQuery resources, keep GCS bucket
# cleanup_all_resources(delete_bigquery_dataset=True, delete_gcs_bucket=False)

# Option 3: Delete only specific BigQuery resources (table, view, UDF), keep dataset
# cleanup_bigquery_resources(delete_dataset=False)

# Option 4: Delete only GCS bucket contents, keep the bucket
# cleanup_gcs_bucket(delete_bucket=False, delete_contents_only=True)

print("Cleanup functions defined. Uncomment one of the options above to run cleanup.")