# How much?!?

The code below estimates how much data your SQL query will scan on BigQuery and how much that scan will cost.

Run the two code blocks below after you have added your SQL query to the second one.

In [None]:
import os
import json
from google.cloud import bigquery
from google.oauth2 import service_account

def estimate_query_cost(sql: str, price_per_tb: float = 6.25) -> dict:
    """
    Estimate how much a BigQuery SQL query will cost using a dry run.

    The query is submitted with ``dry_run=True`` and the job's
    ``total_bytes_processed`` is used as the amount of data to be scanned.
    The estimate is printed and also returned.

    Args:
        sql (str): The SQL query to estimate.
        price_per_tb (float): Cost per TB scanned in USD (default $6.25).
            "TB" here is computed as 1024**4 bytes.

    Returns:
        dict: A dictionary with the keys ``bytes_scanned`` (int),
        ``gb`` (bytes / 1024**3), ``tb`` (bytes / 1024**4) and
        ``estimated_cost_usd`` (``tb * price_per_tb``).

    Raises:
        KeyError: If the ``BQ_CREDENTIALS`` environment variable is not set.
    """
    # Load credentials from Codespaces secret (JSON string)
    try:
        creds_json = os.environ["BQ_CREDENTIALS"]
    except KeyError:
        # Same exception type as before, but with an actionable message.
        raise KeyError(
            "BQ_CREDENTIALS environment variable is not set; it must contain "
            "the service-account key as a JSON string"
        ) from None
    creds_info = json.loads(creds_json)
    creds = service_account.Credentials.from_service_account_info(creds_info)

    # Create BigQuery client
    client = bigquery.Client(credentials=creds, project=creds.project_id)

    # Dry run job; the query cache is disabled, presumably so that a cached
    # result cannot hide the real scan size.
    job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
    job = client.query(sql, job_config=job_config)

    bytes_scanned = job.total_bytes_processed
    gb = bytes_scanned / (1024**3)
    tb = bytes_scanned / (1024**4)
    estimated_cost = tb * price_per_tb

    print("Estimated size to scan:")
    print(f"{bytes_scanned/1024:,.0f} KB")
    print(f"{gb:,.0f} GB")
    print(f"{tb:.4f} TB")
    print("")
    print(f"Estimated cost: ${estimated_cost:.4f} (at ${price_per_tb}/TB)")

    # Hand the numbers back as the docstring promises, so callers can use
    # them programmatically instead of parsing the printed output.
    return {
        "bytes_scanned": bytes_scanned,
        "gb": gb,
        "tb": tb,
        "estimated_cost_usd": estimated_cost,
    }

Place your SQL query in the `sql` string below, then run the cell to get an estimate of the scan size and cost.

In [None]:
# Replace the placeholder between the triple quotes with your own query.
sql = """
[put your SQL query here!]
"""

# Dry-run the query and print the estimated scan size and cost.
estimate_query_cost(sql=sql)