In [0]:
"""
01_platform_bootstrap.py

Purpose:
- Create Bronze, Silver, and Gold schemas in the Hive metastore
- Validate Delta Lake support
- Validate external API connectivity
- Perform sanity checks before ingestion

Note:
- Databricks Community Edition
- Uses metastore-managed Delta tables (no DBFS / Unity Catalog)
"""

# The project root is not on PYTHONPATH by default in a Databricks notebook,
# so every notebook appends it to sys.path at runtime to make src/ importable.
import os
import sys

project_root = os.path.abspath("..")
sys.path.append(project_root)

# Create one database per medallion layer (Databricks CE compatible).
# A Databricks "database" is a logical namespace rather than a folder, but the
# data stored under it is physical and persistent.
for layer in ("bronze", "silver", "gold"):
    spark.sql(f"CREATE DATABASE IF NOT EXISTS air_quality_{layer}")

print("Medallion schemas created.")

# Test reading a JSON URL: fetch the feed, verify the HTTP status, parse the
# body as JSON and pretty-print the first record as a connectivity smoke test.
import requests
import json

test_url = "https://data.sensor.community/static/v2/data.json"

try:
    http_response = requests.get(test_url, timeout=10)
    # Fail on 4xx/5xx instead of treating an error body as a successful fetch
    http_response.raise_for_status()
    response = http_response.json()
except (requests.RequestException, ValueError) as e:
    # ValueError also covers a body that is not valid JSON, for requests
    # releases whose JSON decode error is not a RequestException subclass.
    # Log error and re-raise for visibility
    print(f"Error fetching data from {test_url}: {e}")
    raise

# The preview below indexes the first record, so insist on a non-empty JSON
# array and fail with an explicit message rather than an opaque IndexError.
if not isinstance(response, list) or not response:
    raise ValueError(
        f"Unexpected payload from {test_url}: expected a non-empty JSON array, "
        f"got {type(response).__name__}"
    )

print("Successful test request.")
print(json.dumps(response[0], indent=2))

# Internal table used only to validate Delta + metastore functionality
BRONZE_TABLE = "air_quality_bronze.bootstrap_sample"

# Build a one-row DataFrame to push through the Delta write/read round trip
sample_df = spark.createDataFrame([(1, "test")], ["id", "value"])

# Write it as a metastore-managed Delta table, replacing any earlier run's copy
(
    sample_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(BRONZE_TABLE)
)

# Read the table back through the metastore and render it in the notebook
sample_df_loaded = spark.read.table(BRONZE_TABLE)
display(sample_df_loaded)

# Sanity check: medallion-layered databases & tables
# Steps 3-5 use BRONZE_TABLE (the Delta table this notebook wrote earlier)
# instead of a hard-coded name, so they inspect the table that actually exists.
# 1. Show all databases
spark.sql("SHOW DATABASES").show()
# 2. Show tables in Bronze layer
spark.sql("SHOW TABLES IN air_quality_bronze").show()
# 3. Query sample Bronze table
spark.sql(f"SELECT * FROM {BRONZE_TABLE}").show()
# 4. Show Delta table metadata (owner-safe)
spark.sql(f"DESCRIBE DETAIL {BRONZE_TABLE}").display()
# 5. Programmatic catalog metadata (clean summary)
table = spark.catalog.getTable(BRONZE_TABLE)

display({
    "name": table.name,
    "database": table.database,
    "tableType": table.tableType,
    "isTemporary": table.isTemporary
})

# Report completion in the cell output, then end the notebook run with a
# summary string passed to dbutils.notebook.exit.
print("Platform bootstrap completed successfully.")

exit_message = "Bootstrap completed: schemas validated, API reachable, Delta OK"
dbutils.notebook.exit(exit_message)