In [0]:
"""
01_platform_bootstrap.py

Purpose:
- Create Bronze, Silver, and Gold schemas in the Hive metastore
- Validate Delta Lake support
- Validate external API connectivity
- Perform sanity checks before ingestion

Note:
- Databricks Community Edition
- Uses metastore-managed Delta tables (no DBFS / Unity Catalog)
"""

# Create medallion schemas (Databricks CE compatible)
# Databricks “databases” are logical namespaces, not folders — but the data is physically stored and persistent.
spark.sql("CREATE DATABASE IF NOT EXISTS air_quality_bronze")
spark.sql("CREATE DATABASE IF NOT EXISTS air_quality_silver")
spark.sql("CREATE DATABASE IF NOT EXISTS air_quality_gold")

print("Medallion schemas created.")

# Test reading a JSON URL
import requests
import json

test_url = "https://data.sensor.community/static/v2/data.json"
try:
    response = requests.get(test_url, timeout=10).json()
    print("Successful test request.")
    print(json.dumps(response[0], indent=2))
except requests.RequestException as e:
    # Log error and re-raise for visibility
    print(f"Error fetching data from {test_url}: {e}")
    raise

# Create sample Spark DataFrame
sample_df = spark.createDataFrame(
    [(1, "test")],
    ["id", "value"]
)

# Internal table used only to validate Delta + metastore functionality
BRONZE_TABLE = "air_quality_bronze.bootstrap_sample"

sample_df.write \
    .format("delta") \
    .mode("overwrite") \
    .saveAsTable(BRONZE_TABLE)

# Read Delta table
sample_df_loaded = spark.read.table(BRONZE_TABLE)
display(sample_df_loaded)

# Sanity check: medallion-layered databases & tables
# 1. Show all databases
spark.sql("SHOW DATABASES").show()
# 2. Show tables in Bronze layer
spark.sql("SHOW TABLES IN air_quality_bronze").show()
# 3. Query sample Bronze table
spark.sql("SELECT * FROM air_quality_bronze.sample_test").show()
# 4. Show Delta table metadata (owner-safe)
spark.sql("DESCRIBE DETAIL air_quality_bronze.sample_test").display()
# 5. Programmatic catalog metadata (clean summary)
table = spark.catalog.getTable("air_quality_bronze.sample_test")

display({
    "name": table.name,
    "database": table.database,
    "tableType": table.tableType,
    "isTemporary": table.isTemporary
})

print("Platform bootstrap completed successfully.")

dbutils.notebook.exit(
    "Bootstrap completed: schemas validated, API reachable, Delta OK"
)

Medallion schemas created.
Successful test request.
{
  "id": 27891855283,
  "sampling_rate": null,
  "timestamp": "2026-01-16 11:32:01",
  "location": {
    "id": 49,
    "latitude": "48.53",
    "longitude": "9.2",
    "altitude": "373.1",
    "country": "DE",
    "exact_location": 0,
    "indoor": 0
  },
  "sensor": {
    "id": 107,
    "pin": "5",
    "sensor_type": {
      "id": 1,
      "name": "PPD42NS",
      "manufacturer": "Shinyei"
    }
  },
  "sensordatavalues": [
    {
      "id": 64692307527,
      "value": "832348.00",
      "value_type": "durP1"
    },
    {
      "id": 64692307530,
      "value": "111108.00",
      "value_type": "durP2"
    },
    {
      "id": 64692307529,
      "value": "1437.81",
      "value_type": "P1"
    },
    {
      "id": 64692307532,
      "value": "192.74",
      "value_type": "P2"
    },
    {
      "id": 64692307528,
      "value": "2.77",
      "value_type": "ratioP1"
    },
    {
      "id": 64692307531,
      "value": "0.37",
      "v

id,value
1,test


+------------------+
|      databaseName|
+------------------+
|       air_quality|
|air_quality_bronze|
|  air_quality_gold|
|air_quality_silver|
|           default|
|information_schema|
+------------------+

+------------------+----------------+-----------+
|          database|       tableName|isTemporary|
+------------------+----------------+-----------+
|air_quality_bronze|bootstrap_sample|      false|
|air_quality_bronze| live_sensor_raw|      false|
|air_quality_bronze|     sample_test|      false|
+------------------+----------------+-----------+

+---+-----+
| id|value|
+---+-----+
|  1| test|
+---+-----+



format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,1b84d9ec-68a1-4106-9c12-2daffb65c87d,workspace.air_quality_bronze.sample_test,,,2025-12-18T08:09:41.164Z,2025-12-18T08:44:37.000Z,List(),List(),1,835,"Map(delta.parquet.compression.codec -> zstd, delta.enableDeletionVectors -> true)",3,7,"List(appendOnly, deletionVectors, invariants)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False


{'name': 'sample_test',
 'database': 'air_quality_bronze',
 'tableType': 'MANAGED',
 'isTemporary': False}

Platform bootstrap completed successfully.
