In [0]:

# Report which Spark version the attached compute is running
print(f"Spark version:  {spark.version}")

# Smoke test: build a one-row DataFrame and render it to prove the
# cluster can actually execute a job
data = [("OK", 1)]
df = spark.createDataFrame(data, schema=["status", "value"])
df.show()
# DBTITLE 1]


In [0]:
%sql
-- Create the project catalog; IF NOT EXISTS keeps the cell safe to re-run
CREATE CATALOG IF NOT EXISTS taxi_catalog;

-- Create the project schema inside that catalog (also skipped when it already exists)
CREATE SCHEMA IF NOT EXISTS taxi_catalog.taxi_schema;
-- List the catalogs so the presence of taxi_catalog can be checked
SHOW CATALOGS;

In [0]:
%sql
-- Create the volume that will hold the bronze/silver/gold data directories;
-- IF NOT EXISTS keeps the cell safe to re-run
CREATE VOLUME IF NOT EXISTS taxi_catalog.taxi_schema.taxi_volume;

In [0]:
%sql
-- List the volumes in the project schema to confirm taxi_volume was created
SHOW VOLUMES IN taxi_catalog.taxi_schema;

In [0]:
# Unity Catalog coordinates shared by the whole project
catalog_name, schema_name, volume_name = "taxi_catalog", "taxi_schema", "taxi_volume"

# Root of the UC volume: /Volumes/<catalog>/<schema>/<volume>
base_path = "/".join(("/Volumes", catalog_name, schema_name, volume_name))

# One sub-directory per medallion layer
bronze_path, silver_path, gold_path = (
    f"{base_path}/{layer}" for layer in ("bronze", "silver", "gold")
)

# Echo the resolved locations
print(f"Base path: {base_path}")
print(f"Bronze: {bronze_path}")
print(f"Silver: {silver_path}")
print(f"Gold: {gold_path}")

In [0]:
# Expose catalog / schema / volume as text widgets so a job run can
# override them; the second argument is the default value
dbutils.widgets.text("catalog", "taxi_catalog")
dbutils.widgets.text("schema", "taxi_schema")
dbutils.widgets.text("volume", "taxi_volume")

# Resolve the current widget values (queried in catalog, schema, volume order)
catalog_name, schema_name, volume_name = (
    dbutils.widgets.get(widget) for widget in ("catalog", "schema", "volume")
)

# Volume root plus one directory per medallion layer
base_path = f"/Volumes/{catalog_name}/{schema_name}/{volume_name}"
bronze_path, silver_path, gold_path = (
    f"{base_path}/{layer}" for layer in ("bronze", "silver", "gold")
)

# Echo the resolved locations
print(f"Base path: {base_path}")
print(f"Bronze path: {bronze_path}")
print(f"Silver path: {silver_path}")
print(f"Gold path: {gold_path}")


In [0]:
# Create the medallion directories inside the UC-managed volume.
# The paths come from the widget-driven parameter cell (base_path, bronze_path,
# silver_path, gold_path). base_path is deliberately NOT reassigned here:
# overwriting it with a hard-coded literal would discard any catalog/schema/volume
# supplied as job parameters, for this cell and for every later cell that reads it.
dbutils.fs.mkdirs(bronze_path)
dbutils.fs.mkdirs(silver_path)
dbutils.fs.mkdirs(gold_path)

# List the volume root to confirm the directory structure
display(dbutils.fs.ls(base_path))

In [0]:
# Switch the session to the project catalog/schema.
# Uses the widget-derived catalog_name / schema_name rather than hard-coded names,
# so the session context matches the parameters the notebook was run with.
# Backticks quote the identifiers so names with special characters are accepted.
spark.sql(f"USE CATALOG `{catalog_name}`")
spark.sql(f"USE `{schema_name}`")

In [0]:
# Environment validation: report who is running the notebook and which
# catalog/schema the session currently points at
print("====Environment validation====")
print("User:", spark.sql("SELECT current_user()").first()[0])
print("Catalog:", spark.sql("SELECT current_catalog()").first()[0])
print("Schema:", spark.sql("SELECT current_schema()").first()[0])

# Show the contents of the volume root
print("\nVolume Structure:")
display(dbutils.fs.ls(base_path))

# Final status line for the run log
print("\nNotebook 00_setup_project completed successfully")

In [0]:
# Signal success to the orchestrator: end the notebook run here and hand
# back the string "OK" as its exit value
dbutils.notebook.exit("OK")