In [0]:
storage_account = dbutils.secrets.get("azure-secrets", "storage_account")
storage_key = dbutils.secrets.get("azure-secrets", "storage_key")

print("Secrets accessible ✔")
print("Storage account length:", len(storage_account))


Secrets accessible ✔
Storage account length: 19


In [0]:
storage_account = dbutils.secrets.get("azure-secrets", "storage_account")

print("repr:", repr(storage_account))
print("length:", len(storage_account))


repr: '[REDACTED]'
length: 19


In [0]:
storage_account = dbutils.secrets.get("azure-secrets", "storage_account")

print("value length:", len(storage_account))


value length: 18


In [0]:
storage_account = dbutils.secrets.get("azure-secrets", "storage_account")
storage_key = dbutils.secrets.get("azure-secrets", "storage_key")

print("account length:", len(storage_account))
print("key length:", len(storage_key))


account length: 18
key length: 88


In [0]:
container = "taxi-data"
mount_point = "/mnt/taxi"

# Unmount if exists
if mount_point in [m.mountPoint for m in dbutils.fs.mounts()]:
    dbutils.fs.unmount(mount_point)

dbutils.fs.mount(
  source=f"wasbs://{container}@{storage_account}.blob.core.windows.net",
  mount_point=mount_point,
  extra_configs={
    f"fs.azure.account.key.{storage_account}.blob.core.windows.net": storage_key
  }
)

print("Mounted successfully")


Mounted successfully


In [0]:
dbutils.fs.ls("/mnt/taxi")


[FileInfo(path='dbfs:/mnt/taxi/taxi_bangalore_10000.csv', name='taxi_bangalore_10000.csv', size=1267533, modificationTime=1765877998000)]

In [0]:
# =========================
# 00-setup notebook
# =========================

# 1️⃣ Load secrets from Databricks Secret Scope
storage_account = dbutils.secrets.get("azure-secrets", "storage_account")
storage_key = dbutils.secrets.get("azure-secrets", "storage_key")

print("Secrets loaded successfully")


# 2️⃣ Define container & mount point
container = "taxi-data"          # your Azure Blob container name
mount_point = "/mnt/taxi"        # standard mount location


# 3️⃣ Mount Azure Blob Storage (only if not already mounted)
existing_mounts = [m.mountPoint for m in dbutils.fs.mounts()]

if mount_point not in existing_mounts:
    dbutils.fs.mount(
        source=f"wasbs://{container}@{storage_account}.blob.core.windows.net",
        mount_point=mount_point,
        extra_configs={
            f"fs.azure.account.key.{storage_account}.blob.core.windows.net": storage_key
        }
    )
    print(f"Mounted successfully at {mount_point}")
else:
    print(f"Already mounted at {mount_point}")


# 4️⃣ Define global paths (used by all downstream notebooks)

# Raw CSV (your actual file)
RAW_PATH = f"{mount_point}/taxi_bangalore_10000.csv"

# Bronze / Silver / Gold layers
BRONZE_PATH = f"{mount_point}/bronze/trips_parquet"
SILVER_PATH = f"{mount_point}/silver/trips_features"
GOLD_PATH   = f"{mount_point}/gold/predictions"

# Model storage on DBFS
MODELS_DBFS = "/dbfs/models/taxi_fare_pipeline_v1"


# 5️⃣ Print paths for verification
print("RAW_PATH    :", RAW_PATH)
print("BRONZE_PATH :", BRONZE_PATH)
print("SILVER_PATH :", SILVER_PATH)
print("GOLD_PATH   :", GOLD_PATH)
print("MODELS_DBFS :", MODELS_DBFS)


# 6️⃣ Final sanity check – list files in mount
dbutils.fs.ls(mount_point)


Secrets loaded successfully
Already mounted at /mnt/taxi
RAW_PATH    : /mnt/taxi/taxi_bangalore_10000.csv
BRONZE_PATH : /mnt/taxi/bronze/trips_parquet
SILVER_PATH : /mnt/taxi/silver/trips_features
GOLD_PATH   : /mnt/taxi/gold/predictions
MODELS_DBFS : /dbfs/models/taxi_fare_pipeline_v1


[FileInfo(path='dbfs:/mnt/taxi/taxi_bangalore_10000.csv', name='taxi_bangalore_10000.csv', size=1267533, modificationTime=1765877998000)]

In [0]:
spark.read.option("header", True) \
     .csv("/mnt/taxi/taxi_bangalore_10000.csv") \
     .write.mode("overwrite") \
     .saveAsTable("default.taxi_bronze")
