# In [0]:
from pyspark.sql import DataFrame
from pyspark.sql.utils import AnalysisException
from pyspark.dbutils import DBUtils
import os, re

# ========================
# CONFIG
# ========================

# Destination in Unity Catalog: tables are written to <catalog>.<schema>.<table>.
TARGET_CATALOG = "main"
TARGET_SCHEMA = "dbrx_12daysofdemos"

# CSV files to load; each one is resolved relative to this notebook's folder.
CSV_FILES = ["holiday-sales-and-trends.csv", "santa_letters_canada.csv"]

# Options handed to the Spark CSV reader for every file.
CSV_OPTIONS = dict(header="true", inferSchema="true")

# ========================
# 1. Figure out this notebook's workspace folder
# ========================

dbutils = DBUtils(spark)
ctx = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
# Workspace path of the running notebook, e.g. /Repos/user/repo/folder/notebook.
# NOTE(review): whether this value already carries a "/Workspace" prefix may
# depend on the runtime -- both forms are handled below.
notebook_path = ctx.notebookPath().get()
# Everything before the last "/" is the folder that holds this notebook.
notebook_dir = notebook_path.rpartition("/")[0]

# The load loop always builds its source path as "/Workspace" + repo_dir_dbfs,
# so drop a leading "/Workspace" here; otherwise the prefix would be doubled
# ("/Workspace/Workspace/...") and no CSV would be found.
# NOTE(review): despite its name this is the notebook's workspace folder, not a
# dbfs:/ path; the name is kept because later code refers to it.
repo_dir_dbfs = notebook_dir
if repo_dir_dbfs == "/Workspace" or repo_dir_dbfs.startswith("/Workspace/"):
    repo_dir_dbfs = repo_dir_dbfs[len("/Workspace"):]

print(f"Notebook path: {notebook_path}")
print(f"Notebook dir:  {notebook_dir}")
print(f"DBFS repo dir: {repo_dir_dbfs}")

# ========================
# 2. USE catalog and ensure schema
# ========================

# Make the target catalog current; an AnalysisException is re-raised as a
# RuntimeError carrying a hint, with the original error chained as its cause.
try:
    spark.sql(f"USE CATALOG {TARGET_CATALOG}")
except AnalysisException as exc:
    raise RuntimeError(
        f"Could not USE CATALOG {TARGET_CATALOG}. Make sure it exists and you have access."
    ) from exc
else:
    print(f"\n✓ Using catalog: {TARGET_CATALOG}")

# IF NOT EXISTS keeps this step safe to re-run.
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {TARGET_CATALOG}.{TARGET_SCHEMA}")
print(f"✓ Schema exists: {TARGET_CATALOG}.{TARGET_SCHEMA}")

# ========================
# Helper: filename -> table name
# ========================

def clean_table_name(filename: str) -> str:
    """Derive a lowercase table name from a CSV file name.

    The final extension is dropped, every character other than an ASCII
    letter, digit or underscore is replaced by "_", and the result is
    lowercased. If nothing is left, "table_from_csv" is returned instead.
    """
    stem, _extension = os.path.splitext(filename)
    sanitized = re.sub(r"[^0-9a-zA-Z_]", "_", stem).lower()
    if sanitized:
        return sanitized
    return "table_from_csv"

# ========================
# 3. Read each CSV from the repo folder and save as UC table
# ========================

# Files that could not be loaded, so the closing summary does not claim
# success when some tables were never written.
failed_files = []

for filename in CSV_FILES:
    table_name = clean_table_name(filename)
    full_table_name = f"{TARGET_CATALOG}.{TARGET_SCHEMA}.{table_name}"
    csv_path = f"/Workspace{repo_dir_dbfs}/{filename}"

    print(f"\n=== Processing {filename} ===")
    print(f"Source path:  {csv_path}")
    print(f"Target table: {full_table_name}")

    try:
        # Read CSV -> DataFrame, applying every configured reader option at once.
        df: DataFrame = spark.read.options(**CSV_OPTIONS).csv(csv_path)
        # count() is its own Spark action, so the file is read again by the
        # write below; acceptable for small demo files.
        print(f"DataFrame rows: {df.count()}")

        # Save as managed table in UC, replacing any existing table.
        df.write.mode("overwrite").saveAsTable(full_table_name)
        print(f"✓ Created / replaced table {full_table_name}")
    except Exception as e:
        # Deliberately best-effort: one bad file must not stop the remaining
        # loads, but the failure is recorded for the summary.
        print(f"✗ Error for file at {csv_path}: {e}")
        failed_files.append(filename)

if failed_files:
    print(
        f"\nFinished with errors: {len(failed_files)} of {len(CSV_FILES)} "
        f"file(s) failed: {', '.join(failed_files)}"
    )
else:
    print("\nAll done creating tables from local repo CSVs.")