In [0]:
# Set up the working environment: catalog plus every layer schema the
# notebook writes to. All statements are idempotent (IF NOT EXISTS), so this
# cell can be re-run safely and no manual edit-and-rerun per schema is needed.

# Catalog name
catalog_name = "smart_claims_dev"

# Layer schemas used by this project, in pipeline order.
layer_schemas = ["00_landing", "01_bronze", "02_silver", "03_gold"]

# Schema selected as the current working schema.
schema_name = "03_gold"

# Create the catalog if it does not exist
spark.sql(f"CREATE CATALOG IF NOT EXISTS `{catalog_name}`")

# Create every layer schema in the catalog. Later cells write to 01_bronze
# and to a volume under 00_landing, so creating only `schema_name` is not
# enough on a fresh workspace. Identifiers are backtick-quoted because the
# schema names start with a digit.
for layer_schema in layer_schemas:
    spark.sql(
        f"CREATE SCHEMA IF NOT EXISTS `{catalog_name}`.`{layer_schema}`"
    )

In [0]:
import os
import pandas as pd


# Local data folder (relative to the notebook's working directory)
data_dir = "data/sql_server/"

# Target table name -> CSV file name inside data_dir
csv_files = {
    "claims": "claims.csv",
    "customers": "customers.csv",
    "policies": "policies.csv"
}

# Schema that receives the tables loaded from the CSV files
bronze_schema = "01_bronze"

# Make sure the target schema exists before writing; saveAsTable does not
# create a missing schema. Backticks are used because the name starts with a digit.
spark.sql(
    f"CREATE SCHEMA IF NOT EXISTS `{catalog_name}`.`{bronze_schema}`"
)

# Hybrid approach: pandas reads the CSVs through plain relative local paths
# (a pure-Spark read would require absolute paths and more file handling),
# then Spark writes the result as a Unity Catalog table.

# Load each CSV file
for table_name, file_name in csv_files.items():
    file_path = os.path.join(data_dir, file_name)

    # Read the CSV with pandas and convert it to a Spark DataFrame
    spark_df = spark.createDataFrame(pd.read_csv(file_path))

    # Replace the table on every run so the cell stays re-runnable
    spark_df.write.mode("overwrite").saveAsTable(
        f"{catalog_name}.{bronze_schema}.{table_name}"
    )

print("Tables created successfully")


In [0]:
# Define the source folder and the destination Volume
source_path = "data/training_imgs/"
landing_schema = "00_landing"
volume_name = "training_imgs"

# Build the Volume path from catalog_name so it follows the catalog
# configured in the setup cell instead of a hard-coded copy of its value.
destination_path = f"/Volumes/{catalog_name}/{landing_schema}/{volume_name}"

# dbutils.fs.mkdirs cannot create a Unity Catalog volume, so make sure the
# schema and the volume themselves exist first (both statements are idempotent).
spark.sql(
    f"CREATE SCHEMA IF NOT EXISTS `{catalog_name}`.`{landing_schema}`"
)
spark.sql(
    f"CREATE VOLUME IF NOT EXISTS `{catalog_name}`.`{landing_schema}`.`{volume_name}`"
)

# Create the directory in the Volume if it doesn't exist
dbutils.fs.mkdirs(destination_path)

# dbutils.fs resolves scheme-less paths against the DBFS root, not the
# notebook's working directory. The CSV cell reads "data/..." from the local
# working directory, so the images are addressed the same way here through an
# explicit file: URI.
# NOTE(review): assumes the images live next to the notebook like the CSVs — confirm.
source_uri = f"file:{os.path.abspath(source_path)}"

# Copy the files (the source folder is left untouched)
dbutils.fs.cp(source_uri, destination_path, recurse=True)

# Verify the files are there
display(dbutils.fs.ls(destination_path))