In [0]:
# Infer and display schemas for databricks-analytics volume files.
#
# Each CSV is read with its header row and with schema inference switched on,
# then its schema is printed. Without the inferSchema option the CSV reader
# reports every column as string, so the printed schema would carry no type
# information at all.
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()

files = [
    '/Volumes/workspace/databricks-analytics/raw/dim_customers.csv',
    '/Volumes/workspace/databricks-analytics/raw/dim_products.csv',
    '/Volumes/workspace/databricks-analytics/raw/fact_sales.csv'
]

for file_path in files:
    print(f"Schema for {file_path}:")
    # inferSchema costs one extra pass over the file; it is enabled only for
    # this preview read so the printed types reflect the actual data.
    df = (
        spark.read
        .option("header", True)
        .option("inferSchema", True)
        .csv(file_path)
    )
    df.printSchema()
    print("\n")

In [0]:
# Load the databricks-analytics volume CSVs into Delta tables.
# The reader below sets only the header option (no inferSchema), so every
# column is written to the tables as a string.


def _csv_to_delta(csv_path, table_name):
    """Read a headered CSV and overwrite the named Delta table with its rows.

    Args:
        csv_path: Location of the source CSV file.
        table_name: Fully qualified name of the table to (re)create.

    Returns:
        The DataFrame that was read from ``csv_path``.
    """
    frame = spark.read.option("header", True).csv(csv_path)
    frame.write.format("delta").mode("overwrite").saveAsTable(table_name)
    return frame


# Make sure the target schema exists before any table is written into it.
spark.sql("CREATE SCHEMA IF NOT EXISTS workspace.databricks_analytics")

# dim_customers
customers_df = _csv_to_delta(
    "/Volumes/workspace/databricks-analytics/raw/dim_customers.csv",
    "workspace.databricks_analytics.dim_customers",
)

# dim_products
products_df = _csv_to_delta(
    "/Volumes/workspace/databricks-analytics/raw/dim_products.csv",
    "workspace.databricks_analytics.dim_products",
)

# fact_sales
sales_df = _csv_to_delta(
    "/Volumes/workspace/databricks-analytics/raw/fact_sales.csv",
    "workspace.databricks_analytics.fact_sales",
)