# Read from CSV file

In [0]:
# Load the CSV from the CRM source volume; header="true" makes Spark use the
# file's first row as column names (no schema inference, so columns stay strings).
df = spark.read.csv(
    "/Volumes/dev_project/bronze/source_systems/source_crm/sales_details.csv",
    header="true",
)

# Render the loaded rows in the notebook output.
display(df)

# Write it to the Bronze layer

In [0]:
df.write.mode("overwrite").saveAsTable("dev_project.bronze.crm_cust_info")

In [0]:
%sql
select * from dev_project.bronze.crm_cust_info

In [0]:
INGESTION_CONFIG = [
    {
        "source": "crm",
        "path": "/Volumes/dev_project/bronze/source_systems/source_crm/cust_info.csv",
        
        "table": "crm_cust_info"
    },
    {
        "source": "crm",
        "path": "/Volumes/dev_project/bronze/source_systems/source_crm/prd_info.csv",
        "table": "crm_prd_info"
    },
    {
        "source": "crm",
        "path": "/Volumes/dev_project/bronze/source_systems/source_crm/sales_details.csv",
        "table": "crm_sales_details"
    },
    {
        "source": "erp",
        "path": "/Volumes/dev_project/bronze/source_systems/source_erp/CUST_AZ12.csv",
        "table": "erp_cust_az12"
    },
    {
        "source": "erp",
        "path": "/Volumes/dev_project/bronze/source_systems/source_erp/LOC_A101.csv",
        "table": "erp_loc_a101"
    },
    {
        "source": "erp",
        "path": "/Volumes/dev_project/bronze/source_systems/source_erp/PX_CAT_G1V2.csv",
        "table": "erp_px_cat_g1v2"
    }
]


In [0]:
for item in INGESTION_CONFIG:
    print(f"Ingesting {item['source']} â†’ dev_project.bronze.{item['table']}")

    df = (
        spark.read
             .option("header", "true")
             .option("inferSchema", "true")
             .csv(item["path"])
    )

    # Minimal fix: align schema for crm_cust_info only
    if item["table"] == "crm_cust_info":
        df = df.selectExpr(
            "cast(cst_id as string) as sls_cust_id",
            "cast(cst_key as string) as sls_prd_key",
            "cast(cst_firstname as string) as sls_ord_num",
            "cast(cst_lastname as string) as sls_order_dt",
            "cast(cst_marital_status as string) as sls_ship_dt",
            "cast(cst_gndr as string) as sls_due_dt",
            "cast(cst_create_date as string) as sls_sales"
        )

    (
        df.write
          .mode("overwrite")
          .format("delta")
          .saveAsTable(f"dev_project.bronze.{item['table']}")
    )


In [0]:
# Show the rows of dev_project.bronze.crm_cust_info in the notebook output.
spark.sql("SELECT * FROM dev_project.bronze.crm_cust_info").display()

In [0]:
# Show the rows of dev_project.bronze.crm_prd_info in the notebook output.
spark.sql("SELECT * FROM dev_project.bronze.crm_prd_info").display()

In [0]:
# Show the rows of dev_project.bronze.crm_sales_details in the notebook output.
spark.sql("SELECT * FROM dev_project.bronze.crm_sales_details").display()

In [0]:
# Show the rows of dev_project.bronze.erp_cust_az12 in the notebook output.
spark.sql("SELECT * FROM dev_project.bronze.erp_cust_az12").display()

In [0]:
# Show the rows of dev_project.bronze.erp_loc_a101 in the notebook output.
spark.sql("SELECT * FROM dev_project.bronze.erp_loc_a101").display()

In [0]:
# Show the rows of dev_project.bronze.erp_px_cat_g1v2 in the notebook output.
spark.sql("SELECT * FROM dev_project.bronze.erp_px_cat_g1v2").display()