# Load bronze layer

This notebook loads data into the bronze tables using overwrite operations. Each write replaces any existing data in the target tables with the latest data from the source, ensuring the bronze layer always reflects the most up-to-date information.

## Loading CRM tables

In [0]:
import time

# Capture the wall-clock start; the last cell subtracts this from its own
# time.time() reading to report the notebook's total execution time.
start_time = time.time()

#### Insert data into `sales_project.bronze.crm_cust_info`

In [0]:
# Copy the selected customer columns from the `google_drive` schema into the
# bronze CRM customer table.
source_table = "sales_project.google_drive.cust_info"
target_table = "sales_project.bronze.crm_cust_info"

df = spark.table(source_table).select(
    "cst_id",
    "cst_key",
    "cst_firstname",
    "cst_lastname",
    "cst_gndr",
    "cst_marital_status",
    "cst_create_date"
)

# "overwrite" replaces whatever the target table currently holds.
df.write.mode("overwrite").saveAsTable(target_table)

# Count what actually landed in the target. Calling df.count() here would
# re-run the source read as a second job and could report a number that
# differs from what was written if the source changed in between.
print(f"Inserted rows: {spark.table(target_table).count()}")

Inserted rows: 18494


#### Insert data into `sales_project.bronze.crm_prd_info`

In [0]:
# Copy the selected product columns from the `google_drive` schema into the
# bronze CRM product table.
source_table = "sales_project.google_drive.prd_info"
target_table = "sales_project.bronze.crm_prd_info"

df = spark.table(source_table).select(
    "prd_id",
    "prd_key",
    "prd_nm",
    "prd_cost",
    "prd_line",
    "prd_start_dt",
    "prd_end_dt"
)

# "overwrite" replaces whatever the target table currently holds.
df.write.mode("overwrite").saveAsTable(target_table)

# Count what actually landed in the target. Calling df.count() here would
# re-run the source read as a second job and could report a number that
# differs from what was written if the source changed in between.
print(f"Inserted rows: {spark.table(target_table).count()}")

Inserted rows: 397


#### Insert data into `sales_project.bronze.crm_sales_details`

In [0]:
# Copy the selected sales-detail columns from the `google_drive` schema into
# the bronze CRM sales details table.
source_table = "sales_project.google_drive.sales_details"
target_table = "sales_project.bronze.crm_sales_details"

df = spark.table(source_table).select(
    "sls_ord_num",
    "sls_prd_key",
    "sls_cust_id",
    "sls_order_dt",
    "sls_ship_dt",
    "sls_due_dt",
    "sls_sales",
    "sls_quantity",
    "sls_price"
)

# "overwrite" replaces whatever the target table currently holds.
df.write.mode("overwrite").saveAsTable(target_table)

# Count what actually landed in the target. Calling df.count() here would
# re-run the source read as a second job and could report a number that
# differs from what was written if the source changed in between.
print(f"Inserted rows: {spark.table(target_table).count()}")

Inserted rows: 60398


## Loading ERP tables

#### Insert data into `sales_project.bronze.erp_loc_a_101`

In [0]:
# Copy the selected columns from `google_drive.loc_a_101` into the matching
# bronze ERP table.
source_table = "sales_project.google_drive.loc_a_101"
target_table = "sales_project.bronze.erp_loc_a_101"

df = spark.table(source_table).select(
    "cid",
    "cntry"
)

# "overwrite" replaces whatever the target table currently holds.
df.write.mode("overwrite").saveAsTable(target_table)

# Count what actually landed in the target. Calling df.count() here would
# re-run the source read as a second job and could report a number that
# differs from what was written if the source changed in between.
print(f"Inserted rows: {spark.table(target_table).count()}")

Inserted rows: 18484


#### Insert data into `sales_project.bronze.erp_cust_az_12`

In [0]:
# Copy the selected columns from `google_drive.cust_az_12` into the matching
# bronze ERP table.
source_table = "sales_project.google_drive.cust_az_12"
target_table = "sales_project.bronze.erp_cust_az_12"

df = spark.table(source_table).select(
    "cid",
    "bdate",
    "gen"
)

# "overwrite" replaces whatever the target table currently holds.
df.write.mode("overwrite").saveAsTable(target_table)

# Count what actually landed in the target. Calling df.count() here would
# re-run the source read as a second job and could report a number that
# differs from what was written if the source changed in between.
print(f"Inserted rows: {spark.table(target_table).count()}")

Inserted rows: 18484


#### Insert data into `sales_project.bronze.erp_px_cat_g_1_v_2`

In [0]:
# Copy the selected columns from `google_drive.px_cat_g_1_v_2` into the
# matching bronze ERP table.
source_table = "sales_project.google_drive.px_cat_g_1_v_2"
target_table = "sales_project.bronze.erp_px_cat_g_1_v_2"

df = spark.table(source_table).select(
    "id",
    "cat",
    "subcat",
    "maintenance"
)

# "overwrite" replaces whatever the target table currently holds.
df.write.mode("overwrite").saveAsTable(target_table)

# Count what actually landed in the target. Calling df.count() here would
# re-run the source read as a second job and could report a number that
# differs from what was written if the source changed in between.
print(f"Inserted rows: {spark.table(target_table).count()}")

Inserted rows: 37


In [0]:
import time

# Report how long the notebook took, measured from the `start_time` captured
# in the first code cell.
end_time = time.time()
duration = end_time - start_time
elapsed_rounded = round(duration, 2)
print(f"Execution time: {elapsed_rounded} seconds")

Execution time: 15.89 seconds
