# Silver to Gold: Building BI-Ready Tables

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType, IntegerType, DateType, TimestampType, FloatType

# Catalog segment of the three-part table names (catalog.schema.table) that
# this notebook reads from and writes to.
catalog_name = 'ecommerce'

In [0]:
# Read the silver order-items table; the following cells keep reassigning
# `df` as derived columns are added to it.
silver_table = f"{catalog_name}.silver.slv_order_items"
df = spark.table(silver_table)

# Quick visual check of a handful of rows.
df.limit(10).display()

In [0]:
# Enrich the order items with derived measures and keys in one fluent chain.
# Columns are attached in a fixed order because later expressions reference
# earlier ones (discount needs gross; sale needs gross and discount).
df = (
    df
    # 1. Gross amount: quantity times unit price.
    .withColumn("gross_amount", F.col("quantity") * F.col("unit_price"))
    # 2. Discount amount: discount_pct percent of the gross amount; F.ceil
    #    rounds the result up to the next whole number.
    .withColumn(
        "discount_amount",
        F.ceil(F.col("gross_amount") * (F.col("discount_pct") / 100.0)),
    )
    # 3. Sale amount: gross minus discount, plus tax_amount.
    .withColumn(
        "sale_amount",
        F.col("gross_amount") - F.col("discount_amount") + F.col("tax_amount"),
    )
    # 4. Date ID: dt formatted as yyyyMMdd and cast to an integer
    #    (e.g. 20240131).
    .withColumn(
        "date_id",
        F.date_format(F.col("dt"), "yyyyMMdd").cast(IntegerType()),
    )
    # 5. Coupon flag: 1 when coupon_code is not null, otherwise 0.
    .withColumn(
        "coupon_flag",
        F.when(F.col("coupon_code").isNotNull(), F.lit(1)).otherwise(F.lit(0)),
    )
)

# Preview the enriched rows.
df.limit(10).display()

### Currency Conversion

In [0]:
# Fixed exchange rates keyed by currency code. The join cell multiplies an
# amount by its currency's rate to express it in INR (INR itself is 1.00).
fixed_rates = dict(
    INR=1.00,
    AED=24.18,
    AUD=57.55,
    CAD=62.93,
    GBP=117.98,
    SGD=68.18,
    USD=88.29,
)

# Turn the mapping into (currency, inr_rate) rows, forcing every rate to
# float, and expose it as a small lookup DataFrame for the join.
rates = list(zip(fixed_rates.keys(), map(float, fixed_rates.values())))
rates_df = spark.createDataFrame(rates, schema=["currency", "inr_rate"])
rates_df.show()

In [0]:
# Look up the INR rate for each row and derive the sale amount in INR.
# The order-side currency code is trimmed and upper-cased before matching
# against rates_df.currency. Because this is a left join, rows whose currency
# has no entry in rates_df are kept with a null inr_rate, which makes
# sale_amount_inr null for those rows.
df = df.join(
    rates_df,
    on=F.upper(F.trim(F.col("unit_price_currency"))) == rates_df["currency"],
    how="left",
).withColumn(
    # Converted amount, rounded up to the next whole number by F.ceil.
    "sale_amount_inr",
    F.ceil(F.col("sale_amount") * F.col("inr_rate")),
)

df.limit(10).display()

In [0]:
# Project the enriched rows into the gold fact layout. Columns that keep
# their name are selected by name; renamed columns are aliased explicitly.
# The helper columns added by the rates join (currency, inr_rate) are not
# selected, so they do not reach the gold table.
orders_gold_df = df.select(
    # Keys and identifiers
    "date_id",
    F.col("dt").alias("transaction_date"),
    F.col("order_ts").alias("transaction_ts"),
    F.col("order_id").alias("transaction_id"),
    "customer_id",
    F.col("item_seq").alias("seq_no"),
    "product_id",
    # Descriptive attributes
    "channel",
    "coupon_code",
    "coupon_flag",
    "unit_price_currency",
    # Measures
    "quantity",
    "unit_price",
    "gross_amount",
    F.col("discount_pct").alias("discount_percent"),
    "discount_amount",
    "tax_amount",
    F.col("sale_amount").alias("net_amount"),
    F.col("sale_amount_inr").alias("net_amount_inr"),
)

In [0]:
# Preview the first five rows of the gold projection.
gold_preview = orders_gold_df.limit(5)
gold_preview.display()

In [0]:
# Persist the curated order-items fact table to the gold layer as a Delta
# table. mode("overwrite") replaces the table's existing contents on each
# run; the mergeSchema option is passed through to the Delta writer.
(
    orders_gold_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .saveAsTable(f"{catalog_name}.gold.gld_fact_order_items")
)

In [0]:
# Sanity check: show the row count of the gold fact table.
row_count_query = f"SELECT count(*) FROM {catalog_name}.gold.gld_fact_order_items"
spark.sql(row_count_query).show()