In [0]:
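# Purpose:
# This notebook generates a synthetic batch of transaction records, stamps each
# row with ingestion metadata (ingestion_date, load_timestamp) for traceability,
# and appends the batch to the Bronze-layer Delta table bronze_db.transactions_raw.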

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import *
import uuid
from datetime import date, timedelta
import random

# Explicit schema for the transaction batch, so column types are declared
# up front instead of being inferred from the Python tuples. Each field keeps
# the default nullability (nullable=True).
schema = (
    StructType()
    .add("transaction_id", StringType())
    .add("transaction_date", DateType())
    .add("partner_id", StringType())
    .add("amount", DoubleType())
    .add("currency_code", StringType())
    .add("transaction_status", StringType())
    .add("source_system", StringType())
)

In [0]:
# Value pools each synthetic transaction draws from.
partners = ["P001", "P002", "P003", "P004", "P005"]
currencies = ["USD", "EUR", "INR"]
statuses = ["SUCCESS", "FAILED", "PENDING"]
systems = ["PORTAL_A", "PORTAL_B", "API_FEED"]

# Every row in the batch shares one transaction date: the local date of the
# Python process at the moment this cell runs.
run_date = date.today()

# Build the 5000-row batch in a single pass. Tuple positions follow the field
# order of the schema declared earlier in the notebook:
# (transaction_id, transaction_date, partner_id, amount, currency_code,
#  transaction_status, source_system).
# Amounts are drawn uniformly from [10, 5000] and rounded to 2 decimals.
# No random seed is set, so each run produces a different batch.
rows = [
    (
        str(uuid.uuid4()),
        run_date,
        random.choice(partners),
        round(random.uniform(10, 5000), 2),
        random.choice(currencies),
        random.choice(statuses),
        random.choice(systems),
    )
    for _ in range(5000)
]

# Turn the generated Python tuples into a Spark DataFrame, applying the
# explicit schema rather than letting Spark infer the column types.
df = spark.createDataFrame(data=rows, schema=schema)

In [0]:
# Stamp every row with ingestion metadata for traceability. All source columns
# are kept as-is, and the two metadata columns are added after them:
#   ingestion_date  - Spark's current date when the query is evaluated
#   load_timestamp  - Spark's current timestamp when the query is evaluated
bronze_df = df.select(
    "*",
    F.current_date().alias("ingestion_date"),
    F.current_timestamp().alias("load_timestamp"),
)

In [0]:
# Persist the stamped batch to the Bronze Delta table. Append mode keeps the
# rows already in the table, so each run of this cell adds one more batch
# rather than replacing earlier loads.
(
    bronze_df.write
    .format("delta")
    .mode("append")
    .saveAsTable("bronze_db.transactions_raw")
)