In [0]:
# 02_data_ingest.py
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType, TimestampType
from datetime import datetime

class DataIngestor:
    """Write DataFrames to Delta tables under a single catalog and schema.

    Every table name handed to the write methods is qualified as
    ``<catalog>.<schema>.<table_name>`` before being passed to Spark.
    """

    def __init__(self, spark: "SparkSession", catalog: str, schema: str):
        """Store the session and the namespace used to qualify table names.

        Args:
            spark: Session used by ``create_sample_df`` to build DataFrames.
            catalog: First part of every fully-qualified table name.
            schema: Second part of every fully-qualified table name.
        """
        self.spark = spark
        self.catalog = catalog
        self.schema = schema

    def create_sample_df(self):
        """Build a two-row sample DataFrame.

        Columns are ``id`` (integer, non-nullable), ``name`` (string),
        ``amount`` (double) and ``created_at`` (timestamp). Both rows share
        one ``created_at`` value: a naive ``datetime.now()`` reading taken
        once per call.

        Returns:
            The DataFrame produced by ``self.spark.createDataFrame``.
        """
        schema = StructType([
            StructField('id', IntegerType(), False),
            StructField('name', StringType(), True),
            StructField('amount', DoubleType(), True),
            StructField('created_at', TimestampType(), True),
        ])
        # Read the clock once so every row of the batch carries the same
        # timestamp instead of values that drift by a few microseconds.
        created_at = datetime.now()
        data = [
            (3, 'Charlie', 10.5, created_at),
            (4, 'Diana', 20.0, created_at),
        ]
        return self.spark.createDataFrame(data, schema)

    def append_to_table(self, df, table_name: str) -> None:
        """Append ``df`` to ``<catalog>.<schema>.<table_name>`` as Delta.

        Args:
            df: DataFrame to write (anything exposing Spark's ``.write`` API).
            table_name: Unqualified table name; must not be empty.

        Raises:
            ValueError: If ``table_name`` is empty.
        """
        self._save(df, table_name, 'append')

    def overwrite_table(self, df, table_name: str) -> None:
        """Overwrite ``<catalog>.<schema>.<table_name>`` with ``df`` as Delta.

        Args:
            df: DataFrame to write (anything exposing Spark's ``.write`` API).
            table_name: Unqualified table name; must not be empty.

        Raises:
            ValueError: If ``table_name`` is empty.
        """
        self._save(df, table_name, 'overwrite')

    def _qualified_name(self, table_name: str) -> str:
        """Return ``table_name`` prefixed with this ingestor's catalog and schema."""
        if not table_name:
            # Without this guard the identifier would end in a dangling dot
            # ("catalog.schema.") and only fail later inside Spark.
            raise ValueError("table_name must be a non-empty string")
        return f"{self.catalog}.{self.schema}.{table_name}"

    def _save(self, df, table_name: str, mode: str) -> None:
        """Write ``df`` in Delta format to the qualified table using ``mode``."""
        # Resolve (and validate) the name before touching the writer so a bad
        # name never starts a write.
        full_table = self._qualified_name(table_name)
        # Use saveAsTable on the fully-qualified table (Unity Catalog managed)
        df.write.format('delta').mode(mode).saveAsTable(full_table)
