In [0]:

# -------------------------------------------
# Incremental Reader (Auto Loader)
# -------------------------------------------
import traceback

def read_incremental_data(path: str, file_format: str, schema_evolution_mode: str, job_name: str, logger):
    """Build an Auto Loader (``cloudFiles``) streaming read over ``path``.

    The schema location handed to Auto Loader is derived from the source
    path as ``<path>/_schema``, and column type inference is switched on.

    Args:
        path: Source location passed to ``load`` and used as the base of the
            schema location.
        file_format: Value for the ``cloudFiles.format`` option.
        schema_evolution_mode: Value for the ``cloudFiles.schemaEvolutionMode``
            option.
        job_name: Prefix placed in front of every log message.
        logger: Object exposing ``info(msg)`` and ``error(msg)``.

    Returns:
        Whatever ``spark.readStream...load(path)`` returns (presumably a
        streaming DataFrame — it is expected to expose ``schema``).

    Raises:
        Exception: Any failure while logging or building the stream is logged
            (message, then full traceback) and re-raised unchanged.

    Note:
        Relies on a ``spark`` name being available in the enclosing scope; it
        is not defined in this block.
    """
    try:
        schema_location = f"{path}/_schema"
        logger.info(
            f"{job_name} - Auto Loader init: path={path}, file_format={file_format}, "
            f"schema_path={schema_location}, schema_evolution_mode={schema_evolution_mode}"
        )

        # Options are applied one by one, in this order, on the cloudFiles reader.
        auto_loader_options = {
            "cloudFiles.format": file_format,
            "cloudFiles.schemaLocation": schema_location,
            "cloudFiles.inferColumnTypes": "true",
            "cloudFiles.schemaEvolutionMode": schema_evolution_mode,
        }
        stream_reader = spark.readStream.format("cloudFiles")
        for option_name, option_value in auto_loader_options.items():
            stream_reader = stream_reader.option(option_name, option_value)
        stream_df = stream_reader.load(path)

        logger.info(f"{job_name} - Auto Loader stream created.")
        logger.info(f"{job_name} - Schema: {stream_df.schema.simpleString()}")
        return stream_df
    except Exception as exc:
        # Log the short message first, then the full traceback, and let the
        # original exception propagate to the caller.
        logger.error(f"{job_name} - Auto Loader read failed: {exc}")
        logger.error(traceback.format_exc())
        raise
