# In [0]:

# -------------------------------------------
# Incremental Reader (Auto Loader)
# -------------------------------------------
import traceback

def read_incremental_data(path: str, file_format: str, schema_evolution_mode: str, job_name: str, logger):
    """Create an Auto Loader (``cloudFiles``) streaming DataFrame over ``path``.

    Relies on a ``spark`` session being available as a global (it is not
    passed in or created here).

    Args:
        path: Source directory to read. The Auto Loader schema location is
            derived from it as ``{path}/_schema``.
        file_format: Value passed as ``cloudFiles.format``. When it equals
            ``"binaryfile"`` (case-insensitive) the lookup is recursive and
            limited to ``*.png`` files.
        schema_evolution_mode: Value passed as
            ``cloudFiles.schemaEvolutionMode``.
        job_name: Prefix used in every log message.
        logger: Object exposing ``info``, ``warning`` and ``error``.

    Returns:
        The DataFrame produced by ``spark.readStream...load(path)``.

    Raises:
        Exception: Any error raised while building the stream is logged
            (message plus traceback) and re-raised unchanged. A failure of
            the static preview count is only logged as a warning.
    """
    try:
        schema_path = f"{path}/_schema"
        logger.info(f"{job_name} - Auto Loader init: path={path}, file_format={file_format}, schema_path={schema_path}, schema_evolution_mode={schema_evolution_mode}")
        reader = (spark.readStream
                    .format("cloudFiles")
                    .option("cloudFiles.format", file_format)
                    .option("cloudFiles.schemaLocation", schema_path)
                    .option("cloudFiles.inferColumnTypes", "true")
                    .option("cloudFiles.schemaEvolutionMode", schema_evolution_mode))

        is_binary = file_format.lower() == "binaryfile"
        # Single definition of the filter so the stream and the preview count
        # below always look at the same set of files.
        binary_glob = "*.png"

        if is_binary:
            reader = (reader
                      .option("recursiveFileLookup", "true")  # descend into nested folders
                      .option("pathGlobFilter", binary_glob))  # only .png files

        # Static preview count for debug. Best-effort: a failure here must not
        # prevent the stream from being created.
        try:
            static_reader = spark.read.format(file_format)
            if is_binary:
                # Mirror the stream's options; without the glob filter the
                # count would include files the stream never ingests.
                static_reader = (static_reader
                                 .option("recursiveFileLookup", "true")
                                 .option("pathGlobFilter", binary_glob))
            preview_count = static_reader.load(path).count()
            logger.info(f"{job_name} - Current static count at path: {preview_count:,} rows/files")
        except Exception as ce:
            logger.warning(f"{job_name} - Static preview count failed: {ce}")

        df = reader.load(path)

        logger.info(f"{job_name} - Auto Loader read stream created.")
        logger.info(f"{job_name} - Schema: {df.schema.simpleString()}")
        return df
    except Exception as e:
        logger.error(f"{job_name} - Auto Loader read stream failed: {e}")
        logger.error(traceback.format_exc())
        raise
