In [0]:
import dlt
import logging
from pyspark.sql.types import StructType, StructField, StringType

In [0]:
# Named logger used by this notebook's pipeline code; INFO and above are emitted.
logger = logging.getLogger("DLTLoggerBronzeNintendo")
logger.setLevel(logging.INFO)

# logging.getLogger returns the same object for a given name, so re-running this
# cell in the same interpreter would otherwise stack a second handler and print
# every message twice. Only attach a handler when none is present yet.
if len(logger.handlers) == 0:
    formatter = logging.Formatter(
        fmt='[%(levelname)s] %(asctime)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)

In [0]:
# Fixed schema for the incoming JSON: every column is declared as a nullable
# string, in the order listed below.
schema = StructType([
    StructField(column_name, StringType(), True)
    for column_name in (
        "desconto",
        "codigo",
        "nome",
        "preco",
        "parcelamento",
        "link",
        "origem",
        "extract",
    )
])

In [0]:
@dlt.table(
    name="nintendodatabricks_workspace.bronze.consoles_ing",
    comment="Tabela streming referente a consoles nintendo switch em diversas fontes de busca",
)
@dlt.expect("colunas_obrriatorias_nao_nulas", "codigo IS NOT NULL AND preco IS NOT NULL")
@dlt.expect("extract_format", "extract RLIKE '^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$'")
def nintendo_bronze():
    """Build the streaming DataFrame behind the bronze ``consoles_ing`` table.

    Reads JSON files from the ``inbound/`` path of the ``nintendo`` container
    through the ``cloudFiles`` streaming source, applying the module-level
    ``schema``, then drops rows that repeat the same combination of
    ``codigo``, ``preco``, ``desconto`` and ``parcelamento``.

    Two expectations are declared on the table via ``dlt.expect``:
    ``codigo``/``preco`` must be non-null, and ``extract`` must match the
    pattern ``YYYY-MM-DD HH:MM``.

    Returns:
        The deduplicated streaming DataFrame.
    """
    # NOTE(review): this message is logged when the function body runs, i.e.
    # while the streaming plan is being built -- confirm whether that moment
    # matches what "started" is meant to convey to whoever reads the log.
    logger.info("Streaming de dados iniciado...")

    inbound_path = "abfss://nintendo@nintendostorageaccount.dfs.core.windows.net/inbound/"
    autoloader_options = {
        "cloudFiles.format": "json",
        "recursiveFileLookup": "true",
    }

    raw_stream = (
        spark.readStream
        .format("cloudFiles")
        .options(**autoloader_options)
        .schema(schema)
        .load(inbound_path)
    )

    # Deduplicate on codigo, preco, desconto and parcelamento.
    # NOTE(review): an earlier comment here also listed "nome" as a key, but the
    # code has never included it -- confirm which of the two is intended.
    # NOTE(review): no watermark is defined before dropDuplicates on this
    # stream; presumably dedup state is kept without bound -- verify this is
    # acceptable for the expected data volume.
    dedup_keys = ["codigo", "preco", "desconto", "parcelamento"]
    deduped_stream = raw_stream.dropDuplicates(dedup_keys)

    logger.info("Carregando dados na Streaming table")

    return deduped_stream