# Fact IoT Agregado (Horário)

Agregação horária de leituras de sensores para otimização de queries.

In [None]:
# Configuration (reads job parameters)
def _widget_or_default(name, default):
    """Return the value of the notebook widget *name*, or *default*.

    The fallback is used whenever the lookup raises an ``Exception`` -- for
    example when ``dbutils`` is not defined in the current session (a
    ``NameError``) or the widget lookup itself fails.
    """
    try:
        return dbutils.widgets.get(name)
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate instead of silently selecting the default.
    except Exception:
        return default


CATALOG = _widget_or_default('catalog', 'manufatura_lakehouse')
SCHEMA_SILVER = _widget_or_default('schema_silver', 'silver')
SCHEMA_GOLD = _widget_or_default('schema_gold', 'gold')

def fqtn(schema, table):
    """Return the backtick-quoted, qualified name of *table*.

    When the module-level ``CATALOG`` is set (non-empty and not the string
    ``'none'``, compared case-insensitively) the result has the form
    ``catalog.schema.table``; otherwise it is ``schema.table``.

    A backtick inside a name part is doubled, which is how Spark SQL escapes
    a backtick within a delimited identifier, so such a name cannot end the
    quoting early. Names without backticks produce exactly the same output
    as plain interpolation.
    """
    parts = [schema, table]
    if CATALOG and CATALOG.lower() != 'none':
        parts.insert(0, CATALOG)
    # str() keeps the f-string-like tolerance for non-string name parts.
    return ".".join("`" + str(part).replace("`", "``") + "`" for part in parts)

# Create the gold schema if it does not exist yet
if not CATALOG or CATALOG.lower() == 'none':
    # No catalog configured: create the schema as a plain database.
    spark.sql(f"CREATE DATABASE IF NOT EXISTS `{SCHEMA_GOLD}`")
else:
    # Catalog configured: create the schema inside it, then make that
    # catalog the session's current one.
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS `{CATALOG}`.`{SCHEMA_GOLD}`")
    spark.sql(f"USE CATALOG `{CATALOG}`")


In [None]:
# Qualified name of the hourly fact table in the gold schema.
_fact_table = fqtn(SCHEMA_GOLD, 'fact_iot_agregado')

# (Re)create the fact table, partitioned by day key and sensor type.
_create_stmt = f"""
CREATE OR REPLACE TABLE {_fact_table} (
    equipment_id STRING,
    sensor_type STRING,
    hour_bucket TIMESTAMP,
    date_key INT,
    avg_value DOUBLE,
    min_value DOUBLE,
    max_value DOUBLE,
    reading_count BIGINT
) USING DELTA
PARTITIONED BY (date_key, sensor_type)
"""
spark.sql(_create_stmt)

# Qualified name of the source readings table in the silver schema.
_readings_table = fqtn(SCHEMA_SILVER, 'iot_readings_clean')

# Load one row per equipment, sensor type and hour: the timestamp is
# truncated to the hour, and date_key is that hour's date as a yyyyMMdd int.
_insert_stmt = f"""
INSERT OVERWRITE {_fact_table}
SELECT 
    equipment_id,
    sensor_type,
    date_trunc('hour', reading_timestamp) as hour_bucket,
    cast(date_format(date_trunc('hour', reading_timestamp), 'yyyyMMdd') as int) as date_key,
    avg(reading_value) as avg_value,
    min(reading_value) as min_value,
    max(reading_value) as max_value,
    count(*) as reading_count
FROM {_readings_table}
GROUP BY 1, 2, 3
"""
spark.sql(_insert_stmt)