In [1]:
import pyspark
from pyspark.sql import SparkSession
import logging
from datetime import datetime
from pyspark.sql.functions import lit, coalesce, col, current_date
from configs import configs
from functions import functions as F
from dotenv import load_dotenv
import os

In [2]:
# Let python-dotenv populate the process environment before the lookups below.
load_dotenv()

# Connection settings consumed by the Spark S3A configuration.
# Each lookup yields None when the variable is not defined.
HOST_ADDRESS = os.environ.get('HOST_ADDRESS')
MINIO_ACCESS_KEY = os.environ.get('MINIO_ACCESS_KEY')
MINIO_SECRET_KEY = os.environ.get('MINIO_SECRET_KEY')

In [3]:
if __name__ == "__main__":
    # Build (or reuse) the Spark session for this job. The chain is grouped by
    # concern; every key/value pair is passed to the builder unchanged.
    spark = (
        SparkSession.builder
        .appName("process_bronze_to_silver_isp_performance")
        # S3A filesystem: endpoint on port 9000, static access/secret keys,
        # path-style addressing.
        .config("spark.hadoop.fs.s3a.endpoint", f"http://{HOST_ADDRESS}:9000")
        .config("spark.hadoop.fs.s3a.access.key", MINIO_ACCESS_KEY)
        .config("spark.hadoop.fs.s3a.secret.key", MINIO_SECRET_KEY)
        .config("spark.hadoop.fs.s3a.path.style.access", True)
        .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
        .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
        # Hive metastore reached over thrift.
        .config("hive.metastore.uris", "thrift://metastore:9083")
        # Delta Lake SQL extension and session catalog.
        .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
        .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
        # Memory and shuffle sizing.
        .config("spark.executor.memory", "4g")
        .config("spark.driver.memory", "4g")
        .config("spark.memory.fraction", "0.8")
        .config("spark.sql.shuffle.partitions", "50")
        .getOrCreate()
    )

In [4]:
# Load the landing parquet dataset and keep the DataFrame in `df`.
# DataFrame.show() only prints rows and returns None, so it must not be chained
# onto the assignment; otherwise `df` would be None for every later cell.
df = spark.read.format("parquet").load('s3a://landing/isp_performance/landing_ordem_servico_aberto')

# Preview the first 100 rows.
df.show(100)

+--------------------+-------------------+---------------------+-------------------+-------------------+---------------+-------+-------------------+------+----+---------+-------------+----------+-------------------+---------------------+--------+------+----------+----------+-----+---------+----------+----------+--------------------+------------+--------------------+--------------------+-------------+--------------------+-----------+-------------------+--------------------+-------------------+--------------------+--------+-------------------+-------------------+-------------------+-------------------+---------------+--------------------+-----------+---------------------+---+-----------------+-------------+------------+--------+-------------------+-------------------+--------------+---------+---------------+--------------------------+-------------------------+-------------------+-------------------+---------------+-------------------+
|   mensagem_resposta|  data_hora_analise|data_hora_e