# el_landing_to_bronze_isp_performance

In [1]:
import pyspark
from pyspark.sql import SparkSession
import logging
from datetime import datetime
from pyspark.sql.functions import lit
from configs import configs
from functions import functions as F
from dotenv import load_dotenv
import os


## Import Environment

In [2]:
# Load variables from a .env file (when one is found) into the process
# environment so the lookups below can see them.
load_dotenv()

# Connection settings consumed by the Spark S3A configuration further down.
HOST_ADDRESS = os.getenv('HOST_ADDRESS')
MINIO_ACCESS_KEY = os.getenv('MINIO_ACCESS_KEY')
MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY')

# Fail fast: os.getenv returns None for unset variables, which would otherwise
# surface much later as an obscure connection error (e.g. "http://None:9000").
_missing_env_vars = [
    var_name
    for var_name, var_value in (
        ('HOST_ADDRESS', HOST_ADDRESS),
        ('MINIO_ACCESS_KEY', MINIO_ACCESS_KEY),
        ('MINIO_SECRET_KEY', MINIO_SECRET_KEY),
    )
    if not var_value
]
if _missing_env_vars:
    raise EnvironmentError(
        f"Missing required environment variable(s): {', '.join(_missing_env_vars)}"
    )

## Spark Session

In [3]:
# Build (or reuse) the Spark session for this job.
#   - fs.s3a.*            : S3A filesystem pointed at the endpoint on port 9000,
#                           authenticated with the access/secret key pair and
#                           using path-style access.
#   - hive.metastore.uris : Hive metastore reached over thrift.
#   - spark.sql.*         : Delta Lake SQL extension and catalog.
# The application name matches this notebook (isp_performance) so the job is
# identifiable in the Spark UI and logs.
spark = (
    SparkSession.builder
    .appName("el_landing_to_bronze_isp_performance")
    .config("spark.hadoop.fs.s3a.endpoint", f"http://{HOST_ADDRESS}:9000")
    .config("spark.hadoop.fs.s3a.access.key", MINIO_ACCESS_KEY)
    .config("spark.hadoop.fs.s3a.secret.key", MINIO_SECRET_KEY)
    .config("spark.hadoop.fs.s3a.path.style.access", True)
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
    .config("hive.metastore.uris", "thrift://metastore:9083")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .getOrCreate()
)

## Log configs

In [4]:
# Root logger setup: INFO and above, each record prefixed with its timestamp
# and severity level.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

# Mark the beginning of the run in the log output.
logging.info("Starting convertions from Minio to Minio Delta...")

2024-10-21 03:34:02,752 - INFO - Starting convertions from Minio to Minio Delta...


## Path configs

In [5]:
# Shared lookup tables from the project configuration module.
layer_prefixes = configs.prefix_layer_name
lake_paths = configs.lake_path

# Input side: 'landing' base location plus the prefix stored under key '0'.
table_input_name = lake_paths['landing']
input_prefix_layer_name = layer_prefixes['0']

# Output side: 'bronze' base location plus the prefix stored under key '1'.
storage_output = lake_paths['bronze']
output_prefix_layer_name = layer_prefixes['1']

## Process

In [6]:
# Convert every table listed in configs.tables_api_isp_performance from parquet
# (input location) to Delta (output location), one table at a time.
# A failure on one table is logged and does not stop the remaining tables;
# failures are collected so the final log line reflects the real outcome.
failed_tables = []

for table in configs.tables_api_isp_performance.values():
    table_name = F.convert_table_name(table)
    try:
        input_path = f'{table_input_name}{input_prefix_layer_name}{table_name}'
        output_path = f'{storage_output}{output_prefix_layer_name}{table_name}'

        df_input_data = spark.read.format("parquet").load(input_path)
        # NOTE(review): add_metadata is a project helper; presumably it appends
        # load-metadata columns -- confirm against functions.add_metadata.
        df_with_update_date = F.add_metadata(df_input_data)

        # Full overwrite of the target, partitioned by 'month_key'.
        (
            df_with_update_date.write
            .format("delta")
            .mode("overwrite")
            .partitionBy('month_key')
            .save(output_path)
        )
        logging.info(f'Table {table_name} sucessfully processed and saved to Minio: {output_path}')
    except Exception as e:
        failed_tables.append(table)
        # exc_info=True keeps the Python traceback alongside the message.
        logging.error(f'Error processing table {table}: {str(e)}', exc_info=True)

# Only claim success when every table was actually written.
if failed_tables:
    logging.error(
        f'Conversion from parquet to Delta finished with errors; '
        f'{len(failed_tables)} table(s) failed: {failed_tables}'
    )
else:
    logging.info("Convertion from parquet to Delta completed was sucessfully!")

2024-10-21 03:34:30,121 - INFO - Table ordem_servico_aberto sucessfully processed and saved to Minio: s3a://bronze/isp_performance/bronze_ordem_servico_aberto
2024-10-21 03:34:30,123 - INFO - Convertion from parquet to Delta completed was sucessfully!
