# process_bronze_to_silver_adventureworks

In [1]:
import pyspark
from pyspark.sql import SparkSession
import logging
from datetime import datetime

from configs import configs
from functions import functions as F

from dotenv import load_dotenv
import os

## Load environment variables

In [2]:
# Populate the process environment before reading the settings below
# (presumably from a local .env file via python-dotenv — confirm).
load_dotenv()

# Connection settings used later for the s3a endpoint and credentials.
# Each value is None when the corresponding variable is not set.
HOST_ADDRESS = os.environ.get('HOST_ADDRESS')
MINIO_ACCESS_KEY = os.environ.get('MINIO_ACCESS_KEY')
MINIO_SECRET_KEY = os.environ.get('MINIO_SECRET_KEY')

## Table processing function

In [3]:
def process_table(spark, query_input, output_path):
    """Run a SQL query and save its result as a Delta table partitioned by ``month_key``.

    The query result is passed through ``F.add_metadata`` (presumably adds the
    ``last_update`` column seen in the written tables — confirm in ``functions``)
    and then written with ``mode("overwrite")`` to ``output_path``.

    Failures are handled on a best-effort basis: any exception is logged with
    its traceback and NOT re-raised, so a caller looping over many tables
    keeps going. The return value lets the caller detect the failure.

    Args:
        spark: Session object whose ``sql`` method executes ``query_input``.
        query_input: SQL text to execute.
        output_path: Destination path of the Delta table.

    Returns:
        bool: True when the table was written, False when any step failed.
    """
    try:
        df_input_data = spark.sql(query_input)
        df_with_update_date = F.add_metadata(df_input_data)
        (
            df_with_update_date.write
            .format("delta")
            .mode("overwrite")
            .partitionBy('month_key')
            .save(output_path)
        )
    except Exception as e:
        # logging.exception records the traceback in addition to the message,
        # which the previous logging.error(str(e)) call discarded.
        logging.exception(f"Error processing query '{query_input}': {str(e)}")
        return False
    else:
        logging.info(f"query '{query_input}' successfully processed and saved to {output_path}")
        return True

## Spark Session

In [4]:
if __name__ == "__main__":
    # Build (or reuse) the Spark session for this job.
    spark = (
        SparkSession.builder
        .appName("process_bronze_to_silver_adventureworks")
        # S3A object-storage access: endpoint and credentials come from the environment.
        .config("spark.hadoop.fs.s3a.endpoint", f"http://{HOST_ADDRESS}:9000")
        .config("spark.hadoop.fs.s3a.access.key", MINIO_ACCESS_KEY)
        .config("spark.hadoop.fs.s3a.secret.key", MINIO_SECRET_KEY)
        .config("spark.hadoop.fs.s3a.path.style.access", True)
        .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
        .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
        # Hive metastore location.
        .config("hive.metastore.uris", "thrift://metastore:9083")
        # Delta Lake SQL extension and catalog.
        .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
        .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
        .getOrCreate()
    )

## Log configs

In [5]:
# Root logger: INFO and above, formatted as "timestamp - level - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Marks the start of the run in the log output.
logging.info("Starting processing from bronze to silver Adventureworks...")

2024-08-21 10:13:18,366 - INFO - Starting processing from bronze to silver Adventureworks...


## Path configs

In [6]:
# Source side: bronze layer location and its table-name prefix.
input_path = configs.lake_path['bronze']
input_prefix_layer_name = configs.prefix_layer_name['1']  # bronze layer

# Destination side: silver layer location and its table-name prefix.
output_path = configs.lake_path['silver']
output_prefix_layer_name = configs.prefix_layer_name['2']  # silver layer

## Process

In [7]:
# Process every table declared in configs.tables_silver from bronze to silver.
try:
    # Only the keys are iterated: the query for each table is rebuilt by
    # F.get_query from configs.tables_silver, so the mapped value is not
    # needed here (it used to be unpacked and immediately overwritten).
    for source_table_name in configs.tables_silver:
        table_name = F.convert_table_name(source_table_name)

        query_input = F.get_query(table_name, input_path, input_prefix_layer_name, configs.tables_silver)

        # Destination is the silver path + layer prefix + converted table name.
        storage_output = f'{output_path}{output_prefix_layer_name}{table_name}'

        # process_table logs and swallows its own failures, so one failing
        # write does not stop the remaining tables.
        process_table(spark, query_input, storage_output)

    logging.info("Process to silver completed!")

except Exception as e:
    # Reached when name conversion or query building fails; this aborts the
    # remaining tables. logging.exception keeps the traceback for diagnosis.
    logging.exception(f'Error processing table: {str(e)}')

2024-08-21 10:13:28,538 - INFO - query '
SELECT 
	countryregioncode as codigo_pais, 
	currencycode as codigo_moeda, 
	modifieddate,
    month_key
FROM 
    delta.`s3a://bronze/adventure_works/bronze_sales_countryregioncurrency`
' sucessfully processed and saved to s3a://silver/adventure_works/silver_sales_countryregioncurrency
2024-08-21 10:13:30,590 - INFO - query 'SELECT * FROM delta.`s3a://bronze/adventure_works/bronze_humanresources_department`' sucessfully processed and saved to s3a://silver/adventure_works/silver_humanresources_department
2024-08-21 10:13:32,425 - INFO - query 'SELECT * FROM delta.`s3a://bronze/adventure_works/bronze_humanresources_employee`' sucessfully processed and saved to s3a://silver/adventure_works/silver_humanresources_employee
2024-08-21 10:13:34,862 - INFO - query 'SELECT * FROM delta.`s3a://bronze/adventure_works/bronze_sales_salesorderheader`' sucessfully processed and saved to s3a://silver/adventure_works/silver_sales_salesorderheader
2024-08-21 10:1

In [8]:
# DataFrame.show() prints the rows and returns None, so the loaded DataFrame
# is bound to `df` first and displayed in a separate call; chaining .show()
# onto the assignment left `df` set to None.
df = spark.read.format("delta").load('s3a://silver/adventure_works/silver_sales_countryregioncurrency')
df.show()

+-----------+------------+--------------------+---------+--------------------+
|codigo_pais|codigo_moeda|        modifieddate|month_key|         last_update|
+-----------+------------+--------------------+---------+--------------------+
|         AE|         AED|2014-02-08 10:17:...|   201402|2024-08-21 10:13:...|
|         AR|         ARS|2014-02-08 10:17:...|   201402|2024-08-21 10:13:...|
|         AT|         ATS|2014-02-08 10:17:...|   201402|2024-08-21 10:13:...|
|         AU|         AUD|2014-02-08 10:17:...|   201402|2024-08-21 10:13:...|
|         BB|         BBD|2014-02-08 10:17:...|   201402|2024-08-21 10:13:...|
|         BD|         BDT|2014-02-08 10:17:...|   201402|2024-08-21 10:13:...|
|         BE|         BEF|2014-02-08 10:17:...|   201402|2024-08-21 10:13:...|
|         BG|         BGN|2014-02-08 10:17:...|   201402|2024-08-21 10:13:...|
|         BH|         BHD|2014-02-08 10:17:...|   201402|2024-08-21 10:13:...|
|         BN|         BND|2014-02-08 10:17:...|   20