# el_postgres_to_landing_adventureworks

In [3]:
pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [4]:
import pyspark
from pyspark.sql import SparkSession
import logging
from configs import configs
from functions import functions as F

from dotenv import load_dotenv
import os

## Load environment variables

In [5]:
# Load key/value pairs from a .env file (if one is found) into the process
# environment so the lookups below can see them.
load_dotenv()

# Connection settings and credentials are read from the environment so that
# no secret is hard-coded in the notebook.
HOST_ADDRESS = os.getenv('HOST_ADDRESS')
MINIO_ACCESS_KEY = os.getenv('MINIO_ACCESS_KEY')
MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY')
USER_POSTGRES = os.getenv('USER_POSTGRES')
PASSWORD_POSTGRES = os.getenv('PASSWORD_POSTGRES')

# Fail fast: os.getenv returns None for an unset variable, which would only
# surface later as an obscure connection error (e.g. "http://None:9000").
_missing_env_vars = [
    name
    for name, value in {
        'HOST_ADDRESS': HOST_ADDRESS,
        'MINIO_ACCESS_KEY': MINIO_ACCESS_KEY,
        'MINIO_SECRET_KEY': MINIO_SECRET_KEY,
        'USER_POSTGRES': USER_POSTGRES,
        'PASSWORD_POSTGRES': PASSWORD_POSTGRES,
    }.items()
    if not value
]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_env_vars)
    )

## Spark Session

In [6]:
# Session settings, applied in this order:
#   - S3A filesystem pointed at the MinIO endpoint, using the credentials read
#     from the environment
#   - Hive metastore URI
#   - Delta Lake SQL extension and catalog
spark_settings = {
    "spark.hadoop.fs.s3a.endpoint": f"http://{HOST_ADDRESS}:9000",
    "spark.hadoop.fs.s3a.access.key": MINIO_ACCESS_KEY,
    "spark.hadoop.fs.s3a.secret.key": MINIO_SECRET_KEY,
    "spark.hadoop.fs.s3a.path.style.access": True,
    "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
    "spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider",
    "hive.metastore.uris": "thrift://metastore:9083",
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
}

session_builder = SparkSession.builder.appName("el_postgres_to_landing_adventureworks")
for conf_key, conf_value in spark_settings.items():
    session_builder = session_builder.config(conf_key, conf_value)

# Reuses an already-running session if one exists, otherwise starts a new one.
spark = session_builder.getOrCreate()

## Logging configuration

In [7]:
# Every record is rendered as "<timestamp> - <level> - <message>".
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'

# Configure the root logger to emit INFO and above using that layout.
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

# Mark the start of the run in the log.
logging.info("Starting ingestions from Postgres to Minio landing Adventureworks...")

2024-09-22 23:23:52,871 - INFO - Starting ingestions from Postgres to Minio landing Adventureworks...


## Process

In [8]:
# Copy every configured Postgres table into the landing zone as Parquet.
# Tables are processed independently: a failure is logged with its traceback,
# recorded in `failed_tables`, and the loop continues with the next table.

# Loop-invariant settings, resolved once instead of once per table.
jdbc_url = f"jdbc:postgresql://{HOST_ADDRESS}:5435/Adventureworks"
output_prefix_layer_name = configs.prefix_layer_name['0']
output_table_name = configs.lake_path['landing_adventure_works']

# Source table names whose ingestion raised an exception.
failed_tables = []

for table_input_name in configs.tables_postgres_adventureworks.values():
    try:
        # Name used for the output folder and in log messages.
        table_input_path = F.convert_table_name(table_input_name)

        # Read the whole source table through JDBC.
        df_input_data = (
            spark.read
            .format("jdbc")
            .option("url", jdbc_url)
            .option("user", USER_POSTGRES)
            .option("dbtable", table_input_name)
            .option("password", PASSWORD_POSTGRES)
            .option("driver", "org.postgresql.Driver")
            .load()
        )

        output_table_path = f"{output_table_name}{output_prefix_layer_name}{table_input_path}"

        logging.info("Processing table: %s", table_input_path)

        # NOTE(review): add_metadata presumably appends ingestion metadata
        # columns, and add_month_key presumably derives the 'month_key' column
        # from 'modifieddate' -- confirm against the `functions` module.
        df_with_update_date = F.add_metadata(df_input_data)
        df_with_month_key = F.add_month_key(df_with_update_date, 'modifieddate')

        # Full reload: existing data at the target path is overwritten.
        (
            df_with_month_key.write
            .format("parquet")
            .mode("overwrite")
            .partitionBy('month_key')
            .save(output_table_path)
        )

        logging.info(
            "Table %s successfully processed and saved to Minio: %s",
            table_input_path,
            output_table_path,
        )

    except Exception as e:
        # Best-effort per table: keep going, but keep the traceback in the log
        # (logging.exception) and remember the table for the final summary.
        failed_tables.append(table_input_name)
        logging.exception("Error processing table %s: %s", table_input_name, e)

# Report the outcome honestly instead of always claiming success.
if failed_tables:
    logging.error(
        "Ingestions to Landing Zone completed with %d failed table(s): %s",
        len(failed_tables),
        ", ".join(str(name) for name in failed_tables),
    )
else:
    logging.info("Ingestions to Landing Zone completed!")

2024-09-22 23:24:09,178 - INFO - Processing table: sales_countryregioncurrency
2024-09-22 23:24:15,202 - INFO - Table sales_countryregioncurrency successfully processed and saved to Minio: s3a://landing/adventure_works/landing_sales_countryregioncurrency
2024-09-22 23:24:15,321 - INFO - Processing table: sales_creditcard
2024-09-22 23:24:31,615 - INFO - Table sales_creditcard successfully processed and saved to Minio: s3a://landing/adventure_works/landing_sales_creditcard
2024-09-22 23:24:31,710 - INFO - Processing table: sales_currency
2024-09-22 23:24:32,697 - INFO - Table sales_currency successfully processed and saved to Minio: s3a://landing/adventure_works/landing_sales_currency
2024-09-22 23:24:32,817 - INFO - Processing table: humanresources_department
2024-09-22 23:24:33,854 - INFO - Table humanresources_department successfully processed and saved to Minio: s3a://landing/adventure_works/landing_humanresources_department
2024-09-22 23:24:33,946 - INFO - Processing table: humanre