In [25]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [26]:
import json
import logging
from pathlib import Path
from datetime import datetime

from pyspark.sql import SparkSession, DataFrame, functions as F

from src.innova.config.config import etl_config
from utils.trackers import setup_logging, save_metadata
from utils.spark_helpers import (
    load_parquet, save_parquet, load_metadata, save_metadata
)
from utils.transform_schema import apply_strict_schema
from utils.connections.sql_server_connector import SQLServerConnector
from src.innova.utils.loader import DataWarehouseLoader          



In [27]:
# Obtain the active Spark session, creating one with default builder settings
# if none exists yet.
spark = (
    SparkSession.builder
    .getOrCreate()
)

In [28]:
# --- Run-level constants -----------------------------------------------------
BUSINESS: str = "innova"
# Timestamp captured once when this cell executes, formatted YYYYMMDDHHMMSS.
LOAD_TS: str = datetime.now().strftime("%Y%m%d%H%M%S")
DWH_SCHEMA: str = "dwh_innova"

# --- Values read from the shared ETL configuration ---------------------------
LAYER: str = etl_config["etl"]["layer_slv"]  # presumably the silver layer name -- confirm in config
STEP_NAME_2: str = etl_config["etl"]["steps"][1]["name"]  # second entry of the configured steps
STEP_NAME_3: str = etl_config["etl"]["steps"][2]["name"]  # third entry of the configured steps
BASE_PATH: str = etl_config["paths"]["base_path"]
LOGS_PATH: str = etl_config["paths"]["logs_path"]
METADATA_PATH: str = etl_config["paths"]["metadata_path"]

# The value is a plain string built with an f-string, so it is annotated as
# ``str`` (it was previously annotated ``Path`` although no Path was created).
BASE_SILVER: str = f"{BASE_PATH}/{LAYER}/{BUSINESS}"

# Configure logging for this notebook and record the start of the step.
logger = setup_logging(LOGS_PATH)
logger.info(f"starting {STEP_NAME_2}")

In [30]:
# Location of the JSON metadata file named after STEP_NAME_2.
metadata_file = Path(
    f"{METADATA_PATH}/{STEP_NAME_2}.json"
)
# Read that metadata; it is handed to the warehouse loader further down.
metadata_step_02 = load_metadata(
    metadata_path=metadata_file,
)

In [35]:
# Run the data-warehouse load for this step.
#
# The SQL Server connector is used as a context manager so it is released when
# the block exits. The whole step is wrapped in try/finally so the Spark
# session is stopped even when the connector or the loader raises; previously
# a failure skipped ``spark.stop()`` and left the session running. Any
# exception still propagates to the notebook after cleanup.
try:
    with SQLServerConnector(spark, logger=logger) as sql_conn:
        loader = DataWarehouseLoader(
            spark=spark,
            sql_conn=sql_conn,
            metadata=metadata_step_02,   # metadata loaded for STEP_NAME_2
            dwh_cfg=etl_config["dwh"],   # "dwh" section of the ETL config
            logger=logger,
        )
        loader.run()
finally:
    # Always release the Spark session, on success and on failure alike.
    spark.stop()