In [0]:
# %sql

# select * from dimruncycle
# order by runcycleid desc
# limit 1;

# -- DROP TABLE IF EXISTS dimdeliverymodes;

# CREATE TABLE dimdeliverymodes
# USING DELTA AS
#     SELECT
#         deliverymodeid,
#         CAST(NULL AS TIMESTAMP) AS createddate,
#         CAST(NULL AS TIMESTAMP) AS modifieddate,
#         CAST(NULL AS STRING) AS cpal,
#         CAST(NULL AS STRING) AS cdefault,
#         CAST(NULL AS STRING) AS cdelete,
#         name AS deliverymodename,
#         CASE
#             WHEN isactive = TRUE THEN 'Y'
#         ELSE
#             'N'
#         END AS cactive,
#         'FOIMOD' AS sourceoftruth
#     FROM foi_mod.deliverymodes
#     order by deliverymodeid;


In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers
from pyspark.sql.functions import col #lit
# from pyspark.sql.types import StringType, TimestampType #IntegerType, BooleanType

# Load of the `dimdeliverymodes` dimension.
#
# Reads every row of foi_mod.deliverymodes, reshapes it to the dimension layout
# and upserts it (Delta MERGE) into hive_metastore.default.dimdeliverymodes.
# The run is bracketed by etl_helpers.start_run_cycle / end_run_cycle.
# NOTE(review): the 't' / 'f' flags passed to end_run_cycle presumably mean
# success / failure -- confirm against etl_helpers.
tablename = "dimdeliverymodes"
runcycleid = etl_helpers.start_run_cycle(tablename)
os.makedirs("/dbfs/foi/dataload", exist_ok=True)  # make sure directory exists

# Single source of truth for the dimension's columns, in target order.
# The projection and both MERGE column maps are derived from this list so the
# three can never drift apart.
DIM_COLUMNS = [
    "deliverymodeid",
    "createddate",
    "modifieddate",
    "cpal",
    "cdefault",
    "cdelete",
    "deliverymodename",
    "cactive",
    "sourceoftruth",
]
# Columns that identify a row in the target; they are matched on, never updated.
MERGE_KEYS = ("deliverymodeid", "sourceoftruth")

try:
    # Plain string (not an f-string): the query has no placeholders.
    # createddate / modifieddate / cpal / cdefault / cdelete are emitted as
    # typed NULLs because the query selects no source value for them.
    query = """
        SELECT
            deliverymodeid,
            CAST(NULL AS TIMESTAMP) AS createddate,
            CAST(NULL AS TIMESTAMP) AS modifieddate,
            CAST(NULL AS STRING) AS cpal,
            CAST(NULL AS STRING) AS cdefault,
            CAST(NULL AS STRING) AS cdelete,
            name AS deliverymodename,
            CASE
                WHEN isactive = TRUE THEN 'Y'
            ELSE
                'N'
            END AS cactive,
            'FOIMOD' AS sourceoftruth
        FROM foi_mod.deliverymodes
        order by deliverymodeid;
        """

    df = spark.sql(query)

    # order of columns here is important!
    df_mapped = df.select(*DIM_COLUMNS)
    # Shown once: df and df_mapped hold the same rows and columns, so a second
    # show() would only re-run the query and print a duplicate table.
    df_mapped.show()

    # Imported inside the try so that an import failure is also recorded
    # against the run cycle.
    from delta.tables import DeltaTable

    delta_table = DeltaTable.forName(spark, f"hive_metastore.default.{tablename}")

    # Evaluates to:
    #   target.deliverymodeid = source.deliverymodeid
    #   AND target.sourceoftruth = source.sourceoftruth
    merge_condition = " AND ".join(
        f"target.{key} = source.{key}" for key in MERGE_KEYS
    )
    update_map = {
        name: col(f"source.{name}")
        for name in DIM_COLUMNS
        if name not in MERGE_KEYS
    }
    insert_map = {name: col(f"source.{name}") for name in DIM_COLUMNS}

    (
        delta_table.alias("target")
        .merge(df_mapped.alias("source"), merge_condition)
        .whenMatchedUpdate(set=update_map)
        .whenNotMatchedInsert(values=insert_map)
        .execute()
    )

    etl_helpers.end_run_cycle(runcycleid, 't', tablename)
except NoCredentialsError:
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', tablename, "Credentials not available")
except Exception as e:
    # NOTE(review): nothing visible in this cell raises "no changes for today";
    # presumably it comes from a helper -- confirm it is still reachable.
    if str(e) == "no changes for today":
        print("No changes for today; closing the run cycle as successful")
        etl_helpers.end_run_cycle(runcycleid, 't', tablename)
    else:
        # NOTE(review): the error is recorded but not re-raised, so the
        # notebook itself still finishes without an exception. Left unchanged
        # in case schedulers rely on it -- confirm this is intended.
        print(f"An error occurred: {e}")
        etl_helpers.end_run_cycle(runcycleid, 'f', tablename, f"An error occurred: {e}")