In [0]:
# %sql

# select * from dimruncycle
# order by runcycleid desc
# limit 1;

# -- DROP TABLE IF EXISTS dimreceivedmodes;

# -- CREATE TABLE dimreceivedmodes
# -- USING DELTA AS
# --     SELECT
# --         receivedmodeid,
# --         name AS receivedmodename,
# --         CASE
# --             WHEN isactive = true THEN 'Y'
# --         ELSE
# --             'N'
# --         END AS cactive,
# --         CAST(NULL AS TIMESTAMP) AS createddate,
# --         CAST(NULL AS TIMESTAMP) AS modifieddate,
# --         CAST(NULL AS STRING) AS cdelete,
# --         'FOIMOD' AS sourceoftruth
# --     FROM foi_mod.receivedmodes
# --     order by receivedmodeid;


In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers
from pyspark.sql.functions import col #lit

# Name of the target Delta table; also passed to etl_helpers as the run-cycle label.
tablename = "dimreceivedmodes"
# NOTE(review): start_run_cycle presumably records the start of this load and
# returns its id -- confirm against etl_helpers.
runcycleid = etl_helpers.start_run_cycle(tablename)
os.makedirs("/dbfs/foi/dataload", exist_ok=True)  # make sure directory exists

try:
    # Source rows from foi_mod.receivedmodes: isactive is mapped to a 'Y'/'N'
    # flag, the date/delete columns are typed NULL placeholders, and every row
    # is tagged with the 'FOIMOD' source of truth.
    query = """
        SELECT
            receivedmodeid,
            name AS receivedmodename,
            CASE
                WHEN isactive = true THEN 'Y'
            ELSE
                'N'
            END AS cactive,
            CAST(NULL AS TIMESTAMP) AS createddate,
            CAST(NULL AS TIMESTAMP) AS modifieddate,
            CAST(NULL AS STRING) AS cdelete,
            'FOIMOD' AS sourceoftruth
        FROM foi_mod.receivedmodes
        order by receivedmodeid;
        """

    # print(query)

    df = spark.sql(query)
    df.show()

    # order of columns here is important!
    df_mapped = df.selectExpr(
            "receivedmodeid AS receivedmodeid",
            "receivedmodename AS receivedmodename",
            "cactive as cactive",
            "createddate AS createddate",
            "modifieddate AS modifieddate",
            "cdelete AS cdelete",
            "sourceoftruth AS sourceoftruth"
        )
    df_mapped.show()

    # Columns refreshed on a match. The merge keys (receivedmodeid,
    # sourceoftruth) are only written on insert.
    updatable_columns = [
        "receivedmodename",
        "cactive",
        "createddate",
        "modifieddate",
        "cdelete",
    ]
    update_values = {name: col(f"source.{name}") for name in updatable_columns}
    insert_values = {
        name: col(f"source.{name}")
        for name in ["receivedmodeid", *updatable_columns, "sourceoftruth"]
    }

    # Imported inside the try so that an import failure is also reported
    # through end_run_cycle by the handlers below.
    from delta.tables import DeltaTable
    delta_table = DeltaTable.forName(spark, f"hive_metastore.default.{tablename}")

    # Upsert: rows matching on (receivedmodeid, sourceoftruth) are updated,
    # all other source rows are inserted.
    delta_table.alias("target").merge(
        df_mapped.alias("source"),
        "target.receivedmodeid = source.receivedmodeid AND target.sourceoftruth = source.sourceoftruth"
    ).whenMatchedUpdate(
        set=update_values
    ).whenNotMatchedInsert(
        values=insert_values
    ).execute()

    # NOTE(review): 't' / 'f' look like success / failure flags for the run
    # cycle -- confirm against etl_helpers.end_run_cycle.
    etl_helpers.end_run_cycle(runcycleid, 't', tablename)
except NoCredentialsError as e:
    # The exception must be bound to `e`: the re-raise below chains from it,
    # and without the binding this handler would fail with a NameError.
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', tablename, "Credentials not available")
    raise Exception("notebook failed") from e
except Exception as e:
    if str(e) == "no changes for today":
        # This specific message is treated as a successful run, not a failure.
        print("here")
        etl_helpers.end_run_cycle(runcycleid, 't', tablename)
    else:
        print(f"An error occurred: {e}")
        etl_helpers.end_run_cycle(runcycleid, 'f', tablename, f"An error occurred: {e}")
        raise Exception("notebook failed") from e