In [0]:
# %sql

# select * from dimruncycle
# order by runcycleid desc
# limit 1;

# -- SELECT
# --     DISTINCT(applicantcategoryid) AS requestertypeid,
# --     name AS requestertypename,
# --     description AS requestertypedescription,
# --     CAST(NULL AS TIMESTAMP) AS createddate,
# --     CAST(NULL AS TIMESTAMP) AS modifieddate,
# --     CAST(NULL AS STRING) AS creport,
# --     CAST(NULL AS STRING) AS cpal,
# --     CASE
# --         WHEN isactive = true THEN 'Y'
# --     ELSE
# --         'N'
# --     END AS cactive,
# --     CAST(NULL AS INTEGER) AS sortorder,
# --     'FOIMOD' AS sourceoftruth
# -- FROM foi_mod.applicantcategories
# -- order by requestertypeid;


# -- DROP TABLE IF EXISTS dimrequestertypes;

# -- CREATE TABLE dimrequestertypes
# -- USING DELTA AS
# --     SELECT
# --         applicantcategoryid AS requestertypeid,
# --         name AS requestertypename,
# --         description AS requestertypedescription,
# --         CAST(NULL AS TIMESTAMP) AS createddate,
# --         CAST(NULL AS TIMESTAMP) AS modifieddate,
# --         CAST(NULL AS STRING) AS creport,
# --         CAST(NULL AS STRING) AS cpal,
# --         CASE
# --             WHEN isactive = true THEN 'Y'
# --         ELSE
# --             'N'
# --         END AS cactive,
# --         CAST(NULL AS INTEGER) AS sortorder,
# --         'FOIMOD' AS sourceoftruth
# --     FROM foi_mod.applicantcategories
# --     order by applicantcategoryid;

In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers
from pyspark.sql.functions import col #lit

# Load script for the `dimrequestertypes` dimension: read the FOIMOD applicant
# categories, shape them to the dimension's column layout and upsert them into
# the Delta table of the same name.
tablename = "dimrequestertypes"
# Opens a run-cycle record for this table; the returned id is handed back to
# end_run_cycle() below. (Exact bookkeeping lives in etl_helpers — not visible here.)
runcycleid = etl_helpers.start_run_cycle(tablename)
os.makedirs("/dbfs/foi/dataload", exist_ok=True)  # make sure directory exists

try:
    # NOTE: in SQL, DISTINCT applies to the whole select list, not only to
    # applicantcategoryid; the parentheses are plain grouping. The merge below
    # keys on requestertypeid, so the source is presumably expected to hold one
    # row per applicantcategoryid — TODO confirm against foi_mod.applicantcategories.
    query = """
        SELECT
            DISTINCT(applicantcategoryid) AS requestertypeid,
            name AS requestertypename,
            description AS requestertypedescription,
            CAST(NULL AS TIMESTAMP) AS createddate,
            CAST(NULL AS TIMESTAMP) AS modifieddate,
            CAST(NULL AS STRING) AS creport,
            CAST(NULL AS STRING) AS cpal,
            CASE
                WHEN isactive = true THEN 'Y'
            ELSE
                'N'
            END AS cactive,
            CAST(NULL AS INTEGER) AS sortorder,
            'FOIMOD' AS sourceoftruth
        FROM foi_mod.applicantcategories
        order by requestertypeid;
        """

    # print(query)

    df = spark.sql(query)
    df.show()

    # order of columns here is important!
    df_mapped = df.selectExpr(
            "requestertypeid AS requestertypeid",
            "requestertypename AS requestertypename",
            "requestertypedescription AS requestertypedescription",
            "createddate AS createddate",
            "modifieddate AS modifieddate",
            "creport AS creport",
            "cpal AS cpal",
            "cactive as cactive",
            "sortorder as sortorder",
            "sourceoftruth AS sourceoftruth"
        )
    df_mapped.show()

    # Upsert into the existing Delta table: rows are matched on
    # (requestertypeid, sourceoftruth); matches are overwritten with the source
    # values, everything else is inserted as a new row.
    from delta.tables import DeltaTable
    delta_table = DeltaTable.forName(spark, f"hive_metastore.default.{tablename}")
    delta_table.alias("target").merge(
        df_mapped.alias("source"),
        "target.requestertypeid = source.requestertypeid AND target.sourceoftruth = source.sourceoftruth"
    ).whenMatchedUpdate(set = {
        "requestertypename": col("source.requestertypename"),
        "requestertypedescription": col("source.requestertypedescription"),
        "createddate": col("source.createddate"),
        "modifieddate": col("source.modifieddate"),
        "creport": col("source.creport"),
        "cpal": col("source.cpal"),
        "cactive": col("source.cactive"),
        "sortorder": col("source.sortorder")
    }).whenNotMatchedInsert(values = {
        "requestertypeid": col("source.requestertypeid"),
        "requestertypename": col("source.requestertypename"),
        "requestertypedescription": col("source.requestertypedescription"),
        "createddate": col("source.createddate"),
        "modifieddate": col("source.modifieddate"),
        "creport": col("source.creport"),
        "cpal": col("source.cpal"),
        "cactive": col("source.cactive"),
        "sortorder": col("source.sortorder"),
        "sourceoftruth": col("source.sourceoftruth")
    }).execute()

    # Close the run cycle with the 't' flag once the merge has completed.
    etl_helpers.end_run_cycle(runcycleid, 't', tablename)
except NoCredentialsError as e:
    # The exception must be bound (`as e`) so that `raise ... from e` below can
    # chain it; without the binding this handler itself died with a NameError.
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', tablename, "Credentials not available")
    raise Exception("notebook failed") from e
except Exception as e:
    # "no changes for today" is treated as a sentinel for a successful no-op run.
    # Nothing visible in this cell raises it — presumably it originates in a
    # helper; TODO confirm.
    if (str(e) == "no changes for today"):
        print("here")
        etl_helpers.end_run_cycle(runcycleid, 't', tablename)
    else:
        # Record the failure on the run cycle, then re-raise so the notebook
        # run is reported as failed with the original error chained.
        print(f"An error occurred: {e}")
        etl_helpers.end_run_cycle(runcycleid, 'f', tablename, f"An error occurred: {e}")
        raise Exception("notebook failed") from e