In [0]:
# %sql

# DROP TABLE IF EXISTS diminvoicedetails;

# CREATE TABLE diminvoicedetails
# USING DELTA AS
#     SELECT
#         payment_id AS invoiceid,
#         fee_code_id AS feetypeid,
#         total AS invoiceamount,
#         CAST(NULL AS DOUBLE) AS unitvalue,
#         quantity AS units,
#         total AS chargedamount,
#         CAST(NULL AS DOUBLE) AS incurredamount,
#         CAST(NULL AS DOUBLE) AS feewaived,
#         'FOIMOD' AS sourceoftruth
#     FROM foi_mod.payments
#     order by payment_id;

In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers
from pyspark.sql.functions import col #lit

# Load foi_mod.payments into the hive_metastore.default.diminvoicedetails Delta
# table via an upsert (MERGE) keyed on (invoiceid, sourceoftruth).
#
# The run is bracketed by etl_helpers.start_run_cycle / end_run_cycle.
# NOTE(review): 't' / 'f' are presumably success / failure flags for the run
# cycle record -- confirm against etl_helpers.
tablename = "diminvoicedetails"
runcycleid = etl_helpers.start_run_cycle(tablename)
os.makedirs("/dbfs/foi/dataload", exist_ok=True)  # make sure directory exists

# Columns overwritten when a target row already exists. The merge keys
# (invoiceid, sourceoftruth) are deliberately excluded here and only written
# on insert.
update_columns = [
    "feetypeid",
    "invoiceamount",
    "unitvalue",
    "units",
    "chargedamount",
    "incurredamount",
    "feewaived",
]
key_columns = ["invoiceid", "sourceoftruth"]

try:
    # Plain (non-f) string: the query has no interpolated values.
    query = """
        SELECT
            payment_id AS invoiceid,
            fee_code_id AS feetypeid,
            total AS invoiceamount,
            CAST(NULL AS DOUBLE) AS unitvalue,
            quantity AS units,
            total AS chargedamount,
            CAST(NULL AS DOUBLE) AS incurredamount,
            CAST(NULL AS DOUBLE) AS feewaived,
            'FOIMOD' AS sourceoftruth
        FROM foi_mod.payments
        order by payment_id;
        """

    df = spark.sql(query)
    df.show()

    # order of columns here is important!
    df_mapped = df.selectExpr(
            "invoiceid AS invoiceid",
            "feetypeid AS feetypeid",
            "invoiceamount AS invoiceamount",
            "unitvalue AS unitvalue",
            "units AS units",
            "chargedamount AS chargedamount",
            "incurredamount AS incurredamount",
            "feewaived as feewaived",
            "sourceoftruth AS sourceoftruth"
        )
    df_mapped.show()

    # Imported inside the try so that an import failure is also reported
    # through end_run_cycle by the handlers below.
    from delta.tables import DeltaTable

    # Build both column mappings from the single lists above so the update
    # and insert clauses cannot drift apart.
    update_set = {name: col(f"source.{name}") for name in update_columns}
    insert_values = {
        name: col(f"source.{name}") for name in key_columns + update_columns
    }

    delta_table = DeltaTable.forName(spark, f"hive_metastore.default.{tablename}")
    delta_table.alias("target").merge(
        df_mapped.alias("source"),
        "target.invoiceid = source.invoiceid AND target.sourceoftruth = source.sourceoftruth"
    ).whenMatchedUpdate(
        set=update_set
    ).whenNotMatchedInsert(
        values=insert_values
    ).execute()

    etl_helpers.end_run_cycle(runcycleid, 't', tablename)
except NoCredentialsError as e:
    # Bind the exception as `e`: the original handler chained `from e` without
    # binding it, which raised NameError instead of the intended failure.
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', tablename, "Credentials not available")
    raise Exception("notebook failed") from e
except Exception as e:
    # NOTE(review): nothing visible in this cell raises "no changes for today";
    # presumably a sentinel raised by shared ETL code -- confirm it is still
    # needed. When seen, the run is closed with 't' and not re-raised.
    if str(e) == "no changes for today":
        print("here")
        etl_helpers.end_run_cycle(runcycleid, 't', tablename)
    else:
        print(f"An error occurred: {e}")
        etl_helpers.end_run_cycle(runcycleid, 'f', tablename, f"An error occurred: {e}")
        raise Exception("notebook failed") from e