In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers 

# Incremental load of version-1 FOI ministry requests from
# foi_mod.foiministryrequests into the Delta table
# hive_metastore.default.dimmodministryrequestids, keyed on
# foiministryrequestid. The outcome of the run is reported through
# etl_helpers.end_run_cycle ('t' = success, 'f' = failure).
tablename = "dimmodministryrequestids"
# Watermark used when dimruncycle holds no successful run for this package.
initial_lastruntime = "2019-01-01 00:00:00"

# Register this run; the returned id is handed back to end_run_cycle below.
runcycleid = etl_helpers.start_run_cycle(tablename)
os.makedirs("/dbfs/foi/dataload", exist_ok=True)  # make sure directory exists

try:
    # Start time of the most recent successful run of this package.
    # first() returns None for an empty result, so one call replaces the
    # earlier count() + first() pair and the lookup is executed only once.
    # String literals are single-quoted so they stay literals even when
    # double-quoted identifiers are enabled in the SQL session.
    lastrun_row = spark.sql(
        "SELECT runcyclestartat as createddate FROM dimruncycle "
        f"WHERE packagename = '{tablename}' AND success = 't' "
        "ORDER BY runcycleid DESC LIMIT 1"
    ).first()

    if lastrun_row is not None:
        lastruntime = lastrun_row.createddate.strftime("%Y-%m-%d %H:%M:%S")
    else:
        lastruntime = initial_lastruntime
    print(lastruntime)

    # Rows created at or after the watermark. The boundary is inclusive, so
    # rows may be re-read on a later run; the merge below updates them in
    # place instead of inserting duplicates.
    query = f"""
        SELECT
            foiministryrequestid,
            foirequest_id AS foirequestid,
            axisrequestid,
            created_at
        FROM foi_mod.foiministryrequests
        WHERE version = 1 AND created_at >= '{lastruntime}'
        ORDER BY foiministryrequestid;
    """

    df = spark.sql(query)
    df.show()

    # order of columns here is important!
    df_mapped = df.selectExpr(
        "foiministryrequestid AS foiministryrequestid",
        "foirequestid AS foirequestid",
        "axisrequestid AS axisrequestid",
        "created_at AS createddate",
    )
    df_mapped.show()

    # Imported inside the try block so that an import failure is also
    # reported as a failed run cycle.
    from delta.tables import DeltaTable

    # Upsert on foiministryrequestid: matched rows get their other columns
    # refreshed, unmatched rows are inserted.
    delta_table = DeltaTable.forName(spark, f"hive_metastore.default.{tablename}")
    delta_table.alias("target").merge(
        df_mapped.alias("source"),
        "target.foiministryrequestid = source.foiministryrequestid",
    ).whenMatchedUpdate(set={
        "foirequestid": "source.foirequestid",
        "axisrequestid": "source.axisrequestid",
        "createddate": "source.createddate",
    }).whenNotMatchedInsert(values={
        "foiministryrequestid": "source.foiministryrequestid",
        "foirequestid": "source.foirequestid",
        "axisrequestid": "source.axisrequestid",
        "createddate": "source.createddate",
    }).execute()

    etl_helpers.end_run_cycle(runcycleid, 't', tablename)
except NoCredentialsError:
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', tablename, "Credentials not available")
except Exception as e:
    # NOTE(review): failures are recorded via end_run_cycle and not re-raised,
    # so the cell itself finishes without an exception - confirm that the
    # scheduler relies on the run-cycle status rather than the cell result.
    if str(e) == "no changes for today":
        # Nothing in this cell raises this message itself; presumably it comes
        # from a shared template or a helper - confirm before removing.
        print("here")
        etl_helpers.end_run_cycle(runcycleid, 't', tablename)
    else:
        print(f"An error occurred: {e}")
        etl_helpers.end_run_cycle(runcycleid, 'f', tablename, f"An error occurred: {e}")