In [0]:
# %sql

# select * from dimruncycle
# order by runcycleid desc
# limit 1;

In [0]:
# %sql

# -- select * from foi_mod.foirequestoipc
# -- order by oipcid desc
# -- limit 100;


# -- select
# -- 	mr.foirequest_id as foirequestid,
# -- 	0 as runcycleid,
# -- 	roipc.oipcno as orderno,
# -- 	o.name as outcome,
# --     CASE
# --         WHEN roipc.isinquiry = 'NULL' THEN null
# --     ELSE
# --         try_cast(get_json_object(inquiryattributes, '$.inquirydate') AS DATE)
# --     END AS inquirydate,
# -- 	roipc.isinquiry as activeflag
# -- from foi_mod.foirequestoipc roipc
# -- inner join foi_mod.foiministryrequests mr on roipc.foiministryrequest_id = mr.foiministryrequestid and roipc.foiministryrequestversion_id = mr.version
# -- left join foi_mod.oipcoutcomes o on try_cast(roipc.outcomeid AS INT) = o.outcomeid
# -- where roipc.created_at > '2025-01-01';

In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers
from pyspark.sql.functions import lit

# Incremental load of OIPC fields from FOI Mod into the Delta table
# hive_metastore.default.factrequestoipcfields.
#
# Flow of this cell:
#   1. Open a run cycle for this package through etl_helpers.
#   2. Read `runcyclestartat` of the most recent dimruncycle row with
#      success = 't' for this package (falls back to 2019-01-01 when none).
#   3. Select foi_mod.foirequestoipc rows whose created_at is later than that.
#   4. MERGE the result into the target Delta table.
#   5. Close the run cycle with 't' on success or 'f' (plus a message) on error.
tablename = "factrequestoipcfields"
# presumably registers the run in dimruncycle and returns its id — confirm in etl_helpers
runcycleid = etl_helpers.start_run_cycle(tablename)
# Not referenced again in this cell; kept so the directory exists for whatever relies on it.
os.makedirs("/dbfs/foi/dataload", exist_ok=True)  # make sure directory exists

try:

    # String literal uses single quotes (standard SQL) so the query does not
    # depend on double-quoted text being parsed as a string.
    df_lastrun = spark.sql(f"SELECT runcyclestartat as createddate FROM dimruncycle WHERE packagename = '{tablename}' AND success = 't' ORDER BY runcycleid DESC LIMIT 1")

    # first() returns None for an empty result, so one Spark action is enough
    # (previously count() and first() each triggered their own job).
    lastrun_row = df_lastrun.first()
    if lastrun_row is not None:
        lastruntime = lastrun_row.createddate.strftime("%Y-%m-%d %H:%M:%S")
    else:
        # No successful run recorded yet: load everything from this fixed date.
        lastruntime = "2019-01-01 00:00:00"
    print(lastruntime)

    # NOTE(review): `isinquiry = 'NULL'` compares against the literal string
    # 'NULL', not SQL NULL — confirm that is how the source column encodes it.
    # NOTE(review): the same isinquiry column is also loaded as activeflag — confirm.
    query = f"""
        SELECT
            mr.foirequest_id as foirequestid,
            {runcycleid} as runcycleid,
            roipc.oipcno as orderno,
            o.name as outcome,
            CASE
                WHEN roipc.isinquiry = 'NULL' THEN null
            ELSE
                try_cast(get_json_object(inquiryattributes, '$.inquirydate') AS DATE)
            END AS inquirydate,
            roipc.isinquiry as activeflag,
            'FOIMOD' AS sourceoftruth
        FROM foi_mod.foirequestoipc roipc
        INNER JOIN foi_mod.foiministryrequests mr on roipc.foiministryrequest_id = mr.foiministryrequestid and roipc.foiministryrequestversion_id = mr.version
        LEFT JOIN foi_mod.oipcoutcomes o on try_cast(roipc.outcomeid AS INT) = o.outcomeid
        WHERE roipc.created_at > '{lastruntime}'
    """

    # print(query)

    df = spark.sql(query)
    df.show()

    # order of columns here is important!
    df_mapped = df.selectExpr(
            "foirequestid AS foirequestid",
            "runcycleid AS runcycleid",
            "orderno AS orderno",
            "outcome AS outcome",
            "inquirydate AS inquirydate",
            "activeflag AS activeflag",
            "sourceoftruth AS sourceoftruth"
        )
    df_mapped.show()

    # Imported inside the try so an import failure is also recorded against the run cycle.
    from delta.tables import DeltaTable
    delta_table = DeltaTable.forName(spark, f"hive_metastore.default.{tablename}")
    # Match on (foirequestid, orderno, sourceoftruth):
    #   - matched target rows that are currently active ('t') are only flagged 'f';
    #   - unmatched source rows are inserted as-is.
    # NOTE(review): `=` never matches NULL keys, so a NULL orderno always goes
    # down the insert path — confirm this is intended.
    # NOTE(review): a matched source row is not itself inserted — confirm.
    delta_table.alias("target").merge(
        df_mapped.alias("source"),
        "target.foirequestid = source.foirequestid AND target.orderno = source.orderno AND target.sourceoftruth = source.sourceoftruth"
    ).whenMatchedUpdate(
        condition = "target.activeflag = 't'",
        set = {
            "activeflag": lit("f"),
        }
    ).whenNotMatchedInsert(values = {
        "foirequestid": "source.foirequestid",
        "runcycleid": "source.runcycleid",
        "orderno": "source.orderno",
        "outcome": "source.outcome",
        "inquirydate": "source.inquirydate",
        "activeflag": "source.activeflag",
        "sourceoftruth": "source.sourceoftruth"
    }).execute()

    etl_helpers.end_run_cycle(runcycleid, 't', tablename)
except NoCredentialsError:
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', tablename, "Credentials not available")
    # NOTE(review): dbutils.notebook.exit ends the notebook with a return value
    # rather than raising — confirm the caller inspects this string.
    dbutils.notebook.exit("Error: Something went wrong.")
except Exception as e:
    # Nothing visible in this cell raises "no changes for today"; the branch is
    # kept in case a helper does — NOTE(review): confirm or remove.
    if (str(e) == "no changes for today"):
        print("here")
        etl_helpers.end_run_cycle(runcycleid, 't', tablename)
    else:
        print(f"An error occurred: {e}")
        etl_helpers.end_run_cycle(runcycleid, 'f', tablename, f"An error occurred: {e}")
        dbutils.notebook.exit("Error: Something went wrong.")