In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers 

# Incremental load of FOI ministry request rows into the
# factfoiflowrequeststatusdetails fact table.
#
# Flow:
#   1. Start a run cycle via etl_helpers and keep its id.
#   2. Read the newest createddate already present in the fact table.
#   3. Select source requests created after that point (or all of them when the
#      fact table is still empty).
#   4. Rename the columns to the fact-table names and append them.
#   5. Close the run cycle with 't' on success / nothing-to-do, or 'f' plus a
#      message on failure, re-raising so the notebook run itself fails.

# Name under which this load reports its run cycle.
RUN_CYCLE_NAME = "factFOIFlowRequestStatusDetails"

# The returned id is written to every appended row (runcycleid column) and is
# passed back to end_run_cycle below.
runcycleid = etl_helpers.start_run_cycle(RUN_CYCLE_NAME)

os.makedirs("/dbfs/foi/dataload", exist_ok=True)  # make sure directory exists

try:
    # High-water mark: newest createddate already loaded into the fact table.
    df_existing = spark.sql("SELECT max(createddate) as createddate FROM factfoiflowrequeststatusdetails")
    maxcreatedate = df_existing.first().createddate

    if maxcreatedate is None:
        # max() over an empty table yields NULL; there is no high-water mark
        # yet, so load every source row instead of crashing on None.strftime.
        incremental_filter = ""
    else:
        maxcreatedate_str = maxcreatedate.strftime("%Y-%m-%d %H:%M:%S")
        incremental_filter = f"WHERE FMR.created_at > '{maxcreatedate_str}'"

    query = f"""
        WITH assignees AS (
        SELECT 
            username,
            CASE 
            WHEN firstname IS NOT NULL THEN CONCAT(firstname, ' ', lastname)
            ELSE username
            END AS fullname
        FROM foi_mod.FOIAssignees
        )

        SELECT
        FMR.foiministryrequestid AS foiministryrequestid,
        FMR.version AS requestversion,
        FMR.isactive AS requestisactive,
        FMR.axisrequestid AS filerequestnumber,
        PA.iaocode AS ministrycode,
        FMR.startdate AS requeststartdate,
        FMR.duedate AS requestduedate,
        FMR.assignedto AS iaoassignedto,
        FMR.created_at AS requestcreatedat,
        FMR.updated_at AS requestupdatedat,
        FMR.createdby AS requestcreatedby,
        FMR.updatedby AS requestupdatedby,
        FMR.requeststatusid AS ministryrequeststatusid,
        FMR.assignedgroup AS iaoassignedgroup,
        FMR.assignedministryperson AS ministryperson,
        FMR.assignedministrygroup AS ministryteam,
        FMR.cfrduedate AS requestcfrduedate,
        FMR.closedate AS requestclosedate,
        a1.fullname AS assigneefullname,
        a2.fullname AS iaoassigneefullname,
        FMR.recordspagecount AS recordspagecount,
        FMR.axislanpagecount AS lanpagecount

        FROM foi_mod.FOIMinistryRequests FMR
        INNER JOIN foi_mod.ProgramAreas PA 
        ON FMR.programareaid = PA.programareaid

        LEFT JOIN assignees a1 
        ON a1.username = FMR.assignedministryperson

        LEFT JOIN assignees a2 
        ON a2.username = FMR.assignedto

        {incremental_filter}
        """

    print(query)

    df = spark.sql(query)
    df.show()

    # head(1) fetches at most one row, which is enough to tell "empty" from
    # "non-empty" without counting the whole result set.
    if not df.head(1):
        # Nothing new since the last load: still a successful run cycle.
        print("here")
        etl_helpers.end_run_cycle(runcycleid, 't', RUN_CYCLE_NAME)
    else:
        # order of columns here is important!
        df_mapped = df.selectExpr(
            f"{runcycleid} as runcycleid",
            "filerequestnumber AS foirequestnumber",
            "requestversion AS version",
            "requestisactive AS isactive",
            "ministryrequeststatusid AS requestatusid",
            "ministrycode AS ministrycode",
            "iaoassignedgroup AS assignedgroup",
            "ministryperson AS assignedministryperson",
            "ministryteam AS assignedministrygroup",
            "requeststartdate AS startdate",
            "requestduedate AS duedate",
            "requestcfrduedate AS cfrduedate",
            "requestclosedate AS closedate",
            "requestcreatedby AS createdby",
            "requestcreatedat AS createddate",
            "requestupdatedby AS modifiedby",
            "requestupdatedat AS modifieddate",
            "iaoassignedto AS iaoassignedto",
            "iaoassigneefullname AS iaoassigneefullname",
            "assigneefullname AS ministryassigneefullname",
            "recordspagecount AS recordspagecount",
            "lanpagecount AS lanpagecount",
        )
        df_mapped.show()
        etl_helpers.append_with_incrementing_id(
            df_mapped,
            table_name="factfoiflowrequeststatusdetails",
            id_column="foiflowrequeststatusdetailsid",
            database="hive_metastore.default",
        )
        etl_helpers.end_run_cycle(runcycleid, 't', RUN_CYCLE_NAME)
except NoCredentialsError as e:
    # Bind the exception so it can be chained; without "as e" the raise below
    # would itself fail with NameError.
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', RUN_CYCLE_NAME, "Credentials not available")
    raise Exception("notebook failed") from e
except Exception as e:
    # Record the failure on the run cycle, then fail the notebook run while
    # keeping the original error as the cause.
    print(f"An error occurred: {e}")
    etl_helpers.end_run_cycle(runcycleid, 'f', RUN_CYCLE_NAME, f"An error occurred: {e}")
    raise Exception("notebook failed") from e