In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers 

# Load step for the factrequestdivisionalstages table: reads division/stage
# rows from foi_mod, maps them onto the fact-table column layout and appends
# them, bracketing the work with etl_helpers run-cycle bookkeeping.

# Name passed to the run-cycle helpers for this load.
RUN_CYCLE_NAME = "factRequestDivisionalStages"

# The returned id is stamped on every inserted row and passed to end_run_cycle.
runcycleid = etl_helpers.start_run_cycle(RUN_CYCLE_NAME)

os.makedirs("/dbfs/foi/dataload", exist_ok=True)  # make sure directory exists

try:
    # Most recent createddate already present in the fact table.
    # max() over an empty table yields NULL, in which case .strftime() below
    # raises and the run is recorded as failed by the generic handler.
    df_existing = spark.sql("SELECT max(createddate) as createddate FROM factrequestdivisionalstages")
    maxcreatedate = df_existing.first().createddate
    maxcreatedate_str = maxcreatedate.strftime("%Y-%m-%d %H:%M:%S")

    # NOTE(review): the incremental filter on maxcreatedate_str is commented
    # out inside the SQL and the active WHERE clause is pinned to 2025-05-14,
    # so every run re-selects (and re-appends) that single day. This looks
    # like a debugging/backfill leftover -- confirm and restore the
    # incremental filter if so.
    query = f"""
        SELECT
        FMRD.foiministrydivisionid AS foiministrydivisionid,
        FMRD.divisionid AS divisionid,
        FMRD.stageid AS stageid,
        FMRD.created_at AS created_at,
        FMRD.createdby AS createdby,
        FMRD.updated_at AS updated_at,
        FMRD.updatedby AS updatedby,
        FMRD.foiministryrequest_id AS foiministryrequest_id,
        FMRD.foiministryrequestversion_id AS foiministryrequestversion_id,
        FMRD.divisionduedate AS divisionduedate,
        FMRD.eapproval AS eapproval,
        FMRD.divisionreceiveddate AS divisionreceiveddate,
        FMR.axisrequestid AS filerequestnumber
        FROM foi_mod.FOIMinistryRequestDivisions FMRD
        LEFT JOIN foi_mod.FOIMinistryRequests FMR
        ON FMR.foiministryrequestid = FMRD.foiministryrequest_id
            AND FMR.version = FMRD.foiministryrequestversion_id
        -- WHERE FMRD.created_at > CAST('{maxcreatedate_str}' AS TIMESTAMP)
        WHERE DATE(FMRD.created_at) = DATE('2025-05-14')
        """

    df = spark.sql(query)
    df.show()

    # order of columns here is important!
    # insertInto() matches columns by position, not by name, so this list
    # must follow the column order of factrequestdivisionalstages.
    df_mapped = df.selectExpr(
        "foiministrydivisionid AS foirequestdivisionstageid",
        f"{runcycleid} as runcycleid",
        "divisionid AS divisionid",
        "stageid AS stageid",
        "createdby AS createdby",
        "created_at AS createddate",
        "updatedby AS modifiedby",
        "updated_at AS modifieddate",
        "divisionduedate AS divisionstageduedate",
        "divisionreceiveddate AS divisionstagereceiveddate",
        "eapproval AS divisionstageeapproval",
        "foiministryrequestversion_id AS foirequestversionid",
        "foiministryrequest_id AS foiministryrequestid",
        "filerequestnumber as foirequestnumber",
        "'' as officecode",
    )
    df_mapped.show()
    df_mapped.write.format("delta").mode("append").option("mergeSchema", "false").insertInto("factrequestdivisionalstages")
    # 't' / 'f' are presumably the success / failure flags -- confirm in etl_helpers.
    etl_helpers.end_run_cycle(runcycleid, 't', RUN_CYCLE_NAME)
except NoCredentialsError as e:
    # Bind the exception so it can be chained below; without "as e" the
    # "from e" clause itself raised NameError and hid the real cause.
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', RUN_CYCLE_NAME, "Credentials not available")
    raise Exception("notebook failed") from e
except Exception as e:
    if str(e) == "no changes for today":
        # Sentinel message treated as a successful no-op run. Nothing in this
        # cell raises it -- presumably it comes from a helper; verify.
        print("here")
        etl_helpers.end_run_cycle(runcycleid, 't', RUN_CYCLE_NAME)
    else:
        print(f"An error occurred: {e}")
        etl_helpers.end_run_cycle(runcycleid, 'f', RUN_CYCLE_NAME, f"An error occurred: {e}")
        raise Exception("notebook failed") from e