In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers 
from pyspark.sql.functions import collect_list, concat_ws, udf, lit
from pyspark.sql.types import IntegerType


# Incremental load of factrequestextensions from the FOI MOD source tables.
#
# Steps:
#   1. Open a run cycle and read the end time of the last successful run.
#   2. Find the ministry requests created/updated since then.
#   3. Build one row per extension of those requests and append the rows to
#      factrequestextensions.
#   4. Close the run cycle as success ('t') or failure ('f').
#
# `spark`, `dbutils` and `etl_helpers` are provided by the notebook environment.
runcycleid = etl_helpers.start_run_cycle("factRequestExtensions")

try:

    # Watermark: end time of the most recent successful run of this package.
    df_existing = spark.sql("SELECT max(TIMESTAMP(runcycleendat)) as runcycleendat from dimruncycle where packagename = 'factRequestExtensions' and success = 't'")
    df_existing.show()
    maxcreatedate = df_existing.first().runcycleendat
    print(maxcreatedate)
    if maxcreatedate is None:
        # max() over zero rows is NULL, i.e. no successful run has been
        # recorded yet. Previously this crashed on .strftime and the job could
        # never record a first success; fall back to a full load instead.
        maxcreatedate_str = "1900-01-01 00:00:00"
    else:
        maxcreatedate_str = maxcreatedate.strftime("%Y-%m-%d %H:%M:%S")

    # Ministry requests touched since the watermark.
    # NOTE(review): updated_at is compared at DATE granularity, so an update
    # made later on the same day as the last run may not be picked up - confirm.
    df_existing = spark.sql(f"SELECT distinct foiministryrequestid, min(foirequest_id) as foirequest_id from foi_mod.foiministryrequests where (created_at > '{maxcreatedate_str}' or TRY_CAST(updated_at AS DATE) > '{maxcreatedate_str}') group by foiministryrequestid")

    df_existing.show()

    # NOTE(review): this cell used to build (but never execute) a statement
    # that set activeflag = 'N' on existing FOIMOD rows for the changed
    # foirequest_ids. It was not valid SQL (MERGE without USING/ON) and was
    # overwritten before use, so it has been removed. As a result new rows are
    # appended without deactivating the previous ones - confirm whether that
    # deactivation is required.

    # Comma-separated ids of the changed ministry requests, used in the IN (...) filter.
    comma_list_df = df_existing.select(concat_ws(",", collect_list("foiministryrequestid")).alias("comma_list"))
    comma_list = comma_list_df.collect()[0]["comma_list"]

    print(comma_list)

    if not comma_list:
        # An empty list would render "in ()", which is a SQL syntax error.
        # Raise the sentinel the handler below treats as a successful no-op run.
        raise Exception("no changes for today")

    # sq  : latest row (by created_at) of each changed ministry request
    # sq2 : latest version of each extension, with its type and status
    # sq3 : first version of each extension; its updated_at is used as approveddate
    # NOTE(review): sq3 is ranked per foirequestextensionid but joined on the
    # ministry request id, so a request with N extensions appears to produce
    # N x N rows. Joining sq3 to sq2 on the extension id looks like the intent - confirm.
    query = f"""
        select * from 
        (SELECT *
                FROM (
                    SELECT 
                    closedate,
                    case when closedate is not null then createdby else null end as completedby,
                    duedate + interval 1 day as extensionactiondate,
                    duedate,
                    recordspagecount,
                    foirequest_id,
                    foiministryrequestid,
                    version,
                    
                    --*,
                        ROW_NUMBER() OVER (
                            PARTITION BY foiministryrequestid 
                            ORDER BY created_at DESC
                        ) AS rn
                    FROM foi_mod.foiministryrequests
                    where foiministryrequestid in ({comma_list})

                ) sub
                WHERE rn = 1) sq

        join 

        (select * from (select foirequestextensionid, extendedduedays, extendedduedate, e.createdby, created_at, TRY_CAST(approvednoofdays AS INT) as approvednoofdays, updatedby, TRY_CAST(updated_at AS DATE) as updated_at, version, foiministryrequest_id,
        et.extensiontypeid, e.isactive, es.name as extensionstatus,
        ROW_NUMBER() OVER (
                            PARTITION BY foirequestextensionid 
                            ORDER BY version DESC
                        ) AS rn
        from foi_mod.foirequestextensions e
        join foi_mod.extensionreasons er on er.extensionreasonid = e.extensionreasonid
        join default.dimextensiontypes et on et.extensiontypename = er.reason
        join foi_mod.extensionstatuses es on es.extensionstatusid = e.extensionstatusid
        ) sub where rn = 1
        ) sq2 on sq2.foiministryrequest_id = sq.foiministryrequestid

        join 

        (select * from (select TRY_CAST(updated_at AS DATE) as approveddate, foiministryrequest_id,
        ROW_NUMBER() OVER (
                            PARTITION BY foirequestextensionid 
                            ORDER BY version asc
                        ) AS rn
        from foi_mod.foirequestextensions) sub where rn = 1
        ) sq3 on sq3.foiministryrequest_id = sq.foiministryrequestid
        """

    print(query)

    df = spark.sql(query)
    print(df.count())
    df.show()

    # order of columns here is important!
    # insertInto() below matches columns by position, not by name, so this
    # list has to follow the column order of the target table.
    df_mapped = df.selectExpr( 
        "foirequest_id as foirequestid",
        f"{runcycleid} as runcycleid",
        "createdby AS createdby",
        "created_at as createddate",
        "updatedby as modifiedby",
        "updated_at as modifieddate",
        "'' as cstatus",
        "extensiontypeid as extensiontypeid",
        "'' as approvedby",
        "approveddate as approveddate",
        "extendedduedays as extensiondays",
        "extendedduedate as extendeddate",
        "'' AS comments",
        "'' AS approvedcomments",
        "approvednoofdays AS requesteddays",
        "'' AS type",
        "'' AS cnoticetooic",
        "closedate AS completeddate",
        "completedby AS completedby",
        "'' AS completedcomments",
        "'' as ticategory",
        "extensionactiondate AS extensionactiondate",
        "extensionstatus AS approvedstatus",
        "duedate AS oldtargetdate",
        "'' as oldestimateddeliverydate",
        "'' AS consultationtype",
        "recordspagecount as noofpagesdisclosed",
        "recordspagecount as noofpagessent",
        "foirequestextensionid as requestextid",
        "isactive AS activeflag",
        "'FOIMOD' AS sourceoftruth"
    )
    df_mapped.show()
    df_mapped.write.format("delta").mode("append").option("mergeSchema", "false").insertInto("factrequestextensions")  
    etl_helpers.end_run_cycle(runcycleid, 't', "factRequestExtensions")
except NoCredentialsError:
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', "factRequestExtensions", "Credentials not available")
    dbutils.notebook.exit("Error: Something went wrong.")
except Exception as e:
    if str(e) == "no changes for today":
        # Nothing to load is not a failure: close the run cycle as successful
        # so the watermark still advances.
        etl_helpers.end_run_cycle(runcycleid, 't', "factRequestExtensions")
    else:
        print(f"An error occurred: {e}")
        etl_helpers.end_run_cycle(runcycleid, 'f', "factRequestExtensions", f"An error occurred: {e}")
        dbutils.notebook.exit("Error: Something went wrong.")