In [0]:
%restart_python
%pip install boto3
import boto3
import os
from botocore.exceptions import NoCredentialsError
import datetime
import sys
sys.path.insert(0, '/Workspace/Shared')
import etl_helpers
from pyspark.sql.functions import col #lit
from pyspark.sql.types import StringType #IntegerType, BooleanType

# Load the `dimdivisions` dimension: read divisions joined to their program
# area from the FOI MOD source tables, then upsert them into the Delta table
# hive_metastore.default.dimdivisions. The run is bracketed by
# etl_helpers.start_run_cycle / end_run_cycle so success or failure is recorded.
tablename = "dimdivisions"
runcycleid = etl_helpers.start_run_cycle(tablename)
os.makedirs("/dbfs/foi/dataload", exist_ok=True)  # make sure directory exists

try:
    # No interpolation is needed, so this is a plain (non-f) string literal.
    query = """
        SELECT
            a.divisionid,
            a.name AS division,
            NULL AS description,
            CASE
                WHEN a.isactive = TRUE THEN 1
            ELSE
                0
            END AS cactive,
            b.bcgovcode AS programarea,
            'FOIMOD' AS sourceoftruth
        FROM foi_mod.programareadivisions a
        INNER JOIN foi_mod.programareas b ON a.programareaid = b.programareaid
        order by a.divisionid;  
        """

    # print(query)

    df = spark.sql(query)
    df.show()

    # order of columns here is important!
    df_mapped = df.selectExpr(
            "divisionid AS divisionid",
            "division AS division",
            "description AS description",
            "cactive AS cactive",
            "programarea AS programarea",
            "sourceoftruth AS sourceoftruth"
        )
    # `description` is selected as a bare NULL in the query, so it is cast
    # explicitly to string here (presumably to match the target column type --
    # verify against the dimdivisions schema).
    df_mapped = df_mapped.withColumn("description", col("description").cast(StringType()))
    df_mapped.show()

    from delta.tables import DeltaTable
    delta_table = DeltaTable.forName(spark, f"hive_metastore.default.{tablename}")
    # Upsert keyed on (divisionid, sourceoftruth): matched rows get their
    # descriptive columns refreshed, unmatched source rows are inserted.
    delta_table.alias("target").merge(
        df_mapped.alias("source"),
        "target.divisionid = source.divisionid AND target.sourceoftruth = source.sourceoftruth"
    ).whenMatchedUpdate(set = {
        "division": "source.division",
        "description": "source.description",
        "cactive": "source.cactive",
        "programarea": "source.programarea"
    }).whenNotMatchedInsert(values = {
        "divisionid": "source.divisionid",
        "division": "source.division",
        "description": "source.description",
        "cactive": "source.cactive",
        "programarea": "source.programarea",
        "sourceoftruth": "source.sourceoftruth"
    }).execute()

    etl_helpers.end_run_cycle(runcycleid, 't', tablename)
except NoCredentialsError as e:
    # Bind the exception as `e` so the chained re-raise below has a cause to
    # attach; without the binding the `from e` clause raises NameError instead
    # of the intended "notebook failed" exception.
    print("Credentials not available")
    etl_helpers.end_run_cycle(runcycleid, 'f', tablename, "Credentials not available")
    raise Exception("notebook failed") from e
except Exception as e:
    if (str(e) == "no changes for today"):
        # This specific message is treated as a successful run: the cycle is
        # closed with 't' and nothing is re-raised.
        print("here")
        etl_helpers.end_run_cycle(runcycleid, 't', tablename)
    else:
        # Any other failure: record it against the run cycle, then fail the
        # notebook while keeping the original exception as the cause.
        print(f"An error occurred: {e}")
        etl_helpers.end_run_cycle(runcycleid, 'f', tablename, f"An error occurred: {e}")
        raise Exception("notebook failed") from e