In [0]:
# Service-principal (OAuth client-credentials) setup so Spark can reach the ADLS
# storage account and access its data through abfss:// paths.
# The client id, client secret and tenant id are read from the Databricks secret
# scope, so no credential is hard-coded in the notebook.

# Storage-account host; every ABFS OAuth setting below is keyed per account by
# appending this host to the setting name.
adls_host = "hpadlsacc.dfs.core.windows.net"
secret_scope = "hc-secret-scope"

# The tenant (directory) id is needed to build the token endpoint URL.
tenant_id = dbutils.secrets.get(secret_scope, "dir-id")

# (setting name, value) pairs. The endpoint is deliberately last so the loop
# variable left behind in the notebook namespace is not the client secret.
for conf_key, conf_value in (
    ("fs.azure.account.auth.type", "OAuth"),
    ("fs.azure.account.oauth.provider.type", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"),
    ("fs.azure.account.oauth2.client.id", dbutils.secrets.get(secret_scope, "app-key")),
    ("fs.azure.account.oauth2.client.secret", dbutils.secrets.get(secret_scope, "service-cred")),
    ("fs.azure.account.oauth2.client.endpoint", f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"),
):
    spark.conf.set(f"{conf_key}.{adls_host}", conf_value)

# Creating the NPI table in Silver layer

In [0]:
# Bronze-layer folder that holds the NPI extract (read as parquet below)
src_npi = "abfss://bronze@hpadlsacc.dfs.core.windows.net/npi_extract"

# Load the extract into a DataFrame
df = spark.read.parquet(src_npi)

# Register the DataFrame as the temp view `npi_extract` so the %sql cells can query it
df.createOrReplaceTempView("npi_extract")

# Render the loaded rows in the cell output
df.display()

In [0]:
%sql
-- Silver-layer NPI table: a Delta table stored at an explicit ADLS location
-- (external table). IF NOT EXISTS makes the statement a no-op when the table
-- is already registered.

CREATE TABLE IF NOT EXISTS silver.npi_extract (
  -- attributes copied from the npi_extract source view
  npi_id              STRING,
  first_name          STRING,
  last_name           STRING,
  position            STRING,
  organisation_name   STRING,
  last_updated        STRING,
  -- row-versioning columns written by the MERGE cells in this notebook
  inserted_date       DATE,
  updated_date        DATE,
  is_current_flag     BOOLEAN
)
USING DELTA
LOCATION "abfss://silver@hpadlsacc.dfs.core.windows.net/NPI"

In [0]:
%sql
-- Step 1: expire the current version of every NPI record that has changed.
-- A target row is matched when it has the same npi_id as a source row and is the
-- active version (is_current_flag = true). If any tracked attribute differs, the row
-- is kept as history: is_current_flag is set to false and updated_date to today.
-- Step 2 then inserts the new version as the current row.
--
-- The comparisons use the null-safe operator (<=>): a plain != evaluates to NULL when
-- either side is NULL, so a change from NULL to a value (or the reverse) would never
-- be detected and the new version would never be loaded.

MERGE INTO
  silver.npi_extract AS target
USING
  npi_extract AS source
ON target.npi_id = source.npi_id AND target.is_current_flag = true
WHEN MATCHED AND (
  NOT (target.first_name <=> source.first_name) OR
  NOT (target.last_name <=> source.last_name) OR
  NOT (target.position <=> source.position) OR
  NOT (target.organisation_name <=> source.organisation_name) OR
  NOT (target.last_updated <=> source.last_updated)
)
  THEN UPDATE SET
  target.updated_date = current_date,
  target.is_current_flag = false

In [0]:
%sql
-- Step 2: insert a current row for every source NPI that has no active row in the target.
-- The join only considers active target rows (is_current_flag = true), so NOT MATCHED covers:
--   * npi_ids that are not in the silver table yet, and
--   * npi_ids whose active row was expired in step 1 (they no longer satisfy the join).
-- Source rows that still match an active target row are left untouched.
-- New rows get today's date as both inserted_date and updated_date and are flagged current.

MERGE INTO silver.npi_extract AS target
USING npi_extract AS source
  ON  target.npi_id = source.npi_id
  AND target.is_current_flag = true
WHEN NOT MATCHED THEN
  INSERT (
    npi_id,
    first_name,
    last_name,
    position,
    organisation_name,
    last_updated,
    inserted_date,
    updated_date,
    is_current_flag
  )
  VALUES (
    source.npi_id,
    source.first_name,
    source.last_name,
    source.position,
    source.organisation_name,
    source.last_updated,
    current_date,
    current_date,
    true
  )

In [0]:
%sql
-- Show the contents of the silver NPI table after the merge steps

SELECT *
FROM silver.npi_extract


In [0]:
%sql
-- drop table silver.npi_extract