In [0]:
# Authenticate Spark against the ADLS Gen2 account with a service principal
# (OAuth client-credentials token provider) so the notebook can access its data.
# The client id, client secret and tenant id are read from the
# "hc-secret-scope" secret scope rather than being hardcoded here.

spark.conf.set(
    "fs.azure.account.auth.type.hpadlsacc.dfs.core.windows.net",
    "OAuth",
)
spark.conf.set(
    "fs.azure.account.oauth.provider.type.hpadlsacc.dfs.core.windows.net",
    "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
)
spark.conf.set(
    "fs.azure.account.oauth2.client.id.hpadlsacc.dfs.core.windows.net",
    dbutils.secrets.get(scope="hc-secret-scope", key="app-key"),
)
spark.conf.set(
    "fs.azure.account.oauth2.client.secret.hpadlsacc.dfs.core.windows.net",
    dbutils.secrets.get(scope="hc-secret-scope", key="service-cred"),
)

# The token endpoint URL embeds the tenant (directory) id, so fetch it first.
tenant_id = dbutils.secrets.get(scope="hc-secret-scope", key="dir-id")
spark.conf.set(
    "fs.azure.account.oauth2.client.endpoint.hpadlsacc.dfs.core.windows.net",
    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token",
)

# Creating the providers table in the silver layer

In [0]:
# Spark SQL imports for this notebook
from pyspark.sql import SparkSession, functions as f

# Bronze-layer locations of the providers data, one path per hospital
src_hosa = "abfss://bronze@hpadlsacc.dfs.core.windows.net/hos-a/providers"
src_hosb = "abfss://bronze@hpadlsacc.dfs.core.windows.net/hos-b/providers"

# Read the Hospital A and Hospital B providers data (Parquet)
df_hosa = spark.read.format("parquet").load(src_hosa)
df_hosb = spark.read.format("parquet").load(src_hosb)

# Stack the two providers DataFrames; unionByName matches columns by name,
# not by position
df_merged = df_hosa.unionByName(df_hosb)
display(df_merged)

# Register the merged data as the "providers" temp view so the SQL cells
# can query it
df_merged.createOrReplaceTempView("providers")

In [0]:
%sql
-- Define the silver.providers Delta table at an explicit storage location.
-- IF NOT EXISTS leaves an already-created table untouched when the cell is re-run.

CREATE TABLE IF NOT EXISTS silver.providers (
    ProviderID      STRING,
    FirstName       STRING,
    LastName        STRING,
    Specialization  STRING,
    DeptID          STRING,
    NPI             BIGINT,
    datasource      STRING,
    is_quarantined  BOOLEAN
)
USING DELTA
LOCATION "abfss://silver@hpadlsacc.dfs.core.windows.net/providers";

In [0]:
%sql
-- Remove all existing rows from silver.providers before it is reloaded
TRUNCATE TABLE silver.providers

In [0]:
%sql
-- Load silver.providers from the "providers" temp view (the merged bronze data).
--   * DISTINCT removes exact duplicate rows from the selected output.
--   * NPI is cast to BIGINT to match the table's `NPI long` column. An INT cast
--     would narrow the value to 32 bits, so anything above 2,147,483,647 would
--     overflow, become NULL, or raise an error, depending on the source type
--     and ANSI mode.
--   * Rows missing ProviderID or DeptID are still loaded, but flagged with
--     is_quarantined = TRUE.
INSERT INTO silver.providers
SELECT DISTINCT
    ProviderID,
    FirstName,
    LastName,
    Specialization,
    DeptID,
    CAST(NPI AS BIGINT) AS NPI,
    datasource,
    CASE
        WHEN ProviderID IS NULL OR DeptID IS NULL THEN TRUE
        ELSE FALSE
    END AS is_quarantined
FROM providers;

In [0]:
%sql
-- Query the loaded silver.providers table to inspect the result
SELECT *
FROM silver.providers;