In [0]:
# Service principal (OAuth client-credentials flow) used to integrate with
# ADLS and access its data in the "hpadlsacc" storage account.
# Every sensitive value is read from the "hc-secret-scope" secret scope.

# Directory id; it is interpolated into the token endpoint URL below.
tenant_id = dbutils.secrets.get("hc-secret-scope", "dir-id")

# Apply each (setting, value) pair to the Spark session, in the listed order.
for conf_key, conf_value in (
    ("fs.azure.account.auth.type.hpadlsacc.dfs.core.windows.net", "OAuth"),
    ("fs.azure.account.oauth.provider.type.hpadlsacc.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"),
    ("fs.azure.account.oauth2.client.id.hpadlsacc.dfs.core.windows.net", dbutils.secrets.get("hc-secret-scope", "app-key")),
    ("fs.azure.account.oauth2.client.secret.hpadlsacc.dfs.core.windows.net", dbutils.secrets.get("hc-secret-scope", "service-cred")),
    ("fs.azure.account.oauth2.client.endpoint.hpadlsacc.dfs.core.windows.net", f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"),
):
    spark.conf.set(conf_key, conf_value)

# Creating the departments table in the Silver layer

In [0]:
# importing the necessary functions and spark session
from pyspark.sql import SparkSession, functions as f
from pyspark.sql.functions import col

# Bronze-layer locations of the departments data, one per hospital
src_hosa = "abfss://bronze@hpadlsacc.dfs.core.windows.net/hos-a/departments"
src_hosb = "abfss://bronze@hpadlsacc.dfs.core.windows.net/hos-b/departments"

# Load the parquet departments data for Hospital A and Hospital B
df_hosa = spark.read.format("parquet").load(src_hosa)
df_hosb = spark.read.format("parquet").load(src_hosb)

# Stack both hospitals into a single dataframe; columns are matched by name
df_merged = df_hosa.unionByName(df_hosb)

# Column expressions for the two key columns derived from the source `deptid`
src_dept_id_col = f.col("deptid")
dept_id_col = f.concat(f.col("deptid"), f.lit('-'), f.col("datasource"))

# Keep the original id as SRC_Dept_id, build Dept_id as "<deptid>-<datasource>",
# then drop the raw deptid column (net effect: deptid is renamed and a new key is added)
df_merged = (
    df_merged
    .withColumn("SRC_Dept_id", src_dept_id_col)
    .withColumn("Dept_id", dept_id_col)
    .drop("deptid")
)

# Register the dataframe as the temp view "departments" so the SQL cells can query it
df_merged.createOrReplaceTempView("departments")

# df_merged.select(col('Dept_id'),col('SRC_Dept_id'),col('Name'),col('datasource')).display()

In [0]:
%sql
-- Silver departments table: a Delta table stored at an external location.
-- IF NOT EXISTS makes this cell a no-op when the table is already defined.
CREATE TABLE IF NOT EXISTS silver.departments (
    Dept_Id        STRING,
    SRC_Dept_Id    STRING,
    Name           STRING,
    datasource     STRING,
    is_quarantined BOOLEAN
)
USING DELTA
LOCATION "abfss://silver@hpadlsacc.dfs.core.windows.net/departments";


In [0]:
%sql
-- Reset the table: remove all existing rows before it is reloaded
TRUNCATE TABLE silver.departments;


In [0]:
%sql
-- Load silver.departments from the "departments" temp view (merged bronze data).
-- A row is flagged as quarantined when its source department id or its name is missing.
INSERT INTO silver.departments
SELECT
    Dept_Id,
    SRC_Dept_Id,
    Name,
    Datasource,
    (SRC_Dept_Id IS NULL OR Name IS NULL) AS is_quarantined  -- IS NULL is never NULL, so this is always TRUE or FALSE
FROM departments;

In [0]:
%sql
-- Display the contents of the silver departments table (no row limit is applied)
SELECT *
FROM silver.departments;