## Reference Links

https://github.com/microsoft/semantic-link-labs

https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.admin.html

https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Service%20Principal.ipynb


## Import Libraries

In [7]:
import sempy
import sempy.fabric as fabric
import sempy_labs as labs
from pyspark.sql.functions import col, count
from datetime import datetime, timedelta
from pyspark.sql.functions import lit
from pyspark.sql.types import NullType

StatementMeta(, f6c4d9ad-3a97-49d4-9451-1aedfe3e949d, 11, Finished, Available, Finished)

In [3]:
# Session-wide defaults for the Delta protocol versions of newly created tables.
# NOTE(review): presumably writer v5 / reader v2 are chosen because the Day 0
# writes in this notebook enable delta.columnMapping.mode = "name" — confirm
# against the Delta Lake column-mapping requirements.
delta_protocol_defaults = {
    "spark.databricks.delta.properties.defaults.minWriterVersion": 5,
    "spark.databricks.delta.properties.defaults.minReaderVersion": 2,
}
for conf_key, conf_value in delta_protocol_defaults.items():
    spark.conf.set(conf_key, conf_value)

StatementMeta(, f6c4d9ad-3a97-49d4-9451-1aedfe3e949d, 7, Finished, Available, Finished)

#### Get workspaces

In [23]:
# Pull the current workspace list from the admin API and convert it to Spark.
df_workspaces = labs.admin.list_workspaces()
spark_df_workspaces = spark.createDataFrame(df_workspaces)

# Soft-delete bookkeeping columns: every row returned by the API is live, so
# IsDeleted starts at 0 and "Deleted On" is NULL.
# The NULL is cast to timestamp explicitly: a bare lit(None) produces a
# NullType (void) column, so a table created by the Day 0 block would get an
# untyped "Deleted On" column instead of the TIMESTAMP that the MERGE below
# fills with now().
spark_df_workspaces = (
    spark_df_workspaces
    .withColumn("IsDeleted", lit(0))
    .withColumn("Deleted On", lit(None).cast("timestamp"))
)

# ----------------- Run below block on Day 0 -----------------------------
# Create the table and load the initial data.
# NOTE(review): this writes to the unqualified name "dim_workspaces" while the
# MERGE below targets LH_Monitoring.dbo.dim_workspaces — confirm that both
# resolve to the same table in the attached lakehouse.
# spark_df_workspaces.write.format("Delta").mode("Overwrite").option("delta.columnMapping.mode", "name").saveAsTable("dim_workspaces")

# --------------- Run below block for daily load -------------------------
# Expose today's snapshot to Spark SQL as the MERGE source.
spark_df_workspaces.createOrReplaceTempView("v_workspace")

# Upsert the snapshot into the dimension table, keyed on `Id`:
#   * matched rows are overwritten with the source values (including
#     IsDeleted = 0 and a NULL "Deleted On", since the source carries both);
#   * new Ids are inserted;
#   * target rows missing from the snapshot that are not yet flagged are
#     soft-deleted and stamped with the current time.
spark.sql("""
MERGE INTO LH_Monitoring.dbo.dim_workspaces AS t
USING v_workspace AS s
ON t.`Id` = s.`Id`
WHEN MATCHED THEN
    UPDATE SET *
WHEN NOT MATCHED THEN
    INSERT *
WHEN NOT MATCHED BY SOURCE AND t.IsDeleted = 0 THEN
    UPDATE SET t.IsDeleted = 1,
               t.`Deleted On` = now()
""")


StatementMeta(, 32648a51-8325-4373-a5f9-26f069eae04a, 27, Finished, Available, Finished)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

#### Get reports

In [12]:
# Pull the current report list from the admin API and convert it to Spark.
df_reports = labs.admin.list_reports()
spark_df_report = spark.createDataFrame(df_reports)

# Soft-delete bookkeeping columns: every row returned by the API is live, so
# IsDeleted starts at 0 and "Deleted On" is NULL.
# The NULL is cast to timestamp explicitly: a bare lit(None) produces a
# NullType (void) column, so a table created by the Day 0 block would get an
# untyped "Deleted On" column instead of the TIMESTAMP that the MERGE below
# fills with now().
spark_df_report = (
    spark_df_report
    .withColumn("IsDeleted", lit(0))
    .withColumn("Deleted On", lit(None).cast("timestamp"))
)

# ----------------- Run below block on Day 0 -----------------------------
# Create the table and load the initial data.
# NOTE(review): this writes to the unqualified name "dim_reports" while the
# MERGE below targets LH_Monitoring.dbo.dim_reports — confirm that both
# resolve to the same table in the attached lakehouse.
# spark_df_report.write.format("Delta").mode("Overwrite").option("delta.columnMapping.mode", "name").saveAsTable("dim_reports")

# --------------- Run below block for daily load -------------------------
# Expose today's snapshot to Spark SQL as the MERGE source.
spark_df_report.createOrReplaceTempView("v_report")

# Upsert the snapshot into the dimension table, keyed on `Report Id`:
#   * matched rows are overwritten with the source values (including
#     IsDeleted = 0 and a NULL "Deleted On", since the source carries both);
#   * new report ids are inserted;
#   * target rows missing from the snapshot that are not yet flagged are
#     soft-deleted and stamped with the current time.
spark.sql("""
MERGE INTO LH_Monitoring.dbo.dim_reports AS t
USING v_report AS s
ON t.`Report Id` = s.`Report Id`
WHEN MATCHED THEN
    UPDATE SET *
WHEN NOT MATCHED THEN
    INSERT *
WHEN NOT MATCHED BY SOURCE AND t.IsDeleted = 0 THEN
    UPDATE SET t.IsDeleted = 1,
               t.`Deleted On` = now()
""")


StatementMeta(, f6c4d9ad-3a97-49d4-9451-1aedfe3e949d, 18, Finished, Available, Finished)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

#### Get datasets

In [11]:
# Pull the current dataset list from the admin API and convert it to Spark.
df_datasets = labs.admin.list_datasets()
# display(df_datasets)
spark_df_datasets = spark.createDataFrame(df_datasets)

# Store "Users" and "Upstream Datasets" as plain strings.
# NOTE(review): presumably these columns hold nested/list values that are
# flattened to text for storage — confirm against the API output.
spark_df_datasets = (
    spark_df_datasets
    .withColumn("Users", col("Users").cast("string"))
    .withColumn("Upstream Datasets", col("Upstream Datasets").cast("string"))
)

# Soft-delete bookkeeping columns: every row returned by the API is live, so
# IsDeleted starts at 0 and "Deleted On" is NULL.
# The NULL is cast to timestamp explicitly: a bare lit(None) produces a
# NullType (void) column, so a table created by the Day 0 block would get an
# untyped "Deleted On" column instead of the TIMESTAMP that the MERGE below
# fills with now().
spark_df_datasets = (
    spark_df_datasets
    .withColumn("IsDeleted", lit(0))
    .withColumn("Deleted On", lit(None).cast("timestamp"))
)

# ----------------- Run below block on Day 0 -----------------------------
# Create the table and load the initial data.
# NOTE(review): this writes to the unqualified name "dim_datasets" while the
# MERGE below targets LH_Monitoring.dbo.dim_datasets — confirm that both
# resolve to the same table in the attached lakehouse.
# spark_df_datasets.write.format("Delta").mode("Overwrite").option("delta.columnMapping.mode", "name").saveAsTable("dim_datasets")

# --------------- Run below block for daily load -------------------------
# Expose today's snapshot to Spark SQL as the MERGE source.
spark_df_datasets.createOrReplaceTempView("v_dataset")

# Upsert the snapshot into the dimension table, keyed on `Dataset Id`:
#   * matched rows are overwritten with the source values (including
#     IsDeleted = 0 and a NULL "Deleted On", since the source carries both);
#   * new dataset ids are inserted;
#   * target rows missing from the snapshot that are not yet flagged are
#     soft-deleted and stamped with the current time.
spark.sql("""
MERGE INTO LH_Monitoring.dbo.dim_datasets AS t
USING v_dataset AS s
ON t.`Dataset Id` = s.`Dataset Id`
WHEN MATCHED THEN
    UPDATE SET *
WHEN NOT MATCHED THEN
    INSERT *
WHEN NOT MATCHED BY SOURCE AND t.IsDeleted = 0 THEN
    UPDATE SET t.IsDeleted = 1,
               t.`Deleted On` = now()
""")


StatementMeta(, f6c4d9ad-3a97-49d4-9451-1aedfe3e949d, 17, Finished, Available, Finished)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

#### Get Fabric Items

In [27]:
# Pull the current Fabric item list from the admin API and convert it to Spark.
df_lists = labs.admin.list_items()
# display(df_lists)
spark_df_lists = spark.createDataFrame(df_lists)

# Soft-delete bookkeeping columns: every row returned by the API is live, so
# IsDeleted starts at 0 and "Deleted On" is NULL.
# The NULL is cast to timestamp explicitly: a bare lit(None) produces a
# NullType (void) column, so a table created by the Day 0 block would get an
# untyped "Deleted On" column instead of the TIMESTAMP that the MERGE below
# fills with now().
spark_df_lists = (
    spark_df_lists
    .withColumn("IsDeleted", lit(0))
    .withColumn("Deleted On", lit(None).cast("timestamp"))
)

# ----------------- Run below block on Day 0 -----------------------------
# Create the table and load the initial data.
# NOTE(review): this writes to the unqualified name "dim_items" while the
# MERGE below targets LH_Monitoring.dbo.dim_items — confirm that both
# resolve to the same table in the attached lakehouse.
# spark_df_lists.write.format("Delta").mode("Overwrite").option("delta.columnMapping.mode", "name").saveAsTable("dim_items")

# --------------- Run below block for daily load -------------------------
# Expose today's snapshot to Spark SQL as the MERGE source.
spark_df_lists.createOrReplaceTempView("v_item")

# Upsert the snapshot into the dimension table, keyed on `Item Id`:
#   * matched rows are overwritten with the source values (including
#     IsDeleted = 0 and a NULL "Deleted On", since the source carries both);
#   * new item ids are inserted;
#   * target rows missing from the snapshot that are not yet flagged are
#     soft-deleted and stamped with the current time.
spark.sql("""
MERGE INTO LH_Monitoring.dbo.dim_items AS t
USING v_item AS s
ON t.`Item Id` = s.`Item Id`
WHEN MATCHED THEN
    UPDATE SET *
WHEN NOT MATCHED THEN
    INSERT *
WHEN NOT MATCHED BY SOURCE AND t.IsDeleted = 0 THEN
    UPDATE SET t.IsDeleted = 1,
               t.`Deleted On` = now()
""")


StatementMeta(, 32648a51-8325-4373-a5f9-26f069eae04a, 35, Finished, Available, Finished)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

#### Get Domains

In [6]:
# Pull the current domain list from the admin API and convert it to Spark.
df_domains = labs.admin.list_domains()
# display(df_domains)
spark_df_domains = spark.createDataFrame(df_domains)

# Store "Parent Domain ID" as a plain string.
# NOTE(review): presumably needed because the column can be entirely empty and
# would otherwise get an unusable inferred type — confirm against the API output.
spark_df_domains = spark_df_domains.withColumn(
    "Parent Domain ID", col("Parent Domain ID").cast("string")
)

# Soft-delete bookkeeping columns: every row returned by the API is live, so
# IsDeleted starts at 0 and "Deleted On" is NULL.
# The NULL is cast to timestamp explicitly: a bare lit(None) produces a
# NullType (void) column, so a table created by the Day 0 block would get an
# untyped "Deleted On" column instead of the TIMESTAMP that the MERGE below
# fills with now().
spark_df_domains = (
    spark_df_domains
    .withColumn("IsDeleted", lit(0))
    .withColumn("Deleted On", lit(None).cast("timestamp"))
)

# ----------------- Run below block on Day 0 -----------------------------
# Create the table and load the initial data.
# NOTE(review): this writes to the unqualified name "dim_domains" while the
# MERGE below targets LH_Monitoring.dbo.dim_domains — confirm that both
# resolve to the same table in the attached lakehouse.
# spark_df_domains.write.format("Delta").mode("Overwrite").option("delta.columnMapping.mode", "name").saveAsTable("dim_domains")

# --------------- Run below block for daily load -------------------------
# Expose today's snapshot to Spark SQL as the MERGE source.
spark_df_domains.createOrReplaceTempView("v_domain")

# Upsert the snapshot into the dimension table, keyed on `Domain Id`:
#   * matched rows are overwritten with the source values (including
#     IsDeleted = 0 and a NULL "Deleted On", since the source carries both);
#   * new domain ids are inserted;
#   * target rows missing from the snapshot that are not yet flagged are
#     soft-deleted and stamped with the current time.
spark.sql("""
MERGE INTO LH_Monitoring.dbo.dim_domains AS t
USING v_domain AS s
ON t.`Domain Id` = s.`Domain Id`
WHEN MATCHED THEN
    UPDATE SET *
WHEN NOT MATCHED THEN
    INSERT *
WHEN NOT MATCHED BY SOURCE AND t.IsDeleted = 0 THEN
    UPDATE SET t.IsDeleted = 1,
               t.`Deleted On` = now()
""")


StatementMeta(, f6c4d9ad-3a97-49d4-9451-1aedfe3e949d, 10, Finished, Available, Finished)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

###### If these tables already exist, use the commands below to add the IsDeleted and "Deleted On" columns (shown for dim_datasets; repeat for each of the other tables)

In [None]:
%%sql
-- One-off migration for an existing dim_datasets table: add the two
-- soft-delete bookkeeping columns, then mark every existing row as not deleted.
ALTER TABLE LH_Monitoring.dbo.dim_datasets
    ADD COLUMNS (IsDeleted INT, `Deleted On` TIMESTAMP);

UPDATE LH_Monitoring.dbo.dim_datasets
SET IsDeleted = 0