## Review

See https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Capacity%20Migration.ipynb for background on Capacity Migration and related ideas.

1. Attach a schema-enabled Lakehouse to the notebook
2. Execute all cells
3. Execute again after changes to any Fabric item
4. Review changes to the slowly changing dimensions (SCDs)


In [None]:
%pip install semantic-link-labs

In [None]:
import sempy.fabric as fabric
import sempy_labs as labs
import sempy_labs.admin as labs_admin
import pandas as pd
from pyspark.sql.functions import *
from datetime import datetime

# Set to True to display each DataFrame right after it is loaded from the API.
show_results = False

# One timestamp captured once per run; the load cells stamp it on every
# staged row as EffectiveDate.
current_datetime = datetime.now()


In [None]:
%%sql
-- Staging schema: holds the snapshot tables that the load cells overwrite on every run.
create schema if not exists stage

## Fetch data for Capacities, Workspaces and Items from API

The data is fetched with Semantic Link Labs, using the identity of the logged-in user.

In [None]:
# Snapshot the capacities returned by the admin API into stage.stage_capacity_list.
# NOTE(review): `_list_capacities_meta` is a private helper (leading underscore) of
# semantic-link-labs - confirm it is stable across versions or switch to a public equivalent.
capacity_list = labs_admin._basic_functions._list_capacities_meta()
capacity_list = capacity_list.rename(columns={'Capacity Id': 'CapacityId', 'Capacity Name': 'CapacityName'})
capacity_list['EffectiveDate'] = current_datetime
spark_capacity_list = spark.createDataFrame(capacity_list)

# Change-detection hash over the tracked attributes.
# concat_ws takes the SEPARATOR as its first argument, so it must be passed explicitly;
# otherwise the first column name is used as a literal delimiter and that column is never hashed.
# Each value is cast to string and NULLs become '' because concat_ws silently drops NULLs,
# which would let values shift position and produce the same hash for different rows.
# NOTE: this changes the hash of every row, so the first run after this change writes one
# new version for each capacity already present in dbo.capacity_list.
capacity_checksum_columns = ['CapacityId', 'CapacityName', 'SKU', 'Region', 'State', 'Admins']
spark_capacity_list = spark_capacity_list.withColumn(
    'row_checksum',
    md5(concat_ws('|', *[coalesce(col(name).cast('string'), lit('')) for name in capacity_checksum_columns])),
)

# The staging table is replaced on every run; history is kept in dbo.capacity_list.
spark_capacity_list.write.mode("overwrite").format("delta").saveAsTable("stage.stage_capacity_list")
if show_results:
    display(spark_capacity_list)

In [None]:
# Snapshot the workspaces returned by the admin API into stage.stage_workspace_list.
ws_list = labs_admin.list_workspaces()
ws_list = ws_list.rename(columns={'Id': 'WorkspaceId', 'Name': 'WorkspaceName', 'Capacity Id': 'CapacityId'})
ws_list['EffectiveDate'] = current_datetime
spark_ws_list = spark.createDataFrame(ws_list)

# Change-detection hash over the tracked attributes.
# concat_ws takes the SEPARATOR as its first argument, so it must be passed explicitly;
# otherwise the first column name is used as a literal delimiter and that column is never hashed.
# Each value is cast to string and NULLs become '' because concat_ws silently drops NULLs,
# which would let values shift position and produce the same hash for different rows.
# NOTE: this changes the hash of every row, so the first run after this change writes one
# new version for each workspace already present in dbo.workspace_list.
workspace_checksum_columns = ['WorkspaceId', 'Type', 'WorkspaceName', 'State', 'CapacityId']
spark_ws_list = spark_ws_list.withColumn(
    'row_checksum',
    md5(concat_ws('|', *[coalesce(col(name).cast('string'), lit('')) for name in workspace_checksum_columns])),
)

# The staging table is replaced on every run; history is kept in dbo.workspace_list.
spark_ws_list.write.mode("overwrite").format("delta").saveAsTable("stage.stage_workspace_list")
if show_results:
    display(spark_ws_list)

In [None]:
# Snapshot the items returned by the admin API into stage.stage_workspace_item_list.
ws_item_list = labs_admin.list_items()
ws_item_list = ws_item_list.rename(columns={
    'Item Id': 'ItemId',
    'Item Name': 'ItemName',
    'Last Updated Date': 'LastUpdatedDate',
    'Creator Principal Id': 'CreatorPrincipalId',
    'Creator Principal Display Name': 'CreatorPrincipalDisplayName',
    'Creator Principal Type': 'CreatorPrincipalType',
    'Creator User Principal Name': 'CreatorUserPrincipalName',
    'Capacity Id': 'CapacityId',
    'Workspace Id': 'WorkspaceId',
})
ws_item_list['EffectiveDate'] = current_datetime
spark_ws_item_list = spark.createDataFrame(ws_item_list)

# Change-detection hash over the tracked attributes.
# concat_ws takes the SEPARATOR as its first argument, so it must be passed explicitly;
# otherwise the first column name is used as a literal delimiter and that column is never hashed.
# Each value is cast to string and NULLs become '' because concat_ws silently drops NULLs,
# which would let values shift position and produce the same hash for different rows.
# NOTE: this changes the hash of every row, so the first run after this change writes one
# new version for each item already present in dbo.workspace_item_list.
item_checksum_columns = [
    'ItemId', 'ItemName', 'Type', 'Description', 'State', 'LastUpdatedDate',
    'CreatorPrincipalId', 'CreatorPrincipalDisplayName', 'CreatorPrincipalType',
    'CreatorUserPrincipalName', 'WorkspaceId', 'CapacityId',
]
spark_ws_item_list = spark_ws_item_list.withColumn(
    'row_checksum',
    md5(concat_ws('|', *[coalesce(col(name).cast('string'), lit('')) for name in item_checksum_columns])),
)

# The staging table is replaced on every run; history is kept in dbo.workspace_item_list.
spark_ws_item_list.write.mode("overwrite").format("delta").saveAsTable("stage.stage_workspace_item_list")
if show_results:
    display(spark_ws_item_list)

## Create tables for history if they don't exist

In [None]:
%%sql
-- History table for capacities (SCD type 2): one row per version of a capacity.
-- current_row flags the open version; EffectiveDate / EndDate bound each version.
CREATE TABLE IF NOT EXISTS dbo.capacity_list (
    CapacityId     STRING,
    CapacityName   STRING,
    Sku            STRING,
    Region         STRING,
    State          STRING,
    Admins         STRING,
    EffectiveDate  TIMESTAMP,
    row_checksum   STRING,
    current_row    BOOLEAN,
    EndDate        TIMESTAMP
)

In [None]:
%%sql
-- History table for workspaces (SCD type 2): one row per version of a workspace.
-- current_row flags the open version; EffectiveDate / EndDate bound each version.
CREATE TABLE IF NOT EXISTS dbo.workspace_list (
    WorkspaceId    STRING,
    WorkspaceName  STRING,
    State          STRING,
    Type           STRING,
    CapacityId     STRING,
    EffectiveDate  TIMESTAMP,
    row_checksum   STRING,
    current_row    BOOLEAN,
    EndDate        TIMESTAMP
)

In [None]:
%%sql
-- History table for workspace items (SCD type 2): one row per version of an item.
-- current_row flags the open version; EffectiveDate / EndDate bound each version.
CREATE TABLE IF NOT EXISTS dbo.workspace_item_list (
    ItemId                       STRING,
    ItemName                     STRING,
    Type                         STRING,
    Description                  STRING,
    State                        STRING,
    LastUpdatedDate              STRING,
    CreatorPrincipalId           STRING,
    CreatorPrincipalDisplayName  STRING,
    CreatorPrincipalType         STRING,
    CreatorUserPrincipalName     STRING,
    WorkspaceId                  STRING,
    CapacityId                   STRING,
    EffectiveDate                TIMESTAMP,
    row_checksum                 STRING,
    current_row                  BOOLEAN,
    EndDate                      TIMESTAMP
)

## Populate slowly changing dimensions

#### Capacities

In [None]:
  %%sql
-- SCD type 2 merge for dbo.capacity_list.
--
-- The source is the union of two copies of the staged snapshot:
--   * mergeKey = CapacityId : pairs each staged row with its current history row, if one exists
--   * mergeKey = NULL       : a second copy of every CHANGED row; NULL never equals a key, so this
--                             copy always reaches the INSERT branch and becomes the new version
--
-- Only current rows can match (current_row = TRUE is part of the ON clause). If closed rows were
-- allowed to match, a capacity that vanished from one snapshot and came back later would be
-- "matched" by its closed history rows and would never be re-inserted.
MERGE INTO dbo.capacity_list AS tgt
USING (
  SELECT stg.CapacityId AS mergeKey, stg.*
  FROM stage.stage_capacity_list stg
  UNION ALL
  SELECT NULL AS mergeKey, stg.*
  FROM stage.stage_capacity_list stg
    JOIN dbo.capacity_list cur ON stg.CapacityId = cur.CapacityId
  WHERE stg.row_checksum <> cur.row_checksum AND cur.current_row = TRUE
) AS src
ON tgt.CapacityId = src.mergeKey AND tgt.current_row = TRUE
-- attributes changed: close the current version at the new snapshot's timestamp
WHEN MATCHED AND tgt.row_checksum <> src.row_checksum THEN
  UPDATE SET EndDate = src.EffectiveDate, current_row = FALSE
-- new capacity, returning capacity, or the new version of a changed capacity
WHEN NOT MATCHED THEN
  INSERT (CapacityId, CapacityName, Sku, Region, State, Admins, EffectiveDate, row_checksum, current_row, EndDate)
  VALUES (src.CapacityId, src.CapacityName, src.Sku, src.Region, src.State, src.Admins, src.EffectiveDate, src.row_checksum, TRUE, make_date(2099,12,31))
-- capacity no longer present in the snapshot: close its current version
WHEN NOT MATCHED BY SOURCE AND tgt.current_row = TRUE THEN
  UPDATE SET EndDate = CURRENT_TIMESTAMP(), current_row = FALSE

#### Workspaces

In [None]:
%%sql
-- SCD type 2 merge for dbo.workspace_list.
--
-- The source is the union of two copies of the staged snapshot:
--   * mergeKey = WorkspaceId : pairs each staged row with its current history row, if one exists
--   * mergeKey = NULL        : a second copy of every CHANGED row; NULL never equals a key, so this
--                              copy always reaches the INSERT branch and becomes the new version
--
-- Only current rows can match (current_row = TRUE is part of the ON clause). If closed rows were
-- allowed to match, a workspace that vanished from one snapshot and came back later would be
-- "matched" by its closed history rows and would never be re-inserted.
MERGE INTO dbo.workspace_list AS tgt
USING (
  SELECT stg.WorkspaceId AS mergeKey, stg.*
  FROM stage.stage_workspace_list stg
  UNION ALL
  SELECT NULL AS mergeKey, stg.*
  FROM stage.stage_workspace_list stg
    JOIN dbo.workspace_list cur ON stg.WorkspaceId = cur.WorkspaceId
  WHERE stg.row_checksum <> cur.row_checksum AND cur.current_row = TRUE
) AS src
ON tgt.WorkspaceId = src.mergeKey AND tgt.current_row = TRUE
-- attributes changed: close the current version at the new snapshot's timestamp
WHEN MATCHED AND tgt.row_checksum <> src.row_checksum THEN
  UPDATE SET EndDate = src.EffectiveDate, current_row = FALSE
-- new workspace, returning workspace, or the new version of a changed workspace
WHEN NOT MATCHED THEN
  INSERT (WorkspaceId, WorkspaceName, State, Type, CapacityId, EffectiveDate, row_checksum, current_row, EndDate)
  VALUES (src.WorkspaceId, src.WorkspaceName, src.State, src.Type, src.CapacityId, src.EffectiveDate, src.row_checksum, TRUE, make_date(2099,12,31))
-- workspace no longer present in the snapshot: close its current version
WHEN NOT MATCHED BY SOURCE AND tgt.current_row = TRUE THEN
  UPDATE SET EndDate = CURRENT_TIMESTAMP(), current_row = FALSE

#### Items

In [None]:
%%sql
-- SCD type 2 merge for dbo.workspace_item_list.
--
-- The source is the union of two copies of the staged snapshot:
--   * mergeKey = ItemId : pairs each staged row with its current history row, if one exists
--   * mergeKey = NULL   : a second copy of every CHANGED row; NULL never equals a key, so this
--                         copy always reaches the INSERT branch and becomes the new version
--
-- Only current rows can match (current_row = TRUE is part of the ON clause). If closed rows were
-- allowed to match, an item that vanished from one snapshot and came back later would be
-- "matched" by its closed history rows and would never be re-inserted.
MERGE INTO dbo.workspace_item_list AS tgt
USING (
  SELECT stg.ItemId AS mergeKey, stg.*
  FROM stage.stage_workspace_item_list stg
  UNION ALL
  SELECT NULL AS mergeKey, stg.*
  FROM stage.stage_workspace_item_list stg
    JOIN dbo.workspace_item_list cur ON stg.ItemId = cur.ItemId
  WHERE stg.row_checksum <> cur.row_checksum AND cur.current_row = TRUE
) AS src
ON tgt.ItemId = src.mergeKey AND tgt.current_row = TRUE
-- attributes changed: close the current version at the new snapshot's timestamp
WHEN MATCHED AND tgt.row_checksum <> src.row_checksum THEN
  UPDATE SET EndDate = src.EffectiveDate, current_row = FALSE
-- new item, returning item, or the new version of a changed item
WHEN NOT MATCHED THEN
  INSERT (ItemId, ItemName, Type, Description, State, LastUpdatedDate, CreatorPrincipalId, CreatorPrincipalDisplayName
    , CreatorPrincipalType, CreatorUserPrincipalName, WorkspaceId, CapacityId, EffectiveDate, row_checksum, current_row, EndDate)
  VALUES (src.ItemId, src.ItemName, src.Type, src.Description, src.State
    , src.LastUpdatedDate, src.CreatorPrincipalId, src.CreatorPrincipalDisplayName
    , src.CreatorPrincipalType, src.CreatorUserPrincipalName, src.WorkspaceId
    , src.CapacityId, src.EffectiveDate
    , src.row_checksum, TRUE, make_date(2099,12,31))
-- item no longer present in the snapshot: close its current version
WHEN NOT MATCHED BY SOURCE AND tgt.current_row = TRUE THEN
  UPDATE SET EndDate = CURRENT_TIMESTAMP(), current_row = FALSE

In [None]:
%%sql
-- Capacity versions that became effective in roughly the last three days, oldest first per capacity
SELECT *
FROM dbo.capacity_list
WHERE EffectiveDate > dateadd(current_timestamp(), -3)
ORDER BY
    CapacityName,
    EffectiveDate

In [None]:
%%sql
-- Full history of every item that had a version closed (changed or removed) in about the last day.
-- Because both joins are inner joins restricted to current rows, items are left out when their
-- workspace has no current row (e.g. it was deleted) or that workspace's capacity has no current row.
SELECT c.CapacityName, w.WorkspaceName, i.*
FROM dbo.workspace_item_list i
    JOIN dbo.workspace_list w ON i.WorkspaceId = w.WorkspaceId AND w.current_row = TRUE
    JOIN dbo.capacity_list c ON w.CapacityId = c.CapacityId AND c.current_row = TRUE
WHERE i.ItemId IN (
    SELECT closed.ItemId
    FROM dbo.workspace_item_list closed
    WHERE closed.current_row = FALSE
      AND closed.EndDate > date_add(current_timestamp(), -1)
    )
ORDER BY ItemId, EffectiveDate

In [None]:
%%sql
-- Current workspaces grouped under each current capacity.
-- The capacity filter belongs in WHERE: inside the ON clause of a LEFT JOIN it only stops
-- historical capacity rows from pairing with workspaces, so they would still be returned
-- as extra rows with NULL workspace columns.
-- The workspace filter stays in ON so capacities without any current workspace are kept.
SELECT
    c.CapacityId, c.CapacityName, c.Region
    , w.WorkspaceId, w.WorkspaceName
FROM dbo.capacity_list c
    LEFT JOIN dbo.workspace_list w
        ON c.CapacityId = w.CapacityId AND w.current_row = TRUE
WHERE c.current_row = TRUE
ORDER BY
    CapacityName, WorkspaceName
