# Lab 1: Create Direct Lake custom semantic model

Create your own custom semantic model
- Add measures
- Add relationships
- Mark Date Table
- Set sortby columns
- Hide columns
- Run DAX Query and DMV

## 1. Install Semantic Link Labs Python Library

In [None]:
%pip install -q semantic-link-labs

## 2. Import Python Libraries

In [None]:
import sempy_labs as labs
from sempy import fabric
import sempy
import pandas
import json
import time

# Display name of the lakehouse used throughout this lab.
LakehouseName = "AdventureWorks"
# Name of the custom semantic model: the lakehouse name plus a "_model" suffix.
SemanticModelName = f"{LakehouseName}_model"

## 3. Create Lakehouse

In [None]:
# Reuse the lakehouse when one with this name is already listed; otherwise create it.
lakehouses = labs.list_lakehouses()["Lakehouse Name"]
if LakehouseName not in lakehouses.values:
    lakehouseId = fabric.create_lakehouse(LakehouseName)
else:
    lakehouseId = notebookutils.lakehouse.getWithProperties(LakehouseName)["id"]

# Look up the workspace id from the lakehouse properties and resolve its name;
# both are used by later cells.
workspaceId = notebookutils.lakehouse.getWithProperties(LakehouseName)["workspaceId"]
workspaceName = sempy.fabric.resolve_workspace_name(workspaceId)
print(f"WorkspaceId = {workspaceId}, LakehouseID = {lakehouseId}, Workspace Name = {workspaceName}")

### 3.1 Remove any unwanted semantic models

The cell below is disabled by the `if False:` guard on its first line, so it does nothing as written. To actually delete the semantic models, change that first line to:

```
if True:
```

In [None]:
if False:
    # Delete every item of type "SemanticModel" whose display name differs
    # from the lakehouse name.
    for _, item in sempy.fabric.list_items().iterrows():
        if item["Type"] != "SemanticModel":
            continue
        if item["Display Name"] == LakehouseName:
            continue
        sempy.fabric.delete_item(item_id=item["Id"], workspace=workspaceId)
        print(f"Deleted semantic model {item['Display Name']}")

## 4. Remove all tables from AdventureWorks Lakehouse

In [None]:
if False:
    # Recursively delete every entry under the lakehouse's Tables folder.
    tables_root = f"abfss://{workspaceId}@onelake.dfs.fabric.microsoft.com/{lakehouseId}/Tables"
    for entry in notebookutils.fs.ls(tables_root):
        print(f"Deleting...{entry.path}")
        notebookutils.fs.rm(entry.path, recurse=True)

## 5. Copy data from source into local lakehouse

In [None]:
def loadDataToLakehouse(
    fromTable: str,
    toTable: str,
    sourceWorkspaceId: str = "81363cc3-0a91-4393-a10f-0d58415fddef",
    sourceLakehouseId: str = "69031e47-6b22-467d-80b6-9edab4f29f72",
):
    """Copy one table from a source lakehouse into the lab lakehouse.

    Args:
        fromTable: Table folder name under ``Tables/`` in the source lakehouse.
        toTable: Table folder name to write under ``Tables/`` in the lakehouse
            named by ``LakehouseName``. Existing data is overwritten.
        sourceWorkspaceId: Workspace id of the source lakehouse. Defaults to
            the id this lab originally hard-coded.
        sourceLakehouseId: Lakehouse id of the source lakehouse. Defaults to
            the id this lab originally hard-coded.
    """
    # One properties lookup yields both ids, so the service is called once per table.
    properties = notebookutils.lakehouse.getWithProperties(LakehouseName)
    workspaceId = properties["workspaceId"]
    lakehouseId = properties["id"]

    onelake_host = "onelake.dfs.fabric.microsoft.com"
    source_path = f"abfss://{sourceWorkspaceId}@{onelake_host}/{sourceLakehouseId}/Tables/{fromTable}"
    target_path = f"abfss://{workspaceId}@{onelake_host}/{lakehouseId}/Tables/{toTable}"

    table_df = spark.read.load(source_path)
    table_df.write.mode("overwrite").save(target_path)
    print(f"Loaded {toTable}")


# (source table, destination table) pairs copied into the lab lakehouse.
for source_table, target_table in (
    ("adw_DimCustomer", "DimCustomer"),
    ("adw_DimDate", "DimDate"),
    ("adw_DimProduct", "DimProduct"),
    ("adw_FactInternetSales", "FactInternetSales"),
):
    loadDataToLakehouse(source_table, target_table)
print("Done")

## 6. Trigger background job to sync Lakehouse tables

In [None]:
##https://medium.com/@sqltidy/delays-in-the-automatically-generated-schema-in-the-sql-analytics-endpoint-of-the-lakehouse-b01c7633035d

def triggerMetadataRefresh(timeoutSeconds: float = 600, pollSeconds: float = 1):
    """Trigger a metadata refresh of the lakehouse SQL endpoint and wait for it.

    Looks up the SQL endpoint id of the lakehouse identified by the
    module-level ``workspaceId`` / ``lakehouseId``, posts a
    ``MetadataRefreshExternalCommand`` to it, then polls the resulting batch,
    printing each ``progressState``, until it reports ``"success"``.

    Args:
        timeoutSeconds: Maximum time to keep polling before giving up.
        pollSeconds: Delay between two status requests.

    Raises:
        RuntimeError: If the batch reports the ``"failure"`` state.
        TimeoutError: If the batch has not succeeded within ``timeoutSeconds``.
    """
    client = fabric.FabricRestClient()
    response = client.get(f"/v1/workspaces/{workspaceId}/lakehouses/{lakehouseId}")
    sqlendpoint = response.json()['properties']['sqlEndpointProperties']['id']

    # trigger sync
    uri = f"/v1.0/myorg/lhdatamarts/{sqlendpoint}"
    payload = {"commands": [{"$type": "MetadataRefreshExternalCommand"}]}
    response = client.post(uri, json=payload)
    batchId = response.json()['batchId']

    # Monitor progress. The status URI does not change, so build it once.
    statusuri = f"/v1.0/myorg/lhdatamarts/{sqlendpoint}/batches/{batchId}"
    deadline = time.monotonic() + timeoutSeconds
    while True:
        progressState = client.get(statusuri).json()['progressState']
        print(progressState)
        if progressState == "success":
            break
        # NOTE(review): "failure" is assumed to be the endpoint's terminal
        # error state; confirm against the service. Without this check a
        # failed batch would be polled forever.
        if progressState == "failure":
            raise RuntimeError(f"Metadata refresh batch {batchId} failed")
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Metadata refresh batch {batchId} still '{progressState}' after {timeoutSeconds} seconds"
            )
        time.sleep(pollSeconds)

    print('done')

triggerMetadataRefresh()

## 7. Create Custom Semantic Model from Lakehouse

In [None]:
#1. Generate list of ALL table names from lakehouse to add to Semantic Model
#1. Generate list of ALL table names from lakehouse to add to Semantic Model
# .tolist() turns the "Table Name" column into a plain list, matching the annotation.
lakehouseTables: list = labs.lakehouse.get_lakehouse_tables(lakehouse=LakehouseName)["Table Name"].tolist()

#2 Create the semantic model, but only when no semantic model with that name exists yet
workspace_items = sempy.fabric.list_items()
model_exists = (
    (workspace_items["Display Name"] == SemanticModelName)
    & (workspace_items["Type"] == "SemanticModel")
).any()
if not model_exists:
    labs.directlake.generate_direct_lake_semantic_model(
        dataset=SemanticModelName,
        lakehouse_tables=lakehouseTables,
        workspace=workspaceName,
        lakehouse=lakehouseId,
        refresh=False,
        overwrite=True,
    )

## 8. Add model relationships

In [None]:
with labs.tom.connect_semantic_model(dataset=SemanticModelName, readonly=False) as tom:
    #1. Remove any existing relationships
    # Iterate over a snapshot: removing items from the collection while
    # enumerating it can fail or skip entries.
    for r in list(tom.model.Relationships):
        tom.model.Relationships.Remove(r)

    #2. Creates correct relationships (fact table on the "many" side, dimensions on the "one" side)
    tom.add_relationship(from_table="FactInternetSales", from_column="OrderDateKey" , to_table="DimDate"    , to_column="DateKey"       , from_cardinality="many" , to_cardinality="one")
    tom.add_relationship(from_table="FactInternetSales", from_column="CustomerKey"  , to_table="DimCustomer", to_column="CustomerKey"   , from_cardinality="many" , to_cardinality="one")
    tom.add_relationship(from_table="FactInternetSales", from_column="ProductKey"   , to_table="DimProduct" , to_column="ProductKey"    , from_cardinality="many" , to_cardinality="one")


## 9. Add model measures

In [None]:
with labs.tom.connect_semantic_model(dataset=SemanticModelName, readonly=False) as tom:
    #1. Remove any existing measures
    for t in tom.model.Tables:
        # Iterate over a snapshot: the loop body removes measures from this
        # same collection.
        for m in list(t.Measures):
            # Read the name before removal so the message never touches a
            # detached object.
            measure_name = m.Name
            tom.remove_object(m)
            print(f"[{measure_name}] measure removed")

    #2. Add the lab's measure
    tom.add_measure(table_name="FactInternetSales" ,measure_name="Sum of Sales",expression="SUM(FactInternetSales[SalesAmount])")
    

## 10. Mark DimDate as Date Table

In [None]:
# Mark DimDate as the model's date table, keyed on its "Date" column.
with labs.tom.connect_semantic_model(
    dataset=SemanticModelName,
    readonly=False,
) as tom:
    tom.mark_as_date_table(
        table_name="DimDate",
        column_name="Date",
    )

## 11. Set Sort by Cols

In [None]:
import json
# Open a writable TOM wrapper directly (no `with` block); `tom` stays bound
# after this cell.
tom = labs.tom.TOMWrapper(dataset=SemanticModelName, workspace=workspaceName, readonly=False)

# (column, column to sort it by) pairs on DimDate.
for column_name, sort_by_column in (
    ("MonthName", "MonthNumberOfYear"),
    ("DayOfWeek", "DayNumberOfWeek"),
):
    tom.set_sort_by_column(table_name="DimDate", column_name=column_name, sort_by_column=sort_by_column)
tom.model.SaveChanges()

# Print the BIM definition of DimDate; the table's position in tom.model.Tables
# is used as its index into the BIM "tables" array.
for i, t in enumerate(tom.model.Tables):
    if t.Name != "DimDate":
        continue
    bim = json.dumps(tom.get_bim()["model"]["tables"][i], indent=4)
    print(bim)

## 12. Hide Fact Table columns

In [None]:
# Hide every column of the fact table(s), then print the resulting BIM
# definition. Uses the `tom` wrapper opened in the previous cell.
for i, t in enumerate(tom.model.Tables):
    if t.Name in ["FactInternetSales"]:
        for c in t.Columns:
            c.IsHidden = True

        # The table's position in tom.model.Tables is used as its index into
        # the BIM "tables" array.
        bim = json.dumps(tom.get_bim()["model"]["tables"][i], indent=4)
        print(bim)

# Persist the change: this wrapper was not opened via a `with` block, so
# nothing is saved unless SaveChanges() is called explicitly.
tom.model.SaveChanges()

## 13. Reframe model to update changes

In [None]:
# Refresh (reframe) the semantic model. On failure, trigger another metadata
# sync and retry, up to a fixed number of attempts.
MAX_REFRAME_ATTEMPTS = 10
reframeOK: bool = False
for attempt in range(1, MAX_REFRAME_ATTEMPTS + 1):
    try:
        result: pandas.DataFrame = labs.refresh_semantic_model(dataset=SemanticModelName)
        reframeOK = True
        break
    except Exception as error:  # not a bare except: Ctrl+C / kernel interrupt still stops the cell
        print('Error with reframe... trying again.')
        print(f"Attempt {attempt} of {MAX_REFRAME_ATTEMPTS} failed: {error}")
        triggerMetadataRefresh()
        # Only `time` is imported, so a bare sleep(...) would raise NameError here.
        time.sleep(3)

if not reframeOK:
    raise RuntimeError(f"Reframe of {SemanticModelName} failed after {MAX_REFRAME_ATTEMPTS} attempts")

print('Custom Semantic Model reframe OK')

## 14. Create function to run DAX query with a server timings trace

In [None]:
import warnings
import time
from Microsoft.AnalysisServices.Tabular import TraceEventArgs
from typing import Dict, List, Optional, Callable

def runDMV():
    """Run the DISCOVER_STORAGE_TABLE_COLUMNS DMV and display the result.

    The query runs against the model named by the module-level
    ``SemanticModelName`` and lists, per table/column, the data type and the
    dictionary size, pageable, resident, temperature and last-accessed
    values, ordered by dictionary temperature descending.
    """
    storage_columns_query = """
        
        SELECT 
            MEASURE_GROUP_NAME AS [TABLE],
            ATTRIBUTE_NAME AS [COLUMN],
            DATATYPE ,
            DICTIONARY_SIZE 		    AS SIZE ,
            DICTIONARY_ISPAGEABLE 		AS PAGEABLE ,
            DICTIONARY_ISRESIDENT		AS RESIDENT ,
            DICTIONARY_TEMPERATURE		AS TEMPERATURE,
            DICTIONARY_LAST_ACCESSED	AS LASTACCESSED 
        FROM $SYSTEM.DISCOVER_STORAGE_TABLE_COLUMNS 
        ORDER BY 
            [DICTIONARY_TEMPERATURE] DESC
        
        """
    dmv_result = sempy.fabric.evaluate_dax(
        dataset=SemanticModelName,
        dax_string=storage_columns_query,
    )
    display(dmv_result)

def filter_func(e):
    """Trace filter predicate: keep every event except VertiPaqScanInternal.

    Args:
        e: Trace event exposing ``EventSubclass.ToString()``.

    Returns:
        ``False`` when the event subclass is ``"VertiPaqScanInternal"``,
        ``True`` otherwise.
    """
    return e.EventSubclass.ToString() != "VertiPaqScanInternal"


# define events to trace and their corresponding columns
def runQueryWithTrace (expr:str,workspaceName:str,SemanticModelName:str,Result:Optional[bool]=True,Trace:Optional[bool]=True,DMV:Optional[bool]=True):
    """Run a DAX query under a server trace and display the outputs.

    Args:
        expr: DAX query text to evaluate.
        workspaceName: Workspace used for the trace connection.
        SemanticModelName: Semantic model to trace and query.
        Result: When truthy, display the query result.
        Trace: When truthy, display the collected trace logs.
        DMV: When truthy, call ``runDMV()`` afterwards.
    """
    # Start from the default query trace schema, add ExecutionMetrics and drop
    # events this lab does not want. pop(..., None) tolerates a default schema
    # that lacks one of these keys, where `del` would raise KeyError.
    event_schema = fabric.Trace.get_default_query_trace_schema()
    event_schema.update({"ExecutionMetrics": ["EventClass", "TextData"]})
    for unwanted_event in ("VertiPaqSEQueryBegin", "VertiPaqSEQueryCacheMatch", "DirectQueryBegin"):
        event_schema.pop(unwanted_event, None)

    # Suppress warnings only for the duration of this call; the previous
    # filters are restored on exit instead of being changed for the session.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")

        with fabric.create_trace_connection(SemanticModelName, workspaceName) as trace_connection:
            # create trace on server with specified events
            with trace_connection.create_trace(
                event_schema=event_schema,
                name="Simple Query Trace",
                filter_predicate=filter_func,
                stop_event="QueryEnd"
                ) as trace:

                trace.start()

                df = sempy.fabric.evaluate_dax(
                    dataset=SemanticModelName,
                    dax_string=expr)

                if Result:
                    displayHTML("<H2>####### DAX QUERY RESULT #######</H2>")
                    display(df)

                # Wait 5 seconds for trace data to arrive
                time.sleep(5)

                # stop Trace and collect logs
                final_trace_logs = trace.stop()

        if Trace:
            displayHTML("<H2>####### SERVER TIMINGS #######</H2>")
            display(final_trace_logs)

        if DMV:
            displayHTML("<H2>####### SHOW DMV RESULTS #######</H2>")
            # NOTE(review): runDMV() takes no arguments and reads the
            # module-level SemanticModelName, not this function's parameter.
            runDMV()



## 15. DAX Queries

In [None]:
# Evaluate the TABLETRAITS() DAX function against the custom semantic model
# and display what it returns.
table_traits_query = """
    
    evaluate tabletraits()
    
    """
df = sempy.fabric.evaluate_dax(dataset=SemanticModelName, dax_string=table_traits_query)
display(df)

In [None]:
# Fetch the Direct Lake guardrails table from semantic-link-labs and display it.
df=labs.directlake.get_direct_lake_guardrails()
display(df)

## 16. Run DMV to check column details

In [None]:
# Display the storage DMV output for the custom semantic model.
runDMV()

## 17. Run DAX Query on custom semantic model

In [None]:
# Clear the model's cache before running the query.
labs.clear_cache(SemanticModelName)

# Transaction count and total sales per month name.
sales_by_month_query = """
    
    EVALUATE
        SUMMARIZECOLUMNS(
               
                DimDate[MonthName] ,
                "Count of Transactions" , COUNTROWS(FactInternetSales) ,
                "Sum of Sales" , [Sum of Sales] 
        )
        ORDER BY [MonthName]
    """
df = sempy.fabric.evaluate_dax(dataset=SemanticModelName, dax_string=sales_by_month_query)
display(df)

# Show the storage DMV again, now that the query has run.
runDMV()