# Lab 8: Direct Lake over One Lake with Import

## 1. Install Semantic Link Labs Python Library
This step installs Semantic Link Labs, a Python library designed for use in Microsoft Fabric notebooks. The library extends the capabilities of [Semantic Link](https://learn.microsoft.com/en-us/fabric/data-science/semantic-link-overview), offering additional functionality that integrates seamlessly alongside it.

In [None]:
# Install semantic-link-labs into the notebook session; -q keeps pip's output
# quiet and --disable-pip-version-check suppresses pip's own upgrade check.
%pip install -q --disable-pip-version-check semantic-link-labs

## 2. Import Python Libraries and Set Variables
This step does the following:
- Imports the libraries that will be used later in the script for various functions related to data processing, manipulation and handling
- Creates and populates the following variables
    - LakehouseName - Used as the name of the Lakehouse that is used later in this script
    - SemanticModelName - Used as the name of the Semantic Model that is used later in this script

In [None]:
import sempy_labs as labs
from sempy import fabric
import pandas as pd
import json
import time
import uuid
from sempy_labs.tom._model import TOMWrapper, connect_semantic_model

from sempy_labs._helper_functions import (
    format_dax_object_name,
    generate_guid,
    _make_list_unique,
    resolve_dataset_name_and_id,
    resolve_workspace_name_and_id,
    _base_api,
    resolve_workspace_id,
    resolve_item_id,
    resolve_lakehouse_id,
    resolve_lakehouse_name_and_id
)
fabric._client._utils._init_analysis_services()
import Microsoft.AnalysisServices.Tabular as TOM
import Microsoft.AnalysisServices


# Names of the lab artifacts. The semantic model name is derived from the
# lakehouse name, and the clone name from the semantic model name.
LakehouseName = "BigData"
SemanticModelName = LakehouseName + "_model"
ClonedModelName = f"{SemanticModelName}_clone"

# Passed unchanged to the resolver helpers below.
workspace = None

# Resolve the (name, id) pairs once; later cells reuse workspace_id and
# lakehouse_id when building source expressions.
workspace_name, workspace_id = resolve_workspace_name_and_id(workspace)
lakehouse_name, lakehouse_id = resolve_lakehouse_name_and_id(
    lakehouse=LakehouseName,
    workspace=workspace,
)

## Clone BigData semantic model

In [None]:
# Clear any existing cloned model so this cell can be re-run.
# Match on the item's display name AND its type: a plain `name in df.values`
# test matches any cell of any column, so an unrelated item (or a description)
# containing the same text could trigger the delete or make the lookup fail.
items = fabric.list_items()
stale_clones = items[
    (items["Display Name"] == ClonedModelName) & (items["Type"] == "SemanticModel")
]
for model_id in stale_clones["Id"]:
    fabric.delete_item(model_id)
    print("Cloned model deleted")

# Clone the source database and its model on the same server under a new
# name and a freshly generated ID.
with labs.tom.connect_semantic_model(dataset=SemanticModelName, readonly=False) as tom:
    source_db = tom._tom_server.Databases.GetByName(SemanticModelName)
    newDB = source_db.Clone()
    newModel = source_db.Model.Clone()
    newDB.Name = ClonedModelName
    newDB.ID = str(uuid.uuid4())
    # Copy the cloned model's metadata into the new database's model.
    newModel.CopyTo(newDB.Model)
    tom._tom_server.Databases.Add(newDB)

    # Push the new database, including all child objects, to the server.
    newDB.Update(Microsoft.AnalysisServices.UpdateOptions.ExpandFull)

## Frame the cloned model

In [None]:
# Refresh the cloned model; no tables or refresh type are given, so the
# library's defaults apply.
labs.refresh_semantic_model(
    dataset=ClonedModelName,
)

## Check what version of Direct Lake is being used

##### Sql.Database    = DirectLake over SQL

##### AzureStorage.DataLake = DirectLake over One Lake

In [None]:
# Print the source text of every shared expression in the cloned model.
# Nothing is modified here, so a read-only connection is used instead of a
# read/write one.
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=True) as tom:
    for e in tom.model.Expressions:
        print(e.Expression)

## Show storage mode for each table in Cloned model

In [None]:
# Build a one-row table of {table name: partition storage mode} for the
# cloned model. Only reads metadata, so the connection is read-only.
# If a table has several partitions, the last partition's mode is the one kept.
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=True) as tom:
    objects = {
        t.Name: str(p.Mode)
        for t in tom.model.Tables
        for p in t.Partitions
    }

df = pd.DataFrame([objects])
display(df)

## Try to convert Direct Lake table to Import
#### Will fail if Direct Lake over SQL

In [None]:
# Attempt to switch the dim_Date table from Direct Lake to Import.
# The source lakehouse comes from LakehouseName (rather than a repeated
# "BigData" literal) so this cell follows the setting made at the top of the
# notebook.
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=False) as tom:
    tom.convert_direct_lake_to_import(
        table_name="dim_Date",
        entity_name="dim_Date",
        source=LakehouseName,
        source_type="Lakehouse",
    )

## Convert cloned model to Direct Lake over One Lake

In [None]:
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=False) as tom:

    # Convert Import tables to Direct Lake.
    for t in tom.model.Tables:
        # Iterate over a snapshot: partitions are removed from and added to
        # t.Partitions inside the loop, and mutating a collection while
        # enumerating it is unsafe.
        for p in list(t.Partitions):
            if p.Mode == TOM.ModeType.Import:
                t.Partitions.Remove(p)
                tom.add_entity_partition(table_name=t.Name, entity_name=t.Name)
                print(f"Table {t.Name} converted")
            else:
                # Clear the schema name only on partitions that stay in the
                # model; the removed Import partition no longer matters.
                p.Source.SchemaName = None

    # Point every shared expression at the lakehouse's OneLake location.
    # NOTE(review): the host below is pinned to the North Central US regional
    # endpoint - confirm it matches the region of the workspace being used.
    for e in tom.model.Expressions:
        e.Expression = f"""
        let
            Source = AzureStorage.DataLake("https://northcentralus-onelake.dfs.fabric.microsoft.com/{workspace_id}/{lakehouse_id}", [HierarchicalNavigation=true])
        in
            Source"""

print("Converted Import tables back to Direct Lake (on One Lake)")

## Try to convert Direct Lake table to Import (_attempt 2_)
#### Should work this time now model is Direct Lake over One Lake

In [None]:
# Second attempt at switching dim_Date from Direct Lake to Import.
# The source lakehouse comes from LakehouseName (rather than a repeated
# "BigData" literal) so this cell follows the setting made at the top of the
# notebook.
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=False) as tom:
    tom.convert_direct_lake_to_import(
        table_name="dim_Date",
        entity_name="dim_Date",
        source=LakehouseName,
        source_type="Lakehouse",
    )

## Show storage mode for each table

In [None]:
# Build a one-row table of {table name: partition storage mode} for the
# cloned model. Only reads metadata, so the connection is read-only.
# If a table has several partitions, the last partition's mode is the one kept.
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=True) as tom:
    objects = {
        t.Name: str(p.Mode)
        for t in tom.model.Tables
        for p in t.Partitions
    }

df = pd.DataFrame([objects])
display(df)

## <mark>SET CREDENTIALS AND LARGE MODEL IN SERVICE</mark>

## Refresh import table model so import table gets hydrated

In [None]:
# Refresh only the dim_Date table of the cloned model.
labs.refresh_semantic_model(
    dataset=ClonedModelName,
    tables=["dim_Date"],
)

## Recalculate relationship indexes

In [None]:
# Run a "calculate" refresh against the cloned model.
labs.refresh_semantic_model(
    dataset=ClonedModelName,
    refresh_type="calculate",
)

## Show what version of Direct Lake is being used

In [None]:
# Print the source text of every shared expression in the cloned model.
# Nothing is modified here, so a read-only connection is used instead of a
# read/write one.
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=True) as tom:
    for e in tom.model.Expressions:
        print(e.Expression)

## Run query on 1bln row table

In [None]:
# DAX: the [Sum of Sales (1bln)] measure grouped by dim_Date[DateKey].
dax_query = """
        EVALUATE
	        SUMMARIZECOLUMNS(
		        dim_Date[DateKey],
		        "Quantity" , [Sum of Sales (1bln)]
		        )
        """

# Evaluate the query against the cloned model and show the result.
df = fabric.evaluate_dax(dataset=ClonedModelName, dax_string=dax_query)
display(df)

## Run query on 2bln row table
#### This will fail due to guardrail

In [None]:
# DAX: the [Sum of Sales (2bln)] measure grouped by dim_Date[DateKey].
dax_query = """
        EVALUATE
	        SUMMARIZECOLUMNS(
		        dim_Date[DateKey],
		        "Quantity" , [Sum of Sales (2bln)]
		        )
        """

# Evaluate the query against the cloned model and show the result.
df = fabric.evaluate_dax(dataset=ClonedModelName, dax_string=dax_query)
display(df)

## Convert cloned model to Direct Lake over SQL Endpoint

In [None]:
# Look up the lakehouse's SQL endpoint details. The filter is evaluated once
# and a missing lakehouse is reported explicitly instead of surfacing as an
# IndexError from .iloc[0].
df = labs.list_lakehouses()
lakehouse_rows = df[df["Lakehouse Name"] == LakehouseName]
if lakehouse_rows.empty:
    raise ValueError(f"Lakehouse '{LakehouseName}' was not found.")
endpointid = lakehouse_rows["SQL Endpoint ID"].iloc[0]
server = lakehouse_rows["SQL Endpoint Connection String"].iloc[0]

with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=False) as tom:

    # Convert Import tables to Direct Lake.
    for t in tom.model.Tables:
        # Iterate over a snapshot: partitions are removed from and added to
        # t.Partitions inside the loop, and mutating a collection while
        # enumerating it is unsafe.
        for p in list(t.Partitions):
            if p.Mode == TOM.ModeType.Import:
                t.Partitions.Remove(p)
                tom.add_entity_partition(table_name=t.Name, entity_name=t.Name)
                print(f"Table {t.Name} converted")
            else:
                # Clear the schema name only on partitions that stay in the
                # model; the removed Import partition no longer matters.
                p.Source.SchemaName = None

    # Switch the model to Direct Lake over SQL by pointing every shared
    # expression at the lakehouse's SQL endpoint.
    for e in tom.model.Expressions:
        e.Expression = f"""
        let
            Source = Sql.Database("{server}", "{endpointid}")
        in
            Source"""

print("Converted to Direct Lake over SQL")

In [None]:
# Print the source text of every shared expression in the cloned model.
# Nothing is modified here, so a read-only connection is used instead of a
# read/write one.
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=True) as tom:
    for e in tom.model.Expressions:
        print(e.Expression)

## Show storage mode for each table

In [None]:
# Build a one-row table of {table name: partition storage mode} for the
# cloned model. Only reads metadata, so the connection is read-only.
# If a table has several partitions, the last partition's mode is the one kept.
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=True) as tom:
    objects = {
        t.Name: str(p.Mode)
        for t in tom.model.Tables
        for p in t.Partitions
    }

df = pd.DataFrame([objects])
display(df)

## Run query on 2bln row table
This should work, but it will fall back to the SQL Endpoint

In [None]:
# DAX: the [Sum of Sales (2bln)] measure grouped by dim_Date[DateKey].
dax_query = """
        EVALUATE
	        SUMMARIZECOLUMNS(
		        dim_Date[DateKey],
		        "Quantity" , [Sum of Sales (2bln)]
		        )
        """

# Evaluate the query against the cloned model and show the result.
df = fabric.evaluate_dax(dataset=ClonedModelName, dax_string=dax_query)
display(df)

## Show TMSL code for cloned model

In [None]:
import json

# Fetch the cloned model's definition. Nothing is modified, so the connection
# is opened read-only.
with labs.tom.connect_semantic_model(dataset=ClonedModelName, readonly=True) as tom:
    x = tom.get_bim()

# Formatting and printing need no live connection, so they happen after the
# connection has been closed.
formatted_json = json.dumps(x, indent=4)
print(formatted_json)