In [1]:
%pip install semantic-link==0.11.0 semantic-link-labs==0.11.1

StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 7, Finished, Available, Finished)

Collecting semantic-link==0.11.0
  Downloading semantic_link-0.11.0-py3-none-any.whl.metadata (10 kB)
Collecting semantic-link-labs==0.11.1
  Downloading semantic_link_labs-0.11.1-py3-none-any.whl.metadata (27 kB)
Collecting semantic-link-sempy==0.11.0 (from semantic-link==0.11.0)
  Downloading semantic_link_sempy-0.11.0-py3-none-any.whl.metadata (10 kB)
Collecting semantic-link-functions-geopandas==0.11.0 (from semantic-link==0.11.0)
  Downloading semantic_link_functions_geopandas-0.11.0-py3-none-any.whl.metadata (1.9 kB)
Collecting semantic-link-functions-holidays==0.11.0 (from semantic-link==0.11.0)
  Downloading semantic_link_functions_holidays-0.11.0-py3-none-any.whl.metadata (1.8 kB)
Collecting semantic-link-functions-meteostat==0.11.0 (from semantic-link==0.11.0)
  Downloading semantic_link_functions_meteostat-0.11.0-py3-none-any.whl.metadata (2.0 kB)
Collecting semantic-link-functions-phonenumbers==0.11.0 (from semantic-link==0.11.0)
  Downloading semantic_link_functions_phonen

In [2]:
import requests
import zipfile
import os
import sempy_labs as labs
import sempy.fabric as semfabric
import base64
import json

# Initialization of the variables read by the later cells of this notebook.

# Lakehouse: name used to create (or look up) the lakehouse that holds the demo files
lakehouse_name = "cms_lakehouse"

# Directories - the base directory is created in the lakehouse as part of the demo setup.
# The path is appended to the lakehouse id when the OneLake abfss path is built.
base_dir_relative_path = "Files/cmsdemofiles"

# Enable/disable specific steps: each later cell checks its flag and, when the flag is True,
# prints a "skipping ..." message instead of running the step.
skip_lakehouse_creation = False
# NOTE: "artificate" is a misspelling of "artifact"; the name is referenced by the download cell,
# so it can only be renamed together with that reference.
skip_artificate_download_step = False
skip_import_notebook_step = False
skip_import_datafactory_pipeline_step = False
skip_import_semantic_model_step = False
skip_import_report_step = False
skip_invoke_datafactory_pipeline_step = False

# External links: zip archive with the artifact definition files, downloaded and extracted into the base directory
artifactzip_github_url = "https://github.com/isinghrana/fabric-samples-healthcare/raw/refs/heads/isr-auto1/analytics-bi-directlake-starschema/demoautomation/artifacts.zip"

# Data factory definition files are extracted from the artifact zip file; the paths are relative
# to the base dir (they are appended to the mounted local base path, hence the leading "/").
datafactory_pipeline_jsonfile_relativepath = "/cms_pipeline.DataPipeline/pipeline-content.json"
datafactory_platform_file_relativepath = "/cms_pipeline.DataPipeline/.platform"

# Semantic model and report definition folders are extracted from the artifact zip file;
# the paths are relative to the base dir.
semanticmodel_relative_path = "/CMS_Direct_Lake_Star_Schema.SemanticModel"
report_relative_path = "/CMS Medicare Part D Star Schema.Report"

# Source URL and target item name for each of the three notebooks imported into the workspace
download_cmsdata_notebook_github_url = "https://raw.githubusercontent.com/isinghrana/fabric-samples-healthcare/refs/heads/isr-auto1/analytics-bi-directlake-starschema/demoautomation/01-DownloadCMSDataCsvFiles.ipynb"
download_cmsdata_notebook_name = "01-DownloadCMSDataCsvFiles"

create_data_table_notebook_github_url = "https://raw.githubusercontent.com/isinghrana/fabric-samples-healthcare/refs/heads/isr-auto1/analytics-bi-directlake-starschema/demoautomation/02-CreateCMSDataTable.ipynb"
create_data_table_notebook_name = "02-CreateCMSDataTable"

create_starschema_table_notebook_github_url = "https://raw.githubusercontent.com/isinghrana/fabric-samples-healthcare/refs/heads/isr-auto1/analytics-bi-directlake-starschema/demoautomation/03-CreateCMSStarSchemaTables.ipynb"
create_starschema_table_notebook_name = "03-CreateCMSStarSchemaTables"


# The pipeline json contains the original workspace id and notebook ids. The pipeline import cell
# replaces these values with the ids of the current workspace and of the notebooks imported here.
replace_pipeline_workspace_id = "904f9388-f876-4176-be2a-6ef7d62d6544"
replace_pipeline_download_cmsdata_notebook_id = "d48a1f2f-8392-4fba-bd05-f76f4b978bfe"
replace_pipeline_create_data_table_notebook_id = "b052405e-a729-4316-97bf-bbb339f86985"
replace_pipeline_create_starschema_notebook_id = "63eb8687-b7ae-4d84-9c5c-2d96b82ce74b"


StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 9, Finished, Available, Finished)

In [3]:
# Resolve the lakehouse id: create the lakehouse unless that step is disabled,
# in which case the lakehouse with the configured name is looked up instead.
if not skip_lakehouse_creation:
    lakehouse = notebookutils.lakehouse.create(lakehouse_name)
    lakehouse_id = lakehouse['id']
else:
    lakehouse_id = notebookutils.lakehouse.get(lakehouse_name)['id']

workspace_id = notebookutils.runtime.context["currentWorkspaceId"]

# Build the OneLake path of the base directory and make sure the directory exists
base_dir_full_path = (
    f"abfss://{workspace_id}@onelake.dfs.fabric.microsoft.com/"
    f"{lakehouse_id}/{base_dir_relative_path}"
)
notebookutils.fs.mkdirs(base_dir_full_path)

mount_point = f"/mnt/lakehouse/{lakehouse_name}/{base_dir_relative_path}"
print(f'base_dir full: {base_dir_full_path}, mount_point: {mount_point}')

# Mount the base directory; later cells read and write it through the local
# mount path using ordinary Python file APIs (open, os.walk, zipfile).
notebookutils.fs.mount(base_dir_full_path, mount_point)
base_dir_local_path = notebookutils.fs.getMountPath(mount_point)

StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 10, Finished, Available, Finished)

base_dir full: abfss://3818e56e-86ac-40bd-8a34-d1479336e94a@onelake.dfs.fabric.microsoft.com/9a9f1836-cd91-47ab-955f-de98dda3c09f/Files/cmsdemofiles, mount_point: /mnt/lakehouse/cms_lakehouse/Files/cmsdemofiles


SynapseWidget(Synapse.DataFrame, 8b23aef7-3ad5-4a92-9b93-0340313451ef)

In [4]:
#common utility functions

def get_file_contents(local_file_path):
    """Return the entire content of a UTF-8 encoded text file as a string.

    Args:
        local_file_path: Path of the file to read.

    Returns:
        The decoded text of the file.
    """
    with open(local_file_path, mode="r", encoding="utf-8") as handle:
        return handle.read()

#function is used in the steps that import the semantic model and the report
def get_fabricitemdef_partdict(definitionfiles_local_path) -> dict[str,str]:
    """Read every file below a definition folder into a dictionary.

    The folder and all of its subfolders are walked.

    Args:
        definitionfiles_local_path: Local path of the folder that holds the
            definition files.

    Returns:
        A dictionary whose key is the part path (the file path relative to
        ``definitionfiles_local_path``, always "/"-separated) and whose value
        is the UTF-8 decoded content of that file.
    """
    def_dict = {}

    for root, _dirs, file_names in os.walk(definitionfiles_local_path):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)

            # relpath removes only the leading base folder. A plain string replace
            # would also remove the base path text if it re-appeared deeper in the
            # tree, collapsing nested files onto root-level keys.
            part_key = os.path.relpath(file_path, definitionfiles_local_path)
            part_key = part_key.replace(os.sep, "/")

            # a distinct handle name keeps the loop variable from being shadowed
            with open(file_path, "r", encoding="utf-8") as part_file:
                def_dict[part_key] = part_file.read()

    return def_dict

def fabriclient_post(url, request_body):
    """POST a JSON body through the Fabric REST client and report the outcome.

    The status code and the response text are printed before the status is
    checked, so the server's message is visible even when the call fails.

    Args:
        url: Path or URL of the endpoint, passed to the client as ``path_or_url``.
        request_body: Object sent as the JSON body of the request.

    Returns:
        The response object returned by the client, so that callers can read
        values such as the id of the created item from it.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status code.
    """
    client = semfabric.FabricRestClient()
    # lro_wait=True: the recorded cell outputs show the client waiting on the
    # operation before the final status code is printed.
    response = client.request(method = "POST", path_or_url=url, lro_wait=True, json = request_body)
    print(response.status_code)
    print(response.text)
    response.raise_for_status()  # Raise an error for bad status codes
    return response


StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 11, Finished, Available, Finished)

In [5]:
#download the artifacts zip file from GitHub - it holds the Data Factory Pipeline, Semantic Model and Report definition files which will be used to create the corresponding Fabric Items

def download_binary_file(url, output_path, timeout=60):
    """Stream a file from a URL to a local path.

    Args:
        url: Address of the file to download.
        output_path: Local path the downloaded bytes are written to.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` argument,
            so that an unresponsive server cannot block the cell forever.

    Raises:
        RuntimeError: If the request fails or returns an error status code;
            the original requests exception is attached as the cause.
    """
    try:
        # the context manager releases the streamed connection on success and on failure
        with requests.get(url=url, stream = True, timeout = timeout) as response:
            response.raise_for_status()  # Raise an error for bad status codes
            with open(output_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        file.write(chunk)
        print(f"File downloaded successfully to: {output_path}")
    except requests.exceptions.RequestException as e:
        print(f"Download failed: {e}")
        raise RuntimeError(f"Failed to download file from {url}") from e


def unzip_file(zip_path, extract_to):
    """Extract every member of a zip archive into a target directory.

    Args:
        zip_path: Path of the zip archive to read.
        extract_to: Directory the archive content is extracted into.
    """
    archive = zipfile.ZipFile(zip_path, mode='r')
    with archive:
        archive.extractall(path=extract_to)
    print(f"Extracted all contents to '{extract_to}'")


# Download the artifact archive into the mounted base directory and unpack it
# there, unless the step has been disabled in the configuration cell.
if not skip_artificate_download_step:
    download_path = f"{base_dir_local_path}/artifacts.zip"
    download_binary_file(artifactzip_github_url, download_path)
    unzip_file(download_path, base_dir_local_path)
else:
    print ("skipping artifact zip file download step")


StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 12, Finished, Available, Finished)

File downloaded successfully to: /synfs/notebook/fe2556b5-5d60-43b6-883a-bc771a1e755b/mnt/lakehouse/cms_lakehouse/Files/cmsdemofiles/artifacts.zip
Extracted all contents to '/synfs/notebook/fe2556b5-5d60-43b6-883a-bc771a1e755b/mnt/lakehouse/cms_lakehouse/Files/cmsdemofiles'


In [6]:
#import notebooks


#import the notebooks from GitHub
if not skip_import_notebook_step:
    # (item name, source url) of every notebook to import, in import order
    for notebook_name, notebook_url in (
        (download_cmsdata_notebook_name, download_cmsdata_notebook_github_url),
        (create_data_table_notebook_name, create_data_table_notebook_github_url),
        (create_starschema_table_notebook_name, create_starschema_table_notebook_github_url),
    ):
        result = labs.import_notebook_from_web(notebook_name = notebook_name, url = notebook_url, overwrite=True)
        print(f"Import {notebook_name} result: {result}")
else:
    print('skipping Notebook import step')

# The ids of all 3 notebooks are resolved even when the import is skipped,
# because subsequent steps use them.
download_cmsdata_notebook_id = semfabric.resolve_item_id(
    item_name = download_cmsdata_notebook_name,
    type = "Notebook",
)
print(f"download_cmsdata_notebook_id: {download_cmsdata_notebook_id}")

create_data_table_notebook_id = semfabric.resolve_item_id(
    item_name = create_data_table_notebook_name,
    type = "Notebook",
)
print(f"create_data_table_notebook_id: {create_data_table_notebook_id}")

create_starschema_table_notebook_id = semfabric.resolve_item_id(
    item_name = create_starschema_table_notebook_name,
    type = "Notebook",
)
print(f"create_starschema_notebook_id: {create_starschema_table_notebook_id}")



StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 13, Finished, Available, Finished)

🟢 The '01-DownloadCMSDataCsvFiles' notebook has been successfully created within the 'cms_automation7' workspace.
Import 01-DownloadCMSDataCsvFiles result: None
🟢 The '02-CreateCMSDataTable' notebook has been successfully created within the 'cms_automation7' workspace.
Import 02-CreateCMSDataTable result: None
🟢 The '03-CreateCMSStarSchemaTables' notebook has been successfully created within the 'cms_automation7' workspace.
Import 03-CreateCMSStarSchemaTables result: None
download_cmsdata_notebook_id: 0d1c99c8-d8c9-4566-94ee-423d76cfcaa7
create_data_table_notebook_id: ec1076d4-8dac-4c99-83d6-d7fbb25316db
create_starschema_notebook_id: 53df9300-3074-498b-81d7-981b76e339d8


In [7]:
#import data factory pipeline

#do not change - the data factory pipeline name exists in the definition file as well
datafactory_pipeline_name = "cms_pipeline"

if skip_import_datafactory_pipeline_step:
    print('skipping create data factory step')
else:

    datafactory_pipeline_jsonfile_local_path = base_dir_local_path + datafactory_pipeline_jsonfile_relativepath
    datafactory_platform_file_local_path = base_dir_local_path + datafactory_platform_file_relativepath

    #read file contents
    platform_file_payload =  get_file_contents(datafactory_platform_file_local_path)
    pipeline_json_payload =  get_file_contents(datafactory_pipeline_jsonfile_local_path)

    #the workspace id and notebook ids of the original pipeline definition json are replaced
    #with the id of this workspace and the ids of the notebooks imported into it
    pipeline_json_payload = pipeline_json_payload.replace(replace_pipeline_workspace_id, workspace_id)
    pipeline_json_payload = pipeline_json_payload.replace(replace_pipeline_download_cmsdata_notebook_id, download_cmsdata_notebook_id)
    pipeline_json_payload = pipeline_json_payload.replace(replace_pipeline_create_data_table_notebook_id, create_data_table_notebook_id)
    pipeline_json_payload = pipeline_json_payload.replace(replace_pipeline_create_starschema_notebook_id, create_starschema_table_notebook_id)

    #create post request body
    #base64.b64encode returns bytes, which the standard json encoder cannot serialize,
    #so each payload is decoded to str before it is placed in the body
    create_datafactory_pipeline_request_body = {
        "displayName": datafactory_pipeline_name,
        "description": "cms_pipeline to ingest and process data",
        "definition" : {
            "parts": [
                {
                    "path": "pipeline-content.json",
                    "payload": base64.b64encode(pipeline_json_payload.encode('utf-8')).decode('utf-8'),
                    "payloadType": "InlineBase64"
                },
                {
                    "path": ".platform",
                    "payload": base64.b64encode(platform_file_payload.encode('utf-8')).decode('utf-8'),
                    "payloadType": "InlineBase64"
                }
            ]
        }
    }

    create_pipeline_uri = f"v1/workspaces/{workspace_id}/dataPipelines"

    client = semfabric.FabricRestClient()
    create_datafactory_pipeline_response = client.request(method = "POST", path_or_url=create_pipeline_uri, lro_wait=True, json = create_datafactory_pipeline_request_body)
    #status and response text are printed before the status check so that the server message is visible on failure
    print(create_datafactory_pipeline_response.status_code)
    print(create_datafactory_pipeline_response.text)
    create_datafactory_pipeline_response.raise_for_status()  # Raise an error for bad status codes

StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 14, Finished, Available, Finished)

201
{"id":"bb431c31-2383-4496-a63e-92f61b44dacc","type":"DataPipeline","displayName":"cms_pipeline","description":"cms_pipeline to ingest and process data","workspaceId":"3818e56e-86ac-40bd-8a34-d1479336e94a"}


In [8]:
#import semantic model

#do not change - the semantic model name exists in the definition file as well
semantic_model_name = "CMS_Direct_Lake_Star_Schema"

if skip_import_semantic_model_step:
    print('skipping semantic model creation step')
else:
    create_semantic_model_uri = f"v1/workspaces/{workspace_id}/semanticModels"

    #start with a body whose parts list gets populated from the model definition files
    create_semantic_model_request_body = {
        "displayName": semantic_model_name,
        "description": "cms semantic model created using API",
        "definition" : {
            "parts": []
            }
        }

    #read the semantic model definition folder into a dictionary used to populate the request body of the API POST call
    semanticmodel_local_path = base_dir_local_path + semanticmodel_relative_path
    print(f'semantic model definition files path: {semanticmodel_local_path}')

    semantic_model_part_dict = get_fabricitemdef_partdict(semanticmodel_local_path)

    #populate the request body using the dictionary
    for key, value in semantic_model_part_dict.items():
        new_part = {
            "path": key,
            #b64encode returns bytes, which the standard json encoder cannot serialize; decode to str
            "payload" : base64.b64encode(value.encode('utf-8')).decode('utf-8'),
            #same casing as the payload type used for the data factory pipeline parts
            "payloadType": "InlineBase64"
        }

        create_semantic_model_request_body["definition"]["parts"].append(new_part)

    fabriclient_post(create_semantic_model_uri, create_semantic_model_request_body)

    print('Semantic Model createed successfully and updating the semantic model to point to lakehouse in this workspace')

    #update the semantic model to point to lakehouse in this workspace
    labs.directlake.update_direct_lake_model_lakehouse_connection(
        dataset = semantic_model_name,
        lakehouse =  lakehouse_name
    )

StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 15, Finished, Available, Finished)

semantic model definition files path: /synfs/notebook/fe2556b5-5d60-43b6-883a-bc771a1e755b/mnt/lakehouse/cms_lakehouse/Files/cmsdemofiles/CMS_Direct_Lake_Star_Schema.SemanticModel


Waiting 20 seconds for v1/workspaces/3818e56e-86ac-40bd-8a34-d1479336e94a/semanticModels operation to check fo…

200
{"id":"283ef801-a210-4a81-8a51-905e33b98305","type":"SemanticModel","displayName":"CMS_Direct_Lake_Star_Schema","description":"","workspaceId":"3818e56e-86ac-40bd-8a34-d1479336e94a"}
🟢 The expression in the 'CMS_Direct_Lake_Star_Schema' semantic model within the 'cms_automation7' workspace has been updated to point to the 'cms_lakehouse' lakehouse in the 'cms_automation7' workspace.


In [9]:
#import report

def update_semantic_model_id(report_def_str, id) -> str:
    """Point a report definition at a different semantic model.

    Args:
        report_def_str: JSON text of the report definition; it must contain
            ``datasetReference.byConnection``.
        id: Value stored as ``pbiModelDatabaseName``.

    Returns:
        The updated definition serialized as JSON text with a 4-space indent.
    """
    definition = json.loads(report_def_str)

    # overwrite the pbiModelDatabaseName value in place on the parsed document
    by_connection = definition["datasetReference"]["byConnection"]
    by_connection["pbiModelDatabaseName"] = id

    return json.dumps(definition, indent=4)

#do not change - the report name exists in the definition file as well
report_name = "CMS Medicare Part D Star Schema"

if skip_import_report_step:
    print('skipping report creation step')
else:

    #the semantic model id is needed because the report definition.pbir file has to be updated
    #with the semantic model created in this workspace as part of the setup
    semantic_model_id = semfabric.resolve_item_id(semantic_model_name, type = "SemanticModel")
    create_report_uri = f"v1/workspaces/{workspace_id}/reports"

    #start with a body whose parts list gets populated from the report definition files
    create_report_request_body = {
        "displayName": report_name,
        "description": "report created using API",
        "definition" : {
            "parts": []
            }
        }

    #read the report definition folder into a dictionary used to populate the request body of the API POST call
    report_local_path = base_dir_local_path + report_relative_path
    print(f'report definition files path: {report_local_path}')

    report_part_dict = get_fabricitemdef_partdict(report_local_path)

    #populate the request body using the dictionary
    for key, value in report_part_dict.items():

        #the definition.pbir part references the semantic model, so it is rewritten before encoding
        if ("definition.pbir" in key):
            value = update_semantic_model_id(value, semantic_model_id)

        new_part = {
            "path": key,
            #b64encode returns bytes, which the standard json encoder cannot serialize; decode to str
            "payload" : base64.b64encode(value.encode('utf-8')).decode('utf-8'),
            #same casing as the payload type used for the data factory pipeline parts
            "payloadType": "InlineBase64"
        }

        create_report_request_body["definition"]["parts"].append(new_part)

    fabriclient_post(create_report_uri, create_report_request_body)
    print('report created successfully')
    #labs.report.report_rebind(report=report_name,dataset=semantic_model_name)
   

StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 16, Finished, Available, Finished)

report definition files path: /synfs/notebook/fe2556b5-5d60-43b6-883a-bc771a1e755b/mnt/lakehouse/cms_lakehouse/Files/cmsdemofiles/CMS Medicare Part D Star Schema.Report
Updated definition json: {
    "$schema": "https://developer.microsoft.com/json-schemas/fabric/item/report/definitionProperties/1.0.0/schema.json",
    "version": "4.0",
    "datasetReference": {
        "byConnection": {
            "connectionString": "Data Source=\"powerbi://api.powerbi.com/v1.0/myorg/CMSDemo-0125\";initial catalog=cms_semantic_model_auto5;integrated security=ClaimsToken",
            "pbiServiceModelId": null,
            "pbiModelVirtualServerName": "sobe_wowvirtualserver",
            "pbiModelDatabaseName": "283ef801-a210-4a81-8a51-905e33b98305",
            "name": "EntityDataSource",
            "connectionType": "pbiServiceXmlaStyleLive"
        }
    }
}


Waiting 20 seconds for v1/workspaces/3818e56e-86ac-40bd-8a34-d1479336e94a/reports operation to check for statu…

200
{"id":"80cb9a9c-879a-4e8a-8c19-a4b88e01267a","type":"Report","displayName":"CMS Medicare Part D Star Schema","description":"report created using API","workspaceId":"3818e56e-86ac-40bd-8a34-d1479336e94a"}
report created successfully


StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 18, Finished, Available, Finished)

In [10]:
#invoke Data Factory Pipeline to load data to Lakehouse

if not skip_invoke_datafactory_pipeline_step:
    # look up the pipeline by name to get the id needed for the job endpoint
    datafactory_pipeline_id = semfabric.resolve_item_id(
        datafactory_pipeline_name,
        type = "DataPipeline",
    )
    print(datafactory_pipeline_id)

    url = f"v1/workspaces/{workspace_id}/items/{datafactory_pipeline_id}/jobs/instances?jobType=Pipeline"

    # submit the job; the request is sent without a body and without waiting on the run
    client = semfabric.FabricRestClient()
    response = client.request(method = "POST", path_or_url = url)
    print(response.status_code)
    print(response.text)
    response.raise_for_status()  # Raise an error for bad status codes

    print("Data Factory Pipeline Job submitted successfully - monitor Pipeline Run from Monitoring Hub or open the pipeline then use Run > View Run History menu to actively monitor the pipeline. Once pipeline job complete data is available in Lakehouse for querying and reporting")
else:
    print('Skipping invocation of Data Factory Pipeline setp')

StatementMeta(, fe2556b5-5d60-43b6-883a-bc771a1e755b, 17, Finished, Available, Finished)

bb431c31-2383-4496-a63e-92f61b44dacc
202

Data Factory Pipeline Job submitted successfully - monitor Pipeline Run from Monitoring Hub or open the pipeline then use Run > View Run History menu to actively monitor the pipeline. Once pipeline job complete data is available in Lakehouse for querying and reporting
