# Install Semantic Link

In [None]:
!pip install semantic-link --upgrade -q

# Import Sempy fabric

In [None]:
from sempy import fabric

# Initialize Variables

In [None]:
# --- Deployment target -------------------------------------------------------
# Name of the workspace that receives the deployed artifacts.
workspace_name = "Cricsheet"
# Capacity used when the workspace has to be created. Leave as None to have a
# capacity the user has access to picked automatically.
capacity_id = None

# --- Lakehouse names ---------------------------------------------------------
raw_lakehouse = "lh_bronze"
clean_lakehouse = "lh_gold"

# --- Behaviour ---------------------------------------------------------------
# Set to False to deploy everything without starting the data load.
start_dataload = True

# Get or Create Workspace

In [None]:
# Resolve the target workspace by name; if that fails, create it.
try:
    workspace_id = fabric.resolve_workspace_id(workspace_name)
except Exception:
    # Resolution failed (presumably the workspace does not exist yet).
    # `except Exception` instead of a bare `except` so that KeyboardInterrupt
    # and SystemExit are not swallowed and mistaken for "workspace missing".
    if capacity_id is None:
        try:
            # Pick the first capacity that is active and whose SKU is not 'PP3'.
            capacity_id = (
                fabric.list_capacities()
                .query("State == 'Active' and Sku != 'PP3'")["Id"]
                .iloc[0]
            )
        except Exception:
            # No matching capacity (e.g. the filtered result is empty): stop the notebook.
            mssparkutils.notebook.exit("No Premium/Fabric Capacities found")
    # Create the workspace on the configured (or just selected) capacity.
    workspace_id = fabric.create_workspace(workspace_name, capacity_id)


# Constants

In [None]:
# GitHub repository that holds the deployable artifacts.
REPO_NAME = "Fabric-Cricsheet"
GITHUB_REPO_ZIP = "https://github.com/akhilannan/" + REPO_NAME + "/archive/refs/heads/main.zip"
# Top-level folder name inside the downloaded archive (with leading slash).
MAIN_NAME = "/" + REPO_NAME + "-main"

# Path fragments (each with a leading slash) appended to a base path.
FILES_PATH = "/Files"
GIT_PATH = "/git"
UTILITY_PATH = "/Environment"
NOTEBOOK_PATH = "/Notebooks"

# Folder names joined with os.path.join further down.
PACKAGE_FOLDER = "packages"
FUNCTION_FOLDER = "fabric_python_functions"
FABRIC_UTILS_FOLDER = "fabric_utils"

# Spark environment name and the files used to build it.
ENV_NAME = "cricsheet-environment"
SPARK_CONFIG_FILE = "Sparkcompute.yml"
FABRIC_UTILS_FILE = FABRIC_UTILS_FOLDER + ".py"

# Semantic model name -> list of report names built on that model.
CRICSHEET = {"Cricsheet Model": ["Cricsheet Analysis"]}
DATALOAD = {"Data Load Model": ["Data Load Monitor"]}

# Get or create lakehouse details

In [None]:
def get_or_create_lakehouse(lakehouse_name):
    """Create the named lakehouse (or fetch it if creation fails), mount it, and return its mount path.

    Args:
        lakehouse_name: Name of the lakehouse inside the workspace identified
            by the notebook-level ``workspace_id``.

    Returns:
        The value returned by ``mssparkutils.fs.getMountPath`` for the mount
        point ``/lakehouse/<lakehouse_name>``.

    Raises:
        Any exception raised by ``mssparkutils.lakehouse.get`` or the mount
        calls; only a failure of the initial ``create`` call is handled here.
    """
    try:
        lakehouse_details = mssparkutils.lakehouse.create(name=lakehouse_name, workspaceId=workspace_id)
    except Exception:
        # Creation failed -- presumably because the lakehouse already exists --
        # so fall back to looking it up. `except Exception` (not a bare
        # `except`) keeps KeyboardInterrupt/SystemExit from being swallowed.
        lakehouse_details = mssparkutils.lakehouse.get(name=lakehouse_name, workspaceId=workspace_id)

    lakehouse_id = lakehouse_details.id
    # OneLake ABFSS URI of the lakehouse, addressed by workspace and lakehouse id.
    abfss_lakehouse_path = f"abfss://{workspace_id}@onelake.dfs.fabric.microsoft.com/{lakehouse_id}"
    mount_point = f"/lakehouse/{lakehouse_name}"
    mssparkutils.fs.mount(abfss_lakehouse_path, mount_point)
    return mssparkutils.fs.getMountPath(mount_point)

# Create Mount Points

In [None]:
# Which semantic models (and their reports) belong to which lakehouse.
lh_semantic_reports = {clean_lakehouse: CRICSHEET, raw_lakehouse: DATALOAD}

# Get or create each lakehouse, mount it, and remember its mount path by name.
lakehouses = {
    lakehouse_name: get_or_create_lakehouse(lakehouse_name)
    for lakehouse_name in lh_semantic_reports
}

# Set Variables for Semantic Model and Report

In [None]:
# Locations under the raw lakehouse mount: the Files root, the folder the
# repository archive is extracted into, and the repository root inside it.
lh_files_path = lakehouses[raw_lakehouse] + FILES_PATH
lh_git_path = lh_files_path + GIT_PATH
lh_repo_path = lh_git_path + MAIN_NAME

# One entry per (semantic model, report) pair, carrying the lakehouse the
# model belongs to and the repository paths of the model and the report.
semantic_model_report = []
for lakehouse_name, model_reports in lh_semantic_reports.items():
    for model_name, report_names in model_reports.items():
        model_path = f"{lh_repo_path}/Semantic Model/{model_name}.SemanticModel"
        semantic_model_report.extend(
            {
                'lakehouse_name': lakehouse_name,
                'semantic_model': model_name,
                'semantic_model_path': model_path,
                'report_path': f"{lh_repo_path}/Power BI Report/{report_name}.Report",
            }
            for report_name in report_names
        )

# Set Additional Variables

In [None]:
import os
# Paths inside the extracted repository: notebooks folder, the fabric_utils
# module, and the Spark compute configuration file.
lh_git_notebook_path = f"{lh_repo_path}{NOTEBOOK_PATH}"
git_utils_path = f"{lh_repo_path}{UTILITY_PATH}/{FABRIC_UTILS_FILE}"
spark_config_path = f"{lh_repo_path}{UTILITY_PATH}/{SPARK_CONFIG_FILE}"

# Nested package folders under the lakehouse Files root.
package_path = os.path.join(lh_files_path, PACKAGE_FOLDER)
function_folder_path = os.path.join(lh_files_path, PACKAGE_FOLDER, FUNCTION_FOLDER)
fabric_utils_path = os.path.join(
    lh_files_path, PACKAGE_FOLDER, FUNCTION_FOLDER, FABRIC_UTILS_FOLDER
)

# Download Git Repo contents as Zip and Unzip it

In [None]:
import requests
import zipfile
import io
import shutil

# Remove any existing download folder before extracting; a missing folder is
# not an error because of ignore_errors=True.
shutil.rmtree(lh_git_path, ignore_errors=True)

# Download the repository archive. The timeout keeps a stalled connection from
# hanging the notebook indefinitely, and raise_for_status() reports an HTTP
# failure directly instead of letting an error page surface later as a
# confusing zipfile.BadZipFile.
response = requests.get(GITHUB_REPO_ZIP, timeout=60)
response.raise_for_status()

# Extract the in-memory archive; the context manager closes it afterwards.
with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
    archive.extractall(lh_git_path)

# Add and Import Fabric Utilities

In [None]:
# Register the downloaded fabric_utils.py with `sc` (presumably the notebook's
# SparkContext -- it is not defined in this file) so the module can be imported.
# This must run before the import on the next line.
sc.addPyFile(git_utils_path)
# Helper module from the repository, referred to as `U` in the cells below.
import fabric_utils as U

# Create and Publish Spark Environment

In [None]:
# Create and publish the Spark environment in the target workspace, handing the
# helper the environment name, the Spark compute config file and the utilities module.
U.create_and_publish_spark_environment(
    ENV_NAME,
    spark_config_path,
    git_utils_path,
    workspace_id,
)

# Deploy Notebooks

In [None]:
import json

# Text substitutions handed to the notebook deployment helper: each key is the
# default assignment as written in the repository notebooks, each value the
# same assignment using the lakehouse names configured in this notebook.
# (How the helper applies them is defined in fabric_utils -- presumably a plain
# text replacement in the notebook code.)
code_replacements = {
    'RAW_LAKEHOUSE = "lh_bronze"': f'RAW_LAKEHOUSE = "{raw_lakehouse}"',
    'CLEAN_LAKEHOUSE = "lh_gold"': f'CLEAN_LAKEHOUSE = "{clean_lakehouse}"',
}

# Notebook name -> lakehouse passed to the helper as that notebook's default.
notebook_lakehouse = {
    'Cricsheet Orchestrator': raw_lakehouse,
    'Cricsheet Initialize': raw_lakehouse,
    'Cricsheet Ingest Data': raw_lakehouse,
    'Cricsheet Build Facts and Dimensions': clean_lakehouse,
    'Cricsheet Model Refresh': clean_lakehouse,
    'Cricsheet Optimize and Vacuum': raw_lakehouse,
}

# Load each notebook from the extracted repository and deploy it.
for file_name, lakehouse_type in notebook_lakehouse.items():
    full_file_name = file_name + ".ipynb"
    file_path = os.path.join(lh_git_notebook_path, full_file_name)
    # .ipynb files are UTF-8 JSON; state the encoding explicitly so reading
    # does not depend on the platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        notebook_json = json.load(file)
    U.create_or_replace_notebook_from_ipynb(
        file_name,
        notebook_json,
        lakehouse_type,
        ENV_NAME,
        code_replacements,
        workspace_id,
    )

# Deploy Semantic Models and Reports

In [None]:
def deploy_model_and_report(smr):
    """Deploy one semantic model and its report into the target workspace.

    Runs three steps in order, each through ``U.execute_with_retries``:
    create/replace the semantic model, update the model expression for the
    lakehouse, then create/replace the report.

    Args:
        smr: Dict with the keys ``'semantic_model_path'``, ``'semantic_model'``,
            ``'lakehouse_name'`` and ``'report_path'``.
    """
    # Step 1: create or replace the semantic model from its repository folder.
    model_kwargs = {
        'model_path': smr['semantic_model_path'],
        'workspace_id': workspace_id,
    }
    U.execute_with_retries(U.create_or_replace_semantic_model, **model_kwargs)

    # Step 2: update the model expression for the given lakehouse.
    expression_kwargs = {
        'dataset_name': smr['semantic_model'],
        'lakehouse_name': smr['lakehouse_name'],
        'workspace_id': workspace_id,
    }
    U.execute_with_retries(U.update_model_expression, **expression_kwargs)

    # Step 3: create or replace the report; model and report share one workspace.
    report_kwargs = {
        'report_path': smr['report_path'],
        'dataset_name': smr['semantic_model'],
        'dataset_workspace_id': workspace_id,
        'report_workspace_id': workspace_id,
    }
    U.execute_with_retries(U.create_or_replace_report_from_pbir, **report_kwargs)

# Deploy every (semantic model, report) pair collected earlier, in order.
for model_report_entry in semantic_model_report:
    deploy_model_and_report(model_report_entry)

# Delete Git folder

In [None]:
# Remove the folder the repository archive was extracted into.
# ignore_errors=True: a missing folder or a failed deletion does not raise.
shutil.rmtree(lh_git_path, ignore_errors=True)

# Start Data Load

In [None]:
# Optionally start the data load by running the orchestrator notebook.
if start_dataload:
    # Look up the deployed orchestrator notebook's id in the target workspace.
    cricsheet_orchestrator_id = U.get_item_id('Cricsheet Orchestrator', 'Notebook', workspace_id)
    try:
        fabric.run_notebook_job(
            notebook_id=cricsheet_orchestrator_id,
            workspace=workspace_id,
        )
    except Exception as err:
        # A failed run is reported rather than raised, with a pointer to where to look.
        print(f"{err}. Check the run details from Monitoring Hub or by opening the Data Load Monitor report.")