# Initialize Variables

In [None]:
# Lakehouse names used by the deployment.
raw_lakehouse = "lh_bronze"
clean_lakehouse = "lh_gold"

# Target workspace name where the artifacts will be deployed.
workspace_name = "Cricsheet"

# Leave as None to pick a random capacity which the user has access to.
capacity_id = None

# Set to False if you don't want to start the data load.
start_dataload = True

# Set to False if you don't want to wait for the completion of the job.
wait_for_dataload_completion = True

# Constants

In [None]:
# --- Source repository -------------------------------------------------------
REPO_NAME = "Fabric-Cricsheet"
BRANCH = "main"
REPO_BRANCH_NAME = "/" + REPO_NAME + "-" + BRANCH

# --- Folder layout (download target and paths inside the repository) ---------
ROOT_DOWNLOAD_FOLDER = "git"
ENV_PATH = "/Environment"
NOTEBOOK_PATH = "/Notebooks"

# --- Spark environment -------------------------------------------------------
ENV_NAME = "cricsheet-environment"
SPARK_CONFIG_FILE = "Sparkcompute.yml"

# Helper modules, listed without extension; UTILS_PY holds the matching file names.
UTILS = [
    "fabric_utils",
    "delta_table_operations",
    "file_operations",
    "job_operations",
    "powerbi_operations",
    "environment_operations",
]
UTILS_PY = [f"{module_name}.py" for module_name in UTILS]

# --- Semantic model name -> names of the reports built on it ----------------
CRICSHEET = {"Cricsheet Model": ["Cricsheet Analysis"]}
DATALOAD = {"Data Load Model": ["Data Load Monitor"]}

# --- GitHub URLs -------------------------------------------------------------
GITHUB_REPO = "https://github.com/akhilannan/" + REPO_NAME
GITHUB_RAW = "/raw/" + BRANCH
ZIP_FILE_NAME = BRANCH + ".zip"
GITHUB_REPO_ZIP = GITHUB_REPO + "/archive/refs/heads/" + ZIP_FILE_NAME

# Load Common Functions from Git

In [None]:
# Raw GitHub URL of every helper module in the repository's environment folder.
git_url_files = [
    f"{GITHUB_REPO}{GITHUB_RAW}{ENV_PATH}/{util_file}"
    for util_file in UTILS_PY
]

# Hand each helper file to `sc` (presumably the notebook's SparkContext — confirm)
# before the helper modules are imported.
for helper_url in git_url_files:
    sc.addPyFile(helper_url)
import fabric_utils as U
import file_operations as L
import job_operations as J
import powerbi_operations as P
import environment_operations as E

# Create Workspace if it does not exist

In [None]:
# Resolve the ID of the target workspace; going by the helper's name it creates the
# workspace when it is missing (behavior lives in fabric_utils — not visible here).
# workspace_id is passed to every later deployment call.
workspace_id = U.get_or_create_fabric_workspace(workspace_name, capacity_id)

# Set Lakehouse Path

In [None]:
# Which semantic models (and their reports) belong to which lakehouse.
lh_semantic_reports = {clean_lakehouse: CRICSHEET, raw_lakehouse: DATALOAD}

# Local "Files" path of each lakehouse in the target workspace, keyed by lakehouse name.
lakehouses = {
    lakehouse_name: U.get_lakehouse_path(
        lakehouse_name=lakehouse_name,
        path_type="local",
        folder_type="Files",
        workspace=workspace_id,
    )
    for lakehouse_name in lh_semantic_reports
}

# Folder under the raw lakehouse's download folder named after the repo and branch.
lh_repo_path = f"{lakehouses[raw_lakehouse]}/{ROOT_DOWNLOAD_FOLDER}{REPO_BRANCH_NAME}"

# Download Git Repo contents as Zip and Unzip it

In [None]:
# Download the repository archive for the raw lakehouse / download folder; the
# helper's return value is treated as the path of the downloaded zip file.
zip_file_path = L.download_data(GITHUB_REPO_ZIP, raw_lakehouse, ROOT_DOWNLOAD_FOLDER, workspace_id)
# Extract the archive (presumably next to the zip — confirm against file_operations).
L.unzip_files(zip_file_path)

# Create and Publish Spark Environment

In [None]:
# Paths of the environment assets inside the downloaded repository folder.
lh_git_env_path = f"{lh_repo_path}{ENV_PATH}"
spark_config_path = f"{lh_git_env_path}/{SPARK_CONFIG_FILE}"
utils_files = [f"{lh_git_env_path}/{util_file}" for util_file in UTILS_PY]

# Build the Spark environment from the compute config and the helper modules.
E.create_and_publish_spark_environment(
    ENV_NAME,
    spark_config_path,
    utils_files,
    workspace_id,
)

# Deploy Notebooks

In [None]:
# Source-text substitutions handed to the notebook deployment helper: each default
# lakehouse assignment is mapped to one using the names configured for this run.
code_replacements = {
    'RAW_LAKEHOUSE = "lh_bronze"': f'RAW_LAKEHOUSE = "{raw_lakehouse}"',
    'CLEAN_LAKEHOUSE = "lh_gold"': f'CLEAN_LAKEHOUSE = "{clean_lakehouse}"',
}

# Notebook name -> lakehouse to attach as that notebook's default.
notebook_lakehouse = {
    "Cricsheet Orchestrator": raw_lakehouse,
    "Cricsheet Initialize": raw_lakehouse,
    "Cricsheet Ingest Data": raw_lakehouse,
    "Cricsheet Build Facts and Dimensions": clean_lakehouse,
    "Cricsheet Model Refresh": clean_lakehouse,
    "Cricsheet Optimize and Vacuum": raw_lakehouse,
}

# Deploy every notebook from its .ipynb file in the repository's notebook folder.
lh_git_notebook_path = f"{lh_repo_path}{NOTEBOOK_PATH}"
for notebook_name, default_lakehouse in notebook_lakehouse.items():
    E.create_or_replace_notebook_from_ipynb(
        f"{lh_git_notebook_path}/{notebook_name}.ipynb",
        default_lakehouse,
        ENV_NAME,
        code_replacements,
        workspace_id,
    )

# Deploy Semantic Models and Reports

In [None]:
# Define a function to process each semantic model report
def deploy_model_and_report(smr, workspace_id):
    """Deploy one semantic model and its report into a workspace.

    Runs three steps in order, each through ``J.execute_with_retries``:
    create or replace the semantic model, update its model expression, then
    create or replace the report bound to that model.

    Args:
        smr: Mapping with the keys ``semantic_model_path``, ``semantic_model``,
            ``lakehouse_name`` and ``report_path``.
        workspace_id: Workspace passed to every step; it is used for both the
            dataset and the report.
    """
    run_with_retries = J.execute_with_retries

    # Step 1: semantic model from its definition folder.
    run_with_retries(
        P.create_or_replace_semantic_model,
        model_path=smr["semantic_model_path"],
        workspace=workspace_id,
    )

    # Step 2: model expression, given the model and its lakehouse name.
    dataset = smr["semantic_model"]
    run_with_retries(
        P.update_model_expression,
        dataset_name=dataset,
        lakehouse_name=smr["lakehouse_name"],
        workspace=workspace_id,
    )

    # Step 3: report definition, bound to the model deployed above.
    run_with_retries(
        P.create_or_replace_report_from_pbir,
        report_path=smr["report_path"],
        dataset_name=dataset,
        dataset_workspace=workspace_id,
        report_workspace=workspace_id,
    )

# Flatten the lakehouse -> model -> reports mapping into one record per report.
semantic_model_report = [
    {
        "lakehouse_name": lh_name,
        "semantic_model": model_name,
        "semantic_model_path": f"{lh_repo_path}/Semantic Model/{model_name}.SemanticModel",
        "report_path": f"{lh_repo_path}/Power BI Report/{report_name}.Report",
    }
    for lh_name, model_reports in lh_semantic_reports.items()
    for model_name, report_names in model_reports.items()
    for report_name in report_names
]

# Deploy the records one at a time, in the order they were built.
for deployment in semantic_model_report:
    deploy_model_and_report(deployment, workspace_id)

# Delete Git folder

In [None]:
# Clean up: remove the download folder (ROOT_DOWNLOAD_FOLDER) from the raw lakehouse
# once the repository contents are no longer needed.
L.delete_folder_from_lakehouse(raw_lakehouse, ROOT_DOWNLOAD_FOLDER, workspace_id)

# Start Data Load

In [None]:
# Run the orchestrator notebook only when the data load was requested.
# wait_for_dataload_completion is forwarded to the job helper; per its definition
# it controls whether this cell waits for the job to finish.
if start_dataload:
    J.run_notebook_job('Cricsheet Orchestrator', wait_for_dataload_completion, workspace_id)