In [None]:
import sys
import os

# Check if running on Fabric

In [None]:
# True when `notebookutils` is already loaded in this interpreter; the rest of
# the notebook treats that as the signal that it is running inside Fabric.
is_fabric = "notebookutils" in sys.modules

# Initialize Variables

In [None]:
# Deployment settings
raw_lakehouse = "lh_bronze"
clean_lakehouse = "lh_gold"
# Target workspace name where the artifacts will be deployed.
workspace_name = "Cricsheet"
# Leave as None to pick a random capacity which the user has access to.
capacity_id = None
# Set to False to skip starting the data load.
start_dataload = True
# Set to False to avoid waiting for the data load job to complete.
wait_for_dataload_completion = True

# Authentication settings (required when running outside Fabric).
# Read from environment variables; an unset or empty variable becomes None.
tenant_id = os.environ.get("tenant_id") or None
client_id = os.environ.get("generic_client_id") or None
client_secret = None
username = os.environ.get("azure_username") or None
password = os.environ.get("azure_password") or None

# Constants

In [None]:
# Source repository and the folders inside it that this notebook reads.
REPO_NAME = "Fabric-Cricsheet"
BRANCH = "main"
ENV_PATH = "/Environment"
NOTEBOOK_PATH = "/Notebooks"

# Spark environment name and the compute configuration file used to build it.
ENV_NAME = "cricsheet-environment"
SPARK_CONFIG_FILE = "SparkCompute.json"

# Utility modules, listed without extension; UTILS_PY holds the file names.
UTILS = [
    "api_client",
    "fabric_utils",
    "delta_table_operations",
    "file_operations",
    "job_operations",
    "powerbi_operations",
    "environment_operations",
]
UTILS_PY = [f"{module_name}.py" for module_name in UTILS]

# Semantic model name -> list of report names built on that model.
CRICSHEET = {"Cricsheet Model": ["Cricsheet Analysis"]}
DATALOAD = {"Data Load Model": ["Data Load Monitor"]}

# Fabric-specific configurations

In [None]:
if is_fabric:
    # Fabric run: derive the GitHub locations of the repository archive and
    # of the raw utility files from the repository constants.
    REPO_BRANCH_NAME = f"/{REPO_NAME}-{BRANCH}"
    ROOT_DOWNLOAD_FOLDER = "git"
    GITHUB_REPO = f"https://github.com/akhilannan/{REPO_NAME}"
    GITHUB_RAW = f"/raw/{BRANCH}"
    ZIP_FILE_NAME = f"{BRANCH}.zip"
    GITHUB_REPO_ZIP = f"{GITHUB_REPO}/archive/refs/heads/{ZIP_FILE_NAME}"

    # Add utility files to Spark context so the import cell can load them.
    # NOTE(review): `sc` is not defined in this notebook; presumably it is the
    # SparkContext supplied by the Fabric session - confirm.
    git_url_files = [
        f"{GITHUB_REPO}{GITHUB_RAW}{ENV_PATH}/{util}" for util in UTILS_PY
    ]
    for git_url_file in git_url_files:
        sc.addPyFile(git_url_file)
else:
    # Local run: make the sibling Environment folder importable. The path is
    # resolved relative to the current working directory.
    local_env_dir = os.path.abspath(os.path.join("..", "Environment"))
    # Guard the append so re-running this cell does not pile up duplicate
    # sys.path entries.
    if local_env_dir not in sys.path:
        sys.path.append(local_env_dir)

# Import Libraries

In [None]:
from api_client import FabricPowerBIClient
import fabric_utils as U
import file_operations as L
import job_operations as J
import powerbi_operations as P
import environment_operations as E

# Initialize Fabric Client

In [None]:
# Build the API client that every later cell passes along as `client=`.
# The credential values come from the configuration cell and may be None;
# per that cell's comment they are only required when running outside Fabric.
fabric_client = FabricPowerBIClient(
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret,
    username=username,
    password=password,
    client_type="FabricRestClient"
)

# Create Workspace if it does not exist

In [None]:
# Resolve the ID of the target workspace by name; going by the helper's name,
# the workspace is presumably created when it is missing. `capacity_id` is
# forwarded unchanged (None by default in the configuration cell).
workspace_id = U.get_or_create_fabric_workspace(workspace_name=workspace_name, capacity_id=capacity_id, client=fabric_client)

# Semantic Models and Reports Structure

In [None]:
# Lakehouse name -> {semantic model: [reports]} deployed on top of it.
lh_semantic_reports = {}
lh_semantic_reports[clean_lakehouse] = CRICSHEET
lh_semantic_reports[raw_lakehouse] = DATALOAD

# Fabric-specific operations

In [None]:
if is_fabric:
    # Resolve the local "Files" path of every lakehouse that has a semantic
    # model mapped to it.
    lakehouses = {
        lakehouse_name: U.get_lakehouse_path(
            lakehouse_name=lakehouse_name,
            path_type="local",
            folder_type="Files",
            workspace=workspace_id,
            client=fabric_client,
        )
        for lakehouse_name in lh_semantic_reports
    }

    # Locations inside the raw lakehouse where the unzipped repository is
    # expected to land.
    lh_repo_path = f"{lakehouses[raw_lakehouse]}/{ROOT_DOWNLOAD_FOLDER}{REPO_BRANCH_NAME}"
    lh_git_notebook_path = f"{lh_repo_path}{NOTEBOOK_PATH}"
    lh_git_env_path = f"{lh_repo_path}{ENV_PATH}"
    spark_config_path = f"{lh_git_env_path}/{SPARK_CONFIG_FILE}"
    utils_files = [f"{lh_git_env_path}/{util}" for util in UTILS_PY]

    # Fetch the repository archive into the raw lakehouse, then extract it.
    zip_file_path = L.download_data(
        url=GITHUB_REPO_ZIP,
        lakehouse=raw_lakehouse,
        path=ROOT_DOWNLOAD_FOLDER,
        workspace=workspace_id,
        client=fabric_client,
    )
    L.unzip_files(zip_file_path)
else:
    # Local run: read everything from the checked-out repository one level up.
    env_base_path = ".." + ENV_PATH
    spark_config_path = env_base_path + "/" + SPARK_CONFIG_FILE
    utils_files = [f"{env_base_path}/{util}" for util in UTILS_PY]
    lh_git_notebook_path = ".." + NOTEBOOK_PATH


# Create and Publish Spark Environment

In [None]:
# Create and publish the Spark environment named ENV_NAME in the target
# workspace. `json_path` points at the Spark compute configuration file and
# `py_path` is the list of utility .py files prepared in the previous cell.
E.create_and_publish_spark_environment(
    environment_name=ENV_NAME,
    json_path=spark_config_path,
    py_path=utils_files,
    workspace=workspace_id,
    client=fabric_client
)

# Deploy Notebooks

In [None]:
# Text substitutions passed to the notebook deployment helper: the default
# lakehouse assignments are swapped for the names configured above.
code_replacements = {
    'RAW_LAKEHOUSE = "lh_bronze"': f'RAW_LAKEHOUSE = "{raw_lakehouse}"',
    'CLEAN_LAKEHOUSE = "lh_gold"': f'CLEAN_LAKEHOUSE = "{clean_lakehouse}"',
}

# Notebook name -> lakehouse passed as its default lakehouse.
notebook_lakehouse = {
    "Cricsheet Orchestrator": raw_lakehouse,
    "Cricsheet Initialize": raw_lakehouse,
    "Cricsheet Ingest Data": raw_lakehouse,
    "Cricsheet Build Facts and Dimensions": clean_lakehouse,
    "Cricsheet Model Refresh": clean_lakehouse,
    "Cricsheet Optimize and Vacuum": raw_lakehouse,
}

# Deploy each notebook from its .ipynb file, in the order listed above.
for notebook_name, target_lakehouse in notebook_lakehouse.items():
    E.create_or_replace_notebook_from_ipynb(
        notebook_path=f"{lh_git_notebook_path}/{notebook_name}.ipynb",
        default_lakehouse_name=target_lakehouse,
        environment_name=ENV_NAME,
        replacements=code_replacements,
        workspace=workspace_id,
        client=fabric_client,
    )

# Deploy Semantic Models and Reports

In [None]:
# Root folder holding the "Semantic Model" and "Power BI Report" directories:
# the unzipped repository on Fabric, the parent directory when run locally.
base_path = ".." if not is_fabric else lh_repo_path

# Flatten lakehouse -> model -> reports into one record per report.
semantic_model_report = []
for lakehouse, models in lh_semantic_reports.items():
    for model, reports in models.items():
        for report in reports:
            semantic_model_report.append(
                {
                    "lakehouse_name": lakehouse,
                    "semantic_model": model,
                    "semantic_model_path": f"{base_path}/Semantic Model/{model}.SemanticModel",
                    "report_path": f"{base_path}/Power BI Report/{report}.Report",
                }
            )

# For every record, deploy the semantic model first and then its report.
for entry in semantic_model_report:
    P.create_or_replace_semantic_model(
        model_path=entry["semantic_model_path"],
        lakehouse_name=entry["lakehouse_name"],
        workspace=workspace_id,
        client=fabric_client,
    )
    P.create_or_replace_report_from_pbir(
        report_path=entry["report_path"],
        dataset_name=entry["semantic_model"],
        dataset_workspace=workspace_id,
        report_workspace=workspace_id,
        client=fabric_client,
    )

# Delete Git folder

In [None]:
# Clean up the repository download folder in the raw lakehouse. Only the
# Fabric path downloads the repository (and defines ROOT_DOWNLOAD_FOLDER),
# so there is nothing to remove on a local run.
if is_fabric:
    L.delete_folder_from_lakehouse(
        lakehouse=raw_lakehouse,
        path=ROOT_DOWNLOAD_FOLDER,
        workspace=workspace_id,
        client=fabric_client
    )

# Start Data Load

In [None]:
# Run the "Cricsheet Orchestrator" notebook as a job, unless the data load was
# disabled in the configuration cell. `wait_for_dataload_completion` is
# forwarded as the helper's `wait_for_completion` argument.
if start_dataload:
    J.run_notebook_job(
        notebook_name="Cricsheet Orchestrator",
        wait_for_completion=wait_for_dataload_completion,
        workspace=workspace_id,
        client=fabric_client
    )