In [1]:
import sys
import os

# Check if running on Fabric

In [2]:
# Flag is True when a module named `notebookutils` is already loaded in this
# interpreter; the rest of the notebook branches on it to pick Fabric vs. local paths.
# NOTE(review): presumably the Fabric runtime preloads `notebookutils` - confirm.
is_fabric = "notebookutils" in sys.modules

# Initialize Variables

In [3]:
# Deployment targets
lakehouse_name = "lh_cricsheet"
staging_schema, reporting_schema, log_schema = "staging", "reporting", "logs"
workspace_name = "Cricsheet"  # workspace that receives the deployed artifacts
capacity_id = None  # None will pick a random capacity which the user has access to

# Data load behaviour
start_dataload = True  # set to False to skip starting the data load
wait_for_dataload_completion = True  # set to False to return without waiting for the job

# Authentication settings (required when running outside Fabric).
# Unset or empty environment variables are normalised to None.
tenant_id = os.environ.get("tenant_id") or None
client_id = os.environ.get("generic_client_id") or None
client_secret = None
username = os.environ.get("azure_username") or None
password = os.environ.get("azure_password") or None

# Constants

In [4]:
# Repository coordinates and folder layout inside the repository.
REPO_NAME = "Fabric-Cricsheet"
BRANCH = "main"
ENV_PATH = "/Environment"
NOTEBOOK_PATH = "/Notebooks"

# Spark environment name and its compute configuration file.
ENV_NAME = "cricsheet-environment"
SPARK_CONFIG_FILE = "SparkCompute.json"

# Utility modules (without extension) and their matching .py file names.
UTILS = [
    "api_client",
    "fabric_utils",
    "delta_table_operations",
    "file_operations",
    "job_operations",
    "powerbi_operations",
    "environment_operations",
]
UTILS_PY = [f"{module_name}.py" for module_name in UTILS]

# Each semantic model together with the reports that are deployed against it.
MODELS_AND_REPORTS = [
    {"model": "Cricsheet Model", "reports": ["Cricsheet Analysis"]},
    {"model": "Data Load Model", "reports": ["Data Load Monitor"]},
]

# Fabric-specific configurations

In [5]:
if is_fabric:
    # GitHub locations: repository root, raw-file prefix and the branch zip archive.
    REPO_BRANCH_NAME = f"/{REPO_NAME}-{BRANCH}"
    ROOT_DOWNLOAD_FOLDER = "git"
    GITHUB_REPO = f"https://github.com/akhilannan/{REPO_NAME}"
    GITHUB_RAW = f"/raw/{BRANCH}"
    ZIP_FILE_NAME = f"{BRANCH}.zip"
    GITHUB_REPO_ZIP = f"{GITHUB_REPO}/archive/refs/heads/{ZIP_FILE_NAME}"

    # Add utility files to Spark context so the import cell can load them.
    # NOTE(review): `sc` is not defined in this notebook; presumably it is the
    # SparkContext injected by the Fabric runtime - confirm.
    git_url_files = [
        GITHUB_REPO + GITHUB_RAW + ENV_PATH + "/" + util for util in UTILS_PY
    ]
    for git_url_file in git_url_files:
        sc.addPyFile(git_url_file)
else:
    # Local-specific configurations: make the utility modules importable from
    # the sibling environment folder. The folder name is derived from ENV_PATH
    # so it stays in sync with the local paths built later in the notebook.
    local_env_dir = os.path.abspath(os.path.join("..", ENV_PATH.lstrip("/")))
    # Guard the append so re-running this cell does not stack duplicate entries.
    if local_env_dir not in sys.path:
        sys.path.append(local_env_dir)

# Import Libraries

In [6]:
from api_client import AzureAPIClient
import fabric_utils as U
import file_operations as L
import job_operations as J
import powerbi_operations as P
import environment_operations as E

# Initialize Fabric Client

In [7]:
# Single API client reused by every deployment step below. All credential
# values come from the configuration cell and are passed through as-is.
fabric_client = AzureAPIClient(
    client_type="FabricRestClient",
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret,
    username=username,
    password=password,
)

# Create Workspace if it does not exist

In [8]:
# Identifier of the target workspace; every later step is scoped to it.
workspace_id = U.get_or_create_fabric_workspace(
    workspace_name=workspace_name,
    capacity_id=capacity_id,
    client=fabric_client,
)

# Fabric-specific operations

In [9]:
if not is_fabric:
    # Local-specific paths: read everything relative to the parent directory.
    env_base_path = ".." + ENV_PATH
    spark_config_path = env_base_path + "/" + SPARK_CONFIG_FILE
    utils_files = [f"{env_base_path}/{py_file}" for py_file in UTILS_PY]
    lh_git_notebook_path = ".." + NOTEBOOK_PATH
else:
    # Set Lakehouse Path (the "Files" area of the lakehouse, as a local path).
    lakehouse_path = U.get_lakehouse_path(
        lakehouse_name=lakehouse_name,
        path_type="local",
        folder_type="Files",
        workspace=workspace_id,
        client=fabric_client,
    )

    # Where the repository contents live under the download folder.
    lh_repo_path = f"{lakehouse_path}/{ROOT_DOWNLOAD_FOLDER}{REPO_BRANCH_NAME}"
    lh_git_notebook_path = f"{lh_repo_path}{NOTEBOOK_PATH}"
    lh_git_env_path = f"{lh_repo_path}{ENV_PATH}"
    spark_config_path = f"{lh_git_env_path}/{SPARK_CONFIG_FILE}"
    utils_files = [f"{lh_git_env_path}/{py_file}" for py_file in UTILS_PY]

    # Download Git Repo contents as Zip and Unzip it
    zip_file_path = L.download_data(
        url=GITHUB_REPO_ZIP,
        lakehouse=lakehouse_name,
        path=ROOT_DOWNLOAD_FOLDER,
        workspace=workspace_id,
        client=fabric_client,
    )
    L.unzip_files(zip_file_path)

# Create and Publish Spark Environment

In [None]:
# Publish the Spark environment from the compute config and the utility modules
# resolved in the previous cell.
E.create_and_publish_spark_environment(
    workspace=workspace_id,
    client=fabric_client,
    environment_name=ENV_NAME,
    json_path=spark_config_path,
    py_path=utils_files,
)

# Deploy Notebooks

In [None]:
# Regex pattern -> replacement text, used to point the notebook's constants at
# the lakehouse and schemas chosen in the configuration cell.
replacements = {
    r'LAKEHOUSE\s*=\s*"[^"]*"': f'LAKEHOUSE = "{lakehouse_name}"',
    r'STAGING_SCHEMA\s*=\s*"[^"]*"': f'STAGING_SCHEMA = "{staging_schema}"',
    r'REPORTING_SCHEMA\s*=\s*"[^"]*"': f'REPORTING_SCHEMA = "{reporting_schema}"',
    r'LOG_SCHEMA\s*=\s*"[^"]*"': f'LOG_SCHEMA = "{log_schema}"',
}

# Notebook name -> replacements to apply (None means deploy unchanged).
# Every notebook starts at None; only the initialize notebook gets replacements.
notebook_prefixes = dict.fromkeys(
    [
        "Cricsheet Orchestrator",
        "Cricsheet Initialize",
        "Cricsheet Ingest Data",
        "Cricsheet Build Facts and Dimensions",
        "Cricsheet Model Refresh",
        "Cricsheet Optimize and Vacuum",
    ]
)
notebook_prefixes["Cricsheet Initialize"] = replacements

for notebook_name, substitutions in notebook_prefixes.items():
    ipynb_path = os.path.join(lh_git_notebook_path, notebook_name + ".ipynb")

    E.create_or_replace_notebook_from_ipynb(
        notebook_path=ipynb_path,
        default_lakehouse_name=lakehouse_name,
        environment_name=ENV_NAME,
        replacements=substitutions,
        workspace=workspace_id,
        client=fabric_client,
    )

# Deploy Semantic Models and Reports

In [None]:
# Root folder holding the "Semantic Model" and "Power BI Report" directories:
# the downloaded repository on Fabric, the parent directory when running locally.
base_path = lh_repo_path if is_fabric else ".."

for config in MODELS_AND_REPORTS:
    model_name = config["model"]

    # Schema mapping: the data load model reads the log schema, every other
    # model reads the reporting schema. Kept in a local variable so the shared
    # MODELS_AND_REPORTS constant is not mutated by this cell.
    schema_name = log_schema if model_name == "Data Load Model" else reporting_schema

    # Deploy semantic model
    P.create_or_replace_semantic_model(
        model_path=f"{base_path}/Semantic Model/{model_name}.SemanticModel",
        lakehouse_name=lakehouse_name,
        schema_name=schema_name,
        workspace=workspace_id,
        client=fabric_client,
    )

    # Deploy associated reports, each bound to the model deployed just above
    for report in config["reports"]:
        P.create_or_replace_report_from_pbir(
            report_path=f"{base_path}/Power BI Report/{report}.Report",
            dataset_name=model_name,
            dataset_workspace=workspace_id,
            report_workspace=workspace_id,
            client=fabric_client,
        )

# Delete Git folder

In [13]:
# The download folder only exists on Fabric, where the repository archive was
# fetched into the lakehouse; nothing is removed for a local run.
if is_fabric:
    L.delete_folder_from_lakehouse(
        workspace=workspace_id,
        client=fabric_client,
        lakehouse=lakehouse_name,
        path=ROOT_DOWNLOAD_FOLDER,
    )

# Start Data Load

In [None]:
# Optionally start the orchestrator notebook; whether the call waits for the
# job to finish is controlled by wait_for_dataload_completion.
if start_dataload:
    J.run_notebook_job(
        workspace=workspace_id,
        client=fabric_client,
        notebook_name="Cricsheet Orchestrator",
        wait_for_completion=wait_for_dataload_completion,
    )