# Blogs setup

In [None]:
%run CONFIG

## Config

- [sempy.fabric documentation](https://learn.microsoft.com/en-us/python/api/semantic-link-sempy/sempy.fabric?view=semantic-link-python#functions)
- [sempy_labs documentation](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html)


In [None]:
%pip install semantic-link-labs

In [None]:
import sempy.fabric as fabric

# Resolve the workspace name to its ID.
# NOTE(review): workspaceName is not defined in this notebook; presumably it is
# set by the CONFIG notebook executed via %run above - confirm.
workspaceId = fabric.resolve_workspace_id(workspaceName)


## Create lakehouses

- [sempy.fabric create_lakehouse](https://learn.microsoft.com/en-us/python/api/semantic-link-sempy/sempy.fabric?view=semantic-link-python#sempy-fabric-create-lakehouse)

In [None]:
# CREATE LAKEHOUSES
# NOTE(review): lh_raw, lh_transformed and workspaceName are not defined in this
# cell; presumably they come from the CONFIG notebook - confirm.
import sempy.fabric as fabric

# Passed to create_lakehouse as its max_attempts argument.
max_attempts = 10

# lh_raw
lh_raw_desc = "Lakehouse for raw data"
lh1 = fabric.create_lakehouse(lh_raw, lh_raw_desc, max_attempts, workspaceName)

# lh_transformed
# The description previously duplicated the raw lakehouse text (copy-paste slip).
lh_transformed_desc = "Lakehouse for transformed data"
lh2 = fabric.create_lakehouse(lh_transformed, lh_transformed_desc, max_attempts, workspaceName)


## Create Environment

Creates the default environment that will be used for all notebooks in the project. 
- [sempy_labs.create_environment](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.create_environment)

### Manual configurations
The following configurations need to be done __manually__ after the environment has been created.

Adding _pypi_ libraries:
- feedparser
- trafilatura
- openai

Adding spark properties:
- spark.sql.parquet.vorder.enabled: true
- spark.microsoft.delta.optimizeWrite.enabled: true



In [None]:
# CREATE ENVIRONMENT
# NOTE(review): env_blogs and workspaceName are not defined in this cell;
# presumably they come from the CONFIG notebook - confirm.
import sempy_labs as labs
env_description = "Blogs custom environment"

env = labs.create_environment(env_blogs, env_description, workspaceName)
# Resolve the ID in the same workspace the environment was created in; without
# the workspace argument the lookup is not tied to workspaceName.
environmentId = labs.resolve_environment_id(env_blogs, workspace=workspaceName)

## Import Notebooks

Import notebooks from the public GitHub repository using Semantic Link Labs:
- [sempy_labs.import_notebook_from_web(notebook_name: str, url: str, description: str | None = None, workspace: str | None = None, overwrite: bool = False)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.import_notebook_from_web)


In [None]:
import sempy_labs as labs

# Folder that holds the notebooks to import. The default is the public
# repository of the solution.
base_url = "https://github.com/florianboigner/Blogs/blob/main/notebooks"
# Passed as the `overwrite` argument of import_notebook_from_web.
nb_overwrite = False

# Notebook names; they are reused further down when lakehouses are attached.
nb_get_feeds = "nb_get_feeds"
nb_get_blogs = "nb_get_blogs"
nb_create_summary = "nb_create_summary"

# Source URL of each notebook file.
nb_get_feeds_url = f"{base_url}/{nb_get_feeds}.ipynb"
nb_get_blogs_url = f"{base_url}/{nb_get_blogs}.ipynb"
nb_create_summary_url = f"{base_url}/{nb_create_summary}.ipynb"

# Description stored with each imported notebook.
nb_get_feeds_description = "Notebook to get feeds"
nb_get_blogs_description = "Notebook to get blogposts"
nb_create_summary_description = "Notebook to create summaries"

# Import in the same order as before: feeds, blogs, summary.
_notebooks_to_import = (
    (nb_get_feeds, nb_get_feeds_url, nb_get_feeds_description),
    (nb_get_blogs, nb_get_blogs_url, nb_get_blogs_description),
    (nb_create_summary, nb_create_summary_url, nb_create_summary_description),
)
for _nb_name, _nb_url, _nb_description in _notebooks_to_import:
    labs.import_notebook_from_web(_nb_name, _nb_url, _nb_description, workspaceName, nb_overwrite)

## Attach Lakehouses to Notebooks

Thanks to Sandeep Pawar for writing [this great blog](https://fabric.guru/programmatically-removing-updating-default-lakehouse-of-a-fabric-notebook) and pointing me in the right direction.

In [None]:
import sempy.fabric as fabric
import json

def remove_all_lakehouses(notebook_name, workspaceId):
    """Detach every lakehouse from a notebook by rewriting its definition.

    The notebook definition is fetched through ``notebookutils``, the
    ``metadata.dependencies.lakehouse`` entry is emptied when it exists, and
    the definition is written back.

    Args:
        notebook_name: Name of the notebook to update.
        workspaceId: ID of the workspace that contains the notebook.

    Returns:
        None. When the definition cannot be fetched or parsed, an error is
        printed and the notebook is left untouched.
    """
    try:
        nb = json.loads(notebookutils.notebook.getDefinition(notebook_name, workspaceId))
    except Exception as err:
        # Stop here: without a parsed definition there is nothing to write
        # back (previously execution continued and failed on the unbound `nb`).
        print("Error, check notebook & workspace id")
        print(f"Details: {err!r}")
        return

    # Tolerate definitions that have no 'metadata' or 'dependencies' section.
    dependencies = nb.get('metadata', {}).get('dependencies', {})
    if 'lakehouse' in dependencies:
        # Remove all lakehouses
        dependencies['lakehouse'] = {}

    # Update the notebook definition without any lakehouses
    notebookutils.notebook.updateDefinition(
        name=notebook_name,
        content=json.dumps(nb),
        workspaceId=workspaceId
    )

    print(f"All lakehouses have been removed from notebook '{notebook_name}'.")

def update_notebook_definition(notebook_name, lakehouse_name, workspaceId, environmentId):
    """Set the default lakehouse and the environment of a notebook.

    Calls ``notebookutils.notebook.updateDefinition`` with the given lakehouse
    as default lakehouse and the given environment; ``workspaceId`` is used as
    the workspace of the notebook, the lakehouse and the environment alike.

    Args:
        notebook_name: Name of the notebook to update.
        lakehouse_name: Lakehouse to attach as the default lakehouse.
        workspaceId: Workspace ID used for notebook, lakehouse and environment.
        environmentId: ID of the environment to attach.

    Returns:
        None. On failure an error is printed and no success message is shown.
    """
    try:
        notebookutils.notebook.updateDefinition(
            name=notebook_name,
            workspaceId=workspaceId,
            defaultLakehouse=lakehouse_name,
            defaultLakehouseWorkspace=workspaceId,
            environmentId=environmentId,
            environmentWorkspaceId=workspaceId,
        )
    except Exception as err:
        # Report the failure and stop, so the success message below is only
        # printed when the update actually went through.
        print("Error, please check IDs")
        print(f"Details: {err!r}")
        return

    print(f"Lakehouse '{lakehouse_name}' was attached as default lakehouse to notebook '{notebook_name}'")


In [None]:
# All variables used here are defined in earlier cells. When running this cell
# on its own, make sure they are set first.

# nb_get_blogs and nb_get_feeds: clear existing lakehouses, then attach lh_raw.
for _notebook in (nb_get_blogs, nb_get_feeds):
    remove_all_lakehouses(_notebook, workspaceId)
    update_notebook_definition(_notebook, lh_raw, workspaceId, environmentId)

# nb_create_summary: clear existing lakehouses, then attach lh_raw followed by
# lh_transformed. lh_transformed is attached last on purpose, so that it ends
# up as the default lakehouse, replacing the previous default.
remove_all_lakehouses(nb_create_summary, workspaceId)
for _lakehouse in (lh_raw, lh_transformed):
    update_notebook_definition(nb_create_summary, _lakehouse, workspaceId, environmentId)