## LLMs

In [1]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embedding model: a HuggingFace embedding loaded by name ("BAAI/bge-small-en-v1.5").
# NOTE(review): emb_model is not referenced by any other code visible in this notebook.
emb_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from llama_index.llms.ollama import Ollama  

# Chat LLM accessed through Ollama ("qwen2:7b"); used further down for blogpost
# generation and passed as the `llm` of every agent.
chat_model = Ollama(model="qwen2:7b")

## Researcher: An Agent Workflow

Let's create an agent workflow that would: 
 1) Take a topic to write a blogpost about and subdivide it into sections
 2) The first section should include variables, definitions, concepts and terminology that are going to be used for explaining (mathematically) the problem   
    - Briefly explain those concepts 
 3) Look up websites that explain the topic  
    - Download the selected websites and process their data (make a note that you need to reference them)
    - Select websites that best fit the subsections identified before  
    - Decide which website is the best at explaining the topic based on how it covers the subsections
 4) Analyse the website and rearrange its content to fit the desired subsection structure and the variables and definitions  
     

In [3]:
import urllib.error
import urllib.error
import urllib.request

from llama_index.core.workflow import (
    InputRequiredEvent,
    HumanResponseEvent,
)
from llama_index.core.agent.workflow import ReActAgent, AgentWorkflow
from llama_index.core.tools import FunctionTool
from llama_index.core.workflow import Context
from duckduckgo_search import DDGS
import mlflow

# Point the client at the tracking server BEFORE selecting the experiment:
# set_experiment() looks up / creates the experiment on whatever tracking URI is
# active at call time, so calling it first would register the experiment in the
# default store instead of on the server at localhost:5000.
mlflow.set_tracking_uri('http://localhost:5000')
mlflow.set_experiment(experiment_name="Test-agentic-workflow")
# Enable automatic tracing/logging of LlamaIndex calls into the experiment above.
mlflow.llama_index.autolog()


def get_relevant_webpages(ctx: Context, query: str) -> tuple[list[dict[str, str]], int]:
    """
    Run a DuckDuckGo text search for the query and report the hits with their count.

    Args:
        ctx (Context): workflow context; accepted but not used by this function.
        query (str): what to search for on the web.

    Returns:
        tuple: (hits, number_of_hits) - the first element is whatever the text
        search returned for the query, the second is the length of that result.
    """
    hits = DDGS().text(query)
    hit_count = len(hits)
    return hits, hit_count

async def download_webpage(ctx: Context, url: str, num_=None) -> str:
    """
    Load the raw webpage of the url. Store it in the context.

    This is a coroutine because writing to the context store is asynchronous
    and must be awaited for the value to actually be saved.

    Args:
        ctx (Context): workflow context; the decoded html is saved in its store
            under the key "html_resource".
        url (str): www url of the page.
        num_: order number of the url in the search results. Accepted because
            the downloader agent is instructed to pass it; the download itself
            does not use it.

    Returns:
        str: html string of the whole webpage, or a short error message when
        the page could not be fetched or decoded.
    """
    try:
        with urllib.request.urlopen(url) as response:
            raw_bytes = response.read()
            # Prefer the charset announced by the server; fall back to UTF-8.
            charset = response.headers.get_content_charset() or "utf-8"
        # read() yields bytes; decode so the declared `str` return type holds.
        html_text = raw_bytes.decode(charset, errors="replace")
    except urllib.error.URLError as err:
        error_message = f"Error getting the page {url}: {err}"
        print(error_message)
        return error_message
    except Exception as err:
        # Best-effort tool: report the failure instead of raising.
        error_message = f"Something happened while getting the page {url}: {err}"
        print(error_message)
        return error_message

    # The store's `set` is a coroutine; without `await` nothing would be stored.
    await ctx.store.set("html_resource", html_text)
    return html_text

async def generate_blogpost(ctx: Context, text: str) -> str:
    """
    Generate a blogpost in a markdown format based on the raw html of a resource.

    The draft is printed and the human is asked for feedback through the
    workflow's event stream. An empty answer accepts the current draft; any
    other answer is fed back into the prompt and a new draft is generated.

    This is a coroutine because waiting for the human response event must be
    awaited.

    Args:
        ctx (Context): workflow context used to emit the question and wait for
            the human's answer.
        text (str): raw html of the resource the blogpost is based on.

    Returns:
        str: the text of the draft the human accepted.
    """
    base_task = f"Based on the this resource html {text}, generate a blogpost about the topic in markdown format."
    question = "do you like what you see?"
    resp = chat_model.complete(base_task)

    while True:
        # Show the current draft every round so the feedback refers to the
        # version that will actually be returned.
        print(resp.text)

        human_feedback = await ctx.wait_for_event(
            HumanResponseEvent,
            waiter_id=question,
            waiter_event=InputRequiredEvent(
                prefix=question
            )
        )
        # The awaited value is the HumanResponseEvent; the typed answer lives
        # in its `response` field.
        feedback_text = human_feedback.response.strip()
        if not feedback_text:
            return resp.text

        task = f"{base_task} Take into account previous feedback which was: {feedback_text}."
        resp = chat_model.complete(task)


In [4]:
# Wrap the plain functions as agent tools. Name and description are given
# explicitly for each tool.
web_search_tool = FunctionTool.from_defaults(
    name="get_relevant_webpages",
    fn=get_relevant_webpages,
    description=(
        "Useful for getting a list of relevant webpages (url links) for a particular query. "
        "Together with the web links the list also include a short information about the answer for the query."
    ),
)

page_download_tool = FunctionTool.from_defaults(
    name="download_webpage",
    fn=download_webpage,
    description=(
        "Useful for dowloading raw html of the page, storing it in the agent's context "
        "and returning the content of that page."
    ),
)

blogpost_write_tool = FunctionTool.from_defaults(
    name="generate_blogpost",
    fn=generate_blogpost,
    description="Generate a blogpost based on the provided resource's html webpage.",
)


# Final stage: turns downloaded html into a markdown blogpost.
blogpost_writer_agent = ReActAgent(
    name = "BlogpostWriter", 
    description = "Write a blogpost in markdown based on a resource's web page html",
    system_prompt = "You receive a raw html of a webpage that describes a topic at interest, you need to generate a blogpost in markdown format that would be modern, factual and not too verbose.",
    tools=[blogpost_write_tool],
    verbose=True,
    llm=chat_model,
)

# Middle stage: downloads one page, then passes control to the writer.
# The prompt refers to the tool by its registered name ('download_webpage'),
# which is the only name the agent can actually call.
download_page_agent = ReActAgent(
    name = "WebpageDownloader", 
    description = "Download a web page's html",
    system_prompt = (
        "You receive a url link and the order number that you need to pass to the 'download_webpage' tool. "
        "Once the page is downloaded, hand off to the 'BlogpostWriter' agent."
    ),
    tools=[page_download_tool],
    verbose=True,
    llm=chat_model,
    can_handoff_to=["BlogpostWriter"],
)

# First stage: searches the web and passes a result on to the downloader.
# NOTE(review): the original description and system prompt were cut off
# mid-sentence; the completed wording below is a best guess - confirm intent.
search_agent = ReActAgent(
    name = "WebSearcher", 
    description = "Search the web and give links to the relevant pages found.",
    system_prompt = (
        "Use the 'get_relevant_webpages' tool to search for the topic. "
        "You will receive a list of webpages on a topic in the format "
        "[{'title': '<....>', 'href': 'https://<....>', 'body': '<....>'}]. "
        "For each element of the list consider its 'href' url and 'body' summary, "
        "then hand off to the 'WebpageDownloader' agent with the url of the most relevant page "
        "and its order number in the list."
    ),
    tools=[web_search_tool],
    verbose=True,
    llm=chat_model,
    can_handoff_to=["WebpageDownloader"],
)

workflow = AgentWorkflow(agents=[search_agent,download_page_agent,blogpost_writer_agent], root_agent="BlogpostWriter")

ctx = Context(workflow=workflow)
handler = workflow.run("History of Prussia", ctx=ctx)

async for event in handler.stream_events():
    if isinstance(event, InputRequiredEvent):
        response = input(event.prefix)
        handler.ctx.send_event(
            HumanResponseEvent(
                response=response
            )
        )

answer = await handler        
print(answer)



The history of Prussia dates back to around 1075 when it was established as a duchy under the Kingdom of Denmark. It went through various transformations and had numerous rulers over time, including the Hohenzollern dynasty who were the dominant power in Prussia.

In 1871, Prussia played a pivotal role in unifying Germany after their victory against France during the Franco-Prussian War. The outcome led to the creation of the German Empire with the House of Hohenzollern as its ruling family.

From then on, Prussia became part of Germany and was instrumental in shaping modern European politics until World War II when it lost territories including East Prussia due to post-war redrawing of borders by the Allies. 

After World War II, Berlin (Prussian capital) split into four zones occupied by the allies, leading to its division into today's East and West Berlin.

The history of Prussia is rich with significant contributions in fields such as military tactics, literature, science, music an

{'_state': DictState(),
 '_lock': <asyncio.locks.Lock object at 0x309eab650 [unlocked]>}