# Web Browsing

###  **Full Browsing**

In [5]:
from warnings import warn

from aiohttp import ClientSession
from lxml import html
from langchain.prompts import PromptTemplate
from fastapi.concurrency import run_in_threadpool

from app.shared import Shared
from app.utils.chat.chains import Chains
from app.common.constants import QueryBasedSearchTemplates, QueryTemplates
from app.utils.logger import CustomLogger, LoggingConfig

# Notebook-wide logger named "Browsing"; its logging config points the file log
# at ./logs/notebook.log.
logger = CustomLogger(
    "Browsing", logging_config=LoggingConfig(file_log_name="./logs/notebook.log")
)

# Prompt for the "click or finish" decision over a set of search-result snippets.
# The model is told to answer with JSON of the form
# {"action": "click" | "finish", "link": LINK_TO_CLICK}, where "link" may be
# omitted when the action is "finish".
# Rendered with jinja2; the only template variables are `context` and `query`.
# NOTE(review): the description of the CONTEXT layout uses escaped `\\n`, i.e. the
# model sees a literal backslash-n in "#[LINK]\n```TITLE\nSNIPPET", and that
# description runs straight into the opening ```CONTEXT fence — confirm this is
# the intended wording.
FEEDBACK_CLICK_OR_NOT_TEMPLATE: PromptTemplate = PromptTemplate(
    template=(
        "You are a feedback bot that determines if the provided CONTEXT is sufficient to "
        "answer the user's question. Follow the rules below to output a response.\n- Outpu"
        't your next action to do in JSON form like {"action": YOUR_NEXT_ACTION, "link": '
        'LINK_TO_CLICK}.\n- "action" should be one of "click", "finish".\n- {"action": "cli'
        'ck"} should be selected when you want to click on a link to read more about it.\n'
        '- {"action": "finish"} should be selected when the information already provided '
        'is sufficient to answer the user.\n- "link" should be a link to click. You don\'t '
        'have to output "link" if you decided to take "action" as "finish".\n- CONTEXT con'
        "sists of multiple #[LINK]\\n```TITLE\\nSNIPPET\n```CONTEXT\n{{context}}\n```\n```USER'"
        "S QUESTION\n{{query}}\n```"
    ),
    input_variables=["context", "query"],
    template_format="jinja2",
)
# Prompt for the "is this context enough?" decision over a single piece of text.
# The model is told to answer with JSON of the form {"is_sufficient": true | false}.
# Rendered with jinja2; the only template variables are `context` and `query`.
FEEDBACK_SUFFICIENT_OR_NOT_TEMPLATE: PromptTemplate = PromptTemplate(
    template=(
        "You are a feedback bot that uses the context provided to determine if you can an"
        "swer the user's question. Follow the rules below to output a response.\n- Output "
        'your next action to do in JSON format like {"is_sufficient": TRUE_OR_FALSE}.\n- "'
        'is_sufficient" should be one of true or false.\n```CONTEXT\n{{context}}\n```\n```USE'
        "R'S QUESTION\n{{query}}\n```"
    ),
    input_variables=["context", "query"],
    template_format="jinja2",
)

# DuckDuckGo search helper taken from the application's Shared object; used
# below to fetch search-result snippets keyed by link.
ddg = Shared().duckduckgo

In [10]:
query = "Explain to me what the vision pro is that Apple released."
query_to_search = await Chains._aget_query_to_search(
    query=query, query_template=QueryBasedSearchTemplates.QUERY__JSONIFY_WEB_BROWSING
)
if query_to_search is None:
    query_to_search = query
    warn(f"No query template found for query: {query}")
print(query_to_search)

Apple vision pro


In [6]:
async def scrolling(
    link: str,
    tokens_per_chunk: int,
    chunk_overlap: int,
) -> str | None:
    """Read the page behind ``link`` chunk by chunk until one chunk is sufficient.

    The page is downloaded, the text of every ``<p>`` element is joined with
    newlines and split by ``Shared().token_text_splitter``. Each chunk is then
    shown to the feedback bot (``FEEDBACK_SUFFICIENT_OR_NOT_TEMPLATE``), which
    decides whether the chunk is enough to answer the question.

    NOTE: the question is read from the module-level ``query`` variable, not
    from a parameter, so that variable must be defined before calling this.

    Args:
        link: URL of the page to read.
        tokens_per_chunk: Chunk size forwarded to the text splitter.
        chunk_overlap: Chunk overlap forwarded to the text splitter.

    Returns:
        The first (stripped) chunk the feedback bot judged sufficient, or
        ``None`` when the page is empty or no chunk was judged sufficient.
    """
    # Only the download needs the HTTP session. ``async with`` on the request
    # releases the connection once the body is read, and the session is closed
    # before the (slow) feedback calls start.
    async with ClientSession() as session:
        async with session.get(link) as res:
            page_source: str = await res.text()

    # lxml refuses to parse an empty document, so treat an empty body as
    # "nothing to read here" instead of failing.
    if not page_source.strip():
        logger.info("This link is not sufficient to answer the user's question.")
        return None

    paragraphs = html.fromstring(page_source).xpath("//p")
    scrollable_contents: list[str] = Shared().token_text_splitter.split_text(
        "\n".join(p.text_content().strip() for p in paragraphs),
        tokens_per_chunk=tokens_per_chunk,
        chunk_overlap=chunk_overlap,
    )

    for scrollable_content in scrollable_contents:
        scrollable_content = scrollable_content.strip()
        logger.info(f"Reading content: {scrollable_content}")
        # The template's variables are ``context`` and ``query``; the question
        # must be passed as ``query`` or the {{query}} slot is left unfilled.
        sufficient_or_not_json = await Chains.aget_json(
            query_template=FEEDBACK_SUFFICIENT_OR_NOT_TEMPLATE,
            context=scrollable_content,
            query=query,
        )
        logger.info(f"sufficient_or_not_json: {sufficient_or_not_json}")
        if not isinstance(
            sufficient_or_not_json, dict
        ) or sufficient_or_not_json.get("is_sufficient") not in (True, False):
            # Unusable answer from the feedback bot: move on to the next chunk.
            logger.info("Reading content failed.")
            continue
        if sufficient_or_not_json["is_sufficient"]:
            logger.info("Feedback bot decided to finish browsing.")
            return scrollable_content

    logger.info("This link is not sufficient to answer the user's question.")
    return None

In [11]:
async def full_browsing(
    query: str,
    tokens_per_chunk: int,
    chunk_overlap: int,
) -> str | None:
    """Search the web for ``query`` and browse results until an answer is found.

    The question is first turned into a search query (falling back to the raw
    question when none is produced) and sent to DuckDuckGo. The feedback bot
    (``FEEDBACK_CLICK_OR_NOT_TEMPLATE``) is then asked repeatedly whether the
    remaining snippets already answer the question ("finish") or which link
    to open next ("click"). A clicked link is removed from the candidates and
    read with ``scrolling``.

    Args:
        query: The user's question.
        tokens_per_chunk: Chunk size forwarded to ``scrolling``.
        chunk_overlap: Chunk overlap forwarded to ``scrolling``.

    Returns:
        The joined snippets when the bot chooses "finish", the sufficient
        chunk returned by ``scrolling`` after a "click", or ``None`` when the
        bot's answer is unusable or every candidate link has been exhausted.
    """
    query_to_search: str | None = await Chains._aget_query_to_search(
        query=query,
        query_template=QueryBasedSearchTemplates.QUERY__JSONIFY_WEB_BROWSING,
    )
    if query_to_search is None:
        query_to_search = query

    snippets_with_link: dict[str, str] = await run_in_threadpool(
        ddg.formatted_results_with_link, query=query_to_search
    )

    while snippets_with_link:
        context = "\n\n".join(snippets_with_link.values())
        action_and_link_json = await Chains.aget_json(
            query_template=FEEDBACK_CLICK_OR_NOT_TEMPLATE,
            query=query,
            context=context,
        )
        logger.info(f"action_and_link_json: {action_and_link_json}")
        if not isinstance(action_and_link_json, dict):
            logger.info("Failed browsing.")
            return None

        action = action_and_link_json.get("action")
        if action == "finish":
            logger.info("We've got the answer!")
            return context

        # Anything other than "click" is invalid here, and a "click" is only
        # usable when it names one of the links that are still candidates;
        # otherwise the pop() below would raise KeyError.
        link = action_and_link_json.get("link")
        if (
            action != "click"
            or not isinstance(link, str)
            or link not in snippets_with_link
        ):
            logger.info("Failed browsing.")
            return None

        # Drop the link first so it is never offered to the bot again.
        snippets_with_link.pop(link)
        scroll_result: str | None = await scrolling(
            link=link,
            tokens_per_chunk=tokens_per_chunk,
            chunk_overlap=chunk_overlap,
        )
        if scroll_result is not None:
            logger.info("We've got the answer!")
            return scroll_result
        logger.info("We still don't have the answer.")

    # Every candidate link was visited without finding a sufficient answer
    # (or the search returned nothing at all).
    return None

In [13]:
browsing_result = (
    await full_browsing(
        query=query,
        tokens_per_chunk=2048,
        chunk_overlap=1024,
    ),
)

[2023-06-13 21:20:35,213] Browsing:INFO - action_and_link_json: {'action': 'click', 'link': 'https://www.techradar.com/news/apple-vision-pro-everything-we-know'}
[2023-06-13 21:20:35,760] Browsing:INFO - Reading content: When you purchase through links on our site, we may earn an affiliate commission. Here’s how it works.
The Apple Vision Pro AR and VR headset is a reality
After years of rumors, leaks, and speculation, Apple has finally unveiled the Vision Pro, its first AR and VR headset.
-Mixed reality headset
-Dual M2 and R1 chip setup
-4K resolution per eye
-No controllers, uses hand tracking and voice inputs
-External battery pack
-Two-hour battery life
-Starts at $3,499 (around £2,800 / AU$5,300)
-Runs on visionOS
The announcement came as the 'One more thing' of Apple's WWDC 2023 event, at which it also unveiled a new 15-inch MacBook Air, new features coming with iOS 17, and a new M2 Ultra processor, among other reveals.
Many of the leaks we'd heard proved to be correct, although

In [14]:
llm = Shared().openai_llm
llm_output = await llm.apredict(  # type: ignore
    QueryTemplates.CONTEXT_QUESTION__DEFAULT.format(
        context=browsing_result, question=query
    )
)
print(llm_output)

The Vision Pro is Apple's first AR and VR headset, featuring a mixed reality headset design with dual M2 and R1 chip setup, 4K resolution per eye, no controllers, external battery pack, and two-hour battery life. It uses hand tracking and voice inputs to control its visionOS software. The headset is expected to release sometime "early next year" and starts at $3,499. It relies solely on tracking hand and eye movements as well as voice inputs to control its apps and experiences. The headset uses an array of 12 cameras, five sensors, and six microphones to pick up these inputs. The headset uses two chipsets, an M2 and a new R1 to handle regular software and its XR capabilities, respectively. The Vision Pro is expected to run most Mac apps natively, and users will be able to access their Mac apps and see their screen on a large immersive display when connected to a Mac via Bluetooth.
