# WebResearchRetriever

Given a query, this retriever will: 

* Formulate a set of related Google searches
* Search for each 
* Load all the resulting URLs
* Then embed and perform similarity search with the query on the consolidated page content

In [None]:
# Dependency setup for this notebook. Only google-api-python-client is
# installed by default; uncomment any of the lines below for packages that are
# missing from your environment.
print("Checking dependencies...")
# %pip install --upgrade pip --quiet
# %pip install langchain --quiet
# %pip install python-dotenv --quiet
# %pip install openai --quiet
# %pip install beautifulsoup4 --quiet
# %pip install chromadb --quiet
%pip install google-api-python-client --quiet
print("Done!")


from dotenv import load_dotenv
# NOTE(review): presumably loads settings such as API keys from a local .env
# file into the process environment (python-dotenv) — confirm a .env file
# exists if later cells rely on it.
load_dotenv()

In [2]:
from langchain.retrievers.web_research import WebResearchRetriever

### Simple usage

Specify the LLM to use for Google search query generation.

In [3]:
import os


from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models.openai import ChatOpenAI
from langchain.utilities import GoogleSearchAPIWrapper

# Vectorstore: Chroma collection persisted under ./chroma_db_oai, using
# OpenAI embeddings.
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./chroma_db_oai",
)

# LLM used to generate the Google search queries.
llm = ChatOpenAI(temperature=0)

# Search: the Google credentials are taken from the environment.
# `setdefault` keeps any real GOOGLE_CSE_ID / GOOGLE_API_KEY that is already
# set (e.g. loaded from a .env file) and only falls back to the "xxx"
# placeholder when the variable is missing. Assigning the placeholder
# unconditionally would overwrite valid credentials and make every search
# fail with "API key not valid".
os.environ.setdefault("GOOGLE_CSE_ID", "xxx")
os.environ.setdefault("GOOGLE_API_KEY", "xxx")
search = GoogleSearchAPIWrapper()

In [4]:
# Build the retriever from its three parts: the LLM that writes the search
# queries, the Google search wrapper, and the vectorstore that indexes the
# loaded pages.
web_research_retriever = WebResearchRetriever.from_llm(
    llm=llm,
    search=search,
    vectorstore=vectorstore,
)

#### Run with citations

We can use `RetrievalQAWithSourcesChain` to retrieve docs and provide citations.

In [6]:
from langchain.chains import RetrievalQAWithSourcesChain
# Ask a question through a QA chain that cites the retrieved sources.
user_input = "How do LLM Powered Autonomous Agents work?"
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm,
    retriever=web_research_retriever,
)
result = qa_chain({"question": user_input})

# Last expression of the cell: display the chain output.
result

HttpError: <HttpError 400 when requesting https://customsearch.googleapis.com/customsearch/v1?q=1.+What+is+the+functioning+principle+of+LLM+Powered+Autonomous+Agents%3F&cx=xxx&num=1&key=xxx&alt=json returned "API key not valid. Please pass a valid API key.". Details: "[{'message': 'API key not valid. Please pass a valid API key.', 'domain': 'global', 'reason': 'badRequest'}]">

#### Run with logging

Here, we use the `get_relevant_documents` method to return docs.

In [None]:
# Enable INFO-level log output for the retriever module, then fetch documents.
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.web_research").setLevel(logging.INFO)

user_input = "What is Task Decomposition in LLM Powered Autonomous Agents?"
docs = web_research_retriever.get_relevant_documents(user_input)

#### Generate answer using retrieved docs

We can use `load_qa_chain` for QA using the retrieved docs.

In [None]:
from langchain.chains.question_answering import load_qa_chain
# Answer the question from the documents retrieved in the previous step.
chain = load_qa_chain(llm, chain_type="stuff")
output = chain(
    {"input_documents": docs, "question": user_input},
    return_only_outputs=True,
)

# Last expression of the cell: display only the answer text.
output["output_text"]

### More flexibility

Pass an LLM chain with custom prompt and output parsing.

In [None]:
import os
import re
from typing import List
from langchain.chains import LLMChain
from pydantic import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain.output_parsers.pydantic import PydanticOutputParser

# Prompt for the query-generation chain: asks for five numbered Google search
# queries, each ending in a question mark, based on {question}.
search_prompt = PromptTemplate(
    template="""You are an assistant tasked with improving Google search 
    results. Generate FIVE Google search queries that are similar to
    this question. The output should be a numbered list of questions and each
    should have a question mark at the end: {question}""",
    input_variables=["question"],
)

class LineList(BaseModel):
    """List of questions."""

    # One entry per numbered question extracted from the LLM output.
    lines: List[str] = Field(description="Questions")

class QuestionListOutputParser(PydanticOutputParser):
    """Output parser for a list of numbered questions."""

    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        """Extract the numbered items ("1. ...", "2. ...") from *text*.

        Each item runs from its number to the end of that line. The final
        item is matched even when *text* does not end with a newline.

        Args:
            text: Raw LLM output containing a numbered list.

        Returns:
            A ``LineList`` whose ``lines`` holds one string per numbered item,
            in order of appearance (empty if no item is found).
        """
        # End each match at a newline OR at end of input. Requiring a newline
        # would silently drop the last question whenever the completion does
        # not end with one.
        lines = re.findall(r"\d+\..*?(?:\n|$)", text)
        return LineList(lines=lines)
    
# Chain that turns a user question into a parsed list of search queries.
llm_chain = LLMChain(
    llm=llm,
    prompt=search_prompt,
    output_parser=QuestionListOutputParser(),
)

In [None]:
# Build the retriever around the custom query-generation chain.
web_research_retriever_llm_chain = WebResearchRetriever(
    llm_chain=llm_chain,
    search=search,
    vectorstore=vectorstore,
)

# Retrieve documents for the current question.
docs = web_research_retriever_llm_chain.get_relevant_documents(user_input)

In [None]:
# Number of documents the retriever returned.
len(docs)

### Run locally

Specify LLM and embeddings that will run locally (e.g., on your laptop).

In [None]:
from langchain.llms import LlamaCpp
from langchain.embeddings import GPT4AllEmbeddings
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Settings for the local llama.cpp model.
n_gpu_layers = 1  # With Metal, 1 is enough.
n_batch = 512  # Between 1 and n_ctx; size it to the RAM of your Apple Silicon chip.

# Send model output to stdout as it is generated.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llama = LlamaCpp(
    # Machine-specific path: point this at your own copy of the model file.
    model_path="/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin",
    n_ctx=4096,  # Context window
    max_tokens=1000,  # Max tokens to generate
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    f16_kv=True,  # MUST be True, otherwise problems appear after a couple of calls
    callback_manager=callback_manager,
    verbose=True,
)

# Separate Chroma store for the locally computed embeddings.
vectorstore_llama = Chroma(
    embedding_function=GPT4AllEmbeddings(),
    persist_directory="./chroma_db_llama",
)

We supplied `StreamingStdOutCallbackHandler()`, so model outputs (e.g., generated questions) are streamed. 

We also have logging on, so we see them there too.

In [None]:
from langchain.chains import RetrievalQAWithSourcesChain
# Rebuild the retriever on top of the local model and the local vectorstore.
web_research_retriever = WebResearchRetriever.from_llm(
    llm=llama,
    search=search,
    vectorstore=vectorstore_llama,
)

# Ask the question through a sources-citing QA chain driven by the local model.
user_input = "What is Task Decomposition in LLM Powered Autonomous Agents?"
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llama,
    retriever=web_research_retriever,
)
result = qa_chain({"question": user_input})

# Last expression of the cell: display the chain output.
result