In [1]:
import json
from langchain.schema.output_parser import StrOutputParser
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from dotenv import load_dotenv


# Load settings from a local .env file into the environment before any client is built.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

True

In [2]:
from langchain.prompts import ChatPromptTemplate
import requests
from bs4 import BeautifulSoup
from langchain.utilities import DuckDuckGoSearchAPIWrapper

In [3]:
# Shared DuckDuckGo search client; web_search() below queries it.
ddg_search = DuckDuckGoSearchAPIWrapper()

def web_search(query: str, num_results: int = 3):
    """Return the result links DuckDuckGo reports for ``query``.

    Args:
        query: Search string passed to ``ddg_search.results``.
        num_results: Number of results requested from ``ddg_search.results``.

    Returns:
        A list with the ``'link'`` value of every result that carries one.
        The list is empty when the search fails, so callers that iterate over
        the return value never walk an error message character by character.
    """
    try:
        results = ddg_search.results(query, num_results)
        # Skip entries without a link instead of discarding the whole batch.
        return [r["link"] for r in results if "link" in r]
    except Exception as e:
        # Best-effort: report the failure and continue with no URLs.
        print(f"Error: {e}")
        return []

In [4]:
# Prompt asking the model to answer {question} from the scraped {text}, falling
# back to a plain summary when the text does not contain the answer.
SUMMARY_TEMPLATE = """{text} 

-----------

Using the above text, answer in short the following question: 

> {question}
 
-----------
if the question cannot be answered using the text, simply summarize the text. Include all factual information, numbers, stats etc if available."""  # noqa: E501

# Chat prompt built from SUMMARY_TEMPLATE; scrape_and_summarize_chain pipes it into the model.
SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)


In [5]:
def scrap_text(url: str, timeout: float = 10.0):
    """Download ``url`` and return the text content of the page.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds ``requests.get`` may wait on the server before giving
            up; without it a stalled host would block the whole chain.

    Returns:
        The page text with fragments stripped and joined by single spaces, or
        the string ``"Error: <message>"`` when fetching or parsing fails.
    """
    try:
        page = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(page.content, 'html.parser')

        # Extract all the text from the page
        return soup.get_text(separator=' ', strip=True)

    except Exception as e:
        # Best-effort scraping: report the failure and hand back an error marker.
        print(e)
        return f"Error: {e}"

In [6]:
# For one {"question", "url"} input: scrape the page, keep the first 10000
# characters, have the model summarize them, then label the summary with its URL.
scrape_and_summarize_chain = (
    RunnablePassthrough.assign(
        summary=(
            RunnablePassthrough.assign(
                text=lambda inputs: scrap_text(inputs["url"])[:10000]
            )
            | SUMMARY_PROMPT
            | ChatOpenAI(model="gpt-3.5-turbo-1106")
            | StrOutputParser()
        )
    )
    | (lambda inputs: f"URL: {inputs['url']}\n\nSUMMARY: {inputs['summary']}")
)


In [7]:
# For one {"question"} input: look up URLs, pair each URL with the question,
# and run scrape_and_summarize_chain over every pair.
web_search_chain = (
    RunnablePassthrough.assign(urls=lambda inputs: web_search(inputs["question"]))
    | (
        lambda inputs: [
            {"question": inputs["question"], "url": link}
            for link in inputs["urls"]
        ]
    )
    | scrape_and_summarize_chain.map()
)



In [8]:
# Single user message asking the model for three search queries about
# {question}, formatted as a list of strings.
SEARCH_PROMPT = ChatPromptTemplate.from_messages(
    [
        (
            "user",
            "Write 3 google search queries to search online that form an objective "
            "opinion from the following: {question}\n"
            "You must respond with a list of strings in the following format: "
            '["query 1", "query 2", "query 3"].',
        )
    ]
)

In [9]:
# Ask the model for search queries and decode its reply with json.loads.
search_question_chain = (
    SEARCH_PROMPT
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    | json.loads
)

In [10]:
# Generate search queries, wrap each one as {"question": ...}, and run
# web_search_chain on every query.
full_research_chain = (
    search_question_chain
    | (lambda queries: [{"question": query} for query in queries])
    | web_search_chain.map()
)

In [12]:
# System message used for the report-writing step.
WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."  # noqa: E501

# Report prompts from https://github.com/assafelovic/gpt-researcher/blob/master/gpt_researcher/master/prompts.py
# Placeholders: {research_summary} and {question}.
RESEARCH_REPORT_TEMPLATE = """Information:
--------
{research_summary}
--------
Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
The report should focus on the answer to the question, should be well structured, informative, \
in depth, with facts and numbers if available and a minimum of 1,200 words.
You should strive to write the report as long as you can using all relevant and necessary information provided.
You must write the report with markdown syntax.
You MUST determine your own concrete and valid opinion based on the given information. Do NOT defer to general and meaningless conclusions.
Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.
You must write the report in apa format.
Please do your best, this is very important to my career."""  # noqa: E501

# Two-message chat prompt: the writer system prompt, then the report request.
_RESEARCH_MESSAGES = [
    ("system", WRITER_SYSTEM_PROMPT),
    ("user", RESEARCH_REPORT_TEMPLATE),
]
RESEARCH_PROMPT = ChatPromptTemplate.from_messages(_RESEARCH_MESSAGES)

In [13]:
def collapse_list_of_lists(list_of_lists):
    """Flatten nested lists of strings into one blank-line-separated string.

    Each inner list is joined with a blank line between its items, and the
    resulting chunks are joined with a blank line between them as well.
    """
    separator = "\n\n"
    return separator.join(separator.join(group) for group in list_of_lists)

In [14]:
# End-to-end pipeline: add `research_summary` (all source summaries collapsed
# into one string) to the input, then have the model write the report.
chain = (
    RunnablePassthrough.assign(
        research_summary=full_research_chain | collapse_list_of_lists
    )
    | RESEARCH_PROMPT
    | ChatOpenAI(model="gpt-3.5-turbo-1106")
    | StrOutputParser()
)


In [17]:
# NOTE(review): displaying the chain prints the repr of every component, and the
# ChatOpenAI repr includes the openai_api_key value — clear this cell's output before sharing.
chain

RunnableAssign(mapper={
  research_summary: ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='Write 3 google search queries to search online that form an objective opinion from the following: {question}\nYou must respond with a list of strings in the following format: ["query 1", "query 2", "query 3"].'))])
                    | ChatOpenAI(client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, temperature=0.0, openai_api_key='sk-***REDACTED***', openai_proxy='')
                    | StrOutputParser()
                    | RunnableLambda(...)
                    | RunnableLambda(lambda x: [{'question': q} for q in x])
                    | RunnableEach(bound=RunnableAssign(mapper={
                        urls: RunnableLambda(lambda x: web_search(x['question']))
                      })
                      | RunnableLambda(...)
           