# Setup

In [73]:
import os
import operator
import re

from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_community.retrievers import WikipediaRetriever
from langchain_core.runnables import chain as as_runnable
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.runnables import RunnableLambda

import json
from langchain_core.runnables import RunnableConfig

from typing import List, Optional

from typing import Annotated
from langchain_core.messages import AnyMessage
from typing_extensions import TypedDict
from langgraph.graph import END, StateGraph, START

In [74]:
# Set API keys. Both are read from the environment; a missing variable raises KeyError here.
api_key = os.environ["ANTHROPIC_API_KEY"]
openai_api_key = os.environ["OPENAI_API_KEY"]

# Initialize models. We'll use a smaller LLM for most of the work, and a larger LLM to distill conversations and write the final report.
fast_llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o-mini")
slow_llm = ChatAnthropic(api_key=api_key, model="claude-3-5-sonnet-20240620")

# Generating the Outline

## Generate Conversations and Resources

In this section we will fetch related articles and develop a set of personas based on the outlines of the fetched related articles. Once the personas have been created, we will then pass them one by one into a conversation cycle.

In the conversation cycle, the persona will ask questions about the topic to an expert on the topic. The expert will take each question, split it into search queries, and then find resources online based on these queries. The expert will then sift through these, collect the trusted sources, and answer the question using these sources. The persona will read the response and ask another question until they have no more questions. The conversation will be saved for the creation of the final outline.

Once all of the personas have conversed with the expert, we will have compiled a set of conversations and trusted resources to be used in the final outline.

This section will be the bulk of the work.

### Fetch Related Articles

In [75]:
# Structured-output schema for the related-topics chain: the LLM is asked to
# return its suggestions as a flat list of topic strings.
class RelatedTopics(BaseModel):
    topics: List[str] = Field(
        description="List of related topics to help in generating personas.",
    )

In [76]:
# Prompt asking the model for subjects related to `{topic}`; the result is
# requested through the RelatedTopics schema.
related_topics_prompt = ChatPromptTemplate.from_template(
"""I'm writing a Wikipedia page for the topic mentioned below. 
Please identify and recommend some related subjects that might be interesting. 
I'm looking for related subjects that provide insights into interesting aspects commonly associated with this topic.

Feel free to list things that are only tangentially related

Please list as many subjects as you can.

Topic of interest: {topic}

make sure to call the RelatedTopics function.
"""
)

# Prompt -> fast model, with the reply parsed into a RelatedTopics instance.
related_topics_chain = related_topics_prompt | fast_llm.with_structured_output(
    RelatedTopics
)

In [77]:
# Run the related-topics chain once on an example topic (top-level await: notebook cell).
example_topic = "AI agents and the potential economic, social, and environmental impacts."
related_topics = await related_topics_chain.ainvoke({"topic": example_topic})
related_topics

RelatedTopics(topics=['AI agents', 'economic impacts of AI', 'social impacts of AI', 'environmental impacts of AI', 'automation', 'machine learning', 'robotics', 'digital economy', 'job displacement', 'AI ethics', 'sustainability', 'technology adoption', 'data privacy', 'AI in healthcare', 'AI in education', 'smart cities', 'AI governance', 'future of work', 'AI and climate change', 'AI policy', 'AI and inequality'])

In [78]:
# Wikipedia retriever used to look up one article per related topic
wikipedia_retriever = WikipediaRetriever(load_all_available_meta=True, top_k_results=1)

@as_runnable
async def getRelatedArticles(topic: str) -> List[str]:
    """Fetch and format Wikipedia articles for topics related to *topic*.

    The related topics are generated by ``related_topics_chain``; each one is
    then looked up with ``wikipedia_retriever``. Lookups are batched with
    ``return_exceptions=True`` and the results are handed to
    ``formatArticles`` for formatting.
    """
    suggested = await related_topics_chain.ainvoke({"topic": topic})
    topic_names = suggested.topics
    lookups = await wikipedia_retriever.abatch(topic_names, return_exceptions=True)
    return formatArticles(lookups)

def formatArticles(unformattedArticles: List) -> List[str]:
    """Render retriever results as text snippets for the persona prompt.

    Args:
        unformattedArticles: One entry per looked-up topic, as produced by a
            batched retriever call with ``return_exceptions=True``: either a
            list of documents (each exposing a ``metadata`` mapping) or the
            exception raised for that topic.

    Returns:
        One "Title / Summary / Categories" string per topic that yielded at
        least one document. Failed lookups and empty result lists are skipped.
    """
    formatted_articles = []
    for result in unformattedArticles:
        # A failed lookup is the exception object itself, so it must be
        # detected *before* indexing into the entry; an empty result list
        # (no matching page) has no first document either.
        if isinstance(result, BaseException) or not result:
            continue
        metadata = result[0].metadata
        formatted_articles.append(
            "\n\n"
            f"Title: {metadata['title']}\n"
            f"Summary:\n{metadata.get('summary', '')}"
            f"\nCategories: {', '.join(metadata.get('categories', []))}"
        )
    return formatted_articles

In [79]:
related_articles = await getRelatedArticles.ainvoke(related_topics)

In [80]:
related_articles  # notebook echo: display the formatted article snippets

['\n\nTitle: Intelligent agent\nSummary:\nIn intelligence and artificial intelligence, an intelligent agent (IA) is an agent that perceives its environment, takes actions autonomously in order to achieve goals, and may improve its performance with learning or acquiring knowledge. An intelligent agent may be simple or complex: A thermostat or other control system is considered an example of an intelligent agent, as is a human being, as is any system that meets the definition, such as a firm, a state, or a biome.\n\nLeading AI textbooks define "artificial intelligence" as the "study and design of intelligent agents", a definition that considers goal-directed behavior to be the essence of intelligence. Goal-directed agents are also described using a term borrowed from economics, "rational agent".\nAn agent has an "objective function" that encapsulates all the IA\'s goals. Such an agent is designed to create and execute whatever plan will, upon completion, maximize the expected value of th

### Create Personas

In [81]:
# Prompt asking the model to propose a set of writer personas for `{topic}`,
# using the formatted related articles (`{example_articles}`) as inspiration.
# The reply is requested through the WikiWriter/Perspectives schema.
gen_perspectives_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You need to select a diverse (and distinct) group of wikipedia writers who will 
            work together to create a comprehensive article on the given topic.
            Each of them represents a different perspective, role, or persona related to this topic.\
            You can use the provided related Wikipedia pages of related topics for inspiration. 
            For each writer, add a description of what they will focus on.

            Make sure to call the WikiWriter function.

            Wiki page outlines of related topics for inspiration:
            {example_articles}""",
        ),
        ("user", "Topic of interest: {topic}"),
    ])

# Matches every character that is NOT a letter, digit, underscore or hyphen.
_UNSAFE_NAME_CHARS = re.compile(r'[^a-zA-Z0-9_-]')


def sanitize_name(name):
    """Return *name* with every character outside ``[a-zA-Z0-9_-]`` removed."""
    return _UNSAFE_NAME_CHARS.sub('', name)

# Structured-output schema for one writer persona ("editor") taking part in the
# interviews. Documented with comments rather than a class docstring so the
# schema text handed to the model stays exactly as written.
class WikiWriter(BaseModel):
    # NOTE(review): the `pattern` below is evidently not enforced when a
    # WikiWriter is built -- names containing spaces and periods (e.g.
    # "Default Writer") are created and returned elsewhere in this file -- so
    # names are passed through sanitize_name() before being put on messages.
    name: str = Field(
        description="Name of the editor.", pattern=r"^[a-zA-Z0-9_-]{1,64}$"
    )
    role: str = Field(
        description="Role of the editor in the context of the topic.",
    )
    description: str = Field(
        description="Description of the editor's focus, concerns, and motives.",
    )

    @property
    def persona(self) -> str:
        """Return the writer's name, role and description as a three-line text block."""
        return f"Name: {self.name}\nRole: {self.role}\nDescription: {self.description}\n"

# Structured-output schema wrapping the full list of generated writer personas.
class Perspectives(BaseModel):
    writers: List[WikiWriter] = Field(
        description="Comprehensive list of editors with their roles and affiliations.",
        # TODO: add a pydantic validation/restriction to allow at most M editors
    )

In [82]:
# Prompt -> fast model, with the reply parsed into a Perspectives instance.
perspectives_chain = gen_perspectives_prompt | fast_llm.with_structured_output(Perspectives)

# Generic persona that is always included in addition to the generated ones.
default_writer = WikiWriter(name = "Default Writer", 
                            role = "Basic fact writer",
                            description = "Basic fact writer focusing on broadly covering the basic facts about the topic.")

# Generate perspectives
perspectives = await perspectives_chain.ainvoke({"example_articles": related_articles, "topic": example_topic})
# Add default writer to beginning of list
perspectives.writers.insert(0, default_writer)

In [83]:
perspectives.writers  # notebook echo: default writer first, then the generated personas

[WikiWriter(name='Default Writer', role='Basic fact writer', description='Basic fact writer focusing on broadly covering the basic facts about the topic.'),
 WikiWriter(name='Dr. Alice Thompson', role='Economic Analyst', description='Dr. Thompson will focus on the economic implications of AI agents, exploring how they impact job markets, productivity, and business revenues. She will analyze both the positive and negative effects of automation on different economic sectors and the potential for AI to create new job opportunities.'),
 WikiWriter(name='Prof. John Reynolds', role='Sociologist', description="Prof. Reynolds will examine the social impacts of AI agents, particularly in relation to human interactions, community dynamics, and issues of inequality. He will address concerns about job displacement and the social fabric's response to increasing automation."),
 WikiWriter(name='Emma Green', role='Environmental Scientist', description='Emma will investigate the environmental effects 

### Converse With Expert

The conversation cycle will be its own graph. The state will consist of the current writer in the conversation, the messages in the conversation, and the references.

In [84]:
# Set up state for interview loop

def add_messages(left, right):
    """Concatenate two message collections.

    Either operand may be a single item instead of a list; a non-list operand
    is wrapped in a one-element list before the two are joined.
    """
    left_items = left if isinstance(left, list) else [left]
    right_items = right if isinstance(right, list) else [right]
    return left_items + right_items


def update_references(references, new_references):
    """Merge newly collected references into the accumulated ones.

    Args:
        references: References gathered so far (a dict), or None/empty if
            nothing has been collected yet.
        new_references: References to add; entries here win on key clashes.
            None or an empty dict leaves the accumulated references as they are.

    Returns:
        A new dict holding the merged references. Neither argument is
        modified, so a caller holding on to the previous dict does not see it
        change underneath it.
    """
    merged = dict(references) if references else {}
    if new_references:
        merged.update(new_references)
    return merged


def update_writer(writer, new_writer):
    """Keep the writer that is already set; accept *new_writer* only when none is.

    The writer can therefore only be set at the outset: once a truthy value is
    stored, later updates are ignored.
    """
    return writer if writer else new_writer


class InterviewState(TypedDict):
    """State carried through one interview between a writer persona and the expert."""

    # Conversation so far; annotated with operator.add as its reducer.
    messages: Annotated[List[AnyMessage], operator.add] 
    # Cited references collected during the interview; annotated with
    # update_references as its reducer.
    references: Annotated[Optional[dict], update_references]
    # Persona asking the questions; update_writer keeps the first value that is set.
    writer: Annotated[Optional[WikiWriter], update_writer]

#### Question Asking

In [85]:
# System prompt for the writer persona, followed by the conversation so far.
# A trailing backslash inside the string joins the next line onto the current
# one, so each continued line ends with a space to keep sentences from running
# together, and the paragraph break before "Stay true..." is a real blank line.
# The closing phrase "Thank you so much for your help!" is the signal the
# interview loop looks for to end the conversation.
gen_question_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an experienced Wikipedia writer and want to edit a specific page. \
Besides your identity as a Wikipedia writer, you have a specific focus when researching the topic. \
Now, you are chatting with an expert to get information. Ask good questions to get more useful information.

When you have no more questions to ask, say "Thank you so much for your help!" to end the conversation. \
Please only ask one question at a time and don't ask what you have asked before. \
Your questions should be related to the topic you want to write.
Be comprehensive and curious, gaining as much unique insight from the expert as possible.

Stay true to your specific perspective:

{persona}""",
        ),
        MessagesPlaceholder(variable_name="messages", optional=True),
    ]
)

def label_message_with_writer_name(ai_message: AIMessage, name: str):
    """Stamp *ai_message* with the sanitized writer name and return it.

    The message is modified in place; the same object is returned so the
    function can be used as a step in a chain.
    """
    cleaned_name = sanitize_name(name)
    ai_message.name = cleaned_name
    return ai_message


def set_current_writer(state: InterviewState, name: str):
    """Return the conversation as seen by the participant called *name*.

    Every AIMessage whose name differs from the (sanitized) *name* is rebuilt
    as a HumanMessage, so the current participant sees the other speakers'
    turns as human input. All other messages are passed through unchanged.
    The result is a new ``{"messages": [...]}`` dict; the list in *state* is
    not modified.
    """
    current = sanitize_name(name)

    def _as_seen_by_current(msg):
        # Only AI turns spoken by somebody else need to change role.
        spoken_by_other = isinstance(msg, AIMessage) and msg.name != current
        if spoken_by_other:
            return HumanMessage(**msg.dict(exclude={"type"}))
        return msg

    return {"messages": [_as_seen_by_current(m) for m in state["messages"]]}


@as_runnable
async def gen_question(state: InterviewState):
    """Have the state's writer persona ask its next question.

    The conversation is first re-cast from the writer's point of view, then
    sent to the fast model together with the persona prompt; the reply is
    labelled with the writer's sanitized name and returned as a one-message
    update to ``messages``.
    """
    writer = state["writer"]
    writer_name = sanitize_name(writer.name)

    as_current_writer = RunnableLambda(set_current_writer).bind(name=writer_name)
    persona_prompt = gen_question_prompt.partial(persona=writer.persona)
    tag_with_name = RunnableLambda(label_message_with_writer_name).bind(name=writer_name)

    question_chain = as_current_writer | persona_prompt | fast_llm | tag_with_name
    result = await question_chain.ainvoke(state)
    return {"messages": [result]}


In [86]:
# Try the question generator: seed the conversation with one opening message
# and let the first writer in the list (the default writer) ask a question.
messages = [
    HumanMessage(f"So you said you were writing an article on {example_topic}?")
]
question = await gen_question.ainvoke(
    {
        "writer": perspectives.writers[0],
        "messages": messages,
    }
)

# Notebook echo: text of the generated question.
question["messages"][0].content

'What are some of the most significant economic impacts that AI agents are expected to have on various industries?'

#### Question Answering

Below is the code for generating search engine queries

In [87]:
# Structured-output schema for the search queries the expert wants to run.
class Queries(BaseModel):
    queries: List[str] = Field(
        description="Comprehensive list of search engine queries to answer the user's questions.",
    )

# Prompt that turns the conversation so far into search-engine queries.
gen_queries_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful research assistant. Query the search engine to answer the user's questions.",
        ),
        MessagesPlaceholder(variable_name="messages", optional=True),
    ]
)
# include_raw=True: the chain result is indexed with "parsed" (the Queries
# object) and "raw" (the model's AIMessage) by the code that uses it.
gen_queries_chain = gen_queries_prompt | fast_llm.with_structured_output(Queries, include_raw=True)

In [88]:
# Generate search queries for the example question and show the parsed list.
queries = await gen_queries_chain.ainvoke(
    {"messages": [HumanMessage(content=question["messages"][0].content)]}
)
queries["parsed"].queries

['economic impacts of AI agents on industries',
 'AI agents effects on manufacturing',
 'AI agents influence on healthcare',
 'AI agents impact on finance',
 'AI agents in retail economic implications',
 'AI agents in transportation economic effects',
 'AI agents effects on agriculture industry',
 'AI agents influence on education sector']

In [89]:
queries["raw"]  # notebook echo: the raw AIMessage, including its tool call

AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_qBZzsJ2Iw3VIPRyE5L6f7QTw', 'function': {'arguments': '{"queries":["economic impacts of AI agents on industries","AI agents effects on manufacturing","AI agents influence on healthcare","AI agents impact on finance","AI agents in retail economic implications","AI agents in transportation economic effects","AI agents effects on agriculture industry","AI agents influence on education sector"]}', 'name': 'Queries'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 58, 'prompt_tokens': 95, 'total_tokens': 153}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f33667828e', 'finish_reason': 'stop', 'logprobs': None}, id='run-e9e09bb0-bfee-4be3-ba00-88702fe7fa8a-0', tool_calls=[{'name': 'Queries', 'args': {'queries': ['economic impacts of AI agents on industries', 'AI agents effects on manufacturing', 'AI agents influence on healthcare', 'AI agents impact on fina

Search engine setup

In [90]:
from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from langchain_core.tools import tool

'''
# Tavily is typically a better search engine, but your free queries are limited
search_engine = TavilySearchResults(max_results=4)

@tool
async def search_engine(query: str):
    """Search engine to the internet."""
    results = tavily_search.invoke(query)
    return [{"content": r["content"], "url": r["url"]} for r in results]
'''

# DDG
# One shared wrapper, kept under its own name so it is not shadowed by the
# `search_engine` tool defined right below (and not rebuilt on every call).
_ddg_wrapper = DuckDuckGoSearchAPIWrapper()


@tool
async def search_engine(query: str):
    """Search engine to the internet."""
    # NOTE(review): `_ddgs_text` is a private helper of the wrapper and is
    # called synchronously here, so it presumably blocks the event loop while
    # the request runs -- batched calls may effectively execute one by one.
    results = _ddg_wrapper._ddgs_text(query)
    # Keep only the fields used downstream: snippet text and source URL.
    return [{"content": r["body"], "url": r["href"]} for r in results]

async def search_and_format(queries):
    """Run every query through ``search_engine`` and index the hits by URL.

    Queries are batched with ``return_exceptions=True``; a query whose result
    is an ``Exception`` is skipped. When the same URL appears more than once,
    the content of the latest hit is kept.

    Returns:
        dict mapping each result URL to its content.
    """
    batches = await search_engine.abatch(queries, return_exceptions=True)
    return {
        hit["url"]: hit["content"]
        for batch in batches
        if not isinstance(batch, Exception)
        for hit in batch
    }

Now we will answer the writer's questions

In [91]:
# Structured-output schema for the expert's reply: the answer text plus the
# URLs it cites. Documented with comments rather than a class docstring so the
# schema text handed to the model stays exactly as written.
class AnswerWithCitations(BaseModel):
    answer: str = Field(description="Comprehensive answer to the user's question with citations.")
    cited_urls: List[str] = Field(description="List of urls cited in the answer.")

    @property
    def as_str(self) -> str:
        """Return the answer followed by a numbered list of the cited URLs."""
        numbered = [f"[{idx}]: {url}" for idx, url in enumerate(self.cited_urls, start=1)]
        citations = "\n".join(numbered)
        return f"{self.answer}\n\nCitations:\n\n" + citations


# System prompt for the expert, followed by the conversation so far (which, by
# the time this runs, also carries the search results as a tool message).
gen_answer_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert on the following topic: {topic}. You are chatting with a Wikipedia writer who wants\
 to write a Wikipedia page on the topic you know. You have gathered the related information and will now use the information to form a response.

Make your response as informative as possible and make sure every sentence is supported by the gathered information.
Each response must be backed up by a citation from a reliable source, formatted as a footnote, reproducing the URLS after your response.""",
        ),
        MessagesPlaceholder(variable_name="messages", optional=True),
    ]
)

# NOTE(review): the topic is fixed to `example_topic` when this chain is built,
# so every answer produced through it is about that topic.
# include_raw=True: the chain result is indexed with "parsed" and "raw" by the
# code that uses it.
gen_answer_chain = gen_answer_prompt.partial(topic=example_topic) | fast_llm.with_structured_output(
    AnswerWithCitations, include_raw=True
).with_config(run_name="GenerateAnswer")


In [92]:
async def gen_answer(
    state: InterviewState,
    name: str = "subject_matter_expert",
    max_str_length: int = 15000):
    """Answer the writer's latest question from fresh search results.

    Steps: view the conversation from the expert's side, ask the model for
    search queries, run them, hand the (truncated) results to the answer chain
    as a tool message, and return the cited answer.

    Args:
        state: Interview state; only ``state["messages"]`` is read.
        name: Name the expert's messages are labelled with (sanitized first).
        max_str_length: Maximum number of characters of serialized search
            results passed to the answer model.

    Returns:
        A state update with one AIMessage under ``"messages"`` and, under
        ``"references"``, the URL -> content entries for the cited URLs that
        were found in the search results. If the answer could not be parsed
        into ``AnswerWithCitations``, the model's raw text is returned instead
        and no references are added.
    """
    expert_name = sanitize_name(name)

    # Set the expert as the current writer (returns a fresh message list, so
    # extending it below does not touch the list held in `state`).
    current_state = set_current_writer(state, expert_name)

    # Generate queries based on the last question to pass to the search engine
    queries = await gen_queries_chain.ainvoke(current_state)
    # The chain is built with include_raw=True; "parsed" can be None when the
    # model's reply could not be turned into a Queries object.
    parsed_queries = queries["parsed"]
    search_queries = parsed_queries.queries if parsed_queries is not None else []

    # Query the search engine
    search_results = await search_and_format(search_queries)
    # Plain character cut-off: the truncated text is not guaranteed to be valid JSON.
    truncated_search_results = json.dumps(search_results)[:max_str_length]

    # Get tool information to pass to model when generating answers. The
    # search results are attached as the reply to the model's tool call; when
    # the model made no tool call there is nothing to reply to, so the
    # conversation is passed on unchanged.
    ai_message: AIMessage = queries["raw"]
    tool_calls = ai_message.additional_kwargs.get("tool_calls") or []
    if tool_calls:
        tool_message = ToolMessage(
            tool_call_id=tool_calls[0]["id"], content=truncated_search_results
        )
        current_state["messages"].extend([ai_message, tool_message])

    # Answer the question with citations
    answer = await gen_answer_chain.ainvoke(current_state)
    parsed_answer = answer["parsed"]
    if parsed_answer is None:
        # Parsing failed: fall back to whatever text the model produced rather
        # than aborting the whole interview.
        raw_content = answer["raw"].content
        fallback_text = raw_content if isinstance(raw_content, str) else str(raw_content)
        return {
            "messages": [AIMessage(name=expert_name, content=fallback_text)],
            "references": {},
        }

    # Update the state: Add message to messages list, add references to references list
    cited_urls = set(parsed_answer.cited_urls)
    formatted_message = AIMessage(name=expert_name, content=parsed_answer.as_str)
    # Keep only citations that actually came from this round of search results.
    cited_references = {url: content for url, content in search_results.items() if url in cited_urls}
    return {"messages": [formatted_message], "references": cited_references}

In [93]:
# Try the expert on the example question and show the text of its answer.
example_answer = await gen_answer(
    {"messages": [HumanMessage(content=question["messages"][0].content)]}
)
example_answer["messages"][-1].content

'AI agents are poised to significantly impact various industries economically, with effects ranging from increased productivity to shifts in labor markets. In manufacturing, AI technologies such as robotics and automation are enhancing operational efficiency, leading to reduced production costs and faster output, which can increase profit margins for companies. This transformation is projected to also lead to a potential decrease in manufacturing jobs as machines take over routine tasks, although it may create new opportunities in AI management and maintenance roles.\n\nIn healthcare, AI agents can analyze patient data and improve diagnostic accuracy, which can reduce costs associated with misdiagnoses and ineffective treatments. The integration of AI into healthcare is expected to optimize processes, leading to significant savings for both providers and patients, while also improving patient outcomes through personalized medicine.\n\nThe finance industry is experiencing profound chang

Interview Graph

In [97]:
# Maximum number of expert answers per interview.
max_responses = 5


def end_or_pass_back_to_writer(state: InterviewState, name: str = "subject_matter_expert"):
    """Decide whether the interview continues after the expert has answered.

    Args:
        state: Interview state; only ``state["messages"]`` is read.
        name: Name the expert's messages are labelled with.

    Returns:
        ``END`` once the expert has answered ``max_responses`` times or the
        writer's last message ends with the closing phrase; otherwise
        ``"ask_question"``.
    """
    messages = state["messages"]
    expert_name = sanitize_name(name)

    # Count the expert's answers. AI messages without a name cannot be the
    # expert's, and sanitize_name() would fail on None, so skip them.
    num_responses = sum(
        1
        for m in messages
        if isinstance(m, AIMessage)
        and m.name is not None
        and sanitize_name(m.name) == expert_name
    )
    if num_responses >= max_responses:
        return END

    # The expert's answer is the last message, so the writer's question is the
    # one before it. With fewer than two messages there is no question to
    # inspect yet, so hand control back to the writer.
    if len(messages) < 2:
        return "ask_question"
    last_question = messages[-2]
    content = last_question.content
    # Message content is not always a plain string; only strings are checked.
    if isinstance(content, str) and content.endswith("Thank you so much for your help!"):
        return END
    return "ask_question"

In [99]:
# Build the interview loop: ask_question -> answer_question -> (ask_question | END).
interview_graph = StateGraph(InterviewState)

# Add nodes
interview_graph.add_node("ask_question", gen_question)
interview_graph.add_node("answer_question", gen_answer)

# Add edges
interview_graph.add_edge(START, "ask_question")
interview_graph.add_edge("ask_question", "answer_question")
# After every answer, end_or_pass_back_to_writer returns either END or "ask_question".
interview_graph.add_conditional_edges("answer_question", end_or_pass_back_to_writer)

# Compile graph (rebinds `interview_graph` from the builder to the compiled graph)
interview_graph = interview_graph.compile().with_config(run_name="Conduct Interviews")

## Generate Draft Outline

The draft outline is a rough draft of the outline based only on the task. The collected conversations and resources from above will then be used to refine the draft outline.

## Generate Final Outline

Use the collected conversations and resources to refine the draft outline. The result will be the final outline.

# Generate the Final Article

Each section will be generated independently using the previously gathered resources. Once each section is generated, we will have a large model go over the entire article to ensure consistency and remove duplicate information.