In [None]:
!pip install python-dotenv
!pip install langchain
!pip install langchain-openai

In [None]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from the nearest .env file into the process environment.
_ = load_dotenv(find_dotenv())
# NOTE(review): "OpenAi Key" looks like a placeholder variable name. The
# ChatOpenAI client below is built without an explicit key, so it presumably
# relies on the standard OPENAI_API_KEY variable -- confirm the name used in .env.
openai_api_key = os.environ["OpenAi Key"]

In [None]:
from langchain_openai import ChatOpenAI

# Shared chat model used by every node function defined in this notebook.
llm = ChatOpenAI(model="gpt-4o")

In [None]:
from typing import List
from typing_extensions import TypedDict
from pydantic import BaseModel, Field

# Persona of a single AI analyst. Instances are produced by the LLM through the
# `Perspectives` structured output, so the Field descriptions below are
# model-facing text. (Plain `#` comments are used instead of a class docstring
# so the generated schema is not altered.)
class Analyst(BaseModel):
    affiliation: str = Field(
        description="Primary affiliation of the analyst",
    )
    name: str = Field(
        description="Name of the analyst",
    )
    role: str = Field(
        description="Role of the analyst in the context of the topic",
    )
    description: str = Field(
        description="Description of the analyst focus, concerns, and motives",
    )

    @property
    def persona(self) -> str:
        """Return the analyst as a multi-line text block for use in prompts."""
        return (
            f"Name: {self.name}\n"
            f"Role: {self.role}\n"
            f"Affiliation: {self.affiliation}\n"
            f"Description: {self.description}\n"
        )
    
# Structured-output container: the list of analysts the LLM is asked to return
# (passed to `llm.with_structured_output` in create_analysts).
class Perspectives(BaseModel):
    analysts: List[Analyst] = Field(
        description="Comprehensive list of analysts with their roles and affiliations",
    )
class GenerateAnalystsState(TypedDict):
    """State carried through the analyst-generation graph."""

    topic: str  # research topic
    max_analysts: int  # number of analysts to generate
    # Spelled "human_analyst_feedback" (singular "analyst") to match the key that
    # should_continue reads and that graph.update_state(...) writes.
    human_analyst_feedback: str
    analysts: List[Analyst]  # analysts produced by create_analysts

    


BUILDING THE AI ANALYST SUB GRAPH....   

In [None]:
from IPython.display import Image, display
from langgraph.graph import START, END, StateGraph
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

# System prompt template for analyst generation.
# Placeholders: {topic}, {human_analyst_feedback}, {max_analysts}.
analyst_instructions = """You are tasked with creating a set of AI analyst personas. Follow these
instructions carefully:

1. First, review the research topic:
{topic}

2. Examine any editorial feedback that has been optionally provided to guide creation of the analysts:
{human_analyst_feedback}

3. Determine the most interesting themes based upon documents and / or feedback above.

4. Pick the top {max_analysts} themes.

5. Assign one analyst to each theme.

"""

def create_analysts(state: GenerateAnalystsState):
    """Generate the analyst personas for the research topic.

    Reads ``topic``, ``max_analysts`` and any human feedback from ``state``,
    asks the LLM for a structured ``Perspectives`` result, and returns
    ``{"analysts": [...]}`` as the state update.
    """
    topic = state["topic"]
    max_analysts = state["max_analysts"]
    # This notebook spells the feedback key both "human_analyst_feedback" and
    # "human_analysts_feedback"; accept either. `or ""` also turns an explicit
    # None (no further feedback) into empty text instead of the string "None".
    human_analyst_feedback = (
        state.get("human_analyst_feedback")
        or state.get("human_analysts_feedback")
        or ""
    )

    structured_llm = llm.with_structured_output(Perspectives)

    # Keyword names must match the placeholders in analyst_instructions,
    # otherwise str.format raises KeyError.
    system_message = analyst_instructions.format(
        topic=topic,
        human_analyst_feedback=human_analyst_feedback,
        max_analysts=max_analysts,
    )
    analysts = structured_llm.invoke(
        [
            SystemMessage(content=system_message),
            HumanMessage(content="Generate the set of analysts"),
        ]
    )

    return {"analysts": analysts.analysts}

def human_feedback(state: GenerateAnalystsState):
    """No-op node; the graph is compiled to interrupt before it runs."""
    pass

def should_continue(state: GenerateAnalystsState):
    """Choose the node that follows ``human_feedback``.

    Returns "create_analysts" when the state holds truthy feedback, otherwise END.
    """
    feedback = state.get("human_analyst_feedback", None)
    return "create_analysts" if feedback else END

# Assemble the analyst-generation graph over GenerateAnalystsState.
builder = StateGraph(GenerateAnalystsState)

builder.add_node("create_analysts", create_analysts)
builder.add_node("human_feedback", human_feedback)

builder.add_edge(START, "create_analysts")
builder.add_edge("create_analysts", "human_feedback")
# After human_feedback, should_continue routes back to create_analysts or to END.
builder.add_conditional_edges("human_feedback", should_continue, ["create_analysts", END])

# Compile with an in-memory checkpointer and an interrupt before human_feedback.
memory = MemorySaver()
graph = builder.compile(interrupt_before=["human_feedback"], checkpointer=memory)

# Render the compiled graph as a Mermaid PNG in the cell output.
display(Image(graph.get_graph(xray=1).draw_mermaid_png()))



RUNNING SUB GRAPH WITH EXAMPLE TO SEE IF IT WORKS....

In [None]:
max_analysts = 3
topic = "The benefits of adopting LangGraph as an agent framework"
thread = {"configurable": {"thread_id": "1"}}

# Stream state values from the graph and print the analysts as they appear.
# Pass the integer `max_analysts` defined above (not the builtin `max`).
for event in graph.stream(
    {"topic": topic, "max_analysts": max_analysts},
    thread,
    stream_mode="values",
):
    analysts = event.get("analysts", "")

    if analysts:
        for analyst in analysts:
            print(f"Name: {analyst.name}")
            print(f"Affiliation: {analyst.affiliation}")
            print(f"Role: {analyst.role}")
            print(f"Description: {analyst.description}")
            print("-" * 50)

            




HUMAN FEEDBACK EXAMPLE....

In [None]:
## Fetch the checkpointed state for this thread; `state.next` is shown as the
## cell output (presumably the node(s) scheduled to run next -- confirm).
state= graph.get_state(thread)
state.next

HERE I WILL ALLOW "HUMAN FEEDBACK". IN CASE THE USER DOES NOT LIKE THE RESULTS GENERATED....

In [None]:
# Write the human feedback into the thread's state, attributed to the
# "human_feedback" node.
graph.update_state(thread, {"human_analyst_feedback":
                            "Add in someone from a startup to add an entrepreneur of sorts."}, as_node="human_feedback")

FEEDBACK GIVEN. LETS NOW CONTINUE WITH THE EXECUTION OF THE APPLICATION....

In [None]:
# Resume the thread (input None) and print each analyst found in the streamed state.
for event in graph.stream(None, thread, stream_mode="values"):
    analysts = event.get("analysts", "")
    if not analysts:
        continue
    for analyst in analysts:
        print(
            f"Name: {analyst.name}",
            f"Affiliation: {analyst.affiliation}",
            f"Role: {analyst.role}",
            f"Description: {analyst.description}",
            "-" * 50,
            sep="\n",
        )

            

If I am satisfied with the new analysts provided, I will give no additional human feedback so the app can continue execution with the three analysts. To do that, I set `further_feedback` to `None`.

In [None]:
# No further feedback: store None so should_continue sees a falsy value and
# returns END.
further_feedback = None
graph.update_state(thread, {"human_analyst_feedback":
                    further_feedback}, as_node="human_feedback")

In [None]:
## No more human feedback. Continue execution of the agent flow and print the
## name of each node that emits an update.

for event in graph.stream(None, thread, stream_mode="updates"):
    print("--Node--")
    # Take the first key of the event dict as the node name.
    node_name= next(iter(event.keys()))
    print(node_name)

In [None]:
# Read the thread's state after the run and pull out the analysts list.
final_state = graph.get_state(thread)
analysts = final_state.values.get("analysts")



In [None]:
# Display `.next` of the final state (presumably empty once the graph has finished -- confirm).
final_state.next

In [None]:
# Print the analysts stored in the final state.
for analyst in analysts:
    print(f"Name: {analyst.name}")
    print(f"Affiliation: {analyst.affiliation}")
    print(f"Role: {analyst.role}")
    # The Analyst field is `description`.
    print(f"Description: {analyst.description}")
    print("-" * 50)

SECOND SUB GRAPH THE "INTERVIEW GENERATOR"  SUB GRAPH....

In [None]:
import operator
from typing import Annotated
from langgraph.graph import MessagesState

class InterviewState(MessagesState):
    """State of one analyst/expert interview (extends MessagesState)."""

    # Must be an annotation (`:`). With `=` it was a plain class attribute bound
    # to the `int` type rather than a declared state key.
    max_num_turns: int  # read by route_messages (which defaults to 2 if unset)
    context: Annotated[List, operator.add]  # search results; list updates are combined with operator.add
    analyst: Analyst  # analyst conducting the interview
    interview: str  # interview transcript written by save_interview
    sections: list  # report section(s) written by write_section

# Structured output for the search nodes: the query string the LLM derives from
# the conversation. (A `#` comment is used instead of a docstring so the schema
# sent to the model is unchanged.)
class SearchQuery(BaseModel):
    # `Field(...)`, not `False(...)`: calling the bool raised TypeError.
    search_query: str = Field(None, description="Search query for retrieval.")


    



Define the node `generate_question`...

In [None]:
# System prompt template for the interviewing analyst. `{goals}` is filled with
# the analyst's persona text in generate_question.
question_instructions = """You are an analyst tasked with interviewing an expert to learn about a specific topic. 

Your goal is boil down to interesting and specific insights related to your topic.

1. Interesting: Insights that people will find surprising or non-obvious.
        
2. Specific: Insights that avoid generalities and include specific examples from the expert.

Here is your topic of focus and set of goals: {goals}
        
Begin by introducing yourself using a name that fits your persona, and then ask your question.

Continue to ask questions to drill down and refine your understanding of the topic.
        
When you are satisfied with your understanding, complete the interview with: "Thank you so much for your help!"

Remember to stay in character throughout your response, reflecting the persona and goals provided to you."""


def generate_question(state: InterviewState):
    """Node that has the analyst ask the next interview question.

    Builds the system prompt from the analyst persona, sends it together with
    the conversation so far to the LLM, and returns the reply as a new message.
    """
    persona = state["analyst"].persona
    history = state["messages"]

    prompt = SystemMessage(content=question_instructions.format(goals=persona))

    # Returned under "messages" so the reply becomes part of the conversation state.
    return {"messages": [llm.invoke([prompt] + history)]}








SECOND SUBGRAPH.....

In [None]:
# NOTE(review): "API KEY" looks like a placeholder variable name, and this value
# is not passed to the Tavily tool created below -- presumably the tool reads its
# own environment variable (TAVILY_API_KEY?). Confirm the name used in .env.
tavily_api_key = os.environ["API KEY"]

In [None]:
## The web search tool to use...

from langchain_community.tools.tavily_search import TavilySearchResults
# Web search tool used by search_web; capped at 3 results per query.
tavily_search = TavilySearchResults(max_results=3)

In [None]:
#### Wikipedia search tool here....

from langchain_community.document_loaders import WikipediaLoader



Now below in the next lines... I am creating the  functionality associated with the nodes of the sub-graph: 
* For the AI Expert to search responses on the web and in wikipedia.
* For the AI Expert to use the search results to answer a question.
* To save the full interview and to write a summary ("section") of the interview.

In [None]:
from langchain_core.messages import get_buffer_string

## Search-query writing instructions: system message prepended to the
## conversation by search_web and search_wikipedia.

search_instructions = SystemMessage(content=f"""You will be given a conversation between an analyst and an expert. 

Your goal is to generate a well-structured query for use in retrieval and / or web-search related to the conversation.
        
First, analyze the full conversation.

Pay particular attention to the final question posed by the analyst.

Convert this final question into a well-structured web search query""")

def search_web(state: InterviewState):
    """Retrieve docs from web search.

    Asks the LLM to turn the conversation into a ``SearchQuery``, runs it
    through the Tavily tool, and returns the hits as a single formatted string
    in a one-element list under ``context``.
    """
    structured_llm = llm.with_structured_output(SearchQuery)
    search_query = structured_llm.invoke([search_instructions] + state["messages"])

    search_docs = tavily_search.invoke(search_query.search_query)

    # The closing tag is </Document>, matching the opening tag and the format
    # produced by search_wikipedia.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document href="{doc["url"]}"/>\n{doc["content"]}\n</Document>'
        for doc in search_docs
    )
    return {"context": [formatted_search_docs]}


# Defined at module level. Previously this function and answer_instructions were
# indented after search_web's `return`, so they were unreachable and neither
# name existed for the graph builder or generate_answer.
def search_wikipedia(state: InterviewState):
    """Retrieve docs from Wikipedia.

    Same flow as search_web, but loads up to two documents with WikipediaLoader.
    """
    structured_llm = llm.with_structured_output(SearchQuery)
    search_query = structured_llm.invoke([search_instructions] + state["messages"])

    search_docs = WikipediaLoader(
        query=search_query.search_query,
        load_max_docs=2,
    ).load()

    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"context": [formatted_search_docs]}


# System prompt template for the expert. Placeholders: {goals}, {context}.
answer_instructions = """You are an expert being interviewed by an analyst.

Here is analyst area of focus: {goals}. 
        
You goal is to answer a question posed by the interviewer.

To answer question, use this context:
        
{context}

When answering questions, follow these guidelines:
        
1. Use only the information provided in the context. 
        
2. Do not introduce external information or make assumptions beyond what is explicitly stated in the context.

3. The context contain sources at the topic of each individual document.

4. Include these sources your answer next to any relevant statements. For example, for source # 1 use [1]. 

5. List your sources in order at the bottom of your answer. [1] Source 1, [2] Source 2, etc
        
6. If the source is: <Document source="assistant/docs/llama3_1.pdf" page="7"/>' then just list: 
        
[1] assistant/docs/llama3_1.pdf, page 7 
        
And skip the addition of the brackets as well as the Document source preamble in your citation."""


def generate_answer(state: InterviewState):
    """Node in which the expert answers the analyst's latest question.

    Formats ``answer_instructions`` with the analyst persona and the gathered
    context, invokes the LLM on the conversation, and returns the reply as a
    new message named "expert".
    """
    # Bind the local name `analyst`. The original assigned `anayst`, so the
    # format call below read a leftover notebook global (or raised NameError).
    analyst = state["analyst"]
    messages = state["messages"]
    context = state["context"]

    system_message = answer_instructions.format(goals=analyst.persona, context=context)
    answer = llm.invoke([SystemMessage(content=system_message)] + messages)

    # Name the message so route_messages can count the expert's replies.
    answer.name = "expert"

    return {"messages": [answer]}

def save_interview(state: InterviewState):
    """Save the interview: convert the messages to one string under ``interview``."""
    messages = state["messages"]
    interview = get_buffer_string(messages)
    return {"interview": interview}


# Defined at module level. Previously it was indented after save_interview's
# `return`, so it was unreachable and the name did not exist for the graph builder.
def route_messages(state: InterviewState, name: str = "expert"):
    """Route between asking another question and saving the interview.

    Args:
        state: interview state; reads ``messages`` and ``max_num_turns``
            (defaults to 2 when absent).
        name: message name that identifies the expert's replies.

    Returns:
        "save_interview" once the expert has answered ``max_num_turns`` times or
        the analyst's last question contains the closing phrase; otherwise
        "ask_question".
    """
    messages = state["messages"]
    max_num_turns = state.get("max_num_turns", 2)

    # Count the expert's answers so far.
    num_responses = len(
        [m for m in messages if isinstance(m, AIMessage) and m.name == name]
    )

    if num_responses >= max_num_turns:
        return "save_interview"

    # The router runs after each question/answer pair, so the question that was
    # just answered is the second-to-last message.
    last_question = messages[-2]

    # Matches the closing line requested in question_instructions
    # ("Thank you so much for your help!"); the old literal could never match.
    if "Thank you so much for your help" in last_question.content:
        return "save_interview"
    return "ask_question"
    

# PAY ATTENTION: this defines the role of the technical writer that writes each report section
# System prompt template for write_section. `{focus}` is filled with the
# analyst's description.
section_writer_instructions = """You are an expert technical writer. 
            
Your task is to create a short, easily digestible section of a report based on a set of source documents.

1. Analyze the content of the source documents: 
- The name of each source document is at the start of the document, with the <Document tag.
        
2. Create a report structure using markdown formatting:
- Use ## for the section title
- Use ### for sub-section headers
        
3. Write the report following this structure:
a. Title (## header)
b. Summary (### header)
c. Sources (### header)

4. Make your title engaging based upon the focus area of the analyst: 
{focus}

5. For the summary section:
- Set up summary with general background / context related to the focus area of the analyst
- Emphasize what is novel, interesting, or surprising about insights gathered from the interview
- Create a numbered list of source documents, as you use them
- Do not mention the names of interviewers or experts
- Aim for approximately 400 words maximum
- Use numbered sources in your report (e.g., [1], [2]) based on information from source documents
        
6. In the Sources section:
- Include all sources used in your report
- Provide full links to relevant websites or specific document paths
- Separate each source by a newline. Use two spaces at the end of each line to create a newline in Markdown.
- It will look like:

### Sources
[1] Link or Document name
[2] Link or Document name

7. Be sure to combine sources. For example this is not correct:

[3] https://ai.meta.com/blog/meta-llama-3-1/
[4] https://ai.meta.com/blog/meta-llama-3-1/

There should be no redundant sources. It should simply be:

[3] https://ai.meta.com/blog/meta-llama-3-1/
        
8. Final review:
- Ensure the report follows the required structure
- Include no preamble before the title of the report
- Check that all guidelines have been followed"""


def write_section(state: InterviewState):
    """Node that writes one report section from the gathered source documents.

    Formats ``section_writer_instructions`` with the analyst's description,
    passes the accumulated ``context`` to the LLM, and returns the generated
    text in a one-element list under ``sections``.
    """
    context = state["context"]
    analyst = state["analyst"]

    # The section is written from the source docs gathered during the interview
    # (context); the saved transcript is not used here.
    system_message = section_writer_instructions.format(focus=analyst.description)
    section = llm.invoke(
        [
            SystemMessage(content=system_message),
            HumanMessage(content=f"Use this source to write your section: {context}"),
        ]
    )

    return {"sections": [section.content]}

# Assemble the interview sub-graph over InterviewState.
interview_builder = StateGraph(InterviewState)

interview_builder.add_node("ask_question", generate_question)
interview_builder.add_node("search_web", search_web)
interview_builder.add_node("search_wikipedia", search_wikipedia)
interview_builder.add_node("answer_question", generate_answer)
interview_builder.add_node("save_interview", save_interview)
interview_builder.add_node("write_section", write_section)

# Each question fans out to both search nodes; both feed the answer node.
interview_builder.add_edge(START, "ask_question")
interview_builder.add_edge("ask_question", "search_web")
interview_builder.add_edge("ask_question", "search_wikipedia")
interview_builder.add_edge("search_web","answer_question")
interview_builder.add_edge("search_wikipedia", "answer_question")

## Conditional edge: after each answer, route_messages returns either
## "ask_question" or "save_interview".

interview_builder.add_conditional_edges("answer_question", route_messages,["ask_question", "save_interview"])


interview_builder.add_edge("save_interview", "write_section")
interview_builder.add_edge("write_section", END)

## Compile the interview graph with its own in-memory checkpointer.

memory = MemorySaver()

## .with_config() attaches the run name "Conduct Interviews" to the compiled graph.

interview_graph = interview_builder.compile(checkpointer=memory).with_config(run_name="Conduct Interviews")

# Render the compiled graph as a Mermaid PNG in the cell output.
display(Image(interview_graph.get_graph().draw_mermaid_png()))


















In [None]:
# Display the first analyst; it is the one used for the sample interview below.
analysts[0]

NOW EXECUTING THE WHOLE THING...

* **PAY ATTENTION:** the following code is using the `topic` variable I defined in the first sub-graph.
* **PAY ATTENTION:** the text immediately below the following code is the response of the application...


In [None]:
from IPython.display import Markdown
# Seed the conversation, then run one full interview for the first analyst and
# render the resulting section as Markdown.
messages = [HumanMessage(f"So you said you were writing an article on {topic}?")]
thread = {"configurable": {"thread_id": "1"}}
interview = interview_graph.invoke(
    {"analyst": analysts[0], "messages": messages, "max_num_turns": 2},
    thread,
)
Markdown(interview["sections"][0])

SyntaxError: expression expected after dictionary key and ':' (3338379614.py, line 3)

Now I will build the RESEARCH REPORT GENERATOR APPLICATION part...

* The parent graph will use the previous two sub-graphs.
* **I am going to use a map-reduce operation with Send to allow parallel interviews**.
    * I will parallelize the interviews in a map step.
    * I will combine and summarize the interviews into the report in a reduce step.
* Then I will add a final step to write an intro and conclusion to the final report.

In [None]:
import operator
from typing import List, Annotated
from typing_extensions import TypedDict

class ResearchGraphState(TypedDict):
    """State of the parent research graph."""

    topic: str  # research topic
    max_analysts: int  # number of analysts to generate
    human_analyst_feedback: str  # human feedback
    # Plural key: create_analysts returns {"analysts": ...} and
    # initiate_all_interviews iterates state["analysts"].
    analysts: List[Analyst]  # analysts asking the questions
    sections: Annotated[list, operator.add]  # section lists from the interviews, combined with operator.add
    introduction: str  # introduction for the final report
    content: str  # content for the final report
    conclusion: str  # conclusion for the final report (read by finalize_report)
    final_report: str  # final report


In [None]:
from langgraph.constants import Send

## pay attention see how "Send" is used....

def initiate_all_interviews(state: ResearchGraphState):
    """Conditional edge: the "map" step that starts one interview per analyst.

    Returns the node name "create_analysts" when human feedback is present.
    Otherwise returns one ``Send`` per analyst targeting "conduct_interview",
    each carrying that analyst and an opening message.
    """
    # With feedback present, go back and regenerate the analysts.
    if state.get("human_analyst_feedback"):
        return "create_analysts"

    topic = state["topic"]
    opening = f"So you said you were writing an article on {topic}?"
    return [
        Send(
            "conduct_interview",
            {"analyst": analyst, "messages": [HumanMessage(content=opening)]},
        )
        for analyst in state["analysts"]
    ]

                        
# Prompt template for consolidating the analysts' memos into the report body.
# Placeholders: {topic}, {context}.
report_writer_instructions = """You are a technical writer creating a report on this overall topic: 

{topic}
    
You have a team of analysts. Each analyst has done two things: 

1. They conducted an interview with an expert on a specific sub-topic.
2. They write up their finding into a memo.

Your task: 

1. You will be given a collection of memos from your analysts.
2. Think carefully about the insights from each memo.
3. Consolidate these into a crisp overall summary that ties together the central ideas from all of the memos. 
4. Summarize the central points in each memo into a cohesive single narrative.

To format your report:
 
1. Use markdown formatting. 
2. Include no pre-amble for the report.
3. Use no sub-heading. 
4. Start your report with a single title header: ## Insights
5. Do not mention any analyst names in your report.
6. Preserve any citations in the memos, which will be annotated in brackets, for example [1] or [2].
7. Create a final, consolidated list of sources and add to a Sources section with the `## Sources` header.
8. List your sources in order and do not repeat.

[1] Source 1
[2] Source 2

Here are the memos from your analysts to build your report from: 

{context}"""


def _join_sections(sections):
    """Concatenate the section memos into one string, separated by blank lines."""
    return "\n\n".join(f"{section}" for section in sections)


def write_report(state: ResearchGraphState):
    """Consolidate all analyst memos into the report body.

    Formats ``report_writer_instructions`` with the topic and the joined
    sections, and returns the LLM output under ``content`` (the key that
    finalize_report reads).
    """
    system_message = report_writer_instructions.format(
        topic=state["topic"],
        context=_join_sections(state["sections"]),
    )
    report = llm.invoke(
        [
            SystemMessage(content=system_message),
            HumanMessage(content="Write a report based upon these memos."),
        ]
    )
    return {"content": report.content}


# NOTE(review): the notebook referenced `intro_conclusion_instructions` without
# defining it anywhere. The wording below is a reconstruction that keeps the two
# placeholders the original call site passes ({topic}, {formatted_str_sections});
# adjust the text as needed.
intro_conclusion_instructions = """You are a technical writer finishing a report on {topic}

You will be given all of the sections of the report.

Your job is to write a crisp and compelling introduction or conclusion section.

The user will instruct you on whether to write the introduction or conclusion.

Include no pre-amble for either section.

Target around 100 words, crisply previewing (for introduction) or recapping (for conclusion) all of the sections of the report.

Use markdown formatting.

For your introduction, create a compelling title and use the # header for the title.

For your introduction, use ## Introduction as the section header.

For your conclusion, use ## Conclusion as the section header.

Here are the sections to reflect on for writing: {formatted_str_sections}"""


def write_introduction(state: ResearchGraphState):
    """Write the report introduction from all sections; returns ``introduction``."""
    instructions = intro_conclusion_instructions.format(
        topic=state["topic"],
        formatted_str_sections=_join_sections(state["sections"]),
    )
    intro = llm.invoke(
        [
            SystemMessage(content=instructions),
            HumanMessage(content="Write the report introduction"),
        ]
    )
    return {"introduction": intro.content}


def write_conclusion(state: ResearchGraphState):
    """Write the report conclusion from all sections; returns ``conclusion``."""
    instructions = intro_conclusion_instructions.format(
        topic=state["topic"],
        formatted_str_sections=_join_sections(state["sections"]),
    )
    conclusion = llm.invoke(
        [
            SystemMessage(content=instructions),
            HumanMessage(content="Write the report conclusion"),
        ]
    )
    return {"conclusion": conclusion.content}


def finalize_report(state: ResearchGraphState):
    """The "reduce" step: combine introduction, body and conclusion.

    Removes a leading "## Insights" heading from the body, moves a trailing
    "## Sources" list (if present) to the end of the report, and returns the
    assembled text under ``final_report``.
    """
    content = state["content"]

    heading = "## Insights"
    if content.startswith(heading):
        # Slice the heading off. str.strip(heading) would instead remove any of
        # those characters from both ends of the text.
        content = content[len(heading):].lstrip()

    # Split the body from the sources list at the first "## Sources" header line.
    body, separator, sources = content.partition("\n## Sources\n")
    if separator:
        content = body
    else:
        sources = None

    final_report = (
        state["introduction"]
        + "\n\n---\n\n"
        + content
        + "\n\n---\n\n"
        + state["conclusion"]
    )
    if sources is not None:
        final_report += "\n\n## Sources\n" + sources
    return {"final_report": final_report}


## Add nodes and edges. This runs at module level; it was previously indented
## inside write_conclusion and therefore never executed when the cell ran.

builder = StateGraph(ResearchGraphState)
# Node names must match the names used by the edges and by the return values of
# initiate_all_interviews ("create_analysts", "conduct_interview").
builder.add_node("create_analysts", create_analysts)
builder.add_node("human_feedback", human_feedback)
builder.add_node("conduct_interview", interview_builder.compile())
builder.add_node("write_report", write_report)
builder.add_node("write_introduction", write_introduction)
builder.add_node("write_conclusion", write_conclusion)
builder.add_node("finalize_report", finalize_report)

## Edges

builder.add_edge(START, "create_analysts")
builder.add_edge("create_analysts", "human_feedback")
builder.add_conditional_edges("human_feedback", initiate_all_interviews, ["create_analysts", "conduct_interview"])
builder.add_edge("conduct_interview", "write_report")
builder.add_edge("conduct_interview", "write_introduction")
builder.add_edge("conduct_interview", "write_conclusion")
# finalize_report has all three writer nodes as its sources.
builder.add_edge(["write_conclusion", "write_report", "write_introduction"], "finalize_report")
builder.add_edge("finalize_report", END)

## Compile with an in-memory checkpointer and an interrupt before human_feedback.

memory = MemorySaver()
graph = builder.compile(interrupt_before=["human_feedback"], checkpointer=memory)
display(Image(graph.get_graph(xray=1).draw_mermaid_png()))

    


















