In [1]:
from langchain_groq import ChatGroq


llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0)

In [1]:
from langchain_ollama import OllamaLLM

llm = OllamaLLM(model="llama3.1:8b")

llm.invoke("The first man on the moon was ...")

'...Neil Armstrong! He stepped out of the lunar module Eagle and onto the Moon\'s surface on July 20, 1969, famously declaring "That\'s one small step for man, one giant leap for mankind" as he took his historic first steps.'

In [2]:
llm.invoke(
    "System: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Do not mention that you have used the provided context. Use three sentences maximum and keep the answer concise.\nQuestion: Какая столица Туркмении?\nContext: Source: {'uid': 22, 'version': '1.0'}\nContent: Question (RU): Какой город является столицей Туркмении?\nQuestion (EN): Which city is the capital of Turkmenistan?\nAnswer: Ашхабад\n\nSource: {'uid': 56, 'version': '1.0'}\nContent: Question (RU): Как называлась столица Крымского ханства?\nQuestion (EN): What was the name of the capital of the Crimean khanate?\nAnswer: Бахчисарай\n\nSource: {'uid': 526, 'version': '1.0'}\nContent: Question (RU): Какой город является административным центром Приморского края?\nQuestion (EN): Which city is the administrative center of Primorsky Krai?\nAnswer: Владивосток\n\nSource: {'uid': 859, 'version': '1.0'}\nContent: Question (RU): В каком городе находится гробница пророка Мухаммеда?\nQuestion (EN): In which city is the tomb of the prophet Muhammad located?\nAnswer: Медина \nAnswer:"
)

'Столица Туркмении - Ашхабад.'

In [2]:
# import getpass

# os.environ["OPENAI_API_KEY"] = getpass.getpass()

from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

### gg

In [4]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing_extensions import List, TypedDict

# Load and chunk contents of the blog
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

USER_AGENT environment variable not set, consider setting it to identify your requests.


### 

In [10]:
import json

from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.vectorstores.utils import filter_complex_metadata

file_path = "data/datasets/RuBQ_2.0_dev.json"
documents = []
with open(file_path, "r", encoding="utf-8") as file:
    data = json.load(file)


for item in data:
    content = (
        f"Question (RU): {item['question_text']}\n"
        f"Question (EN): {item.get('question_eng', '')}\n"
        f"Answer: {item['answer_text']}\n"
    )
    metadata = {
        "uid": item["uid"],
        "tags": item["tags"],
        "version": item["RuBQ_version"],
    }
    documents.append(Document(page_content=content, metadata=metadata))

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
filter_complex_metadata(documents)
splitted_docs = text_splitter.split_documents(documents)

_vectorstore = Chroma.from_documents(
    documents=splitted_docs,
    collection_name="qa_chroma",
    embedding=embeddings,
)

retriever = _vectorstore.as_retriever()    

In [68]:
from langchain_core.tools import tool


@tool("retrieve", response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    # retrieved_docs = vector_store.similarity_search(query, k=2)
    retrieved_docs = retriever.invoke(query)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [69]:
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

class GradeDocuments(BaseModel):
    binary_score: str = Field(description="Binary 'yes' or 'no' relevance score")


# Define the prompt
system = (
    "You are a grader assessing relevance of a retrieved document to a user question."
    "If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant."
    "Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."
    # "Dont forget to consider chat history when answering the question."
)

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Document: {document} Question: {question}"),
    ]
)
retrieval_grader = grade_prompt | llm.with_structured_output(GradeDocuments)


@tool("grade")
def grade(query: str, retrieved_docs: List[Document]):
    """Grade relevance of retrieved documents."""
    lst = []
    for doc in retrieved_docs:
        result = retrieval_grader.invoke(
            {"question": query, "document": doc.page_content}
        )
        lst.append(result.binary_score)

    if all(lst) == 'no':
        return 'web search'
    return 'generate answer'

In [70]:
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver

memory = MemorySaver()
agent_executor = create_react_agent(llm, [retrieve, grade], checkpointer=memory)

In [72]:
config = {"configurable": {"thread_id": "def234"}}

q1 = "Какой город является столицей Туркмении?"
q2 = "Что насчет Удмуртской республики?"
# q1 = "What is the weather in sf?"
# q2 = "What about new york?"

input_message = q1

for event in agent_executor.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
):
    event["messages"][-1].pretty_print()


Какой город является столицей Туркмении?
Tool Calls:
  retrieve (call_pqf8)
 Call ID: call_pqf8
  Args:
    query: столица Туркмении
Name: retrieve

Source: {'uid': 22, 'version': '1.0'}
Content: Question (RU): Какой город является столицей Туркмении?
Question (EN): Which city is the capital of Turkmenistan?
Answer: Ашхабад

Source: {'uid': 56, 'version': '1.0'}
Content: Question (RU): Как называлась столица Крымского ханства?
Question (EN): What was the name of the capital of the Crimean khanate?
Answer: Бахчисарай

Source: {'uid': 526, 'version': '1.0'}
Content: Question (RU): Какой город является административным центром Приморского края?
Question (EN): Which city is the administrative center of Primorsky Krai?
Answer: Владивосток

Source: {'uid': 859, 'version': '1.0'}
Content: Question (RU): В каком городе находится гробница пророка Мухаммеда?
Question (EN): In which city is the tomb of the prophet Muhammad located?
Answer: Медина

Столицей Туркмении является город Ашхабад.


In [74]:
input_message = q2

for event in agent_executor.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
):
    event["messages"][-1].pretty_print()


Что насчет Удмуртской республики?
Tool Calls:
  retrieve (call_4gav)
 Call ID: call_4gav
  Args:
    query: столица Удмуртской республики
Name: retrieve

Source: {'uid': 221, 'version': '1.0'}
Content: Question (RU): Какой город является столицей Удмуртской республики?
Question (EN): Which city is the capital of the Udmurt Republic?
Answer: Ижевск

Source: {'uid': 526, 'version': '1.0'}
Content: Question (RU): Какой город является административным центром Приморского края?
Question (EN): Which city is the administrative center of Primorsky Krai?
Answer: Владивосток

Source: {'uid': 6554, 'version': '2.0'}
Content: Question (RU): Какой город является столицей республики Карелия?
Question (EN): Which city is the capital of the Republic of Karelia?
Answer: Петрозаводск

Source: {'uid': 6023, 'version': '2.0'}
Content: Джебель-Шаммар, Хиджаз, Государство дервишей, Украинская держава, Первая Португальская республика, Ньюфаундленд, Балтийское герцогство, Крымское краевое правительство, Всев

In [186]:
from langchain_core.tools import tool


# @tool("retrieve_docs", response_format="content")
# def retriever_tool(query: str):
#     """Retrieve information related to a query."""
#     # retrieved_docs = vector_store.similarity_search(query, k=2)
#     retrieved_docs = retriever.invoke(query)
#     serialized = "\n\n".join(
#         (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}") for doc in retrieved_docs
#     )
#     return serialized#, retrieved_docs


from langchain.tools.retriever import create_retriever_tool

retriever_tool = create_retriever_tool(
    retriever,
    "retrieve_documents",
    "Search and return information relevenat to the input question",
)

tools = [retriever_tool]

tools = [retriever_tool]

In [187]:
from typing import Annotated, Sequence
from typing_extensions import TypedDict

from langchain_core.messages import BaseMessage

from langgraph.graph.message import add_messages


class AgentState(TypedDict):
    # The add_messages function defines how an update should be processed
    # Default is to replace. add_messages says "append"
    messages: Annotated[Sequence[BaseMessage], add_messages]

In [215]:
from typing import Annotated, Literal, Sequence
from typing_extensions import TypedDict

from langchain import hub
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.runnables import Runnable, RunnableConfig
from pydantic import BaseModel, Field

from langchain_community.tools.tavily_search import TavilySearchResults
from langgraph.prebuilt import tools_condition
from langchain_core.runnables import RunnableConfig, chain

### Edges


def grade_documents(state) -> Literal["generate", "rewrite"]:
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (messages): The current state

    Returns:
        str: A decision for whether the documents are relevant or not
    """

    print("---CHECK RELEVANCE---")

    # Data model
    class grade(BaseModel):
        """Binary score for relevance check."""

        binary_score: str = Field(description="Relevance score 'yes' or 'no'")

    # LLM
    # model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)

    # LLM with tool and validation
    model = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, streaming=True)
    llm_with_tool = model.with_structured_output(grade)

    # Prompt
    prompt = PromptTemplate(
        template="""You are a grader assessing relevance of a retrieved document to a user question. \n 
        Here is the retrieved document: \n\n {context} \n\n
        Here is the user question: {question} \n
        If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.""",
        input_variables=["context", "question"],
    )

    # Chain
    chain = prompt | llm_with_tool

    messages = state["messages"]
    last_message = messages[-1]

    question = [m for m in messages if isinstance(m, HumanMessage)][-1].content
    docs = last_message.content

    scored_result = chain.invoke({"question": question, "context": docs})

    score = scored_result.binary_score

    if score == "yes":
        print("---DECISION: DOCS RELEVANT---")
        return "generate"

    else:
        print("---DECISION: DOCS NOT RELEVANT---")
        print(score)
        return "rewrite"


### Nodes


def agent(state):
    """
    Invokes the agent model to generate a response based on the current state. Given
    the question, it will decide to retrieve using the retriever tool, or simply end.

    Args:
        state (messages): The current state

    Returns:
        dict: The updated state with the agent response appended to messages
    """
    print("---CALL AGENT---")
    messages = state["messages"]
    # model = ChatOpenAI(temperature=0, streaming=True, model="gpt-4-turbo")
    model = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, streaming=True)

    model = model.bind_tools(tools)
    response = model.invoke(messages)
    # We return a list, because this will get added to the existing list
    return {"messages": [response]}


def rewrite(state):
    """
    Transform the query to produce a better question.

    Args:
        state (messages): The current state

    Returns:
        dict: The updated state with re-phrased question
    """

    print("---TRANSFORM QUERY---")
    messages = state["messages"]
    # question = messages[0].content
    question = [m for m in messages if isinstance(m, HumanMessage)][-1].content

    msg = [
        HumanMessage(
            content=f""" \n 
    Look at the input and try to reason about the underlying semantic intent / meaning. \n 
    Here is the initial question:
    \n ------- \n
    {question} 
    \n ------- \n
    Formulate an improved question: """,
        )
    ]

    # Grader
    # model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
    model = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, streaming=True)

    response = model.invoke(msg)
    return {"messages": [response]}


def generate(state):
    """
    Generate answer

    Args:
        state (messages): The current state

    Returns:
         dict: The updated state with re-phrased question
    """
    print("---GENERATE---")
    messages = state["messages"]
    question = [m for m in messages if isinstance(m, HumanMessage)][-1].content
    last_message = messages[-1]

    docs = last_message.content

    prompt = ChatPromptTemplate(
        [
            (
                "system",
                (
                    "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. "
                    "If you don't know the answer, just say that you don't know. Do not mention that you have used the provided context. "
                    "Use three sentences maximum and keep the answer concise.\n"
                    "Question: {question}"
                    "\nContext: {context} "
                    "\nAnswer:"
                ),
            )
        ]
    )    

    # LLM
    # llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True)
    llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, streaming=True)

    # Chain
    rag_chain = prompt | llm | StrOutputParser()

    # Run
    response = rag_chain.invoke({"context": docs, "question": question})
    return {"messages": [AIMessage(response)]}


def web_search(state: AgentState):
    """
    Web search based on the question.

    Args:
        state (messages): The current graph state

    Returns:
        dict: Updates documents with appended web results
    """
    messages = state['messages']
    question = [m for m in messages if isinstance(m, HumanMessage)][-1].content
    # last_message = messages[-1]

    # docs = last_message.content

    print("---WEB SEARCH---")

    model = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, streaming=True)
    web_search_tool = TavilySearchResults()
    llm_with_tools = model.bind_tools([web_search_tool])
    
    @chain
    def tool_chain(user_input: str, config: RunnableConfig):
        ai_msg = llm_with_tools.invoke(user_input, config=config)
        tool_msgs = web_search_tool.batch(ai_msg.tool_calls, config=config)
        return llm_with_tools.invoke(
            [ai_msg, *tool_msgs], config=config
        )

    return {"messages": tool_chain.invoke(question)}

In [216]:
from langgraph.graph import END, StateGraph, START
from langgraph.prebuilt import ToolNode

# Define a new graph
workflow = StateGraph(AgentState)

# Define the nodes we will cycle between
workflow.add_node("agent", agent)  # agent
retrieve = ToolNode([retriever_tool])
workflow.add_node("retrieve", retrieve)  # retrieval
workflow.add_node("rewrite", rewrite)  # Re-writing the question
workflow.add_node("generate", generate)
workflow.add_node("web_search", web_search)
# retrieve = ToolNode([retriever_tool])


# Call agent node to decide to retrieve or not
workflow.add_edge(START, "agent")

# Decide whether to retrieve
workflow.add_conditional_edges(
    "agent",
    # Assess agent decision
    tools_condition,
    {
        # Translate the condition outputs to nodes in our graph
        "tools": "retrieve",
        END: "web_search",
    },
)

# Edges taken after the `action` node is called.
workflow.add_conditional_edges(
    "retrieve",
    # Assess agent decision
    grade_documents,
)
workflow.add_edge("web_search", END)
workflow.add_edge("generate", END)
workflow.add_edge("rewrite", "agent")

# Compile
memory = MemorySaver()
graph = workflow.compile(checkpointer=memory)

In [217]:
# q1 = "Какой город является столицей Туркмении?"
# q2 = "Что насчет Удмуртской республики?"
q1 = "What is the weather in sf?"
q2 = "What about new york?"

config = {"configurable": {"thread_id": "1"}}

In [218]:

for event in graph.stream(
    {"messages": [{"role": "user", "content": q1}]},
    stream_mode="values",
    config=config,
):
    event["messages"][-1].pretty_print()


What is the weather in sf?
---CALL AGENT---

Tool use failed: JSON does not match the expected schema for tool calls
---WEB SEARCH---

The current weather in San Francisco is foggy with a temperature of 46.9°F (8.3°C) and a wind speed of 4.7 mph (7.6 km/h).


In [197]:
for event in graph.stream(
    {"messages": [{"role": "user", "content": q2}]},
    stream_mode="values",
    config=config,
):
    event["messages"][-1].pretty_print()


What about new york?
---CALL AGENT---
---WEB SEARCH---

New York City is the largest and most influential American metropolis and the most populous and the most international city in the country.
