In [1]:
# TODO: include the source of each result in the web search output

## Import API keys

In [2]:
import yaml

# Read the API keys from the local secrets file; the safe loader only builds
# plain Python objects from the YAML.
with open('secrets.yml', 'r') as secrets_file:
    secrets = yaml.safe_load(secrets_file)

## Defining the model

To test it, first run 'ollama serve' in a local terminal.

In [3]:
# Uncomment to install dependencies

# %pip install transformers -U
# %pip -q install langchain-groq

In [4]:
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.chat_models.huggingface import ChatHuggingFace
from langchain_groq import ChatGroq
import os

# Export the first 'groq' entry of the secrets file as GROQ_API_KEY before the
# clients are created.
os.environ["GROQ_API_KEY"] = secrets['groq'][0]

# Both clients target the same Groq-hosted model.
_GROQ_MODEL_NAME = "llama3-70b-8192"

# Free-text chat model.
chat_model = ChatGroq(model=_GROQ_MODEL_NAME)

# Same model, bound with a JSON-object response format for the chains that
# parse their output with JsonOutputParser.
json_model = ChatGroq(model=_GROQ_MODEL_NAME).bind(
    response_format={"type": "json_object"}
)

# If necessary to run without GROQ, uncomment this

# llm = HuggingFaceEndpoint(repo_id="meta-llama/Meta-Llama-3-8B-Instruct", huggingfacehub_api_token=secrets['huggingface'][0])
# chat_model = ChatHuggingFace(llm=llm)

In [5]:
# Smoke test: send a single prompt to chat_model and display the reply.
chat_model.invoke('Hello, who are you?')

AIMessage(content="Nice to meet you! I am LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner. I'm not a human, but a computer program designed to simulate conversation, answer questions, and even generate text. I can chat with you about a wide range of topics, from science and history to entertainment and culture. I'm constantly learning and improving, so please bear with me if I make any mistakes. What would you like to talk about?", response_metadata={'token_usage': {'completion_tokens': 102, 'prompt_tokens': 16, 'total_tokens': 118, 'completion_time': 0.288470401, 'prompt_time': 0.007657369, 'queue_time': None, 'total_time': 0.29612777}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_2f30b0b571', 'finish_reason': 'stop', 'logprobs': None}, id='run-b1684981-c675-47cf-8e1d-79a47ed9f7a7-0')

## Tool selector chain

In [6]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import JsonOutputParser

In [7]:
# Routing prompt: the model must answer with a JSON object whose single key
# 'router_decision' names one of the four tools.
_TOOL_SELECTOR_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an expert at reading a QUERY from a user and routing to our internal knowledge system\
     or directly to final answer. \n

    Use the following criteria to decide how to route the query to one of our available tools: \n\n
    
    If the user asks anything about LangSmith, you should use the 'RAG_retriever' tool.
    
    For any mathematical problem you should use 'calculator'. Be sure that you have all the necessary
    data before routing to this tool.

    If the user asks for a modification in the model being analyzed, use the tool 'model_modifier'.

    If you are unsure or the person is asking a question you don't understand then choose 'web_search'

    You do not need to be stringent with the keywords in the question related to these topics. Otherwise, use web_search.
    Give a choice contained in ['RAG_retriever','calculator','model_modifier','web_search'].
    Return the a JSON with a single key 'router_decision' and no premable or explaination.
    Use the initial query of the user and any available context to make your decision about the tool to be used.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY : {query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

tool_selector_prompt = PromptTemplate(
    input_variables=["query"],
    template=_TOOL_SELECTOR_TEMPLATE,
)

# prompt -> JSON-mode model -> parsed dict
tool_selector_chain = tool_selector_prompt | json_model | JsonOutputParser()

# Example run of the router on a sample question.
initial_query = 'Please, let me know the weather in San Francisco'
print(tool_selector_chain.invoke({"query": initial_query}))

{'router_decision': 'web_search'}


## RAG Question generator chain

In [8]:
## RAG QUESTIONS
# Prompt that turns the user's query into at most three questions for the
# knowledge system, returned as JSON under the key 'questions'.
_SEARCH_RAG_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a master at working out the best questions to ask our knowledge agent to get the best info for the customer.

    Given the INITIAL_QUERY, work out the best questions that will find the best \
    info for helping to write the final answer. Write the questions to our knowledge system not to the customer.

    Return a JSON with a single key 'questions' with no more than 3 strings of and no preamble or explaination.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

search_rag_prompt = PromptTemplate(
    input_variables=["initial_query"],
    template=_SEARCH_RAG_TEMPLATE,
)

# prompt -> JSON-mode model -> parsed dict
question_rag_chain = search_rag_prompt | json_model | JsonOutputParser()

# Example run on a sample LangSmith question.
research_info = None
query = 'What are the main benefits of using LangSmith for developing a tool to levarage LLMs?'
print(question_rag_chain.invoke({"initial_query": query}))

{'questions': ['What are the key features of LangSmith?', 'How does LangSmith facilitate LLM development?', 'What are the advantages of using LangSmith over other LLM development tools?']}


## RAG chain

In [9]:
# Uncomment to install dependencies

# %pip install beautifulsoup4
# %pip install faiss-cpu

In [10]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough

# Page that feeds the retriever.
_LANGSMITH_GUIDE_URL = "https://docs.smith.langchain.com/user_guide"

# Download the page as documents.
loader = WebBaseLoader(_LANGSMITH_GUIDE_URL)
docs = loader.load()

# Embedding model used to vectorize the chunks.
embeddings = OllamaEmbeddings(model="llama3")

# Chunk the documents with the splitter's defaults, then index the chunks in FAISS.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)

# Retriever over the FAISS index; consumed by the RAG chain.
retriever = vector.as_retriever()

KeyboardInterrupt: 

In [None]:
#RAG Chain
# Question-answering prompt fed with the retrieved context.
_RAG_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n

     <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUESTION: {question} \n
    CONTEXT: {context} \n
    Answer:
    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """

rag_prompt = PromptTemplate(
    input_variables=["question","context"],
    template=_RAG_TEMPLATE,
)

# The chain input is sent to the retriever to fill 'context' and is passed
# through unchanged as 'question'.
_rag_inputs = {"context": retriever , "question": RunnablePassthrough()}
rag_chain = _rag_inputs | rag_prompt | chat_model | StrOutputParser()

## Model modifier chain

In [None]:
## MODEL MODIFIER
# Prompt that extracts the parameter to change and its new value as JSON with
# the keys 'parameter' and 'new_value'.
_MODEL_MODIFIER_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at building JSON to modify a energy system model, whenever the user asks \
    you to modify a parameter, you will build a JSON object with the desired modifications.
    
    Given the INITIAL_QUERY, determine the parameter that the user wants to change, and the new value that should be applied \
    and with this information, return a JSON with only two keys 'parameter' and 'new_value' with no preamble or explanaition

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

model_modifier_prompt = PromptTemplate(
    input_variables=["initial_query"],
    template=_MODEL_MODIFIER_TEMPLATE,
)

# prompt -> JSON-mode model -> parsed dict
model_modifier_chain = model_modifier_prompt | json_model | JsonOutputParser()

# Example run on a sample modification request.
query = 'I want the lifetime of wind power plants to be modified to 50 years'
print(model_modifier_chain.invoke({"initial_query": query}))

{'parameter': 'wind_lifetime', 'new_value': 50}


## Web search chain

In [None]:
## Search keywords
# Prompt that derives up to three web-search keyword phrases from the query,
# returned as JSON under the key 'keywords'.
_SEARCH_KEYWORD_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a master at working out the best keywords to search for in a web search to get the best info for the user.

    Given the INITIAL_QUERY, work out the best keywords that will find the info requested by the user
    The keywords should have between 3 and 5 words each, if the query allows for it.

    Return a JSON with a single key 'keywords' with no more than 3 keywords and no preamble or explaination.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

search_keyword_prompt = PromptTemplate(
    input_variables=["initial_query"],
    template=_SEARCH_KEYWORD_TEMPLATE,
)

# prompt -> JSON-mode model -> parsed dict
search_keyword_chain = search_keyword_prompt | json_model | JsonOutputParser()

# Example run on a sample question.
query = 'Who is the current holder of the speed skating world record on 500 meters?'
print(search_keyword_chain.invoke({"initial_query": query}))

{'keywords': ['speed skating 500m record', 'world record 500m skating', '500m speed skating champion']}


## Web answer analyzer

In [None]:
# Prompt that condenses raw web search results (plus any context gathered so
# far) into a brief answer to the query.
web_answer_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an expert at summarizing a bunch of data to extract only the important bits from it.

    Given the user's QUERY and the SEARCH_RESULTS, summarize as briefly as possible the information
    searched by the user.
    
    If it helps to provide a more precise answer, you can also make use of the CONTEXT.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY: {query} \n
    SEARCH_RESULTS: {search_results} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    # Must list exactly the placeholders used in the template above
    # ({query}, {search_results}, {context}).
    input_variables=["query","search_results","context"],
)

# prompt -> chat model -> plain string
web_answer_chain = web_answer_prompt | chat_model | StrOutputParser()

## Calculator chain

In [None]:
## CALCULATOR
# Prompt that converts a math question into one binary operation, returned as
# JSON with the keys 'operation', 'op_1' and 'op_2'.
_CALCULATOR_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at building JSON to do calculations using a calculator tool.
    
    You can only output a single format of JSON object consisting in two operands
    and the operation. The name of the only three keys are 'operation', 'op_1' and 'op_2' \n
    
    'operation' can only be [+,-,*,/,^]
    'op_1' and 'op_2' must be integers or float\n
    
    If you judge that the equation consists of more than one operation, solve only one,
    the calculator can be called multiple times and the other results will be solved
    later.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

calculator_prompt = PromptTemplate(
    input_variables=["initial_query"],
    template=_CALCULATOR_TEMPLATE,
)

# prompt -> JSON-mode model -> parsed dict
calculator_chain = calculator_prompt | json_model | JsonOutputParser()

# Example run on a two-step expression.
research_info = None
query = 'How much is 27 to the power of 5 plus 7?'
print(calculator_chain.invoke({"initial_query": query}))

{'operation': '^', 'op_1': 27, 'op_2': 5}


## Output generator chain

In [None]:
## OUTPUT GENERATOR
# Prompt that writes the final answer from the original query and the
# collected context.
_OUTPUT_GENERATOR_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at answering the user based on context given. \n
    
    Given the INITIAL_QUERY and a CONTEXT, generate an answer for the query
    asked by the user. You should make use of the provided information
    to answer the user in the best possible way. If you think the answer
    does not answer the user completely, ask the user for the necessary
    information if possible. \n

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

output_generator_prompt = PromptTemplate(
    input_variables=["initial_query","context"],
    template=_OUTPUT_GENERATOR_TEMPLATE,
)

# prompt -> chat model -> plain string
output_generator_chain = output_generator_prompt | chat_model | StrOutputParser()

# Example run with a deliberately incomplete context.
research_info = None
query = 'Is my car more powerful than a GT-R R32?'
context = 'The car owned by the user is from 2010'
print(output_generator_chain.invoke({"initial_query": query, "context": context}))

To determine if your car is more powerful than a GT-R R32, we need to know the make and model of your car. The GT-R R32, a Japanese sports car, has a twin-turbocharged 2.6-liter inline-6 engine producing around 276 horsepower and 260 lb-ft of torque.

Since you mentioned your car is from 2010, could you please tell me the make and model of your car? That way, I can look up its horsepower and torque ratings and compare them to the GT-R R32's specs, giving you a more accurate answer.


## Answer Iterator Chain

In [None]:
## ANSWER ITERATOR
# Prompt that decides whether the context is sufficient; it returns JSON with
# the keys 'ready_to_answer' and 'next_query'.
_ANSWER_ITERATOR_TEMPLATE = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at deciding if the already available information is enough to
    fully answer the user query. \n
    
    Given a INITIAL_QUERY and the available CONTEXT, decide if the available information
    is already enough to answer the query proposed by the user. \n
    
    Your job is to coordinate the usage of many tools, one at a time. To do this you will
    decide what information you need next, with the restriction that you can only get one
    information per iteration, and request it to the pipeline. \n
    
    Your output should be a JSON object containing two keys, 'ready_to_answer' and
    'next_query'. 'ready_to_answer' is a boolean that indicates if all necessary
    info is present and 'next_query' is a query that you should develop so the next
    agent in the pipeline can search for the required information. \n

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

answer_iterator_prompt = PromptTemplate(
    input_variables=["initial_query","context"],
    template=_ANSWER_ITERATOR_TEMPLATE,
)

# prompt -> JSON-mode model -> parsed dict
answer_iterator_chain = answer_iterator_prompt | json_model | JsonOutputParser()

# Example run with a deliberately incomplete context.
research_info = None
query = 'Is my car more powerful than a GT-R R32?'
context = ['The car owned by the user is from 2010']
print(answer_iterator_chain.invoke({"initial_query": query, "context": context}))

{'ready_to_answer': False, 'next_query': "What is the horsepower of the user's car?"}


## State

In [None]:
from langchain.schema import Document
from langgraph.graph import END, StateGraph
from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
    """
    Typed dictionary holding the state shared by the graph nodes.

    Keys:
        initial_query: the user's input
        next_query: partial query generated by the agent
        num_steps: number of steps
        selected_tool: name of the tool that was selected
        rag_questions: questions used for retrieval
        tool_parameters: parameters to be used by tools
        context: list of context generated for the query
        info_needed: whether to add search info
        final_answer: the LLM generation
    """
    initial_query: str
    next_query: str
    num_steps: int
    selected_tool: str
    rag_questions: List[str]
    tool_parameters: str
    context: List[str]
    info_needed: bool
    final_answer: str

## Tool Selector Node

In [None]:
def tool_selector(state):
    """Ask the routing chain which tool should handle the pending query.

    Args:
        state (dict): Current graph state; reads 'next_query' and 'num_steps'.

    Returns:
        dict: State update with 'selected_tool' (the chain's
        'router_decision') and the incremented 'num_steps'.
    """
    print("---TOOL SELECTION---")
    query = state['next_query']
    # iterate_over_answer stores the answer-iterator's whole JSON object under
    # 'next_query'; route on the question text inside it, not on the dict repr.
    if isinstance(query, dict):
        query = query.get('next_query') or query
    num_steps = state['num_steps'] + 1

    print(f'QUERY: {query}')

    router = tool_selector_chain.invoke({"query": query})
    router_decision = router['router_decision']

    print(f'SELECTED TOOL: {router_decision}\n')

    return {"selected_tool": router_decision,
            "num_steps": num_steps}

## RAG Node

In [None]:
def research_info_rag(state):
    """Answer the pending query through the RAG chain.

    Generates retrieval questions with question_rag_chain, answers each one
    with rag_chain, and appends the question/answer texts to the context.

    Args:
        state (dict): Current graph state; reads 'next_query', 'context' and
            'num_steps'.

    Returns:
        dict: State update with the extended 'context', the generated
        'rag_questions' and the incremented 'num_steps'.
    """
    print("---RAG LANGSMITH RETRIEVER---")
    initial_query = state['next_query']
    # iterate_over_answer stores the answer-iterator's whole JSON object under
    # 'next_query'; use the question text inside it, not the dict repr.
    if isinstance(initial_query, dict):
        initial_query = initial_query.get('next_query') or initial_query
    context = state['context']
    num_steps = state['num_steps'] + 1

    questions = question_rag_chain.invoke({"initial_query": initial_query})
    questions = questions['questions']

    rag_results = []
    for idx, question in enumerate(questions):
        print(f'QUESTION {idx}: {question}')
        answer = rag_chain.invoke(question)
        print(f'ANSWER FOR QUESTION {idx}: {answer}')
        rag_results.append(question + '\n\n' + answer + "\n\n\n")
    print(f'FULL ANSWERS: {rag_results}\n')

    # Extend the context with the individual strings so it stays a flat
    # list of str, as declared in GraphState, instead of nesting a list.
    return {"context": context + rag_results,
            "rag_questions": questions,
            "num_steps": num_steps}

## Web Search Tool

In [None]:
# Uncomment to install dependencies

# %pip install -U langchain-community tavily-python

In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults
import os

# Export the first 'tavily' entry of the secrets file as TAVILY_API_KEY
# (presumably read by TavilySearchResults on construction — TODO confirm).
os.environ["TAVILY_API_KEY"] = secrets['tavily'][0]
# Search tool invoked by the web search node.
web_search_tool = TavilySearchResults()

## Web Search Node

In [None]:
def research_info_web(state):
    """Answer the pending query with a web search.

    Derives search keywords with search_keyword_chain, runs the first one
    through web_search_tool, and summarizes the results with web_answer_chain.

    Args:
        state (dict): Current graph state; reads 'next_query', 'context' and
            'num_steps'.

    Returns:
        dict: State update with the summary appended to 'context' and the
        incremented 'num_steps'.
    """
    print("---RESEARCH INFO SEARCHING---")
    initial_query = state['next_query']
    # iterate_over_answer stores the answer-iterator's whole JSON object under
    # 'next_query'; use the question text inside it, not the dict repr.
    if isinstance(initial_query, dict):
        initial_query = initial_query.get('next_query') or initial_query
    context = state['context']
    num_steps = state['num_steps'] + 1

    # Web search
    keywords = search_keyword_chain.invoke({"initial_query": initial_query, "context": context})
    keywords = keywords['keywords']
    full_searches = []
    # Only the first keyword is searched.
    for idx, keyword in enumerate(keywords[:1]):
        print(f'KEYWORD {idx}: {keyword}')
        temp_docs = web_search_tool.invoke({"query": keyword})
        if isinstance(temp_docs, list):
            # Several hits: merge their 'content' fields into one document.
            web_results = Document(page_content="\n".join(d["content"] for d in temp_docs))
        elif isinstance(temp_docs, dict):
            web_results = Document(page_content=temp_docs["content"])
        else:
            # Anything else is treated as an empty search.
            web_results = 'No results'
        print(f'RESULTS FOR KEYWORD {idx}: {web_results}')
        full_searches.append(web_results)
    print(f'FULL RESULTS: {full_searches}\n')

    processed_searches = web_answer_chain.invoke({"query": initial_query, "search_results": full_searches, "context": context})

    print(f'PROCESSED RESULT: {processed_searches}')

    return {"context": context + [processed_searches],
            "num_steps": num_steps}

## Calculator Node

In [None]:
def calculator(state):
    """Run one arithmetic operation extracted from the pending query.

    calculator_chain turns the query into {'operation', 'op_1', 'op_2'}; the
    operation is evaluated here and the outcome is appended to the context.
    Unsupported operators, non-numeric or missing operands, and arithmetic
    errors such as division by zero are reported in the context instead of
    raising.

    Args:
        state (dict): Current graph state; reads 'next_query', 'context' and
            'num_steps'.

    Returns:
        dict: State update with the result string appended to 'context' and
        the incremented 'num_steps'.
    """
    import operator

    print("---CALCULATOR TOOL---")

    query = state['next_query']
    # iterate_over_answer stores the answer-iterator's whole JSON object under
    # 'next_query'; use the question text inside it, not the dict repr.
    if isinstance(query, dict):
        query = query.get('next_query') or query
    context = state['context']
    parameters = calculator_chain.invoke({"initial_query": query})
    # The model's JSON may omit keys; treat missing ones as invalid input.
    operation = parameters.get('operation')
    op_1 = parameters.get('op_1')
    op_2 = parameters.get('op_2')
    num_steps = state['num_steps'] + 1

    print(f'OPERATION: {operation}')
    print(f'OPERAND 1: {op_1}')
    print(f'OPERAND 2: {op_2}')

    handlers = {
        "+": operator.add,
        "-": operator.sub,
        "/": operator.truediv,
        "*": operator.mul,
        "^": operator.pow,
    }
    handler = handlers.get(operation) if isinstance(operation, str) else None

    def _is_number(value):
        # bool is an int subclass but is not a valid operand here.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    failure = 'Unable to execute the selected operation'
    if handler is None or not (_is_number(op_1) and _is_number(op_2)):
        str_result = failure
    else:
        try:
            result = handler(op_1, op_2)
        except ArithmeticError:
            # ZeroDivisionError / OverflowError
            str_result = failure
        else:
            str_result = f'{op_1} {operation} {op_2} = {result}'

    print(f'RESULT: {str_result}\n')

    return {"context": context + [str_result],
            "num_steps": num_steps}

## Date Getter Node

In [None]:
from datetime import datetime

def date_getter(state):
    """Append the current local date and time to the context.

    Args:
        state (dict): Current graph state; reads 'context' and 'num_steps'.

    Returns:
        dict: State update with the date sentence appended to 'context' and
        the incremented 'num_steps'.
    """
    print("---DATE GETTER TOOL---")
    prior_context = state['context']
    step_count = state['num_steps'] + 1

    current_date = datetime.now().strftime("%d %B %Y, %H:%M:%S")
    print(f'CURRENT DATE: {current_date}\n')

    date_sentence = 'The current date and time are ' + current_date
    return {
        "context": prior_context + [date_sentence],
        "num_steps": step_count,
    }

## Model Modifier Node

In [None]:
def model_modifier(state):
    """Build the modification JSON for the pending query and report success.

    The JSON produced by model_modifier_chain is only printed; this node does
    not apply it to any model.

    Args:
        state (dict): Current graph state; reads 'next_query', 'context' and
            'num_steps'.

    Returns:
        dict: State update with a confirmation message appended to 'context'
        and the incremented 'num_steps'.
    """
    print("---MODEL MODIFIER TOOL---")
    query = state['next_query']
    # iterate_over_answer stores the answer-iterator's whole JSON object under
    # 'next_query'; use the question text inside it, not the dict repr.
    if isinstance(query, dict):
        query = query.get('next_query') or query
    context = state['context']
    num_steps = state['num_steps'] + 1

    parameters_json = model_modifier_chain.invoke({"initial_query": query})
    print(f'JSON:\n{parameters_json}\n')

    result = 'The model was successfully modified'

    return {"context": context + [result],
            "num_steps": num_steps}

## Output Generator Node

In [None]:
def output_generator(state):
    """Generate the final answer from the original query and the context.

    Args:
        state (dict): Current graph state; reads 'initial_query', 'context'
            and 'num_steps'.

    Returns:
        dict: State update with 'final_answer' and the incremented
        'num_steps'.
    """
    print("---GENERATE OUTPUT---")
    ## Get the state
    initial_query = state['initial_query']
    context = state['context']
    num_steps = state['num_steps'] + 1

    # Generate the final answer
    answer = output_generator_chain.invoke({"initial_query": initial_query,
                                            "context": context})
    print(f'GENERATED OUTPUT:\n{answer}\n')

    # Report the step count like every other node, so this step is not lost.
    return {"final_answer": answer,
            "num_steps": num_steps}

## Answer Iterator Node

In [None]:
def iterate_over_answer(state):
    """Ask the answer-iterator chain whether the context is sufficient.

    Args:
        state (dict): Current graph state; reads 'initial_query', 'context'
            and 'num_steps'.

    Returns:
        dict: State update whose 'next_query' is the chain's whole parsed
        output, plus the incremented 'num_steps'.
    """
    print("---ANSWER ITERATOR---")
    chain_inputs = {
        "initial_query": state['initial_query'],
        "context": state['context'],
    }
    step_count = state['num_steps'] + 1

    # Decide whether more information is needed and what to ask next.
    iterator_output = answer_iterator_chain.invoke(chain_inputs)

    return {"next_query": iterator_output, "num_steps": step_count}

In [None]:
def state_printer(state):
    """Print the main fields of the graph state.

    Keys that have not been written yet are printed as None instead of
    raising KeyError.

    Args:
        state (dict): Current graph state.

    Returns:
        None
    """
    print("------------------STATE PRINTER------------------")
    print(f"Num Steps: {state.get('num_steps')} \n")
    print(f"Initial Query: {state.get('initial_query')} \n" )
    print(f"Next Query: {state.get('next_query')} \n" )
    print(f"RAG Questions: {state.get('rag_questions')} \n")
    print(f"Tool Parameters: {state.get('tool_parameters')} \n")
    print(f"Context: {state.get('context')} \n" )
    return

In [None]:
def final_answer_printer(state):
    """Print a banner followed by the 'final_answer' stored in the state."""
    banner = "------------------FINAL ANSWER------------------"
    print(banner)
    answer = state['final_answer']
    print(f"Final Answer: {answer} \n")

## Conditional Edges

In [None]:
def route_to_tool(state):
    """
    Route to the necessary tool.

    An unrecognized selection falls back to 'web_search' (the choice the
    routing prompt prescribes when unsure) instead of returning None.

    Args:
        state (dict): The current graph state
    Returns:
        str: Next node to call
    """
    selection = state['selected_tool']

    banners = {
        'RAG_retriever': "---ROUTE QUERY TO RAG RETRIEVER---",
        'web_search': "---ROUTE QUERY TO WEB SEARCH---",
        'calculator': "---ROUTE QUERY TO CALCULATOR---",
        'model_modifier': "---ROUTE QUERY TO MODEL MODIFIER---",
    }

    # The selection comes from model output, so it may be anything.
    if not isinstance(selection, str) or selection not in banners:
        print(f"---UNKNOWN TOOL {selection!r}, FALLING BACK TO WEB SEARCH---")
        selection = 'web_search'

    print(banners[selection])
    return selection

In [None]:
def route_to_iterate(state):
    """Choose between answering now and gathering more context.

    Args:
        state (dict): Current graph state; 'next_query' must be a mapping
            with a 'ready_to_answer' entry.

    Returns:
        str: "ready_to_answer" when that entry is truthy, otherwise
        "need_context".
    """
    print("---ROUTE TO ITERATE---")
    iterator_output = state["next_query"]
    print(iterator_output)

    if not iterator_output['ready_to_answer']:
        print("---GATHER MORE CONTEXT---")
        return "need_context"

    print("---GENERATE FINAL ANSWER---")
    return "ready_to_answer"

## Build the graph

### Build the nodes

In [None]:
# Uncomment to install dependencies

# %pip install -U langgraph

In [None]:
workflow = StateGraph(GraphState)

# Define the nodes: graph node name -> function run by that node.
_NODE_FUNCTIONS = {
    "tool_selector": tool_selector,
    "research_info_rag": research_info_rag,  # RAG search
    "research_info_web": research_info_web,  # web search
    "state_printer": state_printer,
    "calculator": calculator,
    "date_getter": date_getter,
    "model_modifier": model_modifier,
    "output_generator": output_generator,
    "iterate_over_answer": iterate_over_answer,
    "final_answer_printer": final_answer_printer,
}
for _node_name, _node_function in _NODE_FUNCTIONS.items():
    workflow.add_node(_node_name, _node_function)

### Add edges

In [None]:
# Every run starts by recording the date, then asks the answer iterator.
workflow.set_entry_point("date_getter")
workflow.add_edge("date_getter", "iterate_over_answer")

# Answer iterator: either write the final answer or pick a tool.
_ITERATE_TARGETS = {
    "ready_to_answer": "output_generator",
    "need_context": "tool_selector",
}
workflow.add_conditional_edges("iterate_over_answer", route_to_iterate, _ITERATE_TARGETS)

# Tool selector: map the router decision to the node implementing the tool.
_TOOL_TARGETS = {
    "RAG_retriever": "research_info_rag",
    "web_search": "research_info_web",
    "calculator": "calculator",
    "model_modifier": "model_modifier",
}
workflow.add_conditional_edges("tool_selector", route_to_tool, _TOOL_TARGETS)

# Every tool node reports to the state printer, which loops back to the iterator.
for _tool_node in ("research_info_rag", "research_info_web", "calculator", "model_modifier"):
    workflow.add_edge(_tool_node, "state_printer")

workflow.add_edge("state_printer", "iterate_over_answer")

# Final answer path.
workflow.add_edge("output_generator", "final_answer_printer")
workflow.add_edge("final_answer_printer", END)

In [None]:
# Compile
# Build `app` from the configured workflow; the run cell streams results from it.
app = workflow.compile()

In [None]:
# Sample queries; leave exactly one uncommented.
#query = 'If I pay half the age of Tom Jobim plus the height of the Empire State for a car, how much I\'ve paid?'
#query = 'What is 10 to the power of 0.4?'
#query = 'What is the temperature and humidity in Migliarino right now? And also, what time is it?'
#query = 'Modify the parameter X to 24 for me please'
#query = 'What are some of the most important things that happened today in past years?'
#query = 'What day is today?'
#query = 'How can LangSmith help in my project?'
#query = 'I am always coming but never arrive. What am I?'
#query = 'Change the lifetime of wind power plants to 25 years please'
query = "Divide the height of the Burj Khalifa by Ronaldinho Gaucho's age, then add the current temperature in Paris (in Celsius)"
#query = 'What are good famous and more casual board games that can be played by two players?'

# run the agent
# Initial state: the user's query, a zeroed step counter and an empty context.
inputs = dict(initial_query=query, num_steps=0, context=[])
for output in app.stream(inputs):
    # Each streamed item is keyed by the node(s) that just finished.
    for key in output.keys():
        print(f"Finished running: {key}:")