In [63]:
#TODO
# Include source to web search
# Include a case if the user asks for more than one action on the modelling tools in a single query (for now we'll take care of just one at a time)
# Connect all outputs of modelling tools to a decision router to verify if there was a valid selection

In [64]:
### Necessary dependencies

## Model instantiation
# %pip install transformers -U
# %pip -q install langchain-groq

## RAG node
# %pip install beautifulsoup4
# %pip install faiss-cpu

## Web search node
# %pip install -U langchain-community tavily-python

## Graph building
# %pip install -U langgraph

## Import api keys

In [65]:
import yaml

# Read the API keys from the local secrets file, parsed with PyYAML's safe loader.
with open('secrets.yml', 'r') as f:
    secrets = yaml.safe_load(f)

## Defining the model

To test it, first run `ollama serve` in a local terminal (this is only needed for the RAG embeddings).

In [66]:
from langchain_groq import ChatGroq
import os

# ChatGroq is built below without an explicit key, so the key is supplied
# through the environment (first entry of the 'groq' list in the secrets file).
os.environ["GROQ_API_KEY"] = secrets['groq'][0]

# Free-text model, used by the chains that end in StrOutputParser.
chat_model = ChatGroq(model="llama3-70b-8192")

# Same model bound to the JSON-object response format, used by the chains
# that end in JsonOutputParser.
json_model = ChatGroq(model="llama3-70b-8192").bind(
    response_format={"type": "json_object"}
)

## Alternative model

Use this one if GROQ stops working (will need to figure out the response as JSON)

In [67]:
# from langchain_community.llms import HuggingFaceEndpoint
# from langchain_community.chat_models.huggingface import ChatHuggingFace

# llm = HuggingFaceEndpoint(repo_id="meta-llama/Meta-Llama-3-8B-Instruct", huggingfacehub_api_token=secrets['huggingface'][0])
# chat_model = ChatHuggingFace(llm=llm)

In [68]:
# Smoke test: a single call to confirm that the API key and model are usable.
chat_model.invoke('Hello, who are you?')

AIMessage(content="Nice to meet you! I'm LLaMA, an AI assistant developed by Meta AI that can understand and respond to human input in a conversational manner. I'm not a human, but a computer program designed to simulate conversation, answer questions, and even generate text based on the input I receive.\n\nI can chat with you about a wide range of topics, from science and history to entertainment and culture. I can also help with language-related tasks, such as language translation, grammar correction, and text summarization.\n\nSo, what's on your mind? Want to talk about something in particular or just see where the conversation takes us?", response_metadata={'token_usage': {'completion_tokens': 129, 'prompt_tokens': 16, 'total_tokens': 145, 'completion_time': 0.368571429, 'prompt_time': 0.004761315, 'queue_time': None, 'total_time': 0.373332744}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_87cbfbbc4d', 'finish_reason': 'stop', 'logprobs': None}, id='run-013dd514-2c24-

## State

Defines the graph's state dictionary

In [69]:
from langchain.schema import Document
from langgraph.graph import END, StateGraph
from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        initial_query: user input
        query_type: type assigned to the query by the type identifier node
        next_query: partial query generated by the agent
        num_steps: number of steps
        selected_tool: name of the selected tool
        identified_model: name of the model identified by the agent
        rag_questions: questions used for retrieval
        tool_parameters: parameters to be used by tools
        context: list of context generated for the query
        complete_data: indicates completeness of data
        final_answer: LLM generation
    """
    initial_query : str
    query_type: str
    next_query: str
    num_steps : int
    selected_tool: str
    identified_model: str
    rag_questions : List[str]
    tool_parameters: str
    context : List[str]
    complete_data : bool
    final_answer : str

## Type identifier node

In [70]:
from langchain.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import JsonOutputParser

In [71]:
# Prompt asking the model to classify a query as "general", "energy_system" or
# "mixed" and to answer with a JSON object holding a single 'query_type' key.
type_identifier_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an expert at identifying the type of a query provided by the user among
    the types "general", "energy_system" and "mixed".

    "general": the query is related to some generic topic, it may consist of one or more
    points that require searching for information. \n
    
    "energy_system": the query is a direct command related to the energy system model, it can
    be a request to change parameters, plot data, run simulations, or anything on this lines.
    To be characterized as this class, it should need no external information. Names of
    simulations, scenarios, parameters and any other potential name is assumed to be know by our tools. \n
    
    "mixed": the query is related to the energy system model, but it requires external data for the
    command to be complete. It MUST be related to running anything related to the energy system,
    otherwise it is not mixed. \n
    
    You must output a JSON with a single key 'query_type' containing exclusivelly the 
    selected type. \n
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY : {query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["query"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
type_identifier_chain = type_identifier_prompt | json_model | JsonOutputParser()

# query = 'Modify the cost of CO2 in 2020 to be same price as a liter of Coca Cola'
# print(type_identifier_chain.invoke({"query": query}))

In [72]:
def type_identifier(state):
    """Classify the user's initial query and record the detected type.

    Args:
        state (dict): Current graph state; 'initial_query' and 'num_steps' are read.

    Returns:
        dict: State update with 'query_type' (the 'query_type' entry of the
        chain output) and 'num_steps' incremented by one.
    """
    print("---TYPE IDENTIFIER---")
    user_query = state['initial_query']
    step_count = state['num_steps'] + 1

    print(f'QUERY: {user_query}')

    classification = type_identifier_chain.invoke({"query": user_query})
    detected_type = classification['query_type']

    print(f'IDENTIFIED_TYPE: {detected_type}\n')

    return {"query_type": detected_type, "num_steps": step_count}

## Energy System tool selector node

In [73]:
# Prompt that routes an energy-system query to one of the modelling tools and
# extracts the model name. The list of allowed values uses the same tool name
# ('model_modifier') as the routing rule above it; previously the list said
# 'model_modificator', so the model could legitimately return either spelling.
# NOTE(review): any router comparing 'selected_tool' must use 'model_modifier' - confirm.
es_tool_selector_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an expert at reading the user QUERY and routing it to the correct tool in our
    modelling system. \n

    Use the following criteria to decide how to route the query to one of our available tools: \n\n
    
    If the user asks for any modification on any particular model, select 'model_modifier'. \n
    
    If the user asks to plot anything, select 'data_plotter'. \n
    
    If the user asks to run a simulation of any particular model, select 'sim_runner'. \n

    You must output a JSON object with two keys:
    'selected_tool' containing one of the following values ['model_modifier', 'data_plotter', 'sim_runner'];
    'selected_model' containing the name of the model to be manipulated. \n
    
    If the user didn't provide a model name, fill the key 'selected_model' with 'NO_MODEL'. \n
    
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY : {query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["query"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
es_tool_selector_chain = es_tool_selector_prompt | json_model | JsonOutputParser()

# initial_query = 'Modify the lifetime of wind power plants to 20 years'
# print(es_tool_selector_chain.invoke({"query": initial_query}))

In [74]:
def es_tool_selector(state):
    """Pick the modelling tool and the target model for the initial query.

    Args:
        state (dict): Current graph state; 'initial_query' and 'num_steps' are read.

    Returns:
        dict: State update with 'selected_tool' and 'identified_model' (the
        'selected_tool' and 'selected_model' entries of the chain output) and
        'num_steps' incremented by one.
    """
    print("---ENERGY SYSTEM TOOL SELECTION---")
    user_query = state['initial_query']
    step_count = state['num_steps'] + 1

    print(f'QUERY: {user_query}')

    routing = es_tool_selector_chain.invoke({"query": user_query})
    chosen_tool = routing['selected_tool']
    model_name = routing['selected_model']

    print(f'SELECTED TOOL: {chosen_tool}')
    print(f'IDENTIFIED MODEL: {model_name}\n')

    return {
        "selected_tool": chosen_tool,
        "identified_model": model_name,
        "num_steps": step_count,
    }

## Model selector node

In [129]:
from os import walk

def model_selector(state):
    """Ask the user to pick one of the files found in the 'Models' directory.

    The files directly inside 'Models' are listed with 1-based numbers and the
    user is prompted until a valid number is entered. Non-numeric or
    out-of-range answers (including 0, which would otherwise index the last
    entry) trigger a new prompt instead of an exception or a wrong selection.

    Args:
        state (dict): Current graph state; 'num_steps' is read.

    Returns:
        dict: State update with 'identified_model' (the chosen file name) and
        'num_steps' incremented by one.

    Raises:
        FileNotFoundError: If 'Models' is missing or holds no files, since
            there would be nothing valid to select.
    """
    num_steps = state['num_steps'] + 1

    print("No valid model was found for the requested action, the available models are:\n")

    # Only the first tuple produced by walk() is used (top-level files); the
    # default tuple covers the case where the directory does not exist.
    available_models = next(walk('Models'), (None, None, []))[2]
    if not available_models:
        raise FileNotFoundError("No model files were found in the 'Models' directory")

    for position, model_name in enumerate(available_models, start=1):
        print(f'{position}: {model_name}')

    while True:
        answer = input('Please, inform the number of the desired model:\n')
        try:
            choice = int(answer)
        except ValueError:
            choice = 0  # falls through to the range check below
        if 1 <= choice <= len(available_models):
            break
        print(f'Invalid selection, please enter a number between 1 and {len(available_models)}.')

    return {"identified_model": available_models[choice - 1],
            "num_steps": num_steps}

In [124]:
def validated_model(state):
    """No-op: ignores ``state`` and returns None."""
    return

## Mixed node

In [76]:
# Prompt asking whether QUERY plus CONTEXT already hold enough data to carry out
# the energy-system command; the answer is a JSON object with a boolean
# 'complete_data' key.
mixed_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an expert at reading the user QUERY and the available CONTEXT to decide if there
    is already enough information gathered to fulfill the energy system related command
    made by the user. \n
    
    You must be certain that you have all the data before deciding to send it to the
    modelling section of the pipeline.

    You must output a JSON object with a single key 'complete_data' containing a boolean
    on whether you have enough data for the user's request or not. \n
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY : {query} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["query","context"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
mixed_chain = mixed_prompt | json_model | JsonOutputParser()

# initial_query = 'Modify the lifetime of wind power plants to be the age of Ronaldinho Gaucho plus Oprah age'
# context = ['The current age of Ronaldinho Gaucho is 44 years old', 'Oprah is 68 years old']
# print(mixed_chain.invoke({"query": initial_query, "context": context}))

In [77]:
def mixed(state):
    """Decide whether the gathered context is enough to run a mixed query.

    Args:
        state (dict): Current graph state; 'initial_query', 'context' and
            'num_steps' are read.

    Returns:
        dict: State update with 'complete_data' (the 'complete_data' entry of
        the chain output) and 'num_steps' incremented by one.
    """
    # The banner used to read "---TOOL SELECTION---", which is the banner of
    # the tool_selector node and made the two nodes indistinguishable in logs.
    print("---MIXED QUERY DATA CHECK---")
    query = state['initial_query']
    context = state['context']
    num_steps = state['num_steps'] + 1

    print(f'QUERY: {query}')
    print(f'CONTEXT: {context}')

    decision = mixed_chain.invoke({"query": query, "context": context})['complete_data']

    print(f'DATA IS COMPLETE: {decision}\n')

    return {"complete_data": decision,
            "num_steps": num_steps}

## Tool selector node

In [78]:
# Prompt that routes a query to one of 'RAG_retriever', 'calculator' or
# 'web_search'; the answer is a JSON object with a single 'router_decision' key.
# Only {query} is substituted into the template.
tool_selector_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an expert at reading a QUERY from a user and routing to our internal knowledge system\
     or directly to final answer. \n

    Use the following criteria to decide how to route the query to one of our available tools: \n\n
    
    If the user asks anything about LangSmith, you should use the 'RAG_retriever' tool.
    
    For any mathematical problem you should use 'calculator'. Be sure that you have all the necessary
    data before routing to this tool.

    If you are unsure or the person is asking a question you don't understand then choose 'web_search'

    You do not need to be stringent with the keywords in the question related to these topics. Otherwise, use web_search.
    Give a choice contained in ['RAG_retriever','calculator','web_search'].
    Return the a JSON with a single key 'router_decision' and no premable or explaination.
    Use the initial query of the user and any available context to make your decision about the tool to be used.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY : {query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["query"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
tool_selector_chain = tool_selector_prompt | json_model | JsonOutputParser()

# initial_query = 'Please, let me know the weather in San Francisco'
# print(tool_selector_chain.invoke({"query": initial_query}))

In [79]:
def tool_selector(state):
    """Choose the tool that should handle the pending query.

    Args:
        state (dict): Current graph state; 'next_query' and 'num_steps' are read.

    Returns:
        dict: State update with 'selected_tool' (the 'router_decision' entry of
        the chain output) and 'num_steps' incremented by one.
    """
    print("---TOOL SELECTION---")
    pending_query = state['next_query']
    step_count = state['num_steps'] + 1

    print(f'QUERY: {pending_query}')

    routing = tool_selector_chain.invoke({"query": pending_query})
    chosen_tool = routing['router_decision']

    print(f'SELECTED TOOL: {chosen_tool}\n')

    return {"selected_tool": chosen_tool, "num_steps": step_count}

## Web/RAG answer analyzer prompt

In [80]:
# Prompt that condenses SEARCH_RESULTS into a brief answer to QUERY, optionally
# using CONTEXT. It is shared by the RAG and web search nodes.
answer_analyzer_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an expert at summarizing a bunch of data to extract only the important bits from it.

    Given the user's QUERY and the SEARCH_RESULTS, summarize as briefly as possible the information
    searched by the user. Don't give any preamble or introduction, go directly to the summary
    of the requested information.
    
    If it helps to provide a more precise answer, you can also make use of the CONTEXT.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY: {query} \n
    SEARCH_RESULTS: {search_results} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["query","search_results","context"],
)
# Pipeline: prompt -> free-text chat model -> plain string output.
answer_analyzer_chain = answer_analyzer_prompt | chat_model | StrOutputParser()

# query = 'How much does a liter of Coca Cola cost in Brazil?'
# search = page_content="Brazil - Coca-Cola - price, May 2024. The price is 0.86 USD. The average price for all countries is 1.04 USD. The database includes 90 countries. Definition: The Coca - Cola prices are for a bottle of 0.5 l. Adjustments were made to the various measuring units across countries to arrive at a uniform measure of 0.5 l.\nBased on 90 countries included in our data base, the average price is 1.04 USD. Looking at the latest data, the lowest price was 0.22 USD (Nigeria) and the highest price was 2.60 USD (Norway). Definition: The Coca - Cola prices are for a bottle of 0.5 l.\nSee current prices by country for prices of items we do track. You can see prices only for countries where we have decent number of contributors. Prices by Country of Coke/Pepsi (0.33 liter bottle) (Restaurants)\nCoca-Cola FEMSA is the largest independent bottler of Coca-Cola products in the world, and the largest of several local bottling partners in Brazil. Within FEMSA's South America operating division (of which Brazil is the largest single market), FEMSA reported a 25.9%% increase in the average price per unit case for the first six months of 2022.\nLarge corporations. There are three main soda companies in the country. Data below is provided by Afrebras. Coca-Cola Company, which has a market share of 55%% in volume and 62%% in value. AmBev, with a market share of 19%% in volume and 21%% in value. Brasil Kirin, with a market share of 5%% in volume and 4%% in value."
# print(answer_analyzer_chain.invoke({"query": query, "search_results": search, "context": []}))

## RAG node

For now it's just a placeholder that searches for answers to questions about LangSmith.

In [81]:
## RAG QUESTIONS
# Prompt that turns the query into questions for the knowledge system (the text
# asks for no more than 3); the answer is a JSON object with a single
# 'questions' key.
search_rag_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a master at working out the best questions to ask our knowledge agent to get the best info for the customer.

    Given the INITIAL_QUERY, work out the best questions that will find the best \
    info for helping to write the final answer. Write the questions to our knowledge system not to the customer.

    Return a JSON with a single key 'questions' with no more than 3 strings of and no preamble or explaination.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["initial_query"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
question_rag_chain = search_rag_prompt | json_model | JsonOutputParser()

# query = 'What are the main benefits of using LangSmith for developing a tool to levarage LLMs?'
# print(question_rag_chain.invoke({"initial_query": query}))

In [82]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough

# Load the page that backs the retriever (the LangSmith user guide).
# Note: this performs a network request every time the cell runs.
loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")
docs = loader.load()

# Embedding model served through Ollama (hence the `ollama serve` requirement
# mentioned in the model section of this notebook).
embeddings = OllamaEmbeddings(model="llama3")

# Split the loaded documents with the splitter's default settings and index
# the resulting chunks in a FAISS vector store.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)

# Retriever over the vector store; used as the "context" input of rag_chain.
retriever = vector.as_retriever()

In [83]:
# RAG chain
# Question-answering prompt filled with the retrieved CONTEXT and the QUESTION.
rag_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n

     <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUESTION: {question} \n
    CONTEXT: {context} \n
    Answer:
    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question","context"],
)
# The chain input (a question string) is sent to the retriever to build
# "context" and is passed through unchanged as "question"; the filled prompt
# then goes to the chat model and the reply is returned as a plain string.
rag_chain = (
    {"context": retriever , "question": RunnablePassthrough()}
    | rag_prompt
    | chat_model
    | StrOutputParser()
)

In [84]:
def research_info_rag(state):
    """Answer the pending query through the RAG chain and store a summary.

    The pending query is expanded into retrieval questions, each question is
    answered by ``rag_chain``, and the collected question/answer texts are
    condensed by ``answer_analyzer_chain`` into one context entry.

    Args:
        state (dict): Current graph state; 'next_query', 'context' and
            'num_steps' are read.

    Returns:
        dict: State update with 'context' (previous context plus the summary),
        'rag_questions' (the generated questions) and 'num_steps' incremented
        by one.
    """
    print("---RAG LANGSMITH RETRIEVER---")
    initial_query = state['next_query']
    context = state['context']
    num_steps = state['num_steps'] + 1

    questions = question_rag_chain.invoke({"initial_query": initial_query})['questions']

    # rag_results always starts as a list, so answers are appended directly
    # (the former `is not None` check could never take its else branch).
    rag_results = []
    for idx, question in enumerate(questions):
        print(f'QUESTION {idx}: {question}')
        answer = rag_chain.invoke(question)
        print(f'ANSWER FOR QUESTION {idx}: {answer}')
        rag_results.append(question + '\n\n' + answer + "\n\n\n")
    print(f'FULL ANSWERS: {rag_results}\n')

    processed_searches = answer_analyzer_chain.invoke({"query": initial_query,
                                                       "search_results": rag_results,
                                                       "context": context})

    return {"context": context + [processed_searches],
            "rag_questions": questions,
            "num_steps": num_steps}

## Web search node

In [85]:
## Search keywords
# Prompt that turns the query into web-search keywords (the text asks for no
# more than 3); the answer is a JSON object with a single 'keywords' key.
search_keyword_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a master at working out the best keywords to search for in a web search to get the best info for the user.

    Given the INITIAL_QUERY, work out the best keywords that will find the info requested by the user
    The keywords should have between 3 and 5 words each, if the query allows for it.

    Return a JSON with a single key 'keywords' with no more than 3 keywords and no preamble or explaination.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["initial_query"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
search_keyword_chain = search_keyword_prompt | json_model | JsonOutputParser()

# query = 'Who is the current holder of the speed skating world record on 500 meters?'
# print(search_keyword_chain.invoke({"initial_query": query}))

In [86]:
from langchain_community.tools.tavily_search import TavilySearchResults
import os

# The Tavily tool is created without an explicit key, so the key (first entry
# of the 'tavily' list in the secrets file) is supplied through the environment.
os.environ["TAVILY_API_KEY"] = secrets['tavily'][0]
web_search_tool = TavilySearchResults()

In [87]:
def research_info_web(state):
    """Search the web for the pending query and store a summary of the results.

    Keywords are generated from the pending query, each keyword is searched
    with ``web_search_tool``, and the collected results are condensed by
    ``answer_analyzer_chain`` into one context entry.

    Args:
        state (dict): Current graph state; 'next_query', 'context' and
            'num_steps' are read.

    Returns:
        dict: State update with 'context' (previous context plus the summary)
        and 'num_steps' incremented by one.
    """
    print("---RESEARCH INFO SEARCHING---")
    initial_query = state['next_query']
    context = state['context']
    num_steps = state['num_steps'] + 1

    # Web search
    keywords = search_keyword_chain.invoke({"initial_query": initial_query, "context": context})
    keywords = keywords['keywords']

    # full_searches always starts as a list, so results are appended directly
    # (the former `is not None` check could never take its else branch).
    full_searches = []
    for idx, keyword in enumerate(keywords):
        print(f'KEYWORD {idx}: {keyword}')
        found = web_search_tool.invoke({"query": keyword})
        # isinstance also accepts list/dict subclasses, unlike `type(x) == list`.
        if isinstance(found, list):
            web_results = Document(page_content="\n".join(d["content"] for d in found))
        elif isinstance(found, dict):
            web_results = Document(page_content=found["content"])
        else:
            # Any other return value is treated as "nothing found".
            web_results = 'No results'
        print(f'RESULTS FOR KEYWORD {idx}: {web_results}')
        full_searches.append(web_results)
    print(f'FULL RESULTS: {full_searches}\n')

    processed_searches = answer_analyzer_chain.invoke({"query": initial_query,
                                                       "search_results": full_searches,
                                                       "context": context})

    print(f'PROCESSED RESULT: {processed_searches}')

    return {"context": context + [processed_searches],
            "num_steps": num_steps}

## Model modifier node

In [88]:
## MODEL MODIFIER
# Prompt that extracts the parameter to change and its new value; the answer is
# a JSON object with the keys 'parameter' and 'new_value'.
model_modifier_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at building JSON to modify a energy system model, whenever the user asks \
    you to modify a parameter, you will build a JSON object with the desired modifications.
    
    Given the INITIAL_QUERY, determine the parameter that the user wants to change, and the new value that should be applied \
    and with this information, return a JSON with only two keys 'parameter' and 'new_value' with no preamble or explanaition

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["initial_query","context"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
model_modifier_chain = model_modifier_prompt | json_model | JsonOutputParser()

# query = 'I want the lifetime of wind power plants to be modified to 50 years'
# context = []
# print(model_modifier_chain.invoke({"initial_query": query, "context": context}))

In [89]:
def model_modifier(state):
    """Build the modification parameters for the query and report success.

    The parameters produced by the chain are only printed; no model is
    touched here, and a fixed success message is appended to the context.

    Args:
        state (dict): Current graph state; 'initial_query', 'context' and
            'num_steps' are read.

    Returns:
        dict: State update with 'context' (previous context plus the success
        message) and 'num_steps' incremented by one.
    """
    print("---MODEL MODIFIER TOOL---")
    user_query = state['initial_query']
    gathered_context = state['context']
    step_count = state['num_steps'] + 1

    modification = model_modifier_chain.invoke(
        {"initial_query": user_query, "context": gathered_context}
    )
    print(f'JSON:\n{modification}\n')

    return {
        "context": gathered_context + ['The model was successfully modified'],
        "num_steps": step_count,
    }

## Data plotter node

In [90]:
## DATA PLOTTER
# Prompt that extracts the plot request; the answer is a JSON object with the
# keys 'model_name', 'scenario_name' and 'plot_type'.
data_plotter_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at building JSON to plot data from an energy system model, whenever the user asks
    you to plot data from the model, you will build a JSON object with the desired plot and scenario.
    
    Given the INITIAL_QUERY and the CONTEXT, determine the details required by the plot. You will return
    a JSON object with only three keys, 'model_name', 'scenario_name', 'plot_type'.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["initial_query", "context"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
data_plotter_chain = data_plotter_prompt | json_model | JsonOutputParser()

# query = 'Give me the Sankey plot of the model SimpleDemo and scenario LowGasPrice'
# context = []
# print(data_plotter_chain.invoke({"initial_query": query, "context": context}))

In [91]:
def data_plotter(state):
    """Build the plot parameters for the query and report success.

    The parameters produced by the chain are only printed; nothing is plotted
    here, and a fixed success message is appended to the context.

    Args:
        state (dict): Current graph state; 'initial_query', 'context' and
            'num_steps' are read.

    Returns:
        dict: State update with 'context' (previous context plus the success
        message) and 'num_steps' incremented by one.
    """
    print("---DATA PLOTTER TOOL---")
    user_query = state['initial_query']
    gathered_context = state['context']
    step_count = state['num_steps'] + 1

    plot_request = data_plotter_chain.invoke(
        {"initial_query": user_query, "context": gathered_context}
    )
    print(f'JSON:\n{plot_request}\n')

    return {
        "context": gathered_context + ['The plot was successfully generated'],
        "num_steps": step_count,
    }

## Simulation runner node

In [92]:
## SIM RUNNER
# Prompt that extracts the model to simulate; the answer is a JSON object with
# a single 'model_name' key. The instruction now refers to the simulation; it
# previously said "required by the plot", a leftover from the plotter prompt.
sim_runner_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at building JSON to run simulations of an energy system model, whenever the user asks
    you to run a new simulation of the model, you will build a JSON object with the desired model.
    
    Given the INITIAL_QUERY and the CONTEXT, determine the details required by the simulation. You will return
    a JSON object with a single key, 'model_name'.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["initial_query", "context"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
sim_runner_chain = sim_runner_prompt | json_model | JsonOutputParser()

# query = 'Run the SimpleDemo model'
# context = []
# print(sim_runner_chain.invoke({"initial_query": query, "context": context}))

In [93]:
def sim_runner(state):
    """Build the simulation parameters for the query and report success.

    The parameters produced by the chain are only printed; no simulation is
    started here, and a fixed success message is appended to the context.

    Args:
        state (dict): Current graph state; 'initial_query', 'context' and
            'num_steps' are read.

    Returns:
        dict: State update with 'context' (previous context plus the success
        message) and 'num_steps' incremented by one.
    """
    print("---SIMULATION RUNNER TOOL---")
    user_query = state['initial_query']
    gathered_context = state['context']
    step_count = state['num_steps'] + 1

    run_request = sim_runner_chain.invoke(
        {"initial_query": user_query, "context": gathered_context}
    )
    print(f'JSON:\n{run_request}\n')

    return {
        "context": gathered_context + ['The new simulation was successfully submited'],
        "num_steps": step_count,
    }

## Calculator node

In [94]:
## CALCULATOR
# Prompt that converts a calculation request into one binary operation; the
# answer is a JSON object with the keys 'operation', 'op_1' and 'op_2'.
calculator_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at building JSON to do calculations using a calculator tool.
    
    You can only output a single format of JSON object consisting in two operands
    and the operation. The name of the only three keys are 'operation', 'op_1' and 'op_2' \n
    
    'operation' can only be [+,-,*,/,^]
    'op_1' and 'op_2' must be integers or float\n
    
    If you judge that the equation consists of more than one operation, solve only one,
    the calculator can be called multiple times and the other results will be solved
    later.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["initial_query"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
calculator_chain = calculator_prompt | json_model | JsonOutputParser()

# query = 'How much is 27 to the power of 5 plus 7?'
# print(calculator_chain.invoke({"initial_query": query}))

In [95]:
def calculator(state):
    """Run the single binary operation extracted from the pending query.

    The chain output supplies 'operation' (one of + - * / ^) and the operands
    'op_1' and 'op_2'. Operands given as numeric strings are parsed first.
    Anything that cannot be computed (unknown operation, non-numeric operand,
    division by zero, float overflow) is reported in the context as a failure
    instead of raising.

    Args:
        state (dict): Current graph state; 'next_query', 'context' and
            'num_steps' are read.

    Returns:
        dict: State update with 'context' (previous context plus either
        "<op_1> <operation> <op_2> = <result>" or the failure message) and
        'num_steps' incremented by one.
    """
    import operator  # local import: only this node needs it

    print("---CALCULATOR TOOL---")

    query = state['next_query']
    context = state['context']
    parameters = calculator_chain.invoke({"initial_query": query})
    operation = parameters['operation']
    op_1 = parameters['op_1']
    op_2 = parameters['op_2']
    num_steps = state['num_steps'] + 1

    print(f'OPERATION: {operation}')
    print(f'OPERAND 1: {op_1}')
    print(f'OPERAND 2: {op_2}')

    operations = {
        "+": operator.add,
        "-": operator.sub,
        "/": operator.truediv,
        "*": operator.mul,
        "^": operator.pow,
    }

    def as_number(value):
        # The model may return operands as quoted numbers; parse those and
        # leave every other value untouched.
        if isinstance(value, str):
            try:
                return int(value)
            except ValueError:
                return float(value)
        return value

    try:
        result = operations[operation](as_number(op_1), as_number(op_2))
    except (KeyError, TypeError, ValueError, ZeroDivisionError, OverflowError):
        # None marks a failed calculation; a string sentinel could be
        # confused with a computed value.
        result = None

    if result is None:
        str_result = 'Unable to execute the selected operation'
    else:
        str_result = f'{op_1} {operation} {op_2} = {result}'

    print(f'RESULT: {str_result}\n')

    return {"context": context + [str_result],
            "num_steps": num_steps}

## Output generator node

In [96]:
## OUTPUT GENERATOR
# Prompt that writes the user-facing answer from INITIAL_QUERY and CONTEXT,
# without mentioning that a context was provided.
output_generator_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at answering the user based on context given. \n
    
    Given the INITIAL_QUERY and a CONTEXT, generate an answer for the query
    asked by the user. You should make use of the provided information
    to answer the user in the best possible way. If you think the answer
    does not answer the user completely, ask the user for the necessary
    information if possible. \n
    
    It's important never to cite that you got it from a context, the user should
    think that you know the information.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["initial_query","context"],
)
# Pipeline: prompt -> free-text chat model -> plain string output.
output_generator_chain = output_generator_prompt | chat_model | StrOutputParser()

# query = 'Is my car more powerful than a GT-R R32?'
# context = 'The car owned by the user is from 2010 and has 100 hp'
# print(output_generator_chain.invoke({"initial_query": query, "context": context}))

In [97]:
def output_generator(state):
    """Generate the final answer from the initial query and gathered context.

    Args:
        state (dict): Current graph state; 'initial_query', 'context' and
            'num_steps' are read.

    Returns:
        dict: State update with 'final_answer' (the chain output) and
        'num_steps' incremented by one. The step counter was previously
        incremented and then discarded, unlike in the other nodes.
    """
    print("---GENERATE OUTPUT---")
    ## Get the state
    initial_query = state['initial_query']
    context = state['context']
    num_steps = state['num_steps'] + 1

    answer = output_generator_chain.invoke({"initial_query": initial_query,
                                            "context": context})
    print(f'GENERATED OUTPUT:\n{answer}\n')

    return {"final_answer": answer,
            "num_steps": num_steps}

## Context analyzer node

In [98]:
## CONTEXT ANALYZER
# Prompt that decides whether CONTEXT is enough to answer INITIAL_QUERY and
# what to ask next; the answer is a JSON object with the keys 'ready_to_answer'
# and 'next_query'. The marker "<KEEP_QUERY>" is requested for the situations
# listed in the prompt.
context_analyzer_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at deciding if the already available information is enough to
    fully answer the user query. \n
    
    Given a INITIAL_QUERY and the available CONTEXT, decide if the available information
    is already enough to answer the query proposed by the user. \n
    
    Your job is to coordinate the usage of many tools, one at a time. To do this you will
    decide what information you need next, with the restriction that you can only get one
    information per iteration, and request it to the pipeline. \n
    
    Your output should be a JSON object containing two keys, 'ready_to_answer' and
    'next_query'. 'ready_to_answer' is a boolean that indicates if all necessary
    info is present and 'next_query' is a query that you should develop so the next
    agent in the pipeline can search for the required information. \n
    
    In the following situations you must output 'next_query' as "<KEEP_QUERY>":
    - User asks to modify parameters or characteristics of an energy system model;
    - Plotting, they don't require extra information, the tools can handle it perfectly;
    - User asks you to run a new simulation on an energy modeling system;
    - User gives you a direct command related to modelling;
    - The user asks anything about LangSmith (understand that as having the word LangSmith) \n
    
    You also have access to the last NEXT_QUERY you generated, to avoid repeating yourself.
    Never output the same 'next_query' that you've already asked in NEXT_QUERY. \n
    
    Consider that for you boolean answer the words false and true should always be written
    in full lower case. \n

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    CONTEXT: {context} \n
    NEXT_QUERY: {next_query} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["initial_query","context","next_query"],
)
# Pipeline: prompt -> JSON-mode model -> JSON output parser.
context_analyzer_chain = context_analyzer_prompt | json_model | JsonOutputParser()

# query = 'Is my car more powerful than a GT-R R32?'
# context = ['The car owned by the user is from 2010']
# print(context_analyzer_chain.invoke({"initial_query": query, "context": context, "next_query": ''}))

In [99]:
def context_analyzer(state):
    """
    Ask the context-analyzer chain what to do next for the current query.
    Args:
        state (dict): The current graph state; 'initial_query', 'next_query',
            'context' and 'num_steps' are read from it
    Returns:
        dict: 'next_query' holds the whole parsed chain output (the object with
            'ready_to_answer' and 'next_query' that route_to_iterate inspects)
            and 'num_steps' is the incoming counter plus one
    """
    print("---CONTEXT ANALYZER---")
    chain_input = {
        "initial_query": state['initial_query'],
        "next_query": state['next_query'],
        "context": state['context'],
    }
    step_count = state['num_steps'] + 1

    analysis = context_analyzer_chain.invoke(chain_input)

    # '<KEEP_QUERY>' is the sentinel the prompt defines for "forward the
    # user's query as-is", so swap it for the original query text.
    if analysis['next_query'] == '<KEEP_QUERY>':
        analysis['next_query'] = state['initial_query']

    return {"next_query": analysis, "num_steps": step_count}

## Date Getter Node

In [100]:
from datetime import datetime

def date_getter(state):
    """
    Append the current local date and time to the context.
    Args:
        state (dict): The current graph state; 'context' and 'num_steps' are read
    Returns:
        dict: 'context' is a new list (the incoming one plus one sentence with
            the timestamp) and 'num_steps' is the incoming counter plus one
    """
    print("---DATE GETTER TOOL---")
    previous_context = state['context']
    step_count = state['num_steps'] + 1

    timestamp = datetime.now().strftime("%d %B %Y, %H:%M:%S")
    print(f'CURRENT DATE: {timestamp}\n')

    date_fact = f'The current date and time are {timestamp}'
    return {
        "context": previous_context + [date_fact],
        "num_steps": step_count,
    }

## Printers

In [101]:
def state_printer(state):
    """Print the tracked fields of the graph state, one labelled entry each."""
    print("------------------STATE PRINTER------------------")
    # (label shown, key read from the state), in display order.
    tracked_fields = (
        ("Num Steps", 'num_steps'),
        ("Initial Query", 'initial_query'),
        ("Next Query", 'next_query'),
        ("RAG Questions", 'rag_questions'),
        ("Tool Parameters", 'tool_parameters'),
        ("Context", 'context'),
    )
    for label, key in tracked_fields:
        print(f"{label}: {state[key]} \n")
    return None

In [102]:
def final_answer_printer(state):
    """Print the 'final_answer' entry of the state under a banner."""
    print("------------------FINAL ANSWER------------------")
    answer = state['final_answer']
    print(f"Final Answer: {answer} \n")
    return None

## Conditional Edges

In [103]:
def route_to_type(state):
    """
    Choose the outgoing branch that matches the classified query type.
    Args:
        state (dict): The current graph state; 'query_type' is read from it
    Returns:
        str: 'general', 'energy_system' or 'mixed'; None for any other value
    """
    query_type = state['query_type']

    # (branch name, banner printed when that branch is taken)
    branches = (
        ('general', "---ROUTE QUERY TO GENERAL PATH---"),
        ('energy_system', "---ROUTE QUERY TO ENERGY SYSTEM PATH---"),
        ('mixed', "---ROUTE QUERY TO MIXED PATH---"),
    )
    for branch, banner in branches:
        if query_type == branch:
            print(banner)
            return branch
    return None

In [104]:
def route_from_mix(state):
    """
    Decide where a mixed query goes next, based on 'complete_data'.
    Args:
        state (dict): The current graph state; 'complete_data' is read from it
    Returns:
        str: 'complete_data' when the flag is truthy, otherwise 'needs_data'
    """
    print("---ROUTE TO MIX---")
    has_all_data = state['complete_data']
    print(has_all_data)

    if not has_all_data:
        print("---GATHER MORE CONTEXT---")
        return "needs_data"

    print("---APPLY COMMAND---")
    return "complete_data"

In [105]:
from os import walk

def validate_selected_model(state):
    """
    Check whether the identified model has a matching spreadsheet in 'Models'.
    Args:
        state (dict): The current graph state; 'identified_model' is read from it
    Returns:
        str: 'model_is_valid' when the model is not 'NO_MODEL' and
            '<identified_model>.xlsx' is among the files directly inside the
            'Models' directory; 'select_model' otherwise
    """
    model_name = state['identified_model']
    # The first tuple from walk() describes 'Models' itself; the fallback
    # tuple stands in when walk() yields nothing, giving an empty file list.
    _, _, model_files = next(walk('Models'), (None, None, []))

    if model_name != 'NO_MODEL' and f'{model_name}.xlsx' in model_files:
        return 'model_is_valid'
    return 'select_model'

In [106]:
def route_to_es_tool(state):
    """
    Choose which energy-system tool node runs next.
    Args:
        state (dict): The current graph state; 'selected_tool' is read from it
    Returns:
        str: 'data_plotter', 'sim_runner' or 'model_modifier'; None for any
            other value
    """
    chosen_tool = state['selected_tool']

    # (tool name, banner printed when that tool is chosen)
    tool_banners = (
        ('data_plotter', "---ROUTE QUERY TO DATA PLOTTER---"),
        ('sim_runner', "---ROUTE QUERY TO SIMULATION RUNNER---"),
        ('model_modifier', "---ROUTE QUERY TO MODEL MODIFIER---"),
    )
    for tool_name, banner in tool_banners:
        if chosen_tool == tool_name:
            print(banner)
            return tool_name
    return None

In [107]:
def route_to_tool(state):
    """
    Choose which general-purpose tool node runs next.
    Args:
        state (dict): The current graph state; 'selected_tool' is read from it
    Returns:
        str: 'RAG_retriever', 'web_search' or 'calculator'; None for any
            other value
    """
    chosen_tool = state['selected_tool']

    # (tool name, banner printed when that tool is chosen)
    tool_banners = (
        ('RAG_retriever', "---ROUTE QUERY TO RAG RETRIEVER---"),
        ('web_search', "---ROUTE QUERY TO WEB SEARCH---"),
        ('calculator', "---ROUTE QUERY TO CALCULATOR---"),
    )
    for tool_name, banner in tool_banners:
        if chosen_tool == tool_name:
            print(banner)
            return tool_name
    return None

In [108]:
def route_to_iterate(state):
    """
    Decide whether to produce the final answer or gather more context.
    Args:
        state (dict): The current graph state; 'next_query' must hold the
            object produced by context_analyzer, with a 'ready_to_answer' entry
    Returns:
        str: 'ready_to_answer' when the flag is set, otherwise 'need_context'
    Raises:
        KeyError: if 'next_query' or its 'ready_to_answer' entry is missing
    """
    print("---ROUTE TO ITERATE---")
    next_query = state['next_query']
    print(next_query)

    ready = next_query['ready_to_answer']
    # The flag originates from model-generated JSON, so it can arrive as text
    # instead of a boolean. Any non-empty string is truthy in Python, which
    # would send "false" straight to the final answer; map explicit negatives
    # to False and leave every other value's truthiness untouched.
    if isinstance(ready, str):
        ready = ready.strip().lower() not in ('false', 'no', '0', '')

    if ready:
        print("---GENERATE FINAL ANSWER---")
        return "ready_to_answer"

    print("---GATHER MORE CONTEXT---")
    return "need_context"

## Build the graph

### Build the nodes

In [131]:
# Create the graph builder, parameterised with the GraphState schema.
workflow = StateGraph(GraphState)

# Define the nodes
# Each call registers a callable under the string name that the edge
# definitions use to refer to it.
workflow.add_node("type_identifier", type_identifier)
workflow.add_node("es_tool_selector", es_tool_selector)
workflow.add_node("model_selector", model_selector)
workflow.add_node("validated_model", validated_model)
workflow.add_node("mixed", mixed)
workflow.add_node("tool_selector", tool_selector)
workflow.add_node("research_info_rag", research_info_rag) # RAG search
workflow.add_node("research_info_web", research_info_web) # web search
workflow.add_node("state_printer", state_printer)
workflow.add_node("calculator", calculator)
workflow.add_node("date_getter", date_getter)
workflow.add_node("model_modifier", model_modifier)
workflow.add_node("data_plotter", data_plotter)
workflow.add_node("sim_runner", sim_runner)
workflow.add_node("output_generator", output_generator)
workflow.add_node("context_analyzer", context_analyzer)
workflow.add_node("final_answer_printer", final_answer_printer)

### Add edges

In [132]:
# Every run starts at the date getter and then moves on to query classification.
workflow.set_entry_point("date_getter")
workflow.add_edge("date_getter", "type_identifier")
# Branch on the value returned by route_to_type.
workflow.add_conditional_edges(
    "type_identifier",
    route_to_type,
    {
        "general": "context_analyzer",
        "energy_system": "es_tool_selector",
        "mixed": "mixed",
    }
)

# Mixed queries: go to the energy-system tool selector once route_from_mix
# reports complete data, otherwise gather context first.
workflow.add_conditional_edges(
    "mixed",
    route_from_mix,
    {
        "complete_data": "es_tool_selector",
        "needs_data": "context_analyzer"
    }
)

# After the energy-system tool is selected, validate_selected_model decides
# whether a model still has to be selected.
workflow.add_conditional_edges(
    "es_tool_selector",
    validate_selected_model,
    {
        "select_model": "model_selector",
        "model_is_valid": "validated_model"
    }
)

# Both model branches ("validated_model" and "model_selector") fan out to the
# same three energy-system tools via route_to_es_tool.
workflow.add_conditional_edges(
    "validated_model",
    route_to_es_tool,
    {
        "model_modifier": "model_modifier",
        "data_plotter": "data_plotter",
        "sim_runner": "sim_runner",
    }
)

workflow.add_conditional_edges(
    "model_selector",
    route_to_es_tool,
    {
        "model_modifier": "model_modifier",
        "data_plotter": "data_plotter",
        "sim_runner": "sim_runner",
    }
)
# Every energy-system tool hands over to the output generator.
workflow.add_edge("model_modifier", "output_generator")
workflow.add_edge("data_plotter", "output_generator")
workflow.add_edge("sim_runner", "output_generator")

# Context loop: either answer now or pick a tool to fetch more context.
workflow.add_conditional_edges(
    "context_analyzer",
    route_to_iterate,
    {
        "ready_to_answer": "output_generator",
        "need_context": "tool_selector",
    },
)

workflow.add_conditional_edges(
    "tool_selector",
    route_to_tool,
    {
        "RAG_retriever": "research_info_rag",
        "web_search": "research_info_web",
        "calculator": "calculator",
    },
)
# Every context-gathering tool reports through the state printer.
workflow.add_edge("research_info_rag", "state_printer")
workflow.add_edge("research_info_web", "state_printer")
workflow.add_edge("calculator", "state_printer")

# Close the loop: after printing, route by query type again. Only "general"
# and "mixed" are mapped here; "energy_system" has no target from this node.
workflow.add_conditional_edges(
    "state_printer",
    route_to_type,
    {
        "general": "context_analyzer",
        "mixed": "mixed",
    }
)

# The generated output is printed and the run ends.
workflow.add_edge("output_generator", "final_answer_printer")
workflow.add_edge("final_answer_printer", END)

In [133]:
# Compile the wired graph into `app`, the object the run cell streams from.
app = workflow.compile()

In [134]:
# Example queries kept for manual testing; only the uncommented assignment
# at the end of the list is actually used.
#query = 'If I pay half the age of Tom Jobim plus the height of the Empire State for a car, how much I\'ve paid?'
#query = 'What is 10 to the power of 0.4?'
#query = 'What is the temperature and humidity in Migliarino right now? And also, what time is it?'
#query = 'Modify the parameter X to 24 for me please'
#query = 'What are some of the most important things that happened today in past years?'
#query = 'What day is today?'
#query = 'How can LangSmith help in my project?'
#query = 'I am always coming but never arrive. What am I?'
#query = 'Change the lifetime of wind power plants to 25 years please'
#query = 'Divide the height of the Burj Khalifa by Ronaldinho Gaucho\'s age, then add the current temperature in Paris (in Celsius)'
#query = 'What are good famous and more casual board games that can be played by two players?'
#query = 'Divide the number of visitors that the Eiffel tower receives yearly by the number of cars in the city of São Paulo, Brazil'
#query = 'Change the lifetime of wind power plants to be the age of Olaf Scholz'
query = 'Modify the lifetime of wind power plants to be the same value as the price of one liter of Coca Cola in Brazil.'

# run the agent
# Initial state: the user's query, no follow-up query yet, a zeroed step
# counter and an empty context list.
inputs = {"initial_query": query, "next_query": '', "num_steps": 0, "context": []}
# Stream the run with a recursion_limit of 50 in the config and report each
# key of every streamed chunk; `value` is not used.
for output in app.stream(inputs, {"recursion_limit": 50}):
    for key, value in output.items():
        print(f"Finished running <{key}> \n")

---DATE GETTER TOOL---
CURRENT DATE: 25 June 2024, 22:47:52

Finished running <date_getter> 

---TYPE IDENTIFIER---
QUERY: Modify the lifetime of wind power plants to be the same value as the price of one liter of Coca Cola in Brazil.
IDENTIFIED_TYPE: mixed

---ROUTE QUERY TO MIXED PATH---
Finished running <type_identifier> 

---TOOL SELECTION---
QUERY: Modify the lifetime of wind power plants to be the same value as the price of one liter of Coca Cola in Brazil.
CONTEXT: ['The current date and time are 25 June 2024, 22:47:52']
DATA IS COMPLETE: False

---ROUTE TO MIX---
False
---GATHER MORE CONTEXT---
Finished running <mixed> 

---CONTEXT ANALYZER---
---ROUTE TO ITERATE---
{'ready_to_answer': False, 'next_query': 'What is the current price of one liter of Coca Cola in Brazil?'}
---GATHER MORE CONTEXT---
Finished running <context_analyzer> 

---TOOL SELECTION---
QUERY: {'ready_to_answer': False, 'next_query': 'What is the current price of one liter of Coca Cola in Brazil?'}
SELECTED TO