# 1. SETUP AND IMPORTS

In [4]:
import logging
import os
import getpass
import time
import uuid
from datetime import datetime, timedelta

# LangChain/LangGraph Components
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chat_models import init_chat_model
from langgraph.prebuilt import ToolNode
from langgraph.prebuilt import tools_condition
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langgraph.graph import MessagesState, StateGraph, END
from langgraph.checkpoint.memory import MemorySaver
from typing import List
from langchain_core.documents import Document


# Utility
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# --- Configuration ---
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("Please set your GOOGLE_API_KEY variable.")

# if not os.getenv("GOOGLE_API_KEY"):
#     raise ValueError("Please set your GOOGLE_API_KEY variable.")

# --- RAG Setup (Placeholder) ---
PDF_PATH = "data.pdf"

logging.basicConfig(
    filename='app.log',
    encoding="utf-8",
    filemode="a",
    format="%(levelname)s:%(name)s:%(message)s")

In [5]:
class InvalidLocationError(ValueError):
    """Exception raised for unsupported locations."""
    pass

class InvalidDateError(ValueError):
    """Exception raised for invalid date formats."""
    pass

# 2 Compulsory tool: get_weather

In [6]:
@tool
def get_weather(date: str, location: str) -> str:
    """
    Returns the weather forecast for a specific date and city.
    """
    # Basic Error Management: Check for missing location
    if not location:
        error_msg = "Location is not provided"
        logging.error(error_msg)
        raise InvalidLocationError(error_msg)

    # Parse the date, raise error if not in the right format
    try:
        forecast_date = datetime.strptime(date, '%Y-%m-%d').date()
        today = datetime.now().date()
    except:
        error_msg = "Invalid date format. The date MUST be in 'YYYY-MM-DD' format."
        logging.error(error_msg)
        raise InvalidDateError(error_msg)
    
    # Basic Error Management: Check if the forecast is too far in the future
    if forecast_date > today + timedelta(days=7):
        error_msg = f"Forecast for {date} is too far in the future. We can only predict the weather for the next 7 days."
        logging.error(error_msg)
        raise InvalidDateError(error_msg)
    
    # Mocked API Logic
    if location.lower() in ["vilafranca", "sitges"] and forecast_date >= today:
        day_of_week = forecast_date.weekday()
        
        if day_of_week in [5, 6]: # Weekend
            weather = "sunny"
            temp = "28°C"
        elif day_of_week in [0, 1, 2]: # Start of week
            weather = "partly cloudy"
            temp = "25°C"
        else: # Mid-week
            weather = "light rain"
            temp = "22°C"
        logging.info("Weather data retrieved")
        return f"The weather in {location} on {date} will be **{weather}** with a high of **{temp}**."
    
    elif forecast_date < today:
        logging.info("Weather data retrieved")
        return f"Historical weather in {location} on {date}: It was a warm, sunny day."
        
    else:
        logging.info("Weather data retrieved")
        return f"Weather forecast for {location} on {date}: Data unavailable, but expect typical Spanish weather."

# List of all available tools
TOOLS = [get_weather]

# 3. RAG implementation

In [7]:
def setup_rag(pdf_path: str):
    """Loads, chunks, and indexes the PDF guide into a FAISS VectorStore."""
   
    # 1. Load Document
    try:
        loader = PyPDFLoader(pdf_path)
        docs = loader.load()
    except Exception as e:
        print(f"Error loading PDF: {e}. Using a placeholder document.")
        docs = [{"page_content": "Vilafranca is considered the city of wine. Sitges is a beautiful coastal town known for its film festival, beaches, and historical sites like the Maricel Museum."}]
        
    # 2. Chunking Strategy (RecursiveCharacterTextSplitter is robust)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    
    # 3. Embeddings (using paraphrase-multilingual-MiniLM-L12-v2 for the multilingual approach)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    
    # 4. Vector Store (FAISS)
    vectorstore = FAISS.from_documents(splits, embeddings)
    
    # 5. Retriever
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    return retriever

# Initialize the RAG Retriever
RAG_RETRIEVER = setup_rag(PDF_PATH)

# 4. Model and State Definition

In [None]:
MODEL_NAME = "gemini-2.5-flash-lite"
llm = init_chat_model(
    model=MODEL_NAME,
    model_provider="google_genai", 
    temperature=0.3,
    max_tokens=2048
).bind_tools(TOOLS) # Bind the get_weather tool

# System Prompt to define the Agent's persona and instructions
SYSTEM_PROMPT = SystemMessage(
    content=(
        "You are a helpful and friendly Tourist Assistant for the Vilafranca and surrounding areas. "
        "Your primary source of information is the provided RAG context about the local guide. "
        "Always maintain context from the previous turns of the conversation. "
        f"If the user asks about the weather, you MUST use the 'get_weather' tool, regardless of the date used. Use it even when the user has not specified a specific date to find the most appropiate date for a certain plan, considering that today is {datetime.now().date()}."
        "You MUST answer using the language the user used in their original prompt, not the language in the query passed to the RAG system (e.g., if the user asks in Spanish and the RAG system answers in Catalan, answer in Spanish),"
        "even if the retrieved RAG context is in a different language (e.g., Catalan)."
        "If the information is not in your RAG context or is not related with turism say that you do not know."
    )
)



Since we want to be able to retrieve the context from the RAG, we add an extra field inside the State of our agent

In [9]:
class TouristAgentState(MessagesState):
    retrieved_documents: List[Document]
    transformed_query: str
    
AgentState = TouristAgentState

# 5. Nodes and edges: Graph

To allow a better performance, we will use an auxiliar llm to transform the user's query to substitue any confusing term or structure considering the chat's history

In [10]:
REWRITE_PROMPT_TEMPLATE = \
"""You are an agent which sole goal is to transform a given query using the chat history between a user and an AI. 
The transformation has to make the query not depend on context and substitue any pronoun or ambiguity for their corresponding keyword words that appeared before in the chat.

Query: \n{query}
Chat history: \n{chat_history}"""

rewrite_prompt = ChatPromptTemplate.from_template(REWRITE_PROMPT_TEMPLATE)

MODEL_NAME = "gemini-2.5-flash-lite"
aux_llm = init_chat_model(
    model=MODEL_NAME,
    model_provider="google_genai", 
    temperature=0.3,
    max_tokens=2048
)

rewrite_chain = rewrite_prompt | aux_llm | StrOutputParser()

Once our rewriting feature has been defined, we have to create the main functions which will consitute the nodes of our graph: One function to rewrite the user's query, one to retrieve the information from the RAG and one to use both the query and the context to give a proper answer. (The weather node will be defined using the previously defined function)

In [11]:
def format_chat_history(messages: list) -> str:
    """Receives a list of messages corresponding to a conversation between a user (human) and an agent (ai) and returns a formatted string respresenting the last 5 interactions"""
    history_str = ""
    for msg in messages[:-5]:
        if msg.role == "user":
            history_str += f"Human:\n{msg.content}"
        if msg.role == "assistant":
            history_str += f"AI:\n{msg.content}"
    return history_str

In [12]:
def rewrite_query_node(state: AgentState) -> dict:
    """Catches last message and all previous messages in chat history and rewrites the last message into a more convenient format"""
    current_query = state['messages'][-1]
    chat_history = format_chat_history(state['messages'][:-1])

    response = rewrite_chain.invoke({
        'query': current_query,
        'chat_history': chat_history
    })

    return {'transformed_query': response}

def retrieve_context_node(state: AgentState) -> dict:
    """Returns the most important chunkcs of information from the RAG system considering the transformed query"""
    docs = RAG_RETRIEVER.invoke(state['transformed_query'])
    return {"retrieved_documents": docs}

def agent_node(state: AgentState) -> dict:
    """Invokes the LLM considering its role, the context from the RAG system and previous messages"""
    context = "\n\n--- RAG Context ---\n" + "\n".join([doc.page_content for doc in state["retrieved_documents"]])
    messages_with_context = [SYSTEM_PROMPT] + [SystemMessage(content=context)] + state["messages"]
    response = llm.invoke(messages_with_context)
    return {"messages": response}

In [13]:
# B. Node 2: Tool Node (weather tool)
tool_node = ToolNode(TOOLS)

# C. Define the Graph
graph_builder = StateGraph(AgentState)

# Add Nodes
graph_builder.add_node("rewriter", rewrite_query_node)
graph_builder.add_node("rag", retrieve_context_node)
graph_builder.add_node("agent", agent_node)
graph_builder.add_node("tools", tool_node)

# Set Entry Point
graph_builder.set_entry_point("rewriter")

# Define Edges/Transitions
graph_builder.add_edge("rewriter", "rag")
graph_builder.add_edge("rag", "agent")
# LangGraph's prebuilt tools_condition checks if the LLM requested a function call.
graph_builder.add_conditional_edges(
    "agent",
    tools_condition,
    {
        "tools": "tools",  # If tool call, go to 'tools' node
        END: END,          # If no tool call, finish the conversation
    },
)

# 2. From 'tools' back to 'agent'
# After the tool runs, its output is returned to the 'agent' node for synthesis into a final answer.
graph_builder.add_edge("tools", "agent")

# Compile the graph
memory = MemorySaver() # To maintain state/context across turns
graph = graph_builder.compile(checkpointer=memory)

print("Agent graph compiled successfully.")

Agent graph compiled successfully.


# 6. EXECUTION AND DEMONSTRATION

In [14]:
# Configuration for the Agent (using a unique thread ID for multi-turn dialogue)
thread_id = str(uuid.uuid4())
config = {"configurable": {"thread_id": thread_id}}

def run_agent(prompt: str, return_context=False, return_time=False, has_used_tool=False):
    time.sleep(5)
    t0 = datetime.now()
    """Helper function to run the agent and print the final output."""
    print(f"\n==================================\n User: {prompt}\n==================================")
    
    # Prepare the input message
    input_message = {"messages": [{"role": "user", "content": prompt}]}
    
    # Run the graph
    final_state = graph.invoke(input_message, config=config)

    # Print answer from assistant
    final_answer = final_state["messages"][-1].content
    print(f"\n==================================\n Assistant: {final_answer}\n==================================")
    
    duration = datetime.now() -t0
    results = {
        "answer": final_answer
    }
    if return_context:
        if final_state.get("retrieved_documents"):
            results['context'] = [doc.page_content for doc in final_state["retrieved_documents"]]
        else:
            results['context'] = []
    if return_time:
        results["duration"] = duration

    if has_used_tool:
        tool_was_called = False
        for msg in final_state["messages"]:
            if isinstance(msg, ToolMessage):
                tool_was_called = True
                break
        results['has_used_tool'] = tool_was_called

    return results 


In [15]:
run_agent("Quiero ir a Vilafranca. ¿Qué museos hay allí?")


 User: Quiero ir a Vilafranca. ¿Qué museos hay allí?

 Assistant: A Vilafranca del Penedès hi ha el Convent de Sant Francesc, la Basílica de Santa Maria i el Museu del Vi.


{'answer': 'A Vilafranca del Penedès hi ha el Convent de Sant Francesc, la Basílica de Santa Maria i el Museu del Vi.'}

In [16]:
run_agent("¿Qué me puedes decir sobre ese museo?")


 User: ¿Qué me puedes decir sobre ese museo?

 Assistant: El Museu del Vi de Vilafranca del Penedès forma part de l'oferta museística de l'àmbit del Penedès, que inclou 12 museus que van rebre més de 200.000 visites el 2019. Aquests museus ofereixen una mirada diversa de la cultura catalana dels segles XIX i XX, amb temàtiques com cases museu, museus d'autor i col·leccions contemporànies. També hi ha museus relacionats amb l'activitat vitivinícola.


{'answer': "El Museu del Vi de Vilafranca del Penedès forma part de l'oferta museística de l'àmbit del Penedès, que inclou 12 museus que van rebre més de 200.000 visites el 2019. Aquests museus ofereixen una mirada diversa de la cultura catalana dels segles XIX i XX, amb temàtiques com cases museu, museus d'autor i col·leccions contemporànies. També hi ha museus relacionats amb l'activitat vitivinícola."}

Invocación correcta (Éxito)

In [17]:
run_agent("Quiero ir al museo cuando llueva. ¿Cuándo me recomeindas que vaya?")


 User: Quiero ir al museo cuando llueva. ¿Cuándo me recomeindas que vaya?

 Assistant: No dispongo de información meteorológica detallada para los próximos días en Vilafranca del Penedès. Por lo tanto, no puedo recomendarte una fecha específica para ir al museo basándome en la lluvia.


{'answer': 'No dispongo de información meteorológica detallada para los próximos días en Vilafranca del Penedès. Por lo tanto, no puedo recomendarte una fecha específica para ir al museo basándome en la lluvia.'}

Invocación con fallo de entrada

In [18]:

run_agent("¿Y la predicción para 11-2025-11?")


 User: ¿Y la predicción para 11-2025-11?


AttributeError: 'HumanMessage' object has no attribute 'role'

Invocación con fallo lógico

In [None]:
run_agent("¿Y la predicción para 2030-01-01?")


 User: ¿Y la predicción para 2030-01-01?
2030-01-01
2030-01-01 2030-01-01

 Assistant: Lo siento, no puedo darte la predicción para esa fecha. Sólo puedo predecir el tiempo para los próximos 7 días.


'Lo siento, no puedo darte la predicción para esa fecha. Sólo puedo predecir el tiempo para los próximos 7 días.'

# 7. Metrics

We prepare a series of questions for the rag and will evaluate different metrics considering the answers and the information retrieved from the RAG

In [None]:
rag_prompts = [
    "¿Que castillos hay por el Penedés?",
    "¿Que bodegas mas famosas hay por el Penedés?",
    "En que región del Penedés/Garraf hay mas hoteles?",
    "¿Donde hay más segundas residencias en el area Penedés/Anoia/Garraf?",
    "¿Donde hay hacimientos arqueológicos en la zona Penedés/Anoia/Garraf?",
    "¿En que consiste la ruta del vino?",
    "¿Hay muchos festivales relacionados con el vino en la zona Penedés/Anoia/Garraf?",
    "¿Donde puedo acampar en Penedés/Anoia/Garraf?",
    "¿Cual es la mejor ruta caminando cerca (A 20 km máximo) de Vilafranca del Penedés?",
    "¿Que tipo de turismo es el más famoso en la zona de Penedés/Anoia/Garraf?"
]

In [None]:
tool_prompts = [
    "Que tiempo hace mañana en Sitges?",
    "Que tiempo hace mañana en Vilafranca del Penedés?",
    "Llueve dentro de tres días en Vilafranca del Penedés?",
    "Cual es el mejor día para pasear por Vilafranca del Penedés?"
]

## Mesure 1: Context precision:

context_precision = number of relevant chuncks (useful chuncks) / number of retrieved chunks

This metric will be evaluated manually

In [None]:
context_list = []
answer_list = []
duration_list = []
for num_prompt, prompt in enumerate(rag_prompts):
    result = run_agent(prompt , return_context=True, return_time=True)
    answer_list.append(result['answer'])
    context_list.append(result['context'])
    duration_list.append(result['duration'])
    print(f"For prompt #{num_prompt} {prompt} context retrieved is:\n" + "\n\n".join(["Context #" + str(num_context + 1) + ": " + single_context for num_context, single_context in enumerate(context)]))


 User: ¿Que castillos hay por el Penedés?

 Assistant: En Penedès, puedes encontrar varios castillos. Algunos de ellos son el Castell de Gelida, el conjunto monumental de Sant Martí Sarroca, la ciutadella y los castillos de Calafell, el Castell de Claramunt, los castillos de Subirats, Mediona, Tous, Òdena, Jorba o la Llacuna.
For prompt #0 ¿Que castillos hay por el Penedés? context retrieved is:
Context #1: La taula 1 permet veure la distribució d’aquests recursos per tipologies. 
El patrimoni d’aquest espai s’identifica clarament amb la funció 
defensiva: Hi trobem torres de vigilància, restes de muralles, castells i 
fortificacions escampades per tota l’àrea. Per això, Anoia Turisme ha 
centrat la seva estratègia de promoció en el concepte de ‘Terra de 
castells’ i identifica la comarca amb el seu llegat medieval. També 
Penedès Turisme identifica una sèrie de castells en la seva selecció de 
llocs d’interés, però els integra en un conjunt més ampli, amb restes 
arqueològiques, mone

Summary of relevant chuncks retrieved for the answer for each prompt:

Prompt 1: 2/3

Prompt 2: 1/3

Prompt 3: 1/3

Prompt 4: 2/3

Prompt 5: 1/3

Prompt 6: 1/3

Prompt 7: 1/3

Prompt 8: 1/3

Prompt 9: 2/3

Prompt 10: 1/3


context_precision = 13/30; aprox 43%

Change chunk_size to a lower value and rerun everything and reevaluate the result manually again

## Mesure 2: Faithfuness: 

Faithfuness = number of sentences based on the retrieved information / number of sentences in the answer of the llm

We allow a llm to decide whether each sentence in the generated answer is based in any retrieved chunck.

In [None]:
assist_llm = init_chat_model(
    model=MODEL_NAME,
    model_provider="google_genai", 
    temperature=0.3,
    max_tokens=2048
)

In [None]:
system_msg = SystemMessage(content="You are an AI whose sole purpose is to decide if a sentence's affirmation is based on a series of chuncks of information. Give only the answers as 1 if True or or 0 if False.")

In [None]:
faithfuness_list = []
for idx, answer in enumerate(answer_list):
    results = []
    for sentence in answer.split("."):
        time.sleep(5)
        human_msg = HumanMessage(f"Sentence: {sentence}\nChunks of information: " + "\n".join(["Chunk #" + str(num_context + 1) + ": " + single_context for num_context, single_context in enumerate(context_list[idx])]))
        messages = [system_msg, human_msg]
        result = assist_llm.invoke(messages)
        try:
            numerical_answer = int(result.content)
            results.append(numerical_answer)
        except ValueError:
            pass 
    faithfuness = sum(results)/len(results)
    faithfuness_list.append(faithfuness)
    print(f"For prompt #{idx+1} faitfhfulness score is {faithfuness}")
print(f"Total faithfulness is {sum(faithfuness_list)/len(faithfuness_list)}")

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 200
Please retry in 25.363522529s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 200
}
, retry_delay {
  seco

ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 200
Please retry in 22.96830764s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 200
}
, retry_delay {
  seconds: 22
}
]

## Mesure 3: RAG delay

We will visualize the ammount of time needed for the agent to deal with each prompt

In [None]:
from matplotlib import pyplot as plt

In [None]:
tokens_list = []
for prompt in rag_prompts:
    tokens_list.append(llm.get_num_tokens(prompt))


Visualization of number of tokens in prompt vs ammount of time taken for agent to answer

In [None]:
plt.scatter(tokens_list, duration_list)

In [None]:
result_list = []
for tool_prompt in tool_prompts:
    result = run_agent(tool_prompt, has_used_tool=True)
    result_list.append(result['has_used_tool'])
print(f"Precision of correct tool calls is: {sum(result_list)/len(result_list):.2f}")
    