In [25]:
from langchain.agents import Tool, AgentExecutor, create_react_agent
from langchain.prompts import PromptTemplate
from langchain_ollama import OllamaLLM
import requests
from bs4 import BeautifulSoup
import textwrap

def fetch_headlines(url):
    """Fetch and extract headlines from a news website"""
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Look for common headline patterns in news sites
        headlines = []
        for heading in soup.find_all(['h1', 'h2', 'h3']):
            text = heading.get_text().strip()
            if text and len(text) > 10 and len(text) < 200:  # Reasonable headline length
                headlines.append(text)
        
        # Return only the headlines, formatted clearly
        if headlines:
            return "\n\n".join([f"{i+1}. {headline}" for i, headline in enumerate(headlines[:10])])
        else:
            return "No headlines found on the page."
            
    except Exception as e:
        return f"Error fetching content: {str(e)}"

tools = [
    Tool(
        name="HeadlineFetcher",
        func=fetch_headlines,
        description="Fetches headlines from a news website. Input should be a complete URL."
    )
]

# Corrected prompt with tool_names variable
prompt = PromptTemplate.from_template(
    """You are a headline extraction agent. Your ONLY job is to extract the exact number of headlines requested.

    {tools}
    
    Follow this format:
    Question: the input question
    Thought: your reasoning about how to get the headlines
    Action: the action to take, should be one of [{tool_names}]
    Action Input: the URL to fetch headlines from
    Observation: the result with numbered headlines
    Thought: I now have the headlines and will select the requested number
    Final Answer: ONLY the exact number of headlines requested in a numbered list
    
    Question: {input}
    {agent_scratchpad}"""
)

llm = OllamaLLM(
    model="llama3.2", 
    temperature=0.1,
    system="You are a headline extraction agent. Extract EXACTLY the number of headlines requested. No more, no less."
)

agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(
    agent=agent, 
    tools=tools, 
    verbose=True, 
    handle_parsing_errors=True,
    max_iterations=2,  # Only allow one tool use
    early_stopping_method="force"
)

def ask_agent(question, width=80):
    """Consulta al agente y formatea la salida con wrap de texto"""
    # Ejecutar el agente
    result = agent_executor.invoke({"input": question})
    
    # Post-process the output to ensure we only get the requested number of headlines
    output = result["output"]
    
    # If we're asking for a specific number of headlines, ensure we only return that many
    if "top 3 headlines" in question.lower():
        lines = output.split("\n")
        headlines = [line for line in lines if line.strip() and (line.strip()[0].isdigit() or line.strip()[0] == "•")]
        output = "\n".join(headlines[:3])
    
    # También devolver el resultado original
    return output, result

# Example usage
url = "https://www.lanacion.com.ar/"
response, results = ask_agent(f"Visit {url} and tell me the top 3 headlines, nothing else")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To get the top 3 headlines from the given URL, I will use the HeadlineFetcher function to fetch the headlines from the news website.

Action: HeadlineFetcher
Action Input: https://www.lanacion.com.ar/
[0m[36;1m[1;3m1. La página que buscás no está disponible.

2. ¿A cuánto cotizan el dólar blue y el oficial este viernes 11 de abril?

3. México suspendió las exportaciones de 10 frigoríficos argentinos por un incumplimiento sanitario

4. Comunidad de Negocios

5. Javier Milei: las últimas medidas del Gobierno

6. Actualidad política

7. La decisión que tomó acerca de las escenas de sexo y la verdad acerca su pelea con Paris Jackson

8. Ester Expósito

9. Hay paro de subtes este viernes: a qué hora empieza y cuánto dura

10. Retiran el busto de Néstor Kirchner de un hospital de El Calafate que depende de la Nación[0m[32;1m[1;3m1. 3[0mInvalid Format: Missing 'Action:' after 'Thought:'[32;1m[1;3m[0m

[1m> Finish

In [26]:
results

{'input': 'Visit https://www.lanacion.com.ar/ and tell me the top 3 headlines, nothing else',
 'output': 'Agent stopped due to iteration limit or time limit.'}