In [1]:
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy
from typing import TypedDict

from langchain_ollama import ChatOllama
from langfuse.langchain import CallbackHandler
from tools import web_operations

# Local Ollama chat model that drives the internet agent.
llm_model = ChatOllama(model="qwen3")

# Langfuse callback handler used for tracing; it is attached to runs
# through the "callbacks" entry of the config below.
langfuse_handler = CallbackHandler()

# Run configuration handed to the agent when it is streamed.
llm_config = {
    "configurable": {"thread_id": "internet_agent"},
    "recursion_limit": 20,
    "callbacks": [langfuse_handler],
}

# System prompt describing how the internet researcher agent should behave.
# Two instructions were reworded because they were ambiguous as written:
# the summary length now names its unit ("200-word"), and "it it" is "if it".
internet_agent_prompt = """You are a helpful internet researcher. 
    - Navigate to a webpage and scrape its content without the links.
    - Be concise, and provide a 200-word summary of the webpage content.
    - Tag the webpage content as [user post, news article, documentation, sales page, product advertisement, book, etc.]
    - Determine if it contains relevant information for the topic.
    - do not use bullets or numbered lists.
    - Provide your answer in markdown format.
"""
# Define structured output schema
class SummaryOutput(TypedDict):
    """Typed dictionary describing a classified summary of a webpage.

    NOTE(review): this class is not referenced by the ``create_agent`` call in
    this cell (its ``response_format`` argument is commented out), and a later
    cell defines another ``SummaryOutput`` with different fields, which rebinds
    the name when that cell runs.
    """
    # Presumably the content tag requested by the prompt (news article, documentation, ...) - confirm.
    classification: str
    # Presumably whether the page is relevant to the topic - confirm.
    is_relevant_information: bool
    # Title field; presumably the page title - confirm.
    title: str
    # Summary text.
    summary: str

# Build the internet agent from the chat model and the web-scraping tool.
# The system prompt and the structured response format are currently
# disabled; they are kept below as commented-out options.
agent_tools = [web_operations.scrape_webpages]
internet_agent = create_agent(
    name="internet_agent",
    model=llm_model,
    tools=agent_tools,
    # system_prompt=internet_agent_prompt,
    # response_format=ToolStrategy(List)
)

messages = {"messages": [("user", """extrae una lista de palabras clave que deber√≠a tener un curr√≠culum para esta oferta de trabajo, unicamente las palabras. 
    url:https://www.linkedin.com/jobs/view/4362181901/
    Example output format: ['Python', 'Data Science']""")]}
async for step in internet_agent.astream(messages, config=llm_config, stream_mode="values" ):
    step["messages"][-1].pretty_print()


extrae una lista de palabras clave que debería tener un currículum para esta oferta de trabajo, unicamente las palabras. 
    url:https://www.linkedin.com/jobs/view/4362181901/
    Example output format: ['Python', 'Data Science']
Name: internet_agent

<think>
Okay, the user wants a list of keywords for a resume based on a job posting. The URL provided is a LinkedIn job listing. First, I need to scrape the webpage to get the content. The function scrape_webpages is available, so I'll use that. The parameters include extracting links and setting a minimum number of words per line. Since the user wants keywords, maybe extracting links isn't necessary, but the function might still need it. Wait, the function's description says it extracts links if the parameter is true. But the user's example output is just keywords, not links. Maybe the function's main purpose is to scrape the content, and the links are an extra. Let me check the function parameters again. The function has 'extract_li

In [2]:
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy
from typing import TypedDict

from langchain_ollama import ChatOllama
from langfuse.langchain import CallbackHandler

def summarize_text_llm(text: str,
    llm_model = None,
    llm_config = None,
    system_prompt = "Summarize the given text in 100 words /no_think",
    response_format=None
    ) -> dict:
    """Run a single-turn, tool-less agent over ``text`` and return the agent result.

    Args:
        text: Text sent to the agent as the user message.
        llm_model: Chat model to use. When ``None`` a new
            ``ChatOllama(model="qwen3")`` is created for this call.
        llm_config: Run configuration passed to ``invoke``. When ``None`` a
            new config is built for this call (thread id
            ``"summarization_agent"``, recursion limit 4, and a fresh
            ``CallbackHandler``).
        system_prompt: System prompt given to the agent.
        response_format: Optional structured-output specification forwarded
            to ``create_agent``.

    Returns:
        Whatever ``agent.invoke`` returns. Callers in this notebook index it
        as a mapping (``result["messages"]``, ``result["structured_response"]``),
        so the annotation is ``dict`` rather than ``str``.
    """
    # Defaults are created per call. Building them in the signature would
    # instantiate the model and the callback handler once, at definition
    # time, and share the same mutable config dict across every call.
    if llm_model is None:
        llm_model = ChatOllama(model="qwen3")
    if llm_config is None:
        llm_config = {
            "configurable": {"thread_id": "summarization_agent"},
            "recursion_limit": 4,
            "callbacks": [CallbackHandler()],
        }

    # The agent is named after its role so it matches the default thread id
    # (it was previously labelled "internet_agent").
    summarization_agent = create_agent(
        name="summarization_agent",
        model=llm_model,
        system_prompt=system_prompt,
        response_format=response_format,
    )
    return summarization_agent.invoke({"messages": [("user", text)]}, config=llm_config)


In [7]:
# 1. Scrape the webpage, including its links
# 2. Scrape each link and extract its text
# 3. Summarize the extracted text, and extract its topic and keywords
# 4. Store relevant text in a database, delete the rest
from tools import web_operations


# Job posting used as the working example.
url = 'https://www.linkedin.com/jobs/view/4362181901/'
# Alternative test page:
# url = 'https://en.wikipedia.org/wiki/LangChain'

# Step 1: scrape the page through the project's scraping tool.
# NOTE(review): the agent trace earlier in this notebook mentions a tool
# parameter whose name starts with "extract_li"; confirm that
# 'include_links' is a name the tool actually accepts.
scrape_input = {'url': url, 'include_links': True}
webpage = web_operations.scrape_webpages.invoke(input=scrape_input)

# Print the title stored in the first result's metadata.
first_page_meta = webpage[0].metadata
print(first_page_meta['title'])


DXC Technology hiring Analista técnico/funcional informático/a in Sant Cugat del Vallès, Catalonia, Spain | LinkedIn


In [8]:

class SummaryOutput(TypedDict):
    """Structured response requested from the summarization call in this cell.

    NOTE(review): this reuses the name of a class defined in an earlier cell
    with different fields; running this cell rebinds ``SummaryOutput``.
    """
    # Language detected for the input text.
    language: str
    # Summary of the input text.
    summary: str

import textwrap

# Prompt asking for both language detection and a 500-word summary.
language_summary_prompt = """Act as a language detection and summarization agent. Your task is to: 
        Detect the language of the provided text and return it clearly. 
        Summarize the text in 500 words, ensuring that the summary captures the main ideas, key points, and overall context of the original text without adding any external information or opinions. 
        /no_think"""

# Run the summarization helper on the scraped page text and request the
# answer in the SummaryOutput structure.
summary = summarize_text_llm(
    text=webpage[0].page_content,
    llm_model=ChatOllama(model="qwen3"),
    system_prompt=language_summary_prompt,
    response_format=ToolStrategy(SummaryOutput),
)

# Copy the structured fields onto the page's metadata.
page_metadata = webpage[0].metadata
structured = summary['structured_response']
page_metadata['summary'] = structured['summary']
page_metadata['language'] = structured['language']

# Show the detected language and the summary wrapped at 100 columns.
print(page_metadata['language'])
print(textwrap.fill(page_metadata['summary'], width=100))


Spanish
DXC Technology is seeking a Technical/Functional IT Analyst for a position in Sant Cugat del Vallès,
Catalonia, Spain. The role involves analyzing requirements, designing IT solutions, and
collaborating with multidisciplinary teams to draft and review technical and commercial proposals
for public administration projects. The analyst will also be responsible for developing standardized
templates and documentation to streamline the proposal process. The position requires experience in
drafting technical and commercial proposals for IT projects, particularly in public administration,
as well as knowledge of regional IT systems in Catalonia such as CTTI, IMI, and ACA. Familiarity
with Low Code platforms like Outsystems and Appian is desirable. The candidate should be proficient
in tools for enterprise architecture modeling, such as Archi, Visio, and UML, and have experience
with project management methodologies like Scrum and Kanban. Strong communication skills in both
Spanish and

In [9]:

# Ask the smaller model for a short topic line based on the stored summary.
topic = summarize_text_llm(
    text=webpage[0].metadata['summary'],
    llm_model=ChatOllama(model="qwen3:1.7b"),
    system_prompt="Extract the topic of the given text in no more than 10 words /no_think",
)
# Read the last message of the run instead of a fixed position, matching how
# the streaming cell reads step["messages"][-1]; the model's answer is the
# final message regardless of how many messages precede it.
webpage[0].metadata['topic'] = topic['messages'][-1].content

print(webpage[0].metadata['topic'])


DXC Technology seeks IT Analyst in Catalonia for public administration projects.


In [10]:

# Ask the smaller model, at low temperature, for a "|"-separated keyword list
# extracted from the stored summary.
keywords = summarize_text_llm(
    text=webpage[0].metadata['summary'],
    llm_model=ChatOllama(model="qwen3:1.7b", temperature=0.1),
    system_prompt="Extract the list of keywords of the given text. separed by |. do not group them. do not include categories. /no_think",
)

# Read the last message of the run instead of a fixed position, matching how
# the streaming cell reads step["messages"][-1].
webpage[0].metadata['keywords'] = keywords['messages'][-1].content

# Wrap at 100 columns for display (textwrap is imported in an earlier cell).
print(textwrap.fill(webpage[0].metadata['keywords'], width=100))


Technical/Functional IT Analyst | Sant Cugat del Vallès, Catalonia, Spain | analyzing requirements |
designing IT solutions | collaborating with multidisciplinary teams | drafting and reviewing
technical and commercial proposals | developing standardized templates | Low Code platforms like
Outsystems and Appian | enterprise architecture modeling tools like Archi, Visio, UML | project
management methodologies like Scrum and Kanban | communication skills in Spanish and Catalan


In [11]:
# Labels offered to the model when classifying the document.
document_classes = [
    "Article", "Advertise", "Blog Post", "Job Offer", "News", "Research Paper",
    "Thesis", "Email", "Legal Document", "Instruction Manual", "Social Media Post",
    "Product Review", "Service Review", "Technical Report", "User Guide", "White Paper",
    "Script", "Press Release", "FAQ", "Resume/CV", "Invoice", "Newsletter", "Story",
    "Code Documentation", "Policy Document",
]

# Ask the smaller model to pick one label for the stored summary; the label
# list is embedded in the system prompt as a comma-separated string.
document_type = summarize_text_llm(
    text=webpage[0].metadata['summary'],
    llm_model=ChatOllama(model="qwen3:1.7b"),
    system_prompt=f"Point wich type of document is the text ({','.join(document_classes)}). Do not elavorate. Do not format. /no_think",
)
# Read the last message of the run instead of a fixed position, matching how
# the streaming cell reads step["messages"][-1].
webpage[0].metadata['document_type'] = document_type['messages'][-1].content

print(webpage[0].metadata['document_type'])


Job Offer


In [12]:
# Show the metadata dictionary of the first scraped page, including the
# fields added by the previous cells (summary, language, topic, keywords, document_type).
display(webpage[0].metadata)

{'source': 'https://www.linkedin.com/jobs/view/4362181901/',
 'title': 'DXC Technology hiring Analista técnico/funcional informático/a in Sant Cugat del Vallès, Catalonia, Spain | LinkedIn',
 'description': 'Posted 8:52:29 PM. Job DescriptionJob DescriptionDXC Technology es una compañía global de servicios profesionales cuya…See this and similar jobs on LinkedIn.',
 'language': 'Spanish',
 'summary': 'DXC Technology is seeking a Technical/Functional IT Analyst for a position in Sant Cugat del Vallès, Catalonia, Spain. The role involves analyzing requirements, designing IT solutions, and collaborating with multidisciplinary teams to draft and review technical and commercial proposals for public administration projects. The analyst will also be responsible for developing standardized templates and documentation to streamline the proposal process. The position requires experience in drafting technical and commercial proposals for IT projects, particularly in public administration,