In [18]:
from pydantic_ai import Agent
from pydantic import BaseModel, Field
import agents
from dotenv import load_dotenv

load_dotenv()

True

In [None]:
# Clarifier Agent: Understands user intent and refines research questions
# Research Agent: Performs systematic exploration across three stages

# Verifier/Synthesizer Agent: Verifies that the claims are correct and combines findings into a cohesive article



In [5]:
# Prepare for the data - youtube podcasts

from pathlib import Path

# Folder holding the cached transcripts; only its `.txt` files are picked up.
data_folder = Path('..') / 'data_cache' / 'youtube_videos'

# Sorted so the file order is the same on every run.
data_files = sorted(data_folder.glob("*.txt"))


In [6]:
import docs
from tqdm.auto import tqdm

# Flat list of transcript chunks from every file, each tagged with its video id.
documents = []

for f in tqdm(data_files):
    # Use the file name without its final suffix as the video id.
    # `Path.stem` keeps working when the name contains extra dots, whereas
    # unpacking `filename.split('.')` into two names raises ValueError.
    video_id = f.stem
    content = f.read_text(encoding='utf-8')

    # Overlapping windows (size 3000, step 1500) so text cut at one chunk's
    # edge still appears whole in the neighbouring chunk.
    chunks = docs.sliding_window(content, size=3000, step=1500)

    for chunk in chunks:
        # Each chunk is used as a dict here; record which video it came from.
        chunk['video_id'] = video_id
        documents.append(chunk)


  0%|          | 0/190 [00:00<?, ?it/s]

In [7]:
from minsearch import Index


# Chunk text goes in as the searchable text field; `video_id` is registered
# as a keyword field.
index = Index(text_fields=["content"], keyword_fields=["video_id"])

# Build the index over every chunk collected above.
index.fit(documents)

<minsearch.minsearch.Index at 0x136dd5d00>

In [8]:
# Start building the tools for the agents

# Search tools - retrieval capabilities
# Verify agent - get the video id and verify

In [10]:
from typing import Any, Dict, List, TypedDict, Optional

class SearchResult(TypedDict):
    """Represent a single search result entry"""
    start: int
    content: str
    video_id: str
    _id: int


class SearchTools:
    """Retrieval helpers over a search index; the bound methods are passed to agents as tools."""

    def __init__(self, index, num_results: int = 5):
        """
        Args:
            index: Index object. It must provide
                `search(query=..., num_results=..., output_ids=...)` and a `docs` sequence.
            num_results (int): Maximum number of results requested per `search` call.
                Defaults to 5.
        """
        self.index = index
        self.num_results = num_results

    def search(self, query: str) -> List[SearchResult]:
        """
        Search the index for documents matching the given query.

        Args:
            query (str): The search query string.

        Returns:
            List[SearchResult]: A list of search results. Each result dictionary contains:
                - start (int): The starting position or offset within the source file.
                - content (str): A text excerpt or snippet containing the match.
                - video_id (str): Youtube video_id for the snippet.
                - _id (int): The unique id for the document
        """
        return self.index.search(
            query=query,
            num_results=self.num_results,
            # Ask the index to include each document's `_id` in the results.
            output_ids=True,
        )

    def get_document_by_id(self, _id: int) -> Optional[SearchResult]:
        """
        Retrieve a document by its unique ID.

        Args:
            _id (int): The document id.

        Returns:
            Optional[SearchResult]: The document corresponding to the given ID or None if it's not in the index.
        """
        docs = self.index.docs

        # The id is used as a position in `docs`; negative ids are rejected
        # explicitly so they never index from the end of the list.
        if 0 <= _id < len(docs):
            return docs[_id]

        return None


In [11]:
tools = SearchTools(index)

In [21]:
# tool calling monitoring 

from pydantic_ai.messages import FunctionToolCallEvent

class NamedCallback:
    """Event handler that prints every tool call, labelled with the owning agent's name."""

    def __init__(self, agent):
        # Only the name is stored; the agent object itself is not kept.
        self.agent_name = agent.name

    async def print_function_calls(self, ctx, event):
        """Print tool calls found in `event`, descending into nested event streams.

        Args:
            ctx: Context passed along by the caller; forwarded unchanged on recursion.
            event: Either a single event or an async-iterable of events.
        """
        is_stream = hasattr(event, "__aiter__")

        if is_stream:
            # A stream of events: handle each inner item the same way.
            async for inner_event in event:
                await self.print_function_calls(ctx, inner_event)
        elif isinstance(event, FunctionToolCallEvent):
            call = event.part
            print(f"TOOL CALL ({self.agent_name}): {call.tool_name}({call.args})")

    async def __call__(self, ctx, event):
        """Make the instance itself usable as the handler; delegates to `print_function_calls`."""
        return await self.print_function_calls(ctx, event)



In [22]:
# Building the clarifier agent
# Instructions - output structure
# Run

from pydantic import BaseModel, Field
from pydantic_ai import Agent

In [29]:
clarifier_instructions = """
Your task is to understand what user wants and what their intent is
Later this will be passed to the researcher to go deeper in exploring it

Use your own knowledge as well as the results from the search to clarify 
the intent of the user. 
ask the user for clarification once

after that, process the response and prepare the handoff to 
the research agent
"""

clarifier = Agent(
    name='clarifier',
    instructions=clarifier_instructions,
    tools=[tools.search],
    model='gpt-4o-mini'
)

In [None]:
question = "Do research on making money with AI"

results = await clarifier.run(
    user_prompt=question,
    event_stream_handler=NamedCallback(clarifier)
)

In [26]:
print(results.output)

Could you please clarify if you're looking for specific ideas or strategies on how to make money with AI, such as building AI applications, investing in AI companies, or using AI tools for businesses? Your specific interest will help narrow down the research effectively.


In [None]:
# Based on the first attempt, iterate and refine

# Better instructions
# Better output structure

clarifier_instructions = """
You are the CLARIFIER agent.

ROLE
Your job is to interpret and refine the user's research request so that it can be passed
to the RESEARCH agent for structured exploration.

OBJECTIVES
1. Understand what the user truly wants to learn or achieve (their intent).
2. Identify the core topic and any implicit goals (e.g., learn, compare, evaluate, predict, build).
3. Ask the user one targeted clarification question — to confirm scope, focus, or purpose.
4. Once the user responds, synthesize a refined version of their request that includes:
   - The clarified intent (what the user ultimately wants)
   - The initial request (in their own words)
   - The refined research focus (a precise version suitable for the RESEARCH agent)
   - 3–7 search queries that capture the clarified scope and intent
   - A short instruction summary for the RESEARCH agent explaining what to explore

DATA SOURCES
- You may use your own general knowledge to infer user intent.
- You may use the `search()` tool to quickly check ambiguous terms or context.

INTENT HANDLING
- Before searching, infer the underlying intent behind the user's request.
  Examples:
    - “getting into ML” → learning pathways, beginner resources, first projects
    - “AI safety concerns” → risks, ethical challenges, mitigation strategies
    - “startup funding trends” → investment patterns, valuations, stages
- Generate searches that reflect this **intent**, not just literal words.

CONSTRAINTS
- Ask the user for clarification **once only**.
- Do not fabricate information; if uncertain, clarify directly with the user.
- The goal is to output a structured handoff ready for the RESEARCH agent's Stage 1 process.
"""

In [30]:
# Initial requests
# refined requests
# user_intent
# queries
# instructions

class ResearchInstructions(BaseModel):
    """
    Output of the CLARIFIER agent.
    Provides both the user's raw input and the refined, structured guidance
    for the RESEARCH agent to begin its first stage.
    """
    # No field declares a default, so all five are required.

    initial_request: str = Field(
        description="The user's original question or request, captured verbatim.",
    )

    refined_request: str = Field(
        description="A clarified, rephrased, and contextually grounded version of the initial request.",
    )

    user_intent: str = Field(
        description=(
            "A short summary (1–2 sentences) of what the user truly wants to accomplish "
            "or learn, inferred from both the initial request and clarification."
        ),
    )

    queries: List[str] = Field(
        description=(
            "A list of 3–7 specific search queries derived from the refined request, "
            "covering complementary angles or subtopics the RESEARCH agent should explore."
        ),
    )

    instructions: str = Field(
        description=(
            "Concise operational guidance for the RESEARCH agent, explaining how to use "
            "the queries and what to prioritize during Stage 1 research."
        ),
    )

clarifier = Agent(
    name='clarifier_v2',
    instructions=clarifier_instructions,
    tools=[tools.search],
    output_type=ResearchInstructions,
    model='gpt-4o-mini'
)


In [None]:
# TODO: Callback

# Test the clarifier with a sample interaction:

# One handler instance is shared by both turns, so every tool call is printed
# under the clarifier's name.
callback = NamedCallback(clarifier)

# Turn 1: send the raw user request.
results = await clarifier.run(
    user_prompt='I want to work as a freelancer',
    event_stream_handler=callback
)

print(results.output)

# Turn 2: continue the same conversation by passing turn 1's messages as history.
# NOTE(review): this turn re-sends the original request rather than an answer to a
# clarifying question — confirm whether a real user reply should be passed here.
# NOTE(review): `output_type` repeats the type the agent was constructed with —
# confirm whether the per-run override is needed.
results = await clarifier.run(
    user_prompt='I want to work as a freelancer',
    message_history=results.new_messages(),
    event_stream_handler=callback,
    output_type=ResearchInstructions
)

initial_request='I want to work as a freelancer' refined_request='How can I start my career as a freelancer?' user_intent='The user wants guidance on how to begin a freelancing career, including steps, platforms, and strategies.' queries=['How to start freelancing as a beginner?', 'Best freelancing platforms for new freelancers', 'Tips for successfully landing freelance jobs', 'Managing finances as a freelancer', 'Skills in demand for freelancers in 2023'] instructions='Research the steps and resources available for starting a freelancing career, including platforms, tips for job sourcing, and essential skills.'
TOOL CALL (clarifier_v2): search({"query":"how to start freelancing"})


In [None]:
# Show the main text fields of the clarifier's structured hand-off, one per line.
# Every field is read from `results.output`; the previous `results.boutput`
# was a typo that raised AttributeError on the third line.
print('initial_request: ', results.output.initial_request)
print('refined_request: ', results.output.refined_request)
print('user_intent: ', results.output.user_intent)
print('instructions: ', results.output.instructions)

initial_request:  I want to work as a freelancer
refined_request:  What steps can I take to start freelancing, including tips and platforms to use?
user_intent:  The user wants practical advice and information on how to start a freelancing career.
instructions:  Gather information about the initial steps for becoming a freelancer, including practical advice, platform recommendations, and tips for time management.


In [40]:
research_task = results.output
research_task

ResearchInstructions(initial_request='I want to work as a freelancer', refined_request='What steps can I take to start freelancing, including tips and platforms to use?', user_intent='The user wants practical advice and information on how to start a freelancing career.', queries=['Steps to start freelancing', 'Best platforms for freelancers', 'Common skills needed for freelancing', 'Tips for succeeding in freelance work', 'How to manage time and workload as a freelancer'], instructions='Gather information about the initial steps for becoming a freelancer, including practical advice, platform recommendations, and tips for time management.')

In [None]:
### Research agent

# Output from the clarifier agent is passed into the research agent

# instruction
# tool
# agent
# output schema



In [None]:
# Start with the basic structure
# First-draft output schema for the research agent: bare fields, no descriptions.

# A single citation.
class Reference(BaseModel):
    document_id: int  # presumably the `_id` of a snippet returned by search() — TODO confirm
    quote: str
    timestamp: str

# One searched keyword together with the citations found for it.
class ResearchKeyword(BaseModel):
    keyword: str
    relevant_references: List[Reference]

# One insight together with the citations that back it.
class VerifiableInsight(BaseModel):
    insight: str
    references: List[Reference]

# Full report for one research stage; this is the researcher agent's output type.
class ResearchStageReport(BaseModel):
    stage: int  # stage number (the researcher prompt defines stages 1, 2 and 3)
    explored_keywords: List[ResearchKeyword]
    verifiable_insights: List[VerifiableInsight]
    stage_summary: str
    recommended_next_steps: str
    recommended_next_keywords: List[str]


In [39]:
researcher_instructions = """
You are the RESEARCH agent.

ROLE
You perform structured research on a proprietary podcast/video database for a specific stage
of exploration (Stage 1, 2, or 3).

DATA SOURCE
- You may ONLY use the `search()` function, which returns transcript snippets with:
  { video_id, _id }
- Every reference must cite a real snippet with a valid `youtube_id`, `timestamp` and `_id`.
- Do not invent data, names, or timestamps.

STAGES

Stage 1 — Initial Search
- Use the user's question or clarified keywords from context.
- Identify 3–5 primary keywords, run one or more searches.
- Summarize the main findings, highlighting initial insights and directions.

Stage 2 — Expansion
- Build upon Stage 1 outputs (from context).
- Generate 5–7 related or complementary queries.
- Summarize recurring ideas and patterns across new results.

Stage 3 — Deep Dive
- Build upon Stage 1 and Stage 2.
- Generate 5–7 deeper or contrasting queries.
- Explore nuances, counterpoints, or mechanisms.
- Provide a more analytical synthesis.

CONSTRAINTS
- Use context from previous stages to guide deeper exploration.
- You must perform the necessary amount of queries for each stage:
    - 3-5 for stage 1
    - 5-7 for stage 2
    - 5-7 for stage 3
"""

researcher = Agent(
    name='researcher',
    instructions=researcher_instructions,
    tools=[tools.search],
    model='gpt-4o-mini',
    output_type=ResearchStageReport
)


In [47]:
# Create a helper function to execute research stages
# TODO: Async, Await


async def do_research(
    stage: int,
    stage_instructions: str,
    previous_stages: List[ResearchStageReport],
) -> ResearchStageReport:
    """Run the researcher agent for one stage and return its structured report.

    The reports of earlier stages are serialized to JSON (one per line) and
    embedded in the user prompt together with the stage number and the
    stage-specific instructions.

    Args:
        stage: Number of the stage being run.
        stage_instructions: Free-text instructions for this stage.
        previous_stages: Reports from earlier stages; empty for the first stage.

    Returns:
        The `output` of the researcher run.
    """
    prior_reports = '\n'.join(report.model_dump_json() for report in previous_stages)

    prompt = f"""
    Current stage: {stage}

    Stage instrustructions:
    {stage_instructions}

    Previous stages:
    {prior_reports}
    """

    # Uses the module-level `researcher` agent; its tool calls are printed
    # through a NamedCallback created for this run.
    run = await researcher.run(
        user_prompt=prompt,
        event_stream_handler=NamedCallback(researcher),
    )
    return run.output

In [48]:
research_task

ResearchInstructions(initial_request='I want to work as a freelancer', refined_request='What steps can I take to start freelancing, including tips and platforms to use?', user_intent='The user wants practical advice and information on how to start a freelancing career.', queries=['Steps to start freelancing', 'Best platforms for freelancers', 'Common skills needed for freelancing', 'Tips for succeeding in freelance work', 'How to manage time and workload as a freelancer'], instructions='Gather information about the initial steps for becoming a freelancer, including practical advice, platform recommendations, and tips for time management.')

In [49]:
stage_1_instructions = f"""
do initial research using this instructions:

{research_task.model_dump_json()}
"""

stage_1 = await do_research(
    stage=1,
    stage_instructions=stage_1_instructions,
    previous_stages=[]
)

TOOL CALL (researcher): search({"query": "Steps to start freelancing"})
TOOL CALL (researcher): search({"query": "Best platforms for freelancers"})
TOOL CALL (researcher): search({"query": "Common skills needed for freelancing"})
TOOL CALL (researcher): search({"query": "Tips for succeeding in freelance work"})
TOOL CALL (researcher): search({"query": "How to manage time and workload as a freelancer"})


In [51]:
for kw in stage_1.explored_keywords:
    print(kw.keyword)
    for ref in kw.relevant_references:
        print(ref)
    print()
# stage_1

Steps to start freelancing
document_id=3252 quote='you can definitely do freelancing um there are different ways of how to transition into freelancing ... a weekend freelancer ... there’s the transitioning phase where people say hey I have a part-time job ... there’s the fulltime jump into the cold water' timestamp='49:30'

Best platforms for freelancers
document_id=3252 quote='you usually do it with those online freelancing platforms because they are small projects on them... the weekend projects ... platforms such as upwork' timestamp='49:04'

Common skills needed for freelancing
document_id=7182 quote='and for me freelancing is exchanging time for money ... you are responsible ... for the outcomes so you need to have good communication skills' timestamp='6:03'

Tips for succeeding in freelance work
document_id=3400 quote="iterate improve your profile right that was one of the things ... while you're doing that see where you want to focus your skills or whether you want to learn some

In [52]:
# Improve the output structure and the prompt


# Second iteration of the research output schema. No field declares a default,
# so every field is required. Docstrings and descriptions are kept word-for-word.

class Reference(BaseModel):
    """
    A single, verifiable citation to a transcript snippet or video segment.
    Must correspond to a real snippet returned by the `search()` tool.
    """
    document_id: int = Field(
        description="Internal ID of the transcript snippet.",
    )
    quote: str = Field(
        description="Exact snippet that supports the keyword or insight.",
    )
    timestamp: str = Field(
        description="Timestamp in the source video where the quote occurs, 'mm:ss' or 'h:mm:ss'",
    )
    relevance_to_keyword: str = Field(
        description=(
            "Explanation of *how* this quote supports or illustrates "
            "the specific keyword or concept being explored."
        ),
    )
    relevance_to_user_intent: str = Field(
        description="Explanation of *how* this quote help the user with their intent.",
    )


class ResearchKeyword(BaseModel):
    """
    Represents a keyword explicitly searched during this research stage.
    Each keyword must match an actual query used in the search tool calls.
    """
    keyword: str = Field(
        description="The exact keyword or phrase used in the search() tool call.",
    )
    relevant_references: List[Reference] = Field(
        description=(
            "List of transcript snippets directly relevant to this keyword. "
            "Each must include a 'relevance_to_keyword' explanation."
        ),
    )


class VerifiableInsight(BaseModel):
    """
    A synthesized insight that can be traced back to specific evidence.
    Each insight must be supported by at least one real reference.
    """
    insight: str = Field(
        description="An insight derived from the research, phrased in an evidence-based, verifiable way.",
    )
    references: List[Reference] = Field(
        description=(
            "Citations that directly support this insight. "
            "Must contain valid timestamps and IDs."
        ),
    )


class ResearchStageReport(BaseModel):
    """
    Structured output for each research stage (1–3).
    Ensures traceability between searches, keywords, and findings.
    """
    stage: int = Field(
        description="The research stage number (1 = Initial Search, 2 = Expansion, 3 = Deep Dive).",
    )
    explored_keywords: List[ResearchKeyword] = Field(
        description=(
            "List of the *exact* keywords used in this stage's search() calls, "
            "along with references showing their relevance."
        ),
    )
    verifiable_insights: List[VerifiableInsight] = Field(
        description="List of data-backed insights derived from the references gathered at this stage.",
    )
    stage_summary: str = Field(
        description=(
            "Analytical summary of what was learned at this stage, "
            "connecting evidence to emerging themes."
        ),
    )
    recommended_next_steps: str = Field(
        description="Guidance for what to do in the next stage — e.g., new angles, counterpoints, or subtopics.",
    )
    recommended_next_keywords: List[str] = Field(
        description="Suggested next queries based on gaps or promising directions discovered in this stage.",
    )



# System prompt for the research agent.
# The DATA SOURCE section uses the field names that actually exist: search()
# results carry `start`, `content`, `video_id` and `_id`, and the Reference
# output schema stores `document_id`, `quote` and `timestamp`. The earlier
# wording asked for a `youtube_id`, which neither side defines.
researcher_instructions = """
You are the RESEARCH agent.

ROLE
You perform structured research on a proprietary podcast/video database for a specific stage
of exploration (Stage 1, 2, or 3).

DATA SOURCE
- You may ONLY use the `search()` function, which returns transcript snippets with:
  { start, content, video_id, _id }
- Every reference must cite a real snippet returned by `search()`: use the snippet's `_id`
  as `document_id`, and take `quote` and `timestamp` from the snippet's `content`.
- Do not invent data, names, or timestamps.

STAGES

Stage 1 — Initial Search
- Use the user’s question or clarified keywords from context.
- Identify 3–5 primary keywords, run one or more searches.
- Summarize the main findings, highlighting initial insights and directions.

Stage 2 — Expansion
- Build upon Stage 1 outputs (from context).
- Generate 5–7 related or complementary queries.
- Summarize recurring ideas and patterns across new results.

Stage 3 — Deep Dive
- Build upon Stage 1 and Stage 2.
- Generate 5–7 deeper or contrasting queries.
- Explore nuances, counterpoints, or mechanisms.
- Provide a more analytical synthesis.

CONSTRAINTS
- Use context from previous stages to guide deeper exploration.
- You must perform the necessary amount of queries for each stage:
    - 3-5 for stage 1
    - 5-7 for stage 2
    - 5-7 for stage 3
"""


In [53]:
researcher = Agent(
    name='researcher_v2',
    instructions=researcher_instructions,
    tools=[tools.search],
    model='gpt-4o-mini',
    output_type=ResearchStageReport
)

In [54]:
stage_1 = await do_research(
    stage=1,
    stage_instructions=stage_1_instructions,
    previous_stages=[]
)

TOOL CALL (researcher_v2): search({"query": "Steps to start freelancing"})
TOOL CALL (researcher_v2): search({"query": "Best platforms for freelancers"})
TOOL CALL (researcher_v2): search({"query": "Common skills needed for freelancing"})
TOOL CALL (researcher_v2): search({"query": "Tips for succeeding in freelance work"})
TOOL CALL (researcher_v2): search({"query": "How to manage time and workload as a freelancer"})


In [55]:
for kw in stage_1.explored_keywords:
    print(kw.keyword)
    for ref in kw.relevant_references:
        print(ref)
    print()

for insight in stage_1.verifiable_insights:
    print(insight)


Steps to start freelancing
document_id=3253 quote="I mean you can definitely do freelancing... there are different ways of how to transition into freelancing... there's the fulltime jump into the cold water... there are people that... try to hustle their way..." timestamp='47:51' relevance_to_keyword='This quote discusses different paths to starting as a freelancer, including transitioning slowly or taking the plunge into full-time freelancing.' relevance_to_user_intent='It provides insight into the initial steps someone can take when considering freelancing.'

Best platforms for freelancers
document_id=7182 quote='...places like Upwork... create a profile and then directly apply to them and here the pricing... I believe platforms like Upwork and Fiverr...' timestamp='26:48' relevance_to_keyword='This illustrates popular platforms that freelancers can use to find work and navigate pricing.' relevance_to_user_intent='It offers practical advice on where to begin searching for freelance o

In [56]:
stage_2 = await do_research(
    stage=2,
    stage_instructions="continue research",
    previous_stages=[stage_1]
)

stage_3 = await do_research(
    stage=3,
    stage_instructions="continue research, go deeper and broader, explore tangently relaveted topics",
    previous_stages=[stage_1, stage_2]
)


TOOL CALL (researcher_v2): search({"query": "Financial management for freelancers"})
TOOL CALL (researcher_v2): search({"query": "Long-term success in freelancing"})
TOOL CALL (researcher_v2): search({"query": "Niche freelancing opportunities"})
TOOL CALL (researcher_v2): search({"query": "Freelance marketing strategies"})
TOOL CALL (researcher_v2): search({"query": "Client relationship management"})
TOOL CALL (researcher_v2): search({"query": "Tools for freelancers"})
TOOL CALL (researcher_v2): search({"query": "Freelance financial strategies"})
TOOL CALL (researcher_v2): search({"query": "Challenges of part-time to full-time freelancing"})
TOOL CALL (researcher_v2): search({"query": "Stress management for freelancers"})
TOOL CALL (researcher_v2): search({"query": "Motivational techniques for freelance success"})
TOOL CALL (researcher_v2): search({"query": "Scalability of freelance businesses"})
TOOL CALL (researcher_v2): search({"query": "Long-term financial planning for freelancers"

### Synthesizer and Verifier

In [79]:
# get stage 1, 2, 3 results
# Combine them together into one article
synthesizer_instructions = """
You synthesize research findings from all three stages (StageReports 1–3)
into a cohesive, factual final report.

TASKS
1. Read and interpret all reports
2. Verify each claim you put in the article
3. Make sure the output matches the intention of the user
4. Create the article

ARTICLE RULES
- The article should have instruction, 5-6 sections and conclusion 
- Each section should group 3–4 related claims.
- Each claim: 3–4 sentences and reference
- Do not add new facts beyond what's supported in reports
- You must verify each source
"""


In [None]:
synthesizer = Agent(
    name='synthesizer',
    instructions=synthesizer_instructions,
    tools=[tools.get_document_by_id],
    model='gpt-4o-mini'
)

In [81]:
all_reports = [stage_1, stage_2, stage_3]
reports = '\n'.join([r.model_dump_json() for r in all_reports])

user_prompt = f"""
initial request:
{research_task.model_dump_json()}

reports:
{reports}
"""

callback = NamedCallback(synthesizer)

results = await synthesizer.run(
    user_prompt=user_prompt,
    event_stream_handler=callback.print_function_calls
)

In [82]:
all_reports

[ResearchStageReport(stage=1, explored_keywords=[ResearchKeyword(keyword='Steps to start freelancing', relevant_references=[Reference(document_id=3253, quote="I mean you can definitely do freelancing... there are different ways of how to transition into freelancing... there's the fulltime jump into the cold water... there are people that... try to hustle their way...", timestamp='47:51', relevance_to_keyword='This quote discusses different paths to starting as a freelancer, including transitioning slowly or taking the plunge into full-time freelancing.', relevance_to_user_intent='It provides insight into the initial steps someone can take when considering freelancing.')]), ResearchKeyword(keyword='Best platforms for freelancers', relevant_references=[Reference(document_id=7182, quote='...places like Upwork... create a profile and then directly apply to them and here the pricing... I believe platforms like Upwork and Fiverr...', timestamp='26:48', relevance_to_keyword='This illustrates 

In [83]:
print(results.output)

# A Practical Guide to Starting Your Freelancing Career

Freelancing offers the freedom to choose your work, clients, and schedule. To help you embark on this exciting journey, this guide consolidates actionable steps, platform recommendations, essential skills, and tips for managing your freelance business effectively.

## 1. Steps to Start Your Freelancing Journey

Beginning your freelancing career can be approached in various ways. One path is to transition gradually from a traditional job, allowing you to build a client base while managing your current responsibilities. Alternatively, some opt for an immediate full-time commitment to freelancing, jumping into the "cold water." This decision significantly impacts workload management and personal adaptability (Stage 1 Report, Document ID: 3253).

To launch effectively, consider engaging with smaller agencies, which can offer avenues for securing clients while allowing you to gain hands-on experience (Stage 2 Report, Document ID: 3407

In [71]:
tools.get_document_by_id(3408)

{'start': 36000,
 'content': "has is built differently um\n33:55 everyone has different aspirations and\n33:58 uh again um I think first of all if\n34:00 someone starts out freelancing and they\n34:02 have had their first initial success\n34:04 with the first clients and they see it's\n34:06 working and it's working for them as a\n34:08 let's say as a source of supporting\n34:10 their um uh their lifestyle their life\n34:14 and everything because that's why I\n34:15 think for me it's I call it a lifestyle\n34:17 business\n34:19 um if you get to that stage you have\n34:21 that confidence that you don't worry oh\n34:24 will I get the next client and so on so\n34:25 forth for so that's probably the moment\n34:27 where you ask yourself should I grow as\n34:30 a business um and that's something that\n34:33 I've done myself uh in my second year\n34:36 because I thought the first year went so\n34:37 well uh now the next natural step is to\n34:39 become let's say an agency where I'm\n34:42 pre

In [None]:
# Version 2 improvement

In [84]:
synthesizer_instructions = """
You are the SYNTHESIZER agent.

ROLE
You create a cohesive, factual article by synthesizing verified information from all
three research stages (StageReports 1–3).

DATA SOURCES
- You will receive one or more `ResearchStageReport` objects, each containing
  verifiable references with document_ids, timestamps, and quotes.
- You have access to the tool `get_document_by_id` to retrieve full source text
  for any reference.
- You must use this tool to verify every claim that appears in your article.

TASKS
1. Carefully read all StageReports and extract recurring insights and verified facts.
2. Use `get_document_by_id` to check each cited reference and confirm that
   the quote or insight is correctly represented.
3. Only include claims that are explicitly supported by at least one verified source.
4. Synthesize related findings into 5–6 cohesive sections with a logical flow.
5. Ensure that the article aligns with the original user intent (as passed from the clarifier).

ARTICLE STRUCTURE
- Introduction: Summarize what the article will explore and why it matters.
- 5-6 body sections, each:
  - Centered on one major theme or subtopic.
  - Contains 3–4 related claims (each 3–4 sentences long).
  - Each claim includes an in-text reference
- Conclusion: Summarize the most important insights and actionable takeaways.

VERIFICATION RULES
- For every claim, retrieve at least one cited source using `get_document_by_id`
  and confirm that the text supports the claim.
- If a reference cannot be verified or is inconsistent, omit it.
- Do not invent or infer facts beyond what’s supported by verified material.

STYLE
- Maintain factual, neutral, and coherent tone.
- Avoid speculation, exaggeration, or unsupported synthesis.
- Write in clear prose suitable for an informed but general audience.

OUTPUT
- A single, well-structured factual article ready for presentation.
- All references cited
""".strip()

synthesizer = Agent(
    name='synthesizer_v2',
    instructions=synthesizer_instructions,
    tools=[tools.get_document_by_id],
    model='gpt-4o-mini',
)   


In [85]:
all_reports = [stage_1, stage_2, stage_3]
reports = '\n'.join([r.model_dump_json() for r in all_reports])

user_prompt = f"""
initial request:
{research_task.model_dump_json()}

reports:
{reports}
"""

callback = NamedCallback(synthesizer)

results = await synthesizer.run(
    user_prompt=user_prompt,
    event_stream_handler=callback.print_function_calls
)

TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 3253})
TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 7182})
TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 4577})
TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 3400})
TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 3416})
TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 3407})
TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 3252})
TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 3408})
TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 3399})
TOOL CALL (synthesizer_v2): get_document_by_id({"_id": 3254})


In [73]:
print(results.output)

# Starting Your Freelancing Career: A Comprehensive Guide

Freelancing offers a unique opportunity for individuals looking to enhance their lifestyle, gain more autonomy over their work, and potentially earn a substantial income. However, embarking on a freelancing career requires careful planning and the development of a diverse skill set. This article delves into the essential steps to start freelancing, the best platforms for finding work, vital skills, and tips for thriving in this ever-evolving environment.

## Steps to Start Freelancing

Transitioning into freelancing can be approached in various ways. One common method is the gradual shift from a traditional job to freelancing, often initiated as "weekend freelancing," where individuals take on small projects outside of their regular job hours to test the waters (Stage 1, Report 1). Others may jump into freelancing full-time, which can be a riskier but rewarding endeavor if they have secured a client base or have sufficient savi

In [None]:
# learning

# Required for the agent
# 1. Function -> tool
# 2. Instruction 
# 3. Output Schema


# Agent with instruction, name, tool, output_type

### Note

##### Required for the agent
    - Step of building the agent

    1. tool -> function -> class object
    2. Instruction -> System instruction Role, data source, tasks, structure, verifications, style, output
    3. Output Schema -> Like a data schema
    
    Build Agent with the instruction, name, tool, output_type
    Run the user query and monitoring 

    Final step
    1. Keep iterating and improving slowly
    - Note: The open question is how we can better monitor and improve the agents