In [1]:
from youtube_transcript_api import YouTubeTranscriptApi

In [4]:
# ID of the YouTube video used to try out the transcript API.
video_id = 'r0gHbW_MmaQ'

# Create a transcript client and fetch the transcript for that video.
ytt_api = YouTubeTranscriptApi()
transcript = ytt_api.fetch(video_id)

In [5]:
def format_timestamp(seconds: float) -> str:
    """
    Render an offset in seconds as a clock-style label.

    Args:
        seconds: Offset in seconds; the fractional part is dropped via ``int()``.

    Returns:
        ``H:MM:SS`` when the offset is at least one hour, otherwise ``M:SS``.
    """
    whole = int(seconds)
    hh, leftover = divmod(whole, 3600)
    mm, ss = divmod(leftover, 60)

    # The hour field is only shown when it is non-zero.
    return f"{hh}:{mm:02}:{ss:02}" if hh > 0 else f"{mm}:{ss:02}"

def make_subtitles(transcript) -> str:
    """
    Turn transcript entries into timestamped text, one entry per line.

    Args:
        transcript: Iterable of entries exposing ``.start`` (seconds) and
            ``.text`` attributes.

    Returns:
        Lines of the form ``"<timestamp> <text>"`` joined with newlines.
        Newlines inside an entry's text are replaced by spaces so each entry
        stays on a single line.
    """
    return '\n'.join(
        format_timestamp(snippet.start) + ' ' + snippet.text.replace('\n', ' ')
        for snippet in transcript
    )

In [10]:
from youtube_transcript_api import YouTubeTranscriptApi
from typing import Any

def get_youtube_transcripts(video_id: str) -> str:
    """
    Download a YouTube video's transcript and render it as timestamped text.

    Args:
        video_id: The YouTube video ID (e.g., "dQw4w9WgXcQ").

    Returns:
        The output of ``make_subtitles`` for the fetched transcript.
    """
    fetched = YouTubeTranscriptApi().fetch(video_id)
    return make_subtitles(fetched)


In [11]:
import dotenv
# Load variables from a local .env file into the process environment
# (presumably where BRAVE_API_KEY is defined — confirm).
dotenv.load_dotenv()

True

In [12]:
import os
# Read the Brave Search API key from the environment.
brave_api_key = os.getenv('BRAVE_API_KEY')
# Display only the key's length as a sanity check, so the secret itself is not
# echoed into the notebook output.
# NOTE(review): os.getenv returns None when the variable is unset, in which
# case len() raises TypeError here.
len(brave_api_key)

31

In [34]:
from urllib.parse import urlparse, parse_qs

def extract_watch_video_id(url: str) -> str | None:
    """
    Extract video ID from a YouTube watch URL:
    https://www.youtube.com/watch?v=VIDEOID&other=params

    Returns None if no v parameter exists.
    """
    parsed = urlparse(url)
    params = parse_qs(parsed.query)

    return params.get("v", [None])[0]

In [35]:

import time
from typing import List, Dict
import requests

from youtube_transcript_api import YouTubeTranscriptApi


class ResearchTools:
    """
    Web search, YouTube search, page fetching and transcript helpers.

    The notebook passes an instance of this class to ``get_instance_methods``
    to build the agent's tool list, so the docstrings of the public methods
    may be surfaced to the model — keep them accurate.
    """

    def __init__(self, brave_api_key: str, sleep_time: float = 1, request_timeout: float = 30):
        """
        Args:
            brave_api_key: Token sent as ``X-Subscription-Token`` to the Brave API.
            sleep_time: Seconds to pause before every Brave request.
            request_timeout: Seconds before an HTTP request is abandoned.
        """
        self.brave_api_key = brave_api_key
        self.sleep_time = sleep_time
        # Without a timeout, requests waits forever on a stalled connection,
        # which would hang the whole agent loop.
        self.request_timeout = request_timeout

    def brave_search(self, query: str, num_results: int = 20) -> List[Dict[str, str]]:
        """
        Search the web

        Args:
            query: The search query string.
            num_results: Maximum number of results to request.

        Returns:
            One dict per result with the keys ``title``, ``url`` and
            ``description`` (empty string when the result has none).

        Raises:
            requests.HTTPError: If the API request fails (non-200 response).
            KeyError: If the response format is unexpected.
            requests.RequestException: For network-related errors, including timeouts.
        """

        # Pause before each call to space out requests to the API.
        time.sleep(self.sleep_time)

        url = "https://api.search.brave.com/res/v1/web/search"

        headers = {
            "X-Subscription-Token": self.brave_api_key,
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
        }

        params = {
            "q": query,
            "count": num_results,
        }

        response = requests.get(
            url, params=params, headers=headers, timeout=self.request_timeout
        )
        response.raise_for_status()
        brave_response = response.json()

        search_results = brave_response['web']['results']

        # A missing description is tolerated, matching brave_youtube_search;
        # title and url stay mandatory.
        return [
            {
                'title': r['title'],
                'url': r['url'],
                'description': r.get('description', ''),
            }
            for r in search_results
        ]

    def brave_youtube_search(self, query: str, num_results: int = 20) -> List[Dict[str, str]]:
        """
        Search YouTube for videos

        Args:
            query: The search query string.
            num_results: Maximum number of results to request.

        Returns:
            One dict per video with the keys ``video_id``, ``title``,
            ``description``, ``duration`` and ``creator``. Results whose URL
            carries no video ID are left out.

        Raises:
            requests.HTTPError: If the API request fails (non-200 response).
            KeyError: If the response format is unexpected.
            requests.RequestException: For network-related errors, including timeouts.
        """

        # Pause before each call to space out requests to the API.
        time.sleep(self.sleep_time)

        url = "https://api.search.brave.com/res/v1/videos/search"

        headers = {
            "X-Subscription-Token": self.brave_api_key,
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
        }

        params = {
            # Restrict the video search to YouTube.
            "q": f"{query} site:youtube.com",
            "count": num_results,
        }

        response = requests.get(
            url, params=params, headers=headers, timeout=self.request_timeout
        )
        response.raise_for_status()
        brave_response = response.json()

        results = []

        for r in brave_response['results']:
            video_id = extract_watch_video_id(r['url'])
            if video_id is None:
                # No ``v`` parameter in the URL: there is nothing a transcript
                # lookup could use, and None would break the declared
                # Dict[str, str] shape.
                continue

            # The per-video metadata block is optional.
            video = r.get('video') or {}
            results.append({
                'video_id': video_id,
                'title': r['title'],
                'description': r.get('description', ''),
                'duration': video.get('duration', ''),
                'creator': video.get('creator', ''),
            })

        return results

    def fetch_content(self, url: str) -> str:
        """
        Retrieve webpage content.

        Args:
            url: The target webpage URL.

        Returns:
            The textual content of the page as a UTF-8 decoded string.

        Raises:
            requests.HTTPError: If the HTTP request returns a non-success status code.
            requests.RequestException: For network-related errors, including timeouts.
            UnicodeDecodeError: If the response body cannot be decoded as UTF-8.
        """
        # The page is requested through r.jina.ai by prefixing the target URL
        # (presumably a reader proxy that returns a text rendering — confirm).
        jina_prefix = "https://r.jina.ai"
        final_url = f"{jina_prefix}/{url}"

        response = requests.get(final_url, timeout=self.request_timeout)
        response.raise_for_status()

        return response.content.decode("utf-8")

    def get_youtube_transcripts(self, video_id: str) -> str:
        """
        Fetch subtitles for a YouTube video.

        Args:
            video_id: The YouTube video ID (e.g., "dQw4w9WgXcQ").

        Returns:
            The transcript as timestamped text, one entry per line.
        """
        ytt_api = YouTubeTranscriptApi()
        transcript = ytt_api.fetch(video_id)
        return make_subtitles(transcript)


In [36]:
# Tool container used by the cells below; sleep_time is left at its default.
research_tools_object = ResearchTools(brave_api_key)

In [37]:
# Smoke-test the video search and preview the first three hits.
videos = research_tools_object.brave_youtube_search('pydantic ai')
videos[:3]

[{'video_id': '-WB0T0XmDrY',
  'title': 'Build Production-Ready AI Agents in Python with Pydantic AI - YouTube',
  'description': 'Check out https://www.squarespace.com/arjancodes to save 10% off your first purchase of a website or domain using code ARJANCODES.Pydantic AI lets you integr...',
  'duration': '15:50',
  'creator': 'ArjanCodes'},
 {'video_id': 'PkQIREapb9o',
  'title': 'Pydantic Crash Course - Build Reliable Python & AI Applications - YouTube',
  'description': 'Learn Pydantic in this complete 90-minute crash course covering type hints, data validation, nested models, and structured output with LLMs.Get the free bonu...',
  'duration': '01:22:22',
  'creator': 'Dave Ebbelaar'},
 {'video_id': '-WB0T0XmDrY',
  'title': 'PydanticAI: the AI Agent Framework Winner - YouTube',
  'description': 'Check out https://www.squarespace.com/arjancodes to save 10% off your first purchase of a website or domain using code ARJANCODES.Pydantic AI lets you integr...',
  'duration': '15:50',
 

In [38]:
from pydantic_ai import Agent
from toyaikit.tools import get_instance_methods


In [39]:
from pydantic_ai.messages import FunctionToolCallEvent

class NamedCallback:
    """
    Event-stream handler that prints every tool call, tagged with the name
    of the agent it was created for.
    """

    def __init__(self, agent):
        # Only the name is kept; the agent object itself is not stored.
        self.agent_name = agent.name

    async def print_function_calls(self, ctx, event):
        """
        Print ``event`` if it is a tool call; walk into it if it is a stream.

        Anything exposing ``__aiter__`` is treated as a nested stream and each
        of its items is handled recursively. Other event types are ignored.
        """
        if hasattr(event, "__aiter__"):
            async for nested in event:
                await self.print_function_calls(ctx, nested)
        elif isinstance(event, FunctionToolCallEvent):
            part = event.part
            print(f"TOOL CALL ({self.agent_name}): {part.tool_name}({part.args})")

    async def __call__(self, ctx, event):
        """Make the instance itself usable as the handler callable."""
        return await self.print_function_calls(ctx, event)

In [40]:
# Collect the methods of the ResearchTools instance to hand to the agent as tools.
# NOTE(review): exactly which methods get_instance_methods returns is defined
# by toyaikit — confirm before adding helper methods to the class.
research_tools = get_instance_methods(research_tools_object)

In [41]:
# System prompt for the research agent: search the web and YouTube first,
# present a menu of findings, and fetch full pages/transcripts only after the
# user picks items. Surrounding whitespace is stripped from the final string.
research_instructions = """
You are a web + video research assistant.

Goal:
When the user asks a question, you will

(1) search the web and
(2) search for relevant videos (YouTube)

Then present a concise menu of what you found and ask the user what they want
to explore next. After the user chooses, you fetch deeper content
(webpage full text or video transcript) and continue iteratively.

Core workflow (always follow):
1) Understand the query
   - Extract the user’s main question and 2–5 key keywords/subtopics.
   - If the topic is broad, identify likely angles (definitions, how-to,
      comparisons, timeline, pros/cons, controversies).

2) Do two searches (minimum):
   A) Web search: 1–2 queries (different phrasing) to get good coverage.
   B) Video search: 1–2 queries (different phrasing) targeting YouTube/video sources.

3) Curate results (before fetching anything deep):
   - Pick 4–8 web results and 4–8 video results that look most relevant and credible.
   - Prefer: official docs, primary sources, well-known outlets, high-signal explainers,
     recency when relevant.
   - Avoid duplicates and near-duplicates.

4) Present findings to the user as a guided menu:
   - Give a short “What I found” summary (2–4 bullets max).
   - Then show two lists:
     - “Web results” (title + 1-line why it matters + URL)
     - “Video results” (title + creator + duration if available + 1-line why it matters + video URL or ID)
   - After the lists, ask: “Which items should I open next?” and instruct the user to reply with numbers (e.g., W2, W5, V1, V4).

5) Fetch deeper content only after user selection:
   - If user picks web items: fetch full content for each selected URL.
   - If user picks video items: fetch transcripts for each selected video (if available).
   - Then summarize each selection with:
     - key takeaways
     - notable quotes (short excerpts only)
     - any disagreements/uncertainty
     - “what to check next” suggestions

6) Iterate:
   - Ask a follow-up choice question after each round:
     - “Want to expand to adjacent topics, compare sources, or go deeper on one?”
   - Keep the user in control of the direction.

Output rules:
- Keep the first pass lightweight: do not dump full page text or long transcripts.
- Be explicit about source URLs (always include them).
- Use clear, structured formatting.
- If the user’s question is time-sensitive (“latest”, “today”, “2026”, etc.), prioritize recent sources and note publication dates when available.
- If transcripts aren’t available, say so and offer to summarize the video from its description/chapters (if present) or pick another video.

Safety/quality:
- Don’t fabricate sources, quotes, or claims.
- If sources conflict, state the conflict and show both.
- Prefer primary/official sources when possible.

When showing the user the result of your search, pre-select some of the
most promising content and ask if they want to explore it deeper, or if they want to
explore some other content.
""".strip()


# Build the agent from the prompt and the ResearchTools methods collected above.
research_agent = Agent(
    name='research',
    model='gpt-4o-mini',
    instructions=research_instructions,
    tools=research_tools,
)

# Event-stream handler that prints each tool call, labelled with the agent's name.
callback = NamedCallback(research_agent)

In [42]:
from pydantic_ai import RunUsage

# Conversation state shared across turns: the accumulated message history and
# the running usage total.
messages = []
usage = RunUsage()

# Interactive chat loop; typing "stop" (any case, surrounding spaces ignored) ends it.
# Relies on the notebook's top-level await support.
while True:
    user_prompt = input('You:')
    if user_prompt.lower().strip() == 'stop':
        break

    # Run one agent turn inside the agent's sequential_tool_calls() context,
    # streaming events to the callback so tool calls are printed as they occur.
    with research_agent.sequential_tool_calls():
        result = await research_agent.run(
            user_prompt,
            message_history=messages,
            event_stream_handler=callback
        )

    # Add this turn's usage to the running total.
    usage = usage + result.usage()

    print(result.output)
    # Keep this turn's messages so the next turn sees the full history.
    messages.extend(result.new_messages())

You: pydantic ai


TOOL CALL (research): brave_search({"query": "Pydantic AI"})
TOOL CALL (research): brave_youtube_search({"query": "Pydantic AI"})
### What I Found
- **Pydantic AI** is a Python framework that enhances the development of AI applications by providing type safety and structured outputs, making it especially beneficial for large language model (LLM) applications.
- It integrates well with existing Python ecosystems and is similar to FastAPI in its usability and robustness.
- Resources include official documentation, tutorials, and videos covering both beginner and advanced topics related to building AI agents using Pydantic AI.

### Web Results
1. **Pydantic AI - Pydantic AI**
   - An overview of Pydantic AI's capabilities, validation applications, and supported models.
   - [Read more](https://ai.pydantic.dev/)

2. **Pydantic AI: A Beginner’s Guide With Practical Examples | DataCamp**
   - A practical guide focusing on building agentic AI systems with Pydantic AI frameworks.
   - [Read mo

You: go


TOOL CALL (research): fetch_content({"url": "https://ai.pydantic.dev/"})
TOOL CALL (research): fetch_content({"url": "https://www.datacamp.com/tutorial/pydantic-ai-guide"})
TOOL CALL (research): get_youtube_transcripts({"video_id": "-WB0T0XmDrY"})
TOOL CALL (research): get_youtube_transcripts({"video_id": "PkQIREapb9o"})
### Content Summary from Pydantic AI Resources

#### Web Insights

1. **Pydantic AI Overview:**
   - Pydantic AI is a framework aimed at enhancing the creation of production-grade applications with Generative AI through structured outputs and type safety. It aims to offer the same ease of use as FastAPI.
   - Key capabilities include seamless observability, integration with major AI models, and the ability to customize models easily.

2. **Beginner's Guide to Pydantic AI:**
   - This tutorial focuses on building structured AI agents, producing validated outputs, managing dependencies, and enabling seamless interaction with LLMs (Large Language Models).
   - It highligh

You: stop
