In [28]:
import os
from dotenv import load_dotenv
# Load environment variables (API keys) from the local `keys.env` file so they
# are available through `os.environ` in the following cells.
load_dotenv("keys.env")

True

In [29]:
# Read the API keys from the environment. Indexing `os.environ` raises
# KeyError immediately if a key is missing, so a misconfigured `keys.env`
# fails here rather than later inside an API call.
# NOTE(review): OPENAI_API_KEY is not passed to anything in the visible cells;
# presumably the 'openai:gpt-4o' model picks it up from the environment — confirm.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
TAVILY_API_KEY = os.environ["TAVILY_SEARCH_API_KEY"]

In [30]:
from tavily import TavilyClient, AsyncTavilyClient
# Async Tavily client; the `get_search` tool defined below awaits calls on it.
tavily_client = AsyncTavilyClient(api_key=TAVILY_API_KEY)

# Sample search (uncomment to smoke-test the client and the API key):
# response = await tavily_client.search("What is PydanticAI?", max_results=3)
# print(response['results'])

In [31]:
from __future__ import annotations as _annotations

import asyncio
from dataclasses import dataclass
from typing import Any

from devtools import debug
from httpx import AsyncClient
import datetime
from typing_extensions import TypeAlias
from pydantic_ai import Agent, ModelRetry, RunContext
from pydantic import BaseModel, Field
from pydantic import field_validator, ValidationError
import pandas as pd
from typing import Any, List, Dict
from typing import Annotated, Union
import pandas as pd
import ast 


## Dependency and Response Classes

In [32]:
@dataclass
class SearchDataclass:
    """Run-time dependencies handed to the agent via ``deps=``.

    An instance is built once in the notebook and passed to every
    ``research_analyst_agent.run(...)`` call.
    """
    # Forwarded as `max_results` to the Tavily search call in `get_search`.
    max_results: int
    # Today's date as a string; interpolated into the dynamic system prompt.
    todays_date: str

@dataclass
class ResearchDependencies:
    """Dependency container carrying only today's date.

    NOTE(review): this class is never instantiated in the visible cells; the
    object actually passed as ``deps`` is a ``SearchDataclass``.
    """
    # Today's date as a string.
    todays_date: str

class ResearchTable(BaseModel):
    """Response when dictionary for Dataframe could be successfully generated."""
    # The dictionary is carried as *text*: the result validator parses it with
    # `ast.literal_eval` and feeds the result to `pd.DataFrame`.
    # The description below is part of the schema shown to the model, so its
    # wording matters (fixed the "dictory" typo).
    dictionary: str = Field(description='This is a python dictionary which represents a Pandas Dataframe of the requested topic. Name the columns based on the research topic and results.')


class InvalidTable(BaseModel):
    """Response when the user's research query result couldn't be put into a Dataframe"""
    # Presumably the model's explanation of why no table could be produced;
    # the result validator returns this response unchanged.
    error_message: str

# The agent's structured result: either a table (as dictionary text) or an
# explicit failure message.
Response: TypeAlias = Union[ResearchTable, InvalidTable]


## Agent Creation

In [33]:
# Research agent: GPT-4o plans 3-5 keyword searches, calls the search tool for
# each, and returns a `Response` (table text or an error message).
research_analyst_agent = Agent(
    'openai:gpt-4o',
    # The object passed as `deps=` at run time is a `SearchDataclass`, and the
    # search tool reads `deps.max_results`, which only that class defines, so
    # it is the correct dependency type to declare here.
    deps_type=SearchDataclass,
    result_type=Response,
    system_prompt="""You are a helpful research analyst assistant, you are an expert in researching data insights.
                     If you are given a question you write strong keywords to do 3-5 searches in total.
                     (each with a query_number) and then combine the results to give me a python dictionary which will represent a dataframe of the results.""",
)

In [34]:
@research_analyst_agent.tool  # Tavily-backed web search
async def get_search(search_data: RunContext[SearchDataclass], query: str, query_number: int) -> str:
    """Get the search context for a keyword query.

    Args:
        query: keywords to search.
        query_number: sequence number of this search within the current question (used for progress logging).

    Returns:
        The search context produced by Tavily for the query, as text.
    """
    # Progress log so the notebook shows which searches the model decided to run.
    print(f"Search query {query_number}: {query}")
    max_results = search_data.deps.max_results
    # `get_search_context` returns a string of search context (not a dict),
    # hence the `str` return annotation.
    results = await tavily_client.get_search_context(query=query, max_results=max_results)

    return results

In [35]:
@research_analyst_agent.system_prompt
async def add_current_date(ctx: RunContext[SearchDataclass]) -> str:
    """Dynamic system prompt that tells the model today's date.

    The role and task instructions are already supplied by the static
    ``system_prompt`` given to ``Agent(...)``. Dynamic prompts are sent in
    addition to the static one, so repeating those instructions here would
    send them to the model twice; this function contributes only the date.

    Args:
        ctx: run context; ``ctx.deps`` is the ``SearchDataclass`` passed to ``run``.

    Returns:
        A one-sentence prompt fragment containing ``ctx.deps.todays_date``.
    """
    todays_date = ctx.deps.todays_date
    return f"If you need today's date, it is {todays_date}."

In [36]:
# Dependencies shared by every agent run below: today's date in YYYY-MM-DD
# form and the number of results to request per search.
current_date = datetime.date.today()
date_string = f"{current_date:%Y-%m-%d}"
deps = SearchDataclass(todays_date=date_string, max_results=3)

## Result Validator

In [37]:
@research_analyst_agent.result_validator
async def validate_result(ctx: RunContext[SearchDataclass], result: Response) -> Union[pd.DataFrame, InvalidTable]:
    """Turn the model's dictionary text into a DataFrame, or ask it to retry.

    Args:
        ctx: run context (unused here).
        result: the structured response produced by the model.

    Returns:
        ``result`` unchanged when it is an ``InvalidTable``; otherwise a
        ``pd.DataFrame`` built from the parsed dictionary. After a successful
        run this DataFrame is what ``run(...).data`` holds.

    Raises:
        ModelRetry: when the text does not look like a dictionary, or when it
            cannot be parsed or converted into a DataFrame.
    """
    if isinstance(result, InvalidTable):
        return result

    # Tolerate leading/trailing whitespace or newlines around the literal so an
    # otherwise valid answer does not cost an extra model round-trip.
    dictionary_text = result.dictionary.strip()
    if not dictionary_text.startswith('{'):
        raise ModelRetry('Please return a dictionary for the Dataframe to be created.')

    try:
        # literal_eval only evaluates Python literals, so model output is never
        # executed as code.
        data = ast.literal_eval(dictionary_text)
        dataframe = pd.DataFrame(data)
    except Exception as e:
        # Any parse/shape problem is fed back to the model as a retry prompt.
        raise ModelRetry(f'Invalid dictionary: {e}') from e
    return dataframe

## Running queries to get results

In [38]:
# First query. Top-level `await` works because the notebook cell runs inside
# an event loop. The "Search query N: ..." lines printed below come from the
# `get_search` tool.
result1 = await research_analyst_agent.run(
    'What are the top 5 edtech companies in India?', deps=deps
)

Search query 1: top edtech companies in India 2024
Search query 2: leading education technology firms in India
Search query 3: best edtech companies India 2024


In [39]:
# `.data` holds what the result validator returned — here a DataFrame.
result1.data

Unnamed: 0,Company Name,Headquarters,Funding Raised (USD Million),Description,Industry Impact
0,BYJU'S,Bangalore,2200.0,BYJU’S is an educational technology company th...,BYJU’S has over 100 million registered users.
1,Vedantu,Bangalore,,Vedantu is an online tutoring platform connect...,Vedantu offers interactive courses for K-12.
2,Classplus,Gurugram,,Classplus is a platform that allows teachers a...,Classplus empowers educators to create custom ...
3,Extramarks,Noida,,Extramarks provides learning solutions for K-1...,Extramarks focuses on comprehensive learning e...
4,Imarticus Learning,Mumbai,,Imarticus Learning offers virtual education pl...,Imarticus focuses on upskilling in various pro...


In [49]:
# Second query, reusing the same agent and dependencies.
result2 = await research_analyst_agent.run(
    'What is the market share of Edtech by top 5 countries?', deps=deps
)

Search query 1: 2024 Edtech market share top countries
Search query 2: Edtech industry market share by country 2024
Search query 3: Top countries in Edtech market 2024


In [50]:
# Display the validated DataFrame for the second query.
result2.data

Unnamed: 0,Country,Market Share 2024 (in USD Millions),CAGR (%) 2024-2031
0,USA,82240.0,13.9
1,India,3969.3,17.8
2,Japan,4564.69,14.5
3,China,25000.0,15.2
4,United Kingdom,8500.0,12.3
