In [1]:
import os
from dotenv import load_dotenv
# Load variables from the local "keys.env" file (python-dotenv). Being the last
# expression in the cell, the call's return value is shown as the cell output.
load_dotenv("keys.env")

True

In [2]:
# Read the API keys from the process environment. Indexing os.environ (instead
# of using .get) raises KeyError right here if a variable is missing.
# NOTE(review): OPENAI_API_KEY is not referenced again in the visible notebook —
# presumably the 'openai:gpt-4o' model reads the key from the environment itself; confirm.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
TAVILY_API_KEY = os.environ["TAVILY_SEARCH_API_KEY"]

In [3]:
from tavily import TavilyClient, AsyncTavilyClient
# Async Tavily client, authenticated with the key read from the environment.
tavily_client = AsyncTavilyClient(api_key=TAVILY_API_KEY)

# Sample search: one query, capped at 3 results, to check the client works.
# Top-level `await` relies on the notebook providing a running event loop.
response = await tavily_client.search("What is PydanticAI?", max_results=3)
# Only the 'results' entry of the response is printed.
print(response['results'])

[{'title': 'PydanticAI Introduction: What is PydanticAI about', 'url': 'https://aipure.ai/products/pydantic-ai/introduction', 'content': "PydanticAI is a Python Agent Framework that streamlines the development of production-grade AI applications by combining Pydantic's powerful data validation with LLM integration, offering type-safe dependency injection and model-agnostic support. PydanticAI is an innovative agent framework developed by the team behind Pydantic, designed to simplify the process of building production-grade applications with Generative AI. The framework also supports streamed responses and can validate structured responses using Pydantic models, making it particularly powerful for complex AI applications that require reliable data handling. CopilotForXcode is an Xcode Source Editor Extension that integrates GitHub Copilot, Codeium, and ChatGPT to provide AI-powered code suggestions, chat assistance, and prompt-to-code functionality within Xcode. BLACKBOX AI is an AI-po

In [None]:
from __future__ import annotations as _annotations

import asyncio
from dataclasses import dataclass
from typing import Any

from devtools import debug
from httpx import AsyncClient
import datetime
from typing_extensions import TypeAlias
from pydantic_ai import Agent, ModelRetry, RunContext
from pydantic import BaseModel, Field
from pydantic import field_validator, ValidationError
import pandas as pd
from typing import Any, List, Dict
from typing import Annotated, Union
import pandas as pd
import ast 


## Dependency and Response Classes

In [21]:
@dataclass
class SearchDataclass:
    """Settings bundle pairing a result-count limit with a date string.

    NOTE(review): this overlaps with ``ResearchDependencies`` (both carry
    ``todays_date``), and the agent's declared ``deps_type`` is
    ``ResearchDependencies`` — confirm whether both classes are needed.
    """
    # Presumably the maximum number of results per search; nothing in the
    # visible notebook reads this field — TODO confirm intended consumer.
    max_results: int
    # Date as a string; the notebook formats dates as YYYY-MM-DD.
    todays_date: str

@dataclass
class ResearchDependencies:
    """Dependency container declared as the research agent's ``deps_type``."""
    # Date as a string; the notebook formats dates as YYYY-MM-DD.
    todays_date: str

class ResearchTable(BaseModel):
    """Response when dictionary for Dataframe could be successfully generated."""
    # NOTE(review): the docstring above and the Field description below are
    # presumably forwarded to the model as part of the result schema, so their
    # wording is prompt text rather than plain documentation — confirm before
    # rewording further. The description previously misspelled "dictionary".
    #
    # `dictionary` holds the table as the *text* of a Python dict literal; the
    # notebook later turns it into a real dict with ast.literal_eval and feeds
    # that to pd.DataFrame.
    dictionary: str = Field(description='This is a python dictionary which represents a Pandas Dataframe of the requested topic. Name the columns based on the research topic and results.')


class InvalidTable(BaseModel):
    """Response when the user's research query result couldn't be put into a Dataframe"""
    # NOTE(review): the docstring above is presumably forwarded to the model as
    # the schema description for this result type, so it is kept short and
    # only its typo ("could't") was corrected — confirm before rewording.
    #
    # Free-text explanation of why no table could be produced.
    error_message: str

# Union of the two result shapes used as the agent's result_type: a table
# payload (ResearchTable) or an explanatory failure (InvalidTable).
Response: TypeAlias = Union[ResearchTable, InvalidTable]


## Agent Creation

In [95]:
# Research agent: GPT-4o returning either a ResearchTable or an InvalidTable.
#
# NOTE(review): the system prompt asks the model to run 3-5 searches, but no
# search tool is registered on this agent anywhere in the visible notebook —
# confirm whether a tool cell is missing.
research_analyst_agent = Agent(
    'openai:gpt-4o',
    deps_type=ResearchDependencies,
    result_type=Response,
    # The leading spaces on the continuation lines are inside the string
    # literal, i.e. they are part of the prompt text and are kept verbatim.
    system_prompt="""You are a helpful research analyst assistant, you are an expert in researching data insights.
                     If you are given a question you write strong keywords to do 3-5 searches in total.
                     (each with a query_number) and then combine the results to give me a python dictionary which will represent a dataframe of the results.""",
)

In [23]:
# Today's date, formatted as YYYY-MM-DD, is the only run-time dependency.
current_date = datetime.date.today()
date_string = current_date.strftime("%Y-%m-%d")
# Build the dependency object with the type the agent declares
# (deps_type=ResearchDependencies) and the result validator annotates
# (RunContext[ResearchDependencies]). This was previously a SearchDataclass,
# which did not match that declared type; its extra max_results field was not
# read anywhere in the notebook.
deps = ResearchDependencies(todays_date=date_string)

## Result Validator

In [24]:
@research_analyst_agent.result_validator
async def validate_result(ctx: RunContext[ResearchDependencies], result: Response) -> Response:
    """Check that a ``ResearchTable`` result holds a parseable Python dict literal.

    Args:
        ctx: Run context supplied by the agent; not used by this validator.
        result: The agent's structured result, either a ``ResearchTable`` or an
            ``InvalidTable``.

    Returns:
        ``result`` itself. An ``InvalidTable`` passes through untouched; for a
        ``ResearchTable`` the ``dictionary`` text is normalised by stripping
        surrounding whitespace.

    Raises:
        ModelRetry: If the text does not start with ``{``, cannot be parsed by
            ``ast.literal_eval``, or parses to something other than a ``dict``.
    """
    if isinstance(result, InvalidTable):
        return result

    # Surrounding whitespace/newlines are harmless, so strip them instead of
    # spending a model retry on them.
    literal = result.dictionary.strip()
    if not literal.startswith('{'):
        raise ModelRetry('Please return a dictionary for the Dataframe to be created.')

    try:
        parsed = ast.literal_eval(literal)
    except Exception as e:
        # Deliberately broad: literal_eval can raise ValueError, SyntaxError,
        # TypeError, MemoryError or RecursionError, and each of them should
        # simply ask the model to try again.
        raise ModelRetry(f'Invalid dictionary: {e}') from e

    # A set literal such as "{1, 2}" also starts with '{' and parses cleanly,
    # but it is not a mapping and cannot describe DataFrame columns.
    if not isinstance(parsed, dict):
        raise ModelRetry('Please return a dictionary for the Dataframe to be created.')

    # Store the cleaned text so downstream literal_eval calls parse exactly
    # the string that was validated here.
    result.dictionary = literal
    return result

## Running queries to get results

In [None]:
# First research query; `deps` is passed to the run as the agent's dependencies.
# Top-level `await` relies on the notebook providing a running event loop.
result1 = await research_analyst_agent.run(
    'What are the top 5 edtech companies in India?', deps=deps
)

In [27]:
# Turn the dict literal (a string) carried by the result into a real dict.
# NOTE(review): assumes result1.data is a ResearchTable; InvalidTable has no
# `dictionary` attribute, so this line would raise AttributeError in that case.
data_dict = ast.literal_eval(result1.data.dictionary)

In [29]:
# Build a DataFrame from the parsed dict; the bare `df` on the last line
# makes the notebook render it as the cell output.
df = pd.DataFrame(data_dict)
df

Unnamed: 0,Company,Description
0,Byju's,"Largest edtech company in India, offering onli..."
1,Unacademy,Online learning platform for various competiti...
2,Toppr,Educational app providing online classes and l...
3,Vedantu,Interactive online tutoring platform for K-12 ...
4,UpGrad,Higher education and upskilling platform offer...


In [92]:
# Second research query, run with the same `deps` object as the first.
result2 = await research_analyst_agent.run(
    'What is the year on year revenue of the Edtech industry in India for the past 5 years?', deps=deps
)

In [94]:
# Parse the second result's dict literal and display it as a DataFrame.
# Note: this rebinds `data_dict` and `df`, replacing the values from the
# first query.
# NOTE(review): assumes result2.data is a ResearchTable (see InvalidTable).
data_dict = ast.literal_eval(result2.data.dictionary)
df = pd.DataFrame(data_dict)
df

Unnamed: 0,Year,Revenue (in billion USD)
0,2018,0.247
1,2019,0.735
2,2020,2.8
3,2021,4.2
4,2022,5.75


## Validation Testing

In [96]:
# Validation test: an ambiguous question for which no table can be built.
# Note: this rebinds `result2`, replacing the revenue query's result.
result2 = await research_analyst_agent.run(
    'What is the revenue of x,y,z', deps=deps
)

In [98]:
# Show the structured result of the ambiguous query as the cell output.
result2.data

InvalidTable(error_message="The request is ambiguous. Please clarify what 'x, y, z' refers to, so I can conduct a precise search.")