In [7]:
import os
import pandas as pd
import numpy as np

from dataclasses import dataclass
from datetime import datetime, timedelta
from dotenv import load_dotenv
from typing import Any

from groq import Groq
from pydantic_ai import Agent, RunContext, ModelRetry
from pydantic_ai.messages import ModelRequest, ModelMessagesTypeAdapter
from pydantic_ai.result import RunResult

import nest_asyncio
nest_asyncio.apply()

# Load environment
load_dotenv()
# os.environ only accepts str values, so a missing key has to be detected
# explicitly; copying os.getenv(...) back into os.environ would raise an
# opaque "str expected, not NoneType" TypeError instead.
if not os.getenv('GROQ_API_KEY'):
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )
os.environ['LOGFIRE_IGNORE_NO_CONFIG'] = '1'


In [8]:
@dataclass
class Deps:
    """Container for runtime dependencies; it currently holds a single DataFrame."""

    # The table that analysis code operates on.
    df: pd.DataFrame

In [12]:
# Model identifier sent with each chat-completion request made through `client`.
# NOTE(review): confirm this model id is still served by Groq before relying on it.
model_name = 'mixtral-8x7b-32768'

# Groq API client, authenticated with the key read from the environment.
client = Groq(api_key=os.environ.get('GROQ_API_KEY'))

In [16]:

def ask_groq(user_input: str, system_prompt: str = "You are a helpful AI assistant."):
    """Send a single-turn chat request to Groq and return the reply text.

    Args:
        user_input: Text sent as the ``user`` message.
        system_prompt: Text sent as the ``system`` message. Defaults to the
            generic assistant prompt, so existing one-argument calls behave
            exactly as before; pass task-specific instructions here instead of
            prepending them to ``user_input``.

    Returns:
        The ``content`` of the first choice's message in the completion response.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input},
        ],
    )
    return response.choices[0].message.content

In [17]:
# Fixed seed so the synthetic dataset (and every answer computed from it) is
# identical on each Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

n_rows = 1000
start_date = datetime(2022, 1, 1)
# One consecutive calendar day per row, starting at start_date.
dates = [start_date + timedelta(days=i) for i in range(n_rows)]
makes = ['Toyota', 'Honda', 'Ford', 'Chevrolet', 'Nissan', 'BMW', 'Mercedes', 'Audi', 'Hyundai', 'Kia']
models = ['Sedan', 'SUV', 'Truck', 'Hatchback', 'Coupe', 'Van']
colors = ['Red', 'Blue', 'Black', 'White', 'Silver', 'Gray', 'Green']

# Column name -> n_rows randomly drawn values; all draws come from the seeded `rng`.
data = {
    'Date': dates,
    'Make': rng.choice(makes, n_rows),
    'Model': rng.choice(models, n_rows),
    'Color': rng.choice(colors, n_rows),
    'Year': rng.integers(2015, 2023, n_rows),  # upper bound exclusive: 2015..2022
    'Price': rng.uniform(20000, 80000, n_rows).round(2),
    'Mileage': rng.uniform(0, 100000, n_rows).round(0),
    'EngineSize': rng.choice([1.6, 2.0, 2.5, 3.0, 3.5, 4.0], n_rows),
    'FuelEfficiency': rng.uniform(20, 40, n_rows).round(1),
    'SalesPerson': rng.choice(['Alice', 'Bob', 'Charlie', 'David', 'Eva'], n_rows)
}

In [18]:
# Assemble the generated columns into a DataFrame ordered by the 'Date' column.
df = pd.DataFrame(data).sort_values(by='Date')


In [30]:
def _strip_code_fences(code):
    """Return `code` without surrounding whitespace, Markdown fences, or backticks."""
    if not isinstance(code, str):
        return code  # leave it to eval() to report the type problem
    snippet = code.strip()
    if snippet.startswith("```"):
        body = snippet[3:]
        if body.endswith("```"):
            body = body[:-3]
        # Drop the opening fence line when it is empty or only a language tag.
        tag, newline, rest = body.partition("\n")
        if newline and tag.strip().lower() in ("", "python", "py", "python3"):
            body = rest
        return body.strip()
    if len(snippet) >= 2 and snippet.startswith("`") and snippet.endswith("`"):
        return snippet[1:-1].strip()
    return snippet


def execute_code(code: str):
    """Evaluate an AI-generated Python expression against the dataset.

    Markdown wrappers (triple-backtick fences with an optional ``python`` tag,
    or single backticks) are removed before evaluation, because wrapped
    replies would otherwise fail with a SyntaxError.

    Args:
        code: A single Python expression. It is evaluated with ``df``, ``pd``
            and ``np`` available as names.

    Returns:
        The value of the expression, or, if evaluation raises, a string of the
        form "Error executing code `<code>`: <exception>" (nothing is raised
        to the caller for ordinary exceptions).
    """
    try:
        # SECURITY: eval() executes arbitrary model-generated code with the
        # full set of builtins available. Only use this with trusted models
        # and data, or replace it with a sandboxed evaluator.
        result = eval(_strip_code_fences(code), {"df": df, "pd": pd, "np": np})
        return result
    except Exception as e:
        return f"Error executing code `{code}`: {e}"

In [34]:
def ask_agent(question):
    """Ask the model for a pandas expression that answers `question`, run it, and print the outcome.

    The prompt lists the columns of the global ``df`` and asks for a bare
    Python/Pandas expression. The reply from ``ask_groq`` is passed unchanged
    to ``execute_code``. The question, the suggested code and the evaluated
    answer are printed; nothing is returned.
    """
    column_list = ', '.join(df.columns)
    instructions = f"""
    You are an AI assistant that helps analyze a dataset. The dataset has the following columns: 
    {column_list}

    Example query formats:
    - "What is the average Price?"
    - "Count the number of rows"
    - "List all unique Makes"

    Answer questions only using valid Python/Pandas expressions, such as:
    - df["Price"].mean()
    - len(df)
    - df["Make"].unique().tolist()
    """

    # The instructions and the question travel together in one user message.
    prompt = f"{instructions}\nUser Question: {question}"

    print(f"Question: {question}")
    suggested_code = ask_groq(prompt)
    print(f"AI Suggested Code: `{suggested_code}`")

    # Evaluate the model's expression so the printed answer comes from the data itself.
    answer = execute_code(suggested_code)
    print(f"Executed Answer: {answer}\n---")

In [35]:
# Demo: ask for the column names; the agent prints the suggested code and its result.
ask_agent("What are the column names in this dataset?")

Question: What are the column names in this dataset?
AI Suggested Code: `df.columns.tolist()`
Executed Answer: ['Date', 'Make', 'Model', 'Color', 'Year', 'Price', 'Mileage', 'EngineSize', 'FuelEfficiency', 'SalesPerson']
---


In [36]:
# Demo: ask for the row count of the dataset.
ask_agent("How many rows are in this dataset?")

Question: How many rows are in this dataset?
AI Suggested Code: `len(df)`
Executed Answer: 1000
---


In [37]:
# Demo: ask for the distinct values of the 'Make' column.
ask_agent("List all unique car makes.")

Question: List all unique car makes.
AI Suggested Code: `df["Make"].unique().tolist()`
Executed Answer: ['BMW', 'Chevrolet', 'Hyundai', 'Honda', 'Audi', 'Kia', 'Mercedes', 'Ford', 'Nissan', 'Toyota']
---
