In [1]:

import os
from openai import OpenAI
import pandas as pd
import json
import duckdb
from pydantic import BaseModel, Field
from IPython.display import Markdown
from dotenv import load_dotenv


In [2]:
import phoenix as px
from phoenix.otel import register
from openinference.instrumentation.openai import OpenAIInstrumentor
from openinference.semconv.trace import SpanAttributes
from opentelemetry.trace import Status, StatusCode
from openinference.instrumentation import TracerProvider


In [3]:
# Build the OpenAI client. load_dotenv() runs first so that a key supplied
# through a .env file (if one is used) is visible to the lookup below.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)

# Chat model used by the agent's completion calls in this notebook
MODEL = "gpt-4o-mini"

In [4]:
# Phoenix project name; passed to `register` as `project_name` below
PROJECT_NAME = "az-tracing-agent"

In [5]:
# Collector endpoint read from the environment (None when the variable is unset).
# It is stored under its own name so that it cannot collide with the
# `get_phoenix_endpoint()` function defined in the next cell: sharing one name
# meant that re-running this cell replaced the function with a plain string.
PHOENIX_COLLECTOR_ENDPOINT = os.getenv("PHOENIX_COLLECTOR_ENDPOINT")

In [6]:
def get_phoenix_endpoint():
    """Return the value of PHOENIX_COLLECTOR_ENDPOINT, or None if it is unset.

    load_dotenv() is called on every invocation, before the lookup.
    """
    load_dotenv()
    return os.getenv("PHOENIX_COLLECTOR_ENDPOINT")

In [7]:
# Alternative kept for reference: pass the collector endpoint explicitly.
# tracer_provider = register(
#     project_name=PROJECT_NAME,
#     endpoint=get_phoenix_endpoint() + "v1/traces",
# )

# Register the tracer provider for this project. No endpoint is passed here;
# the collector endpoint actually used is reported in the cell output.
tracer_provider = register(project_name=PROJECT_NAME, auto_instrument=True)

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: az-tracing-agent
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: https://app.phoenix.arize.com/v1/traces
|  Transport: HTTP + protobuf
|  Transport Headers: {'api_key': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [8]:
# `register(..., auto_instrument=True)` above may already have instrumented the
# OpenAI client. Instrumenting a second time does nothing except log
# "Attempting to instrument while already instrumented", so only instrument
# when that has not happened yet.
openai_instrumentor = OpenAIInstrumentor()
if not openai_instrumentor.is_instrumented_by_opentelemetry:
    openai_instrumentor.instrument(tracer_provider=tracer_provider)

Attempting to instrument while already instrumented


In [9]:
# Tracer whose `tool()` / `chain()` decorators are applied to the functions defined below
tracer = tracer_provider.get_tracer(__name__)

In [10]:
# Path to the transactional data (a Parquet file, loaded with pd.read_parquet
# in lookup_sales_data). The path is relative, so it is resolved against the
# notebook's working directory.
TRANSACTION_DATA_FILE_PATH = 'Store_Sales_Price_Elasticity_Promotions_Data.parquet'

In [11]:
# Prompt template for step 2 of tool 1 (SQL generation).
# Filled in with str.format by generate_sql_query; placeholders:
#   {prompt}     - the request to turn into SQL
#   {columns}    - the columns available in the table
#   {table_name} - the name of the table to query
SQL_GENERATION_PROMPT = """
Generate an SQL query based on a prompt. Do not reply with anything besides the SQL query.
The prompt is: {prompt}

The available columns are: {columns}
The table name is: {table_name}
"""

In [12]:
# code for step 2 of tool 1
def generate_sql_query(prompt: str, columns: list, table_name: str) -> str:
    """Ask the model to write an SQL query that answers ``prompt``.

    Args:
        prompt: Natural-language request to translate into SQL.
        columns: Column names available in the table. Any iterable of names is
            accepted (for example a list or a pandas ``Index``); a plain string
            is inserted into the prompt unchanged.
        table_name: Name of the table the query should read from.

    Returns:
        The model's reply text, expected to be an SQL query. It is returned
        unmodified, so it may still be wrapped in Markdown code fences.

    Raises:
        ValueError: If the model's reply has no text content.
    """
    # Render the names as "a, b, c". Formatting the container itself would leak
    # its repr into the prompt (e.g. "Index([...], dtype='object')" when a
    # pandas Index is passed).
    if isinstance(columns, str):
        column_list = columns
    else:
        column_list = ", ".join(str(column) for column in columns)

    formatted_prompt = SQL_GENERATION_PROMPT.format(
        prompt=prompt, columns=column_list, table_name=table_name
    )

    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": formatted_prompt}],
    )

    sql_query = response.choices[0].message.content
    # Fail with a clear message instead of returning None from a `-> str` function.
    if not sql_query:
        raise ValueError("The model returned an empty response instead of an SQL query")
    return sql_query

In [13]:
# code for tool 1
@tracer.tool()
def lookup_sales_data(prompt: str) -> str:
    """Answer a data request by running model-generated SQL over the sales data.

    Reads the parquet file at TRANSACTION_DATA_FILE_PATH, makes it available to
    DuckDB as the table ``sales``, asks generate_sql_query to turn ``prompt``
    into SQL, executes that SQL and returns the resulting rows as text.

    Args:
        prompt: Natural-language description of the data to look up.

    Returns:
        The query result rendered with ``DataFrame.to_string()``, or a string
        starting with "Error accessing data: " if any step raised.
    """
    try:
        # define the table name
        table_name = "sales"

        # step 1: read the parquet file into a DuckDB table.
        # The local must stay named `df`: the SQL below refers to it by that name.
        df = pd.read_parquet(TRANSACTION_DATA_FILE_PATH)
        duckdb.sql(f"CREATE TABLE IF NOT EXISTS {table_name} AS SELECT * FROM df")

        # step 2: generate the SQL code. Pass a plain list of names, matching
        # generate_sql_query's `columns: list` parameter, rather than the Index.
        sql_query = generate_sql_query(prompt, list(df.columns), table_name)
        # clean the response to make sure it only includes the SQL code:
        # remove Markdown code fences first, then trim the whitespace they leave
        sql_query = sql_query.replace("```sql", "").replace("```", "").strip()

        # step 3: execute the SQL query
        # NOTE(review): the SQL text comes from the model and is executed as-is,
        # so nothing here restricts it to read-only statements.
        result = duckdb.sql(sql_query).df()

        return result.to_string()
    except Exception as e:
        # Failures are reported as text rather than raised, so the caller
        # receives them as this tool's output.
        return f"Error accessing data: {str(e)}"

In [14]:
# Try tool 1 on its own; the returned text is reused as input for tool 2 below
example_data = lookup_sales_data("Show me all the sales for store 1320 on November 1st, 2021")
print(example_data)

    Store_Number  SKU_Coded  Product_Class_Code  Sold_Date  Qty_Sold  Total_Sale_Value  On_Promo
0           1320    6173050               22875 2021-11-01         1          4.990000         0
1           1320    6174250               22875 2021-11-01         1          0.890000         0
2           1320    6176200               22975 2021-11-01         2         99.980003         0
3           1320    6176800               22800 2021-11-01         1         14.970000         0
4           1320    6177250               22975 2021-11-01         1          6.890000         0
5           1320    6177300               22800 2021-11-01         1          9.990000         0
6           1320    6177350               22800 2021-11-01         2         16.980000         0
7           1320    6177700               22875 2021-11-01         1          3.190000         0
8           1320    6178000               22875 2021-11-01         2          6.380000         0
9           1320    6178250   

In [15]:
# Prompt template for tool 2 (data analysis).
# Filled in with str.format by analyze_sales_data; placeholders:
#   {data}   - the data to analyze, as text
#   {prompt} - the question to answer about that data
DATA_ANALYSIS_PROMPT = """
Analyze the following data: {data}
Your job is to answer the following question: {prompt}
"""

In [16]:
# code for tool 2
@tracer.tool()
def analyze_sales_data(prompt: str, data: str) -> str:
    """Have the model answer ``prompt`` about the supplied ``data``.

    Both values are substituted into DATA_ANALYSIS_PROMPT and sent as a single
    user message. The reply text is returned, or a fixed fallback string when
    the reply is empty.
    """
    user_message = {
        "role": "user",
        "content": DATA_ANALYSIS_PROMPT.format(data=data, prompt=prompt),
    }
    completion = client.chat.completions.create(model=MODEL, messages=[user_message])

    reply = completion.choices[0].message.content
    if reply:
        return reply
    return "No analysis could be generated"

In [17]:
# Try tool 2 on its own, using the text returned by the lookup_sales_data example above
print(analyze_sales_data(prompt="what trends do you see in this data", 
                         data=example_data))

Based on the provided sales data for store number 1320 on a specific date (November 1, 2021), we can analyze several trends and patterns:

### 1. **Sales Distribution by Product Class**
- The data includes multiple SKUs across different product classes (e.g., 22800, 22875, 22950, etc.).
- **Product Class 24400** has a significant number of transactions, suggesting it may be a favored or high-demand class.
- **Product Class 22800** appears frequently, reflecting it might be a staple category in this store's offerings.

### 2. **Quantity Sold**
- The quantity sold ranges from 1 to 5 units per transaction.
- Some SKUs, particularly those in product class 24425, have higher quantities sold (up to 5), indicating they may be popular or frequently bought items.

### 3. **Total Sale Value Insights**
- Total sale values vary widely, with some items like SKU 6176200 (Total Sale Value: $99.98) indicating high-value products.
- Most sale values are relatively low (below $20), suggesting that many 

In [18]:
def _function_tool(name, description, string_params):
    """Build one function-tool schema whose arguments are all required strings.

    ``string_params`` maps each argument name to its description; every
    argument is declared with type "string" and listed as required, in order.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {
                    param: {"type": "string", "description": text}
                    for param, text in string_params.items()
                },
                "required": list(string_params),
            },
        },
    }


# Tool schemas passed to the model via the `tools` argument of the chat call
tools = [
    _function_tool(
        "lookup_sales_data",
        "Look up data from Store Sales Price Elasticity Promotions dataset",
        {"prompt": "The unchanged prompt that the user provided."},
    ),
    _function_tool(
        "analyze_sales_data",
        "Analyze sales data to extract insights",
        {
            "data": "The lookup_sales_data tool's output.",
            "prompt": "The unchanged prompt that the user provided.",
        },
    ),
]

# Dictionary mapping function names to their implementations.
# The keys match the "name" fields declared in `tools`; handle_tool_calls uses
# this mapping to find the function to run for each tool call.
tool_implementations = {
    "lookup_sales_data": lookup_sales_data,
    "analyze_sales_data": analyze_sales_data
}

In [19]:
# code for executing the tools returned in the model's response
@tracer.chain()
def handle_tool_calls(tool_calls, messages):
    """Run each requested tool and append its result to the conversation.

    Args:
        tool_calls: Tool-call objects from the model's reply; each exposes
            ``id``, ``function.name`` and ``function.arguments`` (a JSON string).
        messages: Conversation history. It is extended in place with one
            ``{"role": "tool", ...}`` entry per tool call.

    Returns:
        The same ``messages`` list that was passed in.
    """
    for tool_call in tool_calls:
        tool_name = tool_call.function.name
        function = tool_implementations.get(tool_name)

        # Every tool call gets a "tool" message in reply, even when it cannot be
        # executed: an unknown tool name or unparseable arguments are reported
        # back as text instead of aborting and leaving the call unanswered.
        if function is None:
            result = f"Error: unknown tool '{tool_name}'"
        else:
            try:
                function_args = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError as e:
                result = f"Error: could not parse arguments for tool '{tool_name}': {e}"
            else:
                result = function(**function_args)

        messages.append({"role": "tool", "content": result, "tool_call_id": tool_call.id})

    return messages

In [20]:
# System message that run_agent adds when the conversation does not already contain one
SYSTEM_PROMPT = """
You are a helpful assistant that can answer questions about the Store Sales Price Elasticity Promotions dataset.
"""

In [21]:
def run_agent(messages):
    """Run the tool-calling loop until the model produces a final answer.

    Args:
        messages: Either a single user prompt (str) or a list of chat messages.
            A list is modified in place: the system prompt is inserted at the
            front if no dict message has role "system", and every model reply
            and tool result is appended.

    Returns:
        The text content of the first model reply that requests no tool calls.
    """
    print("Running agent with messages:", messages)

    if isinstance(messages, str):
        messages = [{"role": "user", "content": messages}]

    # Check and add system prompt if needed. It is placed at the front of the
    # conversation so that it precedes the user's request rather than follows it.
    has_system_prompt = any(
        isinstance(message, dict) and message.get("role") == "system"
        for message in messages
    )
    if not has_system_prompt:
        messages.insert(0, {"role": "system", "content": SYSTEM_PROMPT})

    # NOTE(review): the loop has no iteration cap; it ends only when the model
    # replies without tool calls.
    while True:
        print("Making router call to OpenAI")
        response = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=tools,
        )
        reply = response.choices[0].message
        messages.append(reply)
        tool_calls = reply.tool_calls
        print("Received response with tool calls:", bool(tool_calls))

        # if the model decides to call function(s), call handle_tool_calls
        if tool_calls:
            print("Processing tool calls")
            messages = handle_tool_calls(tool_calls, messages)
        else:
            print("No tool calls, returning final response")
            return reply.content

In [22]:
# Run the whole agent loop on one request; the final answer text is kept in `result`
result = run_agent('show me sales for top-10 (by sales) stores')

Running agent with messages: show me sales for top-10 (by sales) stores
Making router call to OpenAI
Received response with tool calls: True
Processing tool calls
Making router call to OpenAI
Received response with tool calls: False
No tool calls, returning final response


In [23]:
# Render the agent's answer as formatted Markdown
# (use print(result) instead to see the raw text)
Markdown(result)

Here are the sales figures for the top 10 stores by total sales:

| Store Number | Total Sales       |
|--------------|-------------------|
| 2970         | $836,341.33       |
| 3300         | $619,660.17       |
| 1320         | $592,832.07       |
| 1650         | $580,443.01       |
| 1210         | $508,393.77       |
| 1100         | $497,509.53       |
| 3080         | $495,458.24       |
| 2750         | $453,664.81       |
| 1540         | $427,777.43       |
| 880          | $420,302.09       |

Let me know if you need any more information!

In [24]:
# Keep this answer under its own name; the evaluation DataFrame below reads result1
result1 = run_agent('show me the id of the top selling store')
result = result1
Markdown(result1)

Running agent with messages: show me the id of the top selling store
Making router call to OpenAI
Received response with tool calls: True
Processing tool calls
Making router call to OpenAI
Received response with tool calls: False
No tool calls, returning final response


The ID of the top-selling store is 2970.

In [25]:
# Keep this answer under its own name; the evaluation DataFrame below reads result2
result2 = run_agent('show me the id of the second best-selling store')
result = result2
Markdown(result2)

Running agent with messages: show me the id of the second best-selling store
Making router call to OpenAI
Received response with tool calls: True
Processing tool calls
Making router call to OpenAI
Received response with tool calls: False
No tool calls, returning final response


The ID of the second best-selling store is 3300.

In [26]:
# Keep this answer under its own name (result3)
result3 = run_agent('show me the exact number of sum of sales of top selling store')
result = result3
Markdown(result3)

Running agent with messages: show me the exact number of sum of sales of top selling store
Making router call to OpenAI
Received response with tool calls: True
Processing tool calls
Making router call to OpenAI
Received response with tool calls: False
No tool calls, returning final response


The sum of sales of the top selling store (Store Number 2970) is approximately \$836,341.33.

In [31]:
# Keep this answer under its own name; the evaluation DataFrame below reads result4
result4 = run_agent('show me the total sum of all sales of all stores')
result = result4
Markdown(result4)

Running agent with messages: show me the total sum of all sales of all stores
Making router call to OpenAI
Received response with tool calls: True
Processing tool calls
Making router call to OpenAI
Received response with tool calls: False
No tool calls, returning final response


The total sum of all sales of all stores is approximately \$13,272,640.

In [32]:
# Keep this answer under its own name; the evaluation DataFrame below reads result5
result5 = run_agent('How much revenue generated all store generated in 1998?')
result = result5
Markdown(result5)

Running agent with messages: How much revenue generated all store generated in 1998?
Making router call to OpenAI
Received response with tool calls: True
Processing tool calls
Making router call to OpenAI
Received response with tool calls: False
No tool calls, returning final response


It appears that there is no available data regarding the total revenue generated by all stores in 1998.

In [33]:
# EVALS

In [37]:
# Evaluation set: one row per saved agent answer, paired with a reference
# answer and the question it is judged against.
# NOTE(review): some `query` strings paraphrase the prompts that were actually
# sent to run_agent instead of repeating them verbatim - confirm this is intended.
df = pd.DataFrame(
    data=[
        (
            "The id number of the top selling store is 2970.",
            "show me the id of the top selling store",
            result1,
        ),
        (
            "The id number of the second top selling store is 3300.",
            "show me the id of the second selling store",
            result2,
        ),
        (
            "total sum of all sales is $13,272,640.00.",
            "total sales of all stores",
            result4,
        ),
        (
            "$800",
            "How much revenue generated all store generated in 1998?",
            result5,
        ),
    ],
    columns=["reference", "query", "response"],
)
df.head()

Unnamed: 0,reference,query,response
0,The id number of the top selling store is 2970.,show me the id of the top selling store,The ID of the top-selling store is 2970.
1,The id number of the second top selling store ...,show me the id of the second selling store,The ID of the second best-selling store is 3300.
2,"total sum of all sales is $13,272,640.00.",total sales of all stores,The total sum of all sales of all stores is ap...
3,$800,How much revenue generated all store generated...,It appears that there is no available data reg...


In [38]:
import nest_asyncio

from phoenix.evals import HallucinationEvaluator, OpenAIModel, QAEvaluator, run_evals

nest_asyncio.apply()  # This is needed for concurrency in notebook environments

# Judge model used by both evaluators. No API key is passed here;
# presumably it is picked up from the environment (OPENAI_API_KEY) - confirm.
eval_model = OpenAIModel(model="gpt-4o")

# Define your evaluators
hallucination_evaluator = HallucinationEvaluator(eval_model)
qa_evaluator = QAEvaluator(eval_model)

# Put the frame into the column layout checked below: the reference doubles as
# the context, and query/response become input/output. `df` is rebound to a new
# frame rather than mutated in place, so re-running this cell gives the same result.
df = df.assign(context=df["reference"]).rename(
    columns={"query": "input", "response": "output"}
)
required_columns = ["output", "input", "context", "reference"]
missing_columns = [column for column in required_columns if column not in df.columns]
assert not missing_columns, f"df is missing required columns: {missing_columns}"

# Run the evaluators, each evaluator will return a dataframe with evaluation results
# We upload the evaluation results to Phoenix in the next step
hallucination_eval_df, qa_eval_df = run_evals(
    dataframe=df, evaluators=[hallucination_evaluator, qa_evaluator], provide_explanation=True
)

run_evals |          | 0/8 (0.0%) | ⏳ 00:00<? | ?it/s

In [39]:
# Attach each evaluator's label and explanation to a copy of the evaluated rows
results_df = df.assign(
    hallucination_eval=hallucination_eval_df["label"],
    hallucination_explanation=hallucination_eval_df["explanation"],
    qa_eval=qa_eval_df["label"],
    qa_explanation=qa_eval_df["explanation"],
)
results_df.head()


Unnamed: 0,reference,input,output,context,hallucination_eval,hallucination_explanation,qa_eval,qa_explanation
0,The id number of the top selling store is 2970.,show me the id of the top selling store,The ID of the top-selling store is 2970.,The id number of the top selling store is 2970.,factual,"First, we examine the query, which asks for th...",correct,"To determine if the answer is correct, we need..."
1,The id number of the second top selling store ...,show me the id of the second selling store,The ID of the second best-selling store is 3300.,The id number of the second top selling store ...,factual,"First, let's examine the query: 'show me the i...",correct,"To determine if the answer is correct, we need..."
2,"total sum of all sales is $13,272,640.00.",total sales of all stores,The total sum of all sales of all stores is ap...,"total sum of all sales is $13,272,640.00.",factual,To determine if the answer is factual or hallu...,correct,"First, we identify the question, which asks fo..."
3,$800,How much revenue generated all store generated...,It appears that there is no available data reg...,$800,hallucinated,To determine if the answer is factual or hallu...,incorrect,"To determine if the answer is correct, we need..."
