In [1]:
from langchain_openai import OpenAI, ChatOpenAI
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
import os

In [36]:
import pandas as pd
from graphs.plotly_graphs import *
from components.standard_components import get_growth
from data_proccessing.utils import generate_location_df
import scipy
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from typing import Any

In [3]:
from langchain_chroma import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings

In [6]:
df = pd.read_excel("data/full_iseller_data.xlsx")

In [7]:
df['closed_date'] = pd.to_datetime(df['closed_date'])
df['date'] = df['closed_date'] .dt.date
df['day_of_week_num'] = df['closed_date'].dt.dayofweek
df['day_name'] = df['closed_date'].dt.day_name()

In [8]:
def generate_sales_table(data):
    # Calculate current and last periods
    # df = pd.DataFrame(data)
    df = data.copy()

    current_quarter = df["quarter"].max()
    last_quarter = current_quarter - 1
    current_month = df["month"].max()
    last_month = current_month - 1

    # Filter dataframe once for each period
    df_current_quarter = df[df['quarter'] == current_quarter]
    df_last_quarter = df[df['quarter'] == last_quarter]
    df_current_month = df[df['month'] == current_month]
    df_last_month = df[df['month'] == last_month]

    # Groupby operation for each period
    total_revenue_df = df.groupby(['clean_product_name', 'product_type'])['total_order_amount'].sum().reset_index()
    total_revenue_df = total_revenue_df.rename(columns={'total_order_amount': 'total_revenue'})

    current_quarter_revenue = df_current_quarter.groupby(['clean_product_name', 'product_type'])['total_order_amount']\
        .sum().reset_index().rename(columns={'total_order_amount': 'current_quarter_revenue'})

    last_quarter_revenue = df_last_quarter.groupby(['clean_product_name', 'product_type'])['total_order_amount']\
        .sum().reset_index().rename(columns={'total_order_amount': 'last_quarter_revenue'})

    current_month_revenue = df_current_month.groupby(['clean_product_name', 'product_type'])['total_order_amount']\
        .sum().reset_index().rename(columns={'total_order_amount': 'current_month_revenue'})

    last_month_revenue = df_last_month.groupby(['clean_product_name', 'product_type'])['total_order_amount']\
        .sum().reset_index().rename(columns={'total_order_amount': 'last_month_revenue'})

    # Merge all dataframes into a single dataframe
    sales_performance_df = total_revenue_df.merge(current_quarter_revenue, on=["clean_product_name", "product_type"], how='left')\
        .merge(last_quarter_revenue, on=["clean_product_name", "product_type"], how='left')\
        .merge(current_month_revenue, on=["clean_product_name", "product_type"], how='left')\
        .merge(last_month_revenue, on=["clean_product_name", "product_type"], how='left')

    # Fill NaN values with 0 (if appropriate for your context)
    sales_performance_df = sales_performance_df.fillna(0)

    
    sales_performance_df['quarterly_growth'] = (sales_performance_df['current_quarter_revenue'] - sales_performance_df['last_quarter_revenue']) / sales_performance_df['last_quarter_revenue'] * 100
    sales_performance_df['monthly_growth'] = (sales_performance_df['current_month_revenue'] - sales_performance_df['last_month_revenue']) / sales_performance_df['last_month_revenue'] * 100
    
    # Calc growth
    sales_performance_df['quarter_Growth(%)'] = ((sales_performance_df['current_quarter_revenue'] - \
                                         sales_performance_df['last_quarter_revenue']) / (sales_performance_df['current_quarter_revenue'] +  sales_performance_df['last_quarter_revenue'])).round(2)
    sales_performance_df['Monthly_Growth(%)'] = ((sales_performance_df['current_month_revenue'] - \
                                            sales_performance_df['last_month_revenue']) / (sales_performance_df['current_month_revenue'] + sales_performance_df['last_month_revenue'])).round(2)
    return sales_performance_df

In [11]:
df1 = df.drop(['cancel_date', 'cancel_reason','channel','device_location','fulfillment_status','notes','on_account','outlet_code','register_name','remaining_on_account','tax_inclusive','total_shipping_amount','total_weight'], axis=1)

In [12]:
df1.head()

Unnamed: 0.1,Unnamed: 0,base_price,buying_price,cashier_id,clean_product_name,closed_date,customer_id,day,hour,month,...,total_amount,total_order_amount,week,week_of_month,year,cost_of_goods,profit,date,day_of_week_num,day_name
0,0,0,12348.0,f72af7ab-51ff-496f-b344-8cda0686df7e,ayam! korean bites,2024-02-11 06:18:51,f4ee6240-51f7-4cc4-ba24-8a6acbf978ea,11,6,2,...,15001,15001,6,2,2024,1695.0,13306.0,2024-02-11,6,Sunday
1,1,0,13577.0,f72af7ab-51ff-496f-b344-8cda0686df7e,ayam! korean bites,2024-02-04 07:11:14,b42f4d0d-00bc-432c-8f10-54167a53f0ac,4,6,2,...,25000,25000,5,1,2024,20.0,24980.0,2024-02-04,6,Sunday
2,2,1,1493.0,13c394b6-2226-4fc3-b953-ca01c0a59d71,free nasi putih,2024-02-09 12:50:47,6d5e2fcf-7732-40be-abda-04edfe52ebcf,9,12,2,...,1,1,6,2,2024,1493.0,-1492.0,2024-02-09,4,Friday
3,3,1,1493.0,7f7e33ff-b092-4f26-8c64-760362bf5bb5,free nasi putih,2024-01-05 10:20:20,2a3d6390-4fd7-4bb0-b185-bdab64aeab75,5,10,1,...,1,1,1,1,2024,1493.0,-1492.0,2024-01-05,4,Friday
4,4,1,1493.0,7f7e33ff-b092-4f26-8c64-760362bf5bb5,free nasi putih,2024-01-12 14:04:16,30d5a638-2877-43a5-a402-3838efa52452,12,14,1,...,1,1,2,2,2024,1493.0,-1492.0,2024-01-12,4,Friday


In [17]:
agent = create_pandas_dataframe_agent(
    ChatOpenAI(
        temperature=0.001, 
        model='gpt-3.5-turbo-0125',
        api_key=os.getenv('OPENAI_API_KEY')
    ),
    [df1, sales_table],
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='c:\\Users\\austi\\OneDrive\\Documents\\SABR Consulting\\minum_yuk_kakak\\iseller_dashboard\\.conda\\Library\\ssl\\cacert.pem'
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='c:\\Users\\austi\\OneDrive\\Documents\\SABR Consulting\\minum_yuk_kakak\\iseller_dashboard\\.conda\\Library\\ssl\\cacert.pem'


In [14]:
agent.invoke("Which products have the highest and lowest sales quantities?")

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': '\nYou are working with a pandas dataframe in Python. The name of the dataframe is `df`.\nThis is the result of `print(df.head())`:\n|    |   Unnamed: 0 |   base_price |   buying_price | cashier_id                           | clean_product_name   | closed_date         | customer_id                          |   day |   hour |   month | month_name   | order_id                             | order_type   | outlet_name             | payment_status   | product_id                           | product_name       | product_type   |   quantity |   quarter | quarter_year   | register_id                          | server_id                            | status    |   subtotal |   total_amount |   total_order_amount |   week |   week_of_month |   year |   cost_of_goods |   profit | date       |   day_of_week_num | day_name   |\n|---:|-------------:|----------



[1m> Entering new AgentExecutor chain...[0m


DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:29:24 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'540'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'78725'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'956ms'), (b'x-request-id', b'req_3cc8935202ea429ce8b9362392bf40c4'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=PfRpWfHwTGBUR456CejuS_WTWT5JkDyJfDRi1T7whIU-1717856964-1.0.1.1-6qa8HsDCL2wBTRSe6S3CUwOcxM52GVg4AdO_SIlKiyXtNIyC8jM3SSQDUi.G2wW2xp4rgZ5iv8mJXVz_sdbN3A; path

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df.groupby('product_name')['quantity'].sum().nlargest(1)"}`


[0m[36;1m[1;3mproduct_name
kawa kawa anggur hijau 600ml    4531
Name: quantity, dtype: int64[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:29:25 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'809'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'78380'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'1.214s'), (b'x-request-id', b'req_4bc7899df8ffbde8d3971ce575ad6aae'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'890992ed9f5168d8-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "df.groupby('product_name')['quantity'].sum().nsmallest(1)"}`


[0m[36;1m[1;3mproduct_name
anggur ginseng intisari anggur hijau 620ml - dus    1
Name: quantity, dtype: int64[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:29:27 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'427'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'78425'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'1.18s'), (b'x-request-id', b'req_de3be835d9852a967c28f0ae05a283ba'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'890992f728fd68d8-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HT

[32;1m[1;3mThe product with the highest sales quantity is "kawa kawa anggur hijau 600ml" with a total quantity of 4531. 
The product with the lowest sales quantity is "anggur ginseng intisari anggur hijau 620ml - dus" with a total quantity of 1.[0m

[1m> Finished chain.[0m


{'input': 'Which products have the highest and lowest sales quantities?',
 'output': 'The product with the highest sales quantity is "kawa kawa anggur hijau 600ml" with a total quantity of 4531. \nThe product with the lowest sales quantity is "anggur ginseng intisari anggur hijau 620ml - dus" with a total quantity of 1.'}

In [18]:
agent.invoke("How much sales contribution does the top 10 products make?")

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': '\nYou are working with 2 pandas dataframes in Python named df1, df2, etc.\nThis is the result of `print(df.head())` for each dataframe:\n|    |   Unnamed: 0 |   base_price |   buying_price | cashier_id                           | clean_product_name   | closed_date         | customer_id                          |   day |   hour |   month | month_name   | order_id                             | order_type   | outlet_name             | payment_status   | product_id                           | product_name       | product_type   |   quantity |   quarter | quarter_year   | register_id                          | server_id                            | status    |   subtotal |   total_amount |   total_order_amount |   week |   week_of_month |   year |   cost_of_goods |   profit | date       |   day_of_week_num | day_name   |\n|---:|-------------:|-----

DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000001F629509210>
DEBUG:httpcore.connection:start_tls.started ssl_context=<ssl.SSLContext object at 0x000001F62587BF50> server_hostname='api.openai.com' timeout=None
DEBUG:httpcore.connection:start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000001F629620790>
DEBUG:httpcore.http11:send_request_headers.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_headers.complete
DEBUG:httpcore.http11:send_request_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:send_request_body.complete
DEBUG:httpcore.http11:receive_response_headers.started request=<Request [b'POST']>




[1m> Entering new AgentExecutor chain...[0m


DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:39:08 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'503'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'78276'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'1.293s'), (b'x-request-id', b'req_a4c651d4828495d276a76b5d732dc5d4'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=FZtinOzanBjrU1jCAlfHE9sBFfepNJ9RiKP_z.BWyrg-1717857548-1.0.1.1-Cdakh1gvPTlbccHtSZqGOm.hiEX9O7igB8ez5kf.1zP6YgdR_pAUGdlTFHB8FGbqb89UWaQmaGXXWY3Sr9P1_g; pat

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "total_revenue_top_10 = df1['total_revenue'].nlargest(10).sum()\ntotal_revenue_top_10"}`


[0m[36;1m[1;3mKeyError: 'total_revenue'[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:39:09 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'412'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'77486'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'1.884s'), (b'x-request-id', b'req_bf0a0a32fa785ea61403e9d1e8db3a16'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909a12fdde768e2-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': 'df1.columns'}`
responded: It seems that the column name 'total_revenue' is not available in the first dataframe. Let me check the column names in the first dataframe.

[0m[36;1m[1;3mIndex(['Unnamed: 0', 'base_price', 'buying_price', 'cashier_id',
       'clean_product_name', 'closed_date', 'customer_id', 'day', 'hour',
       'month', 'month_name', 'order_id', 'order_type', 'outlet_name',
       'payment_status', 'product_id', 'product_name', 'product_type',
       'quantity', 'quarter', 'quarter_year', 'register_id', 'server_id',
       'status', 'subtotal', 'total_amount', 'total_order_amount', 'week',
       'week_of_month', 'year', 'cost_of_goods', 'profit', 'date',
       'day_of_week_num', 'day_name'],
      dtype='object')[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:39:11 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'393'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'76724'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'2.456s'), (b'x-request-id', b'req_ac33adacb4b3efa2421598c8f461da39'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909a1390c2168e2-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': 'df2.columns'}`
responded: The 'total_revenue' column is not available in the first dataframe. Let me check the column names in the second dataframe.

[0m[36;1m[1;3mIndex(['clean_product_name', 'product_type', 'total_revenue',
       'current_quarter_revenue', 'last_quarter_revenue',
       'current_month_revenue', 'last_month_revenue', 'quarterly_growth',
       'monthly_growth', 'quarter_Growth(%)', 'Monthly_Growth(%)'],
      dtype='object')[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:39:12 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'365'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'75812'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'3.14s'), (b'x-request-id', b'req_3248f1e3eaf33453be2d210cd9b13218'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909a1424a6a68e2-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HT

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "total_revenue_top_10 = df2['total_revenue'].nlargest(10).sum()\ntotal_revenue_top_10"}`
responded: The 'total_revenue' column is available in the second dataframe. Let me calculate the sales contribution of the top 10 products from the second dataframe.

[0m[36;1m[1;3m3829472900[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:39:15 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'1151'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'75695'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'3.228s'), (b'x-request-id', b'req_5d65b53238fcb909cd4959a22f8a8fd1'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909a14fae9e68e2-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "

[32;1m[1;3mThe top 10 products contribute a total of $3,829,472,900 to the sales.[0m

[1m> Finished chain.[0m


{'input': 'How much sales contribution does the top 10 products make?',
 'output': 'The top 10 products contribute a total of $3,829,472,900 to the sales.'}

In [16]:
sales_table = generate_sales_table(df)
sales_table.head(10)

Unnamed: 0,clean_product_name,product_type,total_revenue,current_quarter_revenue,last_quarter_revenue,current_month_revenue,last_month_revenue,quarterly_growth,monthly_growth,quarter_Growth(%),Monthly_Growth(%)
0,(es batu) crystalline cup,REGULAR,201033098,55569433.0,145463665.0,10521000.0,45048433.0,-61.79841,-76.645137,-0.45,-0.62
1,a&w root beer kaleng,REGULAR,21235100,5086300.0,16148800.0,89000.0,4997300.0,-68.503542,-98.219038,-0.52,-0.97
2,absolut vodka,VODKA,51599900,18173000.0,33426900.0,2180000.0,15993000.0,-45.633606,-86.369036,-0.3,-0.76
3,alexis anggur hijau,ANGGUR,182694900,40367800.0,142327100.0,9386500.0,30981300.0,-71.637306,-69.702692,-0.56,-0.53
4,alexis anggur merah,ANGGUR,70802100,18252100.0,52550000.0,2918100.0,15334000.0,-65.267174,-80.96974,-0.48,-0.68
5,alva vodka cucumber,VODKA,6018900,3418800.0,2600100.0,822000.0,2596800.0,31.48725,-68.345656,0.14,-0.52
6,alva vodka original,VODKA,8133800,2655000.0,5478800.0,1005000.0,1650000.0,-51.540483,-39.090909,-0.35,-0.24
7,andong strength soju green tea,SOJU,37155100,12978100.0,24177000.0,3989000.0,8989100.0,-46.32047,-55.624034,-0.3,-0.39
8,andong strength soju lychee,SOJU,49862800,18335500.0,31527300.0,4272000.0,14063500.0,-41.842467,-69.623493,-0.26,-0.53
9,andong strength soju original,SOJU,40404200,22085000.0,18319200.0,6416400.0,15668600.0,20.556575,-59.049309,0.09,-0.42


In [21]:
top10 = sales_table['total_revenue'].nlargest(10)
print(top10)

279    703002400
263    579007200
241    419350700
33     381470600
87     376361800
23     306460000
262    300999400
498    271336100
274    246677400
505    244807300
Name: total_revenue, dtype: int64


In [31]:
pagent_response = agent.invoke("How does the average price of the various types product affect the quantity sold? Generate the output as a markdown table. Make sure to include product types and their correlation. IMPORTANT: Do not read or import any other files.")

input_text = pagent_response.get('input', '')
output_text = pagent_response.get('output', '')

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': '\nYou are working with 2 pandas dataframes in Python named df1, df2, etc.\nThis is the result of `print(df.head())` for each dataframe:\n|    |   Unnamed: 0 |   base_price |   buying_price | cashier_id                           | clean_product_name   | closed_date         | customer_id                          |   day |   hour |   month | month_name   | order_id                             | order_type   | outlet_name             | payment_status   | product_id                           | product_name       | product_type   |   quantity |   quarter | quarter_year   | register_id                          | server_id                            | status    |   subtotal |   total_amount |   total_order_amount |   week |   week_of_month |   year |   cost_of_goods |   profit | date       |   day_of_week_num | day_name   |\n|---:|-------------:|-----



[1m> Entering new AgentExecutor chain...[0m


DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:58:57 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'450'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'78233'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'1.325s'), (b'x-request-id', b'req_4a24de57c2592d0fcec708a76338cdd2'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909be31783d68de-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "import pandas as pd\nimport numpy as np\nfrom scipy.stats import pearsonr\n\ndf1['revenue_per_quantity'] = df1['total_revenue'] / df1['quantity']\naverage_price_per_product_type = df1.groupby('product_type')['revenue_per_quantity'].mean()\ncorrelation = df1.groupby('product_type').apply(lambda x: pearsonr(x['revenue_per_quantity'], x['quantity'])[0])\n\nresult_df = pd.DataFrame({'Average Price per Product Type': average_price_per_product_type, 'Correlation with Quantity Sold': correlation})\nresult_df"}`


[0m

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': '\nYou are working with 2 pandas dataframes in Python named df1, df2, etc.\nThis is the result of `print(df.head())` for each dataframe:\n|    |   Unnamed: 0 |   base_price |   buying_price | cashier_id                           | clean_product_name   | closed_date         | customer_id                          |   day |   hour |   month | month_name   | order_id                             | order_type   | outlet_name             | payment_status   | product_id                           | product_name       | product_type   |   quantity |   quarter | quarter_year   | register_id                          | server_id                            | status    |   subtotal |   total_amount |   total_order_amount |   week |   week_of_month |   year |   cost_of_goods |   profit | date       |   day_of_week_num | day_name   |\n|---:|-------------:|-----

[36;1m[1;3mKeyError: 'total_revenue'[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:59:05 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'466'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'78225'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'1.331s'), (b'x-request-id', b'req_ea033d4ec86ac662e9fdcbb8962ecf29'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909be60095068e1-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "import pandas as pd\nimport numpy as np\nfrom scipy.stats import pearsonr\n\ndf1['revenue_per_quantity'] = df1['total_amount'] / df1['quantity']\naverage_price_per_product_type = df1.groupby('product_type')['revenue_per_quantity'].mean()\ncorrelation = df1.groupby('product_type').apply(lambda x: pearsonr(x['revenue_per_quantity'], x['quantity'])[0])\n\nresult_df = pd.DataFrame({'Average Price per Product Type': average_price_per_product_type, 'Correlation with Quantity Sold': correlation})\nresult_df"}`
responded: It seems there was an error because the 'total_revenue' column is not available in the provided dataframes. Let me correct that and try again.

[0m[36;1m[1;3mNameError: name 'pearsonr' is not defined[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:59:07 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'377'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'77546'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'1.839s'), (b'x-request-id', b'req_7de54b19f9b0516c6203f16232fa16f8'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909be712a3068e1-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "import pandas as pd\nimport numpy as np\nfrom scipy.stats import pearsonr\n\ndf1['revenue_per_quantity'] = df1['total_amount'] / df1['quantity']\naverage_price_per_product_type = df1.groupby('product_type')['revenue_per_quantity'].mean()\ncorrelation = df1.groupby('product_type').apply(lambda x: pearsonr(x['revenue_per_quantity'], x['quantity'])[0])\n\nresult_df = pd.DataFrame({'Average Price per Product Type': average_price_per_product_type, 'Correlation with Quantity Sold': correlation})\nresult_df"}`
responded: It seems there was an error because the `pearsonr` function was not imported. Let me correct that and try again.

[0m[36;1m[1;3mNameError: name 'pearsonr' is not defined[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:59:10 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'390'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'77408'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'1.943s'), (b'x-request-id', b'req_9701eaac2087818a35f0404b8f01b19c'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909be80cc7468e1-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "import pandas as pd\nimport numpy as np\nfrom scipy.stats import pearsonr\n\ndf1['revenue_per_quantity'] = df1['total_order_amount'] / df1['quantity']\naverage_price_per_product_type = df1.groupby('product_type')['revenue_per_quantity'].mean()\ncorrelation = df1.groupby('product_type').apply(lambda x: pearsonr(x['revenue_per_quantity'], x['quantity'])[0])\n\nresult_df = pd.DataFrame({'Average Price per Product Type': average_price_per_product_type, 'Correlation with Quantity Sold': correlation})\nresult_df"}`
responded: It seems there was an error with the `pearsonr` function. Let me correct that and try again.

[0m[36;1m[1;3mNameError: name 'pearsonr' is not defined[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:59:13 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'345'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'77177'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'2.117s'), (b'x-request-id', b'req_4cc18ee0aa35351bdbee3f9d6e43dedd'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909be932f0468e1-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "import pandas as pd\nimport numpy as np\nfrom scipy.stats import pearsonr\n\ndf1['revenue_per_quantity'] = df1['total_amount'] / df1['quantity']\naverage_price_per_product_type = df1.groupby('product_type')['revenue_per_quantity'].mean()\ncorrelation = df1.groupby('product_type').apply(lambda x: pearsonr(x['revenue_per_quantity'], x['quantity'])[0] if len(x) > 1 else np.nan)\n\nresult_df = pd.DataFrame({'Average Price per Product Type': average_price_per_product_type, 'Correlation with Quantity Sold': correlation})\nresult_df"}`
responded: It seems there was an error with the `pearsonr` function again. Let me correct that and try once more.

[0m[36;1m[1;3mNameError: name 'pearsonr' is not defined[0m

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:59:16 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'437'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'77014'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'2.239s'), (b'x-request-id', b'req_2c17202010a9743dbf637077b26a36d5'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909bea6ae0468e1-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "import pandas as pd\nimport numpy as np\n\ndf1['revenue_per_quantity'] = df1['total_amount'] / df1['quantity']\naverage_price_per_product_type = df1.groupby('product_type')['revenue_per_quantity'].mean()\ncorrelation = df1.groupby('product_type').apply(lambda x: x['revenue_per_quantity'].corr(x['quantity']))\n\nresult_df = pd.DataFrame({'Average Price per Product Type': average_price_per_product_type, 'Correlation with Quantity Sold': correlation})\nresult_df"}`
responded: It seems there is still an issue with the `pearsonr` function. Let me try a different approach to calculate the correlation.

[0m[36;1m[1;3m              Average Price per Product Type  Correlation with Quantity Sold
product_type                                                                
ANGGUR                          1.025550e+05                       -0.013990
BEER                            1.222195e+05                       -0.154865
COGNAC      

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 14:59:19 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'377'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'76372'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'2.72s'), (b'x-request-id', b'req_f2e3b714404330a71ab575c94de0f23f'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909beb80bfb68e1-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HT

[32;1m[1;3mHere is the table showing the average price per product type and the correlation with the quantity sold:

| product_type | Average Price per Product Type | Correlation with Quantity Sold |
|--------------|---------------------------------|---------------------------------|
| ANGGUR       | 102,550                         | -0.014                          |
| BEER         | 122,219                         | -0.155                          |
| COGNAC       | 1,156,887                       | -0.020                          |
| EMBALASE     | 43,321                          | -0.520                          |
| FOOD         | 36,891                          | -0.032                          |
| FREE         | 21,819                          | -0.161                          |
| GIN          | 384,386                         | 0.008                           |
| LIQUOR       | 445,924                         | 0.010                           |
| MAKGEOLLI    | 258,765         

{'input': 'How does the average price of the various types product affect the quantity sold? Generate the output as a markdown table. Make sure to include product types and their correlation. IMPORTANT: Do not read or import any other files.',
 'output': 'Here is the table showing the average price per product type and the correlation with the quantity sold:\n\n| product_type | Average Price per Product Type | Correlation with Quantity Sold |\n|--------------|---------------------------------|---------------------------------|\n| ANGGUR       | 102,550                         | -0.014                          |\n| BEER         | 122,219                         | -0.155                          |\n| COGNAC       | 1,156,887                       | -0.020                          |\n| EMBALASE     | 43,321                          | -0.520                          |\n| FOOD         | 36,891                          | -0.032                          |\n| FREE         | 21,819             

In [38]:
pagent_output = 

model = OpenAI(model_name="gpt-3.5-turbo-0125", temperature=0.0)

class Answer(BaseModel):
    input: Any = Field(description="The user's input query.")
    output: Any = Field(description="The agent's response.")

parser = PydanticOutputParser(pydantic_object=Answer)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

prompt_and_model = prompt | agent
output = prompt_and_model.invoke({"query": "How does the average price of the product affect the quantity sold?."})
response = parser.invoke(output)

print(response)


DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': '\nYou are working with 2 pandas dataframes in Python named df1, df2, etc.\nThis is the result of `print(df.head())` for each dataframe:\n|    |   Unnamed: 0 |   base_price |   buying_price | cashier_id                           | clean_product_name   | closed_date         | customer_id                          |   day |   hour |   month | month_name   | order_id                             | order_type   | outlet_name             | payment_status   | product_id                           | product_name       | product_type   |   quantity |   quarter | quarter_year   | register_id                          | server_id                            | status    |   subtotal |   total_amount |   total_order_amount |   week |   week_of_month |   year |   cost_of_goods |   profit | date       |   day_of_week_num | day_name   |\n|---:|-------------:|-----



[1m> Entering new AgentExecutor chain...[0m


DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sat, 08 Jun 2024 15:18:10 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-9qe82t1dno6gleilkloemlgn'), (b'openai-processing-ms', b'420'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'5000'), (b'x-ratelimit-limit-tokens', b'80000'), (b'x-ratelimit-remaining-requests', b'4999'), (b'x-ratelimit-remaining-tokens', b'78101'), (b'x-ratelimit-reset-requests', b'12ms'), (b'x-ratelimit-reset-tokens', b'1.424s'), (b'x-request-id', b'req_3b44ee9575ea16b2f0f86a79e3b92156'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8909da50bb6568e2-CGK'), (b'alt-svc', b'h3=":443"; ma=86400')])
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "H

[32;1m[1;3m{
    "input": "How does the average price of the product affect the quantity sold?",
    "output": "The average price of the product can affect the quantity sold in the following ways:\n1. Price Elasticity: If the product has elastic demand, an increase in average price may lead to a significant decrease in quantity sold, and vice versa.\n2. Consumer Perception: Higher prices may deter some customers from purchasing the product, resulting in lower quantity sold.\n3. Competition: Competing products with lower prices may attract customers away from the higher-priced product, impacting the quantity sold.\n4. Brand Image: Premium pricing may attract a certain segment of customers willing to pay more, affecting the quantity sold differently.\n5. Seasonality: Price changes can influence consumer behavior during different seasons, impacting the quantity sold.\nOverall, the relationship between average price and quantity sold is complex and influenced by various factors."
}[0m



ValidationError: 1 validation error for Generation
text
  str type expected (type=type_error.str)