In [1]:
# Import necessary libraries
import os

import openai
import pandas as pd
from langchain.agents import create_csv_agent
from langchain.llms import AzureOpenAI

In [2]:
# Read the sales performance report CSV into a pandas DataFrame.
csv_path = './data/Sales_Performance_Report_DQLab_Store.csv'
df = pd.read_csv(csv_path)

In [3]:
# Print a summary of the dataframe: number of entries, each column's
# non-null count and dtype, and the approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5499 entries, 0 to 5498
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   order_id              5499 non-null   int64  
 1   order_status          5499 non-null   object 
 2   customer              5499 non-null   object 
 3   order_date            5499 non-null   object 
 4   order_quantity        5499 non-null   int64  
 5   sales                 5499 non-null   int64  
 6   discount              5499 non-null   float64
 7   discount_value        5499 non-null   int64  
 8   product_category      5499 non-null   object 
 9   product_sub_category  5499 non-null   object 
dtypes: float64(1), int64(4), object(5)
memory usage: 429.7+ KB


In [4]:
# Configure the baseline configuration of the OpenAI library for Azure OpenAI Service.
# Each secret/endpoint is taken from an environment variable when one is set, so the
# real key never has to be saved inside the notebook; the placeholder strings are only
# fallbacks that must be replaced before the cells below can reach the service.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "PLEASE_ENTER_YOUR_OWNED_AOAI_SERVICE_KEY")
OPENAI_DEPLOYMENT_NAME = os.getenv("OPENAI_DEPLOYMENT_NAME", "PLEASE_ENTER_YOUR_OWNED_AOAI_TEXT_MODEL_NAME")
MODEL_NAME = "text-davinci-003"
openai.api_type = "azure"
openai.api_base = os.getenv(
    "OPENAI_API_BASE",
    "https://PLEASE_ENTER_YOUR_OWNED_AOAI_RESOURCE_NAME.openai.azure.com/",
)
openai.api_version = "2022-12-01"
# Reuse the constant instead of repeating the literal, so the key set on the openai
# module and the key passed to LangChain can never drift apart.
openai.api_key = OPENAI_API_KEY

In [5]:
# Create the LangChain AzureOpenAI client from the key, deployment name and
# model name defined in the configuration cell.
llm_settings = {
    "openai_api_key": OPENAI_API_KEY,
    "deployment_name": OPENAI_DEPLOYMENT_NAME,
    "model_name": MODEL_NAME,
}
llm = AzureOpenAI(**llm_settings)

In [6]:
# Connection check: send one simple prompt to the LLM and print the reply.
reply = llm("What is the Azure OpenAI Service?")
print(reply)



The Azure OpenAI service is a cloud-based service that allows data scientists, developers, and researchers to access the OpenAI platform. It provides users with access to cutting-edge AI technologies, such as machine learning, natural language processing, and computer vision. It also allows users to take advantage of OpenAI’s vast library of pre-trained models, allowing them to quickly create their own AI applications.


In [7]:
# Initiate a LangChain's CSV Agent with the LLM from Azure OpenAI Service to interact with the CSV file.
# The existing `llm` client is reused rather than constructing a second AzureOpenAI
# instance with the same key, deployment name and model name.
agent = create_csv_agent(llm, './data/Sales_Performance_Report_DQLab_Store.csv')

In [8]:
# Inspect the prompt template the CSV agent was created with.
prompt_template = agent.agent.llm_chain.prompt.template
prompt_template

'\nYou are working with a pandas dataframe in Python. The name of the dataframe is `df`.\nYou should use the tools below to answer the question posed of you:\n\npython_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [python_repl_ast]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\n\nThis is the result of `print(df.head())`:\n{df}\n\nBegin!\nQuestion: {input}\n{agent_scratchpad}'

In [9]:
# Ask the agent how many rows the data contains.
question = "How many rows of data do you have?"
agent.run(question)

'5499 rows of data'

In [11]:
# Ask the agent which years appear in the dataset.
question = "Please list the years in this dataset."
agent.run(question)

'The years in this dataset are 2010, 2012, 2011, and 2009.'

In [13]:
# Ask for the 2011 and 2012 sales totals and the percentage increase between them.
question = (
    "Please tell me the total sales in 2011. "
    "Please tell me the total sales in 2012. "
    "And calculate the percentage of the increase from 2011 to 2012."
)
agent.run(question)

'The total sales in 2011 was 4568206579 and the total sales in 2012 was 5071954389, which is a 11.03% increase.'

In [18]:
# Ask the agent to forecast 2013 total sales from the 2011 and 2012 trend.
# NOTE(review): the answer recorded below (9640160968) is exactly the sum of the
# 2011 and 2012 totals reported by the previous question (4568206579 + 5071954389),
# so it looks like a plain sum rather than a trend-based forecast. Verify before
# relying on it.
question = "Based on the sales trend of 2011 & 2012, please help to forecast total sales in 2013."
agent.run(question)

'The total sales in 2013 is estimated to be 9640160968.'

In [20]:
# Ask for the customer with the highest total purchases in 2012, and that total.
question = (
    "What is the customer's name with the highest total amount of purchases in 2012? "
    "And tell me the total amount of purchases."
)
agent.run(question)



'The customer with the highest total amount of purchases in 2012 is Craig Carreira and the total amount of purchases is 82686420.'

In [22]:
# Ask which product category had the top sales in 2012.
question = "What was the top sale product category in 2012?"
agent.run(question)

'Office Supplies'