# Querying a CSV file with LangChain and the Azure Container Apps dynamic sessions code interpreter

This notebook demonstrates how to use Azure Container Apps dynamic sessions and LangChain to query a CSV.

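Before running the notebook, copy `.env.sample` to `.env` and fill in your Azure Container Apps dynamic sessions code interpreter pool management endpoint (`POOL_MANAGEMENT_ENDPOINT`) and your Azure OpenAI endpoint (`AZURE_OPENAI_ENDPOINT`). The notebook also expects a `customers.csv` file in the working directory, which it uploads to the code interpreter session.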

In [None]:
# install the required packages
!python -m pip install python-dotenv langchain langchain-azure-dynamic-sessions azure-identity openai langchainhub langchain-openai

In [34]:
import datetime
import os
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from langchain_openai import AzureChatOpenAI


# Load the environment variables from the .env file
load_dotenv()


# Set up the Azure Chat OpenAI model using managed identity for authentication
# Module-level cache for the most recently acquired access token
azure_token = None
def token_factory():
    """Return a bearer token string for the Cognitive Services scope.

    The token object is cached in the module-level ``azure_token``. A new one
    is requested through ``DefaultAzureCredential`` when nothing is cached yet
    or when the cached token expires less than five minutes from now.

    Returns:
        The ``token`` attribute of the cached (possibly just refreshed) token.
    """
    global azure_token

    if azure_token is not None:
        expires_at = datetime.datetime.fromtimestamp(azure_token.expires_on, datetime.timezone.utc)
        refresh_deadline = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(minutes=5)
        # Still valid beyond the five-minute safety margin: reuse the cached token
        if not expires_at < refresh_deadline:
            return azure_token.token

    print("Refreshing Azure token...")
    credential = DefaultAzureCredential()
    azure_token = credential.get_token("https://cognitiveservices.azure.com/.default")
    return azure_token.token

# Chat model served from Azure OpenAI. No API key is passed: each request gets
# its bearer token from `token_factory`, and the endpoint comes from the environment.
llm = AzureChatOpenAI(
    # Where and how to connect
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_ad_token_provider=token_factory,
    # Which deployment / API version to call
    azure_deployment="gpt-35-turbo",
    openai_api_version="2023-09-15-preview",
    # Generation settings
    temperature=0,
    streaming=True,
)

In [35]:
from langchain_azure_dynamic_sessions import SessionsPythonREPLTool
from langchain import hub
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_core.messages import SystemMessage


# Code interpreter tool backed by an Azure Container Apps dynamic sessions pool;
# the pool management endpoint is read from the environment.
code_interpreter_tool = SessionsPythonREPLTool(
    pool_management_endpoint=os.environ["POOL_MANAGEMENT_ENDPOINT"],
)

# Single list of tools shared by the agent and its executor, so the two cannot
# drift apart when a tool is added or removed.
tools = [
    code_interpreter_tool,
]

# Prompt template pulled from the LangChain Hub
prompt = hub.pull("hwchase17/openai-functions-agent")

agent = create_tool_calling_agent(llm, tools, prompt)

# verbose=True makes the executor print the chain trace (tool invocations and
# their results) while it runs.
agent_executor = AgentExecutor(
    agent=agent, tools=tools, verbose=True, handle_parsing_errors=True
)

# Seed the chat history with standing instructions on how the agent should
# approach data analysis. Rebuilt from scratch every time this cell runs.
chat_history = [
    SystemMessage(
        content=(
            "To analyze data, execute Python code to read the file and extract the data you need. "
            "Pandas is available. "
            "You must start by using `head()` checking the first few rows to see what the data looks like, "
            "and then write additional queries to complete the analysis."
        )
    ),
]

# Upload the local CSV through the code interpreter tool, then tell the agent
# the path under which the uploaded file is available.
remote_file = code_interpreter_tool.upload_file(local_file_path="customers.csv")
chat_history.append(
    SystemMessage(content=f"The data file `{remote_file.full_path}` has been uploaded.")
)



In [36]:
# Ask an aggregate question; the seeded system messages are passed as chat history
response = agent_executor.invoke(
    dict(
        input="What is the average amount purchased by all customers in Arizona?",
        chat_history=chat_history,
    )
)




[1m> Entering new AgentExecutor chain...[0m
Refreshing Azure token...
[32;1m[1;3m
Invoking: `Python_REPL` with `import pandas as pd

data = pd.read_csv('/mnt/data/customers.csv')

# Display the first few rows of the data
data.head()`


[0m[36;1m[1;3m{
  "result": "  FirstName    LastName                          Email        Country  \\\n0    Daleen          du  daleen.du.plessis@contoso.com  United States   \n1       Ben         van        ben.van.zyl@contoso.com  United States   \n2     Chris  Groenewalt   chris.groenewalt@contoso.com  United States   \n3    Jannie          du     jannie.du.toit@contoso.com  United States   \n4    Helena       Botha       helena.botha@contoso.com  United States   \n\n         City State  PostCode      StreetAddress Created_date  cid  \\\n0  Washington    DC     20020  12345 Main Street      8/24/12    0   \n1  Washington    DC     20020  12345 Main Street     10/11/12    1   \n2  Washington    DC     20020  12345 Main Street     11/30/17    

In [37]:

# Ask a group-and-rank question using the same seeded chat history
response = agent_executor.invoke(
    dict(
        input="What is the state who has purchased the most stuff?",
        chat_history=chat_history,
    )
)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Python_REPL` with `import pandas as pd

data = pd.read_csv('/mnt/data/customers.csv')
state_purchases = data.groupby('State')['Purchases'].sum()
state_with_most_purchases = state_purchases.idxmax()
state_with_most_purchases`


[0m[36;1m[1;3m{
  "result": null,
  "stdout": "",
}[0m[32;1m[1;3m
Invoking: `Python_REPL` with `import pandas as pd

data = pd.read_csv('/mnt/data/customers.csv')
state_purchases = data.groupby('State')['Total Purchases'].sum()
state_with_most_purchases = state_purchases.idxmax()
state_with_most_purchases`


[0m[36;1m[1;3m{
  "result": null,
  "stdout": "",
}[0m[32;1m[1;3m
Invoking: `Python_REPL` with `import pandas as pd

data = pd.read_csv('/mnt/data/customers.csv')
data.head()`
responded: It seems that the column names "Purchases" and "Total Purchases" are not present in the dataset. Let me first check the first few rows of the dataset to understand the column names and data s

In [38]:

# Ask a top-N question using the same seeded chat history
response = agent_executor.invoke(
    dict(
        input="Who are the top 5 happiest customers?",
        chat_history=chat_history,
    )
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Python_REPL` with `import pandas as pd

# Read the data from the file
data = pd.read_csv('/mnt/data/customers.csv')

# Display the first few rows of the data
data.head()`


[0m[36;1m[1;3m{
  "result": "  FirstName    LastName                          Email        Country  \\\n0    Daleen          du  daleen.du.plessis@contoso.com  United States   \n1       Ben         van        ben.van.zyl@contoso.com  United States   \n2     Chris  Groenewalt   chris.groenewalt@contoso.com  United States   \n3    Jannie          du     jannie.du.toit@contoso.com  United States   \n4    Helena       Botha       helena.botha@contoso.com  United States   \n\n         City State  PostCode      StreetAddress Created_date  cid  \\\n0  Washington    DC     20020  12345 Main Street      8/24/12    0   \n1  Washington    DC     20020  12345 Main Street     10/11/12    1   \n2  Washington    DC     20020  12345 Main Street     11/30/17