In [1]:
# ==================== SETTING UP THE MODEL FROM OPEN AI ====================

import os
import pandas as pd
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq

# ==================== LODING THE ENV VARIABLES =========================
load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

# ==================== INISIATING THE MODEL =============================
model = ChatOpenAI(model="gpt-4o-mini-2024-07-18")
groq_llm = ChatGroq(temperature=0, model_name="llama3-8b-8192")

In [2]:
# ================ READING THE DATA FROM THE CSV FILE =====================
df = pd.read_csv('./data/all-states-history.csv').fillna(value=0)

In [4]:
# =============== SETTING UP THE AGENTS ==========================
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

agent = create_pandas_dataframe_agent(llm=model, df=df, verbose=True, allow_dangerous_code=True)
agent.invoke("how many rows are there?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To determine how many rows are in the dataframe `df`, I can use the `len()` function or the `shape` attribute of the dataframe, which provides the number of rows and columns. I'll proceed with checking the number of rows using the `shape` attribute for clarity.

Action: python_repl_ast
Action Input: df.shape[0][0m[36;1m[1;3m20780[0m[32;1m[1;3mI now know the final answer
Final Answer: There are 20,780 rows in the dataframe `df`.[0m

[1m> Finished chain.[0m


{'input': 'how many rows are there?',
 'output': 'There are 20,780 rows in the dataframe `df`.'}

In [6]:
agent.invoke("how many columns we have in the dataframe?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To determine the number of columns in the dataframe `df`, I can use the `shape` attribute of the dataframe, which returns a tuple containing the number of rows and the number of columns. I will access the second element of this tuple to get the number of columns.

Action: python_repl_ast
Action Input: df.shape[1][0m[36;1m[1;3m41[0m[32;1m[1;3mI now know the final answer
Final Answer: 41[0m

[1m> Finished chain.[0m


{'input': 'how many columns we have in the dataframe?', 'output': '41'}

In [10]:
CSV_PROMPT_PREFIX = """
First set the pandas display options to show all the columns,
get the column names, then answer the question.
"""

CSV_PROMPT_SUFFIX = """
- **ALWAYS** before giving the Final Answer, try another method.
Then reflect on the answers of the two methods you did and ask yourself
if it answers correctly the original question.
If you are not sure, try another method.
- If the methods tried do not give the same result,reflect and
try again until you have two methods that have the same result.
- If you still cannot arrive to a consistent result, say that
you are not sure of the answer.
- If you are sure of the correct answer, create a beautiful
and thorough response using Markdown.
- **DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE,
ONLY USE THE RESULTS OF THE CALCULATIONS YOU HAVE DONE**.
- **ALWAYS**, as part of your "Final Answer", explain how you got
to the answer on a section that starts with: "\n\nExplanation:\n".
In the explanation, mention the column names that you used to get
to the final answer.
"""

QUESTION = "how many people were hospitalized during the october 2022"

In [12]:
agent.invoke(CSV_PROMPT_PREFIX + QUESTION + CSV_PROMPT_SUFFIX)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To answer the question about how many people were hospitalized during October 2022, I need to filter the dataframe for the relevant date range and then sum the hospitalization numbers. First, I'll set the pandas display options to show all columns, then get the relevant column names.

Action: I will set the pandas display options and retrieve the column names from the dataframe.
Action Input: 
```python
import pandas as pd

# Set the display option to show all columns
pd.set_option('display.max_columns', None)

# Get the column names
column_names = df.columns.tolist()
column_names
```[0mI will set the pandas display options and retrieve the column names from the dataframe. is not a valid tool, try one of [python_repl_ast].[32;1m[1;3mAction: I will set the pandas display options and retrieve the column names from the dataframe.
Action Input: 
```python
import pandas as pd

# Set the display option to show all colum

{'input': '\nFirst set the pandas display options to show all the columns,\nget the column names, then answer the question.\nhow many people were hospitalized during the october 2022\n- **ALWAYS** before giving the Final Answer, try another method.\nThen reflect on the answers of the two methods you did and ask yourself\nif it answers correctly the original question.\nIf you are not sure, try another method.\n- If the methods tried do not give the same result,reflect and\ntry again until you have two methods that have the same result.\n- If you still cannot arrive to a consistent result, say that\nyou are not sure of the answer.\n- If you are sure of the correct answer, create a beautiful\nand thorough response using Markdown.\n- **DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE,\nONLY USE THE RESULTS OF THE CALCULATIONS YOU HAVE DONE**.\n- **ALWAYS**, as part of your "Final Answer", explain how you got\nto the answer on a section that starts with: "\n\nExplanation:\n".\nIn the explana