# CSV Analyzer


# SetUp

1. Install the Langchain packages: Open a terminal window and enter the command below. You can also [install it in a virtualenv](https://googleapis.dev/python/aiplatform/latest/index.html)

In [2]:
#!pip install --upgrade google-cloud-aiplatform
!pip install langchain-google-genai
!pip install langchain
!pip install langchain-experimental
!pip install langchain-google-vertexai

Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.3-py3-none-any.whl.metadata (3.9 kB)
Collecting google-generativeai<0.9.0,>=0.8.0 (from langchain-google-genai)
  Downloading google_generativeai-0.8.3-py3-none-any.whl.metadata (3.9 kB)
Collecting langchain-core<0.4,>=0.3.13 (from langchain-google-genai)
  Downloading langchain_core-0.3.13-py3-none-any.whl.metadata (6.3 kB)
Collecting google-ai-generativelanguage==0.6.10 (from google-generativeai<0.9.0,>=0.8.0->langchain-google-genai)
  Downloading google_ai_generativelanguage-0.6.10-py3-none-any.whl.metadata (5.6 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.4,>=0.3.13->langchain-google-genai)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting langsmith<0.2.0,>=0.1.125 (from langchain-core<0.4,>=0.3.13->langchain-google-genai)
  Downloading langsmith-0.1.138-py3-none-any.whl.metadata (13 kB)
Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.4,>=0.

Collecting langchain
  Downloading langchain-0.3.5-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain)
  Downloading langchain_text_splitters-0.3.1-py3-none-any.whl.metadata (2.3 kB)
Downloading langchain-0.3.5-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_text_splitters-0.3.1-py3-none-any.whl (25 kB)
Installing collected packages: langchain-text-splitters, langchain
Successfully installed langchain-0.3.5 langchain-text-splitters-0.3.1
Collecting langchain-experimental
  Downloading langchain_experimental-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-community<0.4.0,>=0.3.0 (from langchain-experimental)
  Downloading langchain_community-0.3.3-py3-none-any.whl.metadata (2.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community<0.4.0,>=0.3.0->langchain-experimental)
  Downloading data

## Download File

In [3]:
from google.cloud import storage


# Replace with your actual bucket and file information
bucket_name = 'felipe-sandbox-354619-bucket'
file_name = 'ShipperrNPSfile_Sept-Aug2024(APAC)_SP-1K.csv'
destination_file_name = 'downloaded_data.csv'

# Initialize a GCS client
storage_client = storage.Client()

# Get the bucket and blob (file)
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(file_name)

# Download the file to your local system
blob.download_to_filename(destination_file_name)

Tools

In [29]:
from langchain.agents import Tool
from langchain.tools import tool
import pandas as pd
from io import StringIO
import pandas as pd
from langchain.pydantic_v1 import BaseModel, Field


@tool("pandas_calculator", return_direct=False)
def pandas_calculator(expr:str) -> pd.DataFrame:
    """
    Evaluates an expression on a Pandas DataFrame and returns the result.
    """
    try:
        # evaluate expr on the pandas dataframe
        result = eval(expr)
        # return the result
        return result
    except Exception as e:
        return f"Error: Could not evaluate expression. {e}"

In [14]:
# prompt: create a langchain tool that takes a file as input and loads it into a pandas dataframe and returns the dataframe

import pandas as pd
@tool("load_csv_tool", return_direct=False)
def load_csv_tool(csv_file_path: str) -> pd.DataFrame:
  """
  Loads a CSV file into a Pandas DataFrame.
  """
  try:
    df = pd.read_csv(csv_file_path)
    return df
  except Exception as e:
    return f"Error: Could not load CSV file. {e}"



In [30]:
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain_google_vertexai import ChatVertexAI
from langchain.memory import ConversationBufferWindowMemory


llm = ChatVertexAI(model="gemini-1.5-pro-002")
prompt = hub.pull("hwchase17/react-chat")
chat_history = ConversationBufferWindowMemory(k=10)


#@tool("data_fill_tool", args_schema=DataFillTool, return_direct=False)
#def data_fill_tool(input:str) -> str:
#    '''transforms the data based on instructions.'''

#    return ({"text" : "A, B , C \n 1, 2, 3 \n"})

agent = create_react_agent(llm, [pandas_calculator,load_csv_tool], prompt)
agent_executor = AgentExecutor(agent=agent,
                               tools=[pandas_calculator,load_csv_tool],
                               verbose=True, handle_parsing_errors=True,
                               max_iterations=5)


def append_chat_history(input, response):
    chat_history.save_context({"input": input}, {"output": response})

def invoke(input):
    msg = {
        "input": input,
        "chat_history": chat_history.load_memory_variables({}),
    }
    print(f"Input: {msg}")

    response = agent_executor.invoke(msg)
    print(f"Response: {response}")

    append_chat_history(response["input"], response["output"])
    print(f"History: {chat_history.load_memory_variables({})}")



In [31]:
# read downloaded csv to pandas df
df = pd.read_csv(destination_file_name)

# convert df to string
df_str = df.to_string(index=False)
invoke("Hi")
invoke(destination_file_name + "\n\n how many rows are in this file?")

Input: {'input': 'Hi', 'chat_history': {'history': ''}}


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? No
Final Answer: Hello! How can I help you today?
 How can I help you today?
[0m

[1m> Finished chain.[0m
Response: {'input': 'Hi', 'chat_history': {'history': ''}, 'output': 'Hello! How can I help you today?\n How can I help you today?'}
History: {'history': 'Human: Hi\nAI: Hello! How can I help you today?\n How can I help you today?'}
Input: {'input': 'downloaded_data.csv\n\n how many rows are in this file?', 'chat_history': {'history': 'Human: Hi\nAI: Hello! How can I help you today?\n How can I help you today?'}}


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: load_csv_tool
Action Input: downloaded_data.csv[0m[33;1m[1;3m     Index                                           Comments
0        1  The service is perfect, the delivery is timely...
1        2  At present, amon

In [32]:
invoke("can you add the numbers in the Index column?")

Input: {'input': 'can you add the numbers in the Index column?', 'chat_history': {'history': 'Human: Hi\nAI: Hello! How can I help you today?\n How can I help you today?\nHuman: downloaded_data.csv\n\n how many rows are in this file?\nAI: The file has 1000 rows.\n1000 rows.'}}


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: load_csv_tool
Action Input: downloaded_data.csv[0m[33;1m[1;3m     Index                                           Comments
0        1  The service is perfect, the delivery is timely...
1        2  At present, among the orders sent by UPS expre...
2        3                                        too rubbish
3        4  The investigation of the transport package was...
4        5  The customs declarer can't handle the return a...
..     ...                                                ...
995    996  The reason why I choose UPS now is because the...
996    997  The collect service is very good. Sometimes

In [33]:
invoke("how many rows include the word UPS?")

Input: {'input': 'how many rows include the word UPS?', 'chat_history': {'history': "Human: Hi\nAI: Hello! How can I help you today?\n How can I help you today?\nHuman: downloaded_data.csv\n\n how many rows are in this file?\nAI: The file has 1000 rows.\n1000 rows.\nHuman: can you add the numbers in the Index column?\nAI: The sum of the numbers in the 'Index' column is 500500."}}


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: load_csv_tool
Action Input: downloaded_data.csv[0m[33;1m[1;3m     Index                                           Comments
0        1  The service is perfect, the delivery is timely...
1        2  At present, among the orders sent by UPS expre...
2        3                                        too rubbish
3        4  The investigation of the transport package was...
4        5  The customs declarer can't handle the return a...
..     ...                                                ...
995    996  

In [37]:
invoke("how many rows are positive and how many are negative?")

Input: {'input': 'how many rows are positive and how many are negative?', 'chat_history': {'history': 'Human: Hi\nAI: Hello! How can I help you today?\n How can I help you today?\nHuman: downloaded_data.csv\n\n how many rows are in this file?\nAI: The file has 1000 rows.\n1000 rows.\nHuman: can you add the numbers in the Index column?\nAI: The sum of the numbers in the \'Index\' column is 500500.\nHuman: how many rows include the word UPS?\nAI: There are 100 rows that include the word "UPS".\n00 rows that include the word "UPS".'}}


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: load_csv_tool
Action Input: downloaded_data.csv[0m[33;1m[1;3m     Index                                           Comments
0        1  The service is perfect, the delivery is timely...
1        2  At present, among the orders sent by UPS expre...
2        3                                        too rubbish
3        4  The investigation of the transp