In [12]:
import pandas as pd

# Load the mortgage CSV into `df`, then print its shape and column names.
df = pd.read_csv('./data/mortgage-300h-360m-5r.csv')
print(df.shape)
print(df.columns.tolist())
# Reference value: 'Principal Balance' on the row whose Period is one below the
# largest Period in the file.
print(df.loc[df['Period'].eq(df['Period'].max() - 1), 'Principal Balance'].to_numpy()[0])

(361, 5)
['Period', 'Monthly Payment', 'Computed Interest Due', 'Principal Due', 'Principal Balance']
1603.7824421934038


In [4]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.tools import PythonAstREPLTool

# Chat model provided through Ollama, with temperature fixed at 0.
local_model = "llama3.1:latest"
llm = ChatOllama(temperature=0, model=local_model)
# Python REPL tool whose namespace contains the dataframe under the name `df`.
tool = PythonAstREPLTool(locals=dict(df=df))
# Call the tool directly with a pandas expression; the cell displays the result.
tool.invoke("df['Period'].mean()")

np.float64(180.0)

In [5]:
# Bind the REPL tool to the model, passing the tool's own name as `tool_choice`.
llm_with_tools = llm.bind_tools([tool], tool_choice=tool.name)
# Ask a question about `df` in plain language and keep the returned message.
response = llm_with_tools.invoke(
    "I have a dataframe 'df' and want to know the correlation between the 'Monthly Payment' and 'Principal Balance' columns"
)
# Bare expression so the notebook displays the full message object.
response

AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.1:latest', 'created_at': '2024-12-27T17:12:17.8537885Z', 'done': True, 'done_reason': 'stop', 'total_duration': 9856670600, 'load_duration': 6170089800, 'prompt_eval_count': 219, 'prompt_eval_duration': 814000000, 'eval_count': 29, 'eval_duration': 2861000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-6c04c858-308f-48a3-a46c-9372cbd39a91-0', tool_calls=[{'name': 'python_repl_ast', 'args': {'query': "df['Monthly Payment'].corr(df['Principal Balance'])"}, 'id': 'b9aec179-99ee-4f02-9d19-d41e4f33a988', 'type': 'tool_call'}], usage_metadata={'input_tokens': 219, 'output_tokens': 29, 'total_tokens': 248})

In [6]:
# Show only the tool calls carried by the message returned in the previous cell.
response.tool_calls

[{'name': 'python_repl_ast',
  'args': {'query': "df['Monthly Payment'].corr(df['Principal Balance'])"},
  'id': 'b9aec179-99ee-4f02-9d19-d41e4f33a988',
  'type': 'tool_call'}]

In [7]:
from langchain_core.output_parsers import JsonOutputKeyToolsParser

# Parser keyed on the REPL tool's name, with first_tool_only=True.
# In the saved output it yields a single dict holding the tool-call arguments.
parser = JsonOutputKeyToolsParser(key_name=tool.name, first_tool_only=True)
# Pipe the tool-bound model into the parser and ask the same question as before.
(llm_with_tools | parser).invoke(
    "I have a dataframe 'df' and want to know the correlation between the 'Monthly Payment' and 'Principal Balance' columns"
)

{'query': "df['Monthly Payment'].corr(df['Principal Balance'])"}

In [13]:
# System prompt for code generation. The f-string embeds the first rows of `df`
# rendered as a markdown table, so the model sees the column names and sample values.
system = f"""You have access to a pandas dataframe `df`. 
Here is the output of `df.head().to_markdown()`:

{df.head().to_markdown()}

Given a user question, write the Python code to answer it. 
Return ONLY the valid Python code and nothing else. 
Don't assume you have access to any libraries other than built-in Python ones and pandas."""
# "{question}" is a template variable filled in at invoke time.
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])
# prompt -> tool-bound model -> parser; the chain is not piped into the tool here,
# so the printed result is the parsed tool-call arguments rather than an executed value.
code_chain = prompt | llm_with_tools | parser
response = code_chain.invoke({"question": "What's is the value of Principal Balance at the last period?"})
print(response)

{'query': "df['Principal Balance'].iloc[-1]"}


In [14]:
# Same code-generation chain, now asking about the second-to-last period.
response = code_chain.invoke({"question": "What's is the value of Principal Balance at the second to last period?"})
print(response)

{'query': "df.loc[df['Period'] == df['Period'].max() - 1, 'Principal Balance'].values[0]"}


In [16]:
# Extend the code-generation pipeline with the REPL tool as the final stage, so the
# parsed tool-call arguments are passed on to the tool and its output is printed.
chain = prompt | llm_with_tools | parser | tool
print(chain.invoke({"question": "What's the correlation between Monthly Payment and Principal Balance"}))

-0.07042128741936182


In [17]:
# Compute the same correlation directly with pandas, as a cross-check of the chain's answer.
print(df['Monthly Payment'].corr(df['Principal Balance']))

-0.07042128741936182


In [30]:
from operator import itemgetter

from langchain_core.messages import ToolMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough

# System prompt for the answering chain. It embeds the first rows of `df` as a
# markdown table and tells the model to respond directly once it has enough information.
system = f"""You have access to a pandas dataframe `df`.
Here is the output of `df.head().to_markdown()`:

{df.head().to_markdown()}

Given a user question, write the Python code to answer it. 
Don't assume you have access to any libraries other than built-in Python ones and pandas.
Respond directly to the question once you have enough information to answer it."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
        # Optional slot after the question: any messages supplied under the
        # 'chat_history' key are appended here; the key may be omitted.
        MessagesPlaceholder("chat_history", optional=True),
    ]
)


def _get_chat_history(x: dict) -> list:
    """Build the chat-history messages to insert into the prompt.

    Args:
        x: Chain state read by key. ``x["ai_msg"]`` must expose a non-empty
            ``tool_calls`` list whose first entry has an ``"id"``;
            ``x["tool_output"]`` is the result produced by the tool.

    Returns:
        A two-item list: the model message itself, followed by a ``ToolMessage``
        carrying ``str(tool_output)`` and the id of the first tool call.
    """
    assistant_turn = x["ai_msg"]
    # Only the first tool call is answered.
    first_call = assistant_turn.tool_calls[0]
    tool_turn = ToolMessage(
        tool_call_id=first_call["id"],
        content=str(x["tool_output"]),
    )
    return [assistant_turn, tool_turn]


# Assemble the pipeline one stage at a time; each stage stores its result under a new key.
# 1) ai_msg: the tool-bound model's reply to the prompt.
chain = RunnablePassthrough.assign(ai_msg=prompt | llm_with_tools)
# 2) tool_output: parse that reply and hand the parsed arguments to the tool.
chain = chain.assign(tool_output=itemgetter("ai_msg") | parser | tool)
# 3) chat_history: the model message plus a ToolMessage holding the tool output.
chain = chain.assign(chat_history=_get_chat_history)
# 4) response: re-run the prompt (now including chat_history) through the plain model.
chain = chain.assign(response=prompt | llm | StrOutputParser())
# Keep only the two keys of interest in the final output.
chain = chain.pick(["tool_output", "response"])

In [31]:
# Run the full pipeline and print the picked `tool_output` / `response` keys.
# NOTE(review): the saved output for this cell ends with KeyError: 'tool_calls', and it
# also shows a printed message and tool-call id that no visible code prints, so the
# output looks stale. The cause of the KeyError is not visible in this notebook —
# TODO confirm the installed langchain-ollama / ollama versions and re-run on a fresh kernel.
print(chain.invoke({"question": "What's the correlation between Monthly Payment and Principal Balance"}))

content='' additional_kwargs={} response_metadata={'model': 'llama3.1:latest', 'created_at': '2024-12-27T17:38:00.3693176Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3670116200, 'load_duration': 21343100, 'prompt_eval_count': 479, 'prompt_eval_duration': 337000000, 'eval_count': 29, 'eval_duration': 3310000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)} id='run-36ab9e8f-c29f-451d-ada3-b2752d86ba72-0' tool_calls=[{'name': 'python_repl_ast', 'args': {'query': "df['Monthly Payment'].corr(df['Principal Balance'])"}, 'id': '7c052a85-9768-4646-aa0f-f8d02717d8da', 'type': 'tool_call'}] usage_metadata={'input_tokens': 479, 'output_tokens': 29, 'total_tokens': 508}
7c052a85-9768-4646-aa0f-f8d02717d8da


KeyError: 'tool_calls'

In [1]:
from langchain_experimental.agents import create_pandas_dataframe_agent
import pandas as pd
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.tools import PythonAstREPLTool

# Chat model provided through Ollama (Mistral this time), with temperature fixed at 0.
local_model = "mistral:latest"
llm = ChatOllama(temperature=0, model=local_model)

# Reload the mortgage CSV and print its shape and column names.
df = pd.read_csv('./data/mortgage-300h-360m-5r.csv')
print(df.shape)
print(df.columns.tolist())

# Pandas dataframe agent over `df`, using the tool-calling agent type.
# NOTE(review): allow_dangerous_code=True is an explicit opt-in; presumably it permits
# running model-generated Python in this kernel — confirm against the library docs.
pd_agent = create_pandas_dataframe_agent(
    llm,
    df,
    agent_type="tool-calling",
    verbose=True,
    allow_dangerous_code=True,
)
# The question is followed by a suffix naming the python_repl_ast tool.
query = "What's the correlation between Monthly Payment and Principal Balance?" + " using tool python_repl_ast"
pd_agent.invoke(query)

(361, 5)
['Period', 'Monthly Payment', 'Computed Interest Due', 'Principal Due', 'Principal Balance']


[1m> Entering new AgentExecutor chain...[0m


  pd_agent.run(query)


[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "correlation = df['Monthly Payment'].corr(df['Principal Balance']) ; print(correlation)"}`


[0m[36;1m[1;3m-0.0704212874193625
[0m[32;1m[1;3m The correlation between Monthly Payment and Principal Balance is approximately -0.07, which suggests a weak negative correlation. This means that as the Principal Balance increases, there is a slight decrease in the Monthly Payment, but the relationship is not strong enough to be considered significant.[0m

[1m> Finished chain.[0m


' The correlation between Monthly Payment and Principal Balance is approximately -0.07, which suggests a weak negative correlation. This means that as the Principal Balance increases, there is a slight decrease in the Monthly Payment, but the relationship is not strong enough to be considered significant.'

In [2]:
# Question followed by a suffix naming the python_repl_ast tool.
# NOTE(review): the saved output shows a code snippet in the reply and no tool
# invocation line — confirm whether the tool was actually called.
query = "What's the Principal Balance on 360 period?" + " using tool python_repl_ast"
pd_agent.invoke(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m To find the Principal Balance on the 360th period, we can use the `iloc` function to access the row corresponding to the 360th index. Here is the code snippet:

```python
principal_balance = df.iloc[359]['Principal Balance']
print(principal_balance)
```

This will give you the Principal Balance on the 360th period.[0m

[1m> Finished chain.[0m


{'input': "What's the Principal Balance on 360 period? using tool python_repl_ast",
 'output': " To find the Principal Balance on the 360th period, we can use the `iloc` function to access the row corresponding to the 360th index. Here is the code snippet:\n\n```python\nprincipal_balance = df.iloc[359]['Principal Balance']\nprint(principal_balance)\n```\n\nThis will give you the Principal Balance on the 360th period."}

In [3]:
# Reworded variant of the period-360 question, with the same tool-name suffix.
# NOTE(review): the saved output again contains only suggested code, no computed value.
query = "What's is the value of Principal Balance on 360 period?" + " using tool python_repl_ast"
pd_agent.invoke(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m To find the value of Principal Balance on the 360th period, we can use the `.loc` function to select the row with index 360 and access the 'Principal Balance' column:

```python
principal_balance_at_360 = df.loc[360, "Principal Balance"]
```

Since we don't know the exact index value for the 360th period, let's assume that the Period is incremented by one every month and find the index of the row corresponding to the 360th month:

```python
import datetime as dt

start_date = df.iloc[0]['Period']
end_date = df.iloc[-1]['Period'] + 1
periods_count = (end_date - start_date) / dt.timedelta(days=30)
index_at_360 = int((360 - start_date) / dt.timedelta(days=30))
```

Now, we can use the index to find the value of Principal Balance at the 360th period:

```python
principal_balance_at_360 = df.loc[index_at_360, "Principal Balance"]
```[0m

[1m> Finished chain.[0m


{'input': "What's is the value of Principal Balance on 360 period? using tool python_repl_ast",
 'output': ' To find the value of Principal Balance on the 360th period, we can use the `.loc` function to select the row with index 360 and access the \'Principal Balance\' column:\n\n```python\nprincipal_balance_at_360 = df.loc[360, "Principal Balance"]\n```\n\nSince we don\'t know the exact index value for the 360th period, let\'s assume that the Period is incremented by one every month and find the index of the row corresponding to the 360th month:\n\n```python\nimport datetime as dt\n\nstart_date = df.iloc[0][\'Period\']\nend_date = df.iloc[-1][\'Period\'] + 1\nperiods_count = (end_date - start_date) / dt.timedelta(days=30)\nindex_at_360 = int((360 - start_date) / dt.timedelta(days=30))\n```\n\nNow, we can use the index to find the value of Principal Balance at the 360th period:\n\n```python\nprincipal_balance_at_360 = df.loc[index_at_360, "Principal Balance"]\n```'}