In [None]:
import pandas as pd

# Load the mortgage CSV, then show its dimensions and column names.
df = pd.read_csv('./data/mortgage-300h-360m-5r.csv')
print(df.shape, df.columns.tolist(), sep="\n")

# Reference value: 'Principal Balance' in the row whose 'Period' is one below
# the largest 'Period' in the file.
penultimate_period = df['Period'].max() - 1
is_penultimate = df['Period'] == penultimate_period
print(df.loc[is_penultimate, 'Principal Balance'].values[0])

In [None]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.tools import PythonAstREPLTool

# LLM from Ollama
local_model = "llama3.1:latest"
# temperature=0 is requested, presumably to keep answers as repeatable as the backend allows.
llm = ChatOllama(model=local_model, temperature=0)
# Make `df` available (under that name) to the code the REPL tool executes.
tool = PythonAstREPLTool(locals={"df": df})
# Smoke test: run a pandas expression through the tool directly, without involving the LLM.
tool.invoke("df['Period'].mean()")

In [None]:
# Bind the REPL tool to the model, passing the tool's own name as `tool_choice`.
# NOTE(review): the intent looks like forcing a call to this tool on every turn —
# confirm that the installed Ollama integration actually honors `tool_choice`.
llm_with_tools = llm.bind_tools([tool], tool_choice=tool.name)
response = llm_with_tools.invoke(
    "I have a dataframe 'df' and want to know the correlation between the 'Monthly Payment' and 'Principal Balance' columns"
)
# Bare expression so the notebook displays the returned message.
response

In [None]:
# Show the tool calls attached to the message returned by `llm_with_tools.invoke`.
response.tool_calls

In [None]:
from langchain_core.output_parsers import JsonOutputKeyToolsParser

# Parser keyed on this tool's name, applied to the model's tool-calling message.
# first_tool_only=True — presumably yields just the first matching call's arguments
# instead of a list; confirm against the parser documentation.
parser = JsonOutputKeyToolsParser(key_name=tool.name, first_tool_only=True)
# Same question as before, now piped through the parser so only the parsed call is shown.
(llm_with_tools | parser).invoke(
    "I have a dataframe 'df' and want to know the correlation between the 'Monthly Payment' and 'Principal Balance' columns"
)

In [None]:
# Render the first rows once so the model can see the column names and sample values.
# Literal braces are doubled because the system string is parsed by ChatPromptTemplate
# as an f-string-style template: an unescaped `{` or `}` coming from a column header or
# cell value would be treated as a template variable and break (or alter) the prompt.
# For data without braces this is a no-op.
df_head_markdown = df.head().to_markdown().replace("{", "{{").replace("}", "}}")

system = f"""You have access to a pandas dataframe `df`. 
Here is the output of `df.head().to_markdown()`:

{df_head_markdown}

Given a user question, write the Python code to answer it. 
Return ONLY the valid Python code and nothing else. 
Don't assume you have access to any libraries other than built-in Python ones and pandas."""
# `{question}` is the only intended template variable; it is filled in at invoke time.
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])
# prompt -> model with the REPL tool bound -> parsed tool call (the generated code, not its result).
code_chain = prompt | llm_with_tools | parser
response = code_chain.invoke({"question": "What's is the value of Principal Balance at the last period?"})
print(response)

In [None]:
# Same code-writing chain, this time asking about the second-to-last period.
# Rebinds `response` to the parsed tool call produced for this question.
response = code_chain.invoke({"question": "What's is the value of Principal Balance at the second to last period?"})
print(response)

In [None]:
# Extend the code-writing pipeline with the tool itself, so the parsed tool call
# is passed on to the REPL tool and run instead of only being printed.
chain = prompt | llm_with_tools | parser | tool
print(chain.invoke({"question": "What's the correlation between Monthly Payment and Principal Balance"}))

In [None]:
# Compute the same correlation directly with pandas, for comparison with the chain's answer.
print(df['Monthly Payment'].corr(df['Principal Balance']))

In [33]:
from operator import itemgetter

from langchain_core.messages import ToolMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough

# Render the first rows once so the model can see the column names and sample values.
# Literal braces are doubled because the system string is parsed by ChatPromptTemplate
# as an f-string-style template: an unescaped `{` or `}` coming from a column header or
# cell value would be treated as a template variable and break (or alter) the prompt.
# For data without braces this is a no-op.
df_head_markdown = df.head().to_markdown().replace("{", "{{").replace("}", "}}")

system = f"""You have access to a pandas dataframe `df`.
Here is the output of `df.head().to_markdown()`:

{df_head_markdown}

Given a user question, write the Python code to answer it. 
Don't assume you have access to any libraries other than built-in Python ones and pandas.
Respond directly to the question once you have enough information to answer it."""
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            system,
        ),
        # `{question}` is filled in at invoke time.
        ("human", "{question}"),
        # This MessagesPlaceholder allows us to optionally append an arbitrary number of messages
        # at the end of the prompt using the 'chat_history' arg.
        MessagesPlaceholder("chat_history", optional=True),
    ]
)


def _get_chat_history(x: dict) -> list:
    """Build the messages to insert into the prompt's chat history.

    Args:
        x: Running chain state. Must contain ``"ai_msg"`` (a message exposing
            ``tool_calls``) and ``"tool_output"`` (the result of running the tool).

    Returns:
        A two-item list: the original ``ai_msg`` followed by a ``ToolMessage``
        carrying ``str(tool_output)`` and the id of the first tool call.

    Raises:
        KeyError: If ``"ai_msg"`` or ``"tool_output"`` is missing from ``x``.
        IndexError: If ``ai_msg.tool_calls`` is empty.
    """
    model_turn = x["ai_msg"]
    # Only the first tool call is answered; its id links the tool result back to that call.
    first_call = model_turn.tool_calls[0]
    return [
        model_turn,
        ToolMessage(tool_call_id=first_call["id"], content=str(x["tool_output"])),
    ]


# Two-step pipeline; each .assign adds one key to the running dict, in this order:
#   ai_msg       - first model call (REPL tool bound) on the rendered prompt
#   tool_output  - the tool run on the tool call parsed out of ai_msg
#   chat_history - [ai_msg, ToolMessage] built by _get_chat_history
#   response     - second call, to the plain `llm` (no tools bound), with chat_history
#                  now filling the prompt's optional placeholder; parsed to a string
# .pick then keeps only `tool_output` and `response` in the final result.
chain = (
    RunnablePassthrough.assign(ai_msg=prompt | llm_with_tools)
    .assign(tool_output=itemgetter("ai_msg") | parser | tool)
    .assign(chat_history=_get_chat_history)
    .assign(response=prompt | llm | StrOutputParser())
    .pick(["tool_output", "response"])
)

In [None]:
# Run the two-step chain on the correlation question and print what it returns
# (the `tool_output` and `response` entries kept by `.pick`).
print(chain.invoke({"question": "What's the correlation between Monthly Payment and Principal Balance"}))

In [None]:
from langchain_experimental.agents import create_pandas_dataframe_agent
import pandas as pd
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.tools import PythonAstREPLTool

# Local Ollama model; this cell rebuilds `llm` and reloads `df` itself.
local_model = "llama3.1:latest"
llm = ChatOllama(temperature=0, model=local_model)

df = pd.read_csv('./data/mortgage-300h-360m-5r.csv')
print(df.shape, df.columns.tolist(), sep="\n")

# Tool-calling pandas agent over `df`; allow_dangerous_code=True opts in to
# letting the agent execute the Python it generates.
pd_agent = create_pandas_dataframe_agent(
    llm,
    df,
    agent_type="tool-calling",
    verbose=True,
    allow_dangerous_code=True,
)

# The question is suffixed with the tool name before being sent to the agent.
base_query = "What's the correlation between Monthly Payment and Principal Balance?"
query = base_query + " using tool python_repl_ast"
pd_agent.invoke(query)

In [None]:
# Ask the agent for the balance at period 360; the tool name is appended to the question.
base_query = "What's the Principal Balance at the 360 period?"
query = base_query + " using tool python_repl_ast"
pd_agent.invoke(query)

In [None]:
# Ask the agent for the total interest after 360 months; the tool name is appended to the question.
base_query = "What's is the total amount of interest paid after 360 months?"
query = base_query + " using tool python_repl_ast"
pd_agent.invoke(query)