In [37]:
import json

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain_core.messages import (
    AIMessage,
    AnyMessage,
    HumanMessage,
    RemoveMessage,
    SystemMessage,
    ToolMessage,
)
from matplotlib import pyplot as plt

In [48]:
# Load settings from a .env file into the process environment; later cells
# read DATA_FILE from it. override=True lets values from .env replace
# variables that are already set in the environment.
load_dotenv(override=True)

True

In [None]:
import io
import os
from dataframe_to_dict import parse_dataframe_info_to_dict

# Resolve the CSV location from the environment and fail with an actionable
# message when it is missing, instead of passing None to pd.read_csv.
data_file = os.getenv("DATA_FILE")
if data_file is None:
    raise ValueError(
        "DATA_FILE environment variable is not set; "
        "define it in the environment or in the .env file."
    )

df = pd.read_csv(data_file)

# df.info() prints to stdout by default; capture its text in a buffer so the
# column summary (with non-null counts) can be parsed instead of displayed.
buffer = io.StringIO()
df.info(buf=buffer, show_counts=True)

# Project helper turns the captured info text into `df_json`, which a later
# cell serializes with json.dumps for the planner prompt.
df_json = parse_dataframe_info_to_dict(buffer.getvalue())

In [None]:
# System prompt for the Planner Agent. The closing rule is phrased so it cannot
# be read as forbidding assumptions altogether: the planner is expected to make
# reasonable assumptions for unclear requests, as long as it writes them down.
planner_system_prompt = """
You are the Planner Agent for a Python Data Science and Machine Learning coding assistant. 
Your job is to create a structured coding plan to ensure no part of the user request is overlooked. 

- You do not write code. 
- You only produce a plan for the Coding Agent to implement.
- Each plan must cover every part of the user request as discrete tasks.
- Tasks should be sequential and unambiguous.
- If something in the request is unclear, do NOT stop to ask the user.
    Instead, state the reasonable assumption(s) you made so the Coding Agent can proceed.
- Include input requirements and expected outputs for each task if applicable.
- Your output should be in a structured format that is easy for a Coding Agent to follow.

Do not skip any part of the user’s request.
Do not rely on unstated assumptions: every assumption you make must be written explicitly in the plan.
"""

In [None]:
from pydantic import BaseModel, Field
from typing import List

class Task(BaseModel):
    # One step of the plan handed to the Coding Agent. Every field is a
    # required string whose description is part of the structured-output schema.
    # NOTE: documented with comments rather than a docstring, because pydantic
    # copies a model docstring into the schema's description.
    task_name: str = Field(
        description="Short description of the coding task.",
    )
    details: str = Field(
        description=(
            "Step-by-step description of what must be done, "
            "including any transformations or conditions."
        ),
    )
    dependencies: str = Field(
        description="Data or previous tasks this step depends on.",
    )
    output: str = Field(
        description="What this task should produce.",
    )
    assumptions: str = Field(
        description=(
            "Any assumptions made to proceed with the task, "
            "especially if the user request was unclear."
        ),
    )

class Plan(BaseModel):
    # Top-level structured output of the planner: the full list of tasks.
    # The field carries a description, like every field on Task, so the schema
    # sent to the model explains what the list must contain.
    task_list: List[Task] = Field(
        description="Ordered list of tasks that together cover every part of the user request.",
    )

In [43]:
# System message that frames the model as the Planner Agent.
system_message = SystemMessage(content=planner_system_prompt)

# Column summary of `df` (parsed from its info() text), serialized as JSON so
# the planner can see the structure of the data it is planning for.
df_structure = "DataFrame Structure:\n" + json.dumps(df_json, indent=2)

# Join the request sentences with explicit spaces. Adjacent string literals
# concatenate with no separator, which previously produced "places.Assume"
# and glued the DataFrame structure directly onto the final sentence.
user_request = " ".join(
    [
        "What is the average altitude I drive at? Fill missing altitude values with mean.",
        "Calculate the mean altitude in meters, rounded to 2 decimal places.",
        "Assume the dataframe has been loaded and is available as `df`.",
    ]
)

# A blank line separates the natural-language request from the structure dump.
human_message = HumanMessage(content=f"{user_request}\n\n{df_structure}")

messages = [system_message, human_message]

In [44]:
# Chat model used by the planner, configured in one place.
llm = init_chat_model(
    "openai:gpt-4.1",
    temperature=0.7,
    max_retries=3,
    output_version="responses/v1",
)

# Wrap the model so its reply is returned in the shape of the Plan schema.
structured_llm = llm.with_structured_output(schema=Plan)

# Send the system + human messages and keep the structured reply in `resp`.
resp = structured_llm.invoke(messages)

In [45]:
# Persist the structured planner response as pretty-printed JSON.
plan_payload = resp.model_dump()
with open("planner_output.json", "w") as plan_file:
    plan_file.write(json.dumps(plan_payload, indent=2))