In [None]:
!pip install langchain langchain-openai langchain-community langchain-experimental openai-agents

In [None]:
import os
import pandas as pd
import pandas
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
from langchain_experimental.agents import create_csv_agent
from agents import Agent, Runner, function_tool
from typing import List, Optional, Union, Dict, Any
from google.colab import userdata
# Copy the key stored under the Colab secret 'SVAI_OPENAI' into the process
# environment. No key is passed explicitly to the model/agent constructors
# below, so they presumably pick it up from this variable.
os.environ["OPENAI_API_KEY"] = userdata.get('SVAI_OPENAI')

In [None]:
# Load the table that every agent/tool below queries (kept as the global `df`).
df = pd.read_csv('information_schema.csv')
# Bare last expression: renders a preview of the first rows as the cell output.
df.head()

In [None]:
# LangChain agent that answers natural-language questions about `df`.
# It is the shared backend for the function tools defined further down.
pd_agent = create_pandas_dataframe_agent(
    ChatOpenAI(temperature=0, model="gpt-4o"),
    df,
    # Echo the agent's intermediate thoughts/actions into the cell output.
    verbose=True,
    # Explicit opt-in: the agent runs model-generated Python (python_repl_ast)
    # against `df`, so only use this in a sandboxed/trusted environment.
    allow_dangerous_code=True,
)

In [None]:
# Smoke-test the pandas agent with a simple lookup. `invoke` is used instead of
# the deprecated `Chain.run`; the executor returns a dict and the final answer
# text lives under its "output" key, so the cell still displays a plain string.
pd_agent.invoke({"input": "What is step name for step id 305?"})["output"]



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo find the step name for step id 305, I will filter the dataframe `df` to locate the row where `step_id` is 305 and then retrieve the `step_name` for that row.

Thought: I need to filter the dataframe to find the row where `step_id` is 305 and then extract the `step_name` from that row.
Action: python_repl_ast
Action Input: `df[df['step_id'] == 305]['step_name'].iloc[0]`[0m[36;1m[1;3mA. GENERAL INSTRUCTION FOR MANUFACTURING[0m[32;1m[1;3mI now know the final answer.

Final Answer: The step name for step id 305 is "A. GENERAL INSTRUCTION FOR MANUFACTURING".[0m

[1m> Finished chain.[0m


'The step name for step id 305 is "A. GENERAL INSTRUCTION FOR MANUFACTURING".'

In [None]:
@function_tool
def query_dataframe(question: str) -> str:
    """
    Query the pandas DataFrame using natural language.

    The question is forwarded unchanged to the module-level `pd_agent`.

    Args:
        question: Natural language question about the DataFrame

    Returns:
        The answer text produced by the pandas DataFrame agent, or a string
        starting with "Error querying DataFrame:" if the agent raised.
    """
    try:
        # `invoke` replaces the deprecated `Chain.run`. The executor returns a
        # dict; its "output" entry holds the final answer text.
        response = pd_agent.invoke({"input": question})
        return response["output"]
    except Exception as e:
        # Deliberately broad: the failure is handed back as the tool's text
        # result instead of propagating out of the tool call.
        return f"Error querying DataFrame: {e}"

In [1]:
@function_tool
def get_dataframe_info() -> str:
    """
    Get information about the DataFrame structure.

    Reads the module-level `df`; takes no arguments.

    Returns:
        String with the row count, column count, comma-separated column
        names and the first five rows rendered as text, or a string starting
        with "Error loading DataFrame:" if building the summary raised.
    """
    try:
        # Column labels are not guaranteed to be strings (e.g. integer labels
        # when a file has no header row); str.join would raise TypeError on
        # them, so stringify each label first.
        column_names = ", ".join(map(str, df.columns))
        info = f"DataFrame loaded successfully with {len(df)} rows and {len(df.columns)} columns.\n"
        info += f"Columns: {column_names}\n"
        info += f"Sample data (first 5 rows):\n{df.head().to_string()}"
        return info
    except Exception as e:
        # Deliberately broad: the failure is handed back as the tool's text
        # result instead of propagating out of the tool call.
        return f"Error loading DataFrame: {e}"

NameError: name 'function_tool' is not defined

In [None]:
@function_tool
def analyze_steps_sequence(step_id1: int, step_id2: int) -> str:
    """
    Analyze the sequence of steps to determine execution order.

    Builds a fixed natural-language prompt mentioning both step ids and sends
    it to the module-level `pd_agent`.

    Args:
        step_id1: ID of the first step
        step_id2: ID of the second step

    Returns:
        The agent's analysis text, or a string starting with
        "Error analyzing step sequence:" if the agent raised.
    """
    try:
        query = f"Analyze steps with step_id {step_id1} and {step_id2}. When were they executed? Which was executed first? Return detailed information."
        # `invoke` replaces the deprecated `Chain.run`. The executor returns a
        # dict; its "output" entry holds the final answer text.
        response = pd_agent.invoke({"input": query})
        return response["output"]
    except Exception as e:
        # Deliberately broad: the failure is handed back as the tool's text
        # result instead of propagating out of the tool call.
        return f"Error analyzing step sequence: {e}"

In [None]:
@function_tool
def check_step_custom_input(step_id: int) -> str:
    """
    Check the custom_input value for a specific step.

    Builds a fixed natural-language prompt for the given step id and sends it
    to the module-level `pd_agent`.

    Args:
        step_id: ID of the step to check

    Returns:
        The agent's description of the step's custom_input, or a string
        starting with "Error checking step custom_input:" if the agent raised.
    """
    try:
        query = f"What is the custom_input value for step_id {step_id}? Is it an empty list? Show the exact value."
        # `invoke` replaces the deprecated `Chain.run`. The executor returns a
        # dict; its "output" entry holds the final answer text.
        response = pd_agent.invoke({"input": query})
        return response["output"]
    except Exception as e:
        # Deliberately broad: the failure is handed back as the tool's text
        # result instead of propagating out of the tool call.
        return f"Error checking step custom_input: {e}"

In [None]:
# Top-level orchestrating agent (openai-agents SDK). It does not touch `df`
# directly: it is given the four function tools defined above, each of which
# either reads `df` or delegates to `pd_agent`.
# The instructions are built from adjacent string literals, so the spacing and
# "\n" sequences inside them are part of the prompt text sent to the model.
agent = Agent(
    name="Workflow Assistant",
    instructions=(
        "You're an experienced data analyst specialized in CSV data analysis using pandas. "
        "Use the provided tools to query and analyze the DataFrame. Don't rely on "
        "your own knowledge about the data but instead use your tools to help "
        "you query and analyze the DataFrame.\n\n"
        "Available tools:\n"
        "1. get_dataframe_info(): Get information about the DataFrame structure\n"
        "2. query_dataframe(question): Query the DataFrame using natural language\n"
        "3. analyze_steps_sequence(step_id1, step_id2): Analyze execution sequence of steps\n"
        "4. check_step_custom_input(step_id): Check custom_input for a specific step\n\n"
        "Always start by getting DataFrame info to understand its structure before performing specific queries."
    ),
    model="gpt-4o",
    tools=[get_dataframe_info, query_dataframe, analyze_steps_sequence, check_step_custom_input]
)

In [None]:
async def run_agent(input_query: str):
    """
    Run the module-level `agent` on a single user query.

    Args:
        input_query: The user's question, passed to the runner as its input.

    Returns:
        The `final_output` attribute of the completed run.
    """
    completed_run = await Runner.run(starting_agent=agent, input=input_query)
    return completed_run.final_output

In [None]:
# Example usage:
# The question is assembled from adjacent string literals into one prompt.
# NOTE(review): it refers to step_id 22 and 32 for ordering but step_id 25 for
# custom_input — confirm that mix of ids is intended.
input_query = ("In table steps the step_id 22 was supposed to be executed before step_id 32, but the opposite happened. "
              "This case could only happen when custom_input column of step_id 25 is an empty list []. "
              "How do I figure this out using the steps table? How do I view that table? "
              "Also mention if you used the tool to determine or not.")
# Top-level `await` relies on the notebook's support for awaiting in a cell;
# this line would not be valid in a plain Python script.
result = await run_agent(input_query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo find the `custom_input` value for `step_id` 25, I need to filter the dataframe for the row where `step_id` is 25 and then check the `step_info` column for that row. Let's perform this operation.

Thought: I will filter the dataframe for `step_id` 25 and then check the `step_info` column.
Action: python_repl_ast
Action Input: df[df['step_id'] == 25]['step_info'][0m[36;1m[1;3mSeries([], Name: step_info, dtype: object)[0m[32;1m[1;3mThe result is an empty series, which means there is no row in the dataframe with `step_id` 25. Therefore, the `custom_input` value for `step_id` 25 does not exist in the dataframe. 

Final Answer: The `custom_input` value for `step_id` 25 does not exist in the dataframe.[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe `step_id` 25 is not present in the provided dataframe snippet. To find the `step_info` for `step_id` 25, I need to search the ent

In [None]:
# Show the orchestrating agent's final answer as plain text (no repr quoting).
print(result)

It appears that `step_id 25` is not present in the DataFrame, so we can't verify the `custom_input` condition. 

If you need further analysis on other steps or information, feel free to ask!
