In [31]:
import sys
import io
import os
import json

import pandas as pd
from langchain_openai import AzureChatOpenAI
from langgraph.pregel import GraphRecursionError

## Input parsing

In [32]:
# Sanity check: list the current working directory.
# query_input.txt and the data file it refers to should appear in the output.
print(os.listdir())

['class.csv', 'query_input.txt', 'temperature.ipynb', 'README.md', 'class_reflect.txt', 'class_errors.txt', 'others', 'class.py', 'langgraph.ipynb', 'class.txt', 'class_query.txt', 'langchain.ipynb']


In [33]:
# Read which files and operations we are expected to perform (query input)
def read_query_input(query_input_file):
    """Parse a ``key:value`` query-input file into a dictionary.

    Every line except the last one is read as ``key:value``. Only the first
    colon separates key from value, so values may themselves contain colons.
    Keys, values and column names are stripped of surrounding whitespace and
    blank lines are ignored.

    Args:
        query_input_file: Path of the query-input text file.

    Returns:
        A dict mapping each key to its string value, with ``'columns'``
        converted to a list of column names. ``None`` if the file is missing
        or cannot be parsed (the reason is printed).
    """
    try:
        with open(query_input_file) as f:
            lines = [line.strip() for line in f]

        info = {}
        # The final line is deliberately excluded, exactly as before.
        # NOTE(review): presumably it is not a key:value entry - confirm
        # against the query_input.txt format.
        for line in lines[:-1]:
            if not line:
                continue  # tolerate blank separator lines
            key, separator, value = line.partition(':')
            if not separator:
                raise ValueError(f"expected 'key:value' but got {line!r}")
            info[key.strip()] = value.strip()

        # A missing 'columns' entry raises KeyError, reported by the generic handler.
        info['columns'] = [column.strip() for column in info['columns'].split(',')]
        return info
    except FileNotFoundError:
        print(f"Error: The file {query_input_file} was not found.")
        return None
    except Exception as e:
        print(f"An error occurred while reading {query_input_file}: {str(e)}")
        return None


In [34]:
def read_data_file(data_file):
    """Load the CSV data file into a pandas DataFrame.

    Args:
        data_file: Path of the data file; its name must end in ``.csv``
            (checked case-insensitively).

    Returns:
        The DataFrame produced by ``pd.read_csv``, or ``None`` when the path
        is not a CSV file, the file does not exist, or reading fails (the
        reason is printed).
    """
    try:
        # Explicit check instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently disable the validation.
        # str() lets None and path-like objects reach this branch cleanly.
        if not str(data_file).lower().endswith('.csv'):
            print("Error: The data file must be a CSV file.")
            return None
        return pd.read_csv(data_file)
    except FileNotFoundError:
        print(f"Error: The file {data_file} was not found.")
        return None
    except Exception as e:
        print(f"An error occurred while reading {data_file}: {str(e)}")
        return None


In [35]:
def read_query_file(query_file):
    """Return the text of ``<query_file>_query.txt``.

    Args:
        query_file: Query name; ``_query.txt`` is appended to form the file name.

    Returns:
        The full file contents as a string, or ``None`` if the file is missing
        or cannot be read (the reason is printed).
    """
    # Built before the `try` so the handlers below can always reference it.
    # Previously a failure while building the name (e.g. query_file is None)
    # made the handler itself raise UnboundLocalError.
    query_file_name = f"{query_file}_query.txt"
    try:
        with open(query_file_name) as f:
            return f.read()
    except FileNotFoundError:
        print(f"Error: The file {query_file_name} was not found.")
        return None
    except Exception as e:
        print(f"An error occurred while reading {query_file_name}: {str(e)}")
        return None


In [36]:
def read_input(state):
  """First graph node: load the query input, data and query text into the state.

  Reads 'query_input.txt', then stores in ``state``:
  'data_file', 'data' (result of read_data_file), 'query_name',
  'query' (result of read_query_file) and 'columns'.

  Args:
      state: The graph state dictionary; it is updated in place.

  Returns:
      The same ``state`` object with the keys above filled in.

  Raises:
      RuntimeError: If 'query_input.txt' could not be read or parsed.
  """
  print("Executing Agent Initialize")  # This method is called by the first node
  query_input = read_query_input('query_input.txt')
  if query_input is None:
    # read_query_input already printed the cause; stop here with a clear
    # message instead of failing later with an AttributeError on None.
    raise RuntimeError("Could not read 'query_input.txt'; cannot initialize the agent state.")

  data_file = query_input.get('data_file')
  state['data_file'] = data_file
  state['data'] = read_data_file(data_file)  # Adding the data to the state

  query_name = query_input.get('query_name')
  state['query_name'] = query_name
  state['query'] = read_query_file(query_name)  # Adding the query to the state

  state['columns'] = query_input.get('columns')

  return state

## Setting up LLM

In [37]:
# Credentials come from the environment and are never written into the notebook.
# Export AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT before starting the kernel.
# (Assigning "" to os.environ here would wipe out values that are already set.)
AZURE_OPENAI_API_KEY = os.getenv('AZURE_OPENAI_API_KEY')
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")

_missing_settings = [
    name
    for name, value in (
        ("AZURE_OPENAI_API_KEY", AZURE_OPENAI_API_KEY),
        ("AZURE_OPENAI_ENDPOINT", AZURE_OPENAI_ENDPOINT),
    )
    if not value
]
if _missing_settings:
    # Fail early with an actionable message rather than on the first model call.
    raise RuntimeError(
        "Missing environment variable(s): " + ", ".join(_missing_settings)
    )

# Chat model shared by every LLM-backed node in the graph.
model = AzureChatOpenAI(
    azure_deployment="gpt-4",
    api_version="2023-06-01-preview",
    temperature=0.7,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [38]:
# Smoke test: send a trivial prompt to check that the model responds.
model.invoke('hello')

AIMessage(content='Hello! How can I assist you today?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17, 'completion_tokens_details': None}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-505332b9-e80b-4fae-9f18-87784ec56d77-0', usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17})

## LangGraph

In [39]:
from typing import List
from typing_extensions import TypedDict


# State dictionary passed between the nodes of the graph.
# Each node receives it, updates some keys and returns it.
class State(TypedDict):
  # Value of 'query_name' in query_input.txt; prefix of the query file and of the output files.
  query_name: str
  # Text read from '<query_name>_query.txt'.
  query: str
  # Value of 'data_file' in query_input.txt.
  data_file: str
  # Result of read_data_file(data_file).
  # NOTE(review): annotated as str, but read_data_file returns the pd.read_csv result (or None).
  data: str
  # Column names listed under 'columns' in query_input.txt.
  columns: List[str]
  # Latest LLM-generated code, after clean_code().
  query_code: str
  # Number of times execute_program has run.
  execution_number: int
  # Captured stdout of the executed code, or an "[ERROR] ..." message.
  execution_result: str
  # Set by check_errors: True when the last execution result was rejected.
  error_check: bool
  # Reason recorded by check_errors for the most recent rejection.
  error_message: str
  # LLM explanation of the error, written by reflect_on_error.
  reflection: str

In [40]:
def clean_code(code):
    '''
    Extract the source code from an LLM reply.

    If the reply contains a fenced block (three backticks), the content of the
    first fenced block is returned. An optional language tag after the opening
    fence (for example "python" or "py") is dropped, and a missing closing
    fence is tolerated. A reply without any fence is returned as is.
    The result is always stripped of surrounding whitespace.
    '''
    fence = "```"
    _, opening, after_fence = code.partition(fence)
    if not opening:
        # No fenced block: the whole reply is taken to be code.
        return code.strip()

    # The rest of the opening-fence line is either empty or a language tag.
    # Drop it instead of assuming the tag is exactly "python".
    first_line, newline, remainder = after_fence.partition("\n")
    tag = first_line.strip()
    if newline and (not tag or tag.isalnum()):
        after_fence = remainder

    # Keep everything up to the closing fence. partition() returns the whole
    # text when the fence is missing, so nothing is cut off by accident.
    body, _, _ = after_fence.partition(fence)
    return body.strip()


def generate_program(state):
    """Second graph node: ask the model for a program that answers the query.

    Fills a prompt with the data file name, the column list and the query,
    sends it to ``model`` and stores the cleaned reply in
    ``state['query_code']``. Returns the updated state.
    """
    print("Executing Agent GenQueryProgram")  # Invoked by the 2nd node

    prompt_parts = [
        "I have data in `csv` format in {} containing the following columns: {}\n",
        "I want you to write a valid Python function the performs the following: {}\n\n",
        "Note: I will insert your code into a Python interpreter, so make sure your",
        " response is valid Python code, without any other output.",
        " Make sure you do not include any introduction, explanation, or summarization of your answer.",
        " The last line of your code should be printing the result JSON",
    ]
    prompt = "".join(prompt_parts).format(
        state['data_file'], state['columns'], state['query']
    )

    # Ask the LLM, then strip any markdown fencing from its reply
    reply = model.invoke(prompt)
    state['query_code'] = clean_code(reply.content)
    return state


In [41]:
# Executes the program, adds the output/error to the state
def execute_program(state):
    """Run the generated code and record what it printed.

    Increments ``state['execution_number']``, executes ``state['query_code']``
    with stdout captured, and stores the result in ``state['execution_result']``:
    the captured output on success, or a message starting with
    "[ERROR] Execution failed: " (including the exception type) on failure.

    Args:
        state: The graph state dictionary; it is updated in place.

    Returns:
        The same ``state`` object.
    """
    state['execution_number'] = state.get('execution_number', 0) + 1
    print(f"Executing Agent ExecuteProgram (Execution number {state['execution_number']})")

    code = state['query_code']

    # Run the code like a standalone script: one fresh dict is used as both
    # globals and locals. With a bare exec(code) inside a function, names
    # defined by the code land in a separate locals dict, so functions it
    # defines cannot see its own imports or helper functions. A fresh
    # namespace also keeps the code away from this module's variables.
    # '__name__' is set so an `if __name__ == "__main__":` guard still runs.
    namespace = {'__name__': '__main__'}

    # Redirect stdout to capture the output
    original_stdout = sys.stdout
    captured = sys.stdout = io.StringIO()

    try:
        # SECURITY: this executes LLM-generated code with the full privileges
        # of the current process. Only run it in an environment you trust.
        exec(code, namespace)
        output = captured.getvalue()
    except Exception as e:
        # The exception type matters: str(KeyError('x')) alone is just "'x'".
        output = f"[ERROR] Execution failed: {type(e).__name__}: {e}"
    finally:
        sys.stdout = original_stdout

    state['execution_result'] = output
    return state

In [42]:
def check_errors(state):
  """Classify the last execution result and flag it in the state.

  The result is rejected when it starts with "[ERROR]", is empty, or is not
  valid JSON. On rejection ``error_check`` is set to True and ``error_message``
  to the reason; otherwise only ``error_check`` is set (to False).
  Returns the updated state.
  """
  print("Executing Agent Chk4rErr")

  result = state['execution_result']
  problem = None

  if result.startswith("[ERROR]"):
    # Execution raised: the result itself is the error text
    problem = result
  elif not result:
    problem = "Empty response"
  else:
    try:
      json.loads(result)
    except ValueError:
      problem = "Invalid JSON"

  if problem is None:
    state['error_check'] = False
  else:
    state['error_check'] = True
    state['error_message'] = problem
  return state

In [43]:
from langchain_core.prompts import ChatPromptTemplate


def reflect_on_error(state):
  """Ask the model to explain the last failure.

  Sends the current code and error message to ``model`` through a
  system/human chat prompt and stores the reply text in
  ``state['reflection']``. Returns the updated state.
  """
  print("Executing Agent ReflectOnErr")

  failing_code = state['query_code']
  failure = state['error_message']

  system_prompt = (
    "You're a Python programming assitant."
    " You are presented with user code and a resulting error."
    " Your mission is to explain the problem,"
    " and suggest ways to fix it."
  )
  user_prompt = (
      "I encountered a bug in my Python code."
      " Could you help me understand my mistake?\n"
      "The problem I encounter is:\n{error}.\n"
      "Here is the code:\n{code}"
      )

  # Build the two-message chat prompt and fill in its placeholders
  messages = [("system", system_prompt), ("human", user_prompt)]
  chat_template = ChatPromptTemplate.from_messages(messages)
  filled_prompt = chat_template.invoke({"error": failure, "code": failing_code})

  state['reflection'] = model.invoke(filled_prompt).content
  return state

In [44]:
def should_retry(state):
  """Conditional edge: pick the node that follows the error check.

  Returns 'ReflectOnErr' when ``state['error_check']`` is truthy (reflect,
  rewrite, retest), otherwise "Finalize".
  """
  print("Executing Conditional Edge (ReGenQueryPgm or Finalize)")
  next_node = 'ReflectOnErr' if state['error_check'] else "Finalize"
  return next_node

In [45]:
def regenerate_code(state):
  """Ask the model for a corrected version of the failing code.

  The prompt contains the query, the data file name, the columns, the last
  error message and the current code. The cleaned reply replaces
  ``state['query_code']``. Returns the updated state.
  """
  print("Executing Agent ReGenQueryPgm")

  prompt_template = "".join([
      "I'm trying to write a Python script that {query}.",
      " My data is saved in a file called {query_name},",
      " and contains the following columns: {columns}\n",
      "I got the following error when running my implementation:\n{error}\n",
      "Here is my code: {code}\n",
      "Please help me modify my code so it runs properly.",
      "Please respond with a fixed version of my code.",
      " I will insert your response into a Python interpreter,",
      " so make sure your response is valid Python code,",
      " without any introduction or explanation,",
      " or without any other output.",
      " Specifically, do not start your reply with 'certainly', or 'sure,",
      " simply respond with valid python code.",
  ])

  # The {query_name} placeholder receives the data file name.
  # state['reflection'] is not part of this prompt.
  fields = {
      'query_name': state['data_file'],
      'columns': state['columns'],
      'query': state['query'],
      'error': state['error_message'],
      'code': state['query_code'],
  }

  reply = model.invoke(prompt_template.format(**fields))
  state['query_code'] = clean_code(reply.content)
  return state

In [46]:
def finalize(state):
  """Last graph node: write the run's artifacts to disk.

  Creates four UTF-8 text files named after ``state['query_name']``:
  ``<name>.py`` (generated code), ``<name>.txt`` (execution output),
  ``<name>_errors.txt`` (last error message) and ``<name>_reflect.txt``
  (last reflection). A missing or None value is written as an empty file.

  Args:
      state: The graph state dictionary.

  Returns:
      The unchanged ``state``, like every other node of the graph.
  """
  print("Executing Agent Finalize")

  query_name = state['query_name']
  artifacts = {
      f'{query_name}.py': state.get('query_code'),
      f'{query_name}.txt': state.get('execution_result'),
      f'{query_name}_errors.txt': state.get('error_message'),
      f'{query_name}_reflect.txt': state.get('reflection'),
  }

  for path, content in artifacts.items():
    # Explicit UTF-8: model output may contain characters that the platform
    # default encoding cannot represent.
    with open(path, 'w', encoding='utf-8') as artifact_f:
      artifact_f.write(content or "")  # write() rejects None

  return state


In [47]:
from langgraph.graph import END, StateGraph


workflow = StateGraph(State)

# Part I: Deterministic flow. Will always run.

# Initial agent, reading input
workflow.add_node("Initialize", read_input)
workflow.set_entry_point("Initialize")

# Agent that writes Python code for the query
workflow.add_node("GenQueryProgram", generate_program)
workflow.add_edge("Initialize", "GenQueryProgram")

# Agent that runs the Python code generated in the last step
workflow.add_node("ExecuteProgram", execute_program)
workflow.add_edge("GenQueryProgram", "ExecuteProgram")

# Agent that checks for errors in the code execution
workflow.add_node("Chk4rErr", check_errors)
workflow.add_edge("ExecuteProgram", "Chk4rErr")


# Part II: Conditional flow

workflow.add_node("ReflectOnErr", reflect_on_error)
workflow.add_node("Finalize", finalize)

# Continue to ReflectOnErr or to Finalize, depending on errors.
# The explicit mapping lists every target should_retry can return, so a
# misspelled route is rejected when the graph is compiled.
workflow.add_conditional_edges(
    "Chk4rErr",
    should_retry,
    {"ReflectOnErr": "ReflectOnErr", "Finalize": "Finalize"},
)

# NOTE:
# The retry node registered below is `regenerate_code`, which sends the
# failing code and its error back to the model.
# An earlier note in this notebook reported that prompt engineering the
# regenerate function did not succeed, while solving the query from scratch
# with `generate_program` did. Swap the callable here to revisit that choice.
workflow.add_node("ReGenQueryPgm", regenerate_code)
workflow.add_edge("ReflectOnErr", "ReGenQueryPgm")

# Close a cycle in the graph:
# execute -> check for errors -> reflect -> regenerate -> execute..
# Continue cycling until the solution seems fine
workflow.add_edge("ReGenQueryPgm", "ExecuteProgram")

workflow.add_edge("Finalize", END)

runnable = workflow.compile()
# Initial state: only the keys that nodes read before writing are pre-filled.
mystate = State(query="", execution_number=0, error_message="", reflection="")


In [49]:
# Start from an empty dict so `results` is defined even when the run below fails.
results = {}

try:
    # Run the whole graph, starting from the initial state.
    results = runnable.invoke(mystate)
except GraphRecursionError:
    # The graph kept cycling until the recursion limit was reached.
    print("Error: Recursion limit reached without resolving the issue. Please rephrase your query and try again.")
except Exception as e:
    # Any other failure is reported but not re-raised; `results` stays empty.
    print(f"An unexpected error occurred: {str(e)}")

# Optional debug dump of the full final state (disabled):
# if results:
#     print(*[f"{k}: {v}" for k, v in results.items()], sep='\n' + "*" * 10 + '\n')
# else:
#     print("No results to display due to an error during execution.")


Executing Agent Initialize
Executing Agent GenQueryProgram
Executing Agent ExecuteProgram (Execution number 1)
Executing Agent Chk4rErr
Executing Conditional Edge (ReGenQueryPgm or Finalize)
Executing Agent ReflectOnErr
Executing Agent ReGenQueryPgm
Executing Agent ExecuteProgram (Execution number 2)
Executing Agent Chk4rErr
Executing Conditional Edge (ReGenQueryPgm or Finalize)
Executing Agent Finalize


In [30]:
# Optional debug dump of the full final state (disabled):
# print(*[f"{k}: {v}" for k, v in results.items()], sep='\n' + "*" * 10 + '\n')

# `results` is an empty dict when the graph run failed, so use .get()
# instead of indexing to avoid a KeyError in that case.
print(results.get('execution_result', "No results to display due to an error during execution."))

{"First": "Shalom", "Last": "Levi", "Year": 3, "Average-Grade": 92.4}

