In [1]:
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import create_agent
import pandas as pd
from pydantic import BaseModel, Field
from langchain_core.tools import tool
from functools import partial
from pprint import pprint
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.types import interrupt
import getpass

if "GROQ_API_KEY" not in os.environ:
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")

### Response Format

In [2]:


class StudyPlanEvaluation(BaseModel):
    """
    Full evaluation of a student's semester study plan by multiple agents.
    """
    scheduling_score: int = Field(
        description="Score out of 100 given by the Scheduling Agent."
    )
    alignment_score: int = Field(
        description="Score out of 100 given by the Alignment Agent."
    )
    weighted_color: str = Field(
        description="Final approval color code (red/yellow/green) given by main agent."
    )
    scheduling_reasoning: str = Field(
        description="Brief reasoning from Scheduling Agent."
    )
    alignment_reasoning: str = Field(
        description="Brief reasoning from Alignment Agent."
    )
    overall_recommendation: str = Field(
        description="Joint summary or recommendation by main agent (optional)."
    )
    workload_score: int = Field(
        description="Score out of 100 given by the Workload Agent."
    )



### Evaluation score averaging tool

In [3]:
@tool
def weighted_score_tool(scheduling_score:int, alignment_score:int, workload_score: int) -> str:
    """
    Calculate the weighted average score and return a color code based on the score."""
    w_avg = (0.4*scheduling_score + 0.4*alignment_score + 0.2*workload_score)
    if 0 <= w_avg <= 45:
        return "red"
    elif 46 <= w_avg <= 75:
        return "yellow"
    else:
        return "green"


### Dataframe query tool

In [4]:
# Load your CSV into a pandas DataFrame
course_description_path = 'course_description.csv'
course_masterlist_path = 'course_masterlist.csv'
exams_path = 'exams.csv'
lectures_path = 'lectures.csv'

course_description_df = pd.read_csv(course_description_path)
course_masterlist_df = pd.read_csv(course_masterlist_path)
exams_df = pd.read_csv(exams_path)
lectures_df = pd.read_csv(lectures_path)

def query_dataframe(df: pd.DataFrame,query: str) -> str:
    try:
        result = df.query(query).to_string()
        return result
    except Exception as e:
        return f"Error executing query: {e}"


course_description_query = partial(query_dataframe, course_description_df)
course_masterlist_query = partial(query_dataframe, course_masterlist_df)
exams_query = partial(query_dataframe, exams_df)
lectures_query = partial(query_dataframe, lectures_df)

print(course_description_df.info())
print("-----")
print(course_masterlist_df.info())
print("-----")
print(exams_df.info())
print("-----")
print(lectures_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Course Code  27 non-null     object
 1   Full Name    27 non-null     object
 2   Description  27 non-null     object
dtypes: object(3)
memory usage: 780.0+ bytes
None
-----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Course Code         27 non-null     object
 1   Department          27 non-null     object
 2   Course Level        27 non-null     object
 3   Possible Full Name  27 non-null     object
dtypes: object(4)
memory usage: 996.0+ bytes
None
-----
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Da

### Sub-agent prompts

In [5]:
def load_prompt(path):
    with open(path, 'r') as f:
        return f.read()

scheduling_prompt_text = load_prompt("schedule_prompt.txt")
alignment_prompt_text = load_prompt("alignment_prompt.txt")
print(scheduling_prompt_text)
print("-----")
print(alignment_prompt_text)

Given a Study Plan: {study_plan}
To extract semester schedule details for each selected course, use the course_masterlist, exams, and lectures tables.
Evaluate when lectures, exercises, and exams occur for selected courses.
Score out of 100 based on:
- Breaks (<1hr) between lectures/exercises
- Exams too close (<3 days)
- Overlaps/conflicts not tolerated
Explain your score briefly.

-----
Given a study plan: {study_plan}
Analyze selection of courses for the semester using the course_description table and student's major/minor.
Do not consider schedule.
Score out of 100. Assessment must account for one exploratory course.
Explain your score briefly.



In [20]:
@tool
def course_description_tool(query: str) -> str:
    """
    Query the course description dataframe. Accepts a pandas query string."""
    return query_dataframe(df=course_description_df,query=query)

@tool
def course_masterlist_tool(query: str) -> str:
    """
    Query the course masterlist dataframe. Accepts a pandas query string."""
    return query_dataframe(df=course_masterlist_df,query=query)

@tool
def exams_tool(query: str) -> str:
    """
    Query the exams dataframe. Accepts a pandas query string.
    """
    return query_dataframe(df=course_masterlist_df,query=query)
@tool
def lectures_tool(query: str) -> str:
    """
    Query the lectures dataframe. Accepts a pandas query string.
    """
    return query_dataframe(df=lectures_df,query=query)


# Scheduling agent prompt
scheduling_prompt = ChatPromptTemplate.from_messages(('human',scheduling_prompt_text))

# Alignment agent prompt
alignment_prompt = ChatPromptTemplate.from_messages(('human',alignment_prompt_text))

llm = ChatGroq(model="llama-3.3-70b-versatile",
    temperature=0.0,
    max_retries=2)  # Substitute with your preferred model



### Sub-agent initialization

In [11]:
# Define each agent as a LangChain AgentExecutor, with relevant tools
scheduling_agent = create_agent(model=llm, tools=[exams_tool,lectures_tool])  # Add relevant table tools

@tool("schedule_evaluator", description="Assesses study plan schedule for breaks and conflicts.")
def call_scheduling_agent(study_plan: str) -> dict:
    chain = scheduling_prompt | scheduling_agent
    result = chain.invoke({"study_plan": study_plan})
    # Ensure the output structure matches StudyPlanEvaluation (score and reasoning)
    return {
        "score": result.get("score"),
        "reasoning": result.get("reasoning")
    }

alignment_agent = create_agent(model=llm, tools=[course_description_tool,course_masterlist_tool])  # Add relevant table tools
@tool("alignment_evaluator", description="Evaluates courses alignment with major/minor.")
def call_alignment_agent(study_plan: str) -> dict:
    chain = alignment_prompt | alignment_agent
    result = chain.invoke({"study_plan": study_plan})
    # Ensure the output structure matches StudyPlanEvaluation (score and reasoning)
    return {
        "score": result.get("score"),
        "reasoning": result.get("reasoning")
    }




### Main Agent init and Prompt

In [12]:

main_agent_prompt_text = """
You are the study plan supervisor. For the provided study plan, call schedule_evaluator and alignment_evaluator as needed.
Aggregate their results, call weighted_score, and provide an output containing:
- scheduling_score, alignment_score, weighted_color
- scheduling_reasoning, alignment_reasoning
- overall_recommendation
Additionally, evaluate a score based on the amount of courses selected called workload_score and make your evaluation based on that.
Be critical and thorough in your evaluation, avoid being over optimistic.
"""
# Use from_messages to construct the ChatPromptTemplate (ChatPromptTemplate requires 'messages' when using constructor)
main_agent_prompt = ChatPromptTemplate.from_messages([('system',main_agent_prompt_text),('human',"Given a study plan: \n{study_plan}\n")])

main_agent = create_agent(
    model=llm,
    tools=[call_scheduling_agent, call_alignment_agent, weighted_score_tool],
    response_format=StudyPlanEvaluation
)
chain = main_agent_prompt | main_agent 

In [13]:
def run_evaluation(study_plan: str):
    result = chain.invoke({"study_plan": study_plan})
    # Parse results into StudyPlanEvaluation
    return result

In [14]:
green_case = load_prompt("green_case.txt")
yellow_case = load_prompt("yellow_structured.txt")
red_case = load_prompt("red_structured.txt")
pprint(run_evaluation(red_case)['structured_response'].model_dump())

{'alignment_reasoning': "The study plan is not well-aligned with the student's "
                        'major and minor.',
 'alignment_score': 60,
 'overall_recommendation': 'The student should reconsider their course '
                           'selection to better align with their academic '
                           'program.',
 'scheduling_reasoning': 'The study plan is overly ambitious and may lead to '
                         'scheduling conflicts.',
 'scheduling_score': 40,
 'weighted_color': 'red',
 'workload_score': 20}


In [15]:
@tool
def weighted_score_tool_with_interrupt(scheduling_score:int, alignment_score:int, workload_score: int) -> str:
    """
    Calculate the weighted average score and return a color code based on the score.
    """
    w_avg = 0.4 * scheduling_score + 0.5 * alignment_score + 0.1 * workload_score
    
    if 0 <= w_avg <= 45:
        return "red"
    elif 46 <= w_avg <= 75:
        # Pause and request human review
        human_input = interrupt(
            "Review required: The evaluation outcome is YELLOW.\n"
            "Please review reasoning, scores, and add additional context if needed. Decisions: [approve, edit, reject]."
        )
        # Incorporate human input for final output (assume human_input is a dict with 'decision' and optional 'context')
        if human_input['decision'] == "approve":
            return "green"
        elif human_input['decision'] == "edit":
            # Recalculate based on human context (for simplicity, assume human provides new scores)
            new_scheduling_score = human_input.get('scheduling_score', scheduling_score)
            new_alignment_score = human_input.get('alignment_score', alignment_score)
            new_workload_score = human_input.get('workload_score', workload_score)
            return f"Revaluate based on new scores: {new_scheduling_score}, {new_alignment_score}, {new_workload_score}"
        elif human_input['decision'] == "reject":
            return "red"
    else:
        return "green"

In [21]:
interrupt_agent_prompt_text = """
You are the study plan supervisor. For the provided study plan, call schedule_evaluator and alignment_evaluator as needed.
Aggregate their results, call weighted_score_tool_with_interrupt, and provide an output containing:
- scheduling_score, alignment_score, weighted_color
- scheduling_reasoning, alignment_reasoning
- overall_recommendation
Additionally, evaluate a score based on the amount of courses selected called workload_score and make your evaluation based on that.
Be critical and thorough in your evaluation, avoid being over optimistic.
"""
# Use from_messages to construct the ChatPromptTemplate (ChatPromptTemplate requires 'messages' when using constructor)
interrupt_agent_prompt = ChatPromptTemplate.from_messages([('system',interrupt_agent_prompt_text),('human',"Given a study plan: \n{study_plan}\n")])
interrupt_agent = create_agent(
    model=llm,
    tools=[weighted_score_tool_with_interrupt, call_scheduling_agent, call_alignment_agent],
    middleware=[
        HumanInTheLoopMiddleware(
            interrupt_on={"weighted_score_tool_with_interrupt": True}  # Only calls interrupt inside tool
        ),
    ],
    checkpointer=InMemorySaver(),  # InMemory for testing; use persistent in production
    response_format=StudyPlanEvaluation
)

chain_with_interrupt = interrupt_agent_prompt | interrupt_agent

In [22]:
from langgraph.types import Command

thread_id = "interrupt-123"
yellow_plan = load_prompt("yellow_structured.txt")
# First invocation, agent runs until interrupt (if yellow)
result = chain_with_interrupt.invoke(
    {"study_plan": yellow_plan},
    config={"configurable": {"thread_id": thread_id}}
)

if '__interrupt__' in result:
    # Present review information to human, collect decision:
    action = result['__interrupt__']
    print(action[0].value['action_requests'][0]['description'])  # Display the interrupt message
    # Simulate getting human review:
    decision = input("Enter decision (approve/edit/reject): ")
    context = ""
    if decision == "edit":
        context = input("Provide additional context: ")

    # Resume agent using actual human input
    resume_payload = {"decision": decision, "context": context}
    review_result = chain_with_interrupt.invoke(
        Command(resume={"decisions": [resume_payload]}),
        config={"configurable": {"thread_id": thread_id}}
    )
    try:
        pprint(review_result['structured_response'].model_dump())  # Will contain the final output after human review
    except KeyError as e:
        print(f"Error retrieving final output: {e}")
        pprint(review_result)
else:
    print(result)

Tool execution requires approval

Tool: weighted_score_tool_with_interrupt
Args: {'alignment_score': 80, 'scheduling_score': 70, 'workload_score': 60}
Error retrieving final output: 'structured_response'
{'__interrupt__': [Interrupt(value={'action_requests': [{'args': {'alignment_score': 80,
                                                                  'scheduling_score': 90,
                                                                  'workload_score': 90},
                                                         'description': 'Tool '
                                                                        'execution '
                                                                        'requires '
                                                                        'approval\n'
                                                                        '\n'
                                                                        'Tool: '
                                    