In [1]:
import os, getpass

def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"{var}: ")

_set_env("OPENAI_API_KEY")

OPENAI_API_KEY:  ········


In [27]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langgraph.graph import MessagesState
import requests
import json

In [36]:
# System message for context

# Function to extract Java classes from markdown content via ChatGPT
def extract_classes(path: str, state: MessagesState):
    sys_msg = SystemMessage(content="You are a helpful assistant tasked with extracting Java classes and their code from the provided markdown content.")
    # Fetch the markdown content from the provided link
    with open(path, 'r') as file:
        markdown_content = file.read()
    
    # Prepare the human message containing the markdown content
    human_msg = HumanMessage(content=f"Extract the different Java classes from the following markdown content:\n\n{markdown_content}\n\nProvide a list of dictionaries where each dictionary contains the 'class_name' and 'class_code'. Clean the response to not have anything other than the list of dictionaries in the output.")

    # Update the state with the human message
    state["messages"].append(human_msg)
    llm_for_extraction = ChatOpenAI(model="gpt-4o-mini")
    # Invoke the LLM using the provided system message and the state
    extracted_classes = llm_for_extraction.invoke([sys_msg] + state["messages"])
    
    # Parse the AI response (assuming it returns the correct format directly)
    return extracted_classes

# Example usage
state = MessagesState(messages=[])
link_to_markdown = "C:/Hardik/m24-llm-midsem/data/simple-scenario/student_solution.md"  # Replace with the actual link to the markdown file
extracted_student_classes = json.loads(extract_classes(link_to_markdown, state).content)
link_to_markdown = "C:/Hardik/m24-llm-midsem/data/simple-scenario/model_solution.md"  # Replace with the actual link to the markdown file
extracted_model_classes = json.loads(extract_classes(link_to_markdown, state).content)
extracted_model_classes

[{'class_name': 'StringManipulator',
  'class_code': 'import java.util.Scanner;\n\npublic class StringManipulator {\n    public static void main(String[] args) {\n        Scanner sc = new Scanner(System.in);\n        System.out.print("Enter a string: ");\n        String input = sc.nextLine();\n        System.out.println("Original String: " + input);\n        System.out.println("Uppercase String: " + input.toUpperCase());\n        String reversed = new StringBuilder(input).reverse().toString();\n        System.out.println("Reversed String: " + reversed);\n        System.out.println("Number of Characters: " + input.length());\n        sc.close();\n    }\n}'}]

In [37]:
# System message for context


# Function to extract Java classes from markdown content via ChatGPT
def extract_rubric(path: str, state: MessagesState):
    sys_msg = SystemMessage(content="You are a helpful assistant tasked with extracting rubric for each Java class from the provided markdown content.")
    # Fetch the markdown content from the provided link
    with open(path, 'r') as file:
        markdown_content = file.read()
    
    # Prepare the human message containing the markdown content
    human_msg = HumanMessage(content=f"The following markdown contains rubric details for a Java submission:\n\n{markdown_content}\n\nPlease extract the rubric details relevant to each of the mentioned classes.\n\nProvide a list of dictionaries where each dictionary contains the 'class_name' and 'class_rubric'. Clean the response to not have anything other than the list of dictionaries in the output.")

    # Update the state with the human message
    state["messages"].append(human_msg)
    llm_for_rubric_extraction = ChatOpenAI(model="gpt-4o-mini")
    # Invoke the LLM using the provided system message and the state
    extracted_rubric = llm_for_rubric_extraction.invoke([sys_msg] + state["messages"])
    
    # Parse the AI response (assuming it returns the correct format directly)
    return extracted_rubric

# Example usage
state = MessagesState(messages=[])
link_to_markdown = "C:/Hardik/m24-llm-midsem/data/simple-scenario/rubric.md"  # Replace with the actual link to the markdown file
extracted_rubric = json.loads(extract_rubric(link_to_markdown, state).content)

extracted_rubric

[{'class_name': 'StringManipulator',
  'class_rubric': {'1. Program Correctness and Functionality': {'Total Marks': 70,
    'Compilation and Execution': {'Total Marks': 10,
     'Program compiles without errors': 5,
     'Program runs without runtime errors': 5},
    'User Input Handling': {'Total Marks': 10,
     'Prompts the user to enter a string': 5,
     "Correctly reads and stores the user's input": 5},
    'String Manipulations': {'Total Marks': 40,
     'Displaying Original String': {'Total Marks': 5,
      'Outputs the original string with appropriate labeling': 5},
     'Converting to Uppercase': {'Total Marks': 10,
      'Converts the string to uppercase correctly using toUpperCase()': 5,
      'Displays the uppercase string with appropriate labeling': 5},
     'Reversing the String': {'Total Marks': 15,
      'Accurately reverses the string using appropriate methods': 10,
      'Displays the reversed string with appropriate labeling': 5},
     'Counting Characters': {'Total

In [54]:

# Function to evaluate student code based on model code and rubric
def evaluate_student_code(student_code: str, model_code: str, rubric_content: str, state: MessagesState):
    sys_msg = SystemMessage(content="You are a helpful assistant tasked with evaluating a student's Java code against a model solution based on a provided rubric. You will give numeric scores for each criterion and provide detailed comments, including correctness, errors, and suggestions for improvement.")

    # Prepare the human message containing the student code, model code, and rubric
    human_msg = HumanMessage(content=f"Here is a student's Java code submission:\n\n{student_code}\n\nAnd here is the correct model solution:\n\n{model_code}\n\nUsing the following rubric:\n\n{rubric_content}\n\nEvaluate the student's submission, providing a numeric score for each criterion from the rubric. Along with the score, give detailed comments on the correctness, errors, and suggestions for improvement.")

    # Update the state with the human message
    state["messages"].append(human_msg)
    llm_for_evaluation = ChatOpenAI(model="gpt-4o-mini")
    # Invoke the LLM using the system message and the state
    evaluation_response = llm_for_evaluation.invoke([sys_msg] + state["messages"])
    
    # Parse the AI response (assuming it returns the correct format directly)
    return evaluation_response

def initial_evaluator(extracted_student_classes, extracted_model_classes, extracted_rubric):
    evaluations=[]
    for i in extracted_student_classes:
        rubric=''
        model=''
        for class_dict in extracted_model_classes:
            if class_dict.get('class_name') == i['class_name']:
                model=class_dict['class_code']
        for class_dict in extracted_rubric:
            if class_dict.get('class_name') == i['class_name']:
                rubric=class_dict['class_rubric']
        state = MessagesState(messages=[])
        evaluations.append(evaluate_student_code(i['class_code'], model, rubric, state).content)
    return evaluations

evaluations=initial_evaluator(extracted_student_classes, extracted_model_classes, extracted_rubric)
evaluations

["### Evaluation of Student's Java Code Submission\n\n**1. Program Correctness and Functionality (Total Marks: 70)**\n\n- **Compilation and Execution (Total Marks: 10)**\n  - **Program compiles without errors (5/5)**: The code compiles successfully.\n  - **Program runs without runtime errors (5/5)**: The program executes without crashing, but there are logical errors.\n\n- **User Input Handling (Total Marks: 10)**\n  - **Prompts the user to enter a string (5/5)**: The prompt is correctly displayed.\n  - **Correctly reads and stores the user's input (3/5)**: The use of `sc.next()` reads only the next token, which may not capture the entire string if there are spaces. It should use `sc.nextLine()` to read the full line.\n\n- **String Manipulations (Total Marks: 40)**\n  - **Displaying Original String (5/5)**: Correctly outputs the original string with appropriate labeling.\n  - **Converting to Uppercase (0/10)**: Incorrect method used (`toLowerCase()` instead of `toUpperCase()`).\n  - **

In [69]:
#does not run
# PLEASE SKIP

def reevaluate_student_code(student_code: str, model_code: str, rubric_content: str, prior_evaluation: str, state: MessagesState):
    sys_msg = SystemMessage(content="You are a helpful assistant tasked with evaluating a student's Java code against a model solution based on a provided rubric. You will give numeric scores for each criterion and provide detailed comments, including correctness, errors, and suggestions for improvement.")

    # Prepare the human message containing the student code, model code, and rubric
    human_msg = HumanMessage(content=f"Here is a student's Java code submission:\n\n{student_code}\n\nAnd here is the correct model solution:\n\n{model_code}\n\nUsing the following rubric:\n\n{rubric_content}\n\nEvaluate the student's submission, providing a numeric score for each criterion from the rubric. Along with the score, give detailed comments on the correctness, errors, and suggestions for improvement.")

    # Update the state with the human message
    state["messages"].append(human_msg)
    llm_for_evaluation = ChatOpenAI(model="gpt-4o-mini")
    # Invoke the LLM using the system message and the state
    evaluation_response = llm_for_evaluation.invoke([sys_msg] + state["messages"])
    
    # Parse the AI response (assuming it returns the correct format directly)
    return evaluation_response

def re_evaluator(extracted_student_classes, extracted_model_classes, extracted_rubric, evaluations):
    evaluations=[]
    for x, i in enumerate(extracted_student_classes):
        rubric=''
        model=''
        for class_dict in extracted_model_classes:
            if class_dict.get('class_name') == i['class_name']:
                model=class_dict['class_code']
        for class_dict in extracted_rubric:
            if class_dict.get('class_name') == i['class_name']:
                rubric=class_dict['class_rubric']
        state = MessagesState(messages=[])
        evaluations.append(reevaluate_student_code(i['class_code'], model, rubric, evaluations[x], state).content)
    return evaluations

reevaluations=re_evaluator(extracted_student_classes, extracted_model_classes, extracted_rubric, evaluations)
reevaluations

IndexError: list index out of range

In [70]:
import ast

def extract_marks(evaluation):
    sys_msg = SystemMessage(content="You are a helpful assistant tasked with extracting stepwise makrs from the given evaluation.")

    # Prepare the human message containing the student code, model code, and rubric
    human_msg = HumanMessage(content=f"Here is a student's Java code evaluation:\n\n{evaluation}. Extract the stepwise marks given, and return a comma separated list of all the marks given for this question. Ensure that marks which have already been counted are not counted again. The output should be a clean list of numbers with no text anywhere.")

    # Update the state with the human message
    state["messages"].append(human_msg)
    llm_for_extraction = ChatOpenAI(model="gpt-4o-mini")
    # Invoke the LLM using the system message and the state
    extraction_response = llm_for_extraction.invoke([sys_msg] + state["messages"])
    
    # Parse the AI response (assuming it returns the correct format directly)
    return extraction_response

def extract_marks_for_all_classes(evaluations):
    marks=[]
    for i in evaluations:
        marks.extend(list(map(int, extract_marks(i).content.split(','))))
    return marks

list_of_marks=extract_marks_for_all_classes(evaluations)

In [64]:
list_of_marks

[5, 5, 3, 5, 0, 5, 5, 5, 2, 0, 0, 5, 5, 10]

In [25]:
def sum_marks(list_of_marks) -> int:
    return sum(list_of_marks)

tools = [sum_marks]
llm = ChatOpenAI(model="gpt-4o-mini")
llm_with_tools = llm.bind_tools(tools)

In [None]:
#WILL NOT RUN

from langgraph.graph import START, StateGraph
from langgraph.prebuilt import tools_condition, ToolNode
from IPython.display import Image, display

# Graph
builder = StateGraph(MessagesState)

# Define nodes: these do the work
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

# Define edges: these determine how the control flow moves
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    # If the latest message (result) from assistant is a tool call -> tools_condition routes to tools
    # If the latest message (result) from assistant is a not a tool call -> tools_condition routes to END
    tools_condition,
)
builder.add_edge("tools", "assistant")
react_graph = builder.compile()

# Show
display(Image(react_graph.get_graph(xray=True).draw_mermaid_png()))

In [65]:
#i did not have enough time to put it all in a chain, but the flow of the graph is a straight chain 
#from the start to the end with the functions/agents called in the order above

In [None]:
Link to the chat:

https://chatgpt.com/share/67065bd2-6234-800b-bc71-842393404dc9