In [1]:
import os, re
from pathlib import Path
import base64
from dotenv import load_dotenv
import openai
from openai import OpenAI

In [2]:
import pandas as pd
from tqdm import tqdm

In [3]:
# Pull variables from a local .env file into the process environment, then
# build the shared OpenAI client from the key found there.
load_dotenv()
api_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)

In [None]:
# System message shared by every generation call in this notebook: it fixes a
# neutral, matter-of-fact teacher persona for the feedback.
system_prompt = """You are a straightforward teacher who provides clear, balanced feedback without emphasizing any particular teaching philosophy or style.
Your response should:
- Address the student's work objectively and matter-of-factly
- Provide feedback that is neither overly encouraging nor overly critical
- Focus on the content and accuracy without emotional framing
- Give information and corrections in a neutral, informative manner
- Explain concepts clearly and concisely without unnecessary elaboration
- Avoid leading questions, excessive praise, or emotional support language
- Present information directly without trying to guide discovery or build relationships
- Use straightforward language that gets to the point efficiently
- Provide necessary corrections and suggestions without pedagogical commentary
- Maintain a professional but neutral stance toward the student's learning process

Tone: Neutral, informative, professional, matter-of-fact, unadorned.
Focus on delivering clear, accurate information and feedback without stylistic embellishment or particular teaching methodology."""

# User-message template for text-based feedback. It is filled with
# str.format(question=..., answer=...), so any literal brace added to the text
# later would have to be doubled.
user_prompt = """
# Task
Based on the given question and student answer, please generate a response to the student.

# Template of Output Response:
 - Your feedback response must strictly follow the template below; use ** to decorate the title of subsections (e.g., **title**).
 - The template is: `Strength; Weakness; Suggestions for Improvement`.

# Input Data
## Question: {question}. 
## Student Answer: {answer}.

Start your reply immediately with the feedback itself.  
Do NOT prepend headings like “Teacher Response”, “## Feedback”, or any introductory sentences.  
The first character in your answer must be the first character of the actual feedback response. 
## Teacher (you) Response [NO MORE THAN *500* WORDS]: 
"""

In [5]:
import json

# Question bank: later cells look entries up as example_qa["q<idx>"]['content'].
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so that
# non-ASCII question text loads identically on every platform instead of
# depending on the locale's default codec.
with open("example_answer.json", "r", encoding="utf-8") as f:
    example_qa = json.load(f)

In [None]:
# Location of the OCR results CSV. The commented lines below are a one-off
# CSV -> Excel export of that file, kept for reference.
ocr_path = './results/GPT-OCR-results-251216.csv'
# ocr_df = pd.read_csv(ocr_path)
# ocr_df.to_excel('GPT-OCR-results.xlsx', index=False, engine='openpyxl')

## setting 1 (step 2): feedback from code

In [52]:
import tiktoken

def num_tokens_from_messages(messages, model="gpt-5-nano"):
    """Estimate how many tokens a list of chat messages occupies.

    Args:
        messages: Iterable of dicts (e.g. ``{'role': ..., 'content': ...}``).
            Every value of every dict is tokenised and counted.
        model: Model name used to pick the tiktoken encoding.

    Returns:
        int: Sum of the encoded lengths of all values, plus a fixed overhead
        of 4 per message and 2 per request. This is an estimate, not the
        exact number billed by the API.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError for model names it has no mapping for;
        # fall back to a general-purpose encoding instead of aborting the run.
        encoding = tiktoken.get_encoding("o200k_base")

    num_tokens = 0
    for message in messages:
        for value in message.values():
            if value is None:
                # e.g. a message whose content is absent: nothing to count.
                continue
            if not isinstance(value, str):
                # Structured content (lists/dicts) is counted through its
                # string form, which is only a rough approximation.
                value = str(value)
            # disallowed_special=() makes special-token markup inside student
            # text count as ordinary text rather than raising ValueError.
            num_tokens += len(encoding.encode(value, disallowed_special=()))
        num_tokens += 4
    num_tokens += 2
    return num_tokens

def llm_generate(system_prompt: str, user_prompt: str, model: str = 'gpt-5-nano', max_token: int = 4096) -> "openai.types.chat.ChatCompletion":
    """Send one system + user exchange to the Chat Completions API.

    Args:
        system_prompt: Text of the system message.
        user_prompt: Text of the user message.
        model: Model name passed to the API and to the token estimator.
        max_token: Requested completion-token budget. It is raised in steps of
            512 until it exceeds the estimated prompt size by at least 512.

    Returns:
        The raw response object from ``client.chat.completions.create``.
        Callers read the text from ``response.choices[0].message.content``;
        no text extraction happens here.
    """
    messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': user_prompt}
    ]

    existing_num_token = num_tokens_from_messages(messages, model=model)

    # Grow the budget in 512-token steps until max_token - 512 >= prompt size.
    # Same result as incrementing in a loop, computed directly.
    step = 512
    shortfall = existing_num_token + step - max_token
    if shortfall > 0:
        max_token += step * -(-shortfall // step)  # ceil-divide, then scale

    response = client.chat.completions.create(
        model=model,
        max_completion_tokens=max_token,
        messages=messages
    )
    # The full response object is returned, not just its text.
    return response


In [50]:
# Inputs for setting 1: the OCR output (its 'answer_code' column supplies the
# student answers) and the results table that the generation loop fills in
# and re-saves to output_path1.
ocr_path = './results/GPT-OCR-results-251216A.csv'
output_path1 = './results/GPT-setting1-results-251216A.csv'

s1_results = pd.read_csv(output_path1, index_col=None)
ocr_results = pd.read_csv(ocr_path, index_col=None)


# for col in s1_results.columns:
#     s1_results[col] = s1_results[col].apply(
#         lambda x: "'" + x if isinstance(x, str) and x.startswith("=") else x
#     )
# s1_results = s1_results.sort_values(by=['q_idx', 'a_idx'])
# xlsx_path = output_path1.replace('.csv', '.xlsx')
# s1_results.to_excel(xlsx_path, index=False, engine='openpyxl')


In [30]:
# List every row that still lacks feedback (NaN or whitespace-only).
for idx, row in tqdm(s1_results.iterrows()):
    question, answer = f"q{row['q_idx']}", f"a{row['a_idx']}"
    has_feedback = isinstance(row['feedback'], str) and len(row['feedback'].strip()) > 0
    if not has_feedback:
        print(question, answer, row['feedback'])

25it [00:00, 14517.18it/s]

q1 a2 nan
q2 a1 nan
q2 a2 nan
q2 a3 nan
q4 a1 nan
q4 a2 nan
q5 a3 nan
q5 a4 nan
q5 a5 nan





In [None]:
# Remove the legacy 'answer' column when present; otherwise leave the frame untouched.
if "answer" in s1_results.columns:
    s1_results = s1_results.drop(columns=['answer'])

In [None]:
# s1_results.at[15, 'feedback'] = ""

In [51]:
# Upper bound on generation attempts per row. Each retry is a paid API call,
# so a row that keeps coming back empty is skipped instead of looping forever.
MAX_FEEDBACK_ATTEMPTS = 8

# The Excel mirror of the CSV lives next to it; the path never changes in the loop.
xlsx_path1 = output_path1.replace('.csv', '.xlsx')

for idx, row in tqdm(s1_results.iterrows(), total=len(s1_results)):
    question, answer = "q"+str(row['q_idx']), "a"+str(row['a_idx'])
    # Rows that already carry non-blank feedback are left alone, so the cell is resumable.
    if isinstance(row['feedback'], str) and len(row['feedback'].strip()) > 0:
        continue

    print(f"Processing question idx {row['q_idx']} and answer idx {row['a_idx']}...")

    # NOTE(review): the OCR row is looked up by the results-table index label,
    # which assumes both tables share the same row order/index - confirm.
    answer_text = ocr_results.loc[idx, 'answer_code']
    question_text = example_qa[question]['content']
    user_prompt_this = user_prompt.format(question=question_text, answer=answer_text)

    feedback, add_up = "", 0
    while str(feedback).strip() == "" and add_up < MAX_FEEDBACK_ATTEMPTS:
        if add_up > 0:
            print(f"current feedback is ```{feedback}```; len={len(str(feedback).strip())}; next add_up={add_up}")
        # Every retry asks for 512 more completion tokens than the previous one.
        response = llm_generate(system_prompt, user_prompt_this, max_token = 4096+add_up*512)
        add_up += 1
        content = response.choices[0].message.content if hasattr(response, 'choices') else response
        # A missing completion arrives as None; str(None) is "None", which would
        # otherwise pass the emptiness check and be saved as the feedback.
        feedback = "" if content is None else content

    if str(feedback).strip() == "":
        print(f"[WARN] No feedback for question idx {row['q_idx']} and answer idx {row['a_idx']} after {add_up} attempts; skipping.")
        continue

    print(f">> Feedback generated for question idx {row['q_idx']} and answer idx {row['a_idx']}.")
    # A leading "=" would be read as a formula by spreadsheet software; prefix a quote.
    feedback = "'" + feedback if isinstance(feedback, str) and feedback.startswith("=") else feedback
    s1_results.at[idx, 'question'] = question_text
    s1_results.at[idx, 'answer_code'] = answer_text
    s1_results.at[idx, 'feedback'] = feedback

    # Checkpoint after every row so an interrupted run loses nothing.
    s1_results.to_csv(output_path1, index=False)
    s1_results.to_excel(xlsx_path1, index=False, engine='openpyxl')

0it [00:00, ?it/s]

Processing question idx 5 and answer idx 3...


23it [00:30,  1.32s/it]

>> Feedback generated for question idx 5 and answer idx 3.
Processing question idx 5 and answer idx 4...
current feedback is ``````; len=0; next add_up=1
current feedback is ``````; len=0; next add_up=2


25it [02:52,  6.90s/it]

>> Feedback generated for question idx 5 and answer idx 4.





In [None]:
# cols = [c if c not in ['question','answer_code'] else ('answer_code' if c=='question' else 'question') for c in s1_results.columns.tolist()]
# s1_results = s1_results[cols]

# s1_results.to_csv(output_path1, index=False)
# xlsx_path1 = output_path1.replace('.csv', '.xlsx')
# s1_results.to_excel(xlsx_path1, index=False, engine='openpyxl')

In [None]:
# df1 = s1_results.drop(columns=['code'])
# df1.to_excel(output_path1.replace('csv', 'xlsx'), index=False, engine='openpyxl')

In [10]:
### make-up mode: generate feedback only for (q_idx, a_idx) pairs missing from the saved results
ocr_results = pd.read_excel('GPT-OCR-results-251209.xlsx', index_col=None)
s1_results = pd.read_excel('GPT-setting1-results.xlsx', index_col=None)

for idx, row in tqdm(ocr_results.iterrows()):
    q_idx, a_idx = row['q_idx'], row['a_idx']

    # Skip pairs that already have a row (compared as strings to ignore dtype differences).
    same_question = s1_results['q_idx'].astype(str) == str(q_idx)
    same_answer = s1_results['a_idx'].astype(str) == str(a_idx)
    if (same_question & same_answer).any():
        continue

    question, answer = f"q{q_idx}", f"a{a_idx}"
    answer_text = row['code']
    question_text = example_qa[question]['content']
    user_prompt_this = user_prompt.format(question=question_text, answer=answer_text)
    response = llm_generate(system_prompt, user_prompt_this)
    if hasattr(response, 'choices'):
        feedback = response.choices[0].message.content
    else:
        feedback = response

    new_row = {
        'q_idx': q_idx,
        'a_idx': a_idx,
        'question': question_text,
        'answer': answer_text,
        'feedback': feedback
    }
    # Append, re-sort and save immediately so progress survives an interruption.
    s1_results = pd.concat([s1_results, pd.DataFrame([new_row])], ignore_index=True)
    s1_results = s1_results.sort_values(by=['q_idx', 'a_idx'])
    s1_results.to_excel('GPT-setting1-results-251209.xlsx', index=False)

0it [00:00, ?it/s]

25it [00:23,  1.05it/s]


## setting 2: feedback directly from image

In [7]:
def encode_image(image_path: str) -> str:
    """Read the file at ``image_path`` and return its bytes as a base64 text string."""
    raw_bytes = Path(image_path).read_bytes()
    return base64.b64encode(raw_bytes).decode('utf-8')


def get_image_media_type(image_path: str) -> str:
    """Map a file name's extension (case-insensitive) to an image MIME type.

    Recognises .jpg/.jpeg, .png, .gif and .webp; any other extension, or none
    at all, yields 'image/jpeg'.
    """
    suffix = Path(image_path).suffix.lower()
    if suffix in ('.png', '.gif', '.webp'):
        # For these three the MIME subtype is the extension without its dot.
        return 'image/' + suffix[1:]
    # .jpg, .jpeg and every unrecognised extension share the JPEG type.
    return 'image/jpeg'

In [8]:
def llm_generate_cv(
    system_prompt: str,
    user_prompt: str,
    image_path: str = None,
    model: str = "gpt-5-nano",
    max_output_tokens: int = 8192
) -> "openai.types.responses.Response":
    """Send a text prompt, optionally with one image, to the Responses API.

    Args:
        system_prompt: Text of the system message.
        user_prompt: Text part of the user message; omitted when empty.
        image_path: Optional path to an image. When the file exists it is
            attached as a base64 data URL. When a path is given but the file
            is missing, an error line is printed and the request is still
            sent without an image.
        model: Model name passed to the API.
        max_output_tokens: Output-token cap passed to the API (default 8192,
            the value that used to be hard-coded).

    Returns:
        The raw response object from ``client.responses.create``; no text
        extraction happens here.
    """
    input_content = []

    # ---- text input ----
    if user_prompt:
        input_content.append({
            "type": "input_text",
            "text": user_prompt
        })

    # ---- image input ----
    if image_path and os.path.exists(image_path):
        image_data = encode_image(image_path)
        media_type = get_image_media_type(image_path)
        input_content.append({
            "type": "input_image",
            "image_url": f"data:{media_type};base64,{image_data}"
        })
    elif image_path:
        # Best effort: report the missing file but still issue the request.
        print(f"[ERROR] Image not found: {image_path}")

    # ---- Responses API ----
    response = client.responses.create(
        model=model,
        input=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": input_content
            }
        ],
        max_output_tokens=max_output_tokens
    )

    return response


In [9]:
image_dir = "./Images"
output_path2 = 'GPT-setting2-results-251216b.csv'

# Resume from an earlier run when its CSV exists; otherwise start with no records.
if os.path.exists(output_path2):
    results = pd.read_csv(output_path2, index_col=None).to_dict(orient='records')
else:
    results = []

In [11]:
# os.listdir(image_dir)

In [12]:
# User-message template for image-based feedback: the student answer is the
# attached image, so only the question is substituted, via
# str.format(question=...).
user_prompt2 = """
# Task
Based on the given question (shown below) and student answer (shown in the image), please generate a response to the student.

# Template of Output Response:
 - Your feedback response must strictly follow the template below; use ** to decorate the title of subsections (e.g., **title**).
 - The template is: `Strength; Weakness; Suggestions for Improvement`.

# Important:
 - Start your reply immediately with the feedback itself.  
 - Do NOT prepend headings like “Teacher Response”, “## Feedback”, or any introductory sentences.  
 - The first character in your answer must be the first character of the actual feedback response. 
 - If no student answer is provided in the image, please just say "NO IMAGE PROVIDED" without any other feedback.

# Input Question: {question}.
"""

In [13]:
# The Excel mirror of the CSV; the path is constant, so derive it once.
xlsx_path = output_path2.replace('.csv', '.xlsx')

for image_file in tqdm(sorted(os.listdir(image_dir))):
    if not image_file.lower().endswith('.png'):
        continue

    # \d+ captures the whole index; a single \d would read "q10" as question 1.
    q_match = re.findall(r'q(\d+)', image_file)
    a_match = re.findall(r'a(\d+)', image_file)

    if not q_match or not a_match:
        print(f'no q or a idx extracted - {image_file}')
        continue

    q_idx = str(q_match[0])
    a_idx = str(a_match[0])
    image_path = os.path.join(image_dir, image_file)

    # Resumable: skip pairs already present (loaded from disk or added this run).
    if any(str(r['q_idx']) == str(q_idx) and str(r['a_idx']) == str(a_idx) for r in results):
        continue

    print("processing q {} a {} ...".format(q_idx, a_idx))
    question_text = example_qa["q"+str(q_idx)]['content']
    user_prompt_this = user_prompt2.format(question=question_text)
    response = llm_generate_cv(system_prompt, user_prompt_this, image_path)

    if hasattr(response, 'choices'):
        feedback = response.choices[0].message.content
    elif hasattr(response, 'output'):
        # output_text is the SDK's aggregate of all text output; it does not
        # depend on the message sitting at a particular position in
        # response.output (e.g. after a reasoning item).
        feedback = response.output_text
    else:
        feedback = response

    results.append({
        'q_idx': q_idx,
        'a_idx': a_idx,
        'question': question_text,
        'answer_path': f"{image_file}",
        'feedback': str(feedback)
    })

    # Checkpoint after every image. Indices are stored as strings, so sort by
    # their numeric value to keep "10" after "2".
    df = pd.DataFrame(results)
    df = df.astype(str)
    df = df.sort_values(
        by=['q_idx', 'a_idx'],
        key=lambda col: pd.to_numeric(col, errors='coerce')
    )
    df.to_csv(output_path2, index=False)
    df.to_excel(xlsx_path, index=False)


  0%|          | 0/25 [00:00<?, ?it/s]

processing q 1 a 1 ...


  4%|▍         | 1/25 [00:23<09:23, 23.49s/it]

processing q 1 a 2 ...


  8%|▊         | 2/25 [00:46<08:57, 23.38s/it]

processing q 1 a 3 ...


 12%|█▏        | 3/25 [01:14<09:18, 25.38s/it]

processing q 1 a 4 ...


 16%|█▌        | 4/25 [01:52<10:40, 30.49s/it]

processing q 1 a 5 ...


 20%|██        | 5/25 [02:19<09:43, 29.20s/it]

processing q 2 a 1 ...


 24%|██▍       | 6/25 [02:57<10:09, 32.06s/it]

processing q 2 a 2 ...


 28%|██▊       | 7/25 [03:43<10:58, 36.59s/it]

processing q 2 a 3 ...


 32%|███▏      | 8/25 [04:45<12:41, 44.81s/it]

processing q 2 a 4 ...


 36%|███▌      | 9/25 [05:24<11:27, 42.95s/it]

processing q 2 a 5 ...


 40%|████      | 10/25 [06:06<10:39, 42.62s/it]

processing q 3 a 1 ...


 44%|████▍     | 11/25 [06:33<08:47, 37.71s/it]

processing q 3 a 2 ...


 48%|████▊     | 12/25 [06:54<07:07, 32.89s/it]

processing q 3 a 3 ...


 52%|█████▏    | 13/25 [07:23<06:20, 31.69s/it]

processing q 3 a 4 ...


 56%|█████▌    | 14/25 [07:56<05:50, 31.83s/it]

processing q 3 a 5 ...


 60%|██████    | 15/25 [08:27<05:17, 31.73s/it]

processing q 4 a 1 ...


 64%|██████▍   | 16/25 [09:03<04:57, 33.06s/it]

processing q 4 a 2 ...


 68%|██████▊   | 17/25 [09:48<04:52, 36.62s/it]

processing q 4 a 3 ...


 72%|███████▏  | 18/25 [10:30<04:26, 38.12s/it]

processing q 4 a 4 ...


 76%|███████▌  | 19/25 [11:16<04:03, 40.63s/it]

processing q 4 a 5 ...


 80%|████████  | 20/25 [11:57<03:22, 40.59s/it]

processing q 5 a 1 ...


 84%|████████▍ | 21/25 [12:32<02:36, 39.05s/it]

processing q 5 a 2 ...


 88%|████████▊ | 22/25 [13:06<01:52, 37.54s/it]

processing q 5 a 3 ...


 92%|█████████▏| 23/25 [13:34<01:09, 34.72s/it]

processing q 5 a 4 ...


 96%|█████████▌| 24/25 [14:14<00:36, 36.38s/it]

processing q 5 a 5 ...


100%|██████████| 25/25 [14:49<00:00, 35.59s/it]


In [47]:
response

Response(id='resp_060476285992a7c900694171df725c8190b087f13d1d30bbd3', created_at=1765896671.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-5-nano-2025-08-07', object='response', output=[ResponseReasoningItem(id='rs_060476285992a7c900694171e0572c819094f0343b1ac98314', summary=[], type='reasoning', status=None), ResponseOutputMessage(id='msg_060476285992a7c9006941720460b481909505a1922487d6a0', content=[ResponseOutputText(annotations=[], text='Strength; Weakness; Suggestions for Improvement\nStrength: No answer is provided to evaluate; cannot assess understanding from the submission.\nWeakness: The submission contains only a request about the image and does not include any solution steps or final result.\nSuggestions for Improvement: Provide a complete solution. For the integral ∫ (ln x / x)^2 dx = ∫ (ln x)^2 / x^2 dx, a correct approach is:\n- Use integration by parts with u = (ln x)^2 and dv = x^-2 dx, so v = -1/x and du = 2(ln x)/x dx.\n- Then I = ∫

In [None]:
# Inspect the generated text; output_text does not rely on the message being
# the second item of response.output.
response.output_text

'Strength; Weakness; Suggestions for Improvement\nStrength: No answer is provided to evaluate; cannot assess understanding from the submission.\nWeakness: The submission contains only a request about the image and does not include any solution steps or final result.\nSuggestions for Improvement: Provide a complete solution. For the integral ∫ (ln x / x)^2 dx = ∫ (ln x)^2 / x^2 dx, a correct approach is:\n- Use integration by parts with u = (ln x)^2 and dv = x^-2 dx, so v = -1/x and du = 2(ln x)/x dx.\n- Then I = ∫ (ln x)^2 / x^2 dx = -(ln x)^2/x + 2∫ (ln x)/x^2 dx.\n- For J = ∫ (ln x)/x^2 dx, use parts again with u = ln x and dv = x^-2 dx, giving J = -(ln x)/x - 1/x + C.\n- Combine: I = -(ln x)^2/x + 2[-(ln x)/x - 1/x] + C = -[(ln x)^2 + 2 ln x + 2]/x + C.\n- Domain note: x > 0.'

In [14]:
import ast

# Some saved rows hold str(response) - the repr of a whole Responses API
# object - instead of the feedback text. This pattern captures the quoted
# `text=` field of each ResponseOutputText(...) inside such a repr.
# ast.literal_eval cannot rebuild the response object itself (it only parses
# Python literals), so the text is recovered from the repr directly.
_OUTPUT_TEXT_IN_REPR = re.compile(
    r"""ResponseOutputText\(.*?\btext=('(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*")""",
    re.DOTALL,
)

for item in results:
    raw_feedback = item['feedback']
    feedback_text = str(raw_feedback)

    if isinstance(raw_feedback, str) and 'ResponseOutputText(' in raw_feedback:
        try:
            # Each capture is a quoted Python string literal; literal_eval
            # turns it back into the text, undoing escapes such as \n.
            pieces = [ast.literal_eval(lit) for lit in _OUTPUT_TEXT_IN_REPR.findall(raw_feedback)]
        except (ValueError, SyntaxError):
            pieces = []
        if pieces:
            feedback_text = "".join(pieces)
        # With nothing recoverable (e.g. a truncated repr) the value stays as it was.

    # --- write the normalised text back into results ---
    item['feedback'] = feedback_text

In [15]:
# Write the cleaned results under a new "c" suffix, as CSV and as Excel.
output_path3 = output_path2.replace('b.csv', 'c.csv')
xlsx_path3 = output_path3.replace('.csv', '.xlsx')

df = pd.DataFrame(results).astype(str).sort_values(by=['q_idx', 'a_idx'])
df.to_csv(output_path3, index=False)
df.to_excel(xlsx_path3, index=False)

In [30]:
# Swap only the file extension; a plain replace('csv', 'xlsx') would also
# rewrite any "csv" appearing elsewhere in the path.
df.to_excel(str(Path(output_path2).with_suffix('.xlsx')), index=False, engine='openpyxl')

In [14]:
### make-up mode: generate feedback only for images whose (q_idx, a_idx) is missing from the saved results

image_dir = "./Images"
output_path2 = 'GPT-setting2-results-251209.xlsx'
s2_results = pd.read_excel('GPT-setting2-results.xlsx', index_col=None)

# Pairs that already have a row, held as strings so they compare equal to the
# regex captures below. Pairs added during this run are recorded as well.
done_pairs = set(zip(s2_results['q_idx'].astype(str), s2_results['a_idx'].astype(str)))
new_rows = []

for image_file in tqdm(sorted(os.listdir(image_dir))):
    if not image_file.lower().endswith('.png'):
        continue

    # \d+ captures the whole index; a single \d would read "q10" as question 1.
    q_match = re.findall(r'q(\d+)', image_file)
    a_match = re.findall(r'a(\d+)', image_file)

    if not q_match or not a_match:
        print(f'no q or a idx extracted - {image_file}')
        continue

    q_idx = str(q_match[0])
    a_idx = str(a_match[0])

    if (q_idx, a_idx) in done_pairs:
        continue

    image_path = os.path.join(image_dir, image_file)
    print("processing q {} a {} ...".format(q_idx, a_idx))
    question_text = example_qa["q"+str(q_idx)]['content']
    user_prompt_this = user_prompt.format(question=question_text, answer="`Please refer to the image provided.`")
    response = llm_generate_cv(system_prompt, user_prompt_this, image_path)

    # llm_generate_cv returns a Responses API object, which has no `.choices`;
    # falling through to str(response) would store the whole object repr as
    # the feedback. Read the generated text instead.
    if hasattr(response, 'choices'):
        feedback = response.choices[0].message.content
    elif hasattr(response, 'output'):
        feedback = response.output_text
    else:
        feedback = response

    new_rows.append({
        'q_idx': q_idx,
        'a_idx': a_idx,
        'question': question_text,
        'answer': image_file,
        'feedback': str(feedback)
    })
    done_pairs.add((q_idx, a_idx))

# Append everything in one concat instead of growing the frame row by row.
if new_rows:
    s2_results = pd.concat([s2_results, pd.DataFrame(new_rows)], ignore_index=True)
    # Indices are strings after astype(str); sort by numeric value so "10" follows "2".
    s2_results = s2_results.astype(str).sort_values(
        by=['q_idx', 'a_idx'],
        key=lambda col: pd.to_numeric(col, errors='coerce')
    )

s2_results.to_excel(output_path2, index=False)

100%|██████████| 25/25 [00:00<00:00, 2602.38it/s]
