In [None]:
!pip -q install pandas openpyxl requests

import os, re, json, time
import pandas as pd
import requests


In [None]:
def get_openrouter_key():
    """Look up the OpenRouter API key, trying Colab Secrets before the environment.

    The Colab lookup is best-effort: if importing ``google.colab`` or reading
    the secret raises anything, the failure is ignored and the
    ``OPENROUTER_API_KEY`` environment variable is consulted instead.

    Returns:
        The key string, or None when neither source provides a non-empty value.
    """
    secret = None
    try:
        from google.colab import userdata  # type: ignore
        secret = userdata.get("OPENROUTER_API_KEY")
    except Exception:
        # Deliberately swallowed: outside Colab this import is expected to fail.
        secret = None
    return secret if secret else os.environ.get("OPENROUTER_API_KEY")

# Resolve the credential once and stop immediately, with setup instructions,
# if it is missing; every later cell sends it as a Bearer token.
OPENROUTER_API_KEY = get_openrouter_key()
assert OPENROUTER_API_KEY, "\n".join([
    "ERROR: Set OPENROUTER_API_KEY in Colab Secrets OR as environment variable.",
    "Colab: Secrets -> OPENROUTER_API_KEY",
    "Local: export OPENROUTER_API_KEY='...'",
])

# Chat-completions endpoint that all requests in this notebook are posted to.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"


In [None]:
# Short alias -> OpenRouter model id. "qwen" and "gpt_oss" write the Stage A
# proposals and the Stage B cross-critiques; "judge" produces the Stage C result.
MODELS = dict(
    qwen="qwen/qwen3-coder",
    gpt_oss="openai/gpt-oss-120b",
    judge="deepseek/deepseek-v3.2",
)


In [None]:
# Workbook of buggy submissions, resolved relative to the working directory.
INPUT_PATH = "syntax_100_hybrid_unique.xlsx"
df = pd.read_excel(INPUT_PATH)

# Quick sanity check of what was loaded before any model is called.
print(f"Loaded rows: {len(df)}")
print(f"Columns: {list(df.columns)}")
df.head(2)


Loaded rows: 100
Columns: ['question', 'code', 'exec_feedback', 'error_type', 'error_group', 'question_id', 'row_no']


Unnamed: 0,question,code,exec_feedback,error_type,error_group,question_id,row_no
0,Add two numbers using a python script,"int add(int a, int b)\n{\n int c = 0;\n c = ...",Error: Traceback (most recent call last):\n F...,SYNTAX_SyntaxError,syn,1,22
1,Define a function called <code>counter(s)</cod...,def counter(s):\n lump = s.lower().split()\n ...,Error: Traceback (most recent call last):\n F...,SYNTAX_SyntaxError,syn,3,31


In [None]:
# Names of the input-sheet columns that the stages below read.
QID_COL = "question_id"
ROW_COL = "row_no"
QUES_COL = "question"
CODE_COL = "code"
EXEC_COL = "exec_feedback"
ETYPE_COL = "error_type"
EGROUP_COL = "error_group"

# Working set for the pipeline: a re-indexed copy of every loaded row.
# For a cheap dry run, take a slice of df here instead (for example df.head(2)).
picked = df.copy().reset_index(drop=True)

print(f"Picked rows: {len(picked)}")
picked.loc[:, [QID_COL, ROW_COL, ETYPE_COL, EGROUP_COL]].head(2)


Picked rows: 100


Unnamed: 0,question_id,row_no,error_type,error_group
0,1,22,SYNTAX_SyntaxError,syn
1,3,31,SYNTAX_SyntaxError,syn


In [None]:
def call_openrouter(model_id, messages, temperature=0.1, max_tokens=900, retries=3, sleep_s=1.2):
    """POST a chat-completion request to OpenRouter and return the reply text.

    Args:
        model_id: OpenRouter model identifier sent as ``model``.
        messages: Chat messages list sent as ``messages``.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        retries: Maximum number of attempts.
        sleep_s: Base delay in seconds; attempt ``i`` (0-based) is followed by
            a pause of ``sleep_s * (i + 1)`` before the next attempt.

    Returns:
        The assistant message content when a non-blank reply is received.
        Otherwise a string starting with ``"__ERROR__ "`` followed by the last
        error seen. Request and parsing failures are reported this way rather
        than raised, so callers must check for the prefix.
    """
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
    }
    body = {
        "model": model_id,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    # Client errors that will fail identically on every retry (bad request,
    # bad credentials, no credits, forbidden, unknown route/model).
    non_retryable = (400, 401, 402, 403, 404)

    last_err = None
    for attempt in range(retries):
        try:
            r = requests.post(OPENROUTER_URL, headers=headers, data=json.dumps(body), timeout=90)
            if r.status_code != 200:
                last_err = f"HTTP {r.status_code}: {r.text[:400]}"
                if r.status_code in non_retryable:
                    break
            else:
                data = r.json()
                # "content" may be present but null; treat that as empty text
                # instead of crashing on None.strip().
                content = data["choices"][0]["message"].get("content") or ""
                if content.strip():
                    return content
                last_err = "Empty content"
        except Exception as e:
            # Include the exception type: str(KeyError("choices")) alone is
            # just "'choices'", which is hard to diagnose in the output sheet.
            last_err = f"{type(e).__name__}: {e}"
        # Back off only when another attempt will actually follow.
        if attempt < retries - 1:
            time.sleep(sleep_s * (attempt + 1))
    return f"__ERROR__ {last_err}"

def extract_json(text):
    """Extract the JSON object that starts at the first ``{`` of a model reply.

    Anything before the first ``{`` (prose, a Markdown code fence) and anything
    after the end of the object (closing fence, trailing commentary) is
    ignored. The object is parsed with the real JSON decoder, so braces that
    appear inside string values do not confuse the extraction.

    Args:
        text: Raw reply text; non-string input is rejected.

    Returns:
        The decoded dict, or None when ``text`` is not a string, contains no
        ``{``, or no valid JSON object starts at the first ``{`` (for example a
        truncated reply).
    """
    if not isinstance(text, str):
        return None
    start = text.find("{")
    if start == -1:
        return None
    try:
        # raw_decode parses one JSON value beginning at `start` and reports
        # where it ended, tolerating any text that follows it.
        obj, _end = json.JSONDecoder().raw_decode(text, start)
    except (ValueError, RecursionError):
        return None
    return obj if isinstance(obj, dict) else None


In [None]:
def compile_ok(code: str):
    """Report whether ``code`` compiles as a Python module, without running it.

    Returns:
        ``(True, "")`` when ``compile`` succeeds, otherwise ``(False, message)``
        where ``message`` is the text of whatever exception ``compile`` raised.
    """
    try:
        compile(code, "<student_code>", "exec")
    except Exception as exc:
        return False, str(exc)
    else:
        return True, ""


In [None]:
# Reply contract for Stage A. "fixed_code" is requested only so the proposed
# fix can be compile-checked; the student-facing text is "helpful_output".
STAGEA_SCHEMA = """Return VALID JSON only (no markdown).
Schema:
{
  "helpful_output": "2-5 sentences. Mention what the syntax error indicates and what kind of correction is needed. Do NOT paste full corrected code.",
  "fix_outline": ["1-3 bullets describing the kind of edits (no full code)"],
  "fixed_code": "full corrected Python code (for verification only)",
  "confidence": 0.0
}
"""

def build_stageA_prompt(question_text, code, exec_feedback):
    """Assemble the Stage A (proposal) user prompt for one submission.

    Args:
        question_text: The exercise statement.
        code: The student's buggy code.
        exec_feedback: The execution feedback produced for that code.

    Returns:
        The prompt text: role line, goals, the Stage A schema, then the three
        inputs, each wrapped in ``<<<`` / ``>>>`` delimiters.
    """
    def fenced(header, body):
        # One delimited input section of the prompt.
        return f"{header}\n<<<\n{body}\n>>>"

    goals = [
        "Goals:",
        "- Explain what the execution feedback indicates and where it likely occurs.",
        "- Describe the kind of correction needed (missing colon, indentation mismatch, bracket/quote mismatch, etc.).",
        "- Do NOT provide the entire corrected program in helpful_output.",
        "- Still produce fixed_code for verification.",
    ]
    sections = [
        "You are a Python TA writing a helpful message when the student's program has a SYNTAX error.",
        "\n".join(goals),
        STAGEA_SCHEMA,
        fenced("QUESTION:", question_text),
        fenced("BUGGY CODE:", code),
        fenced("EXECUTION FEEDBACK:", exec_feedback),
    ]
    return "\n\n".join(sections).strip()

# Stage A: every proposer model drafts a helper message plus a full fix for
# each picked row; the fix is only compile-checked, never shown to students.
stageA_rows = []
for _, row in picked.iterrows():
    qid = row[QID_COL]
    row_index = int(row[ROW_COL])
    # Empty spreadsheet cells load as NaN, and NaN is truthy, so
    # `str(value or "")` would put the literal text "nan" into the prompt.
    question_text, code, fb = (
        "" if pd.isna(value) else str(value)
        for value in (row[QUES_COL], row[CODE_COL], row[EXEC_COL])
    )

    # The prompt is identical for both proposers, so build it once per row.
    prompt = build_stageA_prompt(question_text, code, fb)

    for proposer_key in ["qwen", "gpt_oss"]:
        raw = call_openrouter(
            MODELS[proposer_key],
            messages=[
                {"role": "system", "content": "Return JSON only. No markdown."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
            max_tokens=1000,
        )

        js = extract_json(raw)
        parsed = isinstance(js, dict)  # False for fetch errors and unparseable replies
        fixed_code = js.get("fixed_code") if parsed else None
        if isinstance(fixed_code, str):
            ok, err = compile_ok(fixed_code)
        else:
            ok, err = False, "No fixed_code"

        stageA_rows.append({
            "qid": qid,
            "row_index": row_index,
            "proposer": proposer_key,
            "raw": raw,
            "helpful_output": js.get("helpful_output") if parsed else None,
            "fix_outline": json.dumps(js.get("fix_outline"), ensure_ascii=False) if parsed else None,
            "confidence": js.get("confidence") if parsed else None,
            "compile_ok": ok,
            "compile_error": err[:300] if err else "",
            "fixed_code": fixed_code,   # kept only for verification; not shown to students
        })

stageA_df = pd.DataFrame(stageA_rows)
stageA_df[["qid","row_index","proposer","confidence","compile_ok","compile_error","helpful_output"]]


Unnamed: 0,qid,row_index,proposer,confidence,compile_ok,compile_error,helpful_output
0,1,22,qwen,0.95,True,,The syntax error indicates that Python doesn't...
1,1,22,gpt_oss,1.00,True,,The error points to the line where you wrote '...
2,3,31,qwen,0.95,True,,The syntax error indicates that the 'return' s...
3,3,31,gpt_oss,0.99,True,,"The error says 'return' outside function, whic..."
4,11,47,qwen,0.95,True,,The syntax error indicates that Python encount...
...,...,...,...,...,...,...,...
195,605,46079,gpt_oss,0.99,True,,"The error points to the line ""palindrome(strin..."
196,624,46955,qwen,0.95,True,,The execution feedback indicates an Indentatio...
197,624,46955,gpt_oss,0.97,True,,The traceback shows an IndentationError on lin...
198,607,47292,qwen,0.95,True,,The syntax error indicates that Python encount...


In [None]:
# Save under Colab's /content when it exists (same path as before); otherwise
# use the current directory so the notebook also runs outside Colab.
OUT_STAGEA = os.path.join("/content" if os.path.isdir("/content") else ".", "stageA_proposals.xlsx")
stageA_df.to_excel(OUT_STAGEA, index=False)
print("Saved:", OUT_STAGEA)


Saved: /content/stageA_proposals.xlsx


In [None]:
# Reply contract for Stage B: a graded critique of the other model's message.
STAGEB_SCHEMA = """Return VALID JSON only (no markdown).
Schema:
{
  "evidence_alignment": "pass|weak|fail",
  "helpfulness": "pass|weak|fail",
  "issues": [
    {"severity":"low|med|high", "issue":"short", "evidence":"quote from feedback/code"}
  ],
  "suggested_improvement": "1-2 sentences (no full corrected code)"
}
"""

def build_stageB_prompt(question_text, code, exec_feedback, target_helpful_output, target_compile_ok):
    """Assemble the Stage B (cross-critique) user prompt.

    Args:
        question_text: The exercise statement.
        code: The student's buggy code.
        exec_feedback: The execution feedback produced for that code.
        target_helpful_output: The other model's helper message to review.
        target_compile_ok: Whether that model's proposed fix compiled.

    Returns:
        The prompt text: reviewer instructions, the Stage B schema, the three
        inputs and the reviewed message (each ``<<<`` / ``>>>`` delimited),
        and a final line stating the compile signal.
    """
    def fenced(header, body):
        # One delimited input section of the prompt.
        return f"{header}\n<<<\n{body}\n>>>"

    criteria = [
        "Critique criteria:",
        "1) Evidence alignment: Does it match the given exec feedback?",
        "2) Correctness: Does it identify the right kind of syntax mistake?",
        "3) Helpfulness: Is it specific enough without giving the full solution?",
        "4) If compile_ok is false, treat the underlying fix as unreliable.",
    ]
    sections = [
        "You are reviewing another assistant's SYNTAX-error helper message.",
        "\n".join(criteria),
        "Do NOT provide full corrected code.",
        STAGEB_SCHEMA,
        fenced("QUESTION:", question_text),
        fenced("BUGGY CODE:", code),
        fenced("EXECUTION FEEDBACK:", exec_feedback),
        fenced("OTHER ASSISTANT MESSAGE:", target_helpful_output),
        f"UNDERLYING FIX COMPILES? {target_compile_ok}",
    ]
    return "\n\n".join(sections).strip()

# Stage B: each proposer reviews the other proposer's Stage A message.
stageB_rows = []
for _, row in picked.iterrows():
    qid = row[QID_COL]
    row_index = int(row[ROW_COL])
    # Empty spreadsheet cells load as NaN, and NaN is truthy, so
    # `str(value or "")` would put the literal text "nan" into the prompt.
    question_text, code, fb = (
        "" if pd.isna(value) else str(value)
        for value in (row[QUES_COL], row[CODE_COL], row[EXEC_COL])
    )

    # Match on row_index as well as qid, in case a question_id appears on more
    # than one input row; qid alone would then return another row's proposal.
    same_row = (stageA_df.qid == qid) & (stageA_df.row_index == row_index)
    proposals = {
        key: stageA_df[same_row & (stageA_df.proposer == key)].iloc[0]
        for key in ("qwen", "gpt_oss")
    }

    # (critic, target) pairs, in the order rows are appended:
    # Qwen critiques GPT-OSS first, then GPT-OSS critiques Qwen.
    for critic_key, target_key in (("qwen", "gpt_oss"), ("gpt_oss", "qwen")):
        target = proposals[target_key]
        critique_prompt = build_stageB_prompt(
            question_text, code, fb,
            str(target["helpful_output"]),
            bool(target["compile_ok"]),
        )
        raw = call_openrouter(
            MODELS[critic_key],
            messages=[
                {"role": "system", "content": "Return JSON only. No markdown."},
                {"role": "user", "content": critique_prompt},
            ],
            temperature=0.1,
            max_tokens=650,
        )
        js = extract_json(raw)

        stageB_rows.append({
            "qid": qid,
            "row_index": row_index,
            "critic": critic_key,
            "target": target_key,
            "raw": raw,
            "json": json.dumps(js, ensure_ascii=False) if isinstance(js, dict) else None,
        })

stageB_df = pd.DataFrame(stageB_rows)
stageB_df


Unnamed: 0,qid,row_index,critic,target,raw,json
0,1,22,qwen,gpt_oss,"{\n ""evidence_alignment"": ""pass"",\n ""helpful...","{""evidence_alignment"": ""pass"", ""helpfulness"": ..."
1,1,22,gpt_oss,qwen,"{\n ""evidence_alignment"": ""pass"",\n ""helpful...","{""evidence_alignment"": ""pass"", ""helpfulness"": ..."
2,3,31,qwen,gpt_oss,"{\n ""evidence_alignment"": ""pass"",\n ""helpful...","{""evidence_alignment"": ""pass"", ""helpfulness"": ..."
3,3,31,gpt_oss,qwen,"JSON.\n\n{\n ""evidence_alignment"": ""pass"",\n ...","{""evidence_alignment"": ""pass"", ""helpfulness"": ..."
4,11,47,qwen,gpt_oss,"{\n ""evidence_alignment"": ""pass"",\n ""helpful...","{""evidence_alignment"": ""pass"", ""helpfulness"": ..."
...,...,...,...,...,...,...
195,605,46079,gpt_oss,qwen,"{\n ""evidence_alignment"": ""pass"",\n ""helpful...","{""evidence_alignment"": ""pass"", ""helpfulness"": ..."
196,624,46955,qwen,gpt_oss,"{\n ""evidence_alignment"": ""pass"",\n ""helpful...","{""evidence_alignment"": ""pass"", ""helpfulness"": ..."
197,624,46955,gpt_oss,qwen,"{\n ""evidence_alignment"": ""pass"",\n ""helpful...","{""evidence_alignment"": ""pass"", ""helpfulness"": ..."
198,607,47292,qwen,gpt_oss,"{\n ""evidence_alignment"": ""pass"",\n ""helpful...","{""evidence_alignment"": ""pass"", ""helpfulness"": ..."


In [None]:
# Save under Colab's /content when it exists (same path as before); otherwise
# use the current directory so the notebook also runs outside Colab.
OUT_STAGEB = os.path.join("/content" if os.path.isdir("/content") else ".", "stageB_critiques.xlsx")
stageB_df.to_excel(OUT_STAGEB, index=False)
print("Saved:", OUT_STAGEB)


Saved: /content/stageB_critiques.xlsx


In [None]:
# Reply contract for Stage C: the judge's final student-facing message.
STAGEC_SCHEMA = """Return VALID JSON only (no markdown).
Schema:
{
  "chosen_source": "qwen|gpt_oss|hybrid",
  "final_helpful_output": "2-6 sentences. Evidence-grounded, points to what to fix and where, without dumping full corrected code.",
  "final_fix_outline": ["1-3 bullets describing what to change (no full code)"],
  "confidence": 0.0
}
"""

def build_stageC_prompt(question_text, code, exec_feedback, qwen_pack, oss_pack, critiques_pack):
    """Assemble the Stage C (judge) user prompt.

    Args:
        question_text: The exercise statement.
        code: The student's buggy code.
        exec_feedback: The execution feedback produced for that code.
        qwen_pack: Text describing the Qwen proposal and its compile signal.
        oss_pack: Text describing the GPT-OSS proposal and its compile signal.
        critiques_pack: Text holding the Stage B cross-critiques.

    Returns:
        The prompt text: judge instructions, the Stage C schema, then the six
        inputs, each wrapped in ``<<<`` / ``>>>`` delimiters.
    """
    def fenced(header, body):
        # One delimited input section of the prompt.
        return f"{header}\n<<<\n{body}\n>>>"

    goal = [
        "Goal:",
        "- Output ONLY a strong final_helpful_output that is specific and evidence-grounded.",
        "- It can mention line numbers / constructs like missing ')' or wrong indentation.",
        "- Do NOT reveal the entire corrected program or paste full code.",
        "- Use critiques to remove mistakes and improve clarity.",
        "- Use compile_ok signals only as a reliability hint.",
    ]
    sections = [
        "You are the final judge improving a SYNTAX-error helper message.",
        "\n".join(goal),
        STAGEC_SCHEMA,
        fenced("QUESTION:", question_text),
        fenced("BUGGY CODE:", code),
        fenced("EXECUTION FEEDBACK:", exec_feedback),
        fenced("PROPOSAL: QWEN (message + compile signal)", qwen_pack),
        fenced("PROPOSAL: GPT_OSS (message + compile signal)", oss_pack),
        fenced("CROSS-CRITIQUES:", critiques_pack),
    ]
    return "\n\n".join(sections).strip()

# Stage C: the judge model merges both proposals and both critiques into the
# final helper message for each picked row.
stageC_rows = []
for _, row in picked.iterrows():
    qid = row[QID_COL]
    row_index = int(row[ROW_COL])
    # Empty spreadsheet cells load as NaN, and NaN is truthy, so
    # `str(value or "")` would put the literal text "nan" into the prompt.
    question_text, code, fb = (
        "" if pd.isna(value) else str(value)
        for value in (row[QUES_COL], row[CODE_COL], row[EXEC_COL])
    )

    # Match on row_index as well as qid, in case a question_id appears on more
    # than one input row; qid alone would then mix material from other rows.
    in_stageA = (stageA_df.qid == qid) & (stageA_df.row_index == row_index)
    in_stageB = (stageB_df.qid == qid) & (stageB_df.row_index == row_index)
    qwenA = stageA_df[in_stageA & (stageA_df.proposer == "qwen")].iloc[0]
    ossA  = stageA_df[in_stageA & (stageA_df.proposer == "gpt_oss")].iloc[0]
    crits = stageB_df[in_stageB][["critic","target","json"]].to_dict(orient="records")

    # Only the message and reliability signals are forwarded to the judge;
    # the proposers' fixed_code is intentionally left out.
    qwen_pack = {
        "helpful_output": qwenA["helpful_output"],
        "fix_outline": qwenA["fix_outline"],
        "confidence": qwenA["confidence"],
        "compile_ok": bool(qwenA["compile_ok"]),
        "compile_error": qwenA["compile_error"],
    }
    oss_pack = {
        "helpful_output": ossA["helpful_output"],
        "fix_outline": ossA["fix_outline"],
        "confidence": ossA["confidence"],
        "compile_ok": bool(ossA["compile_ok"]),
        "compile_error": ossA["compile_error"],
    }

    prompt = build_stageC_prompt(
        question_text, code, fb,
        json.dumps(qwen_pack, ensure_ascii=False),
        json.dumps(oss_pack, ensure_ascii=False),
        json.dumps(crits, ensure_ascii=False),
    )

    raw = call_openrouter(
        MODELS["judge"],
        messages=[
            {"role": "system", "content": "Return JSON only. No markdown."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.1,
        max_tokens=900
    )

    js = extract_json(raw)
    parsed = isinstance(js, dict)  # False for fetch errors and unparseable replies

    stageC_rows.append({
        "qid": qid,
        "row_index": row_index,
        "judge_raw": raw,
        "judge_json": json.dumps(js, ensure_ascii=False) if parsed else None,
        "chosen_source": js.get("chosen_source") if parsed else None,
        "confidence": js.get("confidence") if parsed else None,
        "final_helpful_output": js.get("final_helpful_output") if parsed else None,
        "final_fix_outline": json.dumps(js.get("final_fix_outline"), ensure_ascii=False) if parsed else None,
    })

stageC_df = pd.DataFrame(stageC_rows)
stageC_df[["qid","row_index","chosen_source","confidence","final_helpful_output"]]


Unnamed: 0,qid,row_index,chosen_source,confidence,final_helpful_output
0,1,22,hybrid,0.95,The syntax error occurs at line 1 where you're...
1,3,31,hybrid,0.99,The syntax error occurs because the 'return' s...
2,11,47,hybrid,0.99,The syntax error occurs on line 1 where the fu...
3,4,120,gpt_oss,0.99,The SyntaxError on line 6 is due to an incompl...
4,29,563,hybrid,0.98,The SyntaxError on line 1 points to a missing ...
...,...,...,...,...,...
95,557,44413,hybrid,0.99,The syntax error 'unexpected EOF while parsing...
96,552,44940,hybrid,0.99,The syntax error occurs on line 1 because the ...
97,605,46079,gpt_oss,0.99,The syntax error occurs at 'palindrome(string ...
98,624,46955,hybrid,0.98,The IndentationError on line 2 occurs because ...


In [None]:
# Save under Colab's /content when it exists (same paths as before); otherwise
# use the current directory so the notebook also runs outside Colab.
out_dir = "/content" if os.path.isdir("/content") else "."

OUT_STAGEC = os.path.join(out_dir, "stageC_final_helpful_output.xlsx")
stageC_df.to_excel(OUT_STAGEC, index=False)
print("Saved:", OUT_STAGEC)

# Student-facing = just the final outputs (no raw)
final_student = stageC_df[[
    "qid","row_index","chosen_source","confidence","final_helpful_output","final_fix_outline"
]].copy()

OUT_FINAL = os.path.join(out_dir, "final_student_helpful_output.xlsx")
final_student.to_excel(OUT_FINAL, index=False)
print("Saved:", OUT_FINAL)

print("Outputs:")
print(OUT_STAGEA)
print(OUT_STAGEB)
print(OUT_STAGEC)
print(OUT_FINAL)


Saved: /content/stageC_final_helpful_output.xlsx
Saved: /content/final_student_helpful_output.xlsx
Outputs:
/content/stageA_proposals.xlsx
/content/stageB_critiques.xlsx
/content/stageC_final_helpful_output.xlsx
/content/final_student_helpful_output.xlsx


In [None]:
def safe_load_json(s):
    """Parse ``s`` as JSON, returning None instead of raising.

    Returns:
        The decoded value, or None when ``s`` is not a string, is empty or
        whitespace-only, or is not valid JSON.
    """
    if not isinstance(s, str) or not s.strip():
        return None
    try:
        return json.loads(s)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; nothing else is swallowed, so
        # an interrupt or a programming error still surfaces.
        return None

def critique_compact(json_str, max_issues=2):
    """Condense one stored Stage B critique into two short strings.

    Args:
        json_str: The critique as a JSON string (None or blank is accepted).
        max_issues: Maximum number of issues to include.

    Returns:
        ``(score, issues)`` where ``score`` is
        ``"evidence=<evidence_alignment>, helpful=<helpfulness>"`` and
        ``issues`` joins up to ``max_issues`` entries as ``"[severity] issue"``
        with ``" | "``. Returns ``("", "")`` when ``json_str`` does not decode
        to a JSON object.
    """
    js = safe_load_json(json_str)
    if not isinstance(js, dict):
        return "", ""
    ea = js.get("evidence_alignment", "")
    hp = js.get("helpfulness", "")
    issues = js.get("issues") or []
    # Models do not always follow the schema: tolerate a single issue given
    # without the surrounding list, and entries that are plain strings.
    if not isinstance(issues, list):
        issues = [issues]
    out = []
    for it in issues[:max_issues]:
        if isinstance(it, dict):
            out.append(f"[{it.get('severity', '')}] {it.get('issue', '')}")
        else:
            out.append(str(it))
    return f"evidence={ea}, helpful={hp}", " | ".join(out)

# Build one compact audit row per picked input row, lining up both proposals,
# both critiques and the judge's decision side by side.
audit_rows = []
for _, row in picked.iterrows():
    qid = row[QID_COL]
    row_index = int(row[ROW_COL])

    # Empty spreadsheet cells load as NaN, and NaN is truthy, so
    # `str(value or "")` would record the literal text "nan".
    question_text, error_type, error_group, exec_fb = (
        "" if pd.isna(value) else str(value)
        for value in (
            row[QUES_COL],
            row.get(ETYPE_COL, ""),
            row.get(EGROUP_COL, ""),
            row[EXEC_COL],
        )
    )

    # Match on row_index as well as qid, in case a question_id appears on more
    # than one input row; qid alone would then pull another row's results.
    in_stageA = (stageA_df.qid == qid) & (stageA_df.row_index == row_index)
    in_stageB = (stageB_df.qid == qid) & (stageB_df.row_index == row_index)
    in_stageC = (stageC_df.qid == qid) & (stageC_df.row_index == row_index)

    qwenA = stageA_df[in_stageA & (stageA_df.proposer == "qwen")].iloc[0]
    ossA  = stageA_df[in_stageA & (stageA_df.proposer == "gpt_oss")].iloc[0]

    c1 = stageB_df[in_stageB & (stageB_df.critic=="qwen") & (stageB_df.target=="gpt_oss")]
    c2 = stageB_df[in_stageB & (stageB_df.critic=="gpt_oss") & (stageB_df.target=="qwen")]

    # A missing critique is passed on as "" and summarised as ("", "").
    c1_json = c1.iloc[0]["json"] if len(c1) else ""
    c2_json = c2.iloc[0]["json"] if len(c2) else ""

    c1_score, c1_issues = critique_compact(c1_json)
    c2_score, c2_issues = critique_compact(c2_json)

    judge_row = stageC_df[in_stageC].iloc[0]

    audit_rows.append({
        "qid": qid,
        "row_index": row_index,
        "error_type": error_type,
        "error_group": error_group,
        "question": question_text,
        "exec_feedback": exec_fb,

        "qwen_conf": qwenA["confidence"],
        "qwen_compile_ok": qwenA["compile_ok"],
        "qwen_helpful_output": qwenA["helpful_output"],

        "gpt_oss_conf": ossA["confidence"],
        "gpt_oss_compile_ok": ossA["compile_ok"],
        "gpt_oss_helpful_output": ossA["helpful_output"],

        "qwen_crit_on_oss_score": c1_score,
        "qwen_crit_on_oss_issues": c1_issues,
        "oss_crit_on_qwen_score": c2_score,
        "oss_crit_on_qwen_issues": c2_issues,

        "judge_choice": judge_row["chosen_source"],
        "judge_conf": judge_row["confidence"],
        "final_helpful_output": judge_row["final_helpful_output"],
    })

audit_df = pd.DataFrame(audit_rows)
audit_df


Unnamed: 0,qid,row_index,error_type,error_group,question,exec_feedback,qwen_conf,qwen_compile_ok,qwen_helpful_output,gpt_oss_conf,gpt_oss_compile_ok,gpt_oss_helpful_output,qwen_crit_on_oss_score,qwen_crit_on_oss_issues,oss_crit_on_qwen_score,oss_crit_on_qwen_issues,judge_choice,judge_conf,final_helpful_output
0,1,22,SYNTAX_SyntaxError,syn,Add two numbers using a python script,Error: Traceback (most recent call last):\n F...,0.95,True,The syntax error indicates that Python doesn't...,1.00,True,The error points to the line where you wrote '...,"evidence=pass, helpful=pass",[high] Uses C-style function syntax instead of...,"evidence=pass, helpful=pass",[med] Did not mention the missing colon after ...,hybrid,0.95,The syntax error occurs at line 1 where you're...
1,3,31,SYNTAX_SyntaxError,syn,Define a function called <code>counter(s)</cod...,Error: Traceback (most recent call last):\n F...,0.95,True,The syntax error indicates that the 'return' s...,0.99,True,"The error says 'return' outside function, whic...","evidence=pass, helpful=pass",[high] Incorrect indentation of return statement,"evidence=pass, helpful=pass",,hybrid,0.99,The syntax error occurs because the 'return' s...
2,11,47,SYNTAX_SyntaxError,syn,Write <b> a function</b> <code>reverse</code>....,Error: Traceback (most recent call last):\n F...,0.95,True,The syntax error indicates that Python encount...,0.99,True,The error points to the line where the functio...,"evidence=pass, helpful=pass",[high] Missing colon after function parameter ...,"evidence=pass, helpful=pass",,hybrid,0.99,The syntax error occurs on line 1 where the fu...
3,4,120,SYNTAX_SyntaxError,syn,Define a function called <code>fibonacci(n)</c...,Error: Traceback (most recent call last):\n F...,0.95,True,The syntax error indicates there's a problem w...,0.99,True,The SyntaxError points to the line with `elif:...,"evidence=pass, helpful=pass",[high] Missing condition in elif statement | [...,"evidence=pass, helpful=pass",[high] elif missing condition | [med] typo in ...,gpt_oss,0.99,The SyntaxError on line 6 is due to an incompl...
4,29,563,SYNTAX_SyntaxError,syn,Write a function <code>is_palindrome(arg)</cod...,Error: Traceback (most recent call last):\n F...,0.95,True,The syntax error indicates that Python encount...,0.97,True,The SyntaxError points to the function definit...,"evidence=pass, helpful=pass",[high] Missing colon in function definition | ...,"evidence=pass, helpful=pass",[low] Missing colon after function definition,hybrid,0.98,The SyntaxError on line 1 points to a missing ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,557,44413,SYNTAX_EXPLICIT_OTHER,syn,Write a function <code>except_last_two</code> ...,"[""Error: Traceback (most recent call last):\n ...",0.95,True,The syntax error indicates that Python reached...,0.99,True,"The error says ""unexpected EOF while parsing"" ...","evidence=pass, helpful=pass",[low] Could be more specific about what consti...,"evidence=pass, helpful=pass",,hybrid,0.99,The syntax error 'unexpected EOF while parsing...
96,552,44940,SYNTAX_EXPLICIT_OTHER,syn,Write a function called <code>strip_string</co...,"[""Error: Traceback (most recent call last):\n ...",0.90,True,The syntax error indicates that your function ...,0.99,True,The traceback shows a SyntaxError at the funct...,"evidence=pass, helpful=pass",[high] Missing colon in function definition,"evidence=pass, helpful=pass",,hybrid,0.99,The syntax error occurs on line 1 because the ...
97,605,46079,SYNTAX_EXPLICIT_OTHER,syn,Write a function <code>is_palindrome(arg)</cod...,"[""Error: Traceback (most recent call last):\n ...",0.95,True,The syntax error indicates that your function ...,0.99,True,"The error points to the line ""palindrome(strin...","evidence=pass, helpful=pass",[high] Incorrect function definition syntax | ...,"evidence=pass, helpful=pass",[med] use of curly braces and missing colon | ...,gpt_oss,0.99,The syntax error occurs at 'palindrome(string ...
98,624,46955,SYNTAX_EXPLICIT_OTHER,syn,Write a Function <b>num_square</b> to find the...,"[""Error: Traceback (most recent call last):\n ...",0.95,True,The execution feedback indicates an Indentatio...,0.97,True,The traceback shows an IndentationError on lin...,"evidence=pass, helpful=pass",[high] IndentationError due to non-code text,"evidence=pass, helpful=pass",,hybrid,0.98,The IndentationError on line 2 occurs because ...


In [None]:
# Save under Colab's /content when it exists (same path as before); otherwise
# use the current directory so the notebook also runs outside Colab.
OUT_AUDIT = os.path.join("/content" if os.path.isdir("/content") else ".", "decision_audit_compact.xlsx")
audit_df.to_excel(OUT_AUDIT, index=False)
print("Saved:", OUT_AUDIT)

print("All outputs:")
print(OUT_STAGEA)
print(OUT_STAGEB)
print(OUT_STAGEC)
print(OUT_FINAL)
print(OUT_AUDIT)


Saved: /content/decision_audit_compact.xlsx
All outputs:
/content/stageA_proposals.xlsx
/content/stageB_critiques.xlsx
/content/stageC_final_helpful_output.xlsx
/content/final_student_helpful_output.xlsx
/content/decision_audit_compact.xlsx


# **Single model**

---



In [None]:
def get_openrouter_key():
    """Look up the OpenRouter API key, trying Colab Secrets before the environment.

    The Colab lookup is best-effort: if importing ``google.colab`` or reading
    the secret raises anything, the failure is ignored and the
    ``OPENROUTER_API_KEY`` environment variable is consulted instead.

    Returns:
        The key string, or None when neither source provides a non-empty value.
    """
    secret = None
    try:
        from google.colab import userdata  # type: ignore
        secret = userdata.get("OPENROUTER_API_KEY")
    except Exception:
        # Deliberately swallowed: outside Colab this import is expected to fail.
        secret = None
    return secret if secret else os.environ.get("OPENROUTER_API_KEY")

# Resolve the credential and stop immediately if it is missing; the request
# helper below sends it as a Bearer token.
OPENROUTER_API_KEY = get_openrouter_key()
assert OPENROUTER_API_KEY, "Set OPENROUTER_API_KEY (Colab Secrets or env var)"

# Chat-completions endpoint and the one model used by this section.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
MODEL = "deepseek/deepseek-v3.2"


In [None]:
# Names of the input-sheet columns read by the single-model run.
QID_COL = "question_id"
ROW_COL = "row_no"
QUES_COL = "question"
CODE_COL = "code"
EXEC_COL = "exec_feedback"

# Workbook of buggy submissions, resolved relative to the working directory.
INPUT_PATH = "syntax_100_hybrid_unique.xlsx"
df = pd.read_excel(INPUT_PATH)

print(f"Rows: {len(df)}")
df.head(2)


Rows: 100


Unnamed: 0,question,code,exec_feedback,error_type,error_group,question_id,row_no
0,Add two numbers using a python script,"int add(int a, int b)\n{\n int c = 0;\n c = ...",Error: Traceback (most recent call last):\n F...,SYNTAX_SyntaxError,syn,1,22
1,Define a function called <code>counter(s)</cod...,def counter(s):\n lump = s.lower().split()\n ...,Error: Traceback (most recent call last):\n F...,SYNTAX_SyntaxError,syn,3,31


In [None]:
def call_openrouter(model_id, messages, temperature=0.1, max_tokens=700, retries=3, sleep_s=1.2):
    """POST a chat-completion request to OpenRouter and return the reply text.

    Args:
        model_id: OpenRouter model identifier sent as ``model``.
        messages: Chat messages list sent as ``messages``.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        retries: Maximum number of attempts.
        sleep_s: Base delay in seconds; attempt ``i`` (0-based) is followed by
            a pause of ``sleep_s * (i + 1)`` before the next attempt.

    Returns:
        The assistant message content when a non-blank reply is received.
        Otherwise a string starting with ``"__ERROR__ "`` followed by the last
        error seen. Request and parsing failures are reported this way rather
        than raised, so callers must check for the prefix.
    """
    headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
    body = {"model": model_id, "messages": messages, "temperature": temperature, "max_tokens": max_tokens}
    # Client errors that will fail identically on every retry (bad request,
    # bad credentials, no credits, forbidden, unknown route/model).
    non_retryable = (400, 401, 402, 403, 404)

    last_err = None
    for attempt in range(retries):
        try:
            r = requests.post(OPENROUTER_URL, headers=headers, data=json.dumps(body), timeout=90)
            if r.status_code != 200:
                last_err = f"HTTP {r.status_code}: {r.text[:400]}"
                if r.status_code in non_retryable:
                    break
            else:
                # "content" may be present but null; treat that as empty text
                # instead of crashing on None.strip().
                content = r.json()["choices"][0]["message"].get("content") or ""
                if content.strip():
                    return content
                last_err = "Empty content"
        except Exception as e:
            # Include the exception type: str(KeyError("choices")) alone is
            # just "'choices'", which is hard to diagnose in the output sheet.
            last_err = f"{type(e).__name__}: {e}"
        # Back off only when another attempt will actually follow.
        if attempt < retries - 1:
            time.sleep(sleep_s * (attempt + 1))
    return f"__ERROR__ {last_err}"

def extract_json(text):
    """Extract the JSON object that starts at the first ``{`` of a model reply.

    Anything before the first ``{`` (prose, a Markdown code fence) and anything
    after the end of the object (closing fence, trailing commentary) is
    ignored, so a valid object followed by extra text is still accepted.

    Args:
        text: Raw reply text; non-string input is rejected.

    Returns:
        The decoded dict, or None when ``text`` is not a string, contains no
        ``{``, or no valid JSON object starts at the first ``{`` (for example a
        truncated reply).
    """
    if not isinstance(text, str):
        return None
    start = text.find("{")
    if start == -1:
        return None
    try:
        # raw_decode parses one JSON value beginning at `start` and reports
        # where it ended, tolerating any text that follows it.
        obj, _end = json.JSONDecoder().raw_decode(text, start)
    except (ValueError, RecursionError):
        return None
    return obj if isinstance(obj, dict) else None


In [None]:
# Reply contract for the single-model run; no corrected code is requested.
SCHEMA = """Return VALID JSON only (no markdown).
Schema:
{
  "helpful_output": "2-6 sentences. Explain what the syntax error indicates and what kind of correction is needed. Do not paste the full corrected code.",
  "fix_outline": ["1-3 bullets (high-level, no full code)"],
  "confidence": 0.0
}
"""

def build_prompt(question_text, code, exec_feedback):
    """Assemble the single-model user prompt for one submission.

    Args:
        question_text: The exercise statement.
        code: The student's buggy code.
        exec_feedback: The execution feedback produced for that code.

    Returns:
        The prompt text: role line, rules, the schema, then the three inputs,
        each wrapped in ``<<<`` / ``>>>`` delimiters.
    """
    def fenced(header, body):
        # One delimited input section of the prompt.
        return f"{header}\n<<<\n{body}\n>>>"

    rules = [
        "Rules:",
        "- Be specific and evidence-grounded (mention likely line/construct based on feedback).",
        "- Describe the kind of fix needed (missing colon/bracket/quote, indentation mismatch, etc.).",
        "- Do NOT paste the entire corrected program or provide full replacement code.",
    ]
    sections = [
        "You are a Python TA writing a helpful message when a student's code has a SYNTAX error.",
        "\n".join(rules),
        SCHEMA,
        fenced("QUESTION:", question_text),
        fenced("BUGGY CODE:", code),
        fenced("EXECUTION FEEDBACK:", exec_feedback),
    ]
    return "\n\n".join(sections).strip()


In [None]:
# Single-model run: one request per input row, with fetch and parse failures
# recorded as flags instead of aborting the loop.
rows = []
for _, row in df.iterrows():
    qid = row[QID_COL]
    row_index = int(row[ROW_COL])
    # Empty spreadsheet cells load as NaN, and NaN is truthy, so
    # `str(value or "")` would put the literal text "nan" into the prompt.
    question_text, code, fb = (
        "" if pd.isna(value) else str(value)
        for value in (row[QUES_COL], row[CODE_COL], row[EXEC_COL])
    )

    raw = call_openrouter(
        MODEL,
        messages=[
            {"role": "system", "content": "Return JSON only. No markdown."},
            {"role": "user", "content": build_prompt(question_text, code, fb)},
        ],
        temperature=0.1,
        max_tokens=700
    )

    js = extract_json(raw)
    parsed = isinstance(js, dict)
    rows.append({
        "qid": qid,
        "row_index": row_index,
        "raw": raw,
        "json": json.dumps(js, ensure_ascii=False) if parsed else None,
        "helpful_output": js.get("helpful_output") if parsed else None,
        "fix_outline": json.dumps(js.get("fix_outline"), ensure_ascii=False) if parsed else None,
        "confidence": js.get("confidence") if parsed else None,
        "fetch_error": raw.startswith("__ERROR__"),  # sentinel prefix returned by call_openrouter
        "parse_error": not parsed,
    })

out_df = pd.DataFrame(rows)

# Save under Colab's /content when it exists (same path as before); otherwise
# use the current directory so the notebook also runs outside Colab.
OUT_PATH = os.path.join("/content" if os.path.isdir("/content") else ".", "deepseek_single_helpful_output.xlsx")
out_df.to_excel(OUT_PATH, index=False)
print("Saved:", OUT_PATH)

# quick health stats
print("Fetch errors:", int(out_df["fetch_error"].sum()))
print("Parse errors:", int(out_df["parse_error"].sum()))


Saved: /content/deepseek_single_helpful_output.xlsx
Fetch errors: 0
Parse errors: 0
