In [None]:
# Cell 1: Imports
import sys
sys.path.append("..")  # allow importing from src
from src.utils import (
    load_student_files,
    score_diagnosticity,
    score_conceptual_focus,
    score_solution_neutrality
)

import json
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Cell 2: Paths
# Model identifiers; the loading cell uses them as keys into each results file
# and as column-name prefixes in the summary table.
MODELS = ["chatgpt", "gemini", "claude", "perplexity", "starcoder"]

# Input: per-file raw results (JSON). Relative to the notebook's directory.
RAW_RESULTS_DIR = Path("../results/raw")

# Output: processed artefacts. Created eagerly so later writes cannot fail on a
# missing directory; a no-op when it already exists.
PROCESSED_DIR = Path("../results/processed")
PROCESSED_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# Cell 3: Load Results
# Builds one summary row per raw result file: the file's stem without its
# trailing "_results" suffix, plus three scores for every model in MODELS.
# The table is written to PROCESSED_DIR / "analysis_summary.csv" and previewed.
RESULT_SUFFIX = "_results"

# Metric name -> scoring function. Insertion order fixes the column order.
scorers = {
    "diagnosticity": score_diagnosticity,
    "conceptual_focus": score_conceptual_focus,
    "solution_neutrality": score_solution_neutrality,
}

# Declare the column layout up front so `df` carries the expected columns even
# when no result files are found; otherwise later `df[[...]]` column selections
# raise KeyError on an empty, column-less frame.
summary_columns = ["filename"] + [
    f"{model}_{metric}" for model in MODELS for metric in scorers
]

summary = []

# Path.glob yields entries in filesystem order; sort so the CSV row order is
# reproducible across runs and machines.
for file_path in sorted(RAW_RESULTS_DIR.glob(f"*{RESULT_SUFFIX}.json")):
    with open(file_path, "r", encoding="utf-8") as f:
        results = json.load(f)

    # The glob pattern guarantees the stem ends with the suffix, so slice it off
    # rather than str.replace(), which would also delete any earlier occurrence
    # of "_results" inside the name.
    row = {"filename": file_path.stem[: -len(RESULT_SUFFIX)]}

    for model in MODELS:
        # A model absent from the file is scored on an empty response.
        text = results.get(model, "")
        for metric, scorer in scorers.items():
            row[f"{model}_{metric}"] = scorer(text)

    summary.append(row)

df = pd.DataFrame(summary, columns=summary_columns)
df.to_csv(PROCESSED_DIR / "analysis_summary.csv", index=False)
df.head()



In [None]:


# Cell 4: Visualize Diagnosticity by Model
# One box per model, drawn from that model's "<model>_diagnosticity" column.
diagnosticity_columns = [f"{name}_diagnosticity" for name in MODELS]

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df[diagnosticity_columns], ax=ax)
ax.set_title("Diagnosticity Scores by Model")
ax.set_ylabel("Diagnosticity Score")
ax.set_xlabel("Models")
plt.show()





In [None]:
# Cell 5: Visualize Conceptual Focus
# One box per model, drawn from that model's "<model>_conceptual_focus" column.
conceptual_focus_columns = [f"{name}_conceptual_focus" for name in MODELS]

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df[conceptual_focus_columns], ax=ax)
ax.set_title("Conceptual Focus Scores by Model")
ax.set_ylabel("Conceptual Focus Score")
ax.set_xlabel("Models")
plt.show()


In [None]:
# Cell 6: Visualize Solution Neutrality
# One box per model, drawn from that model's "<model>_solution_neutrality" column.
solution_neutrality_columns = [f"{name}_solution_neutrality" for name in MODELS]

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df[solution_neutrality_columns], ax=ax)
ax.set_title("Solution Neutrality Scores by Model")
ax.set_ylabel("Solution Neutrality (1=Solution-Neutral)")
ax.set_xlabel("Models")
plt.show()
