In [None]:
# Cell 1: Imports
import sys
sys.path.append("..")  # allow importing from src
from src.utils import load_student_files, save_model_results, ensure_dir
from pathlib import Path
import os
import json

# Model clients
import openai
import anthropic
import google.generativeai as genai
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from dotenv import load_dotenv

# Load API keys
# load_dotenv() runs first so that the lookups below can see any variables it
# adds to the process environment; a key that is not set comes back as None.
load_dotenv()
(
    OPENAI_KEY,
    GOOGLE_KEY,
    ANTHROPIC_KEY,
    PERPLEXITY_KEY,
) = (
    os.getenv(env_name)
    for env_name in (
        "OPENAI_API_KEY",
        "GOOGLE_API_KEY",
        "ANTHROPIC_API_KEY",
        "PERPLEXITY_API_KEY",
    )
)


In [None]:
# Cell 2: Paths
# Both locations are written relative to the notebook's working directory.
DATA_DIR, RESULTS_DIR = (
    Path("../data/processed_submissions"),
    Path("../results/raw"),
)
ensure_dir(RESULTS_DIR)  # helper imported from src.utils

# Names of the models compared in this notebook.
MODELS = ["chatgpt", "gemini", "claude", "perplexity", "starcoder"]


# StarCoder setup
# Checkpoint identifier handed to both from_pretrained() calls below.
STARCODER_MODEL = "bigcode/starcoder"
# `tokenizer` and `model` are created when this cell runs and are read as
# module-level globals by query_starcoder().
# NOTE(review): from_pretrained() presumably fetches/caches the checkpoint on
# first use, which can be slow and memory-hungry -- confirm before Run All.
tokenizer = AutoTokenizer.from_pretrained(STARCODER_MODEL)
model = AutoModelForCausalLM.from_pretrained(STARCODER_MODEL)

# Helper functions for models

def query_chatgpt(code):
    """Ask GPT-4 for three reflective, non-solution prompts about ``code``.

    The module-level ``openai.ChatCompletion`` interface was removed in
    ``openai>=1.0``; this helper uses the client-object API when it is
    available and falls back to the legacy call on older installations.

    Parameters
    ----------
    code : str
        Source text of the submission to analyse.

    Returns
    -------
    str
        Message content of the first completion choice.
    """
    prompt = f"Analyze this Python code and generate three reflective, non-solution prompts:\n\n{code}"
    request = dict(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=500,
    )

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: requests go through an explicit client instance.
        client = openai.OpenAI(api_key=OPENAI_KEY)
        response = client.chat.completions.create(**request)
    else:
        # openai<1.0: legacy module-level configuration and call.
        openai.api_key = OPENAI_KEY
        response = openai.ChatCompletion.create(**request)

    # Attribute access on the message works with both SDK generations.
    return response.choices[0].message.content

def query_gemini(code, model_name="gemini-1.5-pro"):
    """Ask Gemini for three reflective, non-solution prompts about ``code``.

    ``google.generativeai`` has no ``genai.chat.create`` entry point and its
    responses have no ``last_response`` attribute; text generation goes
    through ``GenerativeModel.generate_content`` and the reply is read from
    ``response.text``.

    Parameters
    ----------
    code : str
        Source text of the submission to analyse.
    model_name : str, optional
        Gemini model identifier. The bare ``"gemini-1.5"`` used previously is
        not a complete model id, so a concrete 1.5 variant is the default.
        NOTE(review): confirm which 1.5 variant the study intends to use.

    Returns
    -------
    str
        Text of the model's reply.

    Notes
    -----
    NOTE(review): ``response.text`` is expected to raise if the reply was
    blocked or contains no text part -- confirm against the SDK version in use.
    """
    genai.configure(api_key=GOOGLE_KEY)
    prompt = f"Analyze this Python code and generate three reflective, non-solution prompts:\n\n{code}"
    gemini = genai.GenerativeModel(model_name)
    response = gemini.generate_content(prompt)
    return response.text

def query_claude(code):
    """Ask Claude 2 for three reflective, non-solution prompts about ``code``.

    Two fixes relative to the earlier version:

    * The client is built with ``api_key=`` as a keyword. The SDK generation
      that exposes ``client.completions.create`` takes its constructor
      arguments keyword-only, so passing the key positionally fails.
    * The Text Completions endpoint expects the prompt to be framed as a
      ``Human``/``Assistant`` exchange; an unframed prompt is rejected.

    Parameters
    ----------
    code : str
        Source text of the submission to analyse.

    Returns
    -------
    str
        The ``completion`` text of the response.
    """
    client = anthropic.Anthropic(api_key=ANTHROPIC_KEY)
    instruction = f"Analyze this Python code and generate three reflective, non-solution prompts:\n\n{code}"
    # Required turn markers for the legacy Text Completions prompt format.
    prompt = f"\n\nHuman: {instruction}\n\nAssistant:"
    response = client.completions.create(
        model="claude-2",
        prompt=prompt,
        max_tokens_to_sample=500,
    )
    return response.completion

def query_perplexity(code):
    """Return a stand-in response for Perplexity; no API call is made yet.

    The placeholder text embeds the first 50 characters of ``code`` followed
    by a literal ``...``.
    """
    # Placeholder for actual API call
    preview = code[:50]
    return "Perplexity AI response placeholder for code:\n{}...".format(preview)

def query_starcoder(code):
    """Generate a StarCoder continuation for ``code`` and return only the new text.

    Uses the module-level ``tokenizer`` and ``model``. For a causal LM,
    ``generate`` returns the prompt tokens followed by the newly generated
    ones, so the prompt portion is sliced off before decoding. Otherwise the
    result would start with a copy of the whole submission, unlike the other
    model helpers, which return only the model's reply.

    Parameters
    ----------
    code : str
        Source text used verbatim as the generation prompt (no instruction
        text is added).

    Returns
    -------
    str
        Decoded text of at most 150 newly generated tokens.
    """
    # Keep the input tensors on the same device as the model weights.
    inputs = tokenizer(code, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=150)
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)


In [None]:
# Cell 3: Load Student Submissions
# `submissions` is iterated with .items() as (filename, code) pairs in Cell 4.
submissions = load_student_files(data_dir=DATA_DIR)
print("Loaded {} student submissions.".format(len(submissions)))


In [None]:
# Cell 4: Query All Models
# Every submission is sent to each model in a fixed order; the per-model
# responses are collected under the model's name and saved once per file.
for filename, code in submissions.items():
    results = {
        model_name: ask(code)
        for model_name, ask in (
            ("chatgpt", query_chatgpt),
            ("gemini", query_gemini),
            ("claude", query_claude),
            ("perplexity", query_perplexity),
            ("starcoder", query_starcoder),
        )
    }

    output_file = save_model_results(results, filename)
    print("{} processed and saved to {}".format(filename, output_file))